This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest.xs: Fix compile warning: format ‘%d’ expects argument of type ‘int’, but...
[perl5.git] / ext / XS-APItest / APItest.xs
index fa11b05..fcaea38 100644 (file)
@@ -9,10 +9,15 @@
 #include "EXTERN.h"
 #include "perl.h"
 #include "XSUB.h"
+
+typedef FILE NativeFile;
+
 #include "fakesdio.h"   /* Causes us to use PerlIO below */
 
 typedef SV *SVREF;
 typedef PTR_TBL_t *XS__APItest__PtrTable;
+typedef PerlIO * InputStream;
+typedef PerlIO * OutputStream;
 
 #define croak_fail() croak("fail at " __FILE__ " line %d", __LINE__)
 #define croak_fail_nep(h, w) croak("fail %p!=%p at " __FILE__ " line %d", (h), (w), __LINE__)
@@ -754,6 +759,7 @@ static SV *hintkey_swaplabel_sv, *hintkey_labelconst_sv;
 static SV *hintkey_arrayfullexpr_sv, *hintkey_arraylistexpr_sv;
 static SV *hintkey_arraytermexpr_sv, *hintkey_arrayarithexpr_sv;
 static SV *hintkey_arrayexprflags_sv;
+static SV *hintkey_subsignature_sv;
 static SV *hintkey_DEFSV_sv;
 static SV *hintkey_with_vars_sv;
 static SV *hintkey_join_with_space_sv;
@@ -1046,6 +1052,65 @@ static OP *THX_parse_keyword_arrayexprflags(pTHX)
     return o ? newANONLIST(o) : newANONHASH(newOP(OP_STUB, 0));
 }
 
+#define parse_keyword_subsignature() THX_parse_keyword_subsignature(aTHX)
+static OP *THX_parse_keyword_subsignature(pTHX)
+{
+    OP *retop = NULL, *listop, *sigop = parse_subsignature(0);
+    OP *kid;
+    int seen_nextstate = 0;
+
+    /* We can't yield the optree as is to the caller because it won't be
+     * executable outside of a called sub. We'll have to convert it into
+     * something safe for them to invoke.
+     * sigop should be an OP_NULL above a OP_LINESEQ containing
+     * OP_NEXTSTATE-separated OP_ARGCHECK and OP_ARGELEMs
+     */
+    if(sigop->op_type != OP_NULL)
+       croak("Expected parse_subsignature() to yield an OP_NULL");
+    
+    if(!(sigop->op_flags & OPf_KIDS))
+       croak("Expected parse_subsignature() to yield an OP_NULL with kids");
+    listop = cUNOPx(sigop)->op_first;
+
+    if(listop->op_type != OP_LINESEQ)
+       croak("Expected parse_subsignature() to yield an OP_LINESEQ");
+
+    for(kid = cLISTOPx(listop)->op_first; kid; kid = OpSIBLING(kid)) {
+       switch(kid->op_type) {
+           case OP_NEXTSTATE:
+               /* Only emit the first one otherwise they get boring */
+               if(seen_nextstate)
+                   break;
+               seen_nextstate++;
+               retop = op_append_list(OP_LIST, retop, newSVOP(OP_CONST, 0,
+                   /* newSVpvf("nextstate:%s:%d", CopFILE(cCOPx(kid)), cCOPx(kid)->cop_line))); */
+                   newSVpvf("nextstate:%u", (unsigned int)cCOPx(kid)->cop_line)));
+               break;
+           case OP_ARGCHECK: {
+               UNOP_AUX_item *aux = cUNOP_AUXx(kid)->op_aux;
+               char slurpy = (char)(aux[2].iv);
+               retop = op_append_list(OP_LIST, retop, newSVOP(OP_CONST, 0,
+                   newSVpvf("argcheck:%d:%d:%c", (int)(aux[0].iv), (int)(aux[1].iv), slurpy ? slurpy : '-')));
+               break;
+           }
+           case OP_ARGELEM: {
+               PADOFFSET padix = kid->op_targ;
+               PADNAMELIST *names = PadlistNAMES(CvPADLIST(find_runcv(0)));
+               char *namepv = PadnamePV(padnamelist_fetch(names, padix));
+               retop = op_append_list(OP_LIST, retop, newSVOP(OP_CONST, 0,
+                   newSVpvf(kid->op_flags & OPf_KIDS ? "argelem:%s:d" : "argelem:%s", namepv)));
+               break;
+           }
+           default:
+               fprintf(stderr, "TODO: examine kid %p (optype=%s)\n", kid, PL_op_name[kid->op_type]);
+               break;
+       }
+    }
+
+    op_free(sigop);
+    return newANONLIST(retop);
+}
+
 #define parse_keyword_DEFSV() THX_parse_keyword_DEFSV(aTHX)
 static OP *THX_parse_keyword_DEFSV(pTHX)
 {
@@ -1241,7 +1306,12 @@ static int my_keyword_plugin(pTHX_
                    keyword_active(hintkey_join_with_space_sv)) {
        *op_ptr = parse_join_with_space();
        return KEYWORD_PLUGIN_EXPR;
+    } else if (memEQs(keyword_ptr, keyword_len, "subsignature") &&
+                   keyword_active(hintkey_subsignature_sv)) {
+       *op_ptr = parse_keyword_subsignature();
+       return KEYWORD_PLUGIN_EXPR;
     } else {
+        assert(next_keyword_plugin != my_keyword_plugin);
        return next_keyword_plugin(aTHX_ keyword_ptr, keyword_len, op_ptr);
     }
 }
@@ -1333,7 +1403,7 @@ my_ck_rv2cv(pTHX_ OP *o)
     {
        SvGROW(ref, SvCUR(ref)+2);
        *SvEND(ref) = '_';
-       SvCUR(ref)++;
+       SvCUR(ref)++; /* Not _set, so we don't accidentally break non-PERL_CORE */
        *SvEND(ref) = '\0';
     }
     return old_ck_rv2cv(aTHX_ o);
@@ -1374,15 +1444,76 @@ bytes_cmp_utf8(bytes, utf8)
        RETVAL
 
 AV *
+test_utf8_to_bytes(bytes, len)
+        U8 * bytes
+        STRLEN len
+    PREINIT:
+        char * ret;
+    CODE:
+        RETVAL = newAV();
+        sv_2mortal((SV*)RETVAL);
+
+        ret = (char *) utf8_to_bytes(bytes, &len);
+        av_push(RETVAL, newSVpv(ret, 0));
+
+        /* utf8_to_bytes uses (STRLEN)-1 to signal errors, and we want to
+         * return that as -1 to perl, so cast to SSize_t in case
+         * sizeof(IV) > sizeof(STRLEN) */
+        av_push(RETVAL, newSViv((SSize_t)len));
+        av_push(RETVAL, newSVpv((const char *) bytes, 0));
+
+    OUTPUT:
+        RETVAL
+
+AV *
+test_utf8n_to_uvchr_msgs(s, len, flags)
+        char *s
+        STRLEN len
+        U32 flags
+    PREINIT:
+        STRLEN retlen;
+        UV ret;
+        U32 errors;
+        AV *msgs = NULL;
+
+    CODE:
+        RETVAL = newAV();
+        sv_2mortal((SV*)RETVAL);
+
+        ret = utf8n_to_uvchr_msgs((U8*)  s,
+                                         len,
+                                         &retlen,
+                                         flags,
+                                         &errors,
+                                         &msgs);
+
+        /* Returns the return value in [0]; <retlen> in [1], <errors> in [2] */
+        av_push(RETVAL, newSVuv(ret));
+        if (retlen == (STRLEN) -1) {
+            av_push(RETVAL, newSViv(-1));
+        }
+        else {
+            av_push(RETVAL, newSVuv(retlen));
+        }
+        av_push(RETVAL, newSVuv(errors));
+
+        /* And any messages in [3] */
+        if (msgs) {
+            av_push(RETVAL, newRV_noinc((SV*)msgs));
+        }
+
+    OUTPUT:
+        RETVAL
+
+AV *
 test_utf8n_to_uvchr_error(s, len, flags)
 
-        SV *s
-        SV *len
-        SV *flags
+        char *s
+        STRLEN len
+        U32 flags
     PREINIT:
         STRLEN retlen;
         UV ret;
-        STRLEN slen;
         U32 errors;
 
     CODE:
@@ -1395,10 +1526,10 @@ test_utf8n_to_uvchr_error(s, len, flags)
         RETVAL = newAV();
         sv_2mortal((SV*)RETVAL);
 
-        ret = utf8n_to_uvchr_error((U8*) SvPV(s, slen),
-                                         SvUV(len),
+        ret = utf8n_to_uvchr_error((U8*) s,
+                                         len,
                                          &retlen,
-                                         SvUV(flags),
+                                         flags,
                                          &errors);
 
         /* Returns the return value in [0]; <retlen> in [1], <errors> in [2] */
@@ -1446,7 +1577,7 @@ test_uvchr_to_utf8_flags(uv, flags)
         SV *uv
         SV *flags
     PREINIT:
-        U8 dest[UTF8_MAXBYTES];
+        U8 dest[UTF8_MAXBYTES + 1];
         U8 *ret;
 
     CODE:
@@ -1460,6 +1591,36 @@ test_uvchr_to_utf8_flags(uv, flags)
     OUTPUT:
         RETVAL
 
+AV *
+test_uvchr_to_utf8_flags_msgs(uv, flags)
+
+        SV *uv
+        SV *flags
+    PREINIT:
+        U8 dest[UTF8_MAXBYTES + 1];
+        U8 *ret;
+
+    CODE:
+        HV *msgs = NULL;
+        RETVAL = newAV();
+        sv_2mortal((SV*)RETVAL);
+
+        ret = uvchr_to_utf8_flags_msgs(dest, SvUV(uv), SvUV(flags), &msgs);
+
+        if (ret) {
+            av_push(RETVAL, newSVpvn((char *) dest, ret - dest));
+        }
+        else {
+            av_push(RETVAL,  &PL_sv_undef);
+        }
+
+        if (msgs) {
+            av_push(RETVAL, newRV_noinc((SV*)msgs));
+        }
+
+    OUTPUT:
+        RETVAL
+
 MODULE = XS::APItest:Overload  PACKAGE = XS::APItest::Overload
 
 void
@@ -2312,6 +2473,7 @@ PREINIT:
     I32 retcnt;
     SV * errsv;
     char * errstr;
+    STRLEN errlen;
     SV * miscsv = sv_newmortal();
     HV * hv = (HV*)sv_2mortal((SV*)newHV());
 CODE:
@@ -2337,17 +2499,24 @@ CODE:
        only current internal behavior, these tests can be changed in the
        future if necessery */
     PUSHMARK(SP);
-    retcnt = call_sv(&PL_sv_yes, 0); /* does nothing */
+    retcnt = call_sv(&PL_sv_yes, G_EVAL);
     SPAGAIN;
     SP -= retcnt;
+    errsv = ERRSV;
+    errstr = SvPV(errsv, errlen);
+    if(memBEGINs(errstr, errlen, "Undefined subroutine &main::1 called at")) {
+        PUSHMARK(SP);
+        retcnt = call_sv((SV*)i_sub, 0); /* call again to increase counter */
+        SPAGAIN;
+        SP -= retcnt;
+    }
     PUSHMARK(SP);
     retcnt = call_sv(&PL_sv_no, G_EVAL);
     SPAGAIN;
     SP -= retcnt;
     errsv = ERRSV;
-    errstr = SvPV_nolen(errsv);
-    if(strnEQ(errstr, "Undefined subroutine &main:: called at",
-              sizeof("Undefined subroutine &main:: called at") - 1)) {
+    errstr = SvPV(errsv, errlen);
+    if(memBEGINs(errstr, errlen, "Undefined subroutine &main:: called at")) {
         PUSHMARK(SP);
         retcnt = call_sv((SV*)i_sub, 0); /* call again to increase counter */
         SPAGAIN;
@@ -2358,9 +2527,8 @@ CODE:
     SPAGAIN;
     SP -= retcnt;
     errsv = ERRSV;
-    errstr = SvPV_nolen(errsv);
-    if(strnEQ(errstr, "Can't use an undefined value as a subroutine reference at",
-              sizeof("Can't use an undefined value as a subroutine reference at") - 1)) {
+    errstr = SvPV(errsv, errlen);
+    if(memBEGINs(errstr, errlen, "Can't use an undefined value as a subroutine reference at")) {
         PUSHMARK(SP);
         retcnt = call_sv((SV*)i_sub, 0); /* call again to increase counter */
         SPAGAIN;
@@ -2371,9 +2539,8 @@ CODE:
     SPAGAIN;
     SP -= retcnt;
     errsv = ERRSV;
-    errstr = SvPV_nolen(errsv);
-    if(strnEQ(errstr, "Not a CODE reference at",
-              sizeof("Not a CODE reference at") - 1)) {
+    errstr = SvPV(errsv, errlen);
+    if(memBEGINs(errstr, errlen, "Not a CODE reference at")) {
         PUSHMARK(SP);
         retcnt = call_sv((SV*)i_sub, 0); /* call again to increase counter */
         SPAGAIN;
@@ -2887,7 +3054,7 @@ utf16_to_utf8 (sv, ...)
            len = SvUV(ST(1));
        }
        /* Mortalise this right now, as we'll be testing croak()s  */
-       dest = sv_2mortal(newSV(len * 3 / 2 + 1));
+       dest = sv_2mortal(newSV(len * 2 + 1));
        if (ix) {
            utf16_to_utf8_reversed(source, (U8 *)SvPVX(dest), len, &got);
        } else {
@@ -3892,11 +4059,11 @@ BOOT:
     hintkey_arraytermexpr_sv = newSVpvs_share("XS::APItest/arraytermexpr");
     hintkey_arrayarithexpr_sv = newSVpvs_share("XS::APItest/arrayarithexpr");
     hintkey_arrayexprflags_sv = newSVpvs_share("XS::APItest/arrayexprflags");
+    hintkey_subsignature_sv = newSVpvs_share("XS::APItest/subsignature");
     hintkey_DEFSV_sv = newSVpvs_share("XS::APItest/DEFSV");
     hintkey_with_vars_sv = newSVpvs_share("XS::APItest/with_vars");
     hintkey_join_with_space_sv = newSVpvs_share("XS::APItest/join_with_space");
-    next_keyword_plugin = PL_keyword_plugin;
-    PL_keyword_plugin = my_keyword_plugin;
+    wrap_keyword_plugin(my_keyword_plugin, &next_keyword_plugin);
 }
 
 void
@@ -4059,12 +4226,26 @@ OUTPUT:
     RETVAL
 
 char *
+SvPVbyte_nomg(SV *sv)
+CODE:
+    RETVAL = SvPVbyte_nomg(sv, PL_na);
+OUTPUT:
+    RETVAL
+
+char *
 SvPVutf8(SV *sv)
 CODE:
     RETVAL = SvPVutf8_nolen(sv);
 OUTPUT:
     RETVAL
 
+char *
+SvPVutf8_nomg(SV *sv)
+CODE:
+    RETVAL = SvPVutf8_nomg(sv, PL_na);
+OUTPUT:
+    RETVAL
+
 void
 setup_addissub()
 CODE:
@@ -4095,7 +4276,6 @@ CODE:
        /* The slab allocator does not like CvROOT being set. */
        CvROOT(PL_compcv) = (OP *)1;
        o = newFOROP(0, 0, newOP(OP_PUSHMARK, 0), 0, 0);
-#ifdef PERL_OP_PARENT
        if (cLOOPx(cUNOPo->op_first)->op_last->op_sibparent
                != cUNOPo->op_first)
        {
@@ -4103,7 +4283,6 @@ CODE:
            RETVAL = FALSE;
        }
        else
-#endif
            /* If we do not crash before returning, the test passes. */
            RETVAL = TRUE;
        op_free(o);
@@ -4296,6 +4475,28 @@ get_cv_flags(SV *sv, UV flags)
     OUTPUT:
         RETVAL
 
+void
+unshift_and_set_defav(SV *sv,...)
+    CODE:
+       av_unshift(GvAVn(PL_defgv), 1);
+       av_store(GvAV(PL_defgv), 0, newSVuv(42));
+       sv_setuv(sv, 43);
+
+PerlIO *
+PerlIO_stderr()
+
+OutputStream
+PerlIO_stdout()
+
+InputStream
+PerlIO_stdin()
+
+#undef FILE
+#define FILE NativeFile
+
+FILE *
+PerlIO_exportFILE(PerlIO *f, const char *mode)
+
 MODULE = XS::APItest PACKAGE = XS::APItest::AUTOLOADtest
 
 int
@@ -4460,9 +4661,9 @@ test_isBLANK_LC(UV ord)
         RETVAL
 
 bool
-test_isBLANK_utf8(unsigned char * p, int type)
+test_isBLANK_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
 
         /* In this function and those that follow, the boolean 'type'
@@ -4473,22 +4674,22 @@ test_isBLANK_utf8(unsigned char * p, int type)
             RETVAL = isBLANK_utf8_safe(p, e);
         }
         else {
-            RETVAL = isBLANK_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isBLANK_LC_utf8(unsigned char * p, int type)
+test_isBLANK_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isBLANK_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isBLANK_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4508,16 +4709,16 @@ test_isVERTWS_uvchr(UV ord)
         RETVAL
 
 bool
-test_isVERTWS_utf8(unsigned char * p, int type)
+test_isVERTWS_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isVERTWS_utf8_safe(p, e);
         }
         else {
-            RETVAL = isVERTWS_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4572,31 +4773,31 @@ test_isUPPER_LC(UV ord)
         RETVAL
 
 bool
-test_isUPPER_utf8(unsigned char * p, int type)
+test_isUPPER_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isUPPER_utf8_safe(p, e);
         }
         else {
-            RETVAL = isUPPER_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isUPPER_LC_utf8(unsigned char * p, int type)
+test_isUPPER_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isUPPER_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isUPPER_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4651,31 +4852,31 @@ test_isLOWER_LC(UV ord)
         RETVAL
 
 bool
-test_isLOWER_utf8(unsigned char * p, int type)
+test_isLOWER_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isLOWER_utf8_safe(p, e);
         }
         else {
-            RETVAL = isLOWER_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isLOWER_LC_utf8(unsigned char * p, int type)
+test_isLOWER_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isLOWER_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isLOWER_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4730,31 +4931,31 @@ test_isALPHA_LC(UV ord)
         RETVAL
 
 bool
-test_isALPHA_utf8(unsigned char * p, int type)
+test_isALPHA_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isALPHA_utf8_safe(p, e);
         }
         else {
-            RETVAL = isALPHA_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isALPHA_LC_utf8(unsigned char * p, int type)
+test_isALPHA_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isALPHA_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isALPHA_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4809,31 +5010,31 @@ test_isWORDCHAR_LC(UV ord)
         RETVAL
 
 bool
-test_isWORDCHAR_utf8(unsigned char * p, int type)
+test_isWORDCHAR_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isWORDCHAR_utf8_safe(p, e);
         }
         else {
-            RETVAL = isWORDCHAR_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isWORDCHAR_LC_utf8(unsigned char * p, int type)
+test_isWORDCHAR_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isWORDCHAR_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isWORDCHAR_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4888,31 +5089,31 @@ test_isALPHANUMERIC_LC(UV ord)
         RETVAL
 
 bool
-test_isALPHANUMERIC_utf8(unsigned char * p, int type)
+test_isALPHANUMERIC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isALPHANUMERIC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isALPHANUMERIC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isALPHANUMERIC_LC_utf8(unsigned char * p, int type)
+test_isALPHANUMERIC_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isALPHANUMERIC_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isALPHANUMERIC_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4946,31 +5147,31 @@ test_isALNUM_LC(UV ord)
         RETVAL
 
 bool
-test_isALNUM_utf8(unsigned char * p, int type)
+test_isALNUM_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isWORDCHAR_utf8_safe(p, e);
         }
         else {
-            RETVAL = isWORDCHAR_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isALNUM_LC_utf8(unsigned char * p, int type)
+test_isALNUM_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isWORDCHAR_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isWORDCHAR_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -4997,31 +5198,31 @@ test_isDIGIT_LC_uvchr(UV ord)
         RETVAL
 
 bool
-test_isDIGIT_utf8(unsigned char * p, int type)
+test_isDIGIT_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isDIGIT_utf8_safe(p, e);
         }
         else {
-            RETVAL = isDIGIT_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isDIGIT_LC_utf8(unsigned char * p, int type)
+test_isDIGIT_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isDIGIT_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isDIGIT_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5125,31 +5326,31 @@ test_isIDFIRST_LC(UV ord)
         RETVAL
 
 bool
-test_isIDFIRST_utf8(unsigned char * p, int type)
+test_isIDFIRST_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isIDFIRST_utf8_safe(p, e);
         }
         else {
-            RETVAL = isIDFIRST_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isIDFIRST_LC_utf8(unsigned char * p, int type)
+test_isIDFIRST_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isIDFIRST_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isIDFIRST_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5204,31 +5405,31 @@ test_isIDCONT_LC(UV ord)
         RETVAL
 
 bool
-test_isIDCONT_utf8(unsigned char * p, int type)
+test_isIDCONT_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isIDCONT_utf8_safe(p, e);
         }
         else {
-            RETVAL = isIDCONT_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isIDCONT_LC_utf8(unsigned char * p, int type)
+test_isIDCONT_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isIDCONT_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isIDCONT_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5283,31 +5484,31 @@ test_isSPACE_LC(UV ord)
         RETVAL
 
 bool
-test_isSPACE_utf8(unsigned char * p, int type)
+test_isSPACE_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isSPACE_utf8_safe(p, e);
         }
         else {
-            RETVAL = isSPACE_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isSPACE_LC_utf8(unsigned char * p, int type)
+test_isSPACE_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isSPACE_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isSPACE_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5362,9 +5563,9 @@ test_isASCII_LC(UV ord)
         RETVAL
 
 bool
-test_isASCII_utf8(unsigned char * p, int type)
+test_isASCII_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
 #ifndef DEBUGGING
         PERL_UNUSED_VAR(e);
@@ -5374,15 +5575,15 @@ test_isASCII_utf8(unsigned char * p, int type)
             RETVAL = isASCII_utf8_safe(p, e);
         }
         else {
-            RETVAL = isASCII_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isASCII_LC_utf8(unsigned char * p, int type)
+test_isASCII_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
 #ifndef DEBUGGING
         PERL_UNUSED_VAR(e);
@@ -5392,7 +5593,7 @@ test_isASCII_LC_utf8(unsigned char * p, int type)
             RETVAL = isASCII_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isASCII_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5447,31 +5648,31 @@ test_isCNTRL_LC(UV ord)
         RETVAL
 
 bool
-test_isCNTRL_utf8(unsigned char * p, int type)
+test_isCNTRL_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isCNTRL_utf8_safe(p, e);
         }
         else {
-            RETVAL = isCNTRL_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isCNTRL_LC_utf8(unsigned char * p, int type)
+test_isCNTRL_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isCNTRL_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isCNTRL_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5526,31 +5727,31 @@ test_isPRINT_LC(UV ord)
         RETVAL
 
 bool
-test_isPRINT_utf8(unsigned char * p, int type)
+test_isPRINT_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPRINT_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPRINT_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isPRINT_LC_utf8(unsigned char * p, int type)
+test_isPRINT_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPRINT_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPRINT_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5605,31 +5806,31 @@ test_isGRAPH_LC(UV ord)
         RETVAL
 
 bool
-test_isGRAPH_utf8(unsigned char * p, int type)
+test_isGRAPH_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isGRAPH_utf8_safe(p, e);
         }
         else {
-            RETVAL = isGRAPH_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isGRAPH_LC_utf8(unsigned char * p, int type)
+test_isGRAPH_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isGRAPH_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isGRAPH_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5684,31 +5885,31 @@ test_isPUNCT_LC(UV ord)
         RETVAL
 
 bool
-test_isPUNCT_utf8(unsigned char * p, int type)
+test_isPUNCT_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPUNCT_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPUNCT_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isPUNCT_LC_utf8(unsigned char * p, int type)
+test_isPUNCT_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPUNCT_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPUNCT_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5763,31 +5964,31 @@ test_isXDIGIT_LC(UV ord)
         RETVAL
 
 bool
-test_isXDIGIT_utf8(unsigned char * p, int type)
+test_isXDIGIT_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isXDIGIT_utf8_safe(p, e);
         }
         else {
-            RETVAL = isXDIGIT_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isXDIGIT_LC_utf8(unsigned char * p, int type)
+test_isXDIGIT_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isXDIGIT_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isXDIGIT_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -5842,31 +6043,31 @@ test_isPSXSPC_LC(UV ord)
         RETVAL
 
 bool
-test_isPSXSPC_utf8(unsigned char * p, int type)
+test_isPSXSPC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPSXSPC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPSXSPC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
 
 bool
-test_isPSXSPC_LC_utf8(unsigned char * p, int type)
+test_isPSXSPC_LC_utf8(U8 * p, int type)
     PREINIT:
-       const unsigned char * e;
+       const U8 * e;
     CODE:
         if (type >= 0) {
             e = p + UTF8SKIP(p) - type;
             RETVAL = isPSXSPC_LC_utf8_safe(p, e);
         }
         else {
-            RETVAL = isPSXSPC_LC_utf8(p);
+            RETVAL = 0;
         }
     OUTPUT:
         RETVAL
@@ -6006,6 +6207,48 @@ test_is_utf8_string(char *s, STRLEN len)
     OUTPUT:
         RETVAL
 
+#define WORDSIZE            sizeof(PERL_UINTMAX_T)
+
+AV *
+test_is_utf8_invariant_string_loc(U8 *s, STRLEN offset, STRLEN len)
+    PREINIT:
+        AV *av;
+        const U8 * ep = NULL;
+        PERL_UINTMAX_T* copy;
+    CODE:
+        /* 'offset' is number of bytes past a word boundary the testing of 's'
+         * is to start at.  Allocate space that does start at the word
+         * boundary, and copy 's' to the correct offset past it.  Then call the
+         * tested function with that position */
+        Newx(copy, 1 + ((len + WORDSIZE - 1) / WORDSIZE), PERL_UINTMAX_T);
+        Copy(s, (U8 *) copy + offset, len, U8);
+        av = newAV();
+        av_push(av, newSViv(is_utf8_invariant_string_loc((U8 *) copy + offset, len, &ep)));
+        av_push(av, newSViv(ep - ((U8 *) copy + offset)));
+        RETVAL = av;
+        Safefree(copy);
+    OUTPUT:
+        RETVAL
+
+STRLEN
+test_variant_under_utf8_count(U8 *s, STRLEN offset, STRLEN len)
+    PREINIT:
+        PERL_UINTMAX_T * copy;
+    CODE:
+        Newx(copy, 1 + ((len + WORDSIZE - 1) / WORDSIZE), PERL_UINTMAX_T);
+        Copy(s, (U8 *) copy + offset, len, U8);
+        RETVAL = variant_under_utf8_count((U8 *) copy + offset, (U8 *) copy + offset + len);
+        Safefree(copy);
+    OUTPUT:
+        RETVAL
+
+STRLEN
+test_utf8_length(U8 *s, STRLEN offset, STRLEN len)
+CODE:
+    RETVAL = utf8_length(s + offset, s + len);
+OUTPUT:
+    RETVAL
+
 AV *
 test_is_utf8_string_loc(char *s, STRLEN len)
     PREINIT:
@@ -6256,7 +6499,7 @@ test_toLOWER_utf8(SV * p, int type)
         STRLEN len;
         AV *av;
         SV *utf8;
-       const unsigned char * e;
+       const U8 * e;
         UV resultant_cp = UV_MAX;   /* Initialized because of dumb compilers */
     CODE:
         input = (U8 *) SvPV(p, len);
@@ -6264,23 +6507,18 @@ test_toLOWER_utf8(SV * p, int type)
         if (type >= 0) {
             e = input + UTF8SKIP(input) - type;
             resultant_cp = toLOWER_utf8_safe(input, e, s, &len);
+            av_push(av, newSVuv(resultant_cp));
+
+            utf8 = newSVpvn((char *) s, len);
+            SvUTF8_on(utf8);
+            av_push(av, utf8);
+
+            av_push(av, newSVuv(len));
+            RETVAL = av;
         }
-        else if (type == -1) {
-            resultant_cp = toLOWER_utf8(input, s, &len);
-        }
-#ifndef NO_MATHOMS
         else {
-            resultant_cp = Perl_to_utf8_lower(aTHX_ input, s, &len);
+            RETVAL = 0;
         }
-#endif
-        av_push(av, newSVuv(resultant_cp));
-
-        utf8 = newSVpvn((char *) s, len);
-        SvUTF8_on(utf8);
-        av_push(av, utf8);
-
-        av_push(av, newSVuv(len));
-        RETVAL = av;
     OUTPUT:
         RETVAL
 
@@ -6346,7 +6584,7 @@ test_toFOLD_utf8(SV * p, int type)
         STRLEN len;
         AV *av;
         SV *utf8;
-       const unsigned char * e;
+       const U8 * e;
         UV resultant_cp = UV_MAX;
     CODE:
         input = (U8 *) SvPV(p, len);
@@ -6354,23 +6592,18 @@ test_toFOLD_utf8(SV * p, int type)
         if (type >= 0) {
             e = input + UTF8SKIP(input) - type;
             resultant_cp = toFOLD_utf8_safe(input, e, s, &len);
+            av_push(av, newSVuv(resultant_cp));
+
+            utf8 = newSVpvn((char *) s, len);
+            SvUTF8_on(utf8);
+            av_push(av, utf8);
+
+            av_push(av, newSVuv(len));
+            RETVAL = av;
         }
-        else if (type == -1) {
-            resultant_cp = toFOLD_utf8(input, s, &len);
-        }
-#ifndef NO_MATHOMS
         else {
-            resultant_cp = Perl_to_utf8_fold(aTHX_ input, s, &len);
+            RETVAL = 0;
         }
-#endif
-        av_push(av, newSVuv(resultant_cp));
-
-        utf8 = newSVpvn((char *) s, len);
-        SvUTF8_on(utf8);
-        av_push(av, utf8);
-
-        av_push(av, newSVuv(len));
-        RETVAL = av;
     OUTPUT:
         RETVAL
 
@@ -6436,7 +6669,7 @@ test_toUPPER_utf8(SV * p, int type)
         STRLEN len;
         AV *av;
         SV *utf8;
-       const unsigned char * e;
+       const U8 * e;
         UV resultant_cp = UV_MAX;
     CODE:
         input = (U8 *) SvPV(p, len);
@@ -6444,23 +6677,18 @@ test_toUPPER_utf8(SV * p, int type)
         if (type >= 0) {
             e = input + UTF8SKIP(input) - type;
             resultant_cp = toUPPER_utf8_safe(input, e, s, &len);
+            av_push(av, newSVuv(resultant_cp));
+
+            utf8 = newSVpvn((char *) s, len);
+            SvUTF8_on(utf8);
+            av_push(av, utf8);
+
+            av_push(av, newSVuv(len));
+            RETVAL = av;
         }
-        else if (type == -1) {
-            resultant_cp = toUPPER_utf8(input, s, &len);
-        }
-#ifndef NO_MATHOMS
         else {
-            resultant_cp = Perl_to_utf8_upper(aTHX_ input, s, &len);
+            RETVAL = 0;
         }
-#endif
-        av_push(av, newSVuv(resultant_cp));
-
-        utf8 = newSVpvn((char *) s, len);
-        SvUTF8_on(utf8);
-        av_push(av, utf8);
-
-        av_push(av, newSVuv(len));
-        RETVAL = av;
     OUTPUT:
         RETVAL
 
@@ -6519,7 +6747,7 @@ test_toTITLE_utf8(SV * p, int type)
         STRLEN len;
         AV *av;
         SV *utf8;
-       const unsigned char * e;
+       const U8 * e;
         UV resultant_cp = UV_MAX;
     CODE:
         input = (U8 *) SvPV(p, len);
@@ -6527,23 +6755,18 @@ test_toTITLE_utf8(SV * p, int type)
         if (type >= 0) {
             e = input + UTF8SKIP(input) - type;
             resultant_cp = toTITLE_utf8_safe(input, e, s, &len);
+            av_push(av, newSVuv(resultant_cp));
+
+            utf8 = newSVpvn((char *) s, len);
+            SvUTF8_on(utf8);
+            av_push(av, utf8);
+
+            av_push(av, newSVuv(len));
+            RETVAL = av;
         }
-        else if (type == -1) {
-            resultant_cp = toTITLE_utf8(input, s, &len);
-        }
-#ifndef NO_MATHOMS
         else {
-            resultant_cp = Perl_to_utf8_title(aTHX_ input, s, &len);
+            RETVAL = 0;
         }
-#endif
-        av_push(av, newSVuv(resultant_cp));
-
-        utf8 = newSVpvn((char *) s, len);
-        SvUTF8_on(utf8);
-        av_push(av, utf8);
-
-        av_push(av, newSVuv(len));
-        RETVAL = av;
     OUTPUT:
         RETVAL
 
@@ -6613,12 +6836,12 @@ Comctl32Version()
         if(!dll)
             croak("Comctl32Version: comctl32.dll not in process???");
         hrsc = FindResource(dll,    MAKEINTRESOURCE(VS_VERSION_INFO),
-                                    MAKEINTRESOURCE(VS_FILE_INFO));
+                                    MAKEINTRESOURCE((Size_t)VS_FILE_INFO));
         if(!hrsc)
             croak("Comctl32Version: comctl32.dll no version???");
         ver = LoadResource(dll, hrsc);
         len = SizeofResource(dll, hrsc);
-        vercopy = _alloca(len);
+        vercopy = (void *)sv_grow(sv_newmortal(),len);
         memcpy(vercopy, ver, len);
         if (VerQueryValue(vercopy, "\\", (void**)&info, &len)) {
             int dwValueMS1 = (info->dwFileVersionMS>>16);