This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
isPSXSPC() is a synonym for isSPACE
[perl5.git] / dist / Devel-PPPort / parts / inc / misc
index 949c481..e53e0aa 100644 (file)
 =provides
 
 __UNDEFINED__
-PERL_UNUSED_DECL
+END_EXTERN_C
+EXTERN_C
+INT2PTR
+MUTABLE_PTR
+NVTYPE
+PERLIO_FUNCS_CAST
+PERLIO_FUNCS_DECL
 PERL_UNUSED_ARG
-PERL_UNUSED_VAR
 PERL_UNUSED_CONTEXT
+PERL_UNUSED_DECL
 PERL_UNUSED_RESULT
-PERL_GCC_BRACE_GROUPS_FORBIDDEN
+PERL_UNUSED_VAR
 PERL_USE_GCC_BRACE_GROUPS
-PERLIO_FUNCS_DECL
-PERLIO_FUNCS_CAST
-NVTYPE
-INT2PTR
-PTRV
-NUM2PTR
-PERL_HASH
-PTR2IV
-PTR2UV
-PTR2NV
 PTR2ul
+PTRV
 START_EXTERN_C
-END_EXTERN_C
-EXTERN_C
-STMT_START
 STMT_END
-UTF8_MAXBYTES
+STMT_START
+SvRX
 WIDEST_UTYPE
 XSRETURN
-HeUTF8
-C_ARRAY_LENGTH
-C_ARRAY_END
-SvRX
-SvRXOK
-cBOOL
-OpHAS_SIBLING
-OpSIBLING
-OpMORESIB_set
-OpLASTSIB_set
-OpMAYBESIB_set
 
 =implementation
 
@@ -58,27 +42,43 @@ __UNDEFINED__ OpSIBLING(o)          (0 + (o)->op_sibling)
 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
+__UNDEFINED__ HEf_SVKEY   -2
 
-#ifndef SvRX
-#if { NEED SvRX }
-
-void *
-SvRX(pTHX_ SV *rv)
-{
-       if (SvROK(rv)) {
-               SV *sv = SvRV(rv);
-               if (SvMAGICAL(sv)) {
-                       MAGIC *mg = mg_find(sv, PERL_MAGIC_qr);
-                       if (mg && mg->mg_obj) {
-                               return mg->mg_obj;
-                       }
-               }
-       }
-       return 0;
-}
-#endif
+#if defined(DEBUGGING) && !defined(__COVERITY__)
+__UNDEFINED__ __ASSERT_(statement)  assert(statement),
+#else
+__UNDEFINED__ __ASSERT_(statement)
 #endif
 
+/* These could become provided when they become part of the public API */
+__UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n)                                 \
+   (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
+__UNDEF_NOT_PROVIDED__ inRANGE(c, l, u)                                    \
+   (  (sizeof(c) == sizeof(U8))  ? withinCOUNT(((U8)  (c)), (l), ((u) - (l)))  \
+    : (sizeof(c) == sizeof(U16)) ? withinCOUNT(((U16) (c)), (l), ((u) - (l)))  \
+    : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l)))  \
+    : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
+
+/* Create the macro for "is'macro'_utf8_safe(s, e)".  For code points below
+ * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
+ * point.  That is so that it can automatically get the bug fixes done in this
+ * file. */
+#define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro)                             \
+   (((e) - (s)) <= 0                                                        \
+     ? 0                                                                    \
+     : UTF8_IS_INVARIANT((s)[0])                                            \
+       ? is ## macro ## _L1((s)[0])                                         \
+       : (((e) - (s)) < UTF8SKIP(s))                                        \
+          ? 0                                                               \
+          : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
+              /* The cast in the line below is only to silence warnings */  \
+            ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
+                                  UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
+                                                     & UTF_START_MASK(2),   \
+                                                  (s)[1])))                 \
+            : is ## macro ## _utf8(s))
+
+__UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
 
 #ifndef PERL_UNUSED_DECL
@@ -175,9 +175,9 @@ __UNDEFINED__  PTR2NV(p)       NUM2PTR(NV,p)
 #  define EXTERN_C extern
 #endif
 
-#if defined(PERL_GCC_PEDANTIC)
+#if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
 #  ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
-#    define PERL_GCC_BRACE_GROUPS_FORBIDDEN
+__UNDEF_NOT_PROVIDED__  PERL_GCC_BRACE_GROUPS_FORBIDDEN
 #  endif
 #endif
 
@@ -212,6 +212,9 @@ __UNDEFINED__  DEFSV_set(sv) (DEFSV = (sv))
 /* Older perls (<=5.003) lack AvFILLp */
 __UNDEFINED__  AvFILLp      AvFILL
 
+__UNDEFINED__  av_tindex    AvFILL
+__UNDEFINED__  av_top_index AvFILL
+
 __UNDEFINED__  ERRSV        get_sv("@",FALSE)
 
 /* Hint: gv_stashpvn
@@ -262,8 +265,6 @@ __UNDEFINED__  dVAR            dNOOP
 
 __UNDEFINED__  SVf             "_"
 
-__UNDEFINED__  UTF8_MAXBYTES   UTF8_MAXLEN
-
 __UNDEFINED__  CPERLscope(x)   x
 
 __UNDEFINED__  PERL_HASH(hash,str,len) \
@@ -299,26 +300,6 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
 
 #endif
 
-__UNDEFINED__ isPSXSPC(c)       (isSPACE(c) || (c) == '\v')
-__UNDEFINED__ isBLANK(c)        ((c) == ' ' || (c) == '\t')
-#ifdef EBCDIC
-__UNDEFINED__ isALNUMC(c)       isalnum(c)
-__UNDEFINED__ isASCII(c)        isascii(c)
-__UNDEFINED__ isCNTRL(c)        iscntrl(c)
-__UNDEFINED__ isGRAPH(c)        isgraph(c)
-__UNDEFINED__ isPRINT(c)        isprint(c)
-__UNDEFINED__ isPUNCT(c)        ispunct(c)
-__UNDEFINED__ isXDIGIT(c)       isxdigit(c)
-#else
-# if { VERSION < 5.10.0 }
-/* Hint: isPRINT
- * The implementation in older perl versions includes all of the
- * isSPACE() characters, which is wrong. The version provided by
- * Devel::PPPort always overrides a present buggy version.
- */
-#  undef isPRINT
-# endif
-
 #ifndef WIDEST_UTYPE
 # ifdef QUADKIND
 #  ifdef U64TYPE
@@ -331,15 +312,445 @@ __UNDEFINED__ isXDIGIT(c)       isxdigit(c)
 # endif
 #endif
 
-__UNDEFINED__ isALNUMC(c)       (isALPHA(c) || isDIGIT(c))
+/* Without NATIVE_TO_ASCII only ASCII platforms are supported (identity maps) */
+#if defined(EBCDIC) && defined(NATIVE_TO_ASCII)
+__UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
+__UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
+__UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
+__UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
+#else   /* no translation macro available: pass code points through unchanged */
+__UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
+__UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
+__UNDEFINED__ NATIVE_TO_UNI(c) (c)
+__UNDEFINED__ UNI_TO_NATIVE(c) (c)
+#endif  /* EBCDIC && NATIVE_TO_ASCII */
+
+/* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
+   EBCDIC is not supported on versions earlier than 5.7.1
+ */
+
+/* The meaning of this changed; use the modern version */
+#undef isPSXSPC
+#undef isPSXSPC_A
+#undef isPSXSPC_L1
+
+/* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
+    This is equivalent to the corresponding isSPACE-type macro.  On perls
+    before 5.18, this matched a vertical tab and SPACE didn't.  But the
+    ppport.h SPACE version does match VT in all perl releases.  Since VT's are
+    extremely rarely found in real-life files, this difference effectively
+    doesn't matter */
+
+/* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
+    Until Perl 5.18, this did not match the vertical tab (VT).  The ppport.h
+    version does match it in all perl releases. Since VT's are extremely rarely
+    found in real-life files, this difference effectively doesn't matter */
+
+#ifdef EBCDIC
+
+/* This is the first version where these macros are fully correct on EBCDIC
+ * platforms.  Relying on the C library functions, as earlier releases did,
+ * causes problems with locales */
+# if { VERSION < 5.22.0 }
+#  undef isALNUM
+#  undef isALNUM_A
+#  undef isALNUM_L1
+#  undef isALNUMC
+#  undef isALNUMC_A
+#  undef isALNUMC_L1
+#  undef isALPHA
+#  undef isALPHA_A
+#  undef isALPHA_L1
+#  undef isALPHANUMERIC
+#  undef isALPHANUMERIC_A
+#  undef isALPHANUMERIC_L1
+#  undef isASCII
+#  undef isASCII_A
+#  undef isASCII_L1
+#  undef isBLANK
+#  undef isBLANK_A
+#  undef isBLANK_L1
+#  undef isCNTRL
+#  undef isCNTRL_A
+#  undef isCNTRL_L1
+#  undef isDIGIT
+#  undef isDIGIT_A
+#  undef isDIGIT_L1
+#  undef isGRAPH
+#  undef isGRAPH_A
+#  undef isGRAPH_L1
+#  undef isIDCONT
+#  undef isIDCONT_A
+#  undef isIDCONT_L1
+#  undef isIDFIRST
+#  undef isIDFIRST_A
+#  undef isIDFIRST_L1
+#  undef isLOWER
+#  undef isLOWER_A
+#  undef isLOWER_L1
+#  undef isOCTAL
+#  undef isOCTAL_A
+#  undef isOCTAL_L1
+#  undef isPRINT
+#  undef isPRINT_A
+#  undef isPRINT_L1
+#  undef isPUNCT
+#  undef isPUNCT_A
+#  undef isPUNCT_L1
+#  undef isSPACE
+#  undef isSPACE_A
+#  undef isSPACE_L1
+#  undef isUPPER
+#  undef isUPPER_A
+#  undef isUPPER_L1
+#  undef isWORDCHAR
+#  undef isWORDCHAR_A
+#  undef isWORDCHAR_L1
+#  undef isXDIGIT
+#  undef isXDIGIT_A
+#  undef isXDIGIT_L1
+# endif
+
+__UNDEFINED__ isASCII(c)    (isCNTRL(c) || isPRINT(c))
+
+        /* The below is accurate for all EBCDIC code pages supported by
+         * all the versions of Perl overridden by this */
+__UNDEFINED__ isCNTRL(c)    (    (c) == '\0' || (c) == '\a' || (c) == '\b'      \
+                             ||  (c) == '\f' || (c) == '\n' || (c) == '\r'      \
+                             ||  (c) == '\t' || (c) == '\v'                     \
+                             || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */      \
+                             ||  (c) == 7    /* U+7F DEL */                     \
+                             || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */       \
+                                                      /* DLE, DC[1-3] */        \
+                             ||  (c) == 0x18 /* U+18 CAN */                     \
+                             ||  (c) == 0x19 /* U+19 EOM */                     \
+                             || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */      \
+                             ||  (c) == 0x26 /* U+17 ETB */                     \
+                             ||  (c) == 0x27 /* U+1B ESC */                     \
+                             ||  (c) == 0x2D /* U+05 ENQ */                     \
+                             ||  (c) == 0x2E /* U+06 ACK */                     \
+                             ||  (c) == 0x32 /* U+16 SYN */                     \
+                             ||  (c) == 0x37 /* U+04 EOT */                     \
+                             ||  (c) == 0x3C /* U+14 DC4 */                     \
+                             ||  (c) == 0x3D /* U+15 NAK */                     \
+                             ||  (c) == 0x3F /* U+1A SUB */                     \
+                            )
+
+#if '^' == 106    /* EBCDIC POSIX-BC */
+#  define D_PPP_OUTLIER_CONTROL 0x5F
+#else   /* EBCDIC 1047 037 */
+#  define D_PPP_OUTLIER_CONTROL 0xFF
+#endif
+
+/* The controls are everything below blank, plus one outlier */
+__UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' '                           \
+                          || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
+/* The ordering of the tests in this and isUPPER are to exclude most characters
+ * early */
+__UNDEFINED__ isLOWER(c)    (        (c) >= 'a' && (c) <= 'z'                   \
+                             &&  (   (c) <= 'i'                                 \
+                                 || ((c) >= 'j' && (c) <= 'r')                  \
+                                 ||  (c) >= 's'))
+__UNDEFINED__ isUPPER(c)    (        (c) >= 'A' && (c) <= 'Z'                   \
+                             && (    (c) <= 'I'                                 \
+                                 || ((c) >= 'J' && (c) <= 'R')                  \
+                                 ||  (c) >= 'S'))
+
+#else   /* Above is EBCDIC; below is ASCII */
+
+# if { VERSION < 5.4.0 }
+/* The implementation of these in older perl versions can give wrong results if
+ * the C program locale is set to other than the C locale */
+#  undef isALNUM
+#  undef isALNUM_A
+#  undef isALPHA
+#  undef isALPHA_A
+#  undef isDIGIT
+#  undef isDIGIT_A
+#  undef isIDFIRST
+#  undef isIDFIRST_A
+#  undef isLOWER
+#  undef isLOWER_A
+#  undef isUPPER
+#  undef isUPPER_A
+# endif
+
+# if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
+#  undef isCNTRL
+# endif
+
+# if { VERSION < 5.10.0 }
+/* earlier perls included all of the isSPACE() characters, which is wrong. The
+ * version provided by Devel::PPPort always overrides an existing buggy
+ * version. */
+#  undef isPRINT
+#  undef isPRINT_A
+# endif
+
+# if { VERSION < 5.14.0 }
+/* earlier perls always returned true if the parameter was a signed char */
+#  undef isASCII
+#  undef isASCII_A
+# endif
+
+# if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
+#  undef isPUNCT_L1
+# endif
+
+# if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
+#  undef isALNUMC_L1
+#endif
+
+# if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
+#  undef isSPACE
+#  undef isSPACE_A
+#  undef isSPACE_L1
+
+# endif
+
 __UNDEFINED__ isASCII(c)        ((WIDEST_UTYPE) (c) <= 127)
 __UNDEFINED__ isCNTRL(c)        ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
-__UNDEFINED__ isGRAPH(c)        (isALNUM(c) || isPUNCT(c))
-__UNDEFINED__ isPRINT(c)        (((c) >= 32 && (c) < 127))
-__UNDEFINED__ isPUNCT(c)        (((c) >= 33 && (c) <= 47) || ((c) >= 58 && (c) <= 64)  || ((c) >= 91 && (c) <= 96) || ((c) >= 123 && (c) <= 126))
-__UNDEFINED__ isXDIGIT(c)       (isDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
+__UNDEFINED__ isCNTRL_L1(c)     (isCNTRL(c) || (   (WIDEST_UTYPE) (c) <= 0x9F  \
+                                                && (WIDEST_UTYPE) (c) >= 0x80))
+__UNDEFINED__ isLOWER(c)        ((c) >= 'a' && (c) <= 'z')
+__UNDEFINED__ isUPPER(c)        ((c) <= 'Z' && (c) >= 'A')
+
+#endif /* Below are definitions common to EBCDIC and ASCII */
+
+__UNDEFINED__ isASCII_L1(c)     isASCII(c)
+__UNDEFINED__ isALNUM(c)        isWORDCHAR(c)
+__UNDEFINED__ isALNUMC(c)       isALPHANUMERIC(c)
+__UNDEFINED__ isALNUMC_L1(c)    isALPHANUMERIC_L1(c)
+__UNDEFINED__ isALPHA(c)        (isUPPER(c) || isLOWER(c))
+__UNDEFINED__ isALPHA_L1(c)     (isUPPER_L1(c) || isLOWER_L1(c))
+__UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
+__UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
+__UNDEFINED__ isBLANK(c)        ((c) == ' ' || (c) == '\t')
+__UNDEFINED__ isBLANK_L1(c) (    isBLANK(c)                                    \
+                             || (   (WIDEST_UTYPE) (c) < 256                   \
+                                 && NATIVE_TO_LATIN1((U8) (c)) == 0xA0)) /* 0xA0 is NBSP; (c) parenthesized so the cast covers the whole argument */
+__UNDEFINED__ isDIGIT(c)        ((c) <= '9' && (c) >= '0')
+__UNDEFINED__ isDIGIT_L1(c)     isDIGIT(c)
+__UNDEFINED__ isGRAPH(c)        (isWORDCHAR(c) || isPUNCT(c))
+__UNDEFINED__ isGRAPH_L1(c)     (   isPRINT_L1(c)                              \
+                                 && (c) != ' '                                 \
+                                 && NATIVE_TO_LATIN1((U8) (c)) != 0xA0) /* printable, excluding SPACE and NBSP (0xA0) */
+__UNDEFINED__ isIDCONT(c)       isWORDCHAR(c)
+__UNDEFINED__ isIDCONT_L1(c)   isWORDCHAR_L1(c)
+__UNDEFINED__ isIDFIRST(c)      (isALPHA(c) || (c) == '_')
+__UNDEFINED__ isIDFIRST_L1(c)   (isALPHA_L1(c) || (c) == '_') /* '_' is a native literal, so compare natively */
+__UNDEFINED__ isLOWER_L1(c) (    isLOWER(c)                                    \
+                             || (   (WIDEST_UTYPE) (c) < 256                   \
+                                 && (  (   NATIVE_TO_LATIN1((U8) (c)) >= 0xDF  \
+                                        && NATIVE_TO_LATIN1((U8) (c)) != 0xF7) \
+                                     || NATIVE_TO_LATIN1((U8) (c)) == 0xAA     \
+                                     || NATIVE_TO_LATIN1((U8) (c)) == 0xBA     \
+                                     || NATIVE_TO_LATIN1((U8) (c)) == 0xB5))) /* Latin-1 lowercase; (c) parenthesized so the cast covers the whole argument */
+__UNDEFINED__ isOCTAL(c)        (((WIDEST_UTYPE)((c)) & ~7) == '0')
+__UNDEFINED__ isOCTAL_L1(c)     isOCTAL(c)
+__UNDEFINED__ isPRINT(c)        (isGRAPH(c) || (c) == ' ')
+__UNDEFINED__ isPRINT_L1(c)     ((WIDEST_UTYPE) (c) < 256 && ! isCNTRL_L1(c))
+__UNDEFINED__ isPSXSPC(c)       isSPACE(c)
+__UNDEFINED__ isPSXSPC_L1(c)    isSPACE_L1(c)
+__UNDEFINED__ isPUNCT(c)    (   (c) == '-' || (c) == '!' || (c) == '"'         \
+                             || (c) == '#' || (c) == '$' || (c) == '%'         \
+                             || (c) == '&' || (c) == '\'' || (c) == '('        \
+                             || (c) == ')' || (c) == '*' || (c) == '+'         \
+                             || (c) == ',' || (c) == '.' || (c) == '/'         \
+                             || (c) == ':' || (c) == ';' || (c) == '<'         \
+                             || (c) == '=' || (c) == '>' || (c) == '?'         \
+                             || (c) == '@' || (c) == '[' || (c) == '\\'        \
+                             || (c) == ']' || (c) == '^' || (c) == '_'         \
+                             || (c) == '`' || (c) == '{' || (c) == '|'         \
+                             || (c) == '}' || (c) == '~')
+__UNDEFINED__ isPUNCT_L1(c)  (    isPUNCT(c)                                   \
+                              || (   (WIDEST_UTYPE) (c) < 256                  \
+                                  && (   NATIVE_TO_LATIN1((U8) (c)) == 0xA1    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xA7    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xAB    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xB6    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xB7    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xBB    \
+                                      || NATIVE_TO_LATIN1((U8) (c)) == 0xBF))) /* ASCII punctuation plus the listed Latin-1 code points; (c) parenthesized so the cast covers the whole argument */
+__UNDEFINED__ isSPACE(c)        (   isBLANK(c) || (c) == '\n' || (c) == '\r'   \
+                                 || (c) == '\v' || (c) == '\f')
+__UNDEFINED__ isSPACE_L1(c) (    isSPACE(c)                                    \
+                             || (   (WIDEST_UTYPE) (c) < 256                   \
+                                 && (   NATIVE_TO_LATIN1((U8) (c)) == 0x85     \
+                                     || NATIVE_TO_LATIN1((U8) (c)) == 0xA0))) /* 0x85 is NEL, 0xA0 is NBSP; (c) parenthesized so the cast covers the whole argument */
+__UNDEFINED__ isUPPER_L1(c) (   isUPPER(c)                                     \
+                             || (   (WIDEST_UTYPE) (c) < 256                   \
+                                 && (   NATIVE_TO_LATIN1((U8) (c)) >= 0xC0     \
+                                     && NATIVE_TO_LATIN1((U8) (c)) <= 0xDE     \
+                                     && NATIVE_TO_LATIN1((U8) (c)) != 0xD7))) /* 0xC0..0xDE except 0xD7 (MULTIPLICATION SIGN); (c) parenthesized so the cast covers the whole argument */
+__UNDEFINED__ isWORDCHAR(c)     (isALPHANUMERIC(c) || (c) == '_')
+__UNDEFINED__ isWORDCHAR_L1(c)  (isIDFIRST_L1(c) || isDIGIT(c))
+__UNDEFINED__ isXDIGIT(c)       (   isDIGIT(c)                                 \
+                                 || ((c) >= 'a' && (c) <= 'f')                 \
+                                 || ((c) >= 'A' && (c) <= 'F'))
+__UNDEFINED__ isXDIGIT_L1(c)    isXDIGIT(c)
+
+__UNDEFINED__ isALNUM_A(c)         isALNUM(c)
+__UNDEFINED__ isALNUMC_A(c)        isALNUMC(c)
+__UNDEFINED__ isALPHA_A(c)         isALPHA(c)
+__UNDEFINED__ isALPHANUMERIC_A(c)  isALPHANUMERIC(c)
+__UNDEFINED__ isASCII_A(c)         isASCII(c)
+__UNDEFINED__ isBLANK_A(c)         isBLANK(c)
+__UNDEFINED__ isCNTRL_A(c)         isCNTRL(c)
+__UNDEFINED__ isDIGIT_A(c)         isDIGIT(c)
+__UNDEFINED__ isGRAPH_A(c)         isGRAPH(c)
+__UNDEFINED__ isIDCONT_A(c)        isIDCONT(c)
+__UNDEFINED__ isIDFIRST_A(c)       isIDFIRST(c)
+__UNDEFINED__ isLOWER_A(c)         isLOWER(c)
+__UNDEFINED__ isOCTAL_A(c)         isOCTAL(c)
+__UNDEFINED__ isPRINT_A(c)         isPRINT(c)
+__UNDEFINED__ isPSXSPC_A(c)        isPSXSPC(c)
+__UNDEFINED__ isPUNCT_A(c)         isPUNCT(c)
+__UNDEFINED__ isSPACE_A(c)         isSPACE(c)
+__UNDEFINED__ isUPPER_A(c)         isUPPER(c)
+__UNDEFINED__ isWORDCHAR_A(c)     isWORDCHAR(c)
+__UNDEFINED__ isXDIGIT_A(c)       isXDIGIT(c)
+
+__UNDEFINED__ isASCII_utf8_safe(s,e)  (((e) - (s)) <= 0 ? 0 : isASCII(*(s))) /* 0 for an empty buffer: never reads *s when e <= s */
+
+#if { VERSION >= 5.006 }
+
+__UNDEFINED__ isALPHA_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
+#  ifdef isALPHANUMERIC_utf8
+__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                 \
+                                D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
+#  else
+__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                \
+                        (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
+#  endif
+
+/* This was broken before 5.18; just use this definition rather than worrying
+ * about which releases the official one works on */
+#  if 'A' == 65
+__UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?   /* Machine generated */                         \
+    ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1        \
+    : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
+           ( ( 0xC2 == ((const U8*)s)[0] ) ?                               \
+               ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
+           : ( 0xE1 == ((const U8*)s)[0] ) ?                               \
+               ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( 0xE2 == ((const U8*)s)[0] ) ?                               \
+               ( ( 0x80 == ((const U8*)s)[1] ) ?                           \
+                   ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
+               : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+       : 0 )                                                               \
+ : 0 )
+
+#  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
+
+__UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?                                                   \
+    ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
+    : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
+           ( ( 0x80 == ((const U8*)s)[0] ) ?                               \
+               ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
+           : ( 0xBC == ((const U8*)s)[0] ) ?                               \
+               ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( 0xCA == ((const U8*)s)[0] ) ?                               \
+               ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
+                   ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+               : ( 0x42 == ((const U8*)s)[1] ) ?                           \
+                   ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
+               : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+       : 0 )                                                               \
+: 0 )
+
+#  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
+
+__UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?                                                   \
+    ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
+    : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
+           ( ( 0x78 == ((const U8*)s)[0] ) ?                               \
+               ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
+           : ( 0xBD == ((const U8*)s)[0] ) ?                               \
+               ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( 0xCA == ((const U8*)s)[0] ) ?                               \
+               ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
+                   ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+               : ( 0x42 == ((const U8*)s)[1] ) ?                           \
+                   ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
+               : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+           : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+       : 0 )                                                               \
+: 0 )
+
+#  else
+#    error Unknown character set
+#  endif
+
+__UNDEFINED__ isCNTRL_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
+__UNDEFINED__ isDIGIT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
+__UNDEFINED__ isGRAPH_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
+#  ifdef isIDCONT_utf8
+__UNDEFINED__ isIDCONT_utf8_safe(s,e)   D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
+#  else
+__UNDEFINED__ isIDCONT_utf8_safe(s,e)   isWORDCHAR_utf8_safe(s,e)
+#  endif
+
+__UNDEFINED__ isIDFIRST_utf8_safe(s,e)  D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
+__UNDEFINED__ isLOWER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
+__UNDEFINED__ isPRINT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
+
+#  undef isPSXSPC_utf8_safe   /* Use the modern definition */
+__UNDEFINED__ isPSXSPC_utf8_safe(s,e)   isSPACE_utf8_safe(s,e)
+
+__UNDEFINED__ isPUNCT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
+__UNDEFINED__ isSPACE_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
+__UNDEFINED__ isUPPER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
+
+#  ifdef isWORDCHAR_utf8
+__UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
+#  else   /* no isWORDCHAR_utf8: build it from alphanumeric-or-underscore */
+__UNDEFINED__ isWORDCHAR_utf8_safe(s,e)                                               \
+      (isALPHANUMERIC_utf8_safe(s,e) || (((e) - (s)) > 0 && (*(s)) == '_')) /* *s is read only when e > s */
+#  endif
+
+/* This was broken before 5.12; just use this definition rather than worrying
+ * about which releases the official one works on */
+#  if 'A' == 65
+__UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?                                                   \
+    ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
+    : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
+                   ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
+               : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
+: 0 )
+
+#  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
+
+__UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?                                                   \
+    ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
+    : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
+                       ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
+                   : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
+: 0 )
+
+#  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
+
+__UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
+( ( LIKELY((e) > (s)) ) ?                                                   \
+    ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
+    : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
+                       ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
+                   : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
+: 0 )
+
+#  else
+#    error Unknown character set
+#  endif
 #endif
 
+
 /* Until we figure out how to support this in older perls... */
 #if { VERSION >= 5.8.0 }
 
@@ -352,6 +763,19 @@ __UNDEFINED__ HeUTF8(he)        ((HeKLEN(he) == HEf_SVKEY) ?            \
 __UNDEFINED__ C_ARRAY_LENGTH(a)                (sizeof(a)/sizeof((a)[0]))
 __UNDEFINED__ C_ARRAY_END(a)           ((a) + C_ARRAY_LENGTH(a))
 
+__UNDEFINED__ LIKELY(x) (x)
+__UNDEFINED__ UNLIKELY(x) (x)
+
+#ifndef MUTABLE_PTR
+#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
+#  define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
+#else
+#  define MUTABLE_PTR(p) ((void *) (p))
+#endif
+#endif
+
+__UNDEFINED__ MUTABLE_SV(p)   ((SV *)MUTABLE_PTR(p))
+
 =xsmisc
 
 typedef XSPROTO(XSPROTO_test_t);
@@ -386,10 +810,6 @@ XS(XS_Devel__PPPort_dAXMARK)
   XSRETURN(1);
 }
 
-=xsinit
-
-#define NEED_SvRX
-
 =xsboot
 
 {
@@ -405,6 +825,7 @@ OpSIBLING_tests()
        PREINIT:
                OP *x;
                OP *kid;
+               OP *middlekid;
                OP *lastkid;
                int count = 0;
                int failures = 0;
@@ -428,6 +849,7 @@ OpSIBLING_tests()
                        kid = OpSIBLING(kid);
                        lastkid = kid;
                }
+                middlekid = OpSIBLING(x);
 
                /* Should now have a sibling */
                if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
@@ -471,6 +893,9 @@ OpSIBLING_tests()
                        failures++; warn("Op should have had a sib after maybesibset");
                }
 
+                op_free(lastkid);
+                op_free(middlekid);
+                op_free(x);
                RETVAL = failures;
        OUTPUT:
                RETVAL
@@ -590,7 +1015,7 @@ DEFSV_modify()
 int
 ERRSV()
         CODE:
-                RETVAL = SvTRUE(ERRSV);
+                RETVAL = SvTRUEx(ERRSV);
         OUTPUT:
                 RETVAL
 
@@ -676,7 +1101,653 @@ check_c_array()
                 mXPUSHi(C_ARRAY_LENGTH(x));  /* 4 */
                 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
 
-=tests plan => 48
+bool
+isBLANK(ord)
+    UV ord
+    CODE:
+        RETVAL = isBLANK(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isBLANK_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isBLANK_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isBLANK_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isBLANK_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isUPPER(ord)
+    UV ord
+    CODE:
+        RETVAL = isUPPER(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isUPPER_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isUPPER_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isUPPER_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isUPPER_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isLOWER(ord)
+    UV ord
+    CODE:
+        RETVAL = isLOWER(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isLOWER_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isLOWER_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isLOWER_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isLOWER_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHA(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHA(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHA_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHA_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHA_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHA_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isWORDCHAR(ord)
+    UV ord
+    CODE:
+        RETVAL = isWORDCHAR(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isWORDCHAR_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isWORDCHAR_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isWORDCHAR_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isWORDCHAR_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHANUMERIC(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHANUMERIC(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHANUMERIC_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHANUMERIC_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALNUM(ord)
+    UV ord
+    CODE:
+        RETVAL = isALNUM(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALNUM_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isALNUM_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isDIGIT(ord)
+    UV ord
+    CODE:
+        RETVAL = isDIGIT(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isDIGIT_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isDIGIT_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isOCTAL(ord)
+    UV ord
+    CODE:
+        RETVAL = isOCTAL(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isOCTAL_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isOCTAL_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDFIRST(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDFIRST(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDFIRST_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDFIRST_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDCONT(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDCONT(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDCONT_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDCONT_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isSPACE(ord)
+    UV ord
+    CODE:
+        RETVAL = isSPACE(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isSPACE_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isSPACE_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isASCII(ord)
+    UV ord
+    CODE:
+        RETVAL = isASCII(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isASCII_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isASCII_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isCNTRL(ord)
+    UV ord
+    CODE:
+        RETVAL = isCNTRL(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isCNTRL_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isCNTRL_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPRINT(ord)
+    UV ord
+    CODE:
+        RETVAL = isPRINT(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPRINT_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isPRINT_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isGRAPH(ord)
+    UV ord
+    CODE:
+        RETVAL = isGRAPH(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isGRAPH_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isGRAPH_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPUNCT(ord)
+    UV ord
+    CODE:
+        RETVAL = isPUNCT(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPUNCT_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isPUNCT_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isXDIGIT(ord)
+    UV ord
+    CODE:
+        RETVAL = isXDIGIT(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isXDIGIT_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isXDIGIT_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPSXSPC(ord)
+    UV ord
+    CODE:
+        RETVAL = isPSXSPC(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPSXSPC_A(ord)
+    UV ord
+    CODE:
+        RETVAL = isPSXSPC_A(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHANUMERIC_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isALPHANUMERIC_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isALNUMC_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isALNUMC_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isDIGIT_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isDIGIT_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isOCTAL_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isOCTAL_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDFIRST_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDFIRST_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDCONT_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isIDCONT_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isSPACE_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isSPACE_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isASCII_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isASCII_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isCNTRL_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isCNTRL_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPRINT_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isPRINT_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isGRAPH_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isGRAPH_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPUNCT_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isPUNCT_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isXDIGIT_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isXDIGIT_L1(ord);
+    OUTPUT:
+        RETVAL
+
+bool
+isPSXSPC_L1(ord)
+    UV ord
+    CODE:
+        RETVAL = isPSXSPC_L1(ord);
+    OUTPUT:
+        RETVAL
+
+#if { VERSION >= 5.006 }
+
+bool
+isALPHA_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isALPHANUMERIC_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isASCII_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isBLANK_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isCNTRL_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isDIGIT_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isGRAPH_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDCONT_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isIDFIRST_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isLOWER_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isPRINT_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isPSXSPC_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isPUNCT_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isSPACE_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isUPPER_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isWORDCHAR_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+bool
+isXDIGIT_utf8_safe(s, offset)
+    unsigned char * s
+    int offset
+    CODE:
+        RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
+    OUTPUT:
+        RETVAL
+
+#endif
+
+UV
+LATIN1_TO_NATIVE(cp)
+        UV cp
+        CODE:
+                if (cp > 255) RETVAL= cp;
+                else RETVAL= LATIN1_TO_NATIVE(cp);
+        OUTPUT:
+                RETVAL
+
+UV
+NATIVE_TO_LATIN1(cp)
+        UV cp
+        CODE:
+                RETVAL= (cp > 255) ? cp : NATIVE_TO_LATIN1(cp); /* pass through values above 255, mirroring the LATIN1_TO_NATIVE wrapper */
+        OUTPUT:
+                RETVAL
+
+IV
+av_tindex(av)
+        SV *av
+        CODE:
+                RETVAL = av_tindex((AV*)SvRV(av)); /* signed result: -1 when the array is empty, hence IV rather than STRLEN */
+        OUTPUT:
+                RETVAL
+
+IV
+av_top_index(av)
+        SV *av
+        CODE:
+                RETVAL = av_top_index((AV*)SvRV(av)); /* signed result: -1 when the array is empty, hence IV rather than STRLEN */
+        OUTPUT:
+                RETVAL
+
+=tests plan => 17678
 
 use vars qw($my_sv @my_av %my_hv);
 
@@ -687,7 +1758,7 @@ $_ = "Fred";
 ok(&Devel::PPPort::DEFSV(), "Fred");
 ok(&Devel::PPPort::UNDERBAR(), "Fred");
 
-if ($] >= 5.009002 && $] < 5.023 && $] < 5.023004) {
+if ("$]" >= 5.009002 && "$]" < 5.023 && "$]" < 5.023004) {
   eval q{
     no warnings "deprecated";
     no if $^V > v5.17.9, warnings => "experimental::lexical_topic";
@@ -750,8 +1821,8 @@ ok(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
 ok(Devel::PPPort::PERL_ABS(42), 42);
 ok(Devel::PPPort::PERL_ABS(-13), 13);
 
-ok(Devel::PPPort::SVf(42), $] >= 5.004 ? '[42]' : '42');
-ok(Devel::PPPort::SVf('abc'), $] >= 5.004 ? '[abc]' : 'abc');
+ok(Devel::PPPort::SVf(42), "$]" >= 5.004 ? '[42]' : '42');
+ok(Devel::PPPort::SVf('abc'), "$]" >= 5.004 ? '[abc]' : 'abc');
 
 ok(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
 
@@ -759,7 +1830,7 @@ ok(&Devel::PPPort::ptrtests(), 63);
 
 ok(&Devel::PPPort::OpSIBLING_tests(), 0);
 
-if ($] >= 5.009000) {
+if ("$]" >= 5.009000) {
   eval q{
     ok(&Devel::PPPort::check_HeUTF8("hello"), "norm");
     ok(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
@@ -776,7 +1847,7 @@ ok($r[1], "13");
 ok(!Devel::PPPort::SvRXOK(""));
 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
 
-if ($] < 5.005) {
+if ("$]" < 5.005) {
         skip 'no qr// objects in this perl', 0;
         skip 'no qr// objects in this perl', 0;
 } else {
@@ -784,3 +1855,184 @@ if ($] < 5.005) {
         ok(Devel::PPPort::SvRXOK($qr));
         ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
 }
+
+ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
+ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
+ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
+ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
+
+ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6);
+if (ord("A") == 65) {
+    ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
+    ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
+}
+else {
+    ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
+    ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
+}
+
+ok(  Devel::PPPort::isALNUMC_L1(ord("5")));
+ok(  Devel::PPPort::isALNUMC_L1(0xFC));
+ok(! Devel::PPPort::isALNUMC_L1(0xB6));
+
+ok(  Devel::PPPort::isOCTAL(ord("7")));
+ok(! Devel::PPPort::isOCTAL(ord("8")));
+
+ok(  Devel::PPPort::isOCTAL_A(ord("0")));
+ok(! Devel::PPPort::isOCTAL_A(ord("9")));
+
+ok(  Devel::PPPort::isOCTAL_L1(ord("2")));
+ok(! Devel::PPPort::isOCTAL_L1(ord("8")));
+
+# For the other properties, we test every code point from 0..255, and a
+# smattering of higher ones.  First populate a hash with keys like '65:ALPHA'
+# to indicate that the code point there is alphabetic.
+my $i;
+my %types;
+for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
+        0xF8..0x101)
+{
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:ALPHA"} = 1;
+    $types{"$native:ALPHANUMERIC"} = 1;
+    $types{"$native:IDFIRST"} = 1;
+    $types{"$native:IDCONT"} = 1;
+    $types{"$native:PRINT"} = 1;
+    $types{"$native:WORDCHAR"} = 1;
+}
+for $i (0x30..0x39, 0x660, 0xFF19) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:ALPHANUMERIC"} = 1;
+    $types{"$native:DIGIT"} = 1;
+    $types{"$native:IDCONT"} = 1;
+    $types{"$native:WORDCHAR"} = 1;
+    $types{"$native:GRAPH"} = 1;
+    $types{"$native:PRINT"} = 1;
+    $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
+}
+
+for $i (0..0x7F) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:ASCII"} = 1;
+}
+for $i (0..0x1f, 0x7F..0x9F) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:CNTRL"} = 1;
+}
+for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:GRAPH"} = 1;
+    $types{"$native:PRINT"} = 1;
+}
+for $i (0x09, 0x20, 0xA0) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:BLANK"} = 1;
+    $types{"$native:SPACE"} = 1;
+    $types{"$native:PSXSPC"} = 1;
+    $types{"$native:PRINT"} = 1 if $i > 0x09;
+}
+for $i (0x09..0x0D, 0x85, 0x2029) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:SPACE"} = 1;
+    $types{"$native:PSXSPC"} = 1;
+}
+for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:UPPER"} = 1;
+    $types{"$native:XDIGIT"} = 1 if $i < 0x47;
+}
+for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:LOWER"} = 1;
+    $types{"$native:XDIGIT"} = 1 if $i < 0x67;
+}
+for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
+        0xB7, 0xBB, 0xBF, 0x5BE)
+{
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    $types{"$native:PUNCT"} = 1;
+    $types{"$native:GRAPH"} = 1;
+    $types{"$native:PRINT"} = 1;
+}
+
+$i = ord('_');
+$types{"$i:WORDCHAR"} = 1;
+$types{"$i:IDFIRST"} = 1;
+$types{"$i:IDCONT"} = 1;
+
+# Now find all the unique code points included above.
+my %code_points_to_test;
+my $key;
+for $key (keys %types) {
+    $key =~ s/:.*//;
+    $code_points_to_test{$key} = 1;
+}
+
+# And test each one
+for $i (sort { $a <=> $b } keys %code_points_to_test) {
+    my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
+    my $hex = sprintf("0x%02X", $native);
+
+    # And for each code point test each of the classes
+    my $class;
+    for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
+                   IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
+                   XDIGIT))
+    {
+        if ($i < 256) {  # For the ones that can fit in a byte, test each of
+                         # the three macros.
+            my $suffix;
+            for $suffix ("", "_A", "_L1") {
+                my $should_be = ($i > 0x7F && $suffix ne "_L1")
+                                ? 0     # Fail on non-ASCII unless L1
+                                : ($types{"$native:$class"} || 0);
+                my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
+                my $is = eval $eval_string || 0;
+                die "eval 'For $i: $eval_string' gave $@" if $@;
+                ok($is, $should_be, "'$eval_string'");
+            }
+        }
+
+        # For all code points, test the '_utf8_safe' macros
+        if ("$]" < 5.006) {
+            skip("No UTF-8 on this perl", 0);
+            if ($i > 255) {
+                skip("No UTF-8 on this perl", 0);
+            }
+        }
+        else {
+            my $utf8 = quotemeta Devel::PPPort::uvoffuni_to_utf8($i);
+            if ("$]" < 5.007 && $native > 255) {
+                skip("Perls earlier than 5.7 give wrong answers for above Latin1 code points", 0);
+            }
+            elsif ("$]" <= 5.011003 && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
+                skip("Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH", 0);
+            }
+            else {
+
+                my $should_be = $types{"$native:$class"} || 0;
+                my $eval_string = "Devel::PPPort::is${class}_utf8_safe(\"$utf8\", 0)";
+                my $is = eval $eval_string || 0;
+                die "eval 'For $i, $eval_string' gave $@" if $@;
+                ok($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
+            }
+
+            # And for the high code points, test that a too-short malformation (the
+            # -1 offset, which ends the buffer one byte early) causes it to fail.
+            if ($i > 255) {
+                if ("$]" >= 5.025009) {
+                    skip("Prints an annoying error message that khw doesn't know how to easily suppress", 0);
+                }
+                else {
+                    my $eval_string = "Devel::PPPort::is${class}_utf8_safe(\"$utf8\", -1)";
+                    my $is = eval "no warnings; $eval_string" || 0;
+                    die "eval '$eval_string' gave $@" if $@;
+                    ok($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
+                }
+            }
+        }
+    }
+}
+
+ok(&Devel::PPPort::av_top_index([1,2,3]), 2);
+ok(&Devel::PPPort::av_tindex([1,2,3,4]), 3);