X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/f2615e360947a74a2317135477274293f0e14fe2..91497b7f1eadb85318d1721ab68c3a72fedb43c6:/dist/Devel-PPPort/parts/inc/misc diff --git a/dist/Devel-PPPort/parts/inc/misc b/dist/Devel-PPPort/parts/inc/misc index 9a2e090..ffed764 100644 --- a/dist/Devel-PPPort/parts/inc/misc +++ b/dist/Devel-PPPort/parts/inc/misc @@ -12,48 +12,40 @@ =provides __UNDEFINED__ -C_ARRAY_END -C_ARRAY_LENGTH -cBOOL +__REDEFINE__ END_EXTERN_C EXTERN_C -HeUTF8 INT2PTR MUTABLE_PTR -MUTABLE_SV -NUM2PTR NVTYPE -OpHAS_SIBLING -OpLASTSIB_set -OpMAYBESIB_set -OpMORESIB_set -OpSIBLING -PERL_GCC_BRACE_GROUPS_FORBIDDEN -PERL_HASH PERLIO_FUNCS_CAST PERLIO_FUNCS_DECL +PERL_STATIC_INLINE PERL_UNUSED_ARG PERL_UNUSED_CONTEXT PERL_UNUSED_DECL PERL_UNUSED_RESULT PERL_UNUSED_VAR PERL_USE_GCC_BRACE_GROUPS -PTR2IV -PTR2NV PTR2ul -PTR2UV PTRV START_EXTERN_C STMT_END STMT_START SvRX -SvRXOK -UTF8_MAXBYTES WIDEST_UTYPE XSRETURN +NOT_REACHED +ASSUME =implementation +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +__UNDEFINED__ PERL_STATIC_INLINE static inline +#else +__UNDEFINED__ PERL_STATIC_INLINE static +#endif + __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0) __UNDEFINED__ OpHAS_SIBLING(o) (cBOOL((o)->op_sibling)) __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling) @@ -62,26 +54,136 @@ __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL) __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib)) __UNDEFINED__ HEf_SVKEY -2 -#ifndef SvRX -#if { NEED SvRX } +#if defined(DEBUGGING) && !defined(__COVERITY__) +__UNDEFINED__ __ASSERT_(statement) assert(statement), +#else +__UNDEFINED__ __ASSERT_(statement) +#endif -void * -SvRX(pTHX_ SV *rv) -{ - if (SvROK(rv)) { - SV *sv = SvRV(rv); - if (SvMAGICAL(sv)) { - MAGIC *mg = mg_find(sv, PERL_MAGIC_qr); - if (mg && mg->mg_obj) { - return mg->mg_obj; - } - } - } - return 0; -} +__UNDEF_NOT_PROVIDED__ __has_builtin(x) 0 + +#if __has_builtin(__builtin_unreachable) +# define D_PPP_HAS_BUILTIN_UNREACHABLE +#elif (defined(__GNUC__) && ( __GNUC__ > 4 \ + || __GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +# define D_PPP_HAS_BUILTIN_UNREACHABLE +#endif + +#ifndef ASSUME +# ifdef DEBUGGING +# define ASSUME(x) assert(x) +# elif defined(_MSC_VER) +# define ASSUME(x) __assume(x) +# elif defined(__ARMCC_VERSION) +# define ASSUME(x) __promise(x) +# elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE) +# define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable()) +# else +# define ASSUME(x) assert(x) +# endif +#endif + +#ifndef NOT_REACHED +# ifdef D_PPP_HAS_BUILTIN_UNREACHABLE +# define NOT_REACHED \ + STMT_START { \ + ASSUME(!"UNREACHABLE"); __builtin_unreachable(); \ + } STMT_END +# elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux)) +# define NOT_REACHED +# else +# define NOT_REACHED ASSUME(!"UNREACHABLE") +# endif #endif + +#ifndef WIDEST_UTYPE +# ifdef QUADKIND +# ifdef U64TYPE +# define WIDEST_UTYPE U64TYPE +# else +# define WIDEST_UTYPE unsigned Quad_t +# endif +# else +# define WIDEST_UTYPE U32 +# endif #endif +/* These could become provided if/when they become part of the public API */ +__UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \ + (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))) +__UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \ + ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \ + : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \ + : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l))))) + +/* The '| 0' part ensures a compiler error if c is not integer (like e.g., a + * pointer) */ +#undef FITS_IN_8_BITS /* handy.h version uses a core-only constant */ +__UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ + || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF)) + +/* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below + * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code + * point. That is so that it can automatically get the bug fixes done in this + * file. */ +#define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro) \ + (((e) - (s)) <= 0 \ + ? 0 \ + : UTF8_IS_INVARIANT((s)[0]) \ + ? is ## macro ## _L1((s)[0]) \ + : (((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ + /* The cast in the line below is only to silence warnings */ \ + ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ + UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ + & UTF_START_MASK(2), \ + (s)[1]))) \ + : is ## macro ## _utf8(s)) + +/* Create the macro for "is'macro'_LC_utf8_safe(s, e)". For code points below + * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code + * point. That is so that it can automatically get the bug fixes done in this + * file. */ +#define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro) \ + (((e) - (s)) <= 0 \ + ? 0 \ + : UTF8_IS_INVARIANT((s)[0]) \ + ? is ## macro ## _LC((s)[0]) \ + : (((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ + /* The cast in the line below is only to silence warnings */ \ + ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ + UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ + & UTF_START_MASK(2), \ + (s)[1]))) \ + : is ## macro ## _utf8(s)) + +/* A few of the early functions are broken. For these and the non-LC case, + * machine generated code is substituted. But that code doesn't work for + * locales. This is just like the above macro, but at the end, we call the + * macro we've generated for the above 255 case, which is correct since locale + * isn't involved. This will generate extra code to handle the 0-255 inputs, + * but hopefully it will be optimized out by the C compiler. But just in case + * it isn't, this macro is only used on the few versions that are broken */ + +#define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro) \ + (((e) - (s)) <= 0 \ + ? 0 \ + : UTF8_IS_INVARIANT((s)[0]) \ + ? is ## macro ## _LC((s)[0]) \ + : (((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ + /* The cast in the line below is only to silence warnings */ \ + ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ + UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ + & UTF_START_MASK(2), \ + (s)[1]))) \ + : is ## macro ## _utf8_safe(s, e)) + +__UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL) __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv)) #ifndef PERL_UNUSED_DECL @@ -126,7 +228,10 @@ __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv)) #endif __UNDEFINED__ NOOP /*EMPTY*/(void)0 -__UNDEFINED__ dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL + +#if { VERSION < 5.6.1 } && { VERSION < 5.27.7 } +__REDEFINE__ dNOOP struct Perl___notused_struct +#endif #ifndef NVTYPE # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) @@ -165,44 +270,36 @@ __UNDEFINED__ PTR2IV(p) INT2PTR(IV,p) __UNDEFINED__ PTR2UV(p) INT2PTR(UV,p) __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p) -#undef START_EXTERN_C -#undef END_EXTERN_C -#undef EXTERN_C #ifdef __cplusplus -# define START_EXTERN_C extern "C" { -# define END_EXTERN_C } -# define EXTERN_C extern "C" +__REDEFINE__ START_EXTERN_C extern "C" { +__REDEFINE__ END_EXTERN_C } +__REDEFINE__ EXTERN_C extern "C" #else -# define START_EXTERN_C -# define END_EXTERN_C -# define EXTERN_C extern +__REDEFINE__ START_EXTERN_C +__REDEFINE__ END_EXTERN_C +__REDEFINE__ EXTERN_C extern #endif -#if defined(PERL_GCC_PEDANTIC) +#if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC) # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN -# define PERL_GCC_BRACE_GROUPS_FORBIDDEN +__UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN # endif #endif -#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus) +#if ! defined(__GNUC__) || defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) || defined(__cplusplus) +# undef PERL_USE_GCC_BRACE_GROUPS +#else # ifndef PERL_USE_GCC_BRACE_GROUPS # define PERL_USE_GCC_BRACE_GROUPS # endif #endif -#undef STMT_START -#undef STMT_END -#ifdef PERL_USE_GCC_BRACE_GROUPS -# define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */ -# define STMT_END ) +#if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) +__REDEFINE__ STMT_START if (1) +__REDEFINE__ STMT_END else (void)0 #else -# if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) -# define STMT_START if (1) -# define STMT_END else (void)0 -# else -# define STMT_START do -# define STMT_END while (0) -# endif +__REDEFINE__ STMT_START do +__REDEFINE__ STMT_END while (0) #endif __UNDEFINED__ boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) @@ -215,6 +312,10 @@ __UNDEFINED__ DEFSV_set(sv) (DEFSV = (sv)) /* Older perls (<=5.003) lack AvFILLp */ __UNDEFINED__ AvFILLp AvFILL +__UNDEFINED__ av_tindex AvFILL +__UNDEFINED__ av_top_index AvFILL +__UNDEFINED__ av_count(av) (AvFILL(av)+1) + __UNDEFINED__ ERRSV get_sv("@",FALSE) /* Hint: gv_stashpvn @@ -242,14 +343,13 @@ __UNDEFINED__ dITEMS I32 items = SP - MARK __UNDEFINED__ dXSTARG SV * targ = sv_newmortal() __UNDEFINED__ dAXMARK I32 ax = POPMARK; \ - register SV ** const mark = PL_stack_base + ax++ + SV ** const mark = PL_stack_base + ax++ __UNDEFINED__ XSprePUSH (sp = PL_stack_base + ax - 1) #if { VERSION < 5.005 } -# undef XSRETURN -# define XSRETURN(off) \ +__REDEFINE__ XSRETURN(off) \ STMT_START { \ PL_stack_sp = PL_stack_base + ax + ((off) - 1); \ return; \ @@ -265,8 +365,6 @@ __UNDEFINED__ dVAR dNOOP __UNDEFINED__ SVf "_" -__UNDEFINED__ UTF8_MAXBYTES UTF8_MAXLEN - __UNDEFINED__ CPERLscope(x) x __UNDEFINED__ PERL_HASH(hash,str,len) \ @@ -302,64 +400,103 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*); #endif -#ifndef WIDEST_UTYPE -# ifdef QUADKIND -# ifdef U64TYPE -# define WIDEST_UTYPE U64TYPE -# else -# define WIDEST_UTYPE Quad_t -# endif -# else -# define WIDEST_UTYPE U32 -# endif +/* On versions without NATIVE_TO_ASCII, only ASCII is supported */ +#if defined(EBCDIC) && defined(NATIVE_TO_ASCI) +__UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c) +__UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c) +__UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c)) +__UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c)) +#else +__UNDEFINED__ NATIVE_TO_LATIN1(c) (c) +__UNDEFINED__ LATIN1_TO_NATIVE(c) (c) +__UNDEFINED__ NATIVE_TO_UNI(c) (c) +__UNDEFINED__ UNI_TO_NATIVE(c) (c) #endif +/* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE + EBCDIC is not supported on versions earlier than 5.7.1 + */ + +/* The meaning of this changed; use the modern version */ +#undef isPSXSPC +#undef isPSXSPC_A +#undef isPSXSPC_L1 + +/* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe + This is equivalent to the corresponding isSPACE-type macro. On perls + before 5.18, this matched a vertical tab and SPACE didn't. But the + ppport.h SPACE version does match VT in all perl releases. Since VT's are + extremely rarely found in real-life files, this difference effectively + doesn't matter */ + +/* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe + Until Perl 5.18, this did not match the vertical tab (VT). The ppport.h + version does match it in all perl releases. Since VT's are extremely rarely + found in real-life files, this difference effectively doesn't matter */ + #ifdef EBCDIC -/* This is the first version where these macros are fully correct. Relying on - * the C library functions, as earlier releases did, causes problems with - * locales */ +/* This is the first version where these macros are fully correct on EBCDIC + * platforms. Relying on the C library functions, as earlier releases did, + * causes problems with locales */ # if { VERSION < 5.22.0 } # undef isALNUM # undef isALNUM_A +# undef isALNUM_L1 # undef isALNUMC # undef isALNUMC_A +# undef isALNUMC_L1 # undef isALPHA # undef isALPHA_A +# undef isALPHA_L1 # undef isALPHANUMERIC # undef isALPHANUMERIC_A +# undef isALPHANUMERIC_L1 # undef isASCII # undef isASCII_A +# undef isASCII_L1 # undef isBLANK # undef isBLANK_A +# undef isBLANK_L1 # undef isCNTRL # undef isCNTRL_A +# undef isCNTRL_L1 # undef isDIGIT # undef isDIGIT_A +# undef isDIGIT_L1 # undef isGRAPH # undef isGRAPH_A +# undef isGRAPH_L1 # undef isIDCONT # undef isIDCONT_A +# undef isIDCONT_L1 # undef isIDFIRST # undef isIDFIRST_A +# undef isIDFIRST_L1 # undef isLOWER # undef isLOWER_A +# undef isLOWER_L1 # undef isOCTAL # undef isOCTAL_A +# undef isOCTAL_L1 # undef isPRINT # undef isPRINT_A -# undef isPSXSPC -# undef isPSXSPC_A +# undef isPRINT_L1 # undef isPUNCT # undef isPUNCT_A +# undef isPUNCT_L1 # undef isSPACE # undef isSPACE_A +# undef isSPACE_L1 # undef isUPPER # undef isUPPER_A +# undef isUPPER_L1 # undef isWORDCHAR # undef isWORDCHAR_A +# undef isWORDCHAR_L1 # undef isXDIGIT # undef isXDIGIT_A +# undef isXDIGIT_L1 # endif __UNDEFINED__ isASCII(c) (isCNTRL(c) || isPRINT(c)) @@ -386,6 +523,16 @@ __UNDEFINED__ isCNTRL(c) ( (c) == '\0' || (c) == '\a' || (c) == '\b' || (c) == 0x3D /* U+15 NAK */ \ || (c) == 0x3F /* U+1A SUB */ \ ) + +#if '^' == 106 /* EBCDIC POSIX-BC */ +# define D_PPP_OUTLIER_CONTROL 0x5F +#else /* EBCDIC 1047 037 */ +# define D_PPP_OUTLIER_CONTROL 0xFF +#endif + +/* The controls are everything below blank, plus one outlier */ +__UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' ' \ + || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL) /* The ordering of the tests in this and isUPPER are to exclude most characters * early */ __UNDEFINED__ isLOWER(c) ( (c) >= 'a' && (c) <= 'z' \ @@ -416,94 +563,534 @@ __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' # undef isUPPER_A # endif -# if { VERSION < 5.8.0 } -/* Hint: isCNTRL - * Earlier perls omitted DEL */ +# if { VERSION == 5.7.0 } /* this perl made space GRAPH */ +# undef isGRAPH +# endif + +# if { VERSION < 5.8.0 } /* earlier perls omitted DEL */ # undef isCNTRL # endif # if { VERSION < 5.10.0 } -/* Hint: isPRINT - * The implementation in older perl versions includes all of the - * isSPACE() characters, which is wrong. The version provided by - * Devel::PPPort always overrides a present buggy version. - */ +/* earlier perls included all of the isSPACE() characters, which is wrong. The + * version provided by Devel::PPPort always overrides an existing buggy + * version. */ # undef isPRINT # undef isPRINT_A # endif # if { VERSION < 5.14.0 } -/* Hint: isASCII - * The implementation in older perl versions always returned true if the - * parameter was a signed char - */ +/* earlier perls always returned true if the parameter was a signed char */ # undef isASCII # undef isASCII_A # endif -# if { VERSION < 5.20.0 } -/* Hint: isSPACE - * The implementation in older perl versions didn't include \v */ +# if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */ +# undef isPUNCT_L1 +# endif + +# if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */ +# undef isALNUMC_L1 +#endif + +# if { VERSION < 5.20.0 } /* earlier perls didn't include \v */ # undef isSPACE # undef isSPACE_A +# undef isSPACE_L1 + # endif __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127) __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127) -__UNDEFINED__ isLOWER(c) ((c) >= 'a' && (c) <= 'z') -__UNDEFINED__ isUPPER(c) ((c) <= 'Z' && (c) >= 'A') +__UNDEFINED__ isCNTRL_L1(c) ( (WIDEST_UTYPE) (c) < ' ' \ + || inRANGE((c), 0x7F, 0x9F)) +__UNDEFINED__ isLOWER(c) inRANGE((c), 'a', 'z') +__UNDEFINED__ isUPPER(c) inRANGE((c), 'A', 'Z') + #endif /* Below are definitions common to EBCDIC and ASCII */ +__UNDEFINED__ isASCII_L1(c) isASCII(c) +__UNDEFINED__ isASCII_LC(c) isASCII(c) __UNDEFINED__ isALNUM(c) isWORDCHAR(c) __UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c) +__UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c) __UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c)) +__UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c)) __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c)) +__UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c)) +__UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c)) __UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t') -__UNDEFINED__ isDIGIT(c) ((c) <= '9' && (c) >= '0') +__UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \ + || ( FITS_IN_8_BITS(c) \ + && NATIVE_TO_LATIN1((U8) c) == 0xA0)) +__UNDEFINED__ isBLANK_LC(c) isBLANK(c) +__UNDEFINED__ isDIGIT(c) inRANGE(c, '0', '9') +__UNDEFINED__ isDIGIT_L1(c) isDIGIT(c) __UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c)) +__UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \ + && (c) != ' ' \ + && NATIVE_TO_LATIN1((U8) c) != 0xA0) __UNDEFINED__ isIDCONT(c) isWORDCHAR(c) +__UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c) +__UNDEFINED__ isIDCONT_LC(c) isWORDCHAR_LC(c) __UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_') +__UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_') +__UNDEFINED__ isIDFIRST_LC(c) (isALPHA_LC(c) || (U8) (c) == '_') +__UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \ + || ( FITS_IN_8_BITS(c) \ + && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \ + && NATIVE_TO_LATIN1((U8) c) != 0xF7) \ + || NATIVE_TO_LATIN1((U8) c) == 0xAA \ + || NATIVE_TO_LATIN1((U8) c) == 0xBA \ + || NATIVE_TO_LATIN1((U8) c) == 0xB5))) __UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0') +__UNDEFINED__ isOCTAL_L1(c) isOCTAL(c) __UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ') +__UNDEFINED__ isPRINT_L1(c) (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c)) __UNDEFINED__ isPSXSPC(c) isSPACE(c) -__UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \ - || (c) == '#' || (c) == '$' || (c) == '%' \ - || (c) == '&' || (c) == '\'' || (c) == '(' \ - || (c) == ')' || (c) == '*' || (c) == '+' \ - || (c) == ',' || (c) == '.' || (c) == '/' \ - || (c) == ':' || (c) == ';' || (c) == '<' \ - || (c) == '=' || (c) == '>' || (c) == '?' \ - || (c) == '@' || (c) == '[' || (c) == '\\' \ - || (c) == ']' || (c) == '^' || (c) == '_' \ - || (c) == '`' || (c) == '{' || (c) == '|' \ +__UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c) +__UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \ + || (c) == '#' || (c) == '$' || (c) == '%' \ + || (c) == '&' || (c) == '\'' || (c) == '(' \ + || (c) == ')' || (c) == '*' || (c) == '+' \ + || (c) == ',' || (c) == '.' || (c) == '/' \ + || (c) == ':' || (c) == ';' || (c) == '<' \ + || (c) == '=' || (c) == '>' || (c) == '?' \ + || (c) == '@' || (c) == '[' || (c) == '\\' \ + || (c) == ']' || (c) == '^' || (c) == '_' \ + || (c) == '`' || (c) == '{' || (c) == '|' \ || (c) == '}' || (c) == '~') -__UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \ +__UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \ + || ( FITS_IN_8_BITS(c) \ + && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \ + || NATIVE_TO_LATIN1((U8) c) == 0xA7 \ + || NATIVE_TO_LATIN1((U8) c) == 0xAB \ + || NATIVE_TO_LATIN1((U8) c) == 0xB6 \ + || NATIVE_TO_LATIN1((U8) c) == 0xB7 \ + || NATIVE_TO_LATIN1((U8) c) == 0xBB \ + || NATIVE_TO_LATIN1((U8) c) == 0xBF))) +__UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \ || (c) == '\v' || (c) == '\f') +__UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \ + || (FITS_IN_8_BITS(c) \ + && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \ + || NATIVE_TO_LATIN1((U8) c) == 0xA0))) +__UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \ + || (FITS_IN_8_BITS(c) \ + && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \ + && NATIVE_TO_LATIN1((U8) c) <= 0xDE \ + && NATIVE_TO_LATIN1((U8) c) != 0xD7))) __UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_') -__UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \ - || ((c) >= 'a' && (c) <= 'f') \ - || ((c) >= 'A' && (c) <= 'F')) - -__UNDEFINED__ isALNUM_A isALNUM -__UNDEFINED__ isALNUMC_A isALNUMC -__UNDEFINED__ isALPHA_A isALPHA -__UNDEFINED__ isALPHANUMERIC_A isALPHANUMERIC -__UNDEFINED__ isASCII_A isASCII -__UNDEFINED__ isBLANK_A isBLANK -__UNDEFINED__ isCNTRL_A isCNTRL -__UNDEFINED__ isDIGIT_A isDIGIT -__UNDEFINED__ isGRAPH_A isGRAPH -__UNDEFINED__ isIDCONT_A isIDCONT -__UNDEFINED__ isIDFIRST_A isIDFIRST -__UNDEFINED__ isLOWER_A isLOWER -__UNDEFINED__ isOCTAL_A isOCTAL -__UNDEFINED__ isPRINT_A isPRINT -__UNDEFINED__ isPSXSPC_A isPSXSPC -__UNDEFINED__ isPUNCT_A isPUNCT -__UNDEFINED__ isSPACE_A isSPACE -__UNDEFINED__ isUPPER_A isUPPER -__UNDEFINED__ isWORDCHAR_A isWORDCHAR -__UNDEFINED__ isXDIGIT_A isXDIGIT +__UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c)) +__UNDEFINED__ isWORDCHAR_LC(c) (isIDFIRST_LC(c) || isDIGIT_LC(c)) +__UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \ + || inRANGE((c), 'a', 'f') \ + || inRANGE((c), 'A', 'F')) +__UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c) +__UNDEFINED__ isXDIGIT_LC(c) isxdigit(c) + +__UNDEFINED__ isALNUM_A(c) isALNUM(c) +__UNDEFINED__ isALNUMC_A(c) isALNUMC(c) +__UNDEFINED__ isALPHA_A(c) isALPHA(c) +__UNDEFINED__ isALPHANUMERIC_A(c) isALPHANUMERIC(c) +__UNDEFINED__ isASCII_A(c) isASCII(c) +__UNDEFINED__ isBLANK_A(c) isBLANK(c) +__UNDEFINED__ isCNTRL_A(c) isCNTRL(c) +__UNDEFINED__ isDIGIT_A(c) isDIGIT(c) +__UNDEFINED__ isGRAPH_A(c) isGRAPH(c) +__UNDEFINED__ isIDCONT_A(c) isIDCONT(c) +__UNDEFINED__ isIDFIRST_A(c) isIDFIRST(c) +__UNDEFINED__ isLOWER_A(c) isLOWER(c) +__UNDEFINED__ isOCTAL_A(c) isOCTAL(c) +__UNDEFINED__ isPRINT_A(c) isPRINT(c) +__UNDEFINED__ isPSXSPC_A(c) isPSXSPC(c) +__UNDEFINED__ isPUNCT_A(c) isPUNCT(c) +__UNDEFINED__ isSPACE_A(c) isSPACE(c) +__UNDEFINED__ isUPPER_A(c) isUPPER(c) +__UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c) +__UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c) + +__UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s))) +__UNDEFINED__ isASCII_uvchr(c) (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0) + +#if { VERSION >= 5.006 } +# ifdef isALPHA_uni /* If one defined, all are; this is just an exemplar */ +# define D_PPP_is_ctype(upper, lower, c) \ + (FITS_IN_8_BITS(c) \ + ? is ## upper ## _L1(c) \ + : is ## upper ## _uni((UV) (c))) /* _uni is old synonym */ +# else +# define D_PPP_is_ctype(upper, lower, c) \ + (FITS_IN_8_BITS(c) \ + ? is ## upper ## _L1(c) \ + : is_uni_ ## lower((UV) (c))) /* is_uni_ is even older */ +# endif + +__UNDEFINED__ isALPHA_uvchr(c) D_PPP_is_ctype(ALPHA, alpha, c) +__UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c)) +# ifdef is_uni_blank +__UNDEFINED__ isBLANK_uvchr(c) D_PPP_is_ctype(BLANK, blank, c) +# else +__UNDEFINED__ isBLANK_uvchr(c) (FITS_IN_8_BITS(c) \ + ? isBLANK_L1(c) \ + : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \ + || inRANGE((UV) (c), 0x2000, 0x200A) \ + || (UV) (c) == 0x202F /* Unicode 3.0 */\ + || (UV) (c) == 0x205F /* Unicode 3.2 */\ + || (UV) (c) == 0x3000)) +# endif +__UNDEFINED__ isCNTRL_uvchr(c) D_PPP_is_ctype(CNTRL, cntrl, c) +__UNDEFINED__ isDIGIT_uvchr(c) D_PPP_is_ctype(DIGIT, digit, c) +__UNDEFINED__ isGRAPH_uvchr(c) D_PPP_is_ctype(GRAPH, graph, c) +__UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c) +__UNDEFINED__ isIDFIRST_uvchr(c) D_PPP_is_ctype(IDFIRST, idfirst, c) +__UNDEFINED__ isLOWER_uvchr(c) D_PPP_is_ctype(LOWER, lower, c) +__UNDEFINED__ isPRINT_uvchr(c) D_PPP_is_ctype(PRINT, print, c) +__UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c) +__UNDEFINED__ isPUNCT_uvchr(c) D_PPP_is_ctype(PUNCT, punct, c) +__UNDEFINED__ isSPACE_uvchr(c) D_PPP_is_ctype(SPACE, space, c) +__UNDEFINED__ isUPPER_uvchr(c) D_PPP_is_ctype(UPPER, upper, c) +__UNDEFINED__ isXDIGIT_uvchr(c) D_PPP_is_ctype(XDIGIT, xdigit, c) +__UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c) \ + ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c)) + +__UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA) +# ifdef isALPHANUMERIC_utf8 +__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC) +# else +__UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \ + (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e)) +# endif + +/* This was broken before 5.18, and just use this instead of worrying about + * which releases the official works on */ +# if 'A' == 65 +__UNDEFINED__ isBLANK_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? /* Machine generated */ \ + ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1 \ + : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ + ( ( 0xC2 == ((const U8*)s)[0] ) ? \ + ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0xE1 == ((const U8*)s)[0] ) ? \ + ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( 0xE2 == ((const U8*)s)[0] ) ? \ + ( ( 0x80 == ((const U8*)s)[1] ) ? \ + ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\ + : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : 0 ) \ + : 0 ) + +# elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */ + +__UNDEFINED__ isBLANK_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? \ + ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \ + : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ + ( ( 0x80 == ((const U8*)s)[0] ) ? \ + ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0xBC == ((const U8*)s)[0] ) ? \ + ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( 0xCA == ((const U8*)s)[0] ) ? \ + ( ( 0x41 == ((const U8*)s)[1] ) ? \ + ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\ + : ( 0x42 == ((const U8*)s)[1] ) ? \ + ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \ + : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : 0 ) \ +: 0 ) + +# elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */ + +__UNDEFINED__ isBLANK_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? \ + ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \ + : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \ + ( ( 0x78 == ((const U8*)s)[0] ) ? \ + ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0xBD == ((const U8*)s)[0] ) ? \ + ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( 0xCA == ((const U8*)s)[0] ) ? \ + ( ( 0x41 == ((const U8*)s)[1] ) ? \ + ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\ + : ( 0x42 == ((const U8*)s)[1] ) ? \ + ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \ + : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\ + : 0 ) \ +: 0 ) + +# else +# error Unknown character set +# endif + +__UNDEFINED__ isCNTRL_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL) +__UNDEFINED__ isDIGIT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT) +__UNDEFINED__ isGRAPH_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH) +# ifdef isIDCONT_utf8 +__UNDEFINED__ isIDCONT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT) +# else +__UNDEFINED__ isIDCONT_utf8_safe(s,e) isWORDCHAR_utf8_safe(s,e) +# endif + +__UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST) +__UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER) +__UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT) + +/* Use the modern definition */ +__REDEFINE__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e) + +__UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT) +__UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE) +__UNDEFINED__ isUPPER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER) + +# ifdef isWORDCHAR_utf8 +__UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR) +# else +__UNDEFINED__ isWORDCHAR_utf8_safe(s,e) \ + (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_') +# endif + +/* This was broken before 5.12, and just use this instead of worrying about + * which releases the official works on */ +# if 'A' == 65 +__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? \ + ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\ + : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\ + ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\ + : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\ +: 0 ) + +# elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */ + +__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? \ + ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\ + : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\ + ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\ + : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\ +: 0 ) + +# elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */ + +__UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ +( ( LIKELY((e) > (s)) ) ? \ + ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\ + : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\ + ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\ + : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\ +: 0 ) + +# else +# error Unknown character set +# endif + +__UNDEFINED__ isALPHA_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA) +# ifdef isALPHANUMERIC_utf8 +__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC) +# else +__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ + (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e)) +# endif + +__UNDEFINED__ isBLANK_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK) +__UNDEFINED__ isCNTRL_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL) +__UNDEFINED__ isDIGIT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT) +__UNDEFINED__ isGRAPH_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH) +# ifdef isIDCONT_utf8 +__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT) +# else +__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) isWORDCHAR_LC_utf8_safe(s,e) +# endif + +__UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST) +__UNDEFINED__ isLOWER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER) +__UNDEFINED__ isPRINT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT) + +/* Use the modern definition */ +__REDEFINE__ isPSXSPC_LC_utf8_safe(s,e) isSPACE_LC_utf8_safe(s,e) + +__UNDEFINED__ isPUNCT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT) +__UNDEFINED__ isSPACE_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE) +__UNDEFINED__ isUPPER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER) + +# ifdef isWORDCHAR_utf8 +__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR) +# else +__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) \ + (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_') +# endif + +__UNDEFINED__ isXDIGIT_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT) + +/* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe, + * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe, + * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe, + * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe, + * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe, + * isXDIGIT_utf8_safe, + * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe, + * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe, + * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe, + * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe, + * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe, + * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe, + * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr, + * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr, + * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr, + * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr, + * isWORDCHAR_uvchr, isXDIGIT_uvchr + * + * The UTF-8 handling is buggy in early Perls, and this can give inaccurate + * results for code points above 0xFF, until the implementation started + * settling down in 5.12 and 5.14 */ + +#endif + +#define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \ + " \\x%02x (too short; %d bytes available, need" \ + " %d)\n" +/* Perls starting here had a new API which handled multi-character results */ +#if { VERSION >= 5.7.3 } + +__UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l)) + +# if { VERSION != 5.15.6 } /* Just this version is broken */ + + /* Prefer the macro to the function */ +# if defined toLOWER_utf8 +# define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l) +# else +# define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l) +# endif +# if defined toTITLE_utf8 +# define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l) +# else +# define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l) +# endif +# if defined toUPPER_utf8 +# define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l) +# else +# define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l) +# endif +# if defined toFOLD_utf8 +# define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l) +# else +# define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l) +# endif +# else /* Below is 5.15.6, which failed to make the macros available +# outside of core, so we have to use the 'Perl_' form. khw +# decided it was easier to just handle this case than have to +# document the exception, and make an exception in the tests below +# */ +# define D_PPP_TO_LOWER_CALLEE(s,r,l) \ + Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_TITLE_CALLEE(s,r,l) \ + Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_UPPER_CALLEE(s,r,l) \ + Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_FOLD_CALLEE(s,r,l) \ + Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL) +# endif + +/* The actual implementation of the backported macros. If too short, croak, + * otherwise call the original that doesn't have an upper limit parameter */ +# define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \ + (((((e) - (s)) <= 0) \ + /* We could just do nothing, but modern perls croak */ \ + ? (croak("Attempting case change on zero length string"), \ + 0) /* So looks like it returns something, and will compile */ \ + : ((e) - (s)) < UTF8SKIP(s)) \ + ? (croak(D_PPP_TOO_SHORT_MSG, \ + s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ + 0) \ + : D_PPP_TO_ ## name ## _CALLEE(s,r,l)) + +__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l) +__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l) +__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l) +__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l) + +#elif { VERSION >= 5.006 } + +/* Here we have UTF-8 support, but using the original API where the case + * changing functions merely returned the changed code point; hence they + * couldn't handle multi-character results. */ + +# ifdef uvchr_to_utf8 +# define D_PPP_UV_TO_UTF8 uvchr_to_utf8 +# else +# define D_PPP_UV_TO_UTF8 uv_to_utf8 +# endif + + /* Get the utf8 of the case changed value, and store its length; then have + * to re-calculate the changed case value in order to return it */ +# define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \ + (*(l) = (D_PPP_UV_TO_UTF8(s, \ + UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \ + UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) + +__UNDEFINED__ toLOWER_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l) +__UNDEFINED__ toUPPER_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l) +__UNDEFINED__ toTITLE_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l) +__UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l) + +# define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \ + (((((e) - (s)) <= 0) \ + ? (croak("Attempting case change on zero length string"), \ + 0) /* So looks like it returns something, and will compile */ \ + : ((e) - (s)) < UTF8SKIP(s)) \ + ? (croak(D_PPP_TOO_SHORT_MSG, \ + s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ + 0) \ + /* Get the changed code point and store its UTF-8 */ \ + : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \ + /* Then store its length, and re-get code point for return */ \ + *(l) = UTF8SKIP(r), to_utf8_ ## name(r)) + +/* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe, + * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr + The UTF-8 case changing operations had bugs before around 5.12 or 5.14; + this backport does not correct them. + + In perls before 7.3, multi-character case changing is not implemented; this + backport uses the simple case changes available in those perls. */ + +__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l) +__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l) +__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l) + + /* Warning: toFOLD_utf8_safe, toFOLD_uvchr + The UTF-8 case changing operations had bugs before around 5.12 or 5.14; + this backport does not correct them. + + In perls before 7.3, case folding is not implemented; instead, this + backport substitutes simple (not multi-character, which isn't available) + lowercasing. This gives the correct result in most, but not all, instances + */ + +__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l) + +#endif /* Until we figure out how to support this in older perls... */ #if { VERSION >= 5.8.0 } @@ -517,14 +1104,22 @@ __UNDEFINED__ HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \ __UNDEFINED__ C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) __UNDEFINED__ C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a)) +__UNDEFINED__ LIKELY(x) (x) +__UNDEFINED__ UNLIKELY(x) (x) + #ifndef MUTABLE_PTR -#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) +#if defined(PERL_USE_GCC_BRACE_GROUPS) # define MUTABLE_PTR(p) ({ void *_p = (p); _p; }) #else # define MUTABLE_PTR(p) ((void *) (p)) #endif #endif +__UNDEFINED__ MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p)) __UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p)) =xsmisc @@ -561,10 +1156,6 @@ XS(XS_Devel__PPPort_dAXMARK) XSRETURN(1); } -=xsinit - -#define NEED_SvRX - =xsboot { @@ -578,10 +1169,10 @@ newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file); int OpSIBLING_tests() PREINIT: - OP *x; - OP *kid; - OP *middlekid; - OP *lastkid; + OP *x = NULL; + OP *kid = NULL; + OP *middlekid = NULL; + OP *lastkid = NULL; int count = 0; int failures = 0; int i; @@ -770,7 +1361,7 @@ DEFSV_modify() int ERRSV() CODE: - RETVAL = SvTRUE(ERRSV); + RETVAL = SvTRUEx(ERRSV); OUTPUT: RETVAL @@ -837,8 +1428,8 @@ check_HeUTF8(utf8_key) hash = newHV(); key = SvPV(utf8_key, klen); - if (SvUTF8(utf8_key)) klen *= -1; - hv_store(hash, key, klen, newSVpvs("string"), 0); + hv_store(hash, key, SvUTF8(utf8_key) ? -klen : klen, + newSVpvs("string"), 0); hv_iterinit(hash); ent = hv_iternext(hash); assert(ent); @@ -857,502 +1448,1582 @@ check_c_array() mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */ bool -test_isBLANK(UV ord) +isBLANK(ord) + UV ord CODE: RETVAL = isBLANK(ord); OUTPUT: RETVAL bool -test_isBLANK_A(UV ord) +isBLANK_A(ord) + UV ord CODE: RETVAL = isBLANK_A(ord); OUTPUT: RETVAL bool -test_isUPPER(UV ord) +isBLANK_L1(ord) + UV ord + CODE: + RETVAL = isBLANK_L1(ord); + OUTPUT: + RETVAL + +bool +isUPPER(ord) + UV ord CODE: RETVAL = isUPPER(ord); OUTPUT: RETVAL bool -test_isUPPER_A(UV ord) +isUPPER_A(ord) + UV ord CODE: RETVAL = isUPPER_A(ord); OUTPUT: RETVAL bool -test_isLOWER(UV ord) +isUPPER_L1(ord) + UV ord + CODE: + RETVAL = isUPPER_L1(ord); + OUTPUT: + RETVAL + +bool +isLOWER(ord) + UV ord CODE: RETVAL = isLOWER(ord); OUTPUT: RETVAL bool -test_isLOWER_A(UV ord) +isLOWER_A(ord) + UV ord CODE: RETVAL = isLOWER_A(ord); OUTPUT: RETVAL bool -test_isALPHA(UV ord) +isLOWER_L1(ord) + UV ord + CODE: + RETVAL = isLOWER_L1(ord); + OUTPUT: + RETVAL + +bool +isALPHA(ord) + UV ord CODE: RETVAL = isALPHA(ord); OUTPUT: RETVAL bool -test_isALPHA_A(UV ord) +isALPHA_A(ord) + UV ord CODE: RETVAL = isALPHA_A(ord); OUTPUT: RETVAL bool -test_isWORDCHAR(UV ord) +isALPHA_L1(ord) + UV ord + CODE: + RETVAL = isALPHA_L1(ord); + OUTPUT: + RETVAL + +bool +isWORDCHAR(ord) + UV ord CODE: RETVAL = isWORDCHAR(ord); OUTPUT: RETVAL bool -test_isWORDCHAR_A(UV ord) +isWORDCHAR_A(ord) + UV ord CODE: RETVAL = isWORDCHAR_A(ord); OUTPUT: RETVAL bool -test_isALPHANUMERIC(UV ord) +isWORDCHAR_L1(ord) + UV ord + CODE: + RETVAL = isWORDCHAR_L1(ord); + OUTPUT: + RETVAL + +bool +isALPHANUMERIC(ord) + UV ord CODE: RETVAL = isALPHANUMERIC(ord); OUTPUT: RETVAL bool -test_isALPHANUMERIC_A(UV ord) +isALPHANUMERIC_A(ord) + UV ord CODE: RETVAL = isALPHANUMERIC_A(ord); OUTPUT: RETVAL bool -test_isALNUM(UV ord) +isALNUM(ord) + UV ord CODE: RETVAL = isALNUM(ord); OUTPUT: RETVAL bool -test_isALNUM_A(UV ord) +isALNUM_A(ord) + UV ord CODE: RETVAL = isALNUM_A(ord); OUTPUT: RETVAL bool -test_isDIGIT(UV ord) +isDIGIT(ord) + UV ord CODE: RETVAL = isDIGIT(ord); OUTPUT: RETVAL bool -test_isDIGIT_A(UV ord) +isDIGIT_A(ord) + UV ord CODE: RETVAL = isDIGIT_A(ord); OUTPUT: RETVAL bool -test_isOCTAL(UV ord) +isOCTAL(ord) + UV ord CODE: RETVAL = isOCTAL(ord); OUTPUT: RETVAL bool -test_isOCTAL_A(UV ord) +isOCTAL_A(ord) + UV ord CODE: RETVAL = isOCTAL_A(ord); OUTPUT: RETVAL bool -test_isIDFIRST(UV ord) +isIDFIRST(ord) + UV ord CODE: RETVAL = isIDFIRST(ord); OUTPUT: RETVAL bool -test_isIDFIRST_A(UV ord) +isIDFIRST_A(ord) + UV ord CODE: RETVAL = isIDFIRST_A(ord); OUTPUT: RETVAL bool -test_isIDCONT(UV ord) +isIDCONT(ord) + UV ord CODE: RETVAL = isIDCONT(ord); OUTPUT: RETVAL bool -test_isIDCONT_A(UV ord) +isIDCONT_A(ord) + UV ord CODE: RETVAL = isIDCONT_A(ord); OUTPUT: RETVAL bool -test_isSPACE(UV ord) +isSPACE(ord) + UV ord CODE: RETVAL = isSPACE(ord); OUTPUT: RETVAL bool -test_isSPACE_A(UV ord) +isSPACE_A(ord) + UV ord CODE: RETVAL = isSPACE_A(ord); OUTPUT: RETVAL bool -test_isASCII(UV ord) +isASCII(ord) + UV ord CODE: RETVAL = isASCII(ord); OUTPUT: RETVAL bool -test_isASCII_A(UV ord) +isASCII_A(ord) + UV ord CODE: RETVAL = isASCII_A(ord); OUTPUT: RETVAL bool -test_isCNTRL(UV ord) +isCNTRL(ord) + UV ord CODE: RETVAL = isCNTRL(ord); OUTPUT: RETVAL bool -test_isCNTRL_A(UV ord) +isCNTRL_A(ord) + UV ord CODE: RETVAL = isCNTRL_A(ord); OUTPUT: RETVAL bool -test_isPRINT(UV ord) +isPRINT(ord) + UV ord CODE: RETVAL = isPRINT(ord); OUTPUT: RETVAL bool -test_isPRINT_A(UV ord) +isPRINT_A(ord) + UV ord CODE: RETVAL = isPRINT_A(ord); OUTPUT: RETVAL bool -test_isGRAPH(UV ord) +isGRAPH(ord) + UV ord CODE: RETVAL = isGRAPH(ord); OUTPUT: RETVAL bool -test_isGRAPH_A(UV ord) +isGRAPH_A(ord) + UV ord CODE: RETVAL = isGRAPH_A(ord); OUTPUT: RETVAL bool -test_isPUNCT(UV ord) +isPUNCT(ord) + UV ord CODE: RETVAL = isPUNCT(ord); OUTPUT: RETVAL bool -test_isPUNCT_A(UV ord) +isPUNCT_A(ord) + UV ord CODE: RETVAL = isPUNCT_A(ord); OUTPUT: RETVAL bool -test_isXDIGIT(UV ord) +isXDIGIT(ord) + UV ord CODE: RETVAL = isXDIGIT(ord); OUTPUT: RETVAL bool -test_isXDIGIT_A(UV ord) +isXDIGIT_A(ord) + UV ord CODE: RETVAL = isXDIGIT_A(ord); OUTPUT: RETVAL bool -test_isPSXSPC(UV ord) +isPSXSPC(ord) + UV ord CODE: RETVAL = isPSXSPC(ord); OUTPUT: RETVAL bool -test_isPSXSPC_A(UV ord) +isPSXSPC_A(ord) + UV ord CODE: RETVAL = isPSXSPC_A(ord); OUTPUT: RETVAL -=tests plan => 126 - -use vars qw($my_sv @my_av %my_hv); +bool +isALPHANUMERIC_L1(ord) + UV ord + CODE: + RETVAL = isALPHANUMERIC_L1(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::boolSV(1)); -ok(!&Devel::PPPort::boolSV(0)); +bool +isALNUMC_L1(ord) + UV ord + CODE: + RETVAL = isALNUMC_L1(ord); + OUTPUT: + RETVAL -$_ = "Fred"; -ok(&Devel::PPPort::DEFSV(), "Fred"); -ok(&Devel::PPPort::UNDERBAR(), "Fred"); +bool +isDIGIT_L1(ord) + UV ord + CODE: + RETVAL = isDIGIT_L1(ord); + OUTPUT: + RETVAL -if ("$]" >= 5.009002 && "$]" < 5.023 && "$]" < 5.023004) { - eval q{ - no warnings "deprecated"; - no if $^V > v5.17.9, warnings => "experimental::lexical_topic"; - my $_ = "Tony"; - ok(&Devel::PPPort::DEFSV(), "Fred"); - ok(&Devel::PPPort::UNDERBAR(), "Tony"); - }; -} -else { - ok(1); - ok(1); -} +bool +isOCTAL_L1(ord) + UV ord + CODE: + RETVAL = isOCTAL_L1(ord); + OUTPUT: + RETVAL -my @r = &Devel::PPPort::DEFSV_modify(); +bool +isIDFIRST_L1(ord) + UV ord + CODE: + RETVAL = isIDFIRST_L1(ord); + OUTPUT: + RETVAL -ok(@r == 3); -ok($r[0], 'Fred'); -ok($r[1], 'DEFSV'); -ok($r[2], 'Fred'); +bool +isIDCONT_L1(ord) + UV ord + CODE: + RETVAL = isIDCONT_L1(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::DEFSV(), "Fred"); +bool +isSPACE_L1(ord) + UV ord + CODE: + RETVAL = isSPACE_L1(ord); + OUTPUT: + RETVAL -eval { 1 }; -ok(!&Devel::PPPort::ERRSV()); -eval { cannot_call_this_one() }; -ok(&Devel::PPPort::ERRSV()); +bool +isASCII_L1(ord) + UV ord + CODE: + RETVAL = isASCII_L1(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0)); -ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0)); -ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1)); +bool +isCNTRL_L1(ord) + UV ord + CODE: + RETVAL = isCNTRL_L1(ord); + OUTPUT: + RETVAL -$my_sv = 1; -ok(&Devel::PPPort::get_sv('my_sv', 0)); -ok(!&Devel::PPPort::get_sv('not_my_sv', 0)); -ok(&Devel::PPPort::get_sv('not_my_sv', 1)); +bool +isPRINT_L1(ord) + UV ord + CODE: + RETVAL = isPRINT_L1(ord); + OUTPUT: + RETVAL -@my_av = (1); -ok(&Devel::PPPort::get_av('my_av', 0)); -ok(!&Devel::PPPort::get_av('not_my_av', 0)); -ok(&Devel::PPPort::get_av('not_my_av', 1)); +bool +isGRAPH_L1(ord) + UV ord + CODE: + RETVAL = isGRAPH_L1(ord); + OUTPUT: + RETVAL -%my_hv = (a=>1); -ok(&Devel::PPPort::get_hv('my_hv', 0)); -ok(!&Devel::PPPort::get_hv('not_my_hv', 0)); -ok(&Devel::PPPort::get_hv('not_my_hv', 1)); +bool +isPUNCT_L1(ord) + UV ord + CODE: + RETVAL = isPUNCT_L1(ord); + OUTPUT: + RETVAL -sub my_cv { 1 }; -ok(&Devel::PPPort::get_cv('my_cv', 0)); -ok(!&Devel::PPPort::get_cv('not_my_cv', 0)); -ok(&Devel::PPPort::get_cv('not_my_cv', 1)); +bool +isXDIGIT_L1(ord) + UV ord + CODE: + RETVAL = isXDIGIT_L1(ord); + OUTPUT: + RETVAL -ok(Devel::PPPort::dXSTARG(42), 43); -ok(Devel::PPPort::dAXMARK(4711), 4710); +bool +isPSXSPC_L1(ord) + UV ord + CODE: + RETVAL = isPSXSPC_L1(ord); + OUTPUT: + RETVAL -ok(Devel::PPPort::prepush(), 42); +bool +isASCII_uvchr(ord) + UV ord + CODE: + RETVAL = isASCII_uvchr(ord); + OUTPUT: + RETVAL -ok(join(':', Devel::PPPort::xsreturn(0)), 'test1'); -ok(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); +bool +isASCII_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + PERL_UNUSED_ARG(offset); + RETVAL = isASCII_utf8_safe(s, s + 1 + offset); + OUTPUT: + RETVAL -ok(Devel::PPPort::PERL_ABS(42), 42); -ok(Devel::PPPort::PERL_ABS(-13), 13); +#if { VERSION >= 5.006 } -ok(Devel::PPPort::SVf(42), "$]" >= 5.004 ? '[42]' : '42'); -ok(Devel::PPPort::SVf('abc'), "$]" >= 5.004 ? '[abc]' : 'abc'); +bool +isBLANK_uvchr(ord) + UV ord + CODE: + RETVAL = isBLANK_uvchr(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); +bool +isALPHA_uvchr(ord) + UV ord + CODE: + RETVAL = isALPHA_uvchr(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::ptrtests(), 63); +bool +isALPHANUMERIC_uvchr(ord) + UV ord + CODE: + RETVAL = isALPHANUMERIC_uvchr(ord); + OUTPUT: + RETVAL -ok(&Devel::PPPort::OpSIBLING_tests(), 0); +bool +isCNTRL_uvchr(ord) + UV ord + CODE: + RETVAL = isCNTRL_uvchr(ord); + OUTPUT: + RETVAL -if ("$]" >= 5.009000) { - eval q{ - ok(&Devel::PPPort::check_HeUTF8("hello"), "norm"); - ok(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); - }; -} else { - ok(1, 1); - ok(1, 1); -} +bool +isDIGIT_uvchr(ord) + UV ord + CODE: + RETVAL = isDIGIT_uvchr(ord); + OUTPUT: + RETVAL -@r = &Devel::PPPort::check_c_array(); -ok($r[0], 4); -ok($r[1], "13"); +bool +isIDFIRST_uvchr(ord) + UV ord + CODE: + RETVAL = isIDFIRST_uvchr(ord); + OUTPUT: + RETVAL -ok(!Devel::PPPort::SvRXOK("")); -ok(!Devel::PPPort::SvRXOK(bless [], "Regexp")); +bool +isIDCONT_uvchr(ord) + UV ord + CODE: + RETVAL = isIDCONT_uvchr(ord); + OUTPUT: + RETVAL -if ("$]" < 5.005) { - skip 'no qr// objects in this perl', 0; - skip 'no qr// objects in this perl', 0; -} else { - my $qr = eval 'qr/./'; - ok(Devel::PPPort::SvRXOK($qr)); - ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise")); -} +bool +isGRAPH_uvchr(ord) + UV ord + CODE: + RETVAL = isGRAPH_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isBLANK(ord(" "))); -ok(! Devel::PPPort::test_isBLANK(ord("\n"))); +bool +isLOWER_uvchr(ord) + UV ord + CODE: + RETVAL = isLOWER_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isBLANK_A(ord("\t"))); -ok(! Devel::PPPort::test_isBLANK_A(ord("\r"))); +bool +isPRINT_uvchr(ord) + UV ord + CODE: + RETVAL = isPRINT_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isUPPER(ord("A"))); -ok(! Devel::PPPort::test_isUPPER(ord("a"))); +bool +isPSXSPC_uvchr(ord) + UV ord + CODE: + RETVAL = isPSXSPC_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isUPPER_A(ord("Z"))); +bool +isPUNCT_uvchr(ord) + UV ord + CODE: + RETVAL = isPUNCT_uvchr(ord); + OUTPUT: + RETVAL -# One of these two is uppercase in EBCDIC; the other in Latin1, but neither are -# ASCII uppercase. -ok(! Devel::PPPort::test_isUPPER_A(ord(0xDC))); -ok(! Devel::PPPort::test_isUPPER_A(ord(0xFC))); +bool +isSPACE_uvchr(ord) + UV ord + CODE: + RETVAL = isSPACE_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isLOWER(ord("b"))); -ok(! Devel::PPPort::test_isLOWER(ord("B"))); +bool +isUPPER_uvchr(ord) + UV ord + CODE: + RETVAL = isUPPER_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isLOWER_A(ord("y"))); +bool +isWORDCHAR_uvchr(ord) + UV ord + CODE: + RETVAL = isWORDCHAR_uvchr(ord); + OUTPUT: + RETVAL -# One of these two is lowercase in EBCDIC; the other in Latin1, but neither are -# ASCII lowercase. -ok(! Devel::PPPort::test_isLOWER_A(ord(0xDC))); -ok(! Devel::PPPort::test_isLOWER_A(ord(0xFC))); +bool +isXDIGIT_uvchr(ord) + UV ord + CODE: + RETVAL = isXDIGIT_uvchr(ord); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALPHA(ord("C"))); -ok(! Devel::PPPort::test_isALPHA(ord("1"))); +bool +isALPHA_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALPHA_A(ord("x"))); -ok(! Devel::PPPort::test_isALPHA_A(0xDC)); +bool +isALPHANUMERIC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isWORDCHAR(ord("_"))); -ok(! Devel::PPPort::test_isWORDCHAR(ord("@"))); +bool +isBLANK_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isWORDCHAR_A(ord("2"))); -ok(! Devel::PPPort::test_isWORDCHAR_A(0xFC)); +bool +isCNTRL_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALPHANUMERIC(ord("4"))); -ok(! Devel::PPPort::test_isALPHANUMERIC(ord("_"))); +bool +isDIGIT_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALPHANUMERIC_A(ord("l"))); -ok(! Devel::PPPort::test_isALPHANUMERIC_A(0xDC)); +bool +isGRAPH_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALNUM(ord("c"))); -ok(! Devel::PPPort::test_isALNUM(ord("}"))); +bool +isIDCONT_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isALNUM_A(ord("5"))); -ok(! Devel::PPPort::test_isALNUM_A(0xFC)); +bool +isIDFIRST_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isDIGIT(ord("6"))); -ok(! Devel::PPPort::test_isDIGIT(ord("_"))); +bool +isLOWER_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isDIGIT_A(ord("7"))); -ok(! Devel::PPPort::test_isDIGIT_A(0xDC)); +bool +isPRINT_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isOCTAL(ord("7"))); -ok(! Devel::PPPort::test_isOCTAL(ord("8"))); +bool +isPSXSPC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isOCTAL_A(ord("0"))); -ok(! Devel::PPPort::test_isOCTAL_A(ord("9"))); +bool +isPUNCT_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isIDFIRST(ord("D"))); -ok(! Devel::PPPort::test_isIDFIRST(ord("1"))); +bool +isSPACE_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isIDFIRST_A(ord("_"))); -ok(! Devel::PPPort::test_isIDFIRST_A(0xFC)); +bool +isUPPER_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isIDCONT(ord("e"))); -ok(! Devel::PPPort::test_isIDCONT(ord("@"))); +bool +isWORDCHAR_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isIDCONT_A(ord("2"))); -ok(! Devel::PPPort::test_isIDCONT_A(0xDC)); +bool +isXDIGIT_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isSPACE(ord(" "))); -ok(! Devel::PPPort::test_isSPACE(ord("_"))); +bool +isALPHA_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isSPACE_A(ord("\cK"))); -ok(! Devel::PPPort::test_isSPACE_A(ord("F"))); +bool +isALPHANUMERIC_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -# This stresses the edge for ASCII machines, but happens to work on EBCDIC as -# well -ok( Devel::PPPort::test_isASCII(0x7F)); -ok(! Devel::PPPort::test_isASCII(0x80)); +bool +isASCII_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + PERL_UNUSED_ARG(offset); + RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isASCII_A(ord("9"))); +bool +isBLANK_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -# B6 is the PARAGRAPH SIGN in ASCII and EBCDIC -ok(! Devel::PPPort::test_isASCII_A(0xB6)); +bool +isCNTRL_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isCNTRL(ord("\e"))); -ok(! Devel::PPPort::test_isCNTRL(ord(" "))); +bool +isDIGIT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isCNTRL_A(ord("\a"))); -ok(! Devel::PPPort::test_isCNTRL_A(0xB6)); +bool +isGRAPH_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isPRINT(ord(" "))); -ok(! Devel::PPPort::test_isPRINT(ord("\n"))); +bool +isIDCONT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isPRINT_A(ord("G"))); -ok(! Devel::PPPort::test_isPRINT_A(0xB6)); +bool +isIDFIRST_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isGRAPH(ord("h"))); -ok(! Devel::PPPort::test_isGRAPH(ord(" "))); +bool +isLOWER_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isGRAPH_A(ord("i"))); -ok(! Devel::PPPort::test_isGRAPH_A(0xB6)); +bool +isPRINT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isPUNCT(ord("#"))); -ok(! Devel::PPPort::test_isPUNCT(ord(" "))); +bool +isPSXSPC_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isPUNCT_A(ord("*"))); -ok(! Devel::PPPort::test_isPUNCT_A(0xB6)); +bool +isPUNCT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isXDIGIT(ord("A"))); -ok(! Devel::PPPort::test_isXDIGIT(ord("_"))); +bool +isSPACE_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isXDIGIT_A(ord("9"))); -ok(! Devel::PPPort::test_isXDIGIT_A(0xDC)); +bool +isUPPER_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isWORDCHAR_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isXDIGIT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +AV * +toLOWER_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toTITLE_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toUPPER_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toFOLD_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toLOWER_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toLOWER_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toTITLE_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toTITLE_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL -ok( Devel::PPPort::test_isPSXSPC(ord(" "))); -ok(! Devel::PPPort::test_isPSXSPC(ord("k"))); +AV * +toUPPER_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toUPPER_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toFOLD_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toFOLD_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +#endif + +UV +LATIN1_TO_NATIVE(cp) + UV cp + CODE: + if (cp > 255) RETVAL= cp; + else RETVAL= LATIN1_TO_NATIVE(cp); + OUTPUT: + RETVAL + +UV +NATIVE_TO_LATIN1(cp) + UV cp + CODE: + RETVAL= NATIVE_TO_LATIN1(cp); + OUTPUT: + RETVAL + +STRLEN +av_tindex(av) + SV *av + CODE: + RETVAL = av_tindex((AV*)SvRV(av)); + OUTPUT: + RETVAL + +STRLEN +av_top_index(av) + SV *av + CODE: + RETVAL = av_top_index((AV*)SvRV(av)); + OUTPUT: + RETVAL + +STRLEN +av_count(av) + SV *av + CODE: + RETVAL = av_count((AV*)SvRV(av)); + OUTPUT: + RETVAL + +=tests plan => 26827 + +use vars qw($my_sv @my_av %my_hv); + +ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true"); +ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false"); + +$_ = "Fred"; +is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED'); +is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED'); + +if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) { + eval q{ + no warnings "deprecated"; + no if $^V >= v5.17.9, warnings => "experimental::lexical_topic"; + my $_ = "Tony"; + is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred'); + is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony'); + }; + die __FILE__ . __LINE__ . ": $@" if $@; +} +else { + skip("perl version outside testing range of lexical_topic", 2); +} + +my @r = &Devel::PPPort::DEFSV_modify(); + +ok(@r == 3, "Verify got 3 elements"); +is($r[0], 'Fred'); +is($r[1], 'DEFSV'); +is($r[2], 'Fred'); + +is(&Devel::PPPort::DEFSV(), "Fred"); + +eval { 1 }; +ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false"); +eval { cannot_call_this_one() }; +ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true"); + +ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0)); +ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0)); +ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1)); + +$my_sv = 1; +ok(&Devel::PPPort::get_sv('my_sv', 0)); +ok(!&Devel::PPPort::get_sv('not_my_sv', 0)); +ok(&Devel::PPPort::get_sv('not_my_sv', 1)); + +@my_av = (1); +ok(&Devel::PPPort::get_av('my_av', 0)); +ok(!&Devel::PPPort::get_av('not_my_av', 0)); +ok(&Devel::PPPort::get_av('not_my_av', 1)); + +%my_hv = (a=>1); +ok(&Devel::PPPort::get_hv('my_hv', 0)); +ok(!&Devel::PPPort::get_hv('not_my_hv', 0)); +ok(&Devel::PPPort::get_hv('not_my_hv', 1)); + +sub my_cv { 1 }; +ok(&Devel::PPPort::get_cv('my_cv', 0)); +ok(!&Devel::PPPort::get_cv('not_my_cv', 0)); +ok(&Devel::PPPort::get_cv('not_my_cv', 1)); + +is(Devel::PPPort::dXSTARG(42), 43); +is(Devel::PPPort::dAXMARK(4711), 4710); + +is(Devel::PPPort::prepush(), 42); + +is(join(':', Devel::PPPort::xsreturn(0)), 'test1'); +is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); + +is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42"); +is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13"); + +is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42'); +is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc'); + +is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); + +is(&Devel::PPPort::ptrtests(), 63); + +is(&Devel::PPPort::OpSIBLING_tests(), 0); + +if (ivers($]) >= ivers(5.9)) { + eval q{ + is(&Devel::PPPort::check_HeUTF8("hello"), "norm"); + is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); + }; +} else { + skip("Too early perl version", 2); +} + +@r = &Devel::PPPort::check_c_array(); +is($r[0], 4); +is($r[1], "13"); + +ok(!Devel::PPPort::SvRXOK("")); +ok(!Devel::PPPort::SvRXOK(bless [], "Regexp")); + +if (ivers($]) < ivers(5.5)) { + skip 'no qr// objects in this perl', 2; +} else { + my $qr = eval 'qr/./'; + ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true"); + ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise")); +} + +ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6); +ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1); +ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41); +ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30); + +ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6"); +if (ord("A") == 65) { + ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41); + ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30); +} +else { + ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1); + ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0); +} + +ok( Devel::PPPort::isALNUMC_L1(ord("5"))); +ok( Devel::PPPort::isALNUMC_L1(0xFC)); +ok(! Devel::PPPort::isALNUMC_L1(0xB6)); + +ok( Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL"); +ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL"); + +ok( Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A"); +ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A"); + +ok( Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1"); +ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1"); + +my $way_too_early_msg = 'UTF-8 not implemented on this perl'; + +# For the other properties, we test every code point from 0.255, and a +# smattering of higher ones. First populate a hash with keys like '65:ALPHA' +# to indicate that the code point there is alphabetic +my $i; +my %types; +for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6, + 0xF8..0x101) +{ + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:ALPHA"} = 1; + $types{"$native:ALPHANUMERIC"} = 1; + $types{"$native:IDFIRST"} = 1; + $types{"$native:IDCONT"} = 1; + $types{"$native:PRINT"} = 1; + $types{"$native:WORDCHAR"} = 1; +} +for $i (0x30..0x39, 0x660, 0xFF19) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:ALPHANUMERIC"} = 1; + $types{"$native:DIGIT"} = 1; + $types{"$native:IDCONT"} = 1; + $types{"$native:WORDCHAR"} = 1; + $types{"$native:GRAPH"} = 1; + $types{"$native:PRINT"} = 1; + $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19); +} + +for $i (0..0x7F) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:ASCII"} = 1; +} +for $i (0..0x1f, 0x7F..0x9F) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:CNTRL"} = 1; +} +for $i (0x21..0x7E, 0xA1..0x101, 0x660) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:GRAPH"} = 1; + $types{"$native:PRINT"} = 1; +} +for $i (0x09, 0x20, 0xA0) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:BLANK"} = 1; + $types{"$native:SPACE"} = 1; + $types{"$native:PSXSPC"} = 1; + $types{"$native:PRINT"} = 1 if $i > 0x09; +} +for $i (0x09..0x0D, 0x85, 0x2029) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:SPACE"} = 1; + $types{"$native:PSXSPC"} = 1; +} +for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:UPPER"} = 1; + $types{"$native:XDIGIT"} = 1 if $i < 0x47; +} +for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:LOWER"} = 1; + $types{"$native:XDIGIT"} = 1 if $i < 0x67; +} +for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB, + 0xB7, 0xBB, 0xBF, 0x5BE) +{ + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + $types{"$native:PUNCT"} = 1; + $types{"$native:GRAPH"} = 1; + $types{"$native:PRINT"} = 1; +} + +$i = ord('_'); +$types{"$i:WORDCHAR"} = 1; +$types{"$i:IDFIRST"} = 1; +$types{"$i:IDCONT"} = 1; + +# Now find all the unique code points included above. +my %code_points_to_test; +my $key; +for $key (keys %types) { + $key =~ s/:.*//; + $code_points_to_test{$key} = 1; +} + +# And test each one +for $i (sort { $a <=> $b } keys %code_points_to_test) { + my $native = Devel::PPPort::LATIN1_TO_NATIVE($i); + my $hex = sprintf("0x%02X", $native); + + # And for each code point test each of the classes + my $class; + for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT + IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR + XDIGIT)) + { + if ($i < 256) { # For the ones that can fit in a byte, test each of + # three macros. + my $suffix; + for $suffix ("", "_A", "_L1", "_uvchr") { + my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/) + ? 0 # Fail on non-ASCII unless unicode + : ($types{"$native:$class"} || 0); + if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') { + skip("No UTF-8 on this perl", 1); + next; + } + + my $eval_string = "Devel::PPPort::is${class}$suffix($hex)"; + local $SIG{__WARN__} = sub {}; + my $is = eval $eval_string || 0; + die "eval 'For $i: $eval_string' gave $@" if $@; + is($is, $should_be, "'$eval_string'"); + } + } + + # For all code points, test the '_utf8' macros + my $sub_fcn; + for $sub_fcn ("", "_LC") { + my $skip = ""; + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; + } + elsif (ivers($]) < ivers(5.7) && $native > 255) { + $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points"; + } + elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) { + $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH"; + } + elsif ($sub_fcn eq '_LC' && $i < 256) { + $skip = "Testing of code points whose results depend on locale is skipped "; + } + my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe"; + my $utf8; + + if ($skip) { + skip $skip, 1; + } + else { + $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native); + my $should_be = $types{"$native:$class"} || 0; + my $eval_string = "$fcn(\"$utf8\", 0)"; + local $SIG{__WARN__} = sub {}; + my $is = eval $eval_string || 0; + die "eval 'For $i, $eval_string' gave $@" if $@; + is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string)); + } + + # And for the high code points, test that a too short malformation (the + # -1) causes it to fail + if ($i > 255) { + if ($skip) { + skip $skip, 1; + } + elsif (ivers($]) >= ivers(5.25.9)) { + skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1); + } + else { + my $eval_string = "$fcn(\"$utf8\", -1)"; + local $SIG{__WARN__} = sub {}; + my $is = eval "$eval_string" || 0; + die "eval '$eval_string' gave $@" if $@; + is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string)); + } + } + } + } +} + +my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), + Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], + [ 0x100, 0x101 ], + ], + 'FOLD' => [ [ ord('C'), ord('c') ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), + Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], + [ 0x104, 0x105 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'ss' ], + ], + 'UPPER' => [ [ ord('a'), ord('A'), ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0), + Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ], + [ 0x101, 0x100 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'SS' ], + ], + 'TITLE' => [ [ ord('c'), ord('C'), ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2), + Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ], + [ 0x103, 0x102 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'Ss' ], + ], + ); + +my $name; +for $name (keys %case_changing) { + my @code_points_to_test = @{$case_changing{$name}}; + my $unchanged; + for $unchanged (@code_points_to_test) { + my @pair = @$unchanged; + my $original = $pair[0]; + my $changed = $pair[1]; + my $utf8_changed = $changed; + my $is_cp = $utf8_changed =~ /^\d+$/; + my $should_be_bytes; + if (ivers($]) >= ivers(5.6)) { + if ($is_cp) { + $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); + $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); + } + else { + die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/'; + $should_be_bytes = length $utf8_changed; + } + } + + my $fcn = "to${name}_uvchr"; + my $skip = ""; + + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; + } + elsif (! $is_cp) { + $skip = "Can't do uvchr on a multi-char string"; + } + if ($skip) { + skip $skip, 4; + } + else { + if ($is_cp) { + $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); + $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); + } + else { + my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]'; + die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/'; + $should_be_bytes = length $utf8_changed; + } + + my $ret = eval "Devel::PPPort::$fcn($original)"; + my $fail = $@; # Have to save $@, as it gets destroyed + is ($fail, "", "$fcn($original) didn't fail"); + my $first = (ivers($]) != ivers(5.6)) + ? substr($utf8_changed, 0, 1) + : $utf8_changed, 0, 1; + is($ret->[0], ord $first, + "ord of $fcn($original) is $changed"); + is($ret->[1], $utf8_changed, + "UTF-8 of of $fcn($original) is correct"); + is($ret->[2], $should_be_bytes, + "Length of $fcn($original) is $should_be_bytes"); + } + + my $truncate; + for $truncate (0..2) { + my $skip; + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; + } + elsif (! $is_cp && ivers($]) < ivers(5.7.3)) { + $skip = "Multi-character case change not implemented until 5.7.3"; + } + elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) { + $skip = "Zero length inputs cause assertion failure; test dies in modern perls"; + } + elsif ($truncate > 0 && length $changed > 1) { + $skip = "Don't test shortened multi-char case changes"; + } + elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) { + $skip = "Don't try to test shortened single bytes"; + } + if ($skip) { + skip $skip, 4; + } + else { + my $fcn = "to${name}_utf8_safe"; + my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original); + my $real_truncate = ($truncate < 2) + ? $truncate : $should_be_bytes; + my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)"; + my $ret = eval "no warnings; $eval_string" || 0; + my $fail = $@; # Have to save $@, as it gets destroyed + if ($truncate == 0) { + is ($fail, "", "Didn't fail on full length input"); + my $first = (ivers($]) != ivers(5.6)) + ? substr($utf8_changed, 0, 1) + : $utf8_changed, 0, 1; + is($ret->[0], ord $first, + "ord of $fcn($original) is $changed"); + is($ret->[1], $utf8_changed, + "UTF-8 of of $fcn($original) is correct"); + is($ret->[2], $should_be_bytes, + "Length of $fcn($original) is $should_be_bytes"); + } + else { + is ($fail, eval 'qr/Malformed UTF-8 character/', + "Gave appropriate error for short char: $original"); + skip("Expected failure means remaining tests for" + . " this aren't relevant", 3); + } + } + } + } +} -ok( Devel::PPPort::test_isPSXSPC_A(ord("\cK"))); -ok(! Devel::PPPort::test_isPSXSPC_A(0xFC)); +is(&Devel::PPPort::av_top_index([1,2,3]), 2); +is(&Devel::PPPort::av_tindex([1,2,3,4]), 3); +is(&Devel::PPPort::av_count([1,2,3,4]), 4);