X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/6ec3ee8fe7d3c1aae780f1e30076b7a6d8d19da4..91497b7f1eadb85318d1721ab68c3a72fedb43c6:/dist/Devel-PPPort/parts/inc/misc diff --git a/dist/Devel-PPPort/parts/inc/misc b/dist/Devel-PPPort/parts/inc/misc index e53e0aa..ffed764 100644 --- a/dist/Devel-PPPort/parts/inc/misc +++ b/dist/Devel-PPPort/parts/inc/misc @@ -12,6 +12,7 @@ =provides __UNDEFINED__ +__REDEFINE__ END_EXTERN_C EXTERN_C INT2PTR @@ -19,6 +20,7 @@ MUTABLE_PTR NVTYPE PERLIO_FUNCS_CAST PERLIO_FUNCS_DECL +PERL_STATIC_INLINE PERL_UNUSED_ARG PERL_UNUSED_CONTEXT PERL_UNUSED_DECL @@ -33,9 +35,17 @@ STMT_START SvRX WIDEST_UTYPE XSRETURN +NOT_REACHED +ASSUME =implementation +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +__UNDEFINED__ PERL_STATIC_INLINE static inline +#else +__UNDEFINED__ PERL_STATIC_INLINE static +#endif + __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0) __UNDEFINED__ OpHAS_SIBLING(o) (cBOOL((o)->op_sibling)) __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling) @@ -50,15 +60,68 @@ __UNDEFINED__ __ASSERT_(statement) assert(statement), __UNDEFINED__ __ASSERT_(statement) #endif -/* These could become provided when they become part of the public API */ -__UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \ +__UNDEF_NOT_PROVIDED__ __has_builtin(x) 0 + +#if __has_builtin(__builtin_unreachable) +# define D_PPP_HAS_BUILTIN_UNREACHABLE +#elif (defined(__GNUC__) && ( __GNUC__ > 4 \ + || __GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +# define D_PPP_HAS_BUILTIN_UNREACHABLE +#endif + +#ifndef ASSUME +# ifdef DEBUGGING +# define ASSUME(x) assert(x) +# elif defined(_MSC_VER) +# define ASSUME(x) __assume(x) +# elif defined(__ARMCC_VERSION) +# define ASSUME(x) __promise(x) +# elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE) +# define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable()) +# else +# define ASSUME(x) assert(x) +# endif +#endif + +#ifndef NOT_REACHED +# ifdef D_PPP_HAS_BUILTIN_UNREACHABLE +# define NOT_REACHED \ + STMT_START { \ + ASSUME(!"UNREACHABLE"); __builtin_unreachable(); \ + } STMT_END +# elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux)) +# define NOT_REACHED +# else +# define NOT_REACHED ASSUME(!"UNREACHABLE") +# endif +#endif + +#ifndef WIDEST_UTYPE +# ifdef QUADKIND +# ifdef U64TYPE +# define WIDEST_UTYPE U64TYPE +# else +# define WIDEST_UTYPE unsigned Quad_t +# endif +# else +# define WIDEST_UTYPE U32 +# endif +#endif + +/* These could become provided if/when they become part of the public API */ +__UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \ (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0)))) -__UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \ +__UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \ ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \ - : (sizeof(c) == sizeof(U16)) ? withinCOUNT(((U16) (c)), (l), ((u) - (l))) \ : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \ : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l))))) +/* The '| 0' part ensures a compiler error if c is not integer (like e.g., a + * pointer) */ +#undef FITS_IN_8_BITS /* handy.h version uses a core-only constant */ +__UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \ + || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF)) + /* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code * point. That is so that it can automatically get the bug fixes done in this @@ -78,6 +141,48 @@ __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \ (s)[1]))) \ : is ## macro ## _utf8(s)) +/* Create the macro for "is'macro'_LC_utf8_safe(s, e)". For code points below + * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code + * point. That is so that it can automatically get the bug fixes done in this + * file. */ +#define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro) \ + (((e) - (s)) <= 0 \ + ? 0 \ + : UTF8_IS_INVARIANT((s)[0]) \ + ? is ## macro ## _LC((s)[0]) \ + : (((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ + /* The cast in the line below is only to silence warnings */ \ + ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ + UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ + & UTF_START_MASK(2), \ + (s)[1]))) \ + : is ## macro ## _utf8(s)) + +/* A few of the early functions are broken. For these and the non-LC case, + * machine generated code is substituted. But that code doesn't work for + * locales. This is just like the above macro, but at the end, we call the + * macro we've generated for the above 255 case, which is correct since locale + * isn't involved. This will generate extra code to handle the 0-255 inputs, + * but hopefully it will be optimized out by the C compiler. But just in case + * it isn't, this macro is only used on the few versions that are broken */ + +#define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro) \ + (((e) - (s)) <= 0 \ + ? 0 \ + : UTF8_IS_INVARIANT((s)[0]) \ + ? is ## macro ## _LC((s)[0]) \ + : (((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \ + /* The cast in the line below is only to silence warnings */ \ + ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \ + UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \ + & UTF_START_MASK(2), \ + (s)[1]))) \ + : is ## macro ## _utf8_safe(s, e)) + __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL) __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv)) @@ -123,7 +228,10 @@ __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv)) #endif __UNDEFINED__ NOOP /*EMPTY*/(void)0 -__UNDEFINED__ dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL + +#if { VERSION < 5.6.1 } && { VERSION < 5.27.7 } +__REDEFINE__ dNOOP struct Perl___notused_struct +#endif #ifndef NVTYPE # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) @@ -162,17 +270,14 @@ __UNDEFINED__ PTR2IV(p) INT2PTR(IV,p) __UNDEFINED__ PTR2UV(p) INT2PTR(UV,p) __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p) -#undef START_EXTERN_C -#undef END_EXTERN_C -#undef EXTERN_C #ifdef __cplusplus -# define START_EXTERN_C extern "C" { -# define END_EXTERN_C } -# define EXTERN_C extern "C" +__REDEFINE__ START_EXTERN_C extern "C" { +__REDEFINE__ END_EXTERN_C } +__REDEFINE__ EXTERN_C extern "C" #else -# define START_EXTERN_C -# define END_EXTERN_C -# define EXTERN_C extern +__REDEFINE__ START_EXTERN_C +__REDEFINE__ END_EXTERN_C +__REDEFINE__ EXTERN_C extern #endif #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC) @@ -181,25 +286,20 @@ __UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN # endif #endif -#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus) +#if ! defined(__GNUC__) || defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) || defined(__cplusplus) +# undef PERL_USE_GCC_BRACE_GROUPS +#else # ifndef PERL_USE_GCC_BRACE_GROUPS # define PERL_USE_GCC_BRACE_GROUPS # endif #endif -#undef STMT_START -#undef STMT_END -#ifdef PERL_USE_GCC_BRACE_GROUPS -# define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */ -# define STMT_END ) +#if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) +__REDEFINE__ STMT_START if (1) +__REDEFINE__ STMT_END else (void)0 #else -# if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__) -# define STMT_START if (1) -# define STMT_END else (void)0 -# else -# define STMT_START do -# define STMT_END while (0) -# endif +__REDEFINE__ STMT_START do +__REDEFINE__ STMT_END while (0) #endif __UNDEFINED__ boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no) @@ -214,6 +314,7 @@ __UNDEFINED__ AvFILLp AvFILL __UNDEFINED__ av_tindex AvFILL __UNDEFINED__ av_top_index AvFILL +__UNDEFINED__ av_count(av) (AvFILL(av)+1) __UNDEFINED__ ERRSV get_sv("@",FALSE) @@ -242,14 +343,13 @@ __UNDEFINED__ dITEMS I32 items = SP - MARK __UNDEFINED__ dXSTARG SV * targ = sv_newmortal() __UNDEFINED__ dAXMARK I32 ax = POPMARK; \ - register SV ** const mark = PL_stack_base + ax++ + SV ** const mark = PL_stack_base + ax++ __UNDEFINED__ XSprePUSH (sp = PL_stack_base + ax - 1) #if { VERSION < 5.005 } -# undef XSRETURN -# define XSRETURN(off) \ +__REDEFINE__ XSRETURN(off) \ STMT_START { \ PL_stack_sp = PL_stack_base + ax + ((off) - 1); \ return; \ @@ -300,18 +400,6 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*); #endif -#ifndef WIDEST_UTYPE -# ifdef QUADKIND -# ifdef U64TYPE -# define WIDEST_UTYPE U64TYPE -# else -# define WIDEST_UTYPE Quad_t -# endif -# else -# define WIDEST_UTYPE U32 -# endif -#endif - /* On versions without NATIVE_TO_ASCII, only ASCII is supported */ #if defined(EBCDIC) && defined(NATIVE_TO_ASCI) __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c) @@ -349,8 +437,8 @@ __UNDEFINED__ UNI_TO_NATIVE(c) (c) #ifdef EBCDIC /* This is the first version where these macros are fully correct on EBCDIC - * platforms. Relying on * the C library functions, as earlier releases did, - * causes problems with * locales */ + * platforms. Relying on the C library functions, as earlier releases did, + * causes problems with locales */ # if { VERSION < 5.22.0 } # undef isALNUM # undef isALNUM_A @@ -475,6 +563,10 @@ __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' # undef isUPPER_A # endif +# if { VERSION == 5.7.0 } /* this perl made space GRAPH */ +# undef isGRAPH +# endif + # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */ # undef isCNTRL # endif @@ -510,14 +602,15 @@ __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127) __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127) -__UNDEFINED__ isCNTRL_L1(c) (isCNTRL(c) || ( (WIDEST_UTYPE) (c) <= 0x9F \ - && (WIDEST_UTYPE) (c) >= 0x80)) -__UNDEFINED__ isLOWER(c) ((c) >= 'a' && (c) <= 'z') -__UNDEFINED__ isUPPER(c) ((c) <= 'Z' && (c) >= 'A') +__UNDEFINED__ isCNTRL_L1(c) ( (WIDEST_UTYPE) (c) < ' ' \ + || inRANGE((c), 0x7F, 0x9F)) +__UNDEFINED__ isLOWER(c) inRANGE((c), 'a', 'z') +__UNDEFINED__ isUPPER(c) inRANGE((c), 'A', 'Z') #endif /* Below are definitions common to EBCDIC and ASCII */ __UNDEFINED__ isASCII_L1(c) isASCII(c) +__UNDEFINED__ isASCII_LC(c) isASCII(c) __UNDEFINED__ isALNUM(c) isWORDCHAR(c) __UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c) __UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c) @@ -525,11 +618,13 @@ __UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c)) __UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c)) __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c)) __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c)) +__UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c)) __UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t') __UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \ - || ( (WIDEST_UTYPE) (c) < 256 \ + || ( FITS_IN_8_BITS(c) \ && NATIVE_TO_LATIN1((U8) c) == 0xA0)) -__UNDEFINED__ isDIGIT(c) ((c) <= '9' && (c) >= '0') +__UNDEFINED__ isBLANK_LC(c) isBLANK(c) +__UNDEFINED__ isDIGIT(c) inRANGE(c, '0', '9') __UNDEFINED__ isDIGIT_L1(c) isDIGIT(c) __UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c)) __UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \ @@ -537,10 +632,12 @@ __UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \ && NATIVE_TO_LATIN1((U8) c) != 0xA0) __UNDEFINED__ isIDCONT(c) isWORDCHAR(c) __UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c) +__UNDEFINED__ isIDCONT_LC(c) isWORDCHAR_LC(c) __UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_') -__UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || NATIVE_TO_LATIN1(c) == '_') +__UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_') +__UNDEFINED__ isIDFIRST_LC(c) (isALPHA_LC(c) || (U8) (c) == '_') __UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \ - || ( (WIDEST_UTYPE) (c) < 256 \ + || ( FITS_IN_8_BITS(c) \ && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \ && NATIVE_TO_LATIN1((U8) c) != 0xF7) \ || NATIVE_TO_LATIN1((U8) c) == 0xAA \ @@ -549,7 +646,7 @@ __UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \ __UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0') __UNDEFINED__ isOCTAL_L1(c) isOCTAL(c) __UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ') -__UNDEFINED__ isPRINT_L1(c) ((WIDEST_UTYPE) (c) < 256 && ! isCNTRL_L1(c)) +__UNDEFINED__ isPRINT_L1(c) (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c)) __UNDEFINED__ isPSXSPC(c) isSPACE(c) __UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c) __UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \ @@ -564,7 +661,7 @@ __UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \ || (c) == '`' || (c) == '{' || (c) == '|' \ || (c) == '}' || (c) == '~') __UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \ - || ( (WIDEST_UTYPE) (c) < 256 \ + || ( FITS_IN_8_BITS(c) \ && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \ || NATIVE_TO_LATIN1((U8) c) == 0xA7 \ || NATIVE_TO_LATIN1((U8) c) == 0xAB \ @@ -575,20 +672,22 @@ __UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \ __UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \ || (c) == '\v' || (c) == '\f') __UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \ - || ( (WIDEST_UTYPE) (c) < 256 \ + || (FITS_IN_8_BITS(c) \ && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \ || NATIVE_TO_LATIN1((U8) c) == 0xA0))) __UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \ - || ( (WIDEST_UTYPE) (c) < 256 \ + || (FITS_IN_8_BITS(c) \ && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \ && NATIVE_TO_LATIN1((U8) c) <= 0xDE \ && NATIVE_TO_LATIN1((U8) c) != 0xD7))) __UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_') __UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c)) +__UNDEFINED__ isWORDCHAR_LC(c) (isIDFIRST_LC(c) || isDIGIT_LC(c)) __UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \ - || ((c) >= 'a' && (c) <= 'f') \ - || ((c) >= 'A' && (c) <= 'F')) + || inRANGE((c), 'a', 'f') \ + || inRANGE((c), 'A', 'F')) __UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c) +__UNDEFINED__ isXDIGIT_LC(c) isxdigit(c) __UNDEFINED__ isALNUM_A(c) isALNUM(c) __UNDEFINED__ isALNUMC_A(c) isALNUMC(c) @@ -611,9 +710,49 @@ __UNDEFINED__ isUPPER_A(c) isUPPER(c) __UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c) __UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c) -__UNDEFINED__ isASCII_utf8_safe(s,e) isASCII(*(s)) +__UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s))) +__UNDEFINED__ isASCII_uvchr(c) (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0) #if { VERSION >= 5.006 } +# ifdef isALPHA_uni /* If one defined, all are; this is just an exemplar */ +# define D_PPP_is_ctype(upper, lower, c) \ + (FITS_IN_8_BITS(c) \ + ? is ## upper ## _L1(c) \ + : is ## upper ## _uni((UV) (c))) /* _uni is old synonym */ +# else +# define D_PPP_is_ctype(upper, lower, c) \ + (FITS_IN_8_BITS(c) \ + ? is ## upper ## _L1(c) \ + : is_uni_ ## lower((UV) (c))) /* is_uni_ is even older */ +# endif + +__UNDEFINED__ isALPHA_uvchr(c) D_PPP_is_ctype(ALPHA, alpha, c) +__UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c)) +# ifdef is_uni_blank +__UNDEFINED__ isBLANK_uvchr(c) D_PPP_is_ctype(BLANK, blank, c) +# else +__UNDEFINED__ isBLANK_uvchr(c) (FITS_IN_8_BITS(c) \ + ? isBLANK_L1(c) \ + : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \ + || inRANGE((UV) (c), 0x2000, 0x200A) \ + || (UV) (c) == 0x202F /* Unicode 3.0 */\ + || (UV) (c) == 0x205F /* Unicode 3.2 */\ + || (UV) (c) == 0x3000)) +# endif +__UNDEFINED__ isCNTRL_uvchr(c) D_PPP_is_ctype(CNTRL, cntrl, c) +__UNDEFINED__ isDIGIT_uvchr(c) D_PPP_is_ctype(DIGIT, digit, c) +__UNDEFINED__ isGRAPH_uvchr(c) D_PPP_is_ctype(GRAPH, graph, c) +__UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c) +__UNDEFINED__ isIDFIRST_uvchr(c) D_PPP_is_ctype(IDFIRST, idfirst, c) +__UNDEFINED__ isLOWER_uvchr(c) D_PPP_is_ctype(LOWER, lower, c) +__UNDEFINED__ isPRINT_uvchr(c) D_PPP_is_ctype(PRINT, print, c) +__UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c) +__UNDEFINED__ isPUNCT_uvchr(c) D_PPP_is_ctype(PUNCT, punct, c) +__UNDEFINED__ isSPACE_uvchr(c) D_PPP_is_ctype(SPACE, space, c) +__UNDEFINED__ isUPPER_uvchr(c) D_PPP_is_ctype(UPPER, upper, c) +__UNDEFINED__ isXDIGIT_uvchr(c) D_PPP_is_ctype(XDIGIT, xdigit, c) +__UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c) \ + ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c)) __UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA) # ifdef isALPHANUMERIC_utf8 @@ -700,8 +839,8 @@ __UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST __UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER) __UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT) -# undef isPSXSPC_utf8_safe /* Use the modern definition */ -__UNDEFINED__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e) +/* Use the modern definition */ +__REDEFINE__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e) __UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT) __UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE) @@ -748,8 +887,210 @@ __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \ # else # error Unknown character set # endif + +__UNDEFINED__ isALPHA_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA) +# ifdef isALPHANUMERIC_utf8 +__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC) +# else +__UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \ + (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e)) +# endif + +__UNDEFINED__ isBLANK_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK) +__UNDEFINED__ isCNTRL_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL) +__UNDEFINED__ isDIGIT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT) +__UNDEFINED__ isGRAPH_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH) +# ifdef isIDCONT_utf8 +__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT) +# else +__UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) isWORDCHAR_LC_utf8_safe(s,e) +# endif + +__UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST) +__UNDEFINED__ isLOWER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER) +__UNDEFINED__ isPRINT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT) + +/* Use the modern definition */ +__REDEFINE__ isPSXSPC_LC_utf8_safe(s,e) isSPACE_LC_utf8_safe(s,e) + +__UNDEFINED__ isPUNCT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT) +__UNDEFINED__ isSPACE_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE) +__UNDEFINED__ isUPPER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER) + +# ifdef isWORDCHAR_utf8 +__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR) +# else +__UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) \ + (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_') +# endif + +__UNDEFINED__ isXDIGIT_LC_utf8_safe(s,e) \ + D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT) + +/* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe, + * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe, + * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe, + * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe, + * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe, + * isXDIGIT_utf8_safe, + * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe, + * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe, + * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe, + * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe, + * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe, + * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe, + * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr, + * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr, + * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr, + * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr, + * isWORDCHAR_uvchr, isXDIGIT_uvchr + * + * The UTF-8 handling is buggy in early Perls, and this can give inaccurate + * results for code points above 0xFF, until the implementation started + * settling down in 5.12 and 5.14 */ + #endif +#define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \ + " \\x%02x (too short; %d bytes available, need" \ + " %d)\n" +/* Perls starting here had a new API which handled multi-character results */ +#if { VERSION >= 5.7.3 } + +__UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l)) +__UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l)) + +# if { VERSION != 5.15.6 } /* Just this version is broken */ + + /* Prefer the macro to the function */ +# if defined toLOWER_utf8 +# define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l) +# else +# define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l) +# endif +# if defined toTITLE_utf8 +# define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l) +# else +# define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l) +# endif +# if defined toUPPER_utf8 +# define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l) +# else +# define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l) +# endif +# if defined toFOLD_utf8 +# define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l) +# else +# define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l) +# endif +# else /* Below is 5.15.6, which failed to make the macros available +# outside of core, so we have to use the 'Perl_' form. khw +# decided it was easier to just handle this case than have to +# document the exception, and make an exception in the tests below +# */ +# define D_PPP_TO_LOWER_CALLEE(s,r,l) \ + Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_TITLE_CALLEE(s,r,l) \ + Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_UPPER_CALLEE(s,r,l) \ + Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL) +# define D_PPP_TO_FOLD_CALLEE(s,r,l) \ + Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL) +# endif + +/* The actual implementation of the backported macros. If too short, croak, + * otherwise call the original that doesn't have an upper limit parameter */ +# define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \ + (((((e) - (s)) <= 0) \ + /* We could just do nothing, but modern perls croak */ \ + ? (croak("Attempting case change on zero length string"), \ + 0) /* So looks like it returns something, and will compile */ \ + : ((e) - (s)) < UTF8SKIP(s)) \ + ? (croak(D_PPP_TOO_SHORT_MSG, \ + s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ + 0) \ + : D_PPP_TO_ ## name ## _CALLEE(s,r,l)) + +__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l) +__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l) +__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l) +__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l) + +#elif { VERSION >= 5.006 } + +/* Here we have UTF-8 support, but using the original API where the case + * changing functions merely returned the changed code point; hence they + * couldn't handle multi-character results. */ + +# ifdef uvchr_to_utf8 +# define D_PPP_UV_TO_UTF8 uvchr_to_utf8 +# else +# define D_PPP_UV_TO_UTF8 uv_to_utf8 +# endif + + /* Get the utf8 of the case changed value, and store its length; then have + * to re-calculate the changed case value in order to return it */ +# define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \ + (*(l) = (D_PPP_UV_TO_UTF8(s, \ + UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \ + UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) + +__UNDEFINED__ toLOWER_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l) +__UNDEFINED__ toUPPER_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l) +__UNDEFINED__ toTITLE_uvchr(c, s, l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l) +__UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l) + +# define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \ + (((((e) - (s)) <= 0) \ + ? (croak("Attempting case change on zero length string"), \ + 0) /* So looks like it returns something, and will compile */ \ + : ((e) - (s)) < UTF8SKIP(s)) \ + ? (croak(D_PPP_TOO_SHORT_MSG, \ + s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \ + 0) \ + /* Get the changed code point and store its UTF-8 */ \ + : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \ + /* Then store its length, and re-get code point for return */ \ + *(l) = UTF8SKIP(r), to_utf8_ ## name(r)) + +/* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe, + * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr + The UTF-8 case changing operations had bugs before around 5.12 or 5.14; + this backport does not correct them. + + In perls before 7.3, multi-character case changing is not implemented; this + backport uses the simple case changes available in those perls. */ + +__UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l) +__UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l) +__UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \ + D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l) + + /* Warning: toFOLD_utf8_safe, toFOLD_uvchr + The UTF-8 case changing operations had bugs before around 5.12 or 5.14; + this backport does not correct them. + + In perls before 7.3, case folding is not implemented; instead, this + backport substitutes simple (not multi-character, which isn't available) + lowercasing. This gives the correct result in most, but not all, instances + */ + +__UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l) + +#endif /* Until we figure out how to support this in older perls... */ #if { VERSION >= 5.8.0 } @@ -767,13 +1108,18 @@ __UNDEFINED__ LIKELY(x) (x) __UNDEFINED__ UNLIKELY(x) (x) #ifndef MUTABLE_PTR -#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) +#if defined(PERL_USE_GCC_BRACE_GROUPS) # define MUTABLE_PTR(p) ({ void *_p = (p); _p; }) #else # define MUTABLE_PTR(p) ((void *) (p)) #endif #endif +__UNDEFINED__ MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p)) +__UNDEFINED__ MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p)) __UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p)) =xsmisc @@ -823,10 +1169,10 @@ newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file); int OpSIBLING_tests() PREINIT: - OP *x; - OP *kid; - OP *middlekid; - OP *lastkid; + OP *x = NULL; + OP *kid = NULL; + OP *middlekid = NULL; + OP *lastkid = NULL; int count = 0; int failures = 0; int i; @@ -1082,8 +1428,8 @@ check_HeUTF8(utf8_key) hash = newHV(); key = SvPV(utf8_key, klen); - if (SvUTF8(utf8_key)) klen *= -1; - hv_store(hash, key, klen, newSVpvs("string"), 0); + hv_store(hash, key, SvUTF8(utf8_key) ? -klen : klen, + newSVpvs("string"), 0); hv_iterinit(hash); ent = hv_iternext(hash); assert(ent); @@ -1557,32 +1903,169 @@ isPSXSPC_L1(ord) OUTPUT: RETVAL -#if { VERSION >= 5.006 } +bool +isASCII_uvchr(ord) + UV ord + CODE: + RETVAL = isASCII_uvchr(ord); + OUTPUT: + RETVAL bool -isALPHA_utf8_safe(s, offset) +isASCII_utf8_safe(s, offset) unsigned char * s int offset CODE: - RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset); + PERL_UNUSED_ARG(offset); + RETVAL = isASCII_utf8_safe(s, s + 1 + offset); OUTPUT: RETVAL +#if { VERSION >= 5.006 } + bool -isALPHANUMERIC_utf8_safe(s, offset) +isBLANK_uvchr(ord) + UV ord + CODE: + RETVAL = isBLANK_uvchr(ord); + OUTPUT: + RETVAL + +bool +isALPHA_uvchr(ord) + UV ord + CODE: + RETVAL = isALPHA_uvchr(ord); + OUTPUT: + RETVAL + +bool +isALPHANUMERIC_uvchr(ord) + UV ord + CODE: + RETVAL = isALPHANUMERIC_uvchr(ord); + OUTPUT: + RETVAL + +bool +isCNTRL_uvchr(ord) + UV ord + CODE: + RETVAL = isCNTRL_uvchr(ord); + OUTPUT: + RETVAL + +bool +isDIGIT_uvchr(ord) + UV ord + CODE: + RETVAL = isDIGIT_uvchr(ord); + OUTPUT: + RETVAL + +bool +isIDFIRST_uvchr(ord) + UV ord + CODE: + RETVAL = isIDFIRST_uvchr(ord); + OUTPUT: + RETVAL + +bool +isIDCONT_uvchr(ord) + UV ord + CODE: + RETVAL = isIDCONT_uvchr(ord); + OUTPUT: + RETVAL + +bool +isGRAPH_uvchr(ord) + UV ord + CODE: + RETVAL = isGRAPH_uvchr(ord); + OUTPUT: + RETVAL + +bool +isLOWER_uvchr(ord) + UV ord + CODE: + RETVAL = isLOWER_uvchr(ord); + OUTPUT: + RETVAL + +bool +isPRINT_uvchr(ord) + UV ord + CODE: + RETVAL = isPRINT_uvchr(ord); + OUTPUT: + RETVAL + +bool +isPSXSPC_uvchr(ord) + UV ord + CODE: + RETVAL = isPSXSPC_uvchr(ord); + OUTPUT: + RETVAL + +bool +isPUNCT_uvchr(ord) + UV ord + CODE: + RETVAL = isPUNCT_uvchr(ord); + OUTPUT: + RETVAL + +bool +isSPACE_uvchr(ord) + UV ord + CODE: + RETVAL = isSPACE_uvchr(ord); + OUTPUT: + RETVAL + +bool +isUPPER_uvchr(ord) + UV ord + CODE: + RETVAL = isUPPER_uvchr(ord); + OUTPUT: + RETVAL + +bool +isWORDCHAR_uvchr(ord) + UV ord + CODE: + RETVAL = isWORDCHAR_uvchr(ord); + OUTPUT: + RETVAL + +bool +isXDIGIT_uvchr(ord) + UV ord + CODE: + RETVAL = isXDIGIT_uvchr(ord); + OUTPUT: + RETVAL + +bool +isALPHA_utf8_safe(s, offset) unsigned char * s int offset CODE: - RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset); + RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset); OUTPUT: RETVAL bool -isASCII_utf8_safe(s, offset) +isALPHANUMERIC_utf8_safe(s, offset) unsigned char * s int offset CODE: - RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset); + RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset); OUTPUT: RETVAL @@ -1712,6 +2195,348 @@ isXDIGIT_utf8_safe(s, offset) OUTPUT: RETVAL +bool +isALPHA_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isALPHANUMERIC_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isASCII_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + PERL_UNUSED_ARG(offset); + RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isBLANK_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isCNTRL_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isDIGIT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isGRAPH_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isIDCONT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isIDFIRST_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isLOWER_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isPRINT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isPSXSPC_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isPUNCT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isSPACE_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isUPPER_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isWORDCHAR_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +bool +isXDIGIT_LC_utf8_safe(s, offset) + unsigned char * s + int offset + CODE: + RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset); + OUTPUT: + RETVAL + +AV * +toLOWER_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toTITLE_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toUPPER_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toFOLD_utf8_safe(s, offset) + unsigned char * s + int offset + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toLOWER_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toLOWER_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toTITLE_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toTITLE_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toUPPER_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toUPPER_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + +AV * +toFOLD_uvchr(c) + UV c + PREINIT: + U8 u[UTF8_MAXBYTES+1]; + Size_t len; + UV ret; + SV* utf8; + AV * av; + CODE: + av = newAV(); + ret = toFOLD_uvchr(c, u, &len); + av_push(av, newSVuv(ret)); + + utf8 = newSVpvn((char *) u, len); + SvUTF8_on(utf8); + av_push(av, utf8); + + av_push(av, newSVuv(len)); + RETVAL = av; + OUTPUT: + RETVAL + #endif UV @@ -1747,44 +2572,52 @@ av_top_index(av) OUTPUT: RETVAL -=tests plan => 17678 +STRLEN +av_count(av) + SV *av + CODE: + RETVAL = av_count((AV*)SvRV(av)); + OUTPUT: + RETVAL + +=tests plan => 26827 use vars qw($my_sv @my_av %my_hv); -ok(&Devel::PPPort::boolSV(1)); -ok(!&Devel::PPPort::boolSV(0)); +ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true"); +ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false"); $_ = "Fred"; -ok(&Devel::PPPort::DEFSV(), "Fred"); -ok(&Devel::PPPort::UNDERBAR(), "Fred"); +is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED'); +is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED'); -if ("$]" >= 5.009002 && "$]" < 5.023 && "$]" < 5.023004) { +if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) { eval q{ no warnings "deprecated"; - no if $^V > v5.17.9, warnings => "experimental::lexical_topic"; + no if $^V >= v5.17.9, warnings => "experimental::lexical_topic"; my $_ = "Tony"; - ok(&Devel::PPPort::DEFSV(), "Fred"); - ok(&Devel::PPPort::UNDERBAR(), "Tony"); + is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred'); + is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony'); }; + die __FILE__ . __LINE__ . ": $@" if $@; } else { - ok(1); - ok(1); + skip("perl version outside testing range of lexical_topic", 2); } my @r = &Devel::PPPort::DEFSV_modify(); -ok(@r == 3); -ok($r[0], 'Fred'); -ok($r[1], 'DEFSV'); -ok($r[2], 'Fred'); +ok(@r == 3, "Verify got 3 elements"); +is($r[0], 'Fred'); +is($r[1], 'DEFSV'); +is($r[2], 'Fred'); -ok(&Devel::PPPort::DEFSV(), "Fred"); +is(&Devel::PPPort::DEFSV(), "Fred"); eval { 1 }; -ok(!&Devel::PPPort::ERRSV()); +ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false"); eval { cannot_call_this_one() }; -ok(&Devel::PPPort::ERRSV()); +ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true"); ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0)); ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0)); @@ -1810,49 +2643,47 @@ ok(&Devel::PPPort::get_cv('my_cv', 0)); ok(!&Devel::PPPort::get_cv('not_my_cv', 0)); ok(&Devel::PPPort::get_cv('not_my_cv', 1)); -ok(Devel::PPPort::dXSTARG(42), 43); -ok(Devel::PPPort::dAXMARK(4711), 4710); +is(Devel::PPPort::dXSTARG(42), 43); +is(Devel::PPPort::dAXMARK(4711), 4710); -ok(Devel::PPPort::prepush(), 42); +is(Devel::PPPort::prepush(), 42); -ok(join(':', Devel::PPPort::xsreturn(0)), 'test1'); -ok(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); +is(join(':', Devel::PPPort::xsreturn(0)), 'test1'); +is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2'); -ok(Devel::PPPort::PERL_ABS(42), 42); -ok(Devel::PPPort::PERL_ABS(-13), 13); +is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42"); +is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13"); -ok(Devel::PPPort::SVf(42), "$]" >= 5.004 ? '[42]' : '42'); -ok(Devel::PPPort::SVf('abc'), "$]" >= 5.004 ? '[abc]' : 'abc'); +is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42'); +is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc'); -ok(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); +is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo"); -ok(&Devel::PPPort::ptrtests(), 63); +is(&Devel::PPPort::ptrtests(), 63); -ok(&Devel::PPPort::OpSIBLING_tests(), 0); +is(&Devel::PPPort::OpSIBLING_tests(), 0); -if ("$]" >= 5.009000) { +if (ivers($]) >= ivers(5.9)) { eval q{ - ok(&Devel::PPPort::check_HeUTF8("hello"), "norm"); - ok(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); + is(&Devel::PPPort::check_HeUTF8("hello"), "norm"); + is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8"); }; } else { - ok(1, 1); - ok(1, 1); + skip("Too early perl version", 2); } @r = &Devel::PPPort::check_c_array(); -ok($r[0], 4); -ok($r[1], "13"); +is($r[0], 4); +is($r[1], "13"); ok(!Devel::PPPort::SvRXOK("")); ok(!Devel::PPPort::SvRXOK(bless [], "Regexp")); -if ("$]" < 5.005) { - skip 'no qr// objects in this perl', 0; - skip 'no qr// objects in this perl', 0; +if (ivers($]) < ivers(5.5)) { + skip 'no qr// objects in this perl', 2; } else { my $qr = eval 'qr/./'; - ok(Devel::PPPort::SvRXOK($qr)); + ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true"); ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise")); } @@ -1861,7 +2692,7 @@ ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1); ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41); ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30); -ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6); +ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6"); if (ord("A") == 65) { ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41); ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30); @@ -1875,14 +2706,16 @@ ok( Devel::PPPort::isALNUMC_L1(ord("5"))); ok( Devel::PPPort::isALNUMC_L1(0xFC)); ok(! Devel::PPPort::isALNUMC_L1(0xB6)); -ok( Devel::PPPort::isOCTAL(ord("7"))); -ok(! Devel::PPPort::isOCTAL(ord("8"))); +ok( Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL"); +ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL"); -ok( Devel::PPPort::isOCTAL_A(ord("0"))); -ok(! Devel::PPPort::isOCTAL_A(ord("9"))); +ok( Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A"); +ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A"); -ok( Devel::PPPort::isOCTAL_L1(ord("2"))); -ok(! Devel::PPPort::isOCTAL_L1(ord("8"))); +ok( Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1"); +ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1"); + +my $way_too_early_msg = 'UTF-8 not implemented on this perl'; # For the other properties, we test every code point from 0.255, and a # smattering of higher ones. First populate a hash with keys like '65:ALPHA' @@ -1980,59 +2813,217 @@ for $i (sort { $a <=> $b } keys %code_points_to_test) { XDIGIT)) { if ($i < 256) { # For the ones that can fit in a byte, test each of - #three macros. + # three macros. my $suffix; - for $suffix ("", "_A", "_L1") { - my $should_be = ($i > 0x7F && $suffix ne "_L1") - ? 0 # Fail on non-ASCII unless L1 + for $suffix ("", "_A", "_L1", "_uvchr") { + my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/) + ? 0 # Fail on non-ASCII unless unicode : ($types{"$native:$class"} || 0); + if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') { + skip("No UTF-8 on this perl", 1); + next; + } + my $eval_string = "Devel::PPPort::is${class}$suffix($hex)"; + local $SIG{__WARN__} = sub {}; my $is = eval $eval_string || 0; die "eval 'For $i: $eval_string' gave $@" if $@; - ok($is, $should_be, "'$eval_string'"); + is($is, $should_be, "'$eval_string'"); } } # For all code points, test the '_utf8' macros - if ("$]" < 5.006) { - skip("No UTF-8 on this perl", 0); - if ($i > 255) { - skip("No UTF-8 on this perl", 0); + my $sub_fcn; + for $sub_fcn ("", "_LC") { + my $skip = ""; + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; } - } - else { - my $utf8 = quotemeta Devel::PPPort::uvoffuni_to_utf8($i); - if ("$]" < 5.007 && $native > 255) { - skip("Perls earlier than 5.7 give wrong answers for above Latin1 code points", 0); + elsif (ivers($]) < ivers(5.7) && $native > 255) { + $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points"; } - elsif ("$]" <= 5.011003 && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) { - skip("Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH", 0); + elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) { + $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH"; } - else { + elsif ($sub_fcn eq '_LC' && $i < 256) { + $skip = "Testing of code points whose results depend on locale is skipped "; + } + my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe"; + my $utf8; + if ($skip) { + skip $skip, 1; + } + else { + $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native); my $should_be = $types{"$native:$class"} || 0; - my $eval_string = "Devel::PPPort::is${class}_utf8_safe(\"$utf8\", 0)"; + my $eval_string = "$fcn(\"$utf8\", 0)"; + local $SIG{__WARN__} = sub {}; my $is = eval $eval_string || 0; die "eval 'For $i, $eval_string' gave $@" if $@; - ok($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string)); + is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string)); } # And for the high code points, test that a too short malformation (the # -1) causes it to fail if ($i > 255) { - if ("$]" >= 5.025009) { - skip("Prints an annoying error message that khw doesn't know how to easily suppress", 0); + if ($skip) { + skip $skip, 1; + } + elsif (ivers($]) >= ivers(5.25.9)) { + skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1); } else { - my $eval_string = "Devel::PPPort::is${class}_utf8_safe(\"$utf8\", -1)"; - my $is = eval "no warnings; $eval_string" || 0; + my $eval_string = "$fcn(\"$utf8\", -1)"; + local $SIG{__WARN__} = sub {}; + my $is = eval "$eval_string" || 0; die "eval '$eval_string' gave $@" if $@; - ok($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string)); + is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string)); + } + } + } + } +} + +my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), + Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], + [ 0x100, 0x101 ], + ], + 'FOLD' => [ [ ord('C'), ord('c') ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0), + Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ], + [ 0x104, 0x105 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'ss' ], + ], + 'UPPER' => [ [ ord('a'), ord('A'), ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0), + Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ], + [ 0x101, 0x100 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'SS' ], + ], + 'TITLE' => [ [ ord('c'), ord('C'), ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2), + Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ], + [ 0x103, 0x102 ], + [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF), + 'Ss' ], + ], + ); + +my $name; +for $name (keys %case_changing) { + my @code_points_to_test = @{$case_changing{$name}}; + my $unchanged; + for $unchanged (@code_points_to_test) { + my @pair = @$unchanged; + my $original = $pair[0]; + my $changed = $pair[1]; + my $utf8_changed = $changed; + my $is_cp = $utf8_changed =~ /^\d+$/; + my $should_be_bytes; + if (ivers($]) >= ivers(5.6)) { + if ($is_cp) { + $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); + $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); + } + else { + die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/'; + $should_be_bytes = length $utf8_changed; + } + } + + my $fcn = "to${name}_uvchr"; + my $skip = ""; + + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; + } + elsif (! $is_cp) { + $skip = "Can't do uvchr on a multi-char string"; + } + if ($skip) { + skip $skip, 4; + } + else { + if ($is_cp) { + $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed); + $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0); + } + else { + my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]'; + die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/'; + $should_be_bytes = length $utf8_changed; + } + + my $ret = eval "Devel::PPPort::$fcn($original)"; + my $fail = $@; # Have to save $@, as it gets destroyed + is ($fail, "", "$fcn($original) didn't fail"); + my $first = (ivers($]) != ivers(5.6)) + ? substr($utf8_changed, 0, 1) + : $utf8_changed, 0, 1; + is($ret->[0], ord $first, + "ord of $fcn($original) is $changed"); + is($ret->[1], $utf8_changed, + "UTF-8 of of $fcn($original) is correct"); + is($ret->[2], $should_be_bytes, + "Length of $fcn($original) is $should_be_bytes"); + } + + my $truncate; + for $truncate (0..2) { + my $skip; + if (ivers($]) < ivers(5.6)) { + $skip = $way_too_early_msg; + } + elsif (! $is_cp && ivers($]) < ivers(5.7.3)) { + $skip = "Multi-character case change not implemented until 5.7.3"; + } + elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) { + $skip = "Zero length inputs cause assertion failure; test dies in modern perls"; + } + elsif ($truncate > 0 && length $changed > 1) { + $skip = "Don't test shortened multi-char case changes"; + } + elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) { + $skip = "Don't try to test shortened single bytes"; + } + if ($skip) { + skip $skip, 4; + } + else { + my $fcn = "to${name}_utf8_safe"; + my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original); + my $real_truncate = ($truncate < 2) + ? $truncate : $should_be_bytes; + my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)"; + my $ret = eval "no warnings; $eval_string" || 0; + my $fail = $@; # Have to save $@, as it gets destroyed + if ($truncate == 0) { + is ($fail, "", "Didn't fail on full length input"); + my $first = (ivers($]) != ivers(5.6)) + ? substr($utf8_changed, 0, 1) + : $utf8_changed, 0, 1; + is($ret->[0], ord $first, + "ord of $fcn($original) is $changed"); + is($ret->[1], $utf8_changed, + "UTF-8 of of $fcn($original) is correct"); + is($ret->[2], $should_be_bytes, + "Length of $fcn($original) is $should_be_bytes"); + } + else { + is ($fail, eval 'qr/Malformed UTF-8 character/', + "Gave appropriate error for short char: $original"); + skip("Expected failure means remaining tests for" + . " this aren't relevant", 3); } } } } } -ok(&Devel::PPPort::av_top_index([1,2,3]), 2); -ok(&Devel::PPPort::av_tindex([1,2,3,4]), 3); +is(&Devel::PPPort::av_top_index([1,2,3]), 2); +is(&Devel::PPPort::av_tindex([1,2,3,4]), 3); +is(&Devel::PPPort::av_count([1,2,3,4]), 4);