1 ################################################################################
3 ## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
7 ## This program is free software; you can redistribute it and/or
8 ## modify it under the same terms as Perl itself.
10 ################################################################################
28 PERL_USE_GCC_BRACE_GROUPS
42 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
43 __UNDEFINED__ PERL_STATIC_INLINE static inline
45 __UNDEFINED__ PERL_STATIC_INLINE static
48 __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
49 __UNDEFINED__ OpHAS_SIBLING(o) (cBOOL((o)->op_sibling))
50 __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling)
51 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
52 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
53 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
54 __UNDEFINED__ HEf_SVKEY -2
56 #if defined(DEBUGGING) && !defined(__COVERITY__)
57 __UNDEFINED__ __ASSERT_(statement) assert(statement),
59 __UNDEFINED__ __ASSERT_(statement)
62 __UNDEF_NOT_PROVIDED__ __has_builtin(x) 0
64 #if __has_builtin(__builtin_unreachable)
65 # define D_PPP_HAS_BUILTIN_UNREACHABLE
66 #elif (defined(__GNUC__) && ( __GNUC__ > 4 \
67 || __GNUC__ == 4 && __GNUC_MINOR__ >= 5))
68 # define D_PPP_HAS_BUILTIN_UNREACHABLE
73 # define ASSUME(x) assert(x)
74 # elif defined(_MSC_VER)
75 # define ASSUME(x) __assume(x)
76 # elif defined(__ARMCC_VERSION)
77 # define ASSUME(x) __promise(x)
78 # elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE)
79 # define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable())
81 # define ASSUME(x) assert(x)
86 # ifdef D_PPP_HAS_BUILTIN_UNREACHABLE
87 # define NOT_REACHED \
89 ASSUME(!"UNREACHABLE"); __builtin_unreachable(); \
91 # elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux))
94 # define NOT_REACHED ASSUME(!"UNREACHABLE")
101 # define WIDEST_UTYPE U64TYPE
103 # define WIDEST_UTYPE unsigned Quad_t
106 # define WIDEST_UTYPE U32
110 /* These could become provided if/when they become part of the public API */
111 __UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \
112 (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
113 __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \
114 ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \
115 : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \
116 : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
118 /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a
120 #undef FITS_IN_8_BITS /* handy.h version uses a core-only constant */
121 __UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \
122 || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
124 /* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below
125 * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
126 * point. That is so that it can automatically get the bug fixes done in this
128 #define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro) \
131 : UTF8_IS_INVARIANT((s)[0]) \
132 ? is ## macro ## _L1((s)[0]) \
133 : (((e) - (s)) < UTF8SKIP(s)) \
135 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
136 /* The cast in the line below is only to silence warnings */ \
137 ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
138 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
139 & UTF_START_MASK(2), \
141 : is ## macro ## _utf8(s))
143 /* Create the macro for "is'macro'_LC_utf8_safe(s, e)". For code points below
144 * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
145 * point. That is so that it can automatically get the bug fixes done in this
147 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro) \
150 : UTF8_IS_INVARIANT((s)[0]) \
151 ? is ## macro ## _LC((s)[0]) \
152 : (((e) - (s)) < UTF8SKIP(s)) \
154 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
155 /* The cast in the line below is only to silence warnings */ \
156 ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
157 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
158 & UTF_START_MASK(2), \
160 : is ## macro ## _utf8(s))
162 /* A few of the early functions are broken. For these and the non-LC case,
163 * machine generated code is substituted. But that code doesn't work for
164 * locales. This is just like the above macro, but at the end, we call the
165 * macro we've generated for the above 255 case, which is correct since locale
166 * isn't involved. This will generate extra code to handle the 0-255 inputs,
167 * but hopefully it will be optimized out by the C compiler. But just in case
168 * it isn't, this macro is only used on the few versions that are broken */
170 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro) \
173 : UTF8_IS_INVARIANT((s)[0]) \
174 ? is ## macro ## _LC((s)[0]) \
175 : (((e) - (s)) < UTF8SKIP(s)) \
177 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
178 /* The cast in the line below is only to silence warnings */ \
179 ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
180 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
181 & UTF_START_MASK(2), \
183 : is ## macro ## _utf8_safe(s, e))
185 __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
186 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
188 #ifndef PERL_UNUSED_DECL
190 # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
191 # define PERL_UNUSED_DECL
193 # define PERL_UNUSED_DECL __attribute__((unused))
196 # define PERL_UNUSED_DECL
200 #ifndef PERL_UNUSED_ARG
201 # if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */
203 # define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x))
205 # define PERL_UNUSED_ARG(x) ((void)x)
209 #ifndef PERL_UNUSED_VAR
210 # define PERL_UNUSED_VAR(x) ((void)x)
213 #ifndef PERL_UNUSED_CONTEXT
215 # define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl)
217 # define PERL_UNUSED_CONTEXT
221 #ifndef PERL_UNUSED_RESULT
222 # if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
223 # define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
225 # define PERL_UNUSED_RESULT(v) ((void)(v))
229 __UNDEFINED__ NOOP /*EMPTY*/(void)0
230 __UNDEFINED__ dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL
233 # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
234 # define NVTYPE long double
236 # define NVTYPE double
242 # if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
244 # define INT2PTR(any,d) (any)(d)
246 # if PTRSIZE == LONGSIZE
247 # define PTRV unsigned long
249 # define PTRV unsigned
251 # define INT2PTR(any,d) (any)(PTRV)(d)
256 # if PTRSIZE == LONGSIZE
257 # define PTR2ul(p) (unsigned long)(p)
259 # define PTR2ul(p) INT2PTR(unsigned long,p)
263 __UNDEFINED__ PTR2nat(p) (PTRV)(p)
264 __UNDEFINED__ NUM2PTR(any,d) (any)PTR2nat(d)
265 __UNDEFINED__ PTR2IV(p) INT2PTR(IV,p)
266 __UNDEFINED__ PTR2UV(p) INT2PTR(UV,p)
267 __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p)
269 #undef START_EXTERN_C
273 # define START_EXTERN_C extern "C" {
274 # define END_EXTERN_C }
275 # define EXTERN_C extern "C"
277 # define START_EXTERN_C
278 # define END_EXTERN_C
279 # define EXTERN_C extern
282 #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
283 # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
284 __UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN
288 #if ! defined(__GNUC__) || defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) || defined(__cplusplus)
289 # undef PERL_USE_GCC_BRACE_GROUPS
291 # ifndef PERL_USE_GCC_BRACE_GROUPS
292 # define PERL_USE_GCC_BRACE_GROUPS
298 #if defined(VOIDFLAGS) && defined(PERL_USE_GCC_BRACE_GROUPS)
299 # define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */
302 # if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__)
303 # define STMT_START if (1)
304 # define STMT_END else (void)0
306 # define STMT_START do
307 # define STMT_END while (0)
311 __UNDEFINED__ boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no)
313 /* DEFSV appears first in 5.004_56 */
314 __UNDEFINED__ DEFSV GvSV(PL_defgv)
315 __UNDEFINED__ SAVE_DEFSV SAVESPTR(GvSV(PL_defgv))
316 __UNDEFINED__ DEFSV_set(sv) (DEFSV = (sv))
318 /* Older perls (<=5.003) lack AvFILLp */
319 __UNDEFINED__ AvFILLp AvFILL
321 __UNDEFINED__ av_tindex AvFILL
322 __UNDEFINED__ av_top_index AvFILL
323 __UNDEFINED__ av_count(av) (AvFILL(av)+1)
325 __UNDEFINED__ ERRSV get_sv("@",FALSE)
328 * This function's backport doesn't support the length parameter, but
329 * rather ignores it. Portability can only be ensured if the length
330 * parameter is used for speed reasons, but the length can always be
331 * correctly computed from the string argument.
334 __UNDEFINED__ gv_stashpvn(str,len,create) gv_stashpv(str,create)
337 __UNDEFINED__ get_cv perl_get_cv
338 __UNDEFINED__ get_sv perl_get_sv
339 __UNDEFINED__ get_av perl_get_av
340 __UNDEFINED__ get_hv perl_get_hv
343 __UNDEFINED__ dUNDERBAR dNOOP
344 __UNDEFINED__ UNDERBAR DEFSV
346 __UNDEFINED__ dAX I32 ax = MARK - PL_stack_base + 1
347 __UNDEFINED__ dITEMS I32 items = SP - MARK
349 __UNDEFINED__ dXSTARG SV * targ = sv_newmortal()
351 __UNDEFINED__ dAXMARK I32 ax = POPMARK; \
352 SV ** const mark = PL_stack_base + ax++
355 __UNDEFINED__ XSprePUSH (sp = PL_stack_base + ax - 1)
357 #if { VERSION < 5.005 }
359 # define XSRETURN(off) \
361 PL_stack_sp = PL_stack_base + ax + ((off) - 1); \
366 __UNDEFINED__ XSPROTO(name) void name(pTHX_ CV* cv)
367 __UNDEFINED__ SVfARG(p) ((void*)(p))
369 __UNDEFINED__ PERL_ABS(x) ((x) < 0 ? -(x) : (x))
371 __UNDEFINED__ dVAR dNOOP
373 __UNDEFINED__ SVf "_"
375 __UNDEFINED__ CPERLscope(x) x
377 __UNDEFINED__ PERL_HASH(hash,str,len) \
379 const char *s_PeRlHaSh = str; \
380 I32 i_PeRlHaSh = len; \
381 U32 hash_PeRlHaSh = 0; \
382 while (i_PeRlHaSh--) \
383 hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
384 (hash) = hash_PeRlHaSh; \
387 #ifndef PERLIO_FUNCS_DECL
388 # ifdef PERLIO_FUNCS_CONST
389 # define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs
390 # define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs)
392 # define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs
393 # define PERLIO_FUNCS_CAST(funcs) (funcs)
397 /* provide these typedefs for older perls */
398 #if { VERSION < 5.9.3 }
401 typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto);
403 typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX);
406 typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
410 /* On versions without NATIVE_TO_ASCII, only ASCII is supported */
411 #if defined(EBCDIC) && defined(NATIVE_TO_ASCI)
412 __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
413 __UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
414 __UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
415 __UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
417 __UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
418 __UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
419 __UNDEFINED__ NATIVE_TO_UNI(c) (c)
420 __UNDEFINED__ UNI_TO_NATIVE(c) (c)
423 /* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
424 EBCDIC is not supported on versions earlier than 5.7.1
427 /* The meaning of this changed; use the modern version */
432 /* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
433 This is equivalent to the corresponding isSPACE-type macro. On perls
434 before 5.18, this matched a vertical tab and SPACE didn't. But the
435 ppport.h SPACE version does match VT in all perl releases. Since VT's are
436 extremely rarely found in real-life files, this difference effectively
439 /* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
440 Until Perl 5.18, this did not match the vertical tab (VT). The ppport.h
441 version does match it in all perl releases. Since VT's are extremely rarely
442 found in real-life files, this difference effectively doesn't matter */
446 /* This is the first version where these macros are fully correct on EBCDIC
447 * platforms. Relying on the C library functions, as earlier releases did,
448 * causes problems with locales */
449 # if { VERSION < 5.22.0 }
459 # undef isALPHANUMERIC
460 # undef isALPHANUMERIC_A
461 # undef isALPHANUMERIC_L1
503 # undef isWORDCHAR_L1
509 __UNDEFINED__ isASCII(c) (isCNTRL(c) || isPRINT(c))
511 /* The below is accurate for all EBCDIC code pages supported by
512 * all the versions of Perl overridden by this */
513 __UNDEFINED__ isCNTRL(c) ( (c) == '\0' || (c) == '\a' || (c) == '\b' \
514 || (c) == '\f' || (c) == '\n' || (c) == '\r' \
515 || (c) == '\t' || (c) == '\v' \
516 || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
517 || (c) == 7 /* U+7F DEL */ \
518 || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
520 || (c) == 0x18 /* U+18 CAN */ \
521 || (c) == 0x19 /* U+19 EOM */ \
522 || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
523 || (c) == 0x26 /* U+17 ETB */ \
524 || (c) == 0x27 /* U+1B ESC */ \
525 || (c) == 0x2D /* U+05 ENQ */ \
526 || (c) == 0x2E /* U+06 ACK */ \
527 || (c) == 0x32 /* U+16 SYN */ \
528 || (c) == 0x37 /* U+04 EOT */ \
529 || (c) == 0x3C /* U+14 DC4 */ \
530 || (c) == 0x3D /* U+15 NAK */ \
531 || (c) == 0x3F /* U+1A SUB */ \
534 #if '^' == 106 /* EBCDIC POSIX-BC */
535 # define D_PPP_OUTLIER_CONTROL 0x5F
536 #else /* EBCDIC 1047 037 */
537 # define D_PPP_OUTLIER_CONTROL 0xFF
540 /* The controls are everything below blank, plus one outlier */
541 __UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' ' \
542 || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
543 /* The ordering of the tests in this and isUPPER are to exclude most characters
545 __UNDEFINED__ isLOWER(c) ( (c) >= 'a' && (c) <= 'z' \
547 || ((c) >= 'j' && (c) <= 'r') \
549 __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' \
551 || ((c) >= 'J' && (c) <= 'R') \
554 #else /* Above is EBCDIC; below is ASCII */
556 # if { VERSION < 5.4.0 }
557 /* The implementation of these in older perl versions can give wrong results if
558 * the C program locale is set to other than the C locale */
573 # if { VERSION == 5.7.0 } /* this perl made space GRAPH */
577 # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
581 # if { VERSION < 5.10.0 }
582 /* earlier perls included all of the isSPACE() characters, which is wrong. The
583 * version provided by Devel::PPPort always overrides an existing buggy
589 # if { VERSION < 5.14.0 }
590 /* earlier perls always returned true if the parameter was a signed char */
595 # if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
599 # if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
603 # if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
610 __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127)
611 __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
612 __UNDEFINED__ isCNTRL_L1(c) ( (WIDEST_UTYPE) (c) < ' ' \
613 || inRANGE((c), 0x7F, 0x9F))
614 __UNDEFINED__ isLOWER(c) inRANGE((c), 'a', 'z')
615 __UNDEFINED__ isUPPER(c) inRANGE((c), 'A', 'Z')
617 #endif /* Below are definitions common to EBCDIC and ASCII */
619 __UNDEFINED__ isASCII_L1(c) isASCII(c)
620 __UNDEFINED__ isASCII_LC(c) isASCII(c)
621 __UNDEFINED__ isALNUM(c) isWORDCHAR(c)
622 __UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c)
623 __UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c)
624 __UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c))
625 __UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))
626 __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
627 __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
628 __UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c))
629 __UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t')
630 __UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \
631 || ( FITS_IN_8_BITS(c) \
632 && NATIVE_TO_LATIN1((U8) c) == 0xA0))
633 __UNDEFINED__ isBLANK_LC(c) isBLANK(c)
634 __UNDEFINED__ isDIGIT(c) inRANGE(c, '0', '9')
635 __UNDEFINED__ isDIGIT_L1(c) isDIGIT(c)
636 __UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c))
637 __UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \
639 && NATIVE_TO_LATIN1((U8) c) != 0xA0)
640 __UNDEFINED__ isIDCONT(c) isWORDCHAR(c)
641 __UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c)
642 __UNDEFINED__ isIDCONT_LC(c) isWORDCHAR_LC(c)
643 __UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_')
644 __UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_')
645 __UNDEFINED__ isIDFIRST_LC(c) (isALPHA_LC(c) || (U8) (c) == '_')
646 __UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \
647 || ( FITS_IN_8_BITS(c) \
648 && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \
649 && NATIVE_TO_LATIN1((U8) c) != 0xF7) \
650 || NATIVE_TO_LATIN1((U8) c) == 0xAA \
651 || NATIVE_TO_LATIN1((U8) c) == 0xBA \
652 || NATIVE_TO_LATIN1((U8) c) == 0xB5)))
653 __UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0')
654 __UNDEFINED__ isOCTAL_L1(c) isOCTAL(c)
655 __UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ')
656 __UNDEFINED__ isPRINT_L1(c) (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c))
657 __UNDEFINED__ isPSXSPC(c) isSPACE(c)
658 __UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c)
659 __UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \
660 || (c) == '#' || (c) == '$' || (c) == '%' \
661 || (c) == '&' || (c) == '\'' || (c) == '(' \
662 || (c) == ')' || (c) == '*' || (c) == '+' \
663 || (c) == ',' || (c) == '.' || (c) == '/' \
664 || (c) == ':' || (c) == ';' || (c) == '<' \
665 || (c) == '=' || (c) == '>' || (c) == '?' \
666 || (c) == '@' || (c) == '[' || (c) == '\\' \
667 || (c) == ']' || (c) == '^' || (c) == '_' \
668 || (c) == '`' || (c) == '{' || (c) == '|' \
669 || (c) == '}' || (c) == '~')
670 __UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \
671 || ( FITS_IN_8_BITS(c) \
672 && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \
673 || NATIVE_TO_LATIN1((U8) c) == 0xA7 \
674 || NATIVE_TO_LATIN1((U8) c) == 0xAB \
675 || NATIVE_TO_LATIN1((U8) c) == 0xB6 \
676 || NATIVE_TO_LATIN1((U8) c) == 0xB7 \
677 || NATIVE_TO_LATIN1((U8) c) == 0xBB \
678 || NATIVE_TO_LATIN1((U8) c) == 0xBF)))
679 __UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \
680 || (c) == '\v' || (c) == '\f')
681 __UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \
682 || (FITS_IN_8_BITS(c) \
683 && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \
684 || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
685 __UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \
686 || (FITS_IN_8_BITS(c) \
687 && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
688 && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
689 && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
690 __UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_')
691 __UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c))
692 __UNDEFINED__ isWORDCHAR_LC(c) (isIDFIRST_LC(c) || isDIGIT_LC(c))
693 __UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \
694 || inRANGE((c), 'a', 'f') \
695 || inRANGE((c), 'A', 'F'))
696 __UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c)
697 __UNDEFINED__ isXDIGIT_LC(c) isxdigit(c)
699 __UNDEFINED__ isALNUM_A(c) isALNUM(c)
700 __UNDEFINED__ isALNUMC_A(c) isALNUMC(c)
701 __UNDEFINED__ isALPHA_A(c) isALPHA(c)
702 __UNDEFINED__ isALPHANUMERIC_A(c) isALPHANUMERIC(c)
703 __UNDEFINED__ isASCII_A(c) isASCII(c)
704 __UNDEFINED__ isBLANK_A(c) isBLANK(c)
705 __UNDEFINED__ isCNTRL_A(c) isCNTRL(c)
706 __UNDEFINED__ isDIGIT_A(c) isDIGIT(c)
707 __UNDEFINED__ isGRAPH_A(c) isGRAPH(c)
708 __UNDEFINED__ isIDCONT_A(c) isIDCONT(c)
709 __UNDEFINED__ isIDFIRST_A(c) isIDFIRST(c)
710 __UNDEFINED__ isLOWER_A(c) isLOWER(c)
711 __UNDEFINED__ isOCTAL_A(c) isOCTAL(c)
712 __UNDEFINED__ isPRINT_A(c) isPRINT(c)
713 __UNDEFINED__ isPSXSPC_A(c) isPSXSPC(c)
714 __UNDEFINED__ isPUNCT_A(c) isPUNCT(c)
715 __UNDEFINED__ isSPACE_A(c) isSPACE(c)
716 __UNDEFINED__ isUPPER_A(c) isUPPER(c)
717 __UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c)
718 __UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c)
720 __UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s)))
721 __UNDEFINED__ isASCII_uvchr(c) (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0)
723 #if { VERSION >= 5.006 }
724 # ifdef isALPHA_uni /* If one defined, all are; this is just an exemplar */
725 # define D_PPP_is_ctype(upper, lower, c) \
727 ? is ## upper ## _L1(c) \
728 : is ## upper ## _uni((UV) (c))) /* _uni is old synonym */
730 # define D_PPP_is_ctype(upper, lower, c) \
732 ? is ## upper ## _L1(c) \
733 : is_uni_ ## lower((UV) (c))) /* is_uni_ is even older */
736 __UNDEFINED__ isALPHA_uvchr(c) D_PPP_is_ctype(ALPHA, alpha, c)
737 __UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c))
739 __UNDEFINED__ isBLANK_uvchr(c) D_PPP_is_ctype(BLANK, blank, c)
741 __UNDEFINED__ isBLANK_uvchr(c) (FITS_IN_8_BITS(c) \
743 : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \
744 || inRANGE((UV) (c), 0x2000, 0x200A) \
745 || (UV) (c) == 0x202F /* Unicode 3.0 */\
746 || (UV) (c) == 0x205F /* Unicode 3.2 */\
747 || (UV) (c) == 0x3000))
749 __UNDEFINED__ isCNTRL_uvchr(c) D_PPP_is_ctype(CNTRL, cntrl, c)
750 __UNDEFINED__ isDIGIT_uvchr(c) D_PPP_is_ctype(DIGIT, digit, c)
751 __UNDEFINED__ isGRAPH_uvchr(c) D_PPP_is_ctype(GRAPH, graph, c)
752 __UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c)
753 __UNDEFINED__ isIDFIRST_uvchr(c) D_PPP_is_ctype(IDFIRST, idfirst, c)
754 __UNDEFINED__ isLOWER_uvchr(c) D_PPP_is_ctype(LOWER, lower, c)
755 __UNDEFINED__ isPRINT_uvchr(c) D_PPP_is_ctype(PRINT, print, c)
756 __UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c)
757 __UNDEFINED__ isPUNCT_uvchr(c) D_PPP_is_ctype(PUNCT, punct, c)
758 __UNDEFINED__ isSPACE_uvchr(c) D_PPP_is_ctype(SPACE, space, c)
759 __UNDEFINED__ isUPPER_uvchr(c) D_PPP_is_ctype(UPPER, upper, c)
760 __UNDEFINED__ isXDIGIT_uvchr(c) D_PPP_is_ctype(XDIGIT, xdigit, c)
761 __UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c) \
762 ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c))
764 __UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
765 # ifdef isALPHANUMERIC_utf8
766 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
767 D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
769 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
770 (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
773 /* This was broken before 5.18, and just use this instead of worrying about
774 * which releases the official works on */
776 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
777 ( ( LIKELY((e) > (s)) ) ? /* Machine generated */ \
778 ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1 \
779 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
780 ( ( 0xC2 == ((const U8*)s)[0] ) ? \
781 ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 ) \
782 : ( 0xE1 == ((const U8*)s)[0] ) ? \
783 ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
784 : ( 0xE2 == ((const U8*)s)[0] ) ? \
785 ( ( 0x80 == ((const U8*)s)[1] ) ? \
786 ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
787 : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
788 : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
792 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
794 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
795 ( ( LIKELY((e) > (s)) ) ? \
796 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
797 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
798 ( ( 0x80 == ((const U8*)s)[0] ) ? \
799 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
800 : ( 0xBC == ((const U8*)s)[0] ) ? \
801 ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
802 : ( 0xCA == ((const U8*)s)[0] ) ? \
803 ( ( 0x41 == ((const U8*)s)[1] ) ? \
804 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
805 : ( 0x42 == ((const U8*)s)[1] ) ? \
806 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
807 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
808 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
812 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
814 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
815 ( ( LIKELY((e) > (s)) ) ? \
816 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
817 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
818 ( ( 0x78 == ((const U8*)s)[0] ) ? \
819 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
820 : ( 0xBD == ((const U8*)s)[0] ) ? \
821 ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
822 : ( 0xCA == ((const U8*)s)[0] ) ? \
823 ( ( 0x41 == ((const U8*)s)[1] ) ? \
824 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
825 : ( 0x42 == ((const U8*)s)[1] ) ? \
826 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
827 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
828 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
833 # error Unknown character set
836 __UNDEFINED__ isCNTRL_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
837 __UNDEFINED__ isDIGIT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
838 __UNDEFINED__ isGRAPH_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
839 # ifdef isIDCONT_utf8
840 __UNDEFINED__ isIDCONT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
842 __UNDEFINED__ isIDCONT_utf8_safe(s,e) isWORDCHAR_utf8_safe(s,e)
845 __UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
846 __UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
847 __UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
849 # undef isPSXSPC_utf8_safe /* Use the modern definition */
850 __UNDEFINED__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e)
852 __UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
853 __UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
854 __UNDEFINED__ isUPPER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
856 # ifdef isWORDCHAR_utf8
857 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
859 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) \
860 (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_')
863 /* This was broken before 5.12, and just use this instead of worrying about
864 * which releases the official works on */
866 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
867 ( ( LIKELY((e) > (s)) ) ? \
868 ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
869 : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
870 ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
871 : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
874 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
876 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
877 ( ( LIKELY((e) > (s)) ) ? \
878 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
879 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
880 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
881 : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
884 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
886 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
887 ( ( LIKELY((e) > (s)) ) ? \
888 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
889 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
890 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
891 : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
895 # error Unknown character set
898 __UNDEFINED__ isALPHA_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA)
899 # ifdef isALPHANUMERIC_utf8
900 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \
901 D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC)
903 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \
904 (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e))
907 __UNDEFINED__ isBLANK_LC_utf8_safe(s,e) \
908 D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK)
909 __UNDEFINED__ isCNTRL_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL)
910 __UNDEFINED__ isDIGIT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT)
911 __UNDEFINED__ isGRAPH_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH)
912 # ifdef isIDCONT_utf8
913 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT)
915 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) isWORDCHAR_LC_utf8_safe(s,e)
918 __UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST)
919 __UNDEFINED__ isLOWER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER)
920 __UNDEFINED__ isPRINT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT)
922 # undef isPSXSPC_LC_utf8_safe /* Use the modern definition */
923 __UNDEFINED__ isPSXSPC_LC_utf8_safe(s,e) isSPACE_LC_utf8_safe(s,e)
925 __UNDEFINED__ isPUNCT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT)
926 __UNDEFINED__ isSPACE_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE)
927 __UNDEFINED__ isUPPER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER)
929 # ifdef isWORDCHAR_utf8
930 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR)
932 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) \
933 (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_')
936 __UNDEFINED__ isXDIGIT_LC_utf8_safe(s,e) \
937 D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT)
939 /* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe,
940 * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe,
941 * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe,
942 * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe,
943 * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe,
944 * isXDIGIT_utf8_safe,
945 * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe,
946 * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe,
947 * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe,
948 * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe,
949 * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe,
950 * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe,
951 * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr,
952 * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr,
953 * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr,
954 * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr,
955 * isWORDCHAR_uvchr, isXDIGIT_uvchr
957 * The UTF-8 handling is buggy in early Perls, and this can give inaccurate
958 * results for code points above 0xFF, until the implementation started
959 * settling down in 5.12 and 5.14 */
963 #define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \
964 " \\x%02x (too short; %d bytes available, need" \
966 /* Perls starting here had a new API which handled multi-character results */
967 #if { VERSION >= 5.7.3 }
969 __UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l))
970 __UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l))
971 __UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l))
972 __UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l))
974 # if { VERSION != 5.15.6 } /* Just this version is broken */
976 /* Prefer the macro to the function */
977 # if defined toLOWER_utf8
978 # define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l)
980 # define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l)
982 # if defined toTITLE_utf8
983 # define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l)
985 # define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l)
987 # if defined toUPPER_utf8
988 # define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l)
990 # define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l)
992 # if defined toFOLD_utf8
993 # define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l)
995 # define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l)
997 # else /* Below is 5.15.6, which failed to make the macros available
998 # outside of core, so we have to use the 'Perl_' form. khw
999 # decided it was easier to just handle this case than have to
1000 # document the exception, and make an exception in the tests below
1002 # define D_PPP_TO_LOWER_CALLEE(s,r,l) \
1003 Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL)
1004 # define D_PPP_TO_TITLE_CALLEE(s,r,l) \
1005 Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL)
1006 # define D_PPP_TO_UPPER_CALLEE(s,r,l) \
1007 Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL)
1008 # define D_PPP_TO_FOLD_CALLEE(s,r,l) \
1009 Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL)
1012 /* The actual implementation of the backported macros. If too short, croak,
1013 * otherwise call the original that doesn't have an upper limit parameter */
1014 # define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \
1015 (((((e) - (s)) <= 0) \
1016 /* We could just do nothing, but modern perls croak */ \
1017 ? (croak("Attempting case change on zero length string"), \
1018 0) /* So looks like it returns something, and will compile */ \
1019 : ((e) - (s)) < UTF8SKIP(s)) \
1020 ? (croak(D_PPP_TOO_SHORT_MSG, \
1021 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1023 : D_PPP_TO_ ## name ## _CALLEE(s,r,l))
1025 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
1026 D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l)
1027 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
1028 D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l)
1029 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
1030 D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l)
1031 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \
1032 D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l)
1034 #elif { VERSION >= 5.006 }
1036 /* Here we have UTF-8 support, but using the original API where the case
1037 * changing functions merely returned the changed code point; hence they
1038 * couldn't handle multi-character results. */
1040 # ifdef uvchr_to_utf8
1041 # define D_PPP_UV_TO_UTF8 uvchr_to_utf8
1043 # define D_PPP_UV_TO_UTF8 uv_to_utf8
1046 /* Get the utf8 of the case changed value, and store its length; then have
1047 * to re-calculate the changed case value in order to return it */
1048 # define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \
1049 (*(l) = (D_PPP_UV_TO_UTF8(s, \
1050 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \
1051 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c))))
1053 __UNDEFINED__ toLOWER_uvchr(c, s, l) \
1054 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l)
1055 __UNDEFINED__ toUPPER_uvchr(c, s, l) \
1056 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l)
1057 __UNDEFINED__ toTITLE_uvchr(c, s, l) \
1058 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l)
1059 __UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l)
1061 # define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \
1062 (((((e) - (s)) <= 0) \
1063 ? (croak("Attempting case change on zero length string"), \
1064 0) /* So looks like it returns something, and will compile */ \
1065 : ((e) - (s)) < UTF8SKIP(s)) \
1066 ? (croak(D_PPP_TOO_SHORT_MSG, \
1067 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1069 /* Get the changed code point and store its UTF-8 */ \
1070 : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \
1071 /* Then store its length, and re-get code point for return */ \
1072 *(l) = UTF8SKIP(r), to_utf8_ ## name(r))
1074 /* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe,
1075 * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr
1076 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1077 this backport does not correct them.
1079 In perls before 7.3, multi-character case changing is not implemented; this
1080 backport uses the simple case changes available in those perls. */
1082 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
1083 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l)
1084 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
1085 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l)
1086 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
1087 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l)
1089 /* Warning: toFOLD_utf8_safe, toFOLD_uvchr
1090 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1091 this backport does not correct them.
1093 In perls before 7.3, case folding is not implemented; instead, this
1094 backport substitutes simple (not multi-character, which isn't available)
1095 lowercasing. This gives the correct result in most, but not all, instances
1098 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l)
1102 /* Until we figure out how to support this in older perls... */
1103 #if { VERSION >= 5.8.0 }
1105 __UNDEFINED__ HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \
1106 SvUTF8(HeKEY_sv(he)) : \
1111 __UNDEFINED__ C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
1112 __UNDEFINED__ C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a))
1114 __UNDEFINED__ LIKELY(x) (x)
1115 __UNDEFINED__ UNLIKELY(x) (x)
1118 #if defined(PERL_USE_GCC_BRACE_GROUPS)
1119 # define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
1121 # define MUTABLE_PTR(p) ((void *) (p))
1125 __UNDEFINED__ MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))
1126 __UNDEFINED__ MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))
1127 __UNDEFINED__ MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))
1128 __UNDEFINED__ MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))
1129 __UNDEFINED__ MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))
1130 __UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
1134 typedef XSPROTO(XSPROTO_test_t);
1135 typedef XSPROTO_test_t *XSPROTO_test_t_ptr;
1137 XS(XS_Devel__PPPort_dXSTARG); /* prototype */
1138 XS(XS_Devel__PPPort_dXSTARG)
1144 PERL_UNUSED_VAR(cv);
1146 iv = SvIV(ST(0)) + 1;
1151 XS(XS_Devel__PPPort_dAXMARK); /* prototype */
1152 XS(XS_Devel__PPPort_dAXMARK)
1159 PERL_UNUSED_VAR(cv);
1161 iv = SvIV(ST(0)) - 1;
1169 XSPROTO_test_t_ptr p = &XS_Devel__PPPort_dXSTARG;
1170 newXS("Devel::PPPort::dXSTARG", *p, file);
1172 newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file);
1187 x = newOP(OP_PUSHMARK, 0);
1189 /* No siblings yet! */
1190 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1191 failures++; warn("Op should not have had a sib");
1195 /* Add 2 siblings */
1198 for (i = 0; i < 2; i++) {
1199 OP *newsib = newOP(OP_PUSHMARK, 0);
1200 OpMORESIB_set(kid, newsib);
1202 kid = OpSIBLING(kid);
1205 middlekid = OpSIBLING(x);
1207 /* Should now have a sibling */
1208 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1209 failures++; warn("Op should have had a sib after moresib_set");
1212 /* Count the siblings */
1213 for (kid = OpSIBLING(x); kid; kid = OpSIBLING(kid)) {
1218 failures++; warn("Kid had %d sibs, expected 2", count);
1221 if (OpHAS_SIBLING(lastkid) || OpSIBLING(lastkid)) {
1222 failures++; warn("Last kid should not have a sib");
1225 /* Really sets the parent, and says 'no more siblings' */
1226 OpLASTSIB_set(x, lastkid);
1228 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1229 failures++; warn("OpLASTSIB_set failed?");
1232 /* Restore the kid */
1233 OpMORESIB_set(x, lastkid);
1235 /* Try to remove it again */
1236 OpLASTSIB_set(x, NULL);
1238 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1239 failures++; warn("OpLASTSIB_set with NULL failed?");
1242 /* Try to restore with maybesib_set */
1243 OpMAYBESIB_set(x, lastkid, NULL);
1245 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1246 failures++; warn("Op should have had a sib after maybesibset");
1260 RETVAL = SvRXOK(sv);
1271 RETVAL += PTR2nat(p) != 0 ? 1 : 0;
1272 RETVAL += PTR2ul(p) != 0UL ? 2 : 0;
1273 RETVAL += PTR2UV(p) != (UV) 0 ? 4 : 0;
1274 RETVAL += PTR2IV(p) != (IV) 0 ? 8 : 0;
1275 RETVAL += PTR2NV(p) != (NV) 0 ? 16 : 0;
1276 RETVAL += p > NUM2PTR(int *, 0) ? 32 : 0;
1282 gv_stashpvn(name, create)
1286 RETVAL = gv_stashpvn(name, strlen(name), create) != NULL;
1291 get_sv(name, create)
1295 RETVAL = get_sv(name, create) != NULL;
1300 get_av(name, create)
1304 RETVAL = get_av(name, create) != NULL;
1309 get_hv(name, create)
1313 RETVAL = get_hv(name, create) != NULL;
1318 get_cv(name, create)
1322 RETVAL = get_cv(name, create) != NULL;
1330 mXPUSHp("test1", 5);
1332 mXPUSHp("test2", 5);
1342 RETVAL = newSVsv(boolSV(value));
1349 RETVAL = newSVsv(DEFSV);
1356 XPUSHs(sv_mortalcopy(DEFSV));
1359 DEFSV_set(newSVpvs("DEFSV"));
1360 XPUSHs(sv_mortalcopy(DEFSV));
1361 /* Yes, this leaks the above scalar; 5.005 with threads for some reason */
1362 /* frees it upon LEAVE, thus mortalizing it causes "attempt to free..." */
1363 /* sv_2mortal(DEFSV); */
1365 XPUSHs(sv_mortalcopy(DEFSV));
1371 RETVAL = SvTRUEx(ERRSV);
1380 RETVAL = newSVsv(UNDERBAR);
1403 #if { VERSION >= 5.004 }
1404 x = sv_2mortal(newSVpvf("[%" SVf "]", SVfARG(x)));
1410 Perl_ppaddr_t(string)
1413 Perl_ppaddr_t lower;
1415 lower = PL_ppaddr[OP_LC];
1416 mXPUSHs(newSVpv(string, 0));
1419 (void)*(lower)(aTHXR);
1424 #if { VERSION >= 5.8.0 }
1427 check_HeUTF8(utf8_key)
1437 key = SvPV(utf8_key, klen);
1438 if (SvUTF8(utf8_key)) klen *= -1;
1439 hv_store(hash, key, klen, newSVpvs("string"), 0);
1441 ent = hv_iternext(hash);
1443 mXPUSHp((HeUTF8(ent) == 0 ? "norm" : "utf8"), 4);
1452 int x[] = { 10, 11, 12, 13 };
1454 mXPUSHi(C_ARRAY_LENGTH(x)); /* 4 */
1455 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
1461 RETVAL = isBLANK(ord);
1469 RETVAL = isBLANK_A(ord);
1477 RETVAL = isBLANK_L1(ord);
1485 RETVAL = isUPPER(ord);
1493 RETVAL = isUPPER_A(ord);
1501 RETVAL = isUPPER_L1(ord);
1509 RETVAL = isLOWER(ord);
1517 RETVAL = isLOWER_A(ord);
1525 RETVAL = isLOWER_L1(ord);
1533 RETVAL = isALPHA(ord);
1541 RETVAL = isALPHA_A(ord);
1549 RETVAL = isALPHA_L1(ord);
1557 RETVAL = isWORDCHAR(ord);
1565 RETVAL = isWORDCHAR_A(ord);
1573 RETVAL = isWORDCHAR_L1(ord);
1581 RETVAL = isALPHANUMERIC(ord);
1586 isALPHANUMERIC_A(ord)
1589 RETVAL = isALPHANUMERIC_A(ord);
1597 RETVAL = isALNUM(ord);
1605 RETVAL = isALNUM_A(ord);
1613 RETVAL = isDIGIT(ord);
1621 RETVAL = isDIGIT_A(ord);
1629 RETVAL = isOCTAL(ord);
1637 RETVAL = isOCTAL_A(ord);
1645 RETVAL = isIDFIRST(ord);
1653 RETVAL = isIDFIRST_A(ord);
1661 RETVAL = isIDCONT(ord);
1669 RETVAL = isIDCONT_A(ord);
1677 RETVAL = isSPACE(ord);
1685 RETVAL = isSPACE_A(ord);
1693 RETVAL = isASCII(ord);
1701 RETVAL = isASCII_A(ord);
1709 RETVAL = isCNTRL(ord);
1717 RETVAL = isCNTRL_A(ord);
1725 RETVAL = isPRINT(ord);
1733 RETVAL = isPRINT_A(ord);
1741 RETVAL = isGRAPH(ord);
1749 RETVAL = isGRAPH_A(ord);
1757 RETVAL = isPUNCT(ord);
1765 RETVAL = isPUNCT_A(ord);
1773 RETVAL = isXDIGIT(ord);
1781 RETVAL = isXDIGIT_A(ord);
1789 RETVAL = isPSXSPC(ord);
1797 RETVAL = isPSXSPC_A(ord);
1802 isALPHANUMERIC_L1(ord)
1805 RETVAL = isALPHANUMERIC_L1(ord);
1813 RETVAL = isALNUMC_L1(ord);
1821 RETVAL = isDIGIT_L1(ord);
1829 RETVAL = isOCTAL_L1(ord);
1837 RETVAL = isIDFIRST_L1(ord);
1845 RETVAL = isIDCONT_L1(ord);
1853 RETVAL = isSPACE_L1(ord);
1861 RETVAL = isASCII_L1(ord);
1869 RETVAL = isCNTRL_L1(ord);
1877 RETVAL = isPRINT_L1(ord);
1885 RETVAL = isGRAPH_L1(ord);
1893 RETVAL = isPUNCT_L1(ord);
1901 RETVAL = isXDIGIT_L1(ord);
1909 RETVAL = isPSXSPC_L1(ord);
1917 RETVAL = isASCII_uvchr(ord);
1922 isASCII_utf8_safe(s, offset)
1926 PERL_UNUSED_ARG(offset);
1927 RETVAL = isASCII_utf8_safe(s, s + 1 + offset);
1931 #if { VERSION >= 5.006 }
1937 RETVAL = isBLANK_uvchr(ord);
1945 RETVAL = isALPHA_uvchr(ord);
1950 isALPHANUMERIC_uvchr(ord)
1953 RETVAL = isALPHANUMERIC_uvchr(ord);
1961 RETVAL = isCNTRL_uvchr(ord);
1969 RETVAL = isDIGIT_uvchr(ord);
1974 isIDFIRST_uvchr(ord)
1977 RETVAL = isIDFIRST_uvchr(ord);
1985 RETVAL = isIDCONT_uvchr(ord);
1993 RETVAL = isGRAPH_uvchr(ord);
2001 RETVAL = isLOWER_uvchr(ord);
2009 RETVAL = isPRINT_uvchr(ord);
2017 RETVAL = isPSXSPC_uvchr(ord);
2025 RETVAL = isPUNCT_uvchr(ord);
2033 RETVAL = isSPACE_uvchr(ord);
2041 RETVAL = isUPPER_uvchr(ord);
2046 isWORDCHAR_uvchr(ord)
2049 RETVAL = isWORDCHAR_uvchr(ord);
2057 RETVAL = isXDIGIT_uvchr(ord);
2062 isALPHA_utf8_safe(s, offset)
2066 RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
2071 isALPHANUMERIC_utf8_safe(s, offset)
2075 RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2080 isBLANK_utf8_safe(s, offset)
2084 RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
2089 isCNTRL_utf8_safe(s, offset)
2093 RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
2098 isDIGIT_utf8_safe(s, offset)
2102 RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2107 isGRAPH_utf8_safe(s, offset)
2111 RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
2116 isIDCONT_utf8_safe(s, offset)
2120 RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2125 isIDFIRST_utf8_safe(s, offset)
2129 RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
2134 isLOWER_utf8_safe(s, offset)
2138 RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2143 isPRINT_utf8_safe(s, offset)
2147 RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2152 isPSXSPC_utf8_safe(s, offset)
2156 RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2161 isPUNCT_utf8_safe(s, offset)
2165 RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2170 isSPACE_utf8_safe(s, offset)
2174 RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
2179 isUPPER_utf8_safe(s, offset)
2183 RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2188 isWORDCHAR_utf8_safe(s, offset)
2192 RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
2197 isXDIGIT_utf8_safe(s, offset)
2201 RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2206 isALPHA_LC_utf8_safe(s, offset)
2210 RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2215 isALPHANUMERIC_LC_utf8_safe(s, offset)
2219 RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2224 isASCII_LC_utf8_safe(s, offset)
2228 PERL_UNUSED_ARG(offset);
2229 RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset);
2234 isBLANK_LC_utf8_safe(s, offset)
2238 RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2243 isCNTRL_LC_utf8_safe(s, offset)
2247 RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2252 isDIGIT_LC_utf8_safe(s, offset)
2256 RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2261 isGRAPH_LC_utf8_safe(s, offset)
2265 RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2270 isIDCONT_LC_utf8_safe(s, offset)
2274 RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2279 isIDFIRST_LC_utf8_safe(s, offset)
2283 RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2288 isLOWER_LC_utf8_safe(s, offset)
2292 RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2297 isPRINT_LC_utf8_safe(s, offset)
2301 RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2306 isPSXSPC_LC_utf8_safe(s, offset)
2310 RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2315 isPUNCT_LC_utf8_safe(s, offset)
2319 RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2324 isSPACE_LC_utf8_safe(s, offset)
2328 RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2333 isUPPER_LC_utf8_safe(s, offset)
2337 RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2342 isWORDCHAR_LC_utf8_safe(s, offset)
2346 RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2351 isXDIGIT_LC_utf8_safe(s, offset)
2355 RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2360 toLOWER_utf8_safe(s, offset)
2364 U8 u[UTF8_MAXBYTES+1];
2371 ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2372 av_push(av, newSVuv(ret));
2374 utf8 = newSVpvn((char *) u, len);
2378 av_push(av, newSVuv(len));
2384 toTITLE_utf8_safe(s, offset)
2388 U8 u[UTF8_MAXBYTES+1];
2395 ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2396 av_push(av, newSVuv(ret));
2398 utf8 = newSVpvn((char *) u, len);
2402 av_push(av, newSVuv(len));
2408 toUPPER_utf8_safe(s, offset)
2412 U8 u[UTF8_MAXBYTES+1];
2419 ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2420 av_push(av, newSVuv(ret));
2422 utf8 = newSVpvn((char *) u, len);
2426 av_push(av, newSVuv(len));
2432 toFOLD_utf8_safe(s, offset)
2436 U8 u[UTF8_MAXBYTES+1];
2443 ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2444 av_push(av, newSVuv(ret));
2446 utf8 = newSVpvn((char *) u, len);
2450 av_push(av, newSVuv(len));
2459 U8 u[UTF8_MAXBYTES+1];
2466 ret = toLOWER_uvchr(c, u, &len);
2467 av_push(av, newSVuv(ret));
2469 utf8 = newSVpvn((char *) u, len);
2473 av_push(av, newSVuv(len));
2482 U8 u[UTF8_MAXBYTES+1];
2489 ret = toTITLE_uvchr(c, u, &len);
2490 av_push(av, newSVuv(ret));
2492 utf8 = newSVpvn((char *) u, len);
2496 av_push(av, newSVuv(len));
2505 U8 u[UTF8_MAXBYTES+1];
2512 ret = toUPPER_uvchr(c, u, &len);
2513 av_push(av, newSVuv(ret));
2515 utf8 = newSVpvn((char *) u, len);
2519 av_push(av, newSVuv(len));
2528 U8 u[UTF8_MAXBYTES+1];
2535 ret = toFOLD_uvchr(c, u, &len);
2536 av_push(av, newSVuv(ret));
2538 utf8 = newSVpvn((char *) u, len);
2542 av_push(av, newSVuv(len));
2550 LATIN1_TO_NATIVE(cp)
2553 if (cp > 255) RETVAL= cp;
2554 else RETVAL= LATIN1_TO_NATIVE(cp);
2559 NATIVE_TO_LATIN1(cp)
2562 RETVAL= NATIVE_TO_LATIN1(cp);
2570 RETVAL = av_tindex((AV*)SvRV(av));
2578 RETVAL = av_top_index((AV*)SvRV(av));
2586 RETVAL = av_count((AV*)SvRV(av));
2590 =tests plan => 26827
2592 use vars qw($my_sv @my_av %my_hv);
2594 ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true");
2595 ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false");
2598 is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED');
2599 is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED');
2601 if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) {
2603 no warnings "deprecated";
2604 no if $^V >= v5.17.9, warnings => "experimental::lexical_topic";
2606 is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred');
2607 is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony');
2609 die __FILE__ . __LINE__ . ": $@" if $@;
2612 skip("perl version outside testing range of lexical_topic", 2);
2615 my @r = &Devel::PPPort::DEFSV_modify();
2617 ok(@r == 3, "Verify got 3 elements");
2622 is(&Devel::PPPort::DEFSV(), "Fred");
2625 ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false");
2626 eval { cannot_call_this_one() };
2627 ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true");
2629 ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0));
2630 ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0));
2631 ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1));
2634 ok(&Devel::PPPort::get_sv('my_sv', 0));
2635 ok(!&Devel::PPPort::get_sv('not_my_sv', 0));
2636 ok(&Devel::PPPort::get_sv('not_my_sv', 1));
2639 ok(&Devel::PPPort::get_av('my_av', 0));
2640 ok(!&Devel::PPPort::get_av('not_my_av', 0));
2641 ok(&Devel::PPPort::get_av('not_my_av', 1));
2644 ok(&Devel::PPPort::get_hv('my_hv', 0));
2645 ok(!&Devel::PPPort::get_hv('not_my_hv', 0));
2646 ok(&Devel::PPPort::get_hv('not_my_hv', 1));
2649 ok(&Devel::PPPort::get_cv('my_cv', 0));
2650 ok(!&Devel::PPPort::get_cv('not_my_cv', 0));
2651 ok(&Devel::PPPort::get_cv('not_my_cv', 1));
2653 is(Devel::PPPort::dXSTARG(42), 43);
2654 is(Devel::PPPort::dAXMARK(4711), 4710);
2656 is(Devel::PPPort::prepush(), 42);
2658 is(join(':', Devel::PPPort::xsreturn(0)), 'test1');
2659 is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
2661 is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42");
2662 is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13");
2664 is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42');
2665 is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc');
2667 is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
2669 is(&Devel::PPPort::ptrtests(), 63);
2671 is(&Devel::PPPort::OpSIBLING_tests(), 0);
2673 if (ivers($]) >= ivers(5.9)) {
2675 is(&Devel::PPPort::check_HeUTF8("hello"), "norm");
2676 is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
2679 skip("Too early perl version", 2);
2682 @r = &Devel::PPPort::check_c_array();
2686 ok(!Devel::PPPort::SvRXOK(""));
2687 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
2689 if (ivers($]) < ivers(5.5)) {
2690 skip 'no qr// objects in this perl', 2;
2692 my $qr = eval 'qr/./';
2693 ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true");
2694 ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
2697 ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
2698 ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
2699 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
2700 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
2702 ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6");
2703 if (ord("A") == 65) {
2704 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
2705 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
2708 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
2709 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
2712 ok( Devel::PPPort::isALNUMC_L1(ord("5")));
2713 ok( Devel::PPPort::isALNUMC_L1(0xFC));
2714 ok(! Devel::PPPort::isALNUMC_L1(0xB6));
2716 ok( Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL");
2717 ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL");
2719 ok( Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A");
2720 ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A");
2722 ok( Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1");
2723 ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1");
2725 my $way_too_early_msg = 'UTF-8 not implemented on this perl';
2727 # For the other properties, we test every code point from 0.255, and a
2728 # smattering of higher ones. First populate a hash with keys like '65:ALPHA'
2729 # to indicate that the code point there is alphabetic
2732 for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
2735 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2736 $types{"$native:ALPHA"} = 1;
2737 $types{"$native:ALPHANUMERIC"} = 1;
2738 $types{"$native:IDFIRST"} = 1;
2739 $types{"$native:IDCONT"} = 1;
2740 $types{"$native:PRINT"} = 1;
2741 $types{"$native:WORDCHAR"} = 1;
2743 for $i (0x30..0x39, 0x660, 0xFF19) {
2744 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2745 $types{"$native:ALPHANUMERIC"} = 1;
2746 $types{"$native:DIGIT"} = 1;
2747 $types{"$native:IDCONT"} = 1;
2748 $types{"$native:WORDCHAR"} = 1;
2749 $types{"$native:GRAPH"} = 1;
2750 $types{"$native:PRINT"} = 1;
2751 $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
2755 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2756 $types{"$native:ASCII"} = 1;
2758 for $i (0..0x1f, 0x7F..0x9F) {
2759 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2760 $types{"$native:CNTRL"} = 1;
2762 for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
2763 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2764 $types{"$native:GRAPH"} = 1;
2765 $types{"$native:PRINT"} = 1;
2767 for $i (0x09, 0x20, 0xA0) {
2768 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2769 $types{"$native:BLANK"} = 1;
2770 $types{"$native:SPACE"} = 1;
2771 $types{"$native:PSXSPC"} = 1;
2772 $types{"$native:PRINT"} = 1 if $i > 0x09;
2774 for $i (0x09..0x0D, 0x85, 0x2029) {
2775 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2776 $types{"$native:SPACE"} = 1;
2777 $types{"$native:PSXSPC"} = 1;
2779 for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
2780 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2781 $types{"$native:UPPER"} = 1;
2782 $types{"$native:XDIGIT"} = 1 if $i < 0x47;
2784 for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
2785 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2786 $types{"$native:LOWER"} = 1;
2787 $types{"$native:XDIGIT"} = 1 if $i < 0x67;
2789 for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
2790 0xB7, 0xBB, 0xBF, 0x5BE)
2792 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2793 $types{"$native:PUNCT"} = 1;
2794 $types{"$native:GRAPH"} = 1;
2795 $types{"$native:PRINT"} = 1;
2799 $types{"$i:WORDCHAR"} = 1;
2800 $types{"$i:IDFIRST"} = 1;
2801 $types{"$i:IDCONT"} = 1;
2803 # Now find all the unique code points included above.
2804 my %code_points_to_test;
2806 for $key (keys %types) {
2808 $code_points_to_test{$key} = 1;
2812 for $i (sort { $a <=> $b } keys %code_points_to_test) {
2813 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2814 my $hex = sprintf("0x%02X", $native);
2816 # And for each code point test each of the classes
2818 for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
2819 IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
2822 if ($i < 256) { # For the ones that can fit in a byte, test each of
2825 for $suffix ("", "_A", "_L1", "_uvchr") {
2826 my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/)
2827 ? 0 # Fail on non-ASCII unless unicode
2828 : ($types{"$native:$class"} || 0);
2829 if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') {
2830 skip("No UTF-8 on this perl", 1);
2834 my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
2835 local $SIG{__WARN__} = sub {};
2836 my $is = eval $eval_string || 0;
2837 die "eval 'For $i: $eval_string' gave $@" if $@;
2838 is($is, $should_be, "'$eval_string'");
2842 # For all code points, test the '_utf8' macros
2844 for $sub_fcn ("", "_LC") {
2846 if (ivers($]) < ivers(5.6)) {
2847 $skip = $way_too_early_msg;
2849 elsif (ivers($]) < ivers(5.7) && $native > 255) {
2850 $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points";
2852 elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
2853 $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH";
2855 elsif ($sub_fcn eq '_LC' && $i < 256) {
2856 $skip = "Testing of code points whose results depend on locale is skipped ";
2858 my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe";
2865 $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native);
2866 my $should_be = $types{"$native:$class"} || 0;
2867 my $eval_string = "$fcn(\"$utf8\", 0)";
2868 local $SIG{__WARN__} = sub {};
2869 my $is = eval $eval_string || 0;
2870 die "eval 'For $i, $eval_string' gave $@" if $@;
2871 is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
2874 # And for the high code points, test that a too short malformation (the
2875 # -1) causes it to fail
2880 elsif (ivers($]) >= ivers(5.25.9)) {
2881 skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1);
2884 my $eval_string = "$fcn(\"$utf8\", -1)";
2885 local $SIG{__WARN__} = sub {};
2886 my $is = eval "$eval_string" || 0;
2887 die "eval '$eval_string' gave $@" if $@;
2888 is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
2895 my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ],
2896 [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2897 Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2900 'FOLD' => [ [ ord('C'), ord('c') ],
2901 [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2902 Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2904 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2907 'UPPER' => [ [ ord('a'), ord('A'), ],
2908 [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0),
2909 Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ],
2911 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2914 'TITLE' => [ [ ord('c'), ord('C'), ],
2915 [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2),
2916 Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ],
2918 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2924 for $name (keys %case_changing) {
2925 my @code_points_to_test = @{$case_changing{$name}};
2927 for $unchanged (@code_points_to_test) {
2928 my @pair = @$unchanged;
2929 my $original = $pair[0];
2930 my $changed = $pair[1];
2931 my $utf8_changed = $changed;
2932 my $is_cp = $utf8_changed =~ /^\d+$/;
2933 my $should_be_bytes;
2934 if (ivers($]) >= ivers(5.6)) {
2936 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2937 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2940 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/';
2941 $should_be_bytes = length $utf8_changed;
2945 my $fcn = "to${name}_uvchr";
2948 if (ivers($]) < ivers(5.6)) {
2949 $skip = $way_too_early_msg;
2952 $skip = "Can't do uvchr on a multi-char string";
2959 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2960 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2963 my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]';
2964 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/';
2965 $should_be_bytes = length $utf8_changed;
2968 my $ret = eval "Devel::PPPort::$fcn($original)";
2969 my $fail = $@; # Have to save $@, as it gets destroyed
2970 is ($fail, "", "$fcn($original) didn't fail");
2971 my $first = (ivers($]) != ivers(5.6))
2972 ? substr($utf8_changed, 0, 1)
2973 : $utf8_changed, 0, 1;
2974 is($ret->[0], ord $first,
2975 "ord of $fcn($original) is $changed");
2976 is($ret->[1], $utf8_changed,
2977 "UTF-8 of of $fcn($original) is correct");
2978 is($ret->[2], $should_be_bytes,
2979 "Length of $fcn($original) is $should_be_bytes");
2983 for $truncate (0..2) {
2985 if (ivers($]) < ivers(5.6)) {
2986 $skip = $way_too_early_msg;
2988 elsif (! $is_cp && ivers($]) < ivers(5.7.3)) {
2989 $skip = "Multi-character case change not implemented until 5.7.3";
2991 elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) {
2992 $skip = "Zero length inputs cause assertion failure; test dies in modern perls";
2994 elsif ($truncate > 0 && length $changed > 1) {
2995 $skip = "Don't test shortened multi-char case changes";
2997 elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) {
2998 $skip = "Don't try to test shortened single bytes";
3004 my $fcn = "to${name}_utf8_safe";
3005 my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original);
3006 my $real_truncate = ($truncate < 2)
3007 ? $truncate : $should_be_bytes;
3008 my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)";
3009 my $ret = eval "no warnings; $eval_string" || 0;
3010 my $fail = $@; # Have to save $@, as it gets destroyed
3011 if ($truncate == 0) {
3012 is ($fail, "", "Didn't fail on full length input");
3013 my $first = (ivers($]) != ivers(5.6))
3014 ? substr($utf8_changed, 0, 1)
3015 : $utf8_changed, 0, 1;
3016 is($ret->[0], ord $first,
3017 "ord of $fcn($original) is $changed");
3018 is($ret->[1], $utf8_changed,
3019 "UTF-8 of of $fcn($original) is correct");
3020 is($ret->[2], $should_be_bytes,
3021 "Length of $fcn($original) is $should_be_bytes");
3024 is ($fail, eval 'qr/Malformed UTF-8 character/',
3025 "Gave appropriate error for short char: $original");
3026 skip("Expected failure means remaining tests for"
3027 . " this aren't relevant", 3);
3034 is(&Devel::PPPort::av_top_index([1,2,3]), 2);
3035 is(&Devel::PPPort::av_tindex([1,2,3,4]), 3);
3036 is(&Devel::PPPort::av_count([1,2,3,4]), 4);