1 ################################################################################
3 ## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
7 ## This program is free software; you can redistribute it and/or
8 ## modify it under the same terms as Perl itself.
10 ################################################################################
27 PERL_USE_GCC_BRACE_GROUPS
41 __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
42 __UNDEFINED__ OpHAS_SIBLING(o) (cBOOL((o)->op_sibling))
43 __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling)
44 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
45 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
46 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
47 __UNDEFINED__ HEf_SVKEY -2
49 #if defined(DEBUGGING) && !defined(__COVERITY__)
50 __UNDEFINED__ __ASSERT_(statement) assert(statement),
52 __UNDEFINED__ __ASSERT_(statement)
55 __UNDEFINED__ __has_builtin(x) 0
57 #if __has_builtin(__builtin_unreachable)
58 # define D_PPP_HAS_BUILTIN_UNREACHABLE
59 #elif (defined(__GNUC__) && ( __GNUC__ > 4 \
60 || __GNUC__ == 4 && __GNUC_MINOR__ >= 5))
61 # define D_PPP_HAS_BUILTIN_UNREACHABLE
66 # define ASSUME(x) assert(x)
67 # elif defined(_MSC_VER)
68 # define ASSUME(x) __assume(x)
69 # elif defined(__ARMCC_VERSION)
70 # define ASSUME(x) __promise(x)
71 # elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE)
72 # define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable())
74 # define ASSUME(x) assert(x)
79 # ifdef D_PPP_HAS_BUILTIN_UNREACHABLE
80 # define NOT_REACHED \
82 ASSUME(!"UNREACHABLE"); __builtin_unreachable(); \
84 # elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux))
87 # define NOT_REACHED ASSUME(!"UNREACHABLE")
94 # define WIDEST_UTYPE U64TYPE
96 # define WIDEST_UTYPE unsigned Quad_t
99 # define WIDEST_UTYPE U32
103 /* These could become provided if/when they become part of the public API */
104 __UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \
105 (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
106 __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \
107 ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \
108 : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \
109 : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
111 /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a
113 #undef FITS_IN_8_BITS /* handy.h version uses a core-only constant */
114 __UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) ( (sizeof(c) == 1) \
115 || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
117 /* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below
118 * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
119 * point. That is so that it can automatically get the bug fixes done in this
121 #define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro) \
124 : UTF8_IS_INVARIANT((s)[0]) \
125 ? is ## macro ## _L1((s)[0]) \
126 : (((e) - (s)) < UTF8SKIP(s)) \
128 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
129 /* The cast in the line below is only to silence warnings */ \
130 ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
131 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
132 & UTF_START_MASK(2), \
134 : is ## macro ## _utf8(s))
136 /* Create the macro for "is'macro'_LC_utf8_safe(s, e)". For code points below
137 * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
138 * point. That is so that it can automatically get the bug fixes done in this
140 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro) \
143 : UTF8_IS_INVARIANT((s)[0]) \
144 ? is ## macro ## _LC((s)[0]) \
145 : (((e) - (s)) < UTF8SKIP(s)) \
147 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
148 /* The cast in the line below is only to silence warnings */ \
149 ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
150 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
151 & UTF_START_MASK(2), \
153 : is ## macro ## _utf8(s))
155 /* A few of the early functions are broken. For these and the non-LC case,
156 * machine generated code is substituted. But that code doesn't work for
157 * locales. This is just like the above macro, but at the end, we call the
158 * macro we've generated for the above 255 case, which is correct since locale
159 * isn't involved. This will generate extra code to handle the 0-255 inputs,
160 * but hopefully it will be optimized out by the C compiler. But just in case
161 * it isn't, this macro is only used on the few versions that are broken */
163 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro) \
166 : UTF8_IS_INVARIANT((s)[0]) \
167 ? is ## macro ## _LC((s)[0]) \
168 : (((e) - (s)) < UTF8SKIP(s)) \
170 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
171 /* The cast in the line below is only to silence warnings */ \
172 ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
173 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
174 & UTF_START_MASK(2), \
176 : is ## macro ## _utf8_safe(s, e))
178 __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
179 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
181 #ifndef PERL_UNUSED_DECL
183 # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
184 # define PERL_UNUSED_DECL
186 # define PERL_UNUSED_DECL __attribute__((unused))
189 # define PERL_UNUSED_DECL
193 #ifndef PERL_UNUSED_ARG
194 # if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */
196 # define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x))
198 # define PERL_UNUSED_ARG(x) ((void)x)
202 #ifndef PERL_UNUSED_VAR
203 # define PERL_UNUSED_VAR(x) ((void)x)
206 #ifndef PERL_UNUSED_CONTEXT
208 # define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl)
210 # define PERL_UNUSED_CONTEXT
214 #ifndef PERL_UNUSED_RESULT
215 # if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
216 # define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
218 # define PERL_UNUSED_RESULT(v) ((void)(v))
222 __UNDEFINED__ NOOP /*EMPTY*/(void)0
223 __UNDEFINED__ dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL
226 # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
227 # define NVTYPE long double
229 # define NVTYPE double
235 # if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
237 # define INT2PTR(any,d) (any)(d)
239 # if PTRSIZE == LONGSIZE
240 # define PTRV unsigned long
242 # define PTRV unsigned
244 # define INT2PTR(any,d) (any)(PTRV)(d)
249 # if PTRSIZE == LONGSIZE
250 # define PTR2ul(p) (unsigned long)(p)
252 # define PTR2ul(p) INT2PTR(unsigned long,p)
256 __UNDEFINED__ PTR2nat(p) (PTRV)(p)
257 __UNDEFINED__ NUM2PTR(any,d) (any)PTR2nat(d)
258 __UNDEFINED__ PTR2IV(p) INT2PTR(IV,p)
259 __UNDEFINED__ PTR2UV(p) INT2PTR(UV,p)
260 __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p)
262 #undef START_EXTERN_C
266 # define START_EXTERN_C extern "C" {
267 # define END_EXTERN_C }
268 # define EXTERN_C extern "C"
270 # define START_EXTERN_C
271 # define END_EXTERN_C
272 # define EXTERN_C extern
275 #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
276 # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
277 __UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN
281 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus)
282 # ifndef PERL_USE_GCC_BRACE_GROUPS
283 # define PERL_USE_GCC_BRACE_GROUPS
289 #ifdef PERL_USE_GCC_BRACE_GROUPS
290 # define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */
293 # if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__)
294 # define STMT_START if (1)
295 # define STMT_END else (void)0
297 # define STMT_START do
298 # define STMT_END while (0)
302 __UNDEFINED__ boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no)
304 /* DEFSV appears first in 5.004_56 */
305 __UNDEFINED__ DEFSV GvSV(PL_defgv)
306 __UNDEFINED__ SAVE_DEFSV SAVESPTR(GvSV(PL_defgv))
307 __UNDEFINED__ DEFSV_set(sv) (DEFSV = (sv))
309 /* Older perls (<=5.003) lack AvFILLp */
310 __UNDEFINED__ AvFILLp AvFILL
312 __UNDEFINED__ av_tindex AvFILL
313 __UNDEFINED__ av_top_index AvFILL
314 __UNDEFINED__ av_count(av) (AvFILL(av)+1)
316 __UNDEFINED__ ERRSV get_sv("@",FALSE)
319 * This function's backport doesn't support the length parameter, but
320 * rather ignores it. Portability can only be ensured if the length
321 * parameter is used for speed reasons, but the length can always be
322 * correctly computed from the string argument.
325 __UNDEFINED__ gv_stashpvn(str,len,create) gv_stashpv(str,create)
328 __UNDEFINED__ get_cv perl_get_cv
329 __UNDEFINED__ get_sv perl_get_sv
330 __UNDEFINED__ get_av perl_get_av
331 __UNDEFINED__ get_hv perl_get_hv
334 __UNDEFINED__ dUNDERBAR dNOOP
335 __UNDEFINED__ UNDERBAR DEFSV
337 __UNDEFINED__ dAX I32 ax = MARK - PL_stack_base + 1
338 __UNDEFINED__ dITEMS I32 items = SP - MARK
340 __UNDEFINED__ dXSTARG SV * targ = sv_newmortal()
342 __UNDEFINED__ dAXMARK I32 ax = POPMARK; \
343 SV ** const mark = PL_stack_base + ax++
346 __UNDEFINED__ XSprePUSH (sp = PL_stack_base + ax - 1)
348 #if { VERSION < 5.005 }
350 # define XSRETURN(off) \
352 PL_stack_sp = PL_stack_base + ax + ((off) - 1); \
357 __UNDEFINED__ XSPROTO(name) void name(pTHX_ CV* cv)
358 __UNDEFINED__ SVfARG(p) ((void*)(p))
360 __UNDEFINED__ PERL_ABS(x) ((x) < 0 ? -(x) : (x))
362 __UNDEFINED__ dVAR dNOOP
364 __UNDEFINED__ SVf "_"
366 __UNDEFINED__ CPERLscope(x) x
368 __UNDEFINED__ PERL_HASH(hash,str,len) \
370 const char *s_PeRlHaSh = str; \
371 I32 i_PeRlHaSh = len; \
372 U32 hash_PeRlHaSh = 0; \
373 while (i_PeRlHaSh--) \
374 hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
375 (hash) = hash_PeRlHaSh; \
378 #ifndef PERLIO_FUNCS_DECL
379 # ifdef PERLIO_FUNCS_CONST
380 # define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs
381 # define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs)
383 # define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs
384 # define PERLIO_FUNCS_CAST(funcs) (funcs)
388 /* provide these typedefs for older perls */
389 #if { VERSION < 5.9.3 }
392 typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto);
394 typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX);
397 typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
401 /* On versions without NATIVE_TO_ASCII, only ASCII is supported */
402 #if defined(EBCDIC) && defined(NATIVE_TO_ASCI)
403 __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
404 __UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
405 __UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
406 __UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
408 __UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
409 __UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
410 __UNDEFINED__ NATIVE_TO_UNI(c) (c)
411 __UNDEFINED__ UNI_TO_NATIVE(c) (c)
414 /* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
415 EBCDIC is not supported on versions earlier than 5.7.1
418 /* The meaning of this changed; use the modern version */
423 /* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
424 This is equivalent to the corresponding isSPACE-type macro. On perls
425 before 5.18, this matched a vertical tab and SPACE didn't. But the
426 ppport.h SPACE version does match VT in all perl releases. Since VT's are
427 extremely rarely found in real-life files, this difference effectively
430 /* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
431 Until Perl 5.18, this did not match the vertical tab (VT). The ppport.h
432 version does match it in all perl releases. Since VT's are extremely rarely
433 found in real-life files, this difference effectively doesn't matter */
437 /* This is the first version where these macros are fully correct on EBCDIC
438 * platforms. Relying on the C library functions, as earlier releases did,
439 * causes problems with locales */
440 # if { VERSION < 5.22.0 }
450 # undef isALPHANUMERIC
451 # undef isALPHANUMERIC_A
452 # undef isALPHANUMERIC_L1
494 # undef isWORDCHAR_L1
500 __UNDEFINED__ isASCII(c) (isCNTRL(c) || isPRINT(c))
502 /* The below is accurate for all EBCDIC code pages supported by
503 * all the versions of Perl overridden by this */
504 __UNDEFINED__ isCNTRL(c) ( (c) == '\0' || (c) == '\a' || (c) == '\b' \
505 || (c) == '\f' || (c) == '\n' || (c) == '\r' \
506 || (c) == '\t' || (c) == '\v' \
507 || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
508 || (c) == 7 /* U+7F DEL */ \
509 || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
511 || (c) == 0x18 /* U+18 CAN */ \
512 || (c) == 0x19 /* U+19 EOM */ \
513 || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
514 || (c) == 0x26 /* U+17 ETB */ \
515 || (c) == 0x27 /* U+1B ESC */ \
516 || (c) == 0x2D /* U+05 ENQ */ \
517 || (c) == 0x2E /* U+06 ACK */ \
518 || (c) == 0x32 /* U+16 SYN */ \
519 || (c) == 0x37 /* U+04 EOT */ \
520 || (c) == 0x3C /* U+14 DC4 */ \
521 || (c) == 0x3D /* U+15 NAK */ \
522 || (c) == 0x3F /* U+1A SUB */ \
525 #if '^' == 106 /* EBCDIC POSIX-BC */
526 # define D_PPP_OUTLIER_CONTROL 0x5F
527 #else /* EBCDIC 1047 037 */
528 # define D_PPP_OUTLIER_CONTROL 0xFF
531 /* The controls are everything below blank, plus one outlier */
532 __UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' ' \
533 || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
534 /* The ordering of the tests in this and isUPPER are to exclude most characters
536 __UNDEFINED__ isLOWER(c) ( (c) >= 'a' && (c) <= 'z' \
538 || ((c) >= 'j' && (c) <= 'r') \
540 __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' \
542 || ((c) >= 'J' && (c) <= 'R') \
545 #else /* Above is EBCDIC; below is ASCII */
547 # if { VERSION < 5.4.0 }
548 /* The implementation of these in older perl versions can give wrong results if
549 * the C program locale is set to other than the C locale */
564 # if { VERSION == 5.7.0 } /* this perl made space GRAPH */
568 # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
572 # if { VERSION < 5.10.0 }
573 /* earlier perls included all of the isSPACE() characters, which is wrong. The
574 * version provided by Devel::PPPort always overrides an existing buggy
580 # if { VERSION < 5.14.0 }
581 /* earlier perls always returned true if the parameter was a signed char */
586 # if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
590 # if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
594 # if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
601 __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127)
602 __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
603 __UNDEFINED__ isCNTRL_L1(c) ( (WIDEST_UTYPE) (c) < ' ' \
604 || inRANGE((c), 0x7F, 0x9F))
605 __UNDEFINED__ isLOWER(c) inRANGE((c), 'a', 'z')
606 __UNDEFINED__ isUPPER(c) inRANGE((c), 'A', 'Z')
608 #endif /* Below are definitions common to EBCDIC and ASCII */
610 __UNDEFINED__ isASCII_L1(c) isASCII(c)
611 __UNDEFINED__ isASCII_LC(c) isASCII(c)
612 __UNDEFINED__ isALNUM(c) isWORDCHAR(c)
613 __UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c)
614 __UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c)
615 __UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c))
616 __UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))
617 __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
618 __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
619 __UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c))
620 __UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t')
621 __UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \
622 || ( FITS_IN_8_BITS(c) \
623 && NATIVE_TO_LATIN1((U8) c) == 0xA0))
624 __UNDEFINED__ isBLANK_LC(c) isBLANK(c)
625 __UNDEFINED__ isDIGIT(c) inRANGE(c, '0', '9')
626 __UNDEFINED__ isDIGIT_L1(c) isDIGIT(c)
627 __UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c))
628 __UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \
630 && NATIVE_TO_LATIN1((U8) c) != 0xA0)
631 __UNDEFINED__ isIDCONT(c) isWORDCHAR(c)
632 __UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c)
633 __UNDEFINED__ isIDCONT_LC(c) isWORDCHAR_LC(c)
634 __UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_')
635 __UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_')
636 __UNDEFINED__ isIDFIRST_LC(c) (isALPHA_LC(c) || (U8) (c) == '_')
637 __UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \
638 || ( FITS_IN_8_BITS(c) \
639 && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \
640 && NATIVE_TO_LATIN1((U8) c) != 0xF7) \
641 || NATIVE_TO_LATIN1((U8) c) == 0xAA \
642 || NATIVE_TO_LATIN1((U8) c) == 0xBA \
643 || NATIVE_TO_LATIN1((U8) c) == 0xB5)))
644 __UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0')
645 __UNDEFINED__ isOCTAL_L1(c) isOCTAL(c)
646 __UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ')
647 __UNDEFINED__ isPRINT_L1(c) (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c))
648 __UNDEFINED__ isPSXSPC(c) isSPACE(c)
649 __UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c)
650 __UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \
651 || (c) == '#' || (c) == '$' || (c) == '%' \
652 || (c) == '&' || (c) == '\'' || (c) == '(' \
653 || (c) == ')' || (c) == '*' || (c) == '+' \
654 || (c) == ',' || (c) == '.' || (c) == '/' \
655 || (c) == ':' || (c) == ';' || (c) == '<' \
656 || (c) == '=' || (c) == '>' || (c) == '?' \
657 || (c) == '@' || (c) == '[' || (c) == '\\' \
658 || (c) == ']' || (c) == '^' || (c) == '_' \
659 || (c) == '`' || (c) == '{' || (c) == '|' \
660 || (c) == '}' || (c) == '~')
661 __UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \
662 || ( FITS_IN_8_BITS(c) \
663 && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \
664 || NATIVE_TO_LATIN1((U8) c) == 0xA7 \
665 || NATIVE_TO_LATIN1((U8) c) == 0xAB \
666 || NATIVE_TO_LATIN1((U8) c) == 0xB6 \
667 || NATIVE_TO_LATIN1((U8) c) == 0xB7 \
668 || NATIVE_TO_LATIN1((U8) c) == 0xBB \
669 || NATIVE_TO_LATIN1((U8) c) == 0xBF)))
670 __UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \
671 || (c) == '\v' || (c) == '\f')
672 __UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \
673 || (FITS_IN_8_BITS(c) \
674 && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \
675 || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
676 __UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \
677 || (FITS_IN_8_BITS(c) \
678 && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
679 && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
680 && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
681 __UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_')
682 __UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c))
683 __UNDEFINED__ isWORDCHAR_LC(c) (isIDFIRST_LC(c) || isDIGIT_LC(c))
684 __UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \
685 || inRANGE((c), 'a', 'f') \
686 || inRANGE((c), 'A', 'F'))
687 __UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c)
688 __UNDEFINED__ isXDIGIT_LC(c) isxdigit(c)
690 __UNDEFINED__ isALNUM_A(c) isALNUM(c)
691 __UNDEFINED__ isALNUMC_A(c) isALNUMC(c)
692 __UNDEFINED__ isALPHA_A(c) isALPHA(c)
693 __UNDEFINED__ isALPHANUMERIC_A(c) isALPHANUMERIC(c)
694 __UNDEFINED__ isASCII_A(c) isASCII(c)
695 __UNDEFINED__ isBLANK_A(c) isBLANK(c)
696 __UNDEFINED__ isCNTRL_A(c) isCNTRL(c)
697 __UNDEFINED__ isDIGIT_A(c) isDIGIT(c)
698 __UNDEFINED__ isGRAPH_A(c) isGRAPH(c)
699 __UNDEFINED__ isIDCONT_A(c) isIDCONT(c)
700 __UNDEFINED__ isIDFIRST_A(c) isIDFIRST(c)
701 __UNDEFINED__ isLOWER_A(c) isLOWER(c)
702 __UNDEFINED__ isOCTAL_A(c) isOCTAL(c)
703 __UNDEFINED__ isPRINT_A(c) isPRINT(c)
704 __UNDEFINED__ isPSXSPC_A(c) isPSXSPC(c)
705 __UNDEFINED__ isPUNCT_A(c) isPUNCT(c)
706 __UNDEFINED__ isSPACE_A(c) isSPACE(c)
707 __UNDEFINED__ isUPPER_A(c) isUPPER(c)
708 __UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c)
709 __UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c)
711 __UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s)))
712 __UNDEFINED__ isASCII_uvchr(c) (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0)
714 #if { VERSION >= 5.006 }
715 # ifdef isALPHA_uni /* If one defined, all are; this is just an exemplar */
716 # define D_PPP_is_ctype(upper, lower, c) \
718 ? is ## upper ## _L1(c) \
719 : is ## upper ## _uni((UV) (c))) /* _uni is old synonym */
721 # define D_PPP_is_ctype(upper, lower, c) \
723 ? is ## upper ## _L1(c) \
724 : is_uni_ ## lower((UV) (c))) /* is_uni_ is even older */
727 __UNDEFINED__ isALPHA_uvchr(c) D_PPP_is_ctype(ALPHA, alpha, c)
728 __UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c))
730 __UNDEFINED__ isBLANK_uvchr(c) D_PPP_is_ctype(BLANK, blank, c)
732 __UNDEFINED__ isBLANK_uvchr(c) (FITS_IN_8_BITS(c) \
734 : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \
735 || inRANGE((UV) (c), 0x2000, 0x200A) \
736 || (UV) (c) == 0x202F /* Unicode 3.0 */\
737 || (UV) (c) == 0x205F /* Unicode 3.2 */\
738 || (UV) (c) == 0x3000))
740 __UNDEFINED__ isCNTRL_uvchr(c) D_PPP_is_ctype(CNTRL, cntrl, c)
741 __UNDEFINED__ isDIGIT_uvchr(c) D_PPP_is_ctype(DIGIT, digit, c)
742 __UNDEFINED__ isGRAPH_uvchr(c) D_PPP_is_ctype(GRAPH, graph, c)
743 __UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c)
744 __UNDEFINED__ isIDFIRST_uvchr(c) D_PPP_is_ctype(IDFIRST, idfirst, c)
745 __UNDEFINED__ isLOWER_uvchr(c) D_PPP_is_ctype(LOWER, lower, c)
746 __UNDEFINED__ isPRINT_uvchr(c) D_PPP_is_ctype(PRINT, print, c)
747 __UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c)
748 __UNDEFINED__ isPUNCT_uvchr(c) D_PPP_is_ctype(PUNCT, punct, c)
749 __UNDEFINED__ isSPACE_uvchr(c) D_PPP_is_ctype(SPACE, space, c)
750 __UNDEFINED__ isUPPER_uvchr(c) D_PPP_is_ctype(UPPER, upper, c)
751 __UNDEFINED__ isXDIGIT_uvchr(c) D_PPP_is_ctype(XDIGIT, xdigit, c)
752 __UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c) \
753 ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c))
755 __UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
756 # ifdef isALPHANUMERIC_utf8
757 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
758 D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
760 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
761 (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
764 /* This was broken before 5.18, and just use this instead of worrying about
765 * which releases the official works on */
767 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
768 ( ( LIKELY((e) > (s)) ) ? /* Machine generated */ \
769 ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1 \
770 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
771 ( ( 0xC2 == ((const U8*)s)[0] ) ? \
772 ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 ) \
773 : ( 0xE1 == ((const U8*)s)[0] ) ? \
774 ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
775 : ( 0xE2 == ((const U8*)s)[0] ) ? \
776 ( ( 0x80 == ((const U8*)s)[1] ) ? \
777 ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
778 : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
779 : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
783 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
785 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
786 ( ( LIKELY((e) > (s)) ) ? \
787 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
788 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
789 ( ( 0x80 == ((const U8*)s)[0] ) ? \
790 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
791 : ( 0xBC == ((const U8*)s)[0] ) ? \
792 ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
793 : ( 0xCA == ((const U8*)s)[0] ) ? \
794 ( ( 0x41 == ((const U8*)s)[1] ) ? \
795 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
796 : ( 0x42 == ((const U8*)s)[1] ) ? \
797 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
798 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
799 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
803 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
805 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
806 ( ( LIKELY((e) > (s)) ) ? \
807 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
808 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
809 ( ( 0x78 == ((const U8*)s)[0] ) ? \
810 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
811 : ( 0xBD == ((const U8*)s)[0] ) ? \
812 ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
813 : ( 0xCA == ((const U8*)s)[0] ) ? \
814 ( ( 0x41 == ((const U8*)s)[1] ) ? \
815 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
816 : ( 0x42 == ((const U8*)s)[1] ) ? \
817 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
818 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
819 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
824 # error Unknown character set
827 __UNDEFINED__ isCNTRL_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
828 __UNDEFINED__ isDIGIT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
829 __UNDEFINED__ isGRAPH_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
830 # ifdef isIDCONT_utf8
831 __UNDEFINED__ isIDCONT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
833 __UNDEFINED__ isIDCONT_utf8_safe(s,e) isWORDCHAR_utf8_safe(s,e)
836 __UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
837 __UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
838 __UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
840 # undef isPSXSPC_utf8_safe /* Use the modern definition */
841 __UNDEFINED__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e)
843 __UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
844 __UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
845 __UNDEFINED__ isUPPER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
847 # ifdef isWORDCHAR_utf8
848 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
850 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) \
851 (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_')
854 /* This was broken before 5.12, and just use this instead of worrying about
855 * which releases the official works on */
857 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
858 ( ( LIKELY((e) > (s)) ) ? \
859 ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
860 : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
861 ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
862 : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
865 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
867 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
868 ( ( LIKELY((e) > (s)) ) ? \
869 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
870 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
871 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
872 : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
875 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
877 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
878 ( ( LIKELY((e) > (s)) ) ? \
879 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
880 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
881 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
882 : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
886 # error Unknown character set
889 __UNDEFINED__ isALPHA_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA)
890 # ifdef isALPHANUMERIC_utf8
891 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \
892 D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC)
894 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e) \
895 (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e))
898 __UNDEFINED__ isBLANK_LC_utf8_safe(s,e) \
899 D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK)
900 __UNDEFINED__ isCNTRL_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL)
901 __UNDEFINED__ isDIGIT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT)
902 __UNDEFINED__ isGRAPH_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH)
903 # ifdef isIDCONT_utf8
904 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT)
906 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e) isWORDCHAR_LC_utf8_safe(s,e)
909 __UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST)
910 __UNDEFINED__ isLOWER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER)
911 __UNDEFINED__ isPRINT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT)
913 # undef isPSXSPC_LC_utf8_safe /* Use the modern definition */
914 __UNDEFINED__ isPSXSPC_LC_utf8_safe(s,e) isSPACE_LC_utf8_safe(s,e)
916 __UNDEFINED__ isPUNCT_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT)
917 __UNDEFINED__ isSPACE_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE)
918 __UNDEFINED__ isUPPER_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER)
920 # ifdef isWORDCHAR_utf8
921 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR)
923 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) \
924 (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_')
927 __UNDEFINED__ isXDIGIT_LC_utf8_safe(s,e) \
928 D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT)
930 /* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe,
931 * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe,
932 * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe,
933 * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe,
934 * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe,
935 * isXDIGIT_utf8_safe,
936 * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe,
937 * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe,
938 * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe,
939 * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe,
940 * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe,
941 * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe,
942 * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr,
943 * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr,
944 * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr,
945 * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr,
946 * isWORDCHAR_uvchr, isXDIGIT_uvchr
948 * The UTF-8 handling is buggy in early Perls, and this can give inaccurate
949 * results for code points above 0xFF, until the implementation started
950 * settling down in 5.12 and 5.14 */
954 #define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \
955 " \\x%02x (too short; %d bytes available, need" \
957 /* Perls starting here had a new API which handled multi-character results */
958 #if { VERSION >= 5.7.3 }
960 __UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l))
961 __UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l))
962 __UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l))
963 __UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l))
965 # if { VERSION != 5.15.6 } /* Just this version is broken */
967 /* Prefer the macro to the function */
968 # if defined toLOWER_utf8
969 # define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l)
971 # define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l)
973 # if defined toTITLE_utf8
974 # define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l)
976 # define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l)
978 # if defined toUPPER_utf8
979 # define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l)
981 # define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l)
983 # if defined toFOLD_utf8
984 # define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l)
986 # define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l)
988 # else /* Below is 5.15.6, which failed to make the macros available
989 # outside of core, so we have to use the 'Perl_' form. khw
990 # decided it was easier to just handle this case than have to
991 # document the exception, and make an exception in the tests below
993 # define D_PPP_TO_LOWER_CALLEE(s,r,l) \
994 Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL)
995 # define D_PPP_TO_TITLE_CALLEE(s,r,l) \
996 Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL)
997 # define D_PPP_TO_UPPER_CALLEE(s,r,l) \
998 Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL)
999 # define D_PPP_TO_FOLD_CALLEE(s,r,l) \
1000 Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL)
1003 /* The actual implementation of the backported macros. If too short, croak,
1004 * otherwise call the original that doesn't have an upper limit parameter */
1005 # define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \
1006 (((((e) - (s)) <= 0) \
1007 /* We could just do nothing, but modern perls croak */ \
1008 ? (croak("Attempting case change on zero length string"), \
1009 0) /* So looks like it returns something, and will compile */ \
1010 : ((e) - (s)) < UTF8SKIP(s)) \
1011 ? (croak(D_PPP_TOO_SHORT_MSG, \
1012 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1014 : D_PPP_TO_ ## name ## _CALLEE(s,r,l))
1016 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
1017 D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l)
1018 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
1019 D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l)
1020 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
1021 D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l)
1022 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \
1023 D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l)
1025 #elif { VERSION >= 5.006 }
1027 /* Here we have UTF-8 support, but using the original API where the case
1028 * changing functions merely returned the changed code point; hence they
1029 * couldn't handle multi-character results. */
1031 # ifdef uvchr_to_utf8
1032 # define D_PPP_UV_TO_UTF8 uvchr_to_utf8
1034 # define D_PPP_UV_TO_UTF8 uv_to_utf8
1037 /* Get the utf8 of the case changed value, and store its length; then have
1038 * to re-calculate the changed case value in order to return it */
1039 # define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \
1040 (*(l) = (D_PPP_UV_TO_UTF8(s, \
1041 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \
1042 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c))))
1044 __UNDEFINED__ toLOWER_uvchr(c, s, l) \
1045 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l)
1046 __UNDEFINED__ toUPPER_uvchr(c, s, l) \
1047 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l)
1048 __UNDEFINED__ toTITLE_uvchr(c, s, l) \
1049 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l)
1050 __UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l)
1052 # define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \
1053 (((((e) - (s)) <= 0) \
1054 ? (croak("Attempting case change on zero length string"), \
1055 0) /* So looks like it returns something, and will compile */ \
1056 : ((e) - (s)) < UTF8SKIP(s)) \
1057 ? (croak(D_PPP_TOO_SHORT_MSG, \
1058 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1060 /* Get the changed code point and store its UTF-8 */ \
1061 : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \
1062 /* Then store its length, and re-get code point for return */ \
1063 *(l) = UTF8SKIP(r), to_utf8_ ## name(r))
1065 /* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe,
1066 * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr
1067 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1068 this backport does not correct them.
1070 In perls before 7.3, multi-character case changing is not implemented; this
1071 backport uses the simple case changes available in those perls. */
1073 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
1074 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l)
1075 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
1076 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l)
1077 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
1078 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l)
1080 /* Warning: toFOLD_utf8_safe, toFOLD_uvchr
1081 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1082 this backport does not correct them.
1084 In perls before 7.3, case folding is not implemented; instead, this
1085 backport substitutes simple (not multi-character, which isn't available)
1086 lowercasing. This gives the correct result in most, but not all, instances
1089 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l)
1093 /* Until we figure out how to support this in older perls... */
1094 #if { VERSION >= 5.8.0 }
1096 __UNDEFINED__ HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \
1097 SvUTF8(HeKEY_sv(he)) : \
1102 __UNDEFINED__ C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
1103 __UNDEFINED__ C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a))
1105 __UNDEFINED__ LIKELY(x) (x)
1106 __UNDEFINED__ UNLIKELY(x) (x)
1109 #if defined(PERL_USE_GCC_BRACE_GROUPS)
1110 # define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
1112 # define MUTABLE_PTR(p) ((void *) (p))
1116 __UNDEFINED__ MUTABLE_AV(p) ((AV *)MUTABLE_PTR(p))
1117 __UNDEFINED__ MUTABLE_CV(p) ((CV *)MUTABLE_PTR(p))
1118 __UNDEFINED__ MUTABLE_GV(p) ((GV *)MUTABLE_PTR(p))
1119 __UNDEFINED__ MUTABLE_HV(p) ((HV *)MUTABLE_PTR(p))
1120 __UNDEFINED__ MUTABLE_IO(p) ((IO *)MUTABLE_PTR(p))
1121 __UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
1125 typedef XSPROTO(XSPROTO_test_t);
1126 typedef XSPROTO_test_t *XSPROTO_test_t_ptr;
1128 XS(XS_Devel__PPPort_dXSTARG); /* prototype */
1129 XS(XS_Devel__PPPort_dXSTARG)
1135 PERL_UNUSED_VAR(cv);
1137 iv = SvIV(ST(0)) + 1;
1142 XS(XS_Devel__PPPort_dAXMARK); /* prototype */
1143 XS(XS_Devel__PPPort_dAXMARK)
1150 PERL_UNUSED_VAR(cv);
1152 iv = SvIV(ST(0)) - 1;
1160 XSPROTO_test_t_ptr p = &XS_Devel__PPPort_dXSTARG;
1161 newXS("Devel::PPPort::dXSTARG", *p, file);
1163 newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file);
1178 x = newOP(OP_PUSHMARK, 0);
1180 /* No siblings yet! */
1181 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1182 failures++; warn("Op should not have had a sib");
1186 /* Add 2 siblings */
1189 for (i = 0; i < 2; i++) {
1190 OP *newsib = newOP(OP_PUSHMARK, 0);
1191 OpMORESIB_set(kid, newsib);
1193 kid = OpSIBLING(kid);
1196 middlekid = OpSIBLING(x);
1198 /* Should now have a sibling */
1199 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1200 failures++; warn("Op should have had a sib after moresib_set");
1203 /* Count the siblings */
1204 for (kid = OpSIBLING(x); kid; kid = OpSIBLING(kid)) {
1209 failures++; warn("Kid had %d sibs, expected 2", count);
1212 if (OpHAS_SIBLING(lastkid) || OpSIBLING(lastkid)) {
1213 failures++; warn("Last kid should not have a sib");
1216 /* Really sets the parent, and says 'no more siblings' */
1217 OpLASTSIB_set(x, lastkid);
1219 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1220 failures++; warn("OpLASTSIB_set failed?");
1223 /* Restore the kid */
1224 OpMORESIB_set(x, lastkid);
1226 /* Try to remove it again */
1227 OpLASTSIB_set(x, NULL);
1229 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1230 failures++; warn("OpLASTSIB_set with NULL failed?");
1233 /* Try to restore with maybesib_set */
1234 OpMAYBESIB_set(x, lastkid, NULL);
1236 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1237 failures++; warn("Op should have had a sib after maybesibset");
1251 RETVAL = SvRXOK(sv);
1262 RETVAL += PTR2nat(p) != 0 ? 1 : 0;
1263 RETVAL += PTR2ul(p) != 0UL ? 2 : 0;
1264 RETVAL += PTR2UV(p) != (UV) 0 ? 4 : 0;
1265 RETVAL += PTR2IV(p) != (IV) 0 ? 8 : 0;
1266 RETVAL += PTR2NV(p) != (NV) 0 ? 16 : 0;
1267 RETVAL += p > NUM2PTR(int *, 0) ? 32 : 0;
1273 gv_stashpvn(name, create)
1277 RETVAL = gv_stashpvn(name, strlen(name), create) != NULL;
1282 get_sv(name, create)
1286 RETVAL = get_sv(name, create) != NULL;
1291 get_av(name, create)
1295 RETVAL = get_av(name, create) != NULL;
1300 get_hv(name, create)
1304 RETVAL = get_hv(name, create) != NULL;
1309 get_cv(name, create)
1313 RETVAL = get_cv(name, create) != NULL;
1321 mXPUSHp("test1", 5);
1323 mXPUSHp("test2", 5);
1333 RETVAL = newSVsv(boolSV(value));
1340 RETVAL = newSVsv(DEFSV);
1347 XPUSHs(sv_mortalcopy(DEFSV));
1350 DEFSV_set(newSVpvs("DEFSV"));
1351 XPUSHs(sv_mortalcopy(DEFSV));
1352 /* Yes, this leaks the above scalar; 5.005 with threads for some reason */
1353 /* frees it upon LEAVE, thus mortalizing it causes "attempt to free..." */
1354 /* sv_2mortal(DEFSV); */
1356 XPUSHs(sv_mortalcopy(DEFSV));
1362 RETVAL = SvTRUEx(ERRSV);
1371 RETVAL = newSVsv(UNDERBAR);
1394 #if { VERSION >= 5.004 }
1395 x = sv_2mortal(newSVpvf("[%" SVf "]", SVfARG(x)));
1401 Perl_ppaddr_t(string)
1404 Perl_ppaddr_t lower;
1406 lower = PL_ppaddr[OP_LC];
1407 mXPUSHs(newSVpv(string, 0));
1410 (void)*(lower)(aTHXR);
1415 #if { VERSION >= 5.8.0 }
1418 check_HeUTF8(utf8_key)
1428 key = SvPV(utf8_key, klen);
1429 if (SvUTF8(utf8_key)) klen *= -1;
1430 hv_store(hash, key, klen, newSVpvs("string"), 0);
1432 ent = hv_iternext(hash);
1434 mXPUSHp((HeUTF8(ent) == 0 ? "norm" : "utf8"), 4);
1443 int x[] = { 10, 11, 12, 13 };
1445 mXPUSHi(C_ARRAY_LENGTH(x)); /* 4 */
1446 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
1452 RETVAL = isBLANK(ord);
1460 RETVAL = isBLANK_A(ord);
1468 RETVAL = isBLANK_L1(ord);
1476 RETVAL = isUPPER(ord);
1484 RETVAL = isUPPER_A(ord);
1492 RETVAL = isUPPER_L1(ord);
1500 RETVAL = isLOWER(ord);
1508 RETVAL = isLOWER_A(ord);
1516 RETVAL = isLOWER_L1(ord);
1524 RETVAL = isALPHA(ord);
1532 RETVAL = isALPHA_A(ord);
1540 RETVAL = isALPHA_L1(ord);
1548 RETVAL = isWORDCHAR(ord);
1556 RETVAL = isWORDCHAR_A(ord);
1564 RETVAL = isWORDCHAR_L1(ord);
1572 RETVAL = isALPHANUMERIC(ord);
1577 isALPHANUMERIC_A(ord)
1580 RETVAL = isALPHANUMERIC_A(ord);
1588 RETVAL = isALNUM(ord);
1596 RETVAL = isALNUM_A(ord);
1604 RETVAL = isDIGIT(ord);
1612 RETVAL = isDIGIT_A(ord);
1620 RETVAL = isOCTAL(ord);
1628 RETVAL = isOCTAL_A(ord);
1636 RETVAL = isIDFIRST(ord);
1644 RETVAL = isIDFIRST_A(ord);
1652 RETVAL = isIDCONT(ord);
1660 RETVAL = isIDCONT_A(ord);
1668 RETVAL = isSPACE(ord);
1676 RETVAL = isSPACE_A(ord);
1684 RETVAL = isASCII(ord);
1692 RETVAL = isASCII_A(ord);
1700 RETVAL = isCNTRL(ord);
1708 RETVAL = isCNTRL_A(ord);
1716 RETVAL = isPRINT(ord);
1724 RETVAL = isPRINT_A(ord);
1732 RETVAL = isGRAPH(ord);
1740 RETVAL = isGRAPH_A(ord);
1748 RETVAL = isPUNCT(ord);
1756 RETVAL = isPUNCT_A(ord);
1764 RETVAL = isXDIGIT(ord);
1772 RETVAL = isXDIGIT_A(ord);
1780 RETVAL = isPSXSPC(ord);
1788 RETVAL = isPSXSPC_A(ord);
1793 isALPHANUMERIC_L1(ord)
1796 RETVAL = isALPHANUMERIC_L1(ord);
1804 RETVAL = isALNUMC_L1(ord);
1812 RETVAL = isDIGIT_L1(ord);
1820 RETVAL = isOCTAL_L1(ord);
1828 RETVAL = isIDFIRST_L1(ord);
1836 RETVAL = isIDCONT_L1(ord);
1844 RETVAL = isSPACE_L1(ord);
1852 RETVAL = isASCII_L1(ord);
1860 RETVAL = isCNTRL_L1(ord);
1868 RETVAL = isPRINT_L1(ord);
1876 RETVAL = isGRAPH_L1(ord);
1884 RETVAL = isPUNCT_L1(ord);
1892 RETVAL = isXDIGIT_L1(ord);
1900 RETVAL = isPSXSPC_L1(ord);
1908 RETVAL = isASCII_uvchr(ord);
1913 isASCII_utf8_safe(s, offset)
1917 PERL_UNUSED_ARG(offset);
1918 RETVAL = isASCII_utf8_safe(s, s + 1 + offset);
1922 #if { VERSION >= 5.006 }
1928 RETVAL = isBLANK_uvchr(ord);
1936 RETVAL = isALPHA_uvchr(ord);
1941 isALPHANUMERIC_uvchr(ord)
1944 RETVAL = isALPHANUMERIC_uvchr(ord);
1952 RETVAL = isCNTRL_uvchr(ord);
1960 RETVAL = isDIGIT_uvchr(ord);
1965 isIDFIRST_uvchr(ord)
1968 RETVAL = isIDFIRST_uvchr(ord);
1976 RETVAL = isIDCONT_uvchr(ord);
1984 RETVAL = isGRAPH_uvchr(ord);
1992 RETVAL = isLOWER_uvchr(ord);
2000 RETVAL = isPRINT_uvchr(ord);
2008 RETVAL = isPSXSPC_uvchr(ord);
2016 RETVAL = isPUNCT_uvchr(ord);
2024 RETVAL = isSPACE_uvchr(ord);
2032 RETVAL = isUPPER_uvchr(ord);
2037 isWORDCHAR_uvchr(ord)
2040 RETVAL = isWORDCHAR_uvchr(ord);
2048 RETVAL = isXDIGIT_uvchr(ord);
2053 isALPHA_utf8_safe(s, offset)
2057 RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
2062 isALPHANUMERIC_utf8_safe(s, offset)
2066 RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2071 isBLANK_utf8_safe(s, offset)
2075 RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
2080 isCNTRL_utf8_safe(s, offset)
2084 RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
2089 isDIGIT_utf8_safe(s, offset)
2093 RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2098 isGRAPH_utf8_safe(s, offset)
2102 RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
2107 isIDCONT_utf8_safe(s, offset)
2111 RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2116 isIDFIRST_utf8_safe(s, offset)
2120 RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
2125 isLOWER_utf8_safe(s, offset)
2129 RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2134 isPRINT_utf8_safe(s, offset)
2138 RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2143 isPSXSPC_utf8_safe(s, offset)
2147 RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2152 isPUNCT_utf8_safe(s, offset)
2156 RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2161 isSPACE_utf8_safe(s, offset)
2165 RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
2170 isUPPER_utf8_safe(s, offset)
2174 RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2179 isWORDCHAR_utf8_safe(s, offset)
2183 RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
2188 isXDIGIT_utf8_safe(s, offset)
2192 RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2197 isALPHA_LC_utf8_safe(s, offset)
2201 RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2206 isALPHANUMERIC_LC_utf8_safe(s, offset)
2210 RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2215 isASCII_LC_utf8_safe(s, offset)
2219 PERL_UNUSED_ARG(offset);
2220 RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset);
2225 isBLANK_LC_utf8_safe(s, offset)
2229 RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2234 isCNTRL_LC_utf8_safe(s, offset)
2238 RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2243 isDIGIT_LC_utf8_safe(s, offset)
2247 RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2252 isGRAPH_LC_utf8_safe(s, offset)
2256 RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2261 isIDCONT_LC_utf8_safe(s, offset)
2265 RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2270 isIDFIRST_LC_utf8_safe(s, offset)
2274 RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2279 isLOWER_LC_utf8_safe(s, offset)
2283 RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2288 isPRINT_LC_utf8_safe(s, offset)
2292 RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2297 isPSXSPC_LC_utf8_safe(s, offset)
2301 RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2306 isPUNCT_LC_utf8_safe(s, offset)
2310 RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2315 isSPACE_LC_utf8_safe(s, offset)
2319 RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2324 isUPPER_LC_utf8_safe(s, offset)
2328 RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2333 isWORDCHAR_LC_utf8_safe(s, offset)
2337 RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2342 isXDIGIT_LC_utf8_safe(s, offset)
2346 RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2351 toLOWER_utf8_safe(s, offset)
2355 U8 u[UTF8_MAXBYTES+1];
2362 ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2363 av_push(av, newSVuv(ret));
2365 utf8 = newSVpvn((char *) u, len);
2369 av_push(av, newSVuv(len));
2375 toTITLE_utf8_safe(s, offset)
2379 U8 u[UTF8_MAXBYTES+1];
2386 ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2387 av_push(av, newSVuv(ret));
2389 utf8 = newSVpvn((char *) u, len);
2393 av_push(av, newSVuv(len));
2399 toUPPER_utf8_safe(s, offset)
2403 U8 u[UTF8_MAXBYTES+1];
2410 ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2411 av_push(av, newSVuv(ret));
2413 utf8 = newSVpvn((char *) u, len);
2417 av_push(av, newSVuv(len));
2423 toFOLD_utf8_safe(s, offset)
2427 U8 u[UTF8_MAXBYTES+1];
2434 ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2435 av_push(av, newSVuv(ret));
2437 utf8 = newSVpvn((char *) u, len);
2441 av_push(av, newSVuv(len));
2450 U8 u[UTF8_MAXBYTES+1];
2457 ret = toLOWER_uvchr(c, u, &len);
2458 av_push(av, newSVuv(ret));
2460 utf8 = newSVpvn((char *) u, len);
2464 av_push(av, newSVuv(len));
2473 U8 u[UTF8_MAXBYTES+1];
2480 ret = toTITLE_uvchr(c, u, &len);
2481 av_push(av, newSVuv(ret));
2483 utf8 = newSVpvn((char *) u, len);
2487 av_push(av, newSVuv(len));
2496 U8 u[UTF8_MAXBYTES+1];
2503 ret = toUPPER_uvchr(c, u, &len);
2504 av_push(av, newSVuv(ret));
2506 utf8 = newSVpvn((char *) u, len);
2510 av_push(av, newSVuv(len));
2519 U8 u[UTF8_MAXBYTES+1];
2526 ret = toFOLD_uvchr(c, u, &len);
2527 av_push(av, newSVuv(ret));
2529 utf8 = newSVpvn((char *) u, len);
2533 av_push(av, newSVuv(len));
2541 LATIN1_TO_NATIVE(cp)
2544 if (cp > 255) RETVAL= cp;
2545 else RETVAL= LATIN1_TO_NATIVE(cp);
2550 NATIVE_TO_LATIN1(cp)
2553 RETVAL= NATIVE_TO_LATIN1(cp);
2561 RETVAL = av_tindex((AV*)SvRV(av));
2569 RETVAL = av_top_index((AV*)SvRV(av));
2577 RETVAL = av_count((AV*)SvRV(av));
2581 =tests plan => 26827
2583 use vars qw($my_sv @my_av %my_hv);
2585 ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true");
2586 ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false");
2589 is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED');
2590 is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED');
2592 if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) {
2594 no warnings "deprecated";
2595 no if $^V >= v5.17.9, warnings => "experimental::lexical_topic";
2597 is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred');
2598 is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony');
2600 die __FILE__ . __LINE__ . ": $@" if $@;
2603 skip("perl version outside testing range of lexical_topic", 2);
2606 my @r = &Devel::PPPort::DEFSV_modify();
2608 ok(@r == 3, "Verify got 3 elements");
2613 is(&Devel::PPPort::DEFSV(), "Fred");
2616 ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false");
2617 eval { cannot_call_this_one() };
2618 ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true");
2620 ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0));
2621 ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0));
2622 ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1));
2625 ok(&Devel::PPPort::get_sv('my_sv', 0));
2626 ok(!&Devel::PPPort::get_sv('not_my_sv', 0));
2627 ok(&Devel::PPPort::get_sv('not_my_sv', 1));
2630 ok(&Devel::PPPort::get_av('my_av', 0));
2631 ok(!&Devel::PPPort::get_av('not_my_av', 0));
2632 ok(&Devel::PPPort::get_av('not_my_av', 1));
2635 ok(&Devel::PPPort::get_hv('my_hv', 0));
2636 ok(!&Devel::PPPort::get_hv('not_my_hv', 0));
2637 ok(&Devel::PPPort::get_hv('not_my_hv', 1));
2640 ok(&Devel::PPPort::get_cv('my_cv', 0));
2641 ok(!&Devel::PPPort::get_cv('not_my_cv', 0));
2642 ok(&Devel::PPPort::get_cv('not_my_cv', 1));
2644 is(Devel::PPPort::dXSTARG(42), 43);
2645 is(Devel::PPPort::dAXMARK(4711), 4710);
2647 is(Devel::PPPort::prepush(), 42);
2649 is(join(':', Devel::PPPort::xsreturn(0)), 'test1');
2650 is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
2652 is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42");
2653 is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13");
2655 is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42');
2656 is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc');
2658 is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
2660 is(&Devel::PPPort::ptrtests(), 63);
2662 is(&Devel::PPPort::OpSIBLING_tests(), 0);
2664 if (ivers($]) >= ivers(5.9)) {
2666 is(&Devel::PPPort::check_HeUTF8("hello"), "norm");
2667 is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
2670 skip("Too early perl version", 2);
2673 @r = &Devel::PPPort::check_c_array();
2677 ok(!Devel::PPPort::SvRXOK(""));
2678 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
2680 if (ivers($]) < ivers(5.5)) {
2681 skip 'no qr// objects in this perl', 2;
2683 my $qr = eval 'qr/./';
2684 ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true");
2685 ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
2688 ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
2689 ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
2690 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
2691 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
2693 ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6");
2694 if (ord("A") == 65) {
2695 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
2696 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
2699 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
2700 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
2703 ok( Devel::PPPort::isALNUMC_L1(ord("5")));
2704 ok( Devel::PPPort::isALNUMC_L1(0xFC));
2705 ok(! Devel::PPPort::isALNUMC_L1(0xB6));
2707 ok( Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL");
2708 ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL");
2710 ok( Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A");
2711 ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A");
2713 ok( Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1");
2714 ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1");
2716 my $way_too_early_msg = 'UTF-8 not implemented on this perl';
2718 # For the other properties, we test every code point from 0.255, and a
2719 # smattering of higher ones. First populate a hash with keys like '65:ALPHA'
2720 # to indicate that the code point there is alphabetic
2723 for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
2726 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2727 $types{"$native:ALPHA"} = 1;
2728 $types{"$native:ALPHANUMERIC"} = 1;
2729 $types{"$native:IDFIRST"} = 1;
2730 $types{"$native:IDCONT"} = 1;
2731 $types{"$native:PRINT"} = 1;
2732 $types{"$native:WORDCHAR"} = 1;
2734 for $i (0x30..0x39, 0x660, 0xFF19) {
2735 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2736 $types{"$native:ALPHANUMERIC"} = 1;
2737 $types{"$native:DIGIT"} = 1;
2738 $types{"$native:IDCONT"} = 1;
2739 $types{"$native:WORDCHAR"} = 1;
2740 $types{"$native:GRAPH"} = 1;
2741 $types{"$native:PRINT"} = 1;
2742 $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
2746 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2747 $types{"$native:ASCII"} = 1;
2749 for $i (0..0x1f, 0x7F..0x9F) {
2750 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2751 $types{"$native:CNTRL"} = 1;
2753 for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
2754 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2755 $types{"$native:GRAPH"} = 1;
2756 $types{"$native:PRINT"} = 1;
2758 for $i (0x09, 0x20, 0xA0) {
2759 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2760 $types{"$native:BLANK"} = 1;
2761 $types{"$native:SPACE"} = 1;
2762 $types{"$native:PSXSPC"} = 1;
2763 $types{"$native:PRINT"} = 1 if $i > 0x09;
2765 for $i (0x09..0x0D, 0x85, 0x2029) {
2766 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2767 $types{"$native:SPACE"} = 1;
2768 $types{"$native:PSXSPC"} = 1;
2770 for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
2771 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2772 $types{"$native:UPPER"} = 1;
2773 $types{"$native:XDIGIT"} = 1 if $i < 0x47;
2775 for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
2776 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2777 $types{"$native:LOWER"} = 1;
2778 $types{"$native:XDIGIT"} = 1 if $i < 0x67;
2780 for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
2781 0xB7, 0xBB, 0xBF, 0x5BE)
2783 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2784 $types{"$native:PUNCT"} = 1;
2785 $types{"$native:GRAPH"} = 1;
2786 $types{"$native:PRINT"} = 1;
2790 $types{"$i:WORDCHAR"} = 1;
2791 $types{"$i:IDFIRST"} = 1;
2792 $types{"$i:IDCONT"} = 1;
2794 # Now find all the unique code points included above.
2795 my %code_points_to_test;
2797 for $key (keys %types) {
2799 $code_points_to_test{$key} = 1;
2803 for $i (sort { $a <=> $b } keys %code_points_to_test) {
2804 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2805 my $hex = sprintf("0x%02X", $native);
2807 # And for each code point test each of the classes
2809 for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
2810 IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
2813 if ($i < 256) { # For the ones that can fit in a byte, test each of
2816 for $suffix ("", "_A", "_L1", "_uvchr") {
2817 my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/)
2818 ? 0 # Fail on non-ASCII unless unicode
2819 : ($types{"$native:$class"} || 0);
2820 if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') {
2821 skip("No UTF-8 on this perl", 1);
2825 my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
2826 local $SIG{__WARN__} = sub {};
2827 my $is = eval $eval_string || 0;
2828 die "eval 'For $i: $eval_string' gave $@" if $@;
2829 is($is, $should_be, "'$eval_string'");
2833 # For all code points, test the '_utf8' macros
2835 for $sub_fcn ("", "_LC") {
2837 if (ivers($]) < ivers(5.6)) {
2838 $skip = $way_too_early_msg;
2840 elsif (ivers($]) < ivers(5.7) && $native > 255) {
2841 $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points";
2843 elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
2844 $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH";
2846 elsif ($sub_fcn eq '_LC' && $i < 256) {
2847 $skip = "Testing of code points whose results depend on locale is skipped ";
2849 my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe";
2856 $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native);
2857 my $should_be = $types{"$native:$class"} || 0;
2858 my $eval_string = "$fcn(\"$utf8\", 0)";
2859 local $SIG{__WARN__} = sub {};
2860 my $is = eval $eval_string || 0;
2861 die "eval 'For $i, $eval_string' gave $@" if $@;
2862 is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
2865 # And for the high code points, test that a too short malformation (the
2866 # -1) causes it to fail
2871 elsif (ivers($]) >= ivers(5.25.9)) {
2872 skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1);
2875 my $eval_string = "$fcn(\"$utf8\", -1)";
2876 local $SIG{__WARN__} = sub {};
2877 my $is = eval "$eval_string" || 0;
2878 die "eval '$eval_string' gave $@" if $@;
2879 is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
2886 my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ],
2887 [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2888 Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2891 'FOLD' => [ [ ord('C'), ord('c') ],
2892 [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2893 Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2895 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2898 'UPPER' => [ [ ord('a'), ord('A'), ],
2899 [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0),
2900 Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ],
2902 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2905 'TITLE' => [ [ ord('c'), ord('C'), ],
2906 [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2),
2907 Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ],
2909 [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2915 for $name (keys %case_changing) {
2916 my @code_points_to_test = @{$case_changing{$name}};
2918 for $unchanged (@code_points_to_test) {
2919 my @pair = @$unchanged;
2920 my $original = $pair[0];
2921 my $changed = $pair[1];
2922 my $utf8_changed = $changed;
2923 my $is_cp = $utf8_changed =~ /^\d+$/;
2924 my $should_be_bytes;
2925 if (ivers($]) >= ivers(5.6)) {
2927 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2928 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2931 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/';
2932 $should_be_bytes = length $utf8_changed;
2936 my $fcn = "to${name}_uvchr";
2939 if (ivers($]) < ivers(5.6)) {
2940 $skip = $way_too_early_msg;
2943 $skip = "Can't do uvchr on a multi-char string";
2950 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2951 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2954 my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]';
2955 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/';
2956 $should_be_bytes = length $utf8_changed;
2959 my $ret = eval "Devel::PPPort::$fcn($original)";
2960 my $fail = $@; # Have to save $@, as it gets destroyed
2961 is ($fail, "", "$fcn($original) didn't fail");
2962 my $first = (ivers($]) != ivers(5.6))
2963 ? substr($utf8_changed, 0, 1)
2964 : $utf8_changed, 0, 1;
2965 is($ret->[0], ord $first,
2966 "ord of $fcn($original) is $changed");
2967 is($ret->[1], $utf8_changed,
2968 "UTF-8 of of $fcn($original) is correct");
2969 is($ret->[2], $should_be_bytes,
2970 "Length of $fcn($original) is $should_be_bytes");
2974 for $truncate (0..2) {
2976 if (ivers($]) < ivers(5.6)) {
2977 $skip = $way_too_early_msg;
2979 elsif (! $is_cp && ivers($]) < ivers(5.7.3)) {
2980 $skip = "Multi-character case change not implemented until 5.7.3";
2982 elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) {
2983 $skip = "Zero length inputs cause assertion failure; test dies in modern perls";
2985 elsif ($truncate > 0 && length $changed > 1) {
2986 $skip = "Don't test shortened multi-char case changes";
2988 elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) {
2989 $skip = "Don't try to test shortened single bytes";
2995 my $fcn = "to${name}_utf8_safe";
2996 my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original);
2997 my $real_truncate = ($truncate < 2)
2998 ? $truncate : $should_be_bytes;
2999 my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)";
3000 my $ret = eval "no warnings; $eval_string" || 0;
3001 my $fail = $@; # Have to save $@, as it gets destroyed
3002 if ($truncate == 0) {
3003 is ($fail, "", "Didn't fail on full length input");
3004 my $first = (ivers($]) != ivers(5.6))
3005 ? substr($utf8_changed, 0, 1)
3006 : $utf8_changed, 0, 1;
3007 is($ret->[0], ord $first,
3008 "ord of $fcn($original) is $changed");
3009 is($ret->[1], $utf8_changed,
3010 "UTF-8 of of $fcn($original) is correct");
3011 is($ret->[2], $should_be_bytes,
3012 "Length of $fcn($original) is $should_be_bytes");
3015 is ($fail, eval 'qr/Malformed UTF-8 character/',
3016 "Gave appropriate error for short char: $original");
3017 skip("Expected failure means remaining tests for"
3018 . " this aren't relevant", 3);
3025 is(&Devel::PPPort::av_top_index([1,2,3]), 2);
3026 is(&Devel::PPPort::av_tindex([1,2,3,4]), 3);
3027 is(&Devel::PPPort::av_count([1,2,3,4]), 4);