1 ################################################################################
3 ## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
7 ## This program is free software; you can redistribute it and/or
8 ## modify it under the same terms as Perl itself.
10 ################################################################################
27 PERL_USE_GCC_BRACE_GROUPS
39 __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
40 __UNDEFINED__ OpHAS_SIBLING(o) (cBOOL((o)->op_sibling))
41 __UNDEFINED__ OpSIBLING(o) (0 + (o)->op_sibling)
42 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
43 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
44 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
45 __UNDEFINED__ HEf_SVKEY -2
47 #if defined(DEBUGGING) && !defined(__COVERITY__)
48 __UNDEFINED__ __ASSERT_(statement) assert(statement),
50 __UNDEFINED__ __ASSERT_(statement)
53 /* These could become provided when they become part of the public API */
54 __UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n) \
55 (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
56 __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u) \
57 ( (sizeof(c) == sizeof(U8)) ? withinCOUNT(((U8) (c)), (l), ((u) - (l))) \
58 : (sizeof(c) == sizeof(U16)) ? withinCOUNT(((U16) (c)), (l), ((u) - (l))) \
59 : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l))) \
60 : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
62 /* Create the macro for "is'macro'_utf8_safe(s, e)". For code points below
63 * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
64 * point. That is so that it can automatically get the bug fixes done in this
66 #define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro) \
69 : UTF8_IS_INVARIANT((s)[0]) \
70 ? is ## macro ## _L1((s)[0]) \
71 : (((e) - (s)) < UTF8SKIP(s)) \
73 : UTF8_IS_DOWNGRADEABLE_START((s)[0]) \
74 /* The cast in the line below is only to silence warnings */ \
75 ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE( \
76 UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
77 & UTF_START_MASK(2), \
79 : is ## macro ## _utf8(s))
81 __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
82 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
84 #ifndef PERL_UNUSED_DECL
86 # if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
87 # define PERL_UNUSED_DECL
89 # define PERL_UNUSED_DECL __attribute__((unused))
92 # define PERL_UNUSED_DECL
96 #ifndef PERL_UNUSED_ARG
97 # if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */
99 # define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x))
101 # define PERL_UNUSED_ARG(x) ((void)x)
105 #ifndef PERL_UNUSED_VAR
106 # define PERL_UNUSED_VAR(x) ((void)x)
109 #ifndef PERL_UNUSED_CONTEXT
111 # define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl)
113 # define PERL_UNUSED_CONTEXT
117 #ifndef PERL_UNUSED_RESULT
118 # if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
119 # define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
121 # define PERL_UNUSED_RESULT(v) ((void)(v))
125 __UNDEFINED__ NOOP /*EMPTY*/(void)0
126 __UNDEFINED__ dNOOP extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL
129 # if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
130 # define NVTYPE long double
132 # define NVTYPE double
138 # if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
140 # define INT2PTR(any,d) (any)(d)
142 # if PTRSIZE == LONGSIZE
143 # define PTRV unsigned long
145 # define PTRV unsigned
147 # define INT2PTR(any,d) (any)(PTRV)(d)
152 # if PTRSIZE == LONGSIZE
153 # define PTR2ul(p) (unsigned long)(p)
155 # define PTR2ul(p) INT2PTR(unsigned long,p)
159 __UNDEFINED__ PTR2nat(p) (PTRV)(p)
160 __UNDEFINED__ NUM2PTR(any,d) (any)PTR2nat(d)
161 __UNDEFINED__ PTR2IV(p) INT2PTR(IV,p)
162 __UNDEFINED__ PTR2UV(p) INT2PTR(UV,p)
163 __UNDEFINED__ PTR2NV(p) NUM2PTR(NV,p)
165 #undef START_EXTERN_C
169 # define START_EXTERN_C extern "C" {
170 # define END_EXTERN_C }
171 # define EXTERN_C extern "C"
173 # define START_EXTERN_C
174 # define END_EXTERN_C
175 # define EXTERN_C extern
178 #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
179 # ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
180 __UNDEF_NOT_PROVIDED__ PERL_GCC_BRACE_GROUPS_FORBIDDEN
184 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus)
185 # ifndef PERL_USE_GCC_BRACE_GROUPS
186 # define PERL_USE_GCC_BRACE_GROUPS
192 #ifdef PERL_USE_GCC_BRACE_GROUPS
193 # define STMT_START (void)( /* gcc supports ``({ STATEMENTS; })'' */
196 # if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__)
197 # define STMT_START if (1)
198 # define STMT_END else (void)0
200 # define STMT_START do
201 # define STMT_END while (0)
205 __UNDEFINED__ boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no)
207 /* DEFSV appears first in 5.004_56 */
208 __UNDEFINED__ DEFSV GvSV(PL_defgv)
209 __UNDEFINED__ SAVE_DEFSV SAVESPTR(GvSV(PL_defgv))
210 __UNDEFINED__ DEFSV_set(sv) (DEFSV = (sv))
212 /* Older perls (<=5.003) lack AvFILLp */
213 __UNDEFINED__ AvFILLp AvFILL
215 __UNDEFINED__ av_tindex AvFILL
216 __UNDEFINED__ av_top_index AvFILL
218 __UNDEFINED__ ERRSV get_sv("@",FALSE)
221 * This function's backport doesn't support the length parameter, but
222 * rather ignores it. Portability can only be ensured if the length
223 * parameter is used for speed reasons, but the length can always be
224 * correctly computed from the string argument.
227 __UNDEFINED__ gv_stashpvn(str,len,create) gv_stashpv(str,create)
230 __UNDEFINED__ get_cv perl_get_cv
231 __UNDEFINED__ get_sv perl_get_sv
232 __UNDEFINED__ get_av perl_get_av
233 __UNDEFINED__ get_hv perl_get_hv
236 __UNDEFINED__ dUNDERBAR dNOOP
237 __UNDEFINED__ UNDERBAR DEFSV
239 __UNDEFINED__ dAX I32 ax = MARK - PL_stack_base + 1
240 __UNDEFINED__ dITEMS I32 items = SP - MARK
242 __UNDEFINED__ dXSTARG SV * targ = sv_newmortal()
244 __UNDEFINED__ dAXMARK I32 ax = POPMARK; \
245 register SV ** const mark = PL_stack_base + ax++
248 __UNDEFINED__ XSprePUSH (sp = PL_stack_base + ax - 1)
250 #if { VERSION < 5.005 }
252 # define XSRETURN(off) \
254 PL_stack_sp = PL_stack_base + ax + ((off) - 1); \
259 __UNDEFINED__ XSPROTO(name) void name(pTHX_ CV* cv)
260 __UNDEFINED__ SVfARG(p) ((void*)(p))
262 __UNDEFINED__ PERL_ABS(x) ((x) < 0 ? -(x) : (x))
264 __UNDEFINED__ dVAR dNOOP
266 __UNDEFINED__ SVf "_"
268 __UNDEFINED__ CPERLscope(x) x
270 __UNDEFINED__ PERL_HASH(hash,str,len) \
272 const char *s_PeRlHaSh = str; \
273 I32 i_PeRlHaSh = len; \
274 U32 hash_PeRlHaSh = 0; \
275 while (i_PeRlHaSh--) \
276 hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
277 (hash) = hash_PeRlHaSh; \
280 #ifndef PERLIO_FUNCS_DECL
281 # ifdef PERLIO_FUNCS_CONST
282 # define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs
283 # define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs)
285 # define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs
286 # define PERLIO_FUNCS_CAST(funcs) (funcs)
290 /* provide these typedefs for older perls */
291 #if { VERSION < 5.9.3 }
294 typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto);
296 typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX);
299 typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
306 # define WIDEST_UTYPE U64TYPE
308 # define WIDEST_UTYPE Quad_t
311 # define WIDEST_UTYPE U32
315 /* On versions without NATIVE_TO_ASCII, only ASCII is supported */
316 #if defined(EBCDIC) && defined(NATIVE_TO_ASCI)
317 __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
318 __UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
319 __UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
320 __UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
322 __UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
323 __UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
324 __UNDEFINED__ NATIVE_TO_UNI(c) (c)
325 __UNDEFINED__ UNI_TO_NATIVE(c) (c)
328 /* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
329 EBCDIC is not supported on versions earlier than 5.7.1
332 /* The meaning of this changed; use the modern version */
337 /* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
338 This is equivalent to the corresponding isSPACE-type macro. On perls
339 before 5.18, this matched a vertical tab and SPACE didn't. But the
340 ppport.h SPACE version does match VT in all perl releases. Since VT's are
341 extremely rarely found in real-life files, this difference effectively
344 /* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
345 Until Perl 5.18, this did not match the vertical tab (VT). The ppport.h
346 version does match it in all perl releases. Since VT's are extremely rarely
347 found in real-life files, this difference effectively doesn't matter */
351 /* This is the first version where these macros are fully correct on EBCDIC
352 * platforms. Relying on * the C library functions, as earlier releases did,
353 * causes problems with * locales */
354 # if { VERSION < 5.22.0 }
364 # undef isALPHANUMERIC
365 # undef isALPHANUMERIC_A
366 # undef isALPHANUMERIC_L1
408 # undef isWORDCHAR_L1
414 __UNDEFINED__ isASCII(c) (isCNTRL(c) || isPRINT(c))
416 /* The below is accurate for all EBCDIC code pages supported by
417 * all the versions of Perl overridden by this */
418 __UNDEFINED__ isCNTRL(c) ( (c) == '\0' || (c) == '\a' || (c) == '\b' \
419 || (c) == '\f' || (c) == '\n' || (c) == '\r' \
420 || (c) == '\t' || (c) == '\v' \
421 || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */ \
422 || (c) == 7 /* U+7F DEL */ \
423 || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */ \
425 || (c) == 0x18 /* U+18 CAN */ \
426 || (c) == 0x19 /* U+19 EOM */ \
427 || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */ \
428 || (c) == 0x26 /* U+17 ETB */ \
429 || (c) == 0x27 /* U+1B ESC */ \
430 || (c) == 0x2D /* U+05 ENQ */ \
431 || (c) == 0x2E /* U+06 ACK */ \
432 || (c) == 0x32 /* U+16 SYN */ \
433 || (c) == 0x37 /* U+04 EOT */ \
434 || (c) == 0x3C /* U+14 DC4 */ \
435 || (c) == 0x3D /* U+15 NAK */ \
436 || (c) == 0x3F /* U+1A SUB */ \
439 #if '^' == 106 /* EBCDIC POSIX-BC */
440 # define D_PPP_OUTLIER_CONTROL 0x5F
441 #else /* EBCDIC 1047 037 */
442 # define D_PPP_OUTLIER_CONTROL 0xFF
445 /* The controls are everything below blank, plus one outlier */
446 __UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' ' \
447 || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
448 /* The ordering of the tests in this and isUPPER are to exclude most characters
450 __UNDEFINED__ isLOWER(c) ( (c) >= 'a' && (c) <= 'z' \
452 || ((c) >= 'j' && (c) <= 'r') \
454 __UNDEFINED__ isUPPER(c) ( (c) >= 'A' && (c) <= 'Z' \
456 || ((c) >= 'J' && (c) <= 'R') \
459 #else /* Above is EBCDIC; below is ASCII */
461 # if { VERSION < 5.4.0 }
462 /* The implementation of these in older perl versions can give wrong results if
463 * the C program locale is set to other than the C locale */
478 # if { VERSION == 5.7.0 } /* this perl made space GRAPH */
482 # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
486 # if { VERSION < 5.10.0 }
487 /* earlier perls included all of the isSPACE() characters, which is wrong. The
488 * version provided by Devel::PPPort always overrides an existing buggy
494 # if { VERSION < 5.14.0 }
495 /* earlier perls always returned true if the parameter was a signed char */
500 # if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
504 # if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
508 # if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
515 __UNDEFINED__ isASCII(c) ((WIDEST_UTYPE) (c) <= 127)
516 __UNDEFINED__ isCNTRL(c) ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
517 __UNDEFINED__ isCNTRL_L1(c) (isCNTRL(c) || ( (WIDEST_UTYPE) (c) <= 0x9F \
518 && (WIDEST_UTYPE) (c) >= 0x80))
519 __UNDEFINED__ isLOWER(c) ((c) >= 'a' && (c) <= 'z')
520 __UNDEFINED__ isUPPER(c) ((c) <= 'Z' && (c) >= 'A')
522 #endif /* Below are definitions common to EBCDIC and ASCII */
524 __UNDEFINED__ isASCII_L1(c) isASCII(c)
525 __UNDEFINED__ isALNUM(c) isWORDCHAR(c)
526 __UNDEFINED__ isALNUMC(c) isALPHANUMERIC(c)
527 __UNDEFINED__ isALNUMC_L1(c) isALPHANUMERIC_L1(c)
528 __UNDEFINED__ isALPHA(c) (isUPPER(c) || isLOWER(c))
529 __UNDEFINED__ isALPHA_L1(c) (isUPPER_L1(c) || isLOWER_L1(c))
530 __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
531 __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
532 __UNDEFINED__ isBLANK(c) ((c) == ' ' || (c) == '\t')
533 __UNDEFINED__ isBLANK_L1(c) ( isBLANK(c) \
534 || ( (WIDEST_UTYPE) (c) < 256 \
535 && NATIVE_TO_LATIN1((U8) c) == 0xA0))
536 __UNDEFINED__ isDIGIT(c) ((c) <= '9' && (c) >= '0')
537 __UNDEFINED__ isDIGIT_L1(c) isDIGIT(c)
538 __UNDEFINED__ isGRAPH(c) (isWORDCHAR(c) || isPUNCT(c))
539 __UNDEFINED__ isGRAPH_L1(c) ( isPRINT_L1(c) \
541 && NATIVE_TO_LATIN1((U8) c) != 0xA0)
542 __UNDEFINED__ isIDCONT(c) isWORDCHAR(c)
543 __UNDEFINED__ isIDCONT_L1(c) isWORDCHAR_L1(c)
544 __UNDEFINED__ isIDFIRST(c) (isALPHA(c) || (c) == '_')
545 __UNDEFINED__ isIDFIRST_L1(c) (isALPHA_L1(c) || (U8) (c) == '_')
546 __UNDEFINED__ isLOWER_L1(c) ( isLOWER(c) \
547 || ( (WIDEST_UTYPE) (c) < 256 \
548 && ( ( NATIVE_TO_LATIN1((U8) c) >= 0xDF \
549 && NATIVE_TO_LATIN1((U8) c) != 0xF7) \
550 || NATIVE_TO_LATIN1((U8) c) == 0xAA \
551 || NATIVE_TO_LATIN1((U8) c) == 0xBA \
552 || NATIVE_TO_LATIN1((U8) c) == 0xB5)))
553 __UNDEFINED__ isOCTAL(c) (((WIDEST_UTYPE)((c)) & ~7) == '0')
554 __UNDEFINED__ isOCTAL_L1(c) isOCTAL(c)
555 __UNDEFINED__ isPRINT(c) (isGRAPH(c) || (c) == ' ')
556 __UNDEFINED__ isPRINT_L1(c) ((WIDEST_UTYPE) (c) < 256 && ! isCNTRL_L1(c))
557 __UNDEFINED__ isPSXSPC(c) isSPACE(c)
558 __UNDEFINED__ isPSXSPC_L1(c) isSPACE_L1(c)
559 __UNDEFINED__ isPUNCT(c) ( (c) == '-' || (c) == '!' || (c) == '"' \
560 || (c) == '#' || (c) == '$' || (c) == '%' \
561 || (c) == '&' || (c) == '\'' || (c) == '(' \
562 || (c) == ')' || (c) == '*' || (c) == '+' \
563 || (c) == ',' || (c) == '.' || (c) == '/' \
564 || (c) == ':' || (c) == ';' || (c) == '<' \
565 || (c) == '=' || (c) == '>' || (c) == '?' \
566 || (c) == '@' || (c) == '[' || (c) == '\\' \
567 || (c) == ']' || (c) == '^' || (c) == '_' \
568 || (c) == '`' || (c) == '{' || (c) == '|' \
569 || (c) == '}' || (c) == '~')
570 __UNDEFINED__ isPUNCT_L1(c) ( isPUNCT(c) \
571 || ( (WIDEST_UTYPE) (c) < 256 \
572 && ( NATIVE_TO_LATIN1((U8) c) == 0xA1 \
573 || NATIVE_TO_LATIN1((U8) c) == 0xA7 \
574 || NATIVE_TO_LATIN1((U8) c) == 0xAB \
575 || NATIVE_TO_LATIN1((U8) c) == 0xB6 \
576 || NATIVE_TO_LATIN1((U8) c) == 0xB7 \
577 || NATIVE_TO_LATIN1((U8) c) == 0xBB \
578 || NATIVE_TO_LATIN1((U8) c) == 0xBF)))
579 __UNDEFINED__ isSPACE(c) ( isBLANK(c) || (c) == '\n' || (c) == '\r' \
580 || (c) == '\v' || (c) == '\f')
581 __UNDEFINED__ isSPACE_L1(c) ( isSPACE(c) \
582 || ( (WIDEST_UTYPE) (c) < 256 \
583 && ( NATIVE_TO_LATIN1((U8) c) == 0x85 \
584 || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
585 __UNDEFINED__ isUPPER_L1(c) ( isUPPER(c) \
586 || ( (WIDEST_UTYPE) (c) < 256 \
587 && ( NATIVE_TO_LATIN1((U8) c) >= 0xC0 \
588 && NATIVE_TO_LATIN1((U8) c) <= 0xDE \
589 && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
590 __UNDEFINED__ isWORDCHAR(c) (isALPHANUMERIC(c) || (c) == '_')
591 __UNDEFINED__ isWORDCHAR_L1(c) (isIDFIRST_L1(c) || isDIGIT(c))
592 __UNDEFINED__ isXDIGIT(c) ( isDIGIT(c) \
593 || ((c) >= 'a' && (c) <= 'f') \
594 || ((c) >= 'A' && (c) <= 'F'))
595 __UNDEFINED__ isXDIGIT_L1(c) isXDIGIT(c)
597 __UNDEFINED__ isALNUM_A(c) isALNUM(c)
598 __UNDEFINED__ isALNUMC_A(c) isALNUMC(c)
599 __UNDEFINED__ isALPHA_A(c) isALPHA(c)
600 __UNDEFINED__ isALPHANUMERIC_A(c) isALPHANUMERIC(c)
601 __UNDEFINED__ isASCII_A(c) isASCII(c)
602 __UNDEFINED__ isBLANK_A(c) isBLANK(c)
603 __UNDEFINED__ isCNTRL_A(c) isCNTRL(c)
604 __UNDEFINED__ isDIGIT_A(c) isDIGIT(c)
605 __UNDEFINED__ isGRAPH_A(c) isGRAPH(c)
606 __UNDEFINED__ isIDCONT_A(c) isIDCONT(c)
607 __UNDEFINED__ isIDFIRST_A(c) isIDFIRST(c)
608 __UNDEFINED__ isLOWER_A(c) isLOWER(c)
609 __UNDEFINED__ isOCTAL_A(c) isOCTAL(c)
610 __UNDEFINED__ isPRINT_A(c) isPRINT(c)
611 __UNDEFINED__ isPSXSPC_A(c) isPSXSPC(c)
612 __UNDEFINED__ isPUNCT_A(c) isPUNCT(c)
613 __UNDEFINED__ isSPACE_A(c) isSPACE(c)
614 __UNDEFINED__ isUPPER_A(c) isUPPER(c)
615 __UNDEFINED__ isWORDCHAR_A(c) isWORDCHAR(c)
616 __UNDEFINED__ isXDIGIT_A(c) isXDIGIT(c)
618 __UNDEFINED__ isASCII_utf8_safe(s,e) (((e) - (s)) <= 0 ? 0 : isASCII(*(s)))
619 __UNDEFINED__ isASCII_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
622 #if { VERSION >= 5.006 }
624 __UNDEFINED__ isALPHA_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
625 ? isALPHA_L1(c) : is_uni_alpha((UV) (c)))
626 __UNDEFINED__ isALPHANUMERIC_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
627 ? isALPHANUMERIC_L1(c) : (is_uni_alpha((UV) (c)) || is_uni_digit((UV) (c))))
629 __UNDEFINED__ isBLANK_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
630 ? isBLANK_L1(c) : is_uni_blank((UV) (c)))
632 __UNDEFINED__ isBLANK_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
634 : ( (UV) (c) == 0x1680 /* Unicode 3.0 */ \
635 || inRANGE((UV) (c), 0x2000, 0x200A) \
636 || (UV) (c) == 0x202F /* Unicode 3.0 */\
637 || (UV) (c) == 0x205F /* Unicode 3.2 */\
638 || (UV) (c) == 0x3000))
640 __UNDEFINED__ isCNTRL_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
641 ? isCNTRL_L1(c) : is_uni_cntrl((UV) (c)))
642 __UNDEFINED__ isDIGIT_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
643 ? isDIGIT_L1(c) : is_uni_digit((UV) (c)))
644 __UNDEFINED__ isGRAPH_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
645 ? isGRAPH_L1(c) : is_uni_graph((UV) (c)))
646 __UNDEFINED__ isIDCONT_uvchr(c) isWORDCHAR_uvchr(c)
647 __UNDEFINED__ isIDFIRST_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
648 ? isIDFIRST_L1(c) : is_uni_idfirst((UV) (c)))
649 __UNDEFINED__ isLOWER_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
650 ? isLOWER_L1(c) : is_uni_lower((UV) (c)))
651 __UNDEFINED__ isPRINT_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
652 ? isPRINT_L1(c) : is_uni_print((UV) (c)))
653 __UNDEFINED__ isPSXSPC_uvchr(c) isSPACE_uvchr(c)
654 __UNDEFINED__ isPUNCT_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
655 ? isPUNCT_L1(c) : is_uni_punct((UV) (c)))
656 __UNDEFINED__ isSPACE_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
657 ? isSPACE_L1(c) : is_uni_space((UV) (c)))
658 __UNDEFINED__ isUPPER_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
659 ? isUPPER_L1(c) : is_uni_upper((UV) (c)))
660 __UNDEFINED__ isXDIGIT_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
661 ? isXDIGIT_L1(c) : is_uni_xdigit((UV) (c)))
662 __UNDEFINED__ isWORDCHAR_uvchr(c) ((WIDEST_UTYPE) (c) < 256 \
663 ? isWORDCHAR_L1(c) : is_uni_alnum((UV) (c)))
665 __UNDEFINED__ isALPHA_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
666 # ifdef isALPHANUMERIC_utf8
667 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
668 D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
670 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e) \
671 (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
674 /* This was broken before 5.18, and just use this instead of worrying about
675 * which releases the official works on */
677 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
678 ( ( LIKELY((e) > (s)) ) ? /* Machine generated */ \
679 ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1 \
680 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
681 ( ( 0xC2 == ((const U8*)s)[0] ) ? \
682 ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 ) \
683 : ( 0xE1 == ((const U8*)s)[0] ) ? \
684 ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
685 : ( 0xE2 == ((const U8*)s)[0] ) ? \
686 ( ( 0x80 == ((const U8*)s)[1] ) ? \
687 ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
688 : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
689 : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
693 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
695 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
696 ( ( LIKELY((e) > (s)) ) ? \
697 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
698 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
699 ( ( 0x80 == ((const U8*)s)[0] ) ? \
700 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
701 : ( 0xBC == ((const U8*)s)[0] ) ? \
702 ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
703 : ( 0xCA == ((const U8*)s)[0] ) ? \
704 ( ( 0x41 == ((const U8*)s)[1] ) ? \
705 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
706 : ( 0x42 == ((const U8*)s)[1] ) ? \
707 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
708 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
709 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
713 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
715 __UNDEFINED__ isBLANK_utf8_safe(s,e) \
716 ( ( LIKELY((e) > (s)) ) ? \
717 ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1 \
718 : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ? \
719 ( ( 0x78 == ((const U8*)s)[0] ) ? \
720 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 ) \
721 : ( 0xBD == ((const U8*)s)[0] ) ? \
722 ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
723 : ( 0xCA == ((const U8*)s)[0] ) ? \
724 ( ( 0x41 == ((const U8*)s)[1] ) ? \
725 ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
726 : ( 0x42 == ((const U8*)s)[1] ) ? \
727 ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
728 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
729 : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
734 # error Unknown character set
737 __UNDEFINED__ isCNTRL_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
738 __UNDEFINED__ isDIGIT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
739 __UNDEFINED__ isGRAPH_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
740 # ifdef isIDCONT_utf8
741 __UNDEFINED__ isIDCONT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
743 __UNDEFINED__ isIDCONT_utf8_safe(s,e) isWORDCHAR_utf8_safe(s,e)
746 __UNDEFINED__ isIDFIRST_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
747 __UNDEFINED__ isLOWER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
748 __UNDEFINED__ isPRINT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
750 # undef isPSXSPC_utf8_safe /* Use the modern definition */
751 __UNDEFINED__ isPSXSPC_utf8_safe(s,e) isSPACE_utf8_safe(s,e)
753 __UNDEFINED__ isPUNCT_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
754 __UNDEFINED__ isSPACE_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
755 __UNDEFINED__ isUPPER_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
757 # ifdef isWORDCHAR_utf8
758 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
760 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) \
761 (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_')
764 /* This was broken before 5.12, and just use this instead of worrying about
765 * which releases the official works on */
767 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
768 ( ( LIKELY((e) > (s)) ) ? \
769 ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
770 : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
771 ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
772 : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
775 # elif 'A' == 193 && '^' == 95 /* EBCDIC 1047 */
777 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
778 ( ( LIKELY((e) > (s)) ) ? \
779 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
780 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
781 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
782 : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
785 # elif 'A' == 193 && '^' == 176 /* EBCDIC 037 */
787 __UNDEFINED__ isXDIGIT_utf8_safe(s,e) \
788 ( ( LIKELY((e) > (s)) ) ? \
789 ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
790 : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
791 ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
792 : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
796 # error Unknown character set
799 /* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe,
800 * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe,
801 * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe,
802 * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe,
803 * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe,
804 * isXDIGIT_utf8_safe,
805 * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr,
806 * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr,
807 * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr,
808 * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr,
809 * isWORDCHAR_uvchr, isXDIGIT_uvchr
811 * The UTF-8 handling is buggy in early Perls, and this can give inaccurate
812 * results for code points above 0xFF, until the implementation started
813 * settling down in 5.12 and 5.14 */
817 #define D_PPP_TOO_SHORT_MSG "Malformed UTF-8 character starting with:" \
818 " \\x%02x (too short; %d bytes available, need" \
820 /* Perls starting here had a new API which handled multi-character results */
821 #if { VERSION >= 5.7.3 }
823 __UNDEFINED__ toLOWER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l))
824 __UNDEFINED__ toUPPER_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l))
825 __UNDEFINED__ toTITLE_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l))
826 __UNDEFINED__ toFOLD_uvchr(c, s, l) UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l))
828 # if { VERSION != 5.15.6 } /* Just this version is broken */
830 /* Prefer the macro to the function */
831 # if defined toLOWER_utf8
832 # define D_PPP_TO_LOWER_CALLEE(s,r,l) toLOWER_utf8(s,r,l)
834 # define D_PPP_TO_LOWER_CALLEE(s,r,l) to_utf8_lower(s,r,l)
836 # if defined toTITLE_utf8
837 # define D_PPP_TO_TITLE_CALLEE(s,r,l) toTITLE_utf8(s,r,l)
839 # define D_PPP_TO_TITLE_CALLEE(s,r,l) to_utf8_title(s,r,l)
841 # if defined toUPPER_utf8
842 # define D_PPP_TO_UPPER_CALLEE(s,r,l) toUPPER_utf8(s,r,l)
844 # define D_PPP_TO_UPPER_CALLEE(s,r,l) to_utf8_upper(s,r,l)
846 # if defined toFOLD_utf8
847 # define D_PPP_TO_FOLD_CALLEE(s,r,l) toFOLD_utf8(s,r,l)
849 # define D_PPP_TO_FOLD_CALLEE(s,r,l) to_utf8_fold(s,r,l)
851 # else /* Below is 5.15.6, which failed to make the macros available
852 # outside of core, so we have to use the 'Perl_' form. khw
853 # decided it was easier to just handle this case than have to
854 # document the exception, and make an exception in the tests below
856 # define D_PPP_TO_LOWER_CALLEE(s,r,l) \
857 Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL)
858 # define D_PPP_TO_TITLE_CALLEE(s,r,l) \
859 Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL)
860 # define D_PPP_TO_UPPER_CALLEE(s,r,l) \
861 Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL)
862 # define D_PPP_TO_FOLD_CALLEE(s,r,l) \
863 Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL)
866 /* The actual implementation of the backported macros. If too short, croak,
867 * otherwise call the original that doesn't have an upper limit parameter */
868 # define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l) \
869 (((((e) - (s)) <= 0) \
870 /* We could just do nothing, but modern perls croak */ \
871 ? (croak("Attempting case change on zero length string"), \
872 0) /* So looks like it returns something, and will compile */ \
873 : ((e) - (s)) < UTF8SKIP(s)) \
874 ? (croak(D_PPP_TOO_SHORT_MSG, \
875 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
877 : D_PPP_TO_ ## name ## _CALLEE(s,r,l))
879 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
880 D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l)
881 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
882 D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l)
883 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
884 D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l)
885 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) \
886 D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l)
888 #elif defined(UTF8SKIP)
890 /* Here we have UTF-8 support, but using the original API where the case
891 * changing functions merely returned the changed code point; hence they
892 * couldn't handle multi-character results. */
894 # ifdef uvchr_to_utf8
895 # define D_PPP_UV_TO_UTF8 uvchr_to_utf8
897 # define D_PPP_UV_TO_UTF8 uv_to_utf8
900 /* Get the utf8 of the case changed value, and store its length; then have
901 * to re-calculate the changed case value in order to return it */
902 # define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l) \
903 (*(l) = (D_PPP_UV_TO_UTF8(s, \
904 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)), \
905 UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c))))
907 __UNDEFINED__ toLOWER_uvchr(c, s, l) \
908 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l)
909 __UNDEFINED__ toUPPER_uvchr(c, s, l) \
910 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l)
911 __UNDEFINED__ toTITLE_uvchr(c, s, l) \
912 D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l)
913 __UNDEFINED__ toFOLD_uvchr(c, s, l) toLOWER_uvchr(c, s, l)
915 # define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l) \
916 (((((e) - (s)) <= 0) \
917 ? (croak("Attempting case change on zero length string"), \
918 0) /* So looks like it returns something, and will compile */ \
919 : ((e) - (s)) < UTF8SKIP(s)) \
920 ? (croak(D_PPP_TOO_SHORT_MSG, \
921 s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
923 /* Get the changed code point and store its UTF-8 */ \
924 : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)), \
925 /* Then store its length, and re-get code point for return */ \
926 *(l) = UTF8SKIP(r), to_utf8_ ## name(r))
928 /* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe,
929 * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr
930 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
931 this backport does not correct them.
933 In perls before 7.3, multi-character case changing is not implemented; this
934 backport uses the simple case changes available in those perls. */
936 __UNDEFINED__ toUPPER_utf8_safe(s,e,r,l) \
937 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l)
938 __UNDEFINED__ toLOWER_utf8_safe(s,e,r,l) \
939 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l)
940 __UNDEFINED__ toTITLE_utf8_safe(s,e,r,l) \
941 D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l)
943 /* Warning: toFOLD_utf8_safe, toFOLD_uvchr
944 The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
945 this backport does not correct them.
947 In perls before 7.3, case folding is not implemented; instead, this
948 backport substitutes simple (not multi-character, which isn't available)
949 lowercasing. This gives the correct result in most, but not all, instances
952 __UNDEFINED__ toFOLD_utf8_safe(s,e,r,l) toLOWER_utf8_safe(s,e,r,l)
956 /* Until we figure out how to support this in older perls... */
957 #if { VERSION >= 5.8.0 }
959 __UNDEFINED__ HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \
960 SvUTF8(HeKEY_sv(he)) : \
965 __UNDEFINED__ C_ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
966 __UNDEFINED__ C_ARRAY_END(a) ((a) + C_ARRAY_LENGTH(a))
968 __UNDEFINED__ LIKELY(x) (x)
969 __UNDEFINED__ UNLIKELY(x) (x)
972 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
973 # define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
975 # define MUTABLE_PTR(p) ((void *) (p))
979 __UNDEFINED__ MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
983 typedef XSPROTO(XSPROTO_test_t);
984 typedef XSPROTO_test_t *XSPROTO_test_t_ptr;
986 XS(XS_Devel__PPPort_dXSTARG); /* prototype */
987 XS(XS_Devel__PPPort_dXSTARG)
995 iv = SvIV(ST(0)) + 1;
1000 XS(XS_Devel__PPPort_dAXMARK); /* prototype */
1001 XS(XS_Devel__PPPort_dAXMARK)
1008 PERL_UNUSED_VAR(cv);
1010 iv = SvIV(ST(0)) - 1;
1018 XSPROTO_test_t_ptr p = &XS_Devel__PPPort_dXSTARG;
1019 newXS("Devel::PPPort::dXSTARG", *p, file);
1021 newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file);
1036 x = newOP(OP_PUSHMARK, 0);
1038 /* No siblings yet! */
1039 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1040 failures++; warn("Op should not have had a sib");
1044 /* Add 2 siblings */
1047 for (i = 0; i < 2; i++) {
1048 OP *newsib = newOP(OP_PUSHMARK, 0);
1049 OpMORESIB_set(kid, newsib);
1051 kid = OpSIBLING(kid);
1054 middlekid = OpSIBLING(x);
1056 /* Should now have a sibling */
1057 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1058 failures++; warn("Op should have had a sib after moresib_set");
1061 /* Count the siblings */
1062 for (kid = OpSIBLING(x); kid; kid = OpSIBLING(kid)) {
1067 failures++; warn("Kid had %d sibs, expected 2", count);
1070 if (OpHAS_SIBLING(lastkid) || OpSIBLING(lastkid)) {
1071 failures++; warn("Last kid should not have a sib");
1074 /* Really sets the parent, and says 'no more siblings' */
1075 OpLASTSIB_set(x, lastkid);
1077 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1078 failures++; warn("OpLASTSIB_set failed?");
1081 /* Restore the kid */
1082 OpMORESIB_set(x, lastkid);
1084 /* Try to remove it again */
1085 OpLASTSIB_set(x, NULL);
1087 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1088 failures++; warn("OpLASTSIB_set with NULL failed?");
1091 /* Try to restore with maybesib_set */
1092 OpMAYBESIB_set(x, lastkid, NULL);
1094 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1095 failures++; warn("Op should have had a sib after maybesibset");
1109 RETVAL = SvRXOK(sv);
1120 RETVAL += PTR2nat(p) != 0 ? 1 : 0;
1121 RETVAL += PTR2ul(p) != 0UL ? 2 : 0;
1122 RETVAL += PTR2UV(p) != (UV) 0 ? 4 : 0;
1123 RETVAL += PTR2IV(p) != (IV) 0 ? 8 : 0;
1124 RETVAL += PTR2NV(p) != (NV) 0 ? 16 : 0;
1125 RETVAL += p > NUM2PTR(int *, 0) ? 32 : 0;
1131 gv_stashpvn(name, create)
1135 RETVAL = gv_stashpvn(name, strlen(name), create) != NULL;
1140 get_sv(name, create)
1144 RETVAL = get_sv(name, create) != NULL;
1149 get_av(name, create)
1153 RETVAL = get_av(name, create) != NULL;
1158 get_hv(name, create)
1162 RETVAL = get_hv(name, create) != NULL;
1167 get_cv(name, create)
1171 RETVAL = get_cv(name, create) != NULL;
1179 mXPUSHp("test1", 5);
1181 mXPUSHp("test2", 5);
1191 RETVAL = newSVsv(boolSV(value));
1198 RETVAL = newSVsv(DEFSV);
1205 XPUSHs(sv_mortalcopy(DEFSV));
1208 DEFSV_set(newSVpvs("DEFSV"));
1209 XPUSHs(sv_mortalcopy(DEFSV));
1210 /* Yes, this leaks the above scalar; 5.005 with threads for some reason */
1211 /* frees it upon LEAVE, thus mortalizing it causes "attempt to free..." */
1212 /* sv_2mortal(DEFSV); */
1214 XPUSHs(sv_mortalcopy(DEFSV));
1220 RETVAL = SvTRUEx(ERRSV);
1229 RETVAL = newSVsv(UNDERBAR);
1252 #if { VERSION >= 5.004 }
1253 x = sv_2mortal(newSVpvf("[%" SVf "]", SVfARG(x)));
1259 Perl_ppaddr_t(string)
1262 Perl_ppaddr_t lower;
1264 lower = PL_ppaddr[OP_LC];
1265 mXPUSHs(newSVpv(string, 0));
1268 (void)*(lower)(aTHXR);
1273 #if { VERSION >= 5.8.0 }
1276 check_HeUTF8(utf8_key)
1286 key = SvPV(utf8_key, klen);
1287 if (SvUTF8(utf8_key)) klen *= -1;
1288 hv_store(hash, key, klen, newSVpvs("string"), 0);
1290 ent = hv_iternext(hash);
1292 mXPUSHp((HeUTF8(ent) == 0 ? "norm" : "utf8"), 4);
1301 int x[] = { 10, 11, 12, 13 };
1303 mXPUSHi(C_ARRAY_LENGTH(x)); /* 4 */
1304 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
1310 RETVAL = isBLANK(ord);
1318 RETVAL = isBLANK_A(ord);
1326 RETVAL = isBLANK_L1(ord);
1334 RETVAL = isUPPER(ord);
1342 RETVAL = isUPPER_A(ord);
1350 RETVAL = isUPPER_L1(ord);
1358 RETVAL = isLOWER(ord);
1366 RETVAL = isLOWER_A(ord);
1374 RETVAL = isLOWER_L1(ord);
1382 RETVAL = isALPHA(ord);
1390 RETVAL = isALPHA_A(ord);
1398 RETVAL = isALPHA_L1(ord);
1406 RETVAL = isWORDCHAR(ord);
1414 RETVAL = isWORDCHAR_A(ord);
1422 RETVAL = isWORDCHAR_L1(ord);
1430 RETVAL = isALPHANUMERIC(ord);
1435 isALPHANUMERIC_A(ord)
1438 RETVAL = isALPHANUMERIC_A(ord);
1446 RETVAL = isALNUM(ord);
1454 RETVAL = isALNUM_A(ord);
1462 RETVAL = isDIGIT(ord);
1470 RETVAL = isDIGIT_A(ord);
1478 RETVAL = isOCTAL(ord);
1486 RETVAL = isOCTAL_A(ord);
1494 RETVAL = isIDFIRST(ord);
1502 RETVAL = isIDFIRST_A(ord);
1510 RETVAL = isIDCONT(ord);
1518 RETVAL = isIDCONT_A(ord);
1526 RETVAL = isSPACE(ord);
1534 RETVAL = isSPACE_A(ord);
1542 RETVAL = isASCII(ord);
1550 RETVAL = isASCII_A(ord);
1558 RETVAL = isCNTRL(ord);
1566 RETVAL = isCNTRL_A(ord);
1574 RETVAL = isPRINT(ord);
1582 RETVAL = isPRINT_A(ord);
1590 RETVAL = isGRAPH(ord);
1598 RETVAL = isGRAPH_A(ord);
1606 RETVAL = isPUNCT(ord);
1614 RETVAL = isPUNCT_A(ord);
1622 RETVAL = isXDIGIT(ord);
1630 RETVAL = isXDIGIT_A(ord);
1638 RETVAL = isPSXSPC(ord);
1646 RETVAL = isPSXSPC_A(ord);
1651 isALPHANUMERIC_L1(ord)
1654 RETVAL = isALPHANUMERIC_L1(ord);
1662 RETVAL = isALNUMC_L1(ord);
1670 RETVAL = isDIGIT_L1(ord);
1678 RETVAL = isOCTAL_L1(ord);
1686 RETVAL = isIDFIRST_L1(ord);
1694 RETVAL = isIDCONT_L1(ord);
1702 RETVAL = isSPACE_L1(ord);
1710 RETVAL = isASCII_L1(ord);
1718 RETVAL = isCNTRL_L1(ord);
1726 RETVAL = isPRINT_L1(ord);
1734 RETVAL = isGRAPH_L1(ord);
1742 RETVAL = isPUNCT_L1(ord);
1750 RETVAL = isXDIGIT_L1(ord);
1758 RETVAL = isPSXSPC_L1(ord);
1766 RETVAL = isASCII_uvchr(ord);
1771 isASCII_utf8_safe(s, offset)
1775 RETVAL = isASCII_utf8_safe(s, s + 1 + offset);
1779 #if { VERSION >= 5.006 }
1785 RETVAL = isBLANK_uvchr(ord);
1793 RETVAL = isALPHA_uvchr(ord);
1798 isALPHANUMERIC_uvchr(ord)
1801 RETVAL = isALPHANUMERIC_uvchr(ord);
1809 RETVAL = isCNTRL_uvchr(ord);
1817 RETVAL = isDIGIT_uvchr(ord);
1822 isIDFIRST_uvchr(ord)
1825 RETVAL = isIDFIRST_uvchr(ord);
1833 RETVAL = isIDCONT_uvchr(ord);
1841 RETVAL = isGRAPH_uvchr(ord);
1849 RETVAL = isLOWER_uvchr(ord);
1857 RETVAL = isPRINT_uvchr(ord);
1865 RETVAL = isPSXSPC_uvchr(ord);
1873 RETVAL = isPUNCT_uvchr(ord);
1881 RETVAL = isSPACE_uvchr(ord);
1889 RETVAL = isUPPER_uvchr(ord);
1894 isWORDCHAR_uvchr(ord)
1897 RETVAL = isWORDCHAR_uvchr(ord);
1905 RETVAL = isXDIGIT_uvchr(ord);
1910 isALPHA_utf8_safe(s, offset)
1914 RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
1919 isALPHANUMERIC_utf8_safe(s, offset)
1923 RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
1928 isBLANK_utf8_safe(s, offset)
1932 RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
1937 isCNTRL_utf8_safe(s, offset)
1941 RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
1946 isDIGIT_utf8_safe(s, offset)
1950 RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
1955 isGRAPH_utf8_safe(s, offset)
1959 RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
1964 isIDCONT_utf8_safe(s, offset)
1968 RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
1973 isIDFIRST_utf8_safe(s, offset)
1977 RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
1982 isLOWER_utf8_safe(s, offset)
1986 RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
1991 isPRINT_utf8_safe(s, offset)
1995 RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2000 isPSXSPC_utf8_safe(s, offset)
2004 RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2009 isPUNCT_utf8_safe(s, offset)
2013 RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2018 isSPACE_utf8_safe(s, offset)
2022 RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
2027 isUPPER_utf8_safe(s, offset)
2031 RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2036 isWORDCHAR_utf8_safe(s, offset)
2040 RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
2045 isXDIGIT_utf8_safe(s, offset)
2049 RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2054 toLOWER_utf8_safe(s, offset)
2058 U8 u[UTF8_MAXBYTES+1];
2065 ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2066 av_push(av, newSVuv(ret));
2068 utf8 = newSVpvn((char *) u, len);
2072 av_push(av, newSVuv(len));
2078 toTITLE_utf8_safe(s, offset)
2082 U8 u[UTF8_MAXBYTES+1];
2089 ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2090 av_push(av, newSVuv(ret));
2092 utf8 = newSVpvn((char *) u, len);
2096 av_push(av, newSVuv(len));
2102 toUPPER_utf8_safe(s, offset)
2106 U8 u[UTF8_MAXBYTES+1];
2113 ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2114 av_push(av, newSVuv(ret));
2116 utf8 = newSVpvn((char *) u, len);
2120 av_push(av, newSVuv(len));
2126 toFOLD_utf8_safe(s, offset)
2130 U8 u[UTF8_MAXBYTES+1];
2137 ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2138 av_push(av, newSVuv(ret));
2140 utf8 = newSVpvn((char *) u, len);
2144 av_push(av, newSVuv(len));
2153 U8 u[UTF8_MAXBYTES+1];
2160 ret = toLOWER_uvchr(c, u, &len);
2161 av_push(av, newSVuv(ret));
2163 utf8 = newSVpvn((char *) u, len);
2167 av_push(av, newSVuv(len));
2176 U8 u[UTF8_MAXBYTES+1];
2183 ret = toTITLE_uvchr(c, u, &len);
2184 av_push(av, newSVuv(ret));
2186 utf8 = newSVpvn((char *) u, len);
2190 av_push(av, newSVuv(len));
2199 U8 u[UTF8_MAXBYTES+1];
2206 ret = toUPPER_uvchr(c, u, &len);
2207 av_push(av, newSVuv(ret));
2209 utf8 = newSVpvn((char *) u, len);
2213 av_push(av, newSVuv(len));
2222 U8 u[UTF8_MAXBYTES+1];
2229 ret = toFOLD_uvchr(c, u, &len);
2230 av_push(av, newSVuv(ret));
2232 utf8 = newSVpvn((char *) u, len);
2236 av_push(av, newSVuv(len));
2244 LATIN1_TO_NATIVE(cp)
2247 if (cp > 255) RETVAL= cp;
2248 else RETVAL= LATIN1_TO_NATIVE(cp);
2253 NATIVE_TO_LATIN1(cp)
2256 RETVAL= NATIVE_TO_LATIN1(cp);
2264 RETVAL = av_tindex((AV*)SvRV(av));
2272 RETVAL = av_top_index((AV*)SvRV(av));
2276 =tests plan => 22270
2278 use vars qw($my_sv @my_av %my_hv);
2280 ok(&Devel::PPPort::boolSV(1));
2281 ok(!&Devel::PPPort::boolSV(0));
2284 ok(&Devel::PPPort::DEFSV(), "Fred");
2285 ok(&Devel::PPPort::UNDERBAR(), "Fred");
2287 if ("$]" >= 5.009002 && "$]" < 5.023 && "$]" < 5.023004) {
2289 no warnings "deprecated";
2290 no if $^V > v5.17.9, warnings => "experimental::lexical_topic";
2292 ok(&Devel::PPPort::DEFSV(), "Fred");
2293 ok(&Devel::PPPort::UNDERBAR(), "Tony");
2301 my @r = &Devel::PPPort::DEFSV_modify();
2308 ok(&Devel::PPPort::DEFSV(), "Fred");
2311 ok(!&Devel::PPPort::ERRSV());
2312 eval { cannot_call_this_one() };
2313 ok(&Devel::PPPort::ERRSV());
2315 ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0));
2316 ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0));
2317 ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1));
2320 ok(&Devel::PPPort::get_sv('my_sv', 0));
2321 ok(!&Devel::PPPort::get_sv('not_my_sv', 0));
2322 ok(&Devel::PPPort::get_sv('not_my_sv', 1));
2325 ok(&Devel::PPPort::get_av('my_av', 0));
2326 ok(!&Devel::PPPort::get_av('not_my_av', 0));
2327 ok(&Devel::PPPort::get_av('not_my_av', 1));
2330 ok(&Devel::PPPort::get_hv('my_hv', 0));
2331 ok(!&Devel::PPPort::get_hv('not_my_hv', 0));
2332 ok(&Devel::PPPort::get_hv('not_my_hv', 1));
2335 ok(&Devel::PPPort::get_cv('my_cv', 0));
2336 ok(!&Devel::PPPort::get_cv('not_my_cv', 0));
2337 ok(&Devel::PPPort::get_cv('not_my_cv', 1));
2339 ok(Devel::PPPort::dXSTARG(42), 43);
2340 ok(Devel::PPPort::dAXMARK(4711), 4710);
2342 ok(Devel::PPPort::prepush(), 42);
2344 ok(join(':', Devel::PPPort::xsreturn(0)), 'test1');
2345 ok(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
2347 ok(Devel::PPPort::PERL_ABS(42), 42);
2348 ok(Devel::PPPort::PERL_ABS(-13), 13);
2350 ok(Devel::PPPort::SVf(42), "$]" >= 5.004 ? '[42]' : '42');
2351 ok(Devel::PPPort::SVf('abc'), "$]" >= 5.004 ? '[abc]' : 'abc');
2353 ok(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
2355 ok(&Devel::PPPort::ptrtests(), 63);
2357 ok(&Devel::PPPort::OpSIBLING_tests(), 0);
2359 if ("$]" >= 5.009000) {
2361 ok(&Devel::PPPort::check_HeUTF8("hello"), "norm");
2362 ok(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
2369 @r = &Devel::PPPort::check_c_array();
2373 ok(!Devel::PPPort::SvRXOK(""));
2374 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
2377 skip 'no qr// objects in this perl', 0;
2378 skip 'no qr// objects in this perl', 0;
2380 my $qr = eval 'qr/./';
2381 ok(Devel::PPPort::SvRXOK($qr));
2382 ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
2385 ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
2386 ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
2387 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
2388 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
2390 ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6);
2391 if (ord("A") == 65) {
2392 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
2393 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
2396 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
2397 ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
2400 ok( Devel::PPPort::isALNUMC_L1(ord("5")));
2401 ok( Devel::PPPort::isALNUMC_L1(0xFC));
2402 ok(! Devel::PPPort::isALNUMC_L1(0xB6));
2404 ok( Devel::PPPort::isOCTAL(ord("7")));
2405 ok(! Devel::PPPort::isOCTAL(ord("8")));
2407 ok( Devel::PPPort::isOCTAL_A(ord("0")));
2408 ok(! Devel::PPPort::isOCTAL_A(ord("9")));
2410 ok( Devel::PPPort::isOCTAL_L1(ord("2")));
2411 ok(! Devel::PPPort::isOCTAL_L1(ord("8")));
2413 # For the other properties, we test every code point from 0.255, and a
2414 # smattering of higher ones. First populate a hash with keys like '65:ALPHA'
2415 # to indicate that the code point there is alphabetic
2418 for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
2421 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2422 $types{"$native:ALPHA"} = 1;
2423 $types{"$native:ALPHANUMERIC"} = 1;
2424 $types{"$native:IDFIRST"} = 1;
2425 $types{"$native:IDCONT"} = 1;
2426 $types{"$native:PRINT"} = 1;
2427 $types{"$native:WORDCHAR"} = 1;
2429 for $i (0x30..0x39, 0x660, 0xFF19) {
2430 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2431 $types{"$native:ALPHANUMERIC"} = 1;
2432 $types{"$native:DIGIT"} = 1;
2433 $types{"$native:IDCONT"} = 1;
2434 $types{"$native:WORDCHAR"} = 1;
2435 $types{"$native:GRAPH"} = 1;
2436 $types{"$native:PRINT"} = 1;
2437 $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
2441 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2442 $types{"$native:ASCII"} = 1;
2444 for $i (0..0x1f, 0x7F..0x9F) {
2445 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2446 $types{"$native:CNTRL"} = 1;
2448 for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
2449 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2450 $types{"$native:GRAPH"} = 1;
2451 $types{"$native:PRINT"} = 1;
2453 for $i (0x09, 0x20, 0xA0) {
2454 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2455 $types{"$native:BLANK"} = 1;
2456 $types{"$native:SPACE"} = 1;
2457 $types{"$native:PSXSPC"} = 1;
2458 $types{"$native:PRINT"} = 1 if $i > 0x09;
2460 for $i (0x09..0x0D, 0x85, 0x2029) {
2461 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2462 $types{"$native:SPACE"} = 1;
2463 $types{"$native:PSXSPC"} = 1;
2465 for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
2466 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2467 $types{"$native:UPPER"} = 1;
2468 $types{"$native:XDIGIT"} = 1 if $i < 0x47;
2470 for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
2471 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2472 $types{"$native:LOWER"} = 1;
2473 $types{"$native:XDIGIT"} = 1 if $i < 0x67;
2475 for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
2476 0xB7, 0xBB, 0xBF, 0x5BE)
2478 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2479 $types{"$native:PUNCT"} = 1;
2480 $types{"$native:GRAPH"} = 1;
2481 $types{"$native:PRINT"} = 1;
2485 $types{"$i:WORDCHAR"} = 1;
2486 $types{"$i:IDFIRST"} = 1;
2487 $types{"$i:IDCONT"} = 1;
2489 # Now find all the unique code points included above.
2490 my %code_points_to_test;
2492 for $key (keys %types) {
2494 $code_points_to_test{$key} = 1;
2498 for $i (sort { $a <=> $b } keys %code_points_to_test) {
2499 my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2500 my $hex = sprintf("0x%02X", $native);
2502 # And for each code point test each of the classes
2504 for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
2505 IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
2508 if ($i < 256) { # For the ones that can fit in a byte, test each of
2511 for $suffix ("", "_A", "_L1", "_uvchr") {
2512 my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/)
2513 ? 0 # Fail on non-ASCII unless unicode
2514 : ($types{"$native:$class"} || 0);
2515 if ("$]" < 5.006 && $suffix eq '_uvchr') {
2516 skip("No UTF-8 on this perl", 0);
2520 my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
2521 my $is = eval $eval_string || 0;
2522 die "eval 'For $i: $eval_string' gave $@" if $@;
2523 ok($is, $should_be, "'$eval_string'");
2527 # For all code points, test the '_utf8' macros
2529 skip("No UTF-8 on this perl", 0);
2531 skip("No UTF-8 on this perl", 0);
2537 my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe";
2538 my $utf8 = quotemeta Devel::PPPort::uvoffuni_to_utf8($i);
2539 if ("$]" < 5.007 && $native > 255) {
2540 skip("Perls earlier than 5.7 give wrong answers for above Latin1 code points", 0);
2542 elsif ("$]" <= 5.011003 && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
2543 skip("Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH", 0);
2546 my $should_be = $types{"$native:$class"} || 0;
2547 my $eval_string = "$fcn(\"$utf8\", 0)";
2548 my $is = eval $eval_string || 0;
2549 die "eval 'For $i, $eval_string' gave $@" if $@;
2550 ok($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
2553 # And for the high code points, test that a too short malformation (the
2554 # -1) causes it to fail
2556 if ("$]" >= 5.025009) {
2557 skip("Prints an annoying error message that khw doesn't know how to easily suppress", 0);
2560 my $eval_string = "$fcn(\"$utf8\", -1)";
2561 my $is = eval "no warnings; $eval_string" || 0;
2562 die "eval '$eval_string' gave $@" if $@;
2563 ok($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
2571 my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ],
2575 'FOLD' => [ [ ord('C'), ord('c') ],
2580 'UPPER' => [ [ ord('a'),ord('A'), ],
2585 'TITLE' => [ [ ord('c'),ord('C'), ],
2592 my $way_too_early_msg = 'UTF-8 not implemented on this perl';
2595 for $name (keys %case_changing) {
2596 my @code_points_to_test = @{$case_changing{$name}};
2598 for $unchanged (@code_points_to_test) {
2599 my @pair = @$unchanged;
2600 my $original = $pair[0];
2601 my $changed = $pair[1];
2602 my $utf8_changed = $changed;
2603 my $is_cp = $utf8_changed =~ /^\d+$/;
2604 my $should_be_bytes;
2605 if ("$]" >= 5.006000) {
2607 $utf8_changed = Devel::PPPort::uvoffuni_to_utf8($changed);
2608 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2611 die("Test currently doesn't work for non-ASCII multi-char case changes") if $utf8_changed =~ /[[:^ascii:]]/;
2612 $should_be_bytes = length $utf8_changed;
2616 my $fcn = "to${name}_uvchr";
2619 if ("$]" < 5.006000) {
2620 $skip = $way_too_early_msg;
2623 $skip = "Can't do uvchr on a multi-char string";
2632 $utf8_changed = Devel::PPPort::uvoffuni_to_utf8($changed);
2633 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2636 die("Test currently doesn't work for non-ASCII multi-char case changes") if $utf8_changed =~ /[[:^ascii:]]/;
2637 $should_be_bytes = length $utf8_changed;
2640 my $ret = eval "Devel::PPPort::$fcn($original)";
2641 my $fail = $@; # Have to save $@, as it gets destroyed
2642 ok ($fail, "", "$fcn($original) didn't fail");
2643 my $first = ("$]" != 5.006000)
2644 ? substr($utf8_changed, 0, 1)
2645 : $utf8_changed, 0, 1;
2646 ok($ret->[0], ord $first,
2647 "ord of $fcn($original) is $changed");
2648 ok($ret->[1], $utf8_changed,
2649 "UTF-8 of of $fcn($original) is correct");
2650 ok($ret->[2], $should_be_bytes,
2651 "Length of $fcn($original) is $should_be_bytes");
2655 for $truncate (0..2) {
2657 if ("$]" < 5.006000) {
2658 $skip = 'UTF-8 not implemented on this perl';
2660 elsif (! $is_cp && "$]" < 5.007003) {
2661 $skip = "Multi-character case change not implemented until 5.7.3";
2663 elsif ($truncate == 2 && "$]" > 5.025008) {
2664 $skip = "Zero length inputs cause assertion failure; test dies in modern perls";
2666 elsif ($truncate > 0 && length $changed > 1) {
2667 $skip = "Don't test shortened multi-char case changes";
2669 elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) {
2670 $skip = "Don't try to test shortened single bytes";
2678 my $fcn = "to${name}_utf8_safe";
2679 my $utf8 = quotemeta Devel::PPPort::uvoffuni_to_utf8($original);
2680 my $real_truncate = ($truncate < 2)
2681 ? $truncate : $should_be_bytes;
2682 my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)";
2683 my $ret = eval "no warnings; $eval_string" || 0;
2684 my $fail = $@; # Have to save $@, as it gets destroyed
2685 if ($truncate == 0) {
2686 ok ($fail, "", "Didn't fail on full length input");
2687 my $first = ("$]" != 5.006000)
2688 ? substr($utf8_changed, 0, 1)
2689 : $utf8_changed, 0, 1;
2690 ok($ret->[0], ord $first,
2691 "ord of $fcn($original) is $changed");
2692 ok($ret->[1], $utf8_changed,
2693 "UTF-8 of of $fcn($original) is correct");
2694 ok($ret->[2], $should_be_bytes,
2695 "Length of $fcn($original) is $should_be_bytes");
2698 ok ($fail, eval 'qr/Malformed UTF-8 character/',
2699 "Gave appropriate error for short char: $original");
2701 skip("Expected failure means remaining tests for"
2702 . " this aren't relevant", 0);
2710 ok(&Devel::PPPort::av_top_index([1,2,3]), 2);
2711 ok(&Devel::PPPort::av_tindex([1,2,3,4]), 3);