This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Devel-PPPort to match 3.67
[perl5.git] / dist / Devel-PPPort / parts / inc / misc
1 ################################################################################
2 ##
3 ##  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ##  Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ##  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
6 ##
7 ##  This program is free software; you can redistribute it and/or
8 ##  modify it under the same terms as Perl itself.
9 ##
10 ################################################################################
11
12 =provides
13
14 __UNDEFINED__
15 END_EXTERN_C
16 EXTERN_C
17 INT2PTR
18 MUTABLE_PTR
19 NVTYPE
20 PERLIO_FUNCS_CAST
21 PERLIO_FUNCS_DECL
22 PERL_STATIC_INLINE
23 PERL_UNUSED_ARG
24 PERL_UNUSED_CONTEXT
25 PERL_UNUSED_DECL
26 PERL_UNUSED_RESULT
27 PERL_UNUSED_VAR
28 PERL_USE_GCC_BRACE_GROUPS
29 PTR2ul
30 PTRV
31 START_EXTERN_C
32 STMT_END
33 STMT_START
34 SvRX
35 WIDEST_UTYPE
36 XSRETURN
37 NOT_REACHED
38 ASSUME
39
40 =implementation
41
42 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
43 __UNDEFINED__ PERL_STATIC_INLINE static inline
44 #else
45 __UNDEFINED__ PERL_STATIC_INLINE static
46 #endif
47
48 __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
49 __UNDEFINED__ OpHAS_SIBLING(o)      (cBOOL((o)->op_sibling))
50 __UNDEFINED__ OpSIBLING(o)          (0 + (o)->op_sibling)
51 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
52 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
53 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
54 __UNDEFINED__ HEf_SVKEY   -2
55
56 #if defined(DEBUGGING) && !defined(__COVERITY__)
57 __UNDEFINED__ __ASSERT_(statement)  assert(statement),
58 #else
59 __UNDEFINED__ __ASSERT_(statement)
60 #endif
61
62 __UNDEF_NOT_PROVIDED__  __has_builtin(x) 0
63
64 #if __has_builtin(__builtin_unreachable)
65 #  define D_PPP_HAS_BUILTIN_UNREACHABLE
66 #elif (defined(__GNUC__) && (   __GNUC__ > 4                              \
67                              || __GNUC__ == 4 && __GNUC_MINOR__ >= 5))
68 #  define D_PPP_HAS_BUILTIN_UNREACHABLE
69 #endif
70
71 #ifndef ASSUME
72 #  ifdef DEBUGGING
73 #    define ASSUME(x) assert(x)
74 #  elif defined(_MSC_VER)
75 #    define ASSUME(x) __assume(x)
76 #  elif defined(__ARMCC_VERSION)
77 #    define ASSUME(x) __promise(x)
78 #  elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE)
79 #    define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable())
80 #  else
81 #    define ASSUME(x) assert(x)
82 #  endif
83 #endif
84
85 #ifndef NOT_REACHED
86 #  ifdef D_PPP_HAS_BUILTIN_UNREACHABLE
87 #    define NOT_REACHED                                                     \
88         STMT_START {                                                        \
89             ASSUME(!"UNREACHABLE"); __builtin_unreachable();                \
90         } STMT_END
91 #  elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux))
92 #    define NOT_REACHED
93 #  else
94 #    define NOT_REACHED  ASSUME(!"UNREACHABLE")
95 #  endif
96 #endif
97
98 #ifndef WIDEST_UTYPE
99 # ifdef QUADKIND
100 #  ifdef U64TYPE
101 #   define WIDEST_UTYPE U64TYPE
102 #  else
103 #   define WIDEST_UTYPE unsigned Quad_t
104 #  endif
105 # else
106 #  define WIDEST_UTYPE U32
107 # endif
108 #endif
109
110 /* These could become provided if/when they become part of the public API */
111 __UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n)                                    \
112    (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
113 __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u)                                        \
114    (  (sizeof(c) == sizeof(U8))  ? withinCOUNT(((U8)  (c)), (l), ((u) - (l)))  \
115     : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l)))  \
116     : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
117
118 /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a
119  * pointer) */
120 #undef FITS_IN_8_BITS   /* handy.h version uses a core-only constant */
121 __UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) (   (sizeof(c) == 1)               \
122                                     || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
123
124 /* Create the macro for "is'macro'_utf8_safe(s, e)".  For code points below
125  * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
126  * point.  That is so that it can automatically get the bug fixes done in this
127  * file. */
128 #define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro)                             \
129    (((e) - (s)) <= 0                                                        \
130      ? 0                                                                    \
131      : UTF8_IS_INVARIANT((s)[0])                                            \
132        ? is ## macro ## _L1((s)[0])                                         \
133        : (((e) - (s)) < UTF8SKIP(s))                                        \
134           ? 0                                                               \
135           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
136               /* The cast in the line below is only to silence warnings */  \
137             ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
138                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
139                                                      & UTF_START_MASK(2),   \
140                                                   (s)[1])))                 \
141             : is ## macro ## _utf8(s))
142
143 /* Create the macro for "is'macro'_LC_utf8_safe(s, e)".  For code points below
144  * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
145  * point.  That is so that it can automatically get the bug fixes done in this
146  * file. */
147 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro)                          \
148    (((e) - (s)) <= 0                                                        \
149      ? 0                                                                    \
150      : UTF8_IS_INVARIANT((s)[0])                                            \
151        ? is ## macro ## _LC((s)[0])                                         \
152        : (((e) - (s)) < UTF8SKIP(s))                                        \
153           ? 0                                                               \
154           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
155               /* The cast in the line below is only to silence warnings */  \
156             ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
157                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
158                                                      & UTF_START_MASK(2),   \
159                                                   (s)[1])))                 \
160             : is ## macro ## _utf8(s))
161
162 /* A few of the early functions are broken.  For these and the non-LC case,
163  * machine generated code is substituted.  But that code doesn't work for
164  * locales.  This is just like the above macro, but at the end, we call the
165  * macro we've generated for the above 255 case, which is correct since locale
166  * isn't involved.  This will generate extra code to handle the 0-255 inputs,
167  * but hopefully it will be optimized out by the C compiler.  But just in case
168  * it isn't, this macro is only used on the few versions that are broken */
169
170 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro)                   \
171    (((e) - (s)) <= 0                                                        \
172      ? 0                                                                    \
173      : UTF8_IS_INVARIANT((s)[0])                                            \
174        ? is ## macro ## _LC((s)[0])                                         \
175        : (((e) - (s)) < UTF8SKIP(s))                                        \
176           ? 0                                                               \
177           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
178               /* The cast in the line below is only to silence warnings */  \
179             ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
180                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
181                                                      & UTF_START_MASK(2),   \
182                                                   (s)[1])))                 \
183             : is ## macro ## _utf8_safe(s, e))
184
185 __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
186 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
187
188 #ifndef PERL_UNUSED_DECL
189 #  ifdef HASATTRIBUTE
190 #    if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
191 #      define PERL_UNUSED_DECL
192 #    else
193 #      define PERL_UNUSED_DECL __attribute__((unused))
194 #    endif
195 #  else
196 #    define PERL_UNUSED_DECL
197 #  endif
198 #endif
199
200 #ifndef PERL_UNUSED_ARG
201 #  if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */
202 #    include <note.h>
203 #    define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x))
204 #  else
205 #    define PERL_UNUSED_ARG(x) ((void)x)
206 #  endif
207 #endif
208
209 #ifndef PERL_UNUSED_VAR
210 #  define PERL_UNUSED_VAR(x) ((void)x)
211 #endif
212
213 #ifndef PERL_UNUSED_CONTEXT
214 #  ifdef USE_ITHREADS
215 #    define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl)
216 #  else
217 #    define PERL_UNUSED_CONTEXT
218 #  endif
219 #endif
220
221 #ifndef PERL_UNUSED_RESULT
222 #  if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
223 #    define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
224 #  else
225 #    define PERL_UNUSED_RESULT(v) ((void)(v))
226 #  endif
227 #endif
228
229 __UNDEFINED__  NOOP          /*EMPTY*/(void)0
230 __UNDEFINED__  dNOOP         extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL
231
232 #ifndef NVTYPE
233 #  if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
234 #    define NVTYPE long double
235 #  else
236 #    define NVTYPE double
237 #  endif
238 typedef NVTYPE NV;
239 #endif
240
241 #ifndef INT2PTR
242 #  if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
243 #    define PTRV                  UV
244 #    define INT2PTR(any,d)        (any)(d)
245 #  else
246 #    if PTRSIZE == LONGSIZE
247 #      define PTRV                unsigned long
248 #    else
249 #      define PTRV                unsigned
250 #    endif
251 #    define INT2PTR(any,d)        (any)(PTRV)(d)
252 #  endif
253 #endif
254
255 #ifndef PTR2ul
256 #  if PTRSIZE == LONGSIZE
257 #    define PTR2ul(p)     (unsigned long)(p)
258 #  else
259 #    define PTR2ul(p)     INT2PTR(unsigned long,p)
260 #  endif
261 #endif
262
263 __UNDEFINED__  PTR2nat(p)      (PTRV)(p)
264 __UNDEFINED__  NUM2PTR(any,d)  (any)PTR2nat(d)
265 __UNDEFINED__  PTR2IV(p)       INT2PTR(IV,p)
266 __UNDEFINED__  PTR2UV(p)       INT2PTR(UV,p)
267 __UNDEFINED__  PTR2NV(p)       NUM2PTR(NV,p)
268
269 #undef START_EXTERN_C
270 #undef END_EXTERN_C
271 #undef EXTERN_C
272 #ifdef __cplusplus
273 #  define START_EXTERN_C extern "C" {
274 #  define END_EXTERN_C }
275 #  define EXTERN_C extern "C"
276 #else
277 #  define START_EXTERN_C
278 #  define END_EXTERN_C
279 #  define EXTERN_C extern
280 #endif
281
282 #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
283 #  ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
284 __UNDEF_NOT_PROVIDED__  PERL_GCC_BRACE_GROUPS_FORBIDDEN
285 #  endif
286 #endif
287
288 #if  ! defined(__GNUC__) || defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) || defined(__cplusplus)
289 #    undef PERL_USE_GCC_BRACE_GROUPS
290 #else
291 #  ifndef PERL_USE_GCC_BRACE_GROUPS
292 #    define PERL_USE_GCC_BRACE_GROUPS
293 #  endif
294 #endif
295
296 #undef STMT_START
297 #undef STMT_END
298 #if defined(VOIDFLAGS) && defined(PERL_USE_GCC_BRACE_GROUPS)
299 #  define STMT_START    (void)( /* gcc supports ``({ STATEMENTS; })'' */
300 #  define STMT_END      )
301 #else
302 #  if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__)
303 #    define STMT_START  if (1)
304 #    define STMT_END    else (void)0
305 #  else
306 #    define STMT_START  do
307 #    define STMT_END    while (0)
308 #  endif
309 #endif
310
311 __UNDEFINED__  boolSV(b)    ((b) ? &PL_sv_yes : &PL_sv_no)
312
313 /* DEFSV appears first in 5.004_56 */
314 __UNDEFINED__  DEFSV        GvSV(PL_defgv)
315 __UNDEFINED__  SAVE_DEFSV   SAVESPTR(GvSV(PL_defgv))
316 __UNDEFINED__  DEFSV_set(sv) (DEFSV = (sv))
317
318 /* Older perls (<=5.003) lack AvFILLp */
319 __UNDEFINED__  AvFILLp      AvFILL
320
321 __UNDEFINED__  av_tindex    AvFILL
322 __UNDEFINED__  av_top_index AvFILL
323 __UNDEFINED__  av_count(av) (AvFILL(av)+1)
324
325 __UNDEFINED__  ERRSV        get_sv("@",FALSE)
326
327 /* Hint: gv_stashpvn
328  * This function's backport doesn't support the length parameter, but
329  * rather ignores it. Portability can only be ensured if the length
330  * parameter is used for speed reasons, but the length can always be
331  * correctly computed from the string argument.
332  */
333
334 __UNDEFINED__  gv_stashpvn(str,len,create)  gv_stashpv(str,create)
335
336 /* Replace: 1 */
337 __UNDEFINED__  get_cv          perl_get_cv
338 __UNDEFINED__  get_sv          perl_get_sv
339 __UNDEFINED__  get_av          perl_get_av
340 __UNDEFINED__  get_hv          perl_get_hv
341 /* Replace: 0 */
342
343 __UNDEFINED__  dUNDERBAR       dNOOP
344 __UNDEFINED__  UNDERBAR        DEFSV
345
346 __UNDEFINED__  dAX             I32 ax = MARK - PL_stack_base + 1
347 __UNDEFINED__  dITEMS          I32 items = SP - MARK
348
349 __UNDEFINED__  dXSTARG         SV * targ = sv_newmortal()
350
351 __UNDEFINED__  dAXMARK         I32 ax = POPMARK; \
352                                SV ** const mark = PL_stack_base + ax++
353
354
355 __UNDEFINED__  XSprePUSH       (sp = PL_stack_base + ax - 1)
356
357 #if { VERSION < 5.005 }
358 #  undef XSRETURN
359 #  define XSRETURN(off)                                   \
360       STMT_START {                                        \
361           PL_stack_sp = PL_stack_base + ax + ((off) - 1); \
362           return;                                         \
363       } STMT_END
364 #endif
365
366 __UNDEFINED__  XSPROTO(name)   void name(pTHX_ CV* cv)
367 __UNDEFINED__  SVfARG(p)       ((void*)(p))
368
369 __UNDEFINED__  PERL_ABS(x)     ((x) < 0 ? -(x) : (x))
370
371 __UNDEFINED__  dVAR            dNOOP
372
373 __UNDEFINED__  SVf             "_"
374
375 __UNDEFINED__  CPERLscope(x)   x
376
377 __UNDEFINED__  PERL_HASH(hash,str,len) \
378      STMT_START { \
379         const char *s_PeRlHaSh = str; \
380         I32 i_PeRlHaSh = len; \
381         U32 hash_PeRlHaSh = 0; \
382         while (i_PeRlHaSh--) \
383             hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
384         (hash) = hash_PeRlHaSh; \
385     } STMT_END
386
387 #ifndef PERLIO_FUNCS_DECL
388 # ifdef PERLIO_FUNCS_CONST
389 #  define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs
390 #  define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs)
391 # else
392 #  define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs
393 #  define PERLIO_FUNCS_CAST(funcs) (funcs)
394 # endif
395 #endif
396
397 /* provide these typedefs for older perls */
398 #if { VERSION < 5.9.3 }
399
400 # ifdef ARGSproto
401 typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto);
402 # else
403 typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX);
404 # endif
405
406 typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
407
408 #endif
409
410 /* On versions without NATIVE_TO_ASCII, only ASCII is supported */
411 #if defined(EBCDIC) && defined(NATIVE_TO_ASCI)
412 __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
413 __UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
414 __UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
415 __UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
416 #else
417 __UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
418 __UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
419 __UNDEFINED__ NATIVE_TO_UNI(c) (c)
420 __UNDEFINED__ UNI_TO_NATIVE(c) (c)
421 #endif
422
423 /* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
424    EBCDIC is not supported on versions earlier than 5.7.1
425  */
426
427 /* The meaning of this changed; use the modern version */
428 #undef isPSXSPC
429 #undef isPSXSPC_A
430 #undef isPSXSPC_L1
431
432 /* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
433     This is equivalent to the corresponding isSPACE-type macro.  On perls
434     before 5.18, this matched a vertical tab and SPACE didn't.  But the
435     ppport.h SPACE version does match VT in all perl releases.  Since VT's are
436     extremely rarely found in real-life files, this difference effectively
437     doesn't matter */
438
439 /* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
440     Until Perl 5.18, this did not match the vertical tab (VT).  The ppport.h
441     version does match it in all perl releases. Since VT's are extremely rarely
442     found in real-life files, this difference effectively doesn't matter */
443
444 #ifdef EBCDIC
445
446 /* This is the first version where these macros are fully correct on EBCDIC
447  * platforms.  Relying on the C library functions, as earlier releases did,
448  * causes problems with locales */
449 # if { VERSION < 5.22.0 }
450 #  undef isALNUM
451 #  undef isALNUM_A
452 #  undef isALNUM_L1
453 #  undef isALNUMC
454 #  undef isALNUMC_A
455 #  undef isALNUMC_L1
456 #  undef isALPHA
457 #  undef isALPHA_A
458 #  undef isALPHA_L1
459 #  undef isALPHANUMERIC
460 #  undef isALPHANUMERIC_A
461 #  undef isALPHANUMERIC_L1
462 #  undef isASCII
463 #  undef isASCII_A
464 #  undef isASCII_L1
465 #  undef isBLANK
466 #  undef isBLANK_A
467 #  undef isBLANK_L1
468 #  undef isCNTRL
469 #  undef isCNTRL_A
470 #  undef isCNTRL_L1
471 #  undef isDIGIT
472 #  undef isDIGIT_A
473 #  undef isDIGIT_L1
474 #  undef isGRAPH
475 #  undef isGRAPH_A
476 #  undef isGRAPH_L1
477 #  undef isIDCONT
478 #  undef isIDCONT_A
479 #  undef isIDCONT_L1
480 #  undef isIDFIRST
481 #  undef isIDFIRST_A
482 #  undef isIDFIRST_L1
483 #  undef isLOWER
484 #  undef isLOWER_A
485 #  undef isLOWER_L1
486 #  undef isOCTAL
487 #  undef isOCTAL_A
488 #  undef isOCTAL_L1
489 #  undef isPRINT
490 #  undef isPRINT_A
491 #  undef isPRINT_L1
492 #  undef isPUNCT
493 #  undef isPUNCT_A
494 #  undef isPUNCT_L1
495 #  undef isSPACE
496 #  undef isSPACE_A
497 #  undef isSPACE_L1
498 #  undef isUPPER
499 #  undef isUPPER_A
500 #  undef isUPPER_L1
501 #  undef isWORDCHAR
502 #  undef isWORDCHAR_A
503 #  undef isWORDCHAR_L1
504 #  undef isXDIGIT
505 #  undef isXDIGIT_A
506 #  undef isXDIGIT_L1
507 # endif
508
509 __UNDEFINED__ isASCII(c)    (isCNTRL(c) || isPRINT(c))
510
511         /* The below is accurate for all EBCDIC code pages supported by
512          * all the versions of Perl overridden by this */
513 __UNDEFINED__ isCNTRL(c)    (    (c) == '\0' || (c) == '\a' || (c) == '\b'      \
514                              ||  (c) == '\f' || (c) == '\n' || (c) == '\r'      \
515                              ||  (c) == '\t' || (c) == '\v'                     \
516                              || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */      \
517                              ||  (c) == 7    /* U+7F DEL */                     \
518                              || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */       \
519                                                       /* DLE, DC[1-3] */        \
520                              ||  (c) == 0x18 /* U+18 CAN */                     \
521                              ||  (c) == 0x19 /* U+19 EOM */                     \
522                              || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */      \
523                              ||  (c) == 0x26 /* U+17 ETB */                     \
524                              ||  (c) == 0x27 /* U+1B ESC */                     \
525                              ||  (c) == 0x2D /* U+05 ENQ */                     \
526                              ||  (c) == 0x2E /* U+06 ACK */                     \
527                              ||  (c) == 0x32 /* U+16 SYN */                     \
528                              ||  (c) == 0x37 /* U+04 EOT */                     \
529                              ||  (c) == 0x3C /* U+14 DC4 */                     \
530                              ||  (c) == 0x3D /* U+15 NAK */                     \
531                              ||  (c) == 0x3F /* U+1A SUB */                     \
532                             )
533
534 #if '^' == 106    /* EBCDIC POSIX-BC */
535 #  define D_PPP_OUTLIER_CONTROL 0x5F
536 #else   /* EBCDIC 1047 037 */
537 #  define D_PPP_OUTLIER_CONTROL 0xFF
538 #endif
539
540 /* The controls are everything below blank, plus one outlier */
541 __UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' '                           \
542                           || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
543 /* The ordering of the tests in this and isUPPER are to exclude most characters
544  * early */
545 __UNDEFINED__ isLOWER(c)    (        (c) >= 'a' && (c) <= 'z'                   \
546                              &&  (   (c) <= 'i'                                 \
547                                  || ((c) >= 'j' && (c) <= 'r')                  \
548                                  ||  (c) >= 's'))
549 __UNDEFINED__ isUPPER(c)    (        (c) >= 'A' && (c) <= 'Z'                   \
550                              && (    (c) <= 'I'                                 \
551                                  || ((c) >= 'J' && (c) <= 'R')                  \
552                                  ||  (c) >= 'S'))
553
554 #else   /* Above is EBCDIC; below is ASCII */
555
556 # if { VERSION < 5.4.0 }
557 /* The implementation of these in older perl versions can give wrong results if
558  * the C program locale is set to other than the C locale */
559 #  undef isALNUM
560 #  undef isALNUM_A
561 #  undef isALPHA
562 #  undef isALPHA_A
563 #  undef isDIGIT
564 #  undef isDIGIT_A
565 #  undef isIDFIRST
566 #  undef isIDFIRST_A
567 #  undef isLOWER
568 #  undef isLOWER_A
569 #  undef isUPPER
570 #  undef isUPPER_A
571 # endif
572
573 #  if { VERSION == 5.7.0 } /* this perl made space GRAPH */
574 #    undef isGRAPH
575 #  endif
576
577 # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
578 #  undef isCNTRL
579 # endif
580
581 # if { VERSION < 5.10.0 }
582 /* earlier perls included all of the isSPACE() characters, which is wrong. The
583  * version provided by Devel::PPPort always overrides an existing buggy
584  * version. */
585 #  undef isPRINT
586 #  undef isPRINT_A
587 # endif
588
589 # if { VERSION < 5.14.0 }
590 /* earlier perls always returned true if the parameter was a signed char */
591 #  undef isASCII
592 #  undef isASCII_A
593 # endif
594
595 # if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
596 #  undef isPUNCT_L1
597 # endif
598
599 # if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
600 #  undef isALNUMC_L1
601 #endif
602
603 # if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
604 #  undef isSPACE
605 #  undef isSPACE_A
606 #  undef isSPACE_L1
607
608 # endif
609
610 __UNDEFINED__ isASCII(c)        ((WIDEST_UTYPE) (c) <= 127)
611 __UNDEFINED__ isCNTRL(c)        ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
612 __UNDEFINED__ isCNTRL_L1(c)     (   (WIDEST_UTYPE) (c) < ' '                \
613                                  || inRANGE((c), 0x7F, 0x9F))
614 __UNDEFINED__ isLOWER(c)        inRANGE((c), 'a', 'z')
615 __UNDEFINED__ isUPPER(c)        inRANGE((c), 'A', 'Z')
616
617 #endif /* Below are definitions common to EBCDIC and ASCII */
618
619 __UNDEFINED__ isASCII_L1(c)     isASCII(c)
620 __UNDEFINED__ isASCII_LC(c)     isASCII(c)
621 __UNDEFINED__ isALNUM(c)        isWORDCHAR(c)
622 __UNDEFINED__ isALNUMC(c)       isALPHANUMERIC(c)
623 __UNDEFINED__ isALNUMC_L1(c)    isALPHANUMERIC_L1(c)
624 __UNDEFINED__ isALPHA(c)        (isUPPER(c) || isLOWER(c))
625 __UNDEFINED__ isALPHA_L1(c)     (isUPPER_L1(c) || isLOWER_L1(c))
626 __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
627 __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
628 __UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c))
629 __UNDEFINED__ isBLANK(c)        ((c) == ' ' || (c) == '\t')
630 __UNDEFINED__ isBLANK_L1(c) (    isBLANK(c)                                    \
631                              || (   FITS_IN_8_BITS(c)                          \
632                                  && NATIVE_TO_LATIN1((U8) c) == 0xA0))
633 __UNDEFINED__ isBLANK_LC(c)     isBLANK(c)
634 __UNDEFINED__ isDIGIT(c)        inRANGE(c, '0', '9')
635 __UNDEFINED__ isDIGIT_L1(c)     isDIGIT(c)
636 __UNDEFINED__ isGRAPH(c)        (isWORDCHAR(c) || isPUNCT(c))
637 __UNDEFINED__ isGRAPH_L1(c)     (   isPRINT_L1(c)                              \
638                                  && (c) != ' '                                 \
639                                  && NATIVE_TO_LATIN1((U8) c) != 0xA0)
640 __UNDEFINED__ isIDCONT(c)       isWORDCHAR(c)
641 __UNDEFINED__ isIDCONT_L1(c)    isWORDCHAR_L1(c)
642 __UNDEFINED__ isIDCONT_LC(c)    isWORDCHAR_LC(c)
643 __UNDEFINED__ isIDFIRST(c)      (isALPHA(c) || (c) == '_')
644 __UNDEFINED__ isIDFIRST_L1(c)   (isALPHA_L1(c) || (U8) (c) == '_')
645 __UNDEFINED__ isIDFIRST_LC(c)   (isALPHA_LC(c) || (U8) (c) == '_')
646 __UNDEFINED__ isLOWER_L1(c) (    isLOWER(c)                                    \
647                              || (   FITS_IN_8_BITS(c)                          \
648                                  && (  (   NATIVE_TO_LATIN1((U8) c) >= 0xDF    \
649                                         && NATIVE_TO_LATIN1((U8) c) != 0xF7)   \
650                                      || NATIVE_TO_LATIN1((U8) c) == 0xAA       \
651                                      || NATIVE_TO_LATIN1((U8) c) == 0xBA       \
652                                      || NATIVE_TO_LATIN1((U8) c) == 0xB5)))
653 __UNDEFINED__ isOCTAL(c)        (((WIDEST_UTYPE)((c)) & ~7) == '0')
654 __UNDEFINED__ isOCTAL_L1(c)     isOCTAL(c)
655 __UNDEFINED__ isPRINT(c)        (isGRAPH(c) || (c) == ' ')
656 __UNDEFINED__ isPRINT_L1(c)     (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c))
657 __UNDEFINED__ isPSXSPC(c)       isSPACE(c)
658 __UNDEFINED__ isPSXSPC_L1(c)    isSPACE_L1(c)
659 __UNDEFINED__ isPUNCT(c)    (   (c) == '-' || (c) == '!' || (c) == '"'         \
660                              || (c) == '#' || (c) == '$' || (c) == '%'         \
661                              || (c) == '&' || (c) == '\'' || (c) == '('        \
662                              || (c) == ')' || (c) == '*' || (c) == '+'         \
663                              || (c) == ',' || (c) == '.' || (c) == '/'         \
664                              || (c) == ':' || (c) == ';' || (c) == '<'         \
665                              || (c) == '=' || (c) == '>' || (c) == '?'         \
666                              || (c) == '@' || (c) == '[' || (c) == '\\'        \
667                              || (c) == ']' || (c) == '^' || (c) == '_'         \
668                              || (c) == '`' || (c) == '{' || (c) == '|'         \
669                              || (c) == '}' || (c) == '~')
670 __UNDEFINED__ isPUNCT_L1(c)  (    isPUNCT(c)                                   \
671                               || (  FITS_IN_8_BITS(c)                          \
672                                   && (   NATIVE_TO_LATIN1((U8) c) == 0xA1      \
673                                       || NATIVE_TO_LATIN1((U8) c) == 0xA7      \
674                                       || NATIVE_TO_LATIN1((U8) c) == 0xAB      \
675                                       || NATIVE_TO_LATIN1((U8) c) == 0xB6      \
676                                       || NATIVE_TO_LATIN1((U8) c) == 0xB7      \
677                                       || NATIVE_TO_LATIN1((U8) c) == 0xBB      \
678                                       || NATIVE_TO_LATIN1((U8) c) == 0xBF)))
679 __UNDEFINED__ isSPACE(c)        (   isBLANK(c) || (c) == '\n' || (c) == '\r'   \
680                                  || (c) == '\v' || (c) == '\f')
681 __UNDEFINED__ isSPACE_L1(c) (    isSPACE(c)                                    \
682                              || (FITS_IN_8_BITS(c)                             \
683                                  && (   NATIVE_TO_LATIN1((U8) c) == 0x85       \
684                                      || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
685 __UNDEFINED__ isUPPER_L1(c) (   isUPPER(c)                                     \
686                              || (FITS_IN_8_BITS(c)                             \
687                                  && (   NATIVE_TO_LATIN1((U8) c) >= 0xC0       \
688                                      && NATIVE_TO_LATIN1((U8) c) <= 0xDE       \
689                                      && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
690 __UNDEFINED__ isWORDCHAR(c)     (isALPHANUMERIC(c) || (c) == '_')
691 __UNDEFINED__ isWORDCHAR_L1(c)  (isIDFIRST_L1(c) || isDIGIT(c))
692 __UNDEFINED__ isWORDCHAR_LC(c)  (isIDFIRST_LC(c) || isDIGIT_LC(c))
693 __UNDEFINED__ isXDIGIT(c)       (   isDIGIT(c)                                 \
694                                  || inRANGE((c), 'a', 'f')                     \
695                                  || inRANGE((c), 'A', 'F'))
696 __UNDEFINED__ isXDIGIT_L1(c)    isXDIGIT(c)
697 __UNDEFINED__ isXDIGIT_LC(c)    isxdigit(c)
698
699 __UNDEFINED__ isALNUM_A(c)         isALNUM(c)
700 __UNDEFINED__ isALNUMC_A(c)        isALNUMC(c)
701 __UNDEFINED__ isALPHA_A(c)         isALPHA(c)
702 __UNDEFINED__ isALPHANUMERIC_A(c)  isALPHANUMERIC(c)
703 __UNDEFINED__ isASCII_A(c)         isASCII(c)
704 __UNDEFINED__ isBLANK_A(c)         isBLANK(c)
705 __UNDEFINED__ isCNTRL_A(c)         isCNTRL(c)
706 __UNDEFINED__ isDIGIT_A(c)         isDIGIT(c)
707 __UNDEFINED__ isGRAPH_A(c)         isGRAPH(c)
708 __UNDEFINED__ isIDCONT_A(c)        isIDCONT(c)
709 __UNDEFINED__ isIDFIRST_A(c)       isIDFIRST(c)
710 __UNDEFINED__ isLOWER_A(c)         isLOWER(c)
711 __UNDEFINED__ isOCTAL_A(c)         isOCTAL(c)
712 __UNDEFINED__ isPRINT_A(c)         isPRINT(c)
713 __UNDEFINED__ isPSXSPC_A(c)        isPSXSPC(c)
714 __UNDEFINED__ isPUNCT_A(c)         isPUNCT(c)
715 __UNDEFINED__ isSPACE_A(c)         isSPACE(c)
716 __UNDEFINED__ isUPPER_A(c)         isUPPER(c)
717 __UNDEFINED__ isWORDCHAR_A(c)      isWORDCHAR(c)
718 __UNDEFINED__ isXDIGIT_A(c)        isXDIGIT(c)
719
720 __UNDEFINED__ isASCII_utf8_safe(s,e)  (((e) - (s)) <= 0 ? 0 : isASCII(*(s)))
721 __UNDEFINED__ isASCII_uvchr(c)    (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0)
722
723 #if { VERSION >= 5.006 }
724 #  ifdef isALPHA_uni    /* If one defined, all are; this is just an exemplar */
725 #    define D_PPP_is_ctype(upper, lower, c)                                 \
726         (FITS_IN_8_BITS(c)                                                  \
727         ? is ## upper ## _L1(c)                                             \
728         : is ## upper ## _uni((UV) (c)))    /* _uni is old synonym */
729 #  else
730 #    define D_PPP_is_ctype(upper, lower, c)                                 \
731         (FITS_IN_8_BITS(c)                                                  \
732         ? is ## upper ## _L1(c)                                             \
733         : is_uni_ ## lower((UV) (c)))     /* is_uni_ is even older */
734 #  endif
735
736 __UNDEFINED__ isALPHA_uvchr(c)    D_PPP_is_ctype(ALPHA, alpha, c)
737 __UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c))
738 #  ifdef is_uni_blank
739 __UNDEFINED__ isBLANK_uvchr(c)    D_PPP_is_ctype(BLANK, blank, c)
740 #  else
741 __UNDEFINED__ isBLANK_uvchr(c)  (FITS_IN_8_BITS(c)                          \
742                                  ? isBLANK_L1(c)                            \
743                                  : (   (UV) (c) == 0x1680 /* Unicode 3.0 */ \
744                                     || inRANGE((UV) (c), 0x2000, 0x200A)    \
745                                     || (UV) (c) == 0x202F  /* Unicode 3.0 */\
746                                     || (UV) (c) == 0x205F  /* Unicode 3.2 */\
747                                     || (UV) (c) == 0x3000))
748 #  endif
749 __UNDEFINED__ isCNTRL_uvchr(c)    D_PPP_is_ctype(CNTRL, cntrl, c)
750 __UNDEFINED__ isDIGIT_uvchr(c)    D_PPP_is_ctype(DIGIT, digit, c)
751 __UNDEFINED__ isGRAPH_uvchr(c)    D_PPP_is_ctype(GRAPH, graph, c)
752 __UNDEFINED__ isIDCONT_uvchr(c)   isWORDCHAR_uvchr(c)
753 __UNDEFINED__ isIDFIRST_uvchr(c)  D_PPP_is_ctype(IDFIRST, idfirst, c)
754 __UNDEFINED__ isLOWER_uvchr(c)    D_PPP_is_ctype(LOWER, lower, c)
755 __UNDEFINED__ isPRINT_uvchr(c)    D_PPP_is_ctype(PRINT, print, c)
756 __UNDEFINED__ isPSXSPC_uvchr(c)   isSPACE_uvchr(c)
757 __UNDEFINED__ isPUNCT_uvchr(c)    D_PPP_is_ctype(PUNCT, punct, c)
758 __UNDEFINED__ isSPACE_uvchr(c)    D_PPP_is_ctype(SPACE, space, c)
759 __UNDEFINED__ isUPPER_uvchr(c)    D_PPP_is_ctype(UPPER, upper, c)
760 __UNDEFINED__ isXDIGIT_uvchr(c)   D_PPP_is_ctype(XDIGIT, xdigit, c)
761 __UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c)                        \
762                                ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c))
763
764 __UNDEFINED__ isALPHA_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
765 #  ifdef isALPHANUMERIC_utf8
766 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                 \
767                                 D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
768 #  else
769 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                 \
770                         (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
771 #  endif
772
773 /* This was broken before 5.18, and just use this instead of worrying about
774  * which releases the official works on */
775 #  if 'A' == 65
776 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
777 ( ( LIKELY((e) > (s)) ) ?   /* Machine generated */                         \
778     ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1        \
779     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
780             ( ( 0xC2 == ((const U8*)s)[0] ) ?                               \
781                 ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
782             : ( 0xE1 == ((const U8*)s)[0] ) ?                               \
783                 ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
784             : ( 0xE2 == ((const U8*)s)[0] ) ?                               \
785                 ( ( 0x80 == ((const U8*)s)[1] ) ?                           \
786                     ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
787                 : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
788             : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
789         : 0 )                                                               \
790  : 0 )
791
792 #  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
793
794 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
795 ( ( LIKELY((e) > (s)) ) ?                                                   \
796     ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
797     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
798             ( ( 0x80 == ((const U8*)s)[0] ) ?                               \
799                 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
800             : ( 0xBC == ((const U8*)s)[0] ) ?                               \
801                 ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
802             : ( 0xCA == ((const U8*)s)[0] ) ?                               \
803                 ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
804                     ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
805                 : ( 0x42 == ((const U8*)s)[1] ) ?                           \
806                     ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
807                 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
808             : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
809         : 0 )                                                               \
810 : 0 )
811
812 #  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
813
814 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
815 ( ( LIKELY((e) > (s)) ) ?                                                   \
816     ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
817     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
818             ( ( 0x78 == ((const U8*)s)[0] ) ?                               \
819                 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
820             : ( 0xBD == ((const U8*)s)[0] ) ?                               \
821                 ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
822             : ( 0xCA == ((const U8*)s)[0] ) ?                               \
823                 ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
824                     ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
825                 : ( 0x42 == ((const U8*)s)[1] ) ?                           \
826                     ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
827                 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
828             : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
829         : 0 )                                                               \
830 : 0 )
831
832 #  else
833 #    error Unknown character set
834 #  endif
835
836 __UNDEFINED__ isCNTRL_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
837 __UNDEFINED__ isDIGIT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
838 __UNDEFINED__ isGRAPH_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
839 #  ifdef isIDCONT_utf8
840 __UNDEFINED__ isIDCONT_utf8_safe(s,e)   D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
841 #  else
842 __UNDEFINED__ isIDCONT_utf8_safe(s,e)   isWORDCHAR_utf8_safe(s,e)
843 #  endif
844
845 __UNDEFINED__ isIDFIRST_utf8_safe(s,e)  D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
846 __UNDEFINED__ isLOWER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
847 __UNDEFINED__ isPRINT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
848
849 #  undef isPSXSPC_utf8_safe   /* Use the modern definition */
850 __UNDEFINED__ isPSXSPC_utf8_safe(s,e)   isSPACE_utf8_safe(s,e)
851
852 __UNDEFINED__ isPUNCT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
853 __UNDEFINED__ isSPACE_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
854 __UNDEFINED__ isUPPER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
855
856 #  ifdef isWORDCHAR_utf8
857 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
858 #  else
859 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e)                                        \
860                                (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_')
861 #  endif
862
863 /* This was broken before 5.12, and just use this instead of worrying about
864  * which releases the official works on */
865 #  if 'A' == 65
866 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
867 ( ( LIKELY((e) > (s)) ) ?                                                   \
868     ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
869     : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
870                     ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
871                 : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
872 : 0 )
873
874 #  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
875
876 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
877 ( ( LIKELY((e) > (s)) ) ?                                                   \
878     ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
879     : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
880                         ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
881                     : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
882 : 0 )
883
884 #  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
885
886 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
887 ( ( LIKELY((e) > (s)) ) ?                                                   \
888     ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
889     : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
890                         ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
891                     : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
892 : 0 )
893
894 #  else
895 #    error Unknown character set
896 #  endif
897
898 __UNDEFINED__ isALPHA_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA)
899 #  ifdef isALPHANUMERIC_utf8
900 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e)                                 \
901                                 D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC)
902 #  else
903 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e)                              \
904                         (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e))
905 #  endif
906
907 __UNDEFINED__  isBLANK_LC_utf8_safe(s,e)                                       \
908                             D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK)
909 __UNDEFINED__ isCNTRL_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL)
910 __UNDEFINED__ isDIGIT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT)
911 __UNDEFINED__ isGRAPH_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH)
912 #  ifdef isIDCONT_utf8
913 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e)   D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT)
914 #  else
915 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e)   isWORDCHAR_LC_utf8_safe(s,e)
916 #  endif
917
918 __UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e)  D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST)
919 __UNDEFINED__ isLOWER_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER)
920 __UNDEFINED__ isPRINT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT)
921
922 #  undef isPSXSPC_LC_utf8_safe   /* Use the modern definition */
923 __UNDEFINED__ isPSXSPC_LC_utf8_safe(s,e)   isSPACE_LC_utf8_safe(s,e)
924
925 __UNDEFINED__ isPUNCT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT)
926 __UNDEFINED__ isSPACE_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE)
927 __UNDEFINED__ isUPPER_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER)
928
929 #  ifdef isWORDCHAR_utf8
930 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR)
931 #  else
932 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e)                                             \
933                                (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_')
934 #  endif
935
936 __UNDEFINED__  isXDIGIT_LC_utf8_safe(s,e)                                       \
937                             D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT)
938
939 /* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe,
940  * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe,
941  * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe,
942  * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe,
943  * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe,
944  * isXDIGIT_utf8_safe,
945  * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe,
946  * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe,
947  * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe,
948  * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe,
949  * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe,
950  * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe,
951  * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr,
952  * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr,
953  * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr,
954  * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr,
955  * isWORDCHAR_uvchr, isXDIGIT_uvchr
956  *
957  * The UTF-8 handling is buggy in early Perls, and this can give inaccurate
958  * results for code points above 0xFF, until the implementation started
959  * settling down in 5.12 and 5.14 */
960
961 #endif
962
963 #define D_PPP_TOO_SHORT_MSG  "Malformed UTF-8 character starting with:"      \
964                              " \\x%02x (too short; %d bytes available, need" \
965                              " %d)\n"
966 /* Perls starting here had a new API which handled multi-character results */
967 #if { VERSION >= 5.7.3 }
968
969 __UNDEFINED__ toLOWER_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l))
970 __UNDEFINED__ toUPPER_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l))
971 __UNDEFINED__ toTITLE_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l))
972 __UNDEFINED__ toFOLD_uvchr(c, s, l)   UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l))
973
974 #  if { VERSION != 5.15.6 }     /* Just this version is broken */
975
976       /* Prefer the macro to the function */
977 #    if defined toLOWER_utf8
978 #      define D_PPP_TO_LOWER_CALLEE(s,r,l)    toLOWER_utf8(s,r,l)
979 #    else
980 #      define D_PPP_TO_LOWER_CALLEE(s,r,l)    to_utf8_lower(s,r,l)
981 #    endif
982 #    if defined toTITLE_utf8
983 #      define D_PPP_TO_TITLE_CALLEE(s,r,l)    toTITLE_utf8(s,r,l)
984 #    else
985 #      define D_PPP_TO_TITLE_CALLEE(s,r,l)    to_utf8_title(s,r,l)
986 #    endif
987 #    if defined toUPPER_utf8
988 #      define D_PPP_TO_UPPER_CALLEE(s,r,l)    toUPPER_utf8(s,r,l)
989 #    else
990 #      define D_PPP_TO_UPPER_CALLEE(s,r,l)    to_utf8_upper(s,r,l)
991 #    endif
992 #    if defined toFOLD_utf8
993 #      define D_PPP_TO_FOLD_CALLEE(s,r,l)     toFOLD_utf8(s,r,l)
994 #    else
995 #      define D_PPP_TO_FOLD_CALLEE(s,r,l)     to_utf8_fold(s,r,l)
996 #    endif
997 #  else     /* Below is 5.15.6, which failed to make the macros available
998 #              outside of core, so we have to use the 'Perl_' form.  khw
999 #              decided it was easier to just handle this case than have to
1000 #              document the exception, and make an exception in the tests below
1001 #              */
1002 #    define D_PPP_TO_LOWER_CALLEE(s,r,l)                                    \
1003                         Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL)
1004 #    define D_PPP_TO_TITLE_CALLEE(s,r,l)                                    \
1005                         Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL)
1006 #    define D_PPP_TO_UPPER_CALLEE(s,r,l)                                    \
1007                         Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL)
1008 #    define D_PPP_TO_FOLD_CALLEE(s,r,l)                                     \
1009             Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL)
1010 #  endif
1011
1012 /* The actual implementation of the backported macros.  If too short, croak,
1013  * otherwise call the original that doesn't have an upper limit parameter */
1014 #  define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l)                        \
1015     (((((e) - (s)) <= 0)                                                    \
1016          /* We could just do nothing, but modern perls croak */             \
1017       ? (croak("Attempting case change on zero length string"),             \
1018          0) /* So looks like it returns something, and will compile */      \
1019       : ((e) - (s)) < UTF8SKIP(s))                                          \
1020         ? (croak(D_PPP_TOO_SHORT_MSG,                                       \
1021                                s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1022            0)                                                               \
1023         : D_PPP_TO_ ## name ## _CALLEE(s,r,l))
1024
1025 __UNDEFINED__  toUPPER_utf8_safe(s,e,r,l)                                   \
1026                         D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l)
1027 __UNDEFINED__  toLOWER_utf8_safe(s,e,r,l)                                   \
1028                         D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l)
1029 __UNDEFINED__  toTITLE_utf8_safe(s,e,r,l)                                   \
1030                         D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l)
1031 __UNDEFINED__  toFOLD_utf8_safe(s,e,r,l)                                    \
1032                         D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l)
1033
1034 #elif { VERSION >= 5.006 }
1035
1036 /* Here we have UTF-8 support, but using the original API where the case
1037  * changing functions merely returned the changed code point; hence they
1038  * couldn't handle multi-character results. */
1039
1040 #  ifdef uvchr_to_utf8
1041 #    define D_PPP_UV_TO_UTF8 uvchr_to_utf8
1042 #  else
1043 #    define D_PPP_UV_TO_UTF8 uv_to_utf8
1044 #  endif
1045
1046    /* Get the utf8 of the case changed value, and store its length; then have
1047     * to re-calculate the changed case value in order to return it */
1048 #  define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l)                  \
1049         (*(l) = (D_PPP_UV_TO_UTF8(s,                                        \
1050                  UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)),  \
1051         UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c))))
1052
1053 __UNDEFINED__ toLOWER_uvchr(c, s, l)                                        \
1054                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l)
1055 __UNDEFINED__ toUPPER_uvchr(c, s, l)                                        \
1056                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l)
1057 __UNDEFINED__ toTITLE_uvchr(c, s, l)                                        \
1058                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l)
1059 __UNDEFINED__ toFOLD_uvchr(c, s, l)   toLOWER_uvchr(c, s, l)
1060
1061 #  define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l)                \
1062     (((((e) - (s)) <= 0)                                                    \
1063       ? (croak("Attempting case change on zero length string"),             \
1064          0) /* So looks like it returns something, and will compile */      \
1065       : ((e) - (s)) < UTF8SKIP(s))                                          \
1066         ? (croak(D_PPP_TOO_SHORT_MSG,                                       \
1067                                s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1068            0)                                                               \
1069           /* Get the changed code point and store its UTF-8 */              \
1070         : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)),                         \
1071             /* Then store its length, and re-get code point for return */   \
1072             *(l) = UTF8SKIP(r), to_utf8_ ## name(r))
1073
1074 /* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe,
1075  * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr
1076     The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1077     this backport does not correct them.
1078
1079     In perls before 7.3, multi-character case changing is not implemented; this
1080     backport uses the simple case changes available in those perls. */
1081
1082 __UNDEFINED__  toUPPER_utf8_safe(s,e,r,l)                                   \
1083                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l)
1084 __UNDEFINED__  toLOWER_utf8_safe(s,e,r,l)                                   \
1085                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l)
1086 __UNDEFINED__  toTITLE_utf8_safe(s,e,r,l)                                   \
1087                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l)
1088
1089  /* Warning: toFOLD_utf8_safe, toFOLD_uvchr
1090     The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1091     this backport does not correct them.
1092
1093     In perls before 7.3, case folding is not implemented; instead, this
1094     backport substitutes simple (not multi-character, which isn't available)
1095     lowercasing.  This gives the correct result in most, but not all, instances
1096     */
1097
1098 __UNDEFINED__  toFOLD_utf8_safe(s,e,r,l)  toLOWER_utf8_safe(s,e,r,l)
1099
1100 #endif
1101
1102 /* Until we figure out how to support this in older perls... */
1103 #if { VERSION >= 5.8.0 }
1104
1105 __UNDEFINED__ HeUTF8(he)        ((HeKLEN(he) == HEf_SVKEY) ?            \
1106                                  SvUTF8(HeKEY_sv(he)) :                 \
1107                                  (U32)HeKUTF8(he))
1108
1109 #endif
1110
1111 __UNDEFINED__ C_ARRAY_LENGTH(a)         (sizeof(a)/sizeof((a)[0]))
1112 __UNDEFINED__ C_ARRAY_END(a)            ((a) + C_ARRAY_LENGTH(a))
1113
1114 __UNDEFINED__ LIKELY(x) (x)
1115 __UNDEFINED__ UNLIKELY(x) (x)
1116
1117 #ifndef MUTABLE_PTR
1118 #if defined(PERL_USE_GCC_BRACE_GROUPS)
1119 #  define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
1120 #else
1121 #  define MUTABLE_PTR(p) ((void *) (p))
1122 #endif
1123 #endif
1124
1125 __UNDEFINED__ MUTABLE_AV(p)   ((AV *)MUTABLE_PTR(p))
1126 __UNDEFINED__ MUTABLE_CV(p)   ((CV *)MUTABLE_PTR(p))
1127 __UNDEFINED__ MUTABLE_GV(p)   ((GV *)MUTABLE_PTR(p))
1128 __UNDEFINED__ MUTABLE_HV(p)   ((HV *)MUTABLE_PTR(p))
1129 __UNDEFINED__ MUTABLE_IO(p)   ((IO *)MUTABLE_PTR(p))
1130 __UNDEFINED__ MUTABLE_SV(p)   ((SV *)MUTABLE_PTR(p))
1131
1132 =xsmisc
1133
1134 typedef XSPROTO(XSPROTO_test_t);
1135 typedef XSPROTO_test_t *XSPROTO_test_t_ptr;
1136
1137 XS(XS_Devel__PPPort_dXSTARG);  /* prototype */
1138 XS(XS_Devel__PPPort_dXSTARG)
1139 {
1140   dXSARGS;
1141   dXSTARG;
1142   IV iv;
1143
1144   PERL_UNUSED_VAR(cv);
1145   SP -= items;
1146   iv = SvIV(ST(0)) + 1;
1147   PUSHi(iv);
1148   XSRETURN(1);
1149 }
1150
1151 XS(XS_Devel__PPPort_dAXMARK);  /* prototype */
1152 XS(XS_Devel__PPPort_dAXMARK)
1153 {
1154   dSP;
1155   dAXMARK;
1156   dITEMS;
1157   IV iv;
1158
1159   PERL_UNUSED_VAR(cv);
1160   SP -= items;
1161   iv = SvIV(ST(0)) - 1;
1162   mPUSHi(iv);
1163   XSRETURN(1);
1164 }
1165
1166 =xsboot
1167
1168 {
1169   XSPROTO_test_t_ptr p = &XS_Devel__PPPort_dXSTARG;
1170   newXS("Devel::PPPort::dXSTARG", *p, file);
1171 }
1172 newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file);
1173
1174 =xsubs
1175
1176 int
1177 OpSIBLING_tests()
1178         PREINIT:
1179                 OP *x;
1180                 OP *kid;
1181                 OP *middlekid;
1182                 OP *lastkid;
1183                 int count = 0;
1184                 int failures = 0;
1185                 int i;
1186         CODE:
1187                 x = newOP(OP_PUSHMARK, 0);
1188
1189                 /* No siblings yet! */
1190                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1191                         failures++; warn("Op should not have had a sib");
1192                 }
1193
1194
1195                 /* Add 2 siblings */
1196                 kid = x;
1197
1198                 for (i = 0; i < 2; i++) {
1199                         OP *newsib = newOP(OP_PUSHMARK, 0);
1200                         OpMORESIB_set(kid, newsib);
1201
1202                         kid = OpSIBLING(kid);
1203                         lastkid = kid;
1204                 }
1205                 middlekid = OpSIBLING(x);
1206
1207                 /* Should now have a sibling */
1208                 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1209                         failures++; warn("Op should have had a sib after moresib_set");
1210                 }
1211
1212                 /* Count the siblings */
1213                 for (kid = OpSIBLING(x); kid; kid = OpSIBLING(kid)) {
1214                         count++;
1215                 }
1216
1217                 if (count != 2) {
1218                         failures++; warn("Kid had %d sibs, expected 2", count);
1219                 }
1220
1221                 if (OpHAS_SIBLING(lastkid) || OpSIBLING(lastkid)) {
1222                         failures++; warn("Last kid should not have a sib");
1223                 }
1224
1225                 /* Really sets the parent, and says 'no more siblings' */
1226                 OpLASTSIB_set(x, lastkid);
1227
1228                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1229                         failures++; warn("OpLASTSIB_set failed?");
1230                 }
1231
1232                 /* Restore the kid */
1233                 OpMORESIB_set(x, lastkid);
1234
1235                 /* Try to remove it again */
1236                 OpLASTSIB_set(x, NULL);
1237
1238                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1239                         failures++; warn("OpLASTSIB_set with NULL failed?");
1240                 }
1241
1242                 /* Try to restore with maybesib_set */
1243                 OpMAYBESIB_set(x, lastkid, NULL);
1244
1245                 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1246                         failures++; warn("Op should have had a sib after maybesibset");
1247                 }
1248
1249                 op_free(lastkid);
1250                 op_free(middlekid);
1251                 op_free(x);
1252                 RETVAL = failures;
1253         OUTPUT:
1254                 RETVAL
1255
1256 int
1257 SvRXOK(sv)
1258         SV *sv
1259         CODE:
1260                 RETVAL = SvRXOK(sv);
1261         OUTPUT:
1262                 RETVAL
1263
1264 int
1265 ptrtests()
1266         PREINIT:
1267                 int var, *p = &var;
1268
1269         CODE:
1270                 RETVAL = 0;
1271                 RETVAL += PTR2nat(p) != 0       ?  1 : 0;
1272                 RETVAL += PTR2ul(p) != 0UL      ?  2 : 0;
1273                 RETVAL += PTR2UV(p) != (UV) 0   ?  4 : 0;
1274                 RETVAL += PTR2IV(p) != (IV) 0   ?  8 : 0;
1275                 RETVAL += PTR2NV(p) != (NV) 0   ? 16 : 0;
1276                 RETVAL += p > NUM2PTR(int *, 0) ? 32 : 0;
1277
1278         OUTPUT:
1279                 RETVAL
1280
1281 int
1282 gv_stashpvn(name, create)
1283         char *name
1284         I32 create
1285         CODE:
1286                 RETVAL = gv_stashpvn(name, strlen(name), create) != NULL;
1287         OUTPUT:
1288                 RETVAL
1289
1290 int
1291 get_sv(name, create)
1292         char *name
1293         I32 create
1294         CODE:
1295                 RETVAL = get_sv(name, create) != NULL;
1296         OUTPUT:
1297                 RETVAL
1298
1299 int
1300 get_av(name, create)
1301         char *name
1302         I32 create
1303         CODE:
1304                 RETVAL = get_av(name, create) != NULL;
1305         OUTPUT:
1306                 RETVAL
1307
1308 int
1309 get_hv(name, create)
1310         char *name
1311         I32 create
1312         CODE:
1313                 RETVAL = get_hv(name, create) != NULL;
1314         OUTPUT:
1315                 RETVAL
1316
1317 int
1318 get_cv(name, create)
1319         char *name
1320         I32 create
1321         CODE:
1322                 RETVAL = get_cv(name, create) != NULL;
1323         OUTPUT:
1324                 RETVAL
1325
1326 void
1327 xsreturn(two)
1328         int two
1329         PPCODE:
1330                 mXPUSHp("test1", 5);
1331                 if (two)
1332                   mXPUSHp("test2", 5);
1333                 if (two)
1334                   XSRETURN(2);
1335                 else
1336                   XSRETURN(1);
1337
1338 SV*
1339 boolSV(value)
1340         int value
1341         CODE:
1342                 RETVAL = newSVsv(boolSV(value));
1343         OUTPUT:
1344                 RETVAL
1345
1346 SV*
1347 DEFSV()
1348         CODE:
1349                 RETVAL = newSVsv(DEFSV);
1350         OUTPUT:
1351                 RETVAL
1352
1353 void
1354 DEFSV_modify()
1355         PPCODE:
1356                 XPUSHs(sv_mortalcopy(DEFSV));
1357                 ENTER;
1358                 SAVE_DEFSV;
1359                 DEFSV_set(newSVpvs("DEFSV"));
1360                 XPUSHs(sv_mortalcopy(DEFSV));
1361                 /* Yes, this leaks the above scalar; 5.005 with threads for some reason */
1362                 /* frees it upon LEAVE, thus mortalizing it causes "attempt to free..." */
1363                 /* sv_2mortal(DEFSV); */
1364                 LEAVE;
1365                 XPUSHs(sv_mortalcopy(DEFSV));
1366                 XSRETURN(3);
1367
1368 int
1369 ERRSV()
1370         CODE:
1371                 RETVAL = SvTRUEx(ERRSV);
1372         OUTPUT:
1373                 RETVAL
1374
1375 SV*
1376 UNDERBAR()
1377         CODE:
1378                 {
1379                   dUNDERBAR;
1380                   RETVAL = newSVsv(UNDERBAR);
1381                 }
1382         OUTPUT:
1383                 RETVAL
1384
1385 void
1386 prepush()
1387         CODE:
1388                 {
1389                   dXSTARG;
1390                   XSprePUSH;
1391                   PUSHi(42);
1392                   XSRETURN(1);
1393                 }
1394
1395 int
1396 PERL_ABS(a)
1397         int a
1398
1399 void
1400 SVf(x)
1401         SV *x
1402         PPCODE:
1403 #if { VERSION >= 5.004 }
1404                 x = sv_2mortal(newSVpvf("[%" SVf "]", SVfARG(x)));
1405 #endif
1406                 XPUSHs(x);
1407                 XSRETURN(1);
1408
1409 void
1410 Perl_ppaddr_t(string)
1411         char *string
1412         PREINIT:
1413                 Perl_ppaddr_t lower;
1414         PPCODE:
1415                 lower = PL_ppaddr[OP_LC];
1416                 mXPUSHs(newSVpv(string, 0));
1417                 PUTBACK;
1418                 ENTER;
1419                 (void)*(lower)(aTHXR);
1420                 SPAGAIN;
1421                 LEAVE;
1422                 XSRETURN(1);
1423
1424 #if { VERSION >= 5.8.0 }
1425
1426 void
1427 check_HeUTF8(utf8_key)
1428         SV *utf8_key;
1429         PREINIT:
1430                 HV *hash;
1431                 HE *ent;
1432                 STRLEN klen;
1433                 char *key;
1434         PPCODE:
1435                 hash = newHV();
1436
1437                 key = SvPV(utf8_key, klen);
1438                 if (SvUTF8(utf8_key)) klen *= -1;
1439                 hv_store(hash, key, klen, newSVpvs("string"), 0);
1440                 hv_iterinit(hash);
1441                 ent = hv_iternext(hash);
1442                 assert(ent);
1443                 mXPUSHp((HeUTF8(ent) == 0 ? "norm" : "utf8"), 4);
1444                 hv_undef(hash);
1445
1446
1447 #endif
1448
1449 void
1450 check_c_array()
1451         PREINIT:
1452                 int x[] = { 10, 11, 12, 13 };
1453         PPCODE:
1454                 mXPUSHi(C_ARRAY_LENGTH(x));  /* 4 */
1455                 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
1456
1457 bool
1458 isBLANK(ord)
1459     UV ord
1460     CODE:
1461         RETVAL = isBLANK(ord);
1462     OUTPUT:
1463         RETVAL
1464
1465 bool
1466 isBLANK_A(ord)
1467     UV ord
1468     CODE:
1469         RETVAL = isBLANK_A(ord);
1470     OUTPUT:
1471         RETVAL
1472
1473 bool
1474 isBLANK_L1(ord)
1475     UV ord
1476     CODE:
1477         RETVAL = isBLANK_L1(ord);
1478     OUTPUT:
1479         RETVAL
1480
1481 bool
1482 isUPPER(ord)
1483     UV ord
1484     CODE:
1485         RETVAL = isUPPER(ord);
1486     OUTPUT:
1487         RETVAL
1488
1489 bool
1490 isUPPER_A(ord)
1491     UV ord
1492     CODE:
1493         RETVAL = isUPPER_A(ord);
1494     OUTPUT:
1495         RETVAL
1496
1497 bool
1498 isUPPER_L1(ord)
1499     UV ord
1500     CODE:
1501         RETVAL = isUPPER_L1(ord);
1502     OUTPUT:
1503         RETVAL
1504
1505 bool
1506 isLOWER(ord)
1507     UV ord
1508     CODE:
1509         RETVAL = isLOWER(ord);
1510     OUTPUT:
1511         RETVAL
1512
1513 bool
1514 isLOWER_A(ord)
1515     UV ord
1516     CODE:
1517         RETVAL = isLOWER_A(ord);
1518     OUTPUT:
1519         RETVAL
1520
1521 bool
1522 isLOWER_L1(ord)
1523     UV ord
1524     CODE:
1525         RETVAL = isLOWER_L1(ord);
1526     OUTPUT:
1527         RETVAL
1528
1529 bool
1530 isALPHA(ord)
1531     UV ord
1532     CODE:
1533         RETVAL = isALPHA(ord);
1534     OUTPUT:
1535         RETVAL
1536
1537 bool
1538 isALPHA_A(ord)
1539     UV ord
1540     CODE:
1541         RETVAL = isALPHA_A(ord);
1542     OUTPUT:
1543         RETVAL
1544
1545 bool
1546 isALPHA_L1(ord)
1547     UV ord
1548     CODE:
1549         RETVAL = isALPHA_L1(ord);
1550     OUTPUT:
1551         RETVAL
1552
1553 bool
1554 isWORDCHAR(ord)
1555     UV ord
1556     CODE:
1557         RETVAL = isWORDCHAR(ord);
1558     OUTPUT:
1559         RETVAL
1560
1561 bool
1562 isWORDCHAR_A(ord)
1563     UV ord
1564     CODE:
1565         RETVAL = isWORDCHAR_A(ord);
1566     OUTPUT:
1567         RETVAL
1568
1569 bool
1570 isWORDCHAR_L1(ord)
1571     UV ord
1572     CODE:
1573         RETVAL = isWORDCHAR_L1(ord);
1574     OUTPUT:
1575         RETVAL
1576
1577 bool
1578 isALPHANUMERIC(ord)
1579     UV ord
1580     CODE:
1581         RETVAL = isALPHANUMERIC(ord);
1582     OUTPUT:
1583         RETVAL
1584
1585 bool
1586 isALPHANUMERIC_A(ord)
1587     UV ord
1588     CODE:
1589         RETVAL = isALPHANUMERIC_A(ord);
1590     OUTPUT:
1591         RETVAL
1592
1593 bool
1594 isALNUM(ord)
1595     UV ord
1596     CODE:
1597         RETVAL = isALNUM(ord);
1598     OUTPUT:
1599         RETVAL
1600
1601 bool
1602 isALNUM_A(ord)
1603     UV ord
1604     CODE:
1605         RETVAL = isALNUM_A(ord);
1606     OUTPUT:
1607         RETVAL
1608
1609 bool
1610 isDIGIT(ord)
1611     UV ord
1612     CODE:
1613         RETVAL = isDIGIT(ord);
1614     OUTPUT:
1615         RETVAL
1616
1617 bool
1618 isDIGIT_A(ord)
1619     UV ord
1620     CODE:
1621         RETVAL = isDIGIT_A(ord);
1622     OUTPUT:
1623         RETVAL
1624
1625 bool
1626 isOCTAL(ord)
1627     UV ord
1628     CODE:
1629         RETVAL = isOCTAL(ord);
1630     OUTPUT:
1631         RETVAL
1632
1633 bool
1634 isOCTAL_A(ord)
1635     UV ord
1636     CODE:
1637         RETVAL = isOCTAL_A(ord);
1638     OUTPUT:
1639         RETVAL
1640
1641 bool
1642 isIDFIRST(ord)
1643     UV ord
1644     CODE:
1645         RETVAL = isIDFIRST(ord);
1646     OUTPUT:
1647         RETVAL
1648
1649 bool
1650 isIDFIRST_A(ord)
1651     UV ord
1652     CODE:
1653         RETVAL = isIDFIRST_A(ord);
1654     OUTPUT:
1655         RETVAL
1656
1657 bool
1658 isIDCONT(ord)
1659     UV ord
1660     CODE:
1661         RETVAL = isIDCONT(ord);
1662     OUTPUT:
1663         RETVAL
1664
1665 bool
1666 isIDCONT_A(ord)
1667     UV ord
1668     CODE:
1669         RETVAL = isIDCONT_A(ord);
1670     OUTPUT:
1671         RETVAL
1672
1673 bool
1674 isSPACE(ord)
1675     UV ord
1676     CODE:
1677         RETVAL = isSPACE(ord);
1678     OUTPUT:
1679         RETVAL
1680
1681 bool
1682 isSPACE_A(ord)
1683     UV ord
1684     CODE:
1685         RETVAL = isSPACE_A(ord);
1686     OUTPUT:
1687         RETVAL
1688
1689 bool
1690 isASCII(ord)
1691     UV ord
1692     CODE:
1693         RETVAL = isASCII(ord);
1694     OUTPUT:
1695         RETVAL
1696
1697 bool
1698 isASCII_A(ord)
1699     UV ord
1700     CODE:
1701         RETVAL = isASCII_A(ord);
1702     OUTPUT:
1703         RETVAL
1704
1705 bool
1706 isCNTRL(ord)
1707     UV ord
1708     CODE:
1709         RETVAL = isCNTRL(ord);
1710     OUTPUT:
1711         RETVAL
1712
1713 bool
1714 isCNTRL_A(ord)
1715     UV ord
1716     CODE:
1717         RETVAL = isCNTRL_A(ord);
1718     OUTPUT:
1719         RETVAL
1720
1721 bool
1722 isPRINT(ord)
1723     UV ord
1724     CODE:
1725         RETVAL = isPRINT(ord);
1726     OUTPUT:
1727         RETVAL
1728
1729 bool
1730 isPRINT_A(ord)
1731     UV ord
1732     CODE:
1733         RETVAL = isPRINT_A(ord);
1734     OUTPUT:
1735         RETVAL
1736
1737 bool
1738 isGRAPH(ord)
1739     UV ord
1740     CODE:
1741         RETVAL = isGRAPH(ord);
1742     OUTPUT:
1743         RETVAL
1744
1745 bool
1746 isGRAPH_A(ord)
1747     UV ord
1748     CODE:
1749         RETVAL = isGRAPH_A(ord);
1750     OUTPUT:
1751         RETVAL
1752
1753 bool
1754 isPUNCT(ord)
1755     UV ord
1756     CODE:
1757         RETVAL = isPUNCT(ord);
1758     OUTPUT:
1759         RETVAL
1760
1761 bool
1762 isPUNCT_A(ord)
1763     UV ord
1764     CODE:
1765         RETVAL = isPUNCT_A(ord);
1766     OUTPUT:
1767         RETVAL
1768
1769 bool
1770 isXDIGIT(ord)
1771     UV ord
1772     CODE:
1773         RETVAL = isXDIGIT(ord);
1774     OUTPUT:
1775         RETVAL
1776
1777 bool
1778 isXDIGIT_A(ord)
1779     UV ord
1780     CODE:
1781         RETVAL = isXDIGIT_A(ord);
1782     OUTPUT:
1783         RETVAL
1784
1785 bool
1786 isPSXSPC(ord)
1787     UV ord
1788     CODE:
1789         RETVAL = isPSXSPC(ord);
1790     OUTPUT:
1791         RETVAL
1792
1793 bool
1794 isPSXSPC_A(ord)
1795     UV ord
1796     CODE:
1797         RETVAL = isPSXSPC_A(ord);
1798     OUTPUT:
1799         RETVAL
1800
1801 bool
1802 isALPHANUMERIC_L1(ord)
1803     UV ord
1804     CODE:
1805         RETVAL = isALPHANUMERIC_L1(ord);
1806     OUTPUT:
1807         RETVAL
1808
1809 bool
1810 isALNUMC_L1(ord)
1811     UV ord
1812     CODE:
1813         RETVAL = isALNUMC_L1(ord);
1814     OUTPUT:
1815         RETVAL
1816
1817 bool
1818 isDIGIT_L1(ord)
1819     UV ord
1820     CODE:
1821         RETVAL = isDIGIT_L1(ord);
1822     OUTPUT:
1823         RETVAL
1824
1825 bool
1826 isOCTAL_L1(ord)
1827     UV ord
1828     CODE:
1829         RETVAL = isOCTAL_L1(ord);
1830     OUTPUT:
1831         RETVAL
1832
1833 bool
1834 isIDFIRST_L1(ord)
1835     UV ord
1836     CODE:
1837         RETVAL = isIDFIRST_L1(ord);
1838     OUTPUT:
1839         RETVAL
1840
1841 bool
1842 isIDCONT_L1(ord)
1843     UV ord
1844     CODE:
1845         RETVAL = isIDCONT_L1(ord);
1846     OUTPUT:
1847         RETVAL
1848
1849 bool
1850 isSPACE_L1(ord)
1851     UV ord
1852     CODE:
1853         RETVAL = isSPACE_L1(ord);
1854     OUTPUT:
1855         RETVAL
1856
1857 bool
1858 isASCII_L1(ord)
1859     UV ord
1860     CODE:
1861         RETVAL = isASCII_L1(ord);
1862     OUTPUT:
1863         RETVAL
1864
1865 bool
1866 isCNTRL_L1(ord)
1867     UV ord
1868     CODE:
1869         RETVAL = isCNTRL_L1(ord);
1870     OUTPUT:
1871         RETVAL
1872
1873 bool
1874 isPRINT_L1(ord)
1875     UV ord
1876     CODE:
1877         RETVAL = isPRINT_L1(ord);
1878     OUTPUT:
1879         RETVAL
1880
1881 bool
1882 isGRAPH_L1(ord)
1883     UV ord
1884     CODE:
1885         RETVAL = isGRAPH_L1(ord);
1886     OUTPUT:
1887         RETVAL
1888
1889 bool
1890 isPUNCT_L1(ord)
1891     UV ord
1892     CODE:
1893         RETVAL = isPUNCT_L1(ord);
1894     OUTPUT:
1895         RETVAL
1896
1897 bool
1898 isXDIGIT_L1(ord)
1899     UV ord
1900     CODE:
1901         RETVAL = isXDIGIT_L1(ord);
1902     OUTPUT:
1903         RETVAL
1904
1905 bool
1906 isPSXSPC_L1(ord)
1907     UV ord
1908     CODE:
1909         RETVAL = isPSXSPC_L1(ord);
1910     OUTPUT:
1911         RETVAL
1912
1913 bool
1914 isASCII_uvchr(ord)
1915     UV ord
1916     CODE:
1917         RETVAL = isASCII_uvchr(ord);
1918     OUTPUT:
1919         RETVAL
1920
1921 bool
1922 isASCII_utf8_safe(s, offset)
1923     unsigned char * s
1924     int offset
1925     CODE:
1926         PERL_UNUSED_ARG(offset);
1927         RETVAL = isASCII_utf8_safe(s, s + 1 + offset);
1928     OUTPUT:
1929         RETVAL
1930
1931 #if { VERSION >= 5.006 }
1932
1933 bool
1934 isBLANK_uvchr(ord)
1935     UV ord
1936     CODE:
1937         RETVAL = isBLANK_uvchr(ord);
1938     OUTPUT:
1939         RETVAL
1940
1941 bool
1942 isALPHA_uvchr(ord)
1943     UV ord
1944     CODE:
1945         RETVAL = isALPHA_uvchr(ord);
1946     OUTPUT:
1947         RETVAL
1948
1949 bool
1950 isALPHANUMERIC_uvchr(ord)
1951     UV ord
1952     CODE:
1953         RETVAL = isALPHANUMERIC_uvchr(ord);
1954     OUTPUT:
1955         RETVAL
1956
1957 bool
1958 isCNTRL_uvchr(ord)
1959     UV ord
1960     CODE:
1961         RETVAL = isCNTRL_uvchr(ord);
1962     OUTPUT:
1963         RETVAL
1964
1965 bool
1966 isDIGIT_uvchr(ord)
1967     UV ord
1968     CODE:
1969         RETVAL = isDIGIT_uvchr(ord);
1970     OUTPUT:
1971         RETVAL
1972
1973 bool
1974 isIDFIRST_uvchr(ord)
1975     UV ord
1976     CODE:
1977         RETVAL = isIDFIRST_uvchr(ord);
1978     OUTPUT:
1979         RETVAL
1980
1981 bool
1982 isIDCONT_uvchr(ord)
1983     UV ord
1984     CODE:
1985         RETVAL = isIDCONT_uvchr(ord);
1986     OUTPUT:
1987         RETVAL
1988
1989 bool
1990 isGRAPH_uvchr(ord)
1991     UV ord
1992     CODE:
1993         RETVAL = isGRAPH_uvchr(ord);
1994     OUTPUT:
1995         RETVAL
1996
1997 bool
1998 isLOWER_uvchr(ord)
1999     UV ord
2000     CODE:
2001         RETVAL = isLOWER_uvchr(ord);
2002     OUTPUT:
2003         RETVAL
2004
2005 bool
2006 isPRINT_uvchr(ord)
2007     UV ord
2008     CODE:
2009         RETVAL = isPRINT_uvchr(ord);
2010     OUTPUT:
2011         RETVAL
2012
2013 bool
2014 isPSXSPC_uvchr(ord)
2015     UV ord
2016     CODE:
2017         RETVAL = isPSXSPC_uvchr(ord);
2018     OUTPUT:
2019         RETVAL
2020
2021 bool
2022 isPUNCT_uvchr(ord)
2023     UV ord
2024     CODE:
2025         RETVAL = isPUNCT_uvchr(ord);
2026     OUTPUT:
2027         RETVAL
2028
2029 bool
2030 isSPACE_uvchr(ord)
2031     UV ord
2032     CODE:
2033         RETVAL = isSPACE_uvchr(ord);
2034     OUTPUT:
2035         RETVAL
2036
2037 bool
2038 isUPPER_uvchr(ord)
2039     UV ord
2040     CODE:
2041         RETVAL = isUPPER_uvchr(ord);
2042     OUTPUT:
2043         RETVAL
2044
2045 bool
2046 isWORDCHAR_uvchr(ord)
2047     UV ord
2048     CODE:
2049         RETVAL = isWORDCHAR_uvchr(ord);
2050     OUTPUT:
2051         RETVAL
2052
2053 bool
2054 isXDIGIT_uvchr(ord)
2055     UV ord
2056     CODE:
2057         RETVAL = isXDIGIT_uvchr(ord);
2058     OUTPUT:
2059         RETVAL
2060
2061 bool
2062 isALPHA_utf8_safe(s, offset)
2063     unsigned char * s
2064     int offset
2065     CODE:
2066         RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
2067     OUTPUT:
2068         RETVAL
2069
2070 bool
2071 isALPHANUMERIC_utf8_safe(s, offset)
2072     unsigned char * s
2073     int offset
2074     CODE:
2075         RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2076     OUTPUT:
2077         RETVAL
2078
2079 bool
2080 isBLANK_utf8_safe(s, offset)
2081     unsigned char * s
2082     int offset
2083     CODE:
2084         RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
2085     OUTPUT:
2086         RETVAL
2087
2088 bool
2089 isCNTRL_utf8_safe(s, offset)
2090     unsigned char * s
2091     int offset
2092     CODE:
2093         RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
2094     OUTPUT:
2095         RETVAL
2096
2097 bool
2098 isDIGIT_utf8_safe(s, offset)
2099     unsigned char * s
2100     int offset
2101     CODE:
2102         RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2103     OUTPUT:
2104         RETVAL
2105
2106 bool
2107 isGRAPH_utf8_safe(s, offset)
2108     unsigned char * s
2109     int offset
2110     CODE:
2111         RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
2112     OUTPUT:
2113         RETVAL
2114
2115 bool
2116 isIDCONT_utf8_safe(s, offset)
2117     unsigned char * s
2118     int offset
2119     CODE:
2120         RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2121     OUTPUT:
2122         RETVAL
2123
2124 bool
2125 isIDFIRST_utf8_safe(s, offset)
2126     unsigned char * s
2127     int offset
2128     CODE:
2129         RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
2130     OUTPUT:
2131         RETVAL
2132
2133 bool
2134 isLOWER_utf8_safe(s, offset)
2135     unsigned char * s
2136     int offset
2137     CODE:
2138         RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2139     OUTPUT:
2140         RETVAL
2141
2142 bool
2143 isPRINT_utf8_safe(s, offset)
2144     unsigned char * s
2145     int offset
2146     CODE:
2147         RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2148     OUTPUT:
2149         RETVAL
2150
2151 bool
2152 isPSXSPC_utf8_safe(s, offset)
2153     unsigned char * s
2154     int offset
2155     CODE:
2156         RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2157     OUTPUT:
2158         RETVAL
2159
2160 bool
2161 isPUNCT_utf8_safe(s, offset)
2162     unsigned char * s
2163     int offset
2164     CODE:
2165         RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2166     OUTPUT:
2167         RETVAL
2168
2169 bool
2170 isSPACE_utf8_safe(s, offset)
2171     unsigned char * s
2172     int offset
2173     CODE:
2174         RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
2175     OUTPUT:
2176         RETVAL
2177
2178 bool
2179 isUPPER_utf8_safe(s, offset)
2180     unsigned char * s
2181     int offset
2182     CODE:
2183         RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2184     OUTPUT:
2185         RETVAL
2186
2187 bool
2188 isWORDCHAR_utf8_safe(s, offset)
2189     unsigned char * s
2190     int offset
2191     CODE:
2192         RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
2193     OUTPUT:
2194         RETVAL
2195
2196 bool
2197 isXDIGIT_utf8_safe(s, offset)
2198     unsigned char * s
2199     int offset
2200     CODE:
2201         RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2202     OUTPUT:
2203         RETVAL
2204
2205 bool
2206 isALPHA_LC_utf8_safe(s, offset)
2207     unsigned char * s
2208     int offset
2209     CODE:
2210         RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2211     OUTPUT:
2212         RETVAL
2213
2214 bool
2215 isALPHANUMERIC_LC_utf8_safe(s, offset)
2216     unsigned char * s
2217     int offset
2218     CODE:
2219         RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2220     OUTPUT:
2221         RETVAL
2222
2223 bool
2224 isASCII_LC_utf8_safe(s, offset)
2225     unsigned char * s
2226     int offset
2227     CODE:
2228         PERL_UNUSED_ARG(offset);
2229         RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset);
2230     OUTPUT:
2231         RETVAL
2232
2233 bool
2234 isBLANK_LC_utf8_safe(s, offset)
2235     unsigned char * s
2236     int offset
2237     CODE:
2238         RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2239     OUTPUT:
2240         RETVAL
2241
2242 bool
2243 isCNTRL_LC_utf8_safe(s, offset)
2244     unsigned char * s
2245     int offset
2246     CODE:
2247         RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2248     OUTPUT:
2249         RETVAL
2250
2251 bool
2252 isDIGIT_LC_utf8_safe(s, offset)
2253     unsigned char * s
2254     int offset
2255     CODE:
2256         RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2257     OUTPUT:
2258         RETVAL
2259
2260 bool
2261 isGRAPH_LC_utf8_safe(s, offset)
2262     unsigned char * s
2263     int offset
2264     CODE:
2265         RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2266     OUTPUT:
2267         RETVAL
2268
2269 bool
2270 isIDCONT_LC_utf8_safe(s, offset)
2271     unsigned char * s
2272     int offset
2273     CODE:
2274         RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2275     OUTPUT:
2276         RETVAL
2277
2278 bool
2279 isIDFIRST_LC_utf8_safe(s, offset)
2280     unsigned char * s
2281     int offset
2282     CODE:
2283         RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2284     OUTPUT:
2285         RETVAL
2286
2287 bool
2288 isLOWER_LC_utf8_safe(s, offset)
2289     unsigned char * s
2290     int offset
2291     CODE:
2292         RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2293     OUTPUT:
2294         RETVAL
2295
2296 bool
2297 isPRINT_LC_utf8_safe(s, offset)
2298     unsigned char * s
2299     int offset
2300     CODE:
2301         RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2302     OUTPUT:
2303         RETVAL
2304
2305 bool
2306 isPSXSPC_LC_utf8_safe(s, offset)
2307     unsigned char * s
2308     int offset
2309     CODE:
2310         RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2311     OUTPUT:
2312         RETVAL
2313
2314 bool
2315 isPUNCT_LC_utf8_safe(s, offset)
2316     unsigned char * s
2317     int offset
2318     CODE:
2319         RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2320     OUTPUT:
2321         RETVAL
2322
2323 bool
2324 isSPACE_LC_utf8_safe(s, offset)
2325     unsigned char * s
2326     int offset
2327     CODE:
2328         RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2329     OUTPUT:
2330         RETVAL
2331
2332 bool
2333 isUPPER_LC_utf8_safe(s, offset)
2334     unsigned char * s
2335     int offset
2336     CODE:
2337         RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2338     OUTPUT:
2339         RETVAL
2340
2341 bool
2342 isWORDCHAR_LC_utf8_safe(s, offset)
2343     unsigned char * s
2344     int offset
2345     CODE:
2346         RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2347     OUTPUT:
2348         RETVAL
2349
2350 bool
2351 isXDIGIT_LC_utf8_safe(s, offset)
2352     unsigned char * s
2353     int offset
2354     CODE:
2355         RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2356     OUTPUT:
2357         RETVAL
2358
2359 AV *
2360 toLOWER_utf8_safe(s, offset)
2361     unsigned char * s
2362     int offset
2363     PREINIT:
2364         U8 u[UTF8_MAXBYTES+1];
2365         Size_t len;
2366         UV ret;
2367         SV* utf8;
2368         AV * av;
2369     CODE:
2370         av = newAV();
2371         ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2372         av_push(av, newSVuv(ret));
2373
2374         utf8 = newSVpvn((char *) u, len);
2375         SvUTF8_on(utf8);
2376         av_push(av, utf8);
2377
2378         av_push(av, newSVuv(len));
2379         RETVAL = av;
2380     OUTPUT:
2381         RETVAL
2382
2383 AV *
2384 toTITLE_utf8_safe(s, offset)
2385     unsigned char * s
2386     int offset
2387     PREINIT:
2388         U8 u[UTF8_MAXBYTES+1];
2389         Size_t len;
2390         UV ret;
2391         SV* utf8;
2392         AV * av;
2393     CODE:
2394         av = newAV();
2395         ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2396         av_push(av, newSVuv(ret));
2397
2398         utf8 = newSVpvn((char *) u, len);
2399         SvUTF8_on(utf8);
2400         av_push(av, utf8);
2401
2402         av_push(av, newSVuv(len));
2403         RETVAL = av;
2404     OUTPUT:
2405         RETVAL
2406
2407 AV *
2408 toUPPER_utf8_safe(s, offset)
2409     unsigned char * s
2410     int offset
2411     PREINIT:
2412         U8 u[UTF8_MAXBYTES+1];
2413         Size_t len;
2414         UV ret;
2415         SV* utf8;
2416         AV * av;
2417     CODE:
2418         av = newAV();
2419         ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2420         av_push(av, newSVuv(ret));
2421
2422         utf8 = newSVpvn((char *) u, len);
2423         SvUTF8_on(utf8);
2424         av_push(av, utf8);
2425
2426         av_push(av, newSVuv(len));
2427         RETVAL = av;
2428     OUTPUT:
2429         RETVAL
2430
2431 AV *
2432 toFOLD_utf8_safe(s, offset)
2433     unsigned char * s
2434     int offset
2435     PREINIT:
2436         U8 u[UTF8_MAXBYTES+1];
2437         Size_t len;
2438         UV ret;
2439         SV* utf8;
2440         AV * av;
2441     CODE:
2442         av = newAV();
2443         ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2444         av_push(av, newSVuv(ret));
2445
2446         utf8 = newSVpvn((char *) u, len);
2447         SvUTF8_on(utf8);
2448         av_push(av, utf8);
2449
2450         av_push(av, newSVuv(len));
2451         RETVAL = av;
2452     OUTPUT:
2453         RETVAL
2454
2455 AV *
2456 toLOWER_uvchr(c)
2457     UV c
2458     PREINIT:
2459         U8 u[UTF8_MAXBYTES+1];
2460         Size_t len;
2461         UV ret;
2462         SV* utf8;
2463         AV * av;
2464     CODE:
2465         av = newAV();
2466         ret = toLOWER_uvchr(c, u, &len);
2467         av_push(av, newSVuv(ret));
2468
2469         utf8 = newSVpvn((char *) u, len);
2470         SvUTF8_on(utf8);
2471         av_push(av, utf8);
2472
2473         av_push(av, newSVuv(len));
2474         RETVAL = av;
2475     OUTPUT:
2476         RETVAL
2477
2478 AV *
2479 toTITLE_uvchr(c)
2480     UV c
2481     PREINIT:
2482         U8 u[UTF8_MAXBYTES+1];
2483         Size_t len;
2484         UV ret;
2485         SV* utf8;
2486         AV * av;
2487     CODE:
2488         av = newAV();
2489         ret = toTITLE_uvchr(c, u, &len);
2490         av_push(av, newSVuv(ret));
2491
2492         utf8 = newSVpvn((char *) u, len);
2493         SvUTF8_on(utf8);
2494         av_push(av, utf8);
2495
2496         av_push(av, newSVuv(len));
2497         RETVAL = av;
2498     OUTPUT:
2499         RETVAL
2500
2501 AV *
2502 toUPPER_uvchr(c)
2503     UV c
2504     PREINIT:
2505         U8 u[UTF8_MAXBYTES+1];
2506         Size_t len;
2507         UV ret;
2508         SV* utf8;
2509         AV * av;
2510     CODE:
2511         av = newAV();
2512         ret = toUPPER_uvchr(c, u, &len);
2513         av_push(av, newSVuv(ret));
2514
2515         utf8 = newSVpvn((char *) u, len);
2516         SvUTF8_on(utf8);
2517         av_push(av, utf8);
2518
2519         av_push(av, newSVuv(len));
2520         RETVAL = av;
2521     OUTPUT:
2522         RETVAL
2523
2524 AV *
2525 toFOLD_uvchr(c)
2526     UV c
2527     PREINIT:
2528         U8 u[UTF8_MAXBYTES+1];
2529         Size_t len;
2530         UV ret;
2531         SV* utf8;
2532         AV * av;
2533     CODE:
2534         av = newAV();
2535         ret = toFOLD_uvchr(c, u, &len);
2536         av_push(av, newSVuv(ret));
2537
2538         utf8 = newSVpvn((char *) u, len);
2539         SvUTF8_on(utf8);
2540         av_push(av, utf8);
2541
2542         av_push(av, newSVuv(len));
2543         RETVAL = av;
2544     OUTPUT:
2545         RETVAL
2546
2547 #endif
2548
2549 UV
2550 LATIN1_TO_NATIVE(cp)
2551         UV cp
2552         CODE:
2553                 if (cp > 255) RETVAL= cp;
2554                 else RETVAL= LATIN1_TO_NATIVE(cp);
2555         OUTPUT:
2556                 RETVAL
2557
2558 UV
2559 NATIVE_TO_LATIN1(cp)
2560         UV cp
2561         CODE:
2562                 RETVAL= NATIVE_TO_LATIN1(cp);
2563         OUTPUT:
2564                 RETVAL
2565
2566 STRLEN
2567 av_tindex(av)
2568         SV *av
2569         CODE:
2570                 RETVAL = av_tindex((AV*)SvRV(av));
2571         OUTPUT:
2572                 RETVAL
2573
2574 STRLEN
2575 av_top_index(av)
2576         SV *av
2577         CODE:
2578                 RETVAL = av_top_index((AV*)SvRV(av));
2579         OUTPUT:
2580                 RETVAL
2581
2582 STRLEN
2583 av_count(av)
2584         SV *av
2585         CODE:
2586                 RETVAL = av_count((AV*)SvRV(av));
2587         OUTPUT:
2588                 RETVAL
2589
2590 =tests plan => 26827
2591
2592 use vars qw($my_sv @my_av %my_hv);
2593
2594 ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true");
2595 ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false");
2596
2597 $_ = "Fred";
2598 is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED');
2599 is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED');
2600
2601 if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) {
2602   eval q{
2603     no warnings "deprecated";
2604     no if $^V >= v5.17.9, warnings => "experimental::lexical_topic";
2605     my $_ = "Tony";
2606     is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred');
2607     is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony');
2608   };
2609   die __FILE__ . __LINE__ . ": $@" if $@;
2610 }
2611 else {
2612   skip("perl version outside testing range of lexical_topic", 2);
2613 }
2614
2615 my @r = &Devel::PPPort::DEFSV_modify();
2616
2617 ok(@r == 3, "Verify got 3 elements");
2618 is($r[0], 'Fred');
2619 is($r[1], 'DEFSV');
2620 is($r[2], 'Fred');
2621
2622 is(&Devel::PPPort::DEFSV(), "Fred");
2623
2624 eval { 1 };
2625 ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false");
2626 eval { cannot_call_this_one() };
2627 ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true");
2628
2629 ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0));
2630 ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0));
2631 ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1));
2632
2633 $my_sv = 1;
2634 ok(&Devel::PPPort::get_sv('my_sv', 0));
2635 ok(!&Devel::PPPort::get_sv('not_my_sv', 0));
2636 ok(&Devel::PPPort::get_sv('not_my_sv', 1));
2637
2638 @my_av = (1);
2639 ok(&Devel::PPPort::get_av('my_av', 0));
2640 ok(!&Devel::PPPort::get_av('not_my_av', 0));
2641 ok(&Devel::PPPort::get_av('not_my_av', 1));
2642
2643 %my_hv = (a=>1);
2644 ok(&Devel::PPPort::get_hv('my_hv', 0));
2645 ok(!&Devel::PPPort::get_hv('not_my_hv', 0));
2646 ok(&Devel::PPPort::get_hv('not_my_hv', 1));
2647
2648 sub my_cv { 1 };
2649 ok(&Devel::PPPort::get_cv('my_cv', 0));
2650 ok(!&Devel::PPPort::get_cv('not_my_cv', 0));
2651 ok(&Devel::PPPort::get_cv('not_my_cv', 1));
2652
2653 is(Devel::PPPort::dXSTARG(42), 43);
2654 is(Devel::PPPort::dAXMARK(4711), 4710);
2655
2656 is(Devel::PPPort::prepush(), 42);
2657
2658 is(join(':', Devel::PPPort::xsreturn(0)), 'test1');
2659 is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
2660
2661 is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42");
2662 is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13");
2663
2664 is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42');
2665 is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc');
2666
2667 is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
2668
2669 is(&Devel::PPPort::ptrtests(), 63);
2670
2671 is(&Devel::PPPort::OpSIBLING_tests(), 0);
2672
2673 if (ivers($]) >= ivers(5.9)) {
2674   eval q{
2675     is(&Devel::PPPort::check_HeUTF8("hello"), "norm");
2676     is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
2677   };
2678 } else {
2679   skip("Too early perl version", 2);
2680 }
2681
2682 @r = &Devel::PPPort::check_c_array();
2683 is($r[0], 4);
2684 is($r[1], "13");
2685
2686 ok(!Devel::PPPort::SvRXOK(""));
2687 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
2688
2689 if (ivers($]) < ivers(5.5)) {
2690         skip 'no qr// objects in this perl', 2;
2691 } else {
2692         my $qr = eval 'qr/./';
2693         ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true");
2694         ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
2695 }
2696
2697 ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
2698 ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
2699 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
2700 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
2701
2702 ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6");
2703 if (ord("A") == 65) {
2704     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
2705     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
2706 }
2707 else {
2708     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
2709     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
2710 }
2711
2712 ok(  Devel::PPPort::isALNUMC_L1(ord("5")));
2713 ok(  Devel::PPPort::isALNUMC_L1(0xFC));
2714 ok(! Devel::PPPort::isALNUMC_L1(0xB6));
2715
2716 ok(  Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL");
2717 ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL");
2718
2719 ok(  Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A");
2720 ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A");
2721
2722 ok(  Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1");
2723 ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1");
2724
2725 my $way_too_early_msg = 'UTF-8 not implemented on this perl';
2726
2727 # For the other properties, we test every code point from 0.255, and a
2728 # smattering of higher ones.  First populate a hash with keys like '65:ALPHA'
2729 # to indicate that the code point there is alphabetic
2730 my $i;
2731 my %types;
2732 for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
2733         0xF8..0x101)
2734 {
2735     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2736     $types{"$native:ALPHA"} = 1;
2737     $types{"$native:ALPHANUMERIC"} = 1;
2738     $types{"$native:IDFIRST"} = 1;
2739     $types{"$native:IDCONT"} = 1;
2740     $types{"$native:PRINT"} = 1;
2741     $types{"$native:WORDCHAR"} = 1;
2742 }
2743 for $i (0x30..0x39, 0x660, 0xFF19) {
2744     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2745     $types{"$native:ALPHANUMERIC"} = 1;
2746     $types{"$native:DIGIT"} = 1;
2747     $types{"$native:IDCONT"} = 1;
2748     $types{"$native:WORDCHAR"} = 1;
2749     $types{"$native:GRAPH"} = 1;
2750     $types{"$native:PRINT"} = 1;
2751     $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
2752 }
2753
2754 for $i (0..0x7F) {
2755     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2756     $types{"$native:ASCII"} = 1;
2757 }
2758 for $i (0..0x1f, 0x7F..0x9F) {
2759     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2760     $types{"$native:CNTRL"} = 1;
2761 }
2762 for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
2763     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2764     $types{"$native:GRAPH"} = 1;
2765     $types{"$native:PRINT"} = 1;
2766 }
2767 for $i (0x09, 0x20, 0xA0) {
2768     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2769     $types{"$native:BLANK"} = 1;
2770     $types{"$native:SPACE"} = 1;
2771     $types{"$native:PSXSPC"} = 1;
2772     $types{"$native:PRINT"} = 1 if $i > 0x09;
2773 }
2774 for $i (0x09..0x0D, 0x85, 0x2029) {
2775     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2776     $types{"$native:SPACE"} = 1;
2777     $types{"$native:PSXSPC"} = 1;
2778 }
2779 for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
2780     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2781     $types{"$native:UPPER"} = 1;
2782     $types{"$native:XDIGIT"} = 1 if $i < 0x47;
2783 }
2784 for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
2785     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2786     $types{"$native:LOWER"} = 1;
2787     $types{"$native:XDIGIT"} = 1 if $i < 0x67;
2788 }
2789 for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
2790         0xB7, 0xBB, 0xBF, 0x5BE)
2791 {
2792     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2793     $types{"$native:PUNCT"} = 1;
2794     $types{"$native:GRAPH"} = 1;
2795     $types{"$native:PRINT"} = 1;
2796 }
2797
2798 $i = ord('_');
2799 $types{"$i:WORDCHAR"} = 1;
2800 $types{"$i:IDFIRST"} = 1;
2801 $types{"$i:IDCONT"} = 1;
2802
2803 # Now find all the unique code points included above.
2804 my %code_points_to_test;
2805 my $key;
2806 for $key (keys %types) {
2807     $key =~ s/:.*//;
2808     $code_points_to_test{$key} = 1;
2809 }
2810
2811 # And test each one
2812 for $i (sort { $a <=> $b } keys %code_points_to_test) {
2813     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2814     my $hex = sprintf("0x%02X", $native);
2815
2816     # And for each code point test each of the classes
2817     my $class;
2818     for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
2819                    IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
2820                    XDIGIT))
2821     {
2822         if ($i < 256) {  # For the ones that can fit in a byte, test each of
2823                          # three macros.
2824             my $suffix;
2825             for $suffix ("", "_A", "_L1", "_uvchr") {
2826                 my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/)
2827                                 ? 0     # Fail on non-ASCII unless unicode
2828                                 : ($types{"$native:$class"} || 0);
2829                 if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') {
2830                     skip("No UTF-8 on this perl", 1);
2831                     next;
2832                 }
2833
2834                 my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
2835                 local $SIG{__WARN__} = sub {};
2836                 my $is = eval $eval_string || 0;
2837                 die "eval 'For $i: $eval_string' gave $@" if $@;
2838                 is($is, $should_be, "'$eval_string'");
2839             }
2840         }
2841
2842         # For all code points, test the '_utf8' macros
2843         my $sub_fcn;
2844         for $sub_fcn ("", "_LC") {
2845             my $skip = "";
2846             if (ivers($]) < ivers(5.6)) {
2847                 $skip = $way_too_early_msg;
2848             }
2849             elsif (ivers($]) < ivers(5.7) && $native > 255) {
2850                 $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points";
2851             }
2852             elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
2853                 $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH";
2854             }
2855             elsif ($sub_fcn eq '_LC' && $i < 256) {
2856                 $skip = "Testing of code points whose results depend on locale is skipped ";
2857             }
2858             my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe";
2859             my $utf8;
2860
2861             if ($skip) {
2862                 skip $skip, 1;
2863             }
2864             else {
2865                 $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native);
2866                 my $should_be = $types{"$native:$class"} || 0;
2867                 my $eval_string = "$fcn(\"$utf8\", 0)";
2868                 local $SIG{__WARN__} = sub {};
2869                 my $is = eval $eval_string || 0;
2870                 die "eval 'For $i, $eval_string' gave $@" if $@;
2871                 is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
2872             }
2873
2874             # And for the high code points, test that a too short malformation (the
2875             # -1) causes it to fail
2876             if ($i > 255) {
2877                 if ($skip) {
2878                     skip $skip, 1;
2879                 }
2880                 elsif (ivers($]) >= ivers(5.25.9)) {
2881                     skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1);
2882                 }
2883                 else {
2884                     my $eval_string = "$fcn(\"$utf8\", -1)";
2885                     local $SIG{__WARN__} = sub {};
2886                     my $is = eval "$eval_string" || 0;
2887                     die "eval '$eval_string' gave $@" if $@;
2888                     is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
2889                 }
2890             }
2891         }
2892     }
2893 }
2894
2895 my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ],
2896                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2897                                      Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2898                                    [ 0x100, 0x101 ],
2899                                  ],
2900                       'FOLD'  => [ [ ord('C'), ord('c') ],
2901                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2902                                      Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2903                                    [ 0x104, 0x105 ],
2904                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2905                                      'ss' ],
2906                                  ],
2907                       'UPPER' => [ [ ord('a'), ord('A'),  ],
2908                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0),
2909                                      Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ],
2910                                    [ 0x101, 0x100 ],
2911                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2912                                      'SS' ],
2913                                  ],
2914                       'TITLE' => [ [ ord('c'), ord('C'),  ],
2915                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2),
2916                                      Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ],
2917                                    [ 0x103, 0x102 ],
2918                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2919                                      'Ss' ],
2920                                  ],
2921                     );
2922
2923 my $name;
2924 for $name (keys %case_changing) {
2925     my @code_points_to_test = @{$case_changing{$name}};
2926     my $unchanged;
2927     for $unchanged (@code_points_to_test) {
2928         my @pair = @$unchanged;
2929         my $original = $pair[0];
2930         my $changed = $pair[1];
2931         my $utf8_changed = $changed;
2932         my $is_cp = $utf8_changed =~ /^\d+$/;
2933         my $should_be_bytes;
2934         if (ivers($]) >= ivers(5.6)) {
2935             if ($is_cp) {
2936                 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2937                 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2938             }
2939             else {
2940                 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/';
2941                 $should_be_bytes = length $utf8_changed;
2942             }
2943         }
2944
2945         my $fcn = "to${name}_uvchr";
2946         my $skip = "";
2947
2948         if (ivers($]) < ivers(5.6)) {
2949             $skip = $way_too_early_msg;
2950         }
2951         elsif (! $is_cp) {
2952             $skip = "Can't do uvchr on a multi-char string";
2953         }
2954         if ($skip) {
2955             skip $skip, 4;
2956         }
2957         else {
2958             if ($is_cp) {
2959                 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2960                 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2961             }
2962             else {
2963                 my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]';
2964                 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/';
2965                 $should_be_bytes = length $utf8_changed;
2966             }
2967
2968             my $ret = eval "Devel::PPPort::$fcn($original)";
2969             my $fail = $@;  # Have to save $@, as it gets destroyed
2970             is ($fail, "", "$fcn($original) didn't fail");
2971             my $first = (ivers($]) != ivers(5.6))
2972                         ? substr($utf8_changed, 0, 1)
2973                         : $utf8_changed, 0, 1;
2974             is($ret->[0], ord $first,
2975                "ord of $fcn($original) is $changed");
2976             is($ret->[1], $utf8_changed,
2977                "UTF-8 of of $fcn($original) is correct");
2978             is($ret->[2], $should_be_bytes,
2979                "Length of $fcn($original) is $should_be_bytes");
2980         }
2981
2982         my $truncate;
2983         for $truncate (0..2) {
2984             my $skip;
2985             if (ivers($]) < ivers(5.6)) {
2986                 $skip = $way_too_early_msg;
2987             }
2988             elsif (! $is_cp && ivers($]) < ivers(5.7.3)) {
2989                 $skip = "Multi-character case change not implemented until 5.7.3";
2990             }
2991             elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) {
2992                 $skip = "Zero length inputs cause assertion failure; test dies in modern perls";
2993             }
2994             elsif ($truncate > 0 && length $changed > 1) {
2995                 $skip = "Don't test shortened multi-char case changes";
2996             }
2997             elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) {
2998                 $skip = "Don't try to test shortened single bytes";
2999             }
3000             if ($skip) {
3001                 skip $skip, 4;
3002             }
3003             else {
3004                 my $fcn = "to${name}_utf8_safe";
3005                 my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original);
3006                 my $real_truncate = ($truncate < 2)
3007                                     ? $truncate : $should_be_bytes;
3008                 my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)";
3009                 my $ret = eval "no warnings; $eval_string" || 0;
3010                 my $fail = $@;  # Have to save $@, as it gets destroyed
3011                 if ($truncate == 0) {
3012                     is ($fail, "", "Didn't fail on full length input");
3013                     my $first = (ivers($]) != ivers(5.6))
3014                                 ? substr($utf8_changed, 0, 1)
3015                                 : $utf8_changed, 0, 1;
3016                     is($ret->[0], ord $first,
3017                        "ord of $fcn($original) is $changed");
3018                     is($ret->[1], $utf8_changed,
3019                        "UTF-8 of of $fcn($original) is correct");
3020                     is($ret->[2], $should_be_bytes,
3021                     "Length of $fcn($original) is $should_be_bytes");
3022                 }
3023                 else {
3024                     is ($fail, eval 'qr/Malformed UTF-8 character/',
3025                         "Gave appropriate error for short char: $original");
3026                     skip("Expected failure means remaining tests for"
3027                        . " this aren't relevant", 3);
3028                 }
3029             }
3030         }
3031     }
3032 }
3033
3034 is(&Devel::PPPort::av_top_index([1,2,3]), 2);
3035 is(&Devel::PPPort::av_tindex([1,2,3,4]), 3);
3036 is(&Devel::PPPort::av_count([1,2,3,4]), 4);