This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Sync Devel::PPPort with CPAN version 3.63
[perl5.git] / dist / Devel-PPPort / parts / inc / misc
1 ################################################################################
2 ##
3 ##  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ##  Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ##  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
6 ##
7 ##  This program is free software; you can redistribute it and/or
8 ##  modify it under the same terms as Perl itself.
9 ##
10 ################################################################################
11
12 =provides
13
14 __UNDEFINED__
15 END_EXTERN_C
16 EXTERN_C
17 INT2PTR
18 MUTABLE_PTR
19 NVTYPE
20 PERLIO_FUNCS_CAST
21 PERLIO_FUNCS_DECL
22 PERL_UNUSED_ARG
23 PERL_UNUSED_CONTEXT
24 PERL_UNUSED_DECL
25 PERL_UNUSED_RESULT
26 PERL_UNUSED_VAR
27 PERL_USE_GCC_BRACE_GROUPS
28 PTR2ul
29 PTRV
30 START_EXTERN_C
31 STMT_END
32 STMT_START
33 SvRX
34 WIDEST_UTYPE
35 XSRETURN
36 NOT_REACHED
37 ASSUME
38
39 =implementation
40
41 __UNDEFINED__ cBOOL(cbool) ((cbool) ? (bool)1 : (bool)0)
42 __UNDEFINED__ OpHAS_SIBLING(o)      (cBOOL((o)->op_sibling))
43 __UNDEFINED__ OpSIBLING(o)          (0 + (o)->op_sibling)
44 __UNDEFINED__ OpMORESIB_set(o, sib) ((o)->op_sibling = (sib))
45 __UNDEFINED__ OpLASTSIB_set(o, parent) ((o)->op_sibling = NULL)
46 __UNDEFINED__ OpMAYBESIB_set(o, sib, parent) ((o)->op_sibling = (sib))
47 __UNDEFINED__ HEf_SVKEY   -2
48
49 #if defined(DEBUGGING) && !defined(__COVERITY__)
50 __UNDEFINED__ __ASSERT_(statement)  assert(statement),
51 #else
52 __UNDEFINED__ __ASSERT_(statement)
53 #endif
54
55 __UNDEFINED__  __has_builtin(x) 0
56
57 #if __has_builtin(__builtin_unreachable)
58 #  define D_PPP_HAS_BUILTIN_UNREACHABLE
59 #elif (defined(__GNUC__) && (   __GNUC__ > 4                              \
60                              || __GNUC__ == 4 && __GNUC_MINOR__ >= 5))
61 #  define D_PPP_HAS_BUILTIN_UNREACHABLE
62 #endif
63
64 #ifndef ASSUME
65 #  ifdef DEBUGGING
66 #    define ASSUME(x) assert(x)
67 #  elif defined(_MSC_VER)
68 #    define ASSUME(x) __assume(x)
69 #  elif defined(__ARMCC_VERSION)
70 #    define ASSUME(x) __promise(x)
71 #  elif defined(D_PPP_HAS_BUILTIN_UNREACHABLE)
72 #    define ASSUME(x) ((x) ? (void) 0 : __builtin_unreachable())
73 #  else
74 #    define ASSUME(x) assert(x)
75 #  endif
76 #endif
77
78 #ifndef NOT_REACHED
79 #  ifdef D_PPP_HAS_BUILTIN_UNREACHABLE
80 #    define NOT_REACHED                                                     \
81         STMT_START {                                                        \
82             ASSUME(!"UNREACHABLE"); __builtin_unreachable();                \
83         } STMT_END
84 #  elif ! defined(__GNUC__) && (defined(__sun) || defined(__hpux))
85 #    define NOT_REACHED
86 #  else
87 #    define NOT_REACHED  ASSUME(!"UNREACHABLE")
88 #  endif
89 #endif
90
91 #ifndef WIDEST_UTYPE
92 # ifdef QUADKIND
93 #  ifdef U64TYPE
94 #   define WIDEST_UTYPE U64TYPE
95 #  else
96 #   define WIDEST_UTYPE unsigned Quad_t
97 #  endif
98 # else
99 #  define WIDEST_UTYPE U32
100 # endif
101 #endif
102
103 /* These could become provided if/when they become part of the public API */
104 __UNDEF_NOT_PROVIDED__ withinCOUNT(c, l, n)                                    \
105    (((WIDEST_UTYPE) (((c)) - ((l) | 0))) <= (((WIDEST_UTYPE) ((n) | 0))))
106 __UNDEF_NOT_PROVIDED__ inRANGE(c, l, u)                                        \
107    (  (sizeof(c) == sizeof(U8))  ? withinCOUNT(((U8)  (c)), (l), ((u) - (l)))  \
108     : (sizeof(c) == sizeof(U32)) ? withinCOUNT(((U32) (c)), (l), ((u) - (l)))  \
109     : (withinCOUNT(((WIDEST_UTYPE) (c)), (l), ((u) - (l)))))
110
111 /* The '| 0' part ensures a compiler error if c is not integer (like e.g., a
112  * pointer) */
113 #undef FITS_IN_8_BITS   /* handy.h version uses a core-only constant */
114 __UNDEF_NOT_PROVIDED__ FITS_IN_8_BITS(c) (   (sizeof(c) == 1)               \
115                                     || !(((WIDEST_UTYPE)((c) | 0)) & ~0xFF))
116
117 /* Create the macro for "is'macro'_utf8_safe(s, e)".  For code points below
118  * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
119  * point.  That is so that it can automatically get the bug fixes done in this
120  * file. */
121 #define D_PPP_IS_GENERIC_UTF8_SAFE(s, e, macro)                             \
122    (((e) - (s)) <= 0                                                        \
123      ? 0                                                                    \
124      : UTF8_IS_INVARIANT((s)[0])                                            \
125        ? is ## macro ## _L1((s)[0])                                         \
126        : (((e) - (s)) < UTF8SKIP(s))                                        \
127           ? 0                                                               \
128           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
129               /* The cast in the line below is only to silence warnings */  \
130             ? is ## macro ## _L1((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
131                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
132                                                      & UTF_START_MASK(2),   \
133                                                   (s)[1])))                 \
134             : is ## macro ## _utf8(s))
135
136 /* Create the macro for "is'macro'_LC_utf8_safe(s, e)".  For code points below
137  * 256, it calls the equivalent _L1 macro by converting the UTF-8 to code
138  * point.  That is so that it can automatically get the bug fixes done in this
139  * file. */
140 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, macro)                          \
141    (((e) - (s)) <= 0                                                        \
142      ? 0                                                                    \
143      : UTF8_IS_INVARIANT((s)[0])                                            \
144        ? is ## macro ## _LC((s)[0])                                         \
145        : (((e) - (s)) < UTF8SKIP(s))                                        \
146           ? 0                                                               \
147           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
148               /* The cast in the line below is only to silence warnings */  \
149             ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
150                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
151                                                      & UTF_START_MASK(2),   \
152                                                   (s)[1])))                 \
153             : is ## macro ## _utf8(s))
154
155 /* A few of the early functions are broken.  For these and the non-LC case,
156  * machine generated code is substituted.  But that code doesn't work for
157  * locales.  This is just like the above macro, but at the end, we call the
158  * macro we've generated for the above 255 case, which is correct since locale
159  * isn't involved.  This will generate extra code to handle the 0-255 inputs,
160  * but hopefully it will be optimized out by the C compiler.  But just in case
161  * it isn't, this macro is only used on the few versions that are broken */
162
163 #define D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, macro)                   \
164    (((e) - (s)) <= 0                                                        \
165      ? 0                                                                    \
166      : UTF8_IS_INVARIANT((s)[0])                                            \
167        ? is ## macro ## _LC((s)[0])                                         \
168        : (((e) - (s)) < UTF8SKIP(s))                                        \
169           ? 0                                                               \
170           : UTF8_IS_DOWNGRADEABLE_START((s)[0])                             \
171               /* The cast in the line below is only to silence warnings */  \
172             ? is ## macro ## _LC((WIDEST_UTYPE) LATIN1_TO_NATIVE(           \
173                                   UTF8_ACCUMULATE(NATIVE_UTF8_TO_I8((s)[0]) \
174                                                      & UTF_START_MASK(2),   \
175                                                   (s)[1])))                 \
176             : is ## macro ## _utf8_safe(s, e))
177
178 __UNDEFINED__ SvRX(rv) (SvROK((rv)) ? (SvMAGICAL(SvRV((rv))) ? (mg_find(SvRV((rv)), PERL_MAGIC_qr) ? mg_find(SvRV((rv)), PERL_MAGIC_qr)->mg_obj : NULL) : NULL) : NULL)
179 __UNDEFINED__ SvRXOK(sv) (!!SvRX(sv))
180
181 #ifndef PERL_UNUSED_DECL
182 #  ifdef HASATTRIBUTE
183 #    if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
184 #      define PERL_UNUSED_DECL
185 #    else
186 #      define PERL_UNUSED_DECL __attribute__((unused))
187 #    endif
188 #  else
189 #    define PERL_UNUSED_DECL
190 #  endif
191 #endif
192
193 #ifndef PERL_UNUSED_ARG
194 #  if defined(lint) && defined(S_SPLINT_S) /* www.splint.org */
195 #    include <note.h>
196 #    define PERL_UNUSED_ARG(x) NOTE(ARGUNUSED(x))
197 #  else
198 #    define PERL_UNUSED_ARG(x) ((void)x)
199 #  endif
200 #endif
201
202 #ifndef PERL_UNUSED_VAR
203 #  define PERL_UNUSED_VAR(x) ((void)x)
204 #endif
205
206 #ifndef PERL_UNUSED_CONTEXT
207 #  ifdef USE_ITHREADS
208 #    define PERL_UNUSED_CONTEXT PERL_UNUSED_ARG(my_perl)
209 #  else
210 #    define PERL_UNUSED_CONTEXT
211 #  endif
212 #endif
213
214 #ifndef PERL_UNUSED_RESULT
215 #  if defined(__GNUC__) && defined(HASATTRIBUTE_WARN_UNUSED_RESULT)
216 #    define PERL_UNUSED_RESULT(v) STMT_START { __typeof__(v) z = (v); (void)sizeof(z); } STMT_END
217 #  else
218 #    define PERL_UNUSED_RESULT(v) ((void)(v))
219 #  endif
220 #endif
221
222 __UNDEFINED__  NOOP          /*EMPTY*/(void)0
223 __UNDEFINED__  dNOOP         extern int /*@unused@*/ Perl___notused PERL_UNUSED_DECL
224
225 #ifndef NVTYPE
226 #  if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
227 #    define NVTYPE long double
228 #  else
229 #    define NVTYPE double
230 #  endif
231 typedef NVTYPE NV;
232 #endif
233
234 #ifndef INT2PTR
235 #  if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
236 #    define PTRV                  UV
237 #    define INT2PTR(any,d)        (any)(d)
238 #  else
239 #    if PTRSIZE == LONGSIZE
240 #      define PTRV                unsigned long
241 #    else
242 #      define PTRV                unsigned
243 #    endif
244 #    define INT2PTR(any,d)        (any)(PTRV)(d)
245 #  endif
246 #endif
247
248 #ifndef PTR2ul
249 #  if PTRSIZE == LONGSIZE
250 #    define PTR2ul(p)     (unsigned long)(p)
251 #  else
252 #    define PTR2ul(p)     INT2PTR(unsigned long,p)
253 #  endif
254 #endif
255
256 __UNDEFINED__  PTR2nat(p)      (PTRV)(p)
257 __UNDEFINED__  NUM2PTR(any,d)  (any)PTR2nat(d)
258 __UNDEFINED__  PTR2IV(p)       INT2PTR(IV,p)
259 __UNDEFINED__  PTR2UV(p)       INT2PTR(UV,p)
260 __UNDEFINED__  PTR2NV(p)       NUM2PTR(NV,p)
261
262 #undef START_EXTERN_C
263 #undef END_EXTERN_C
264 #undef EXTERN_C
265 #ifdef __cplusplus
266 #  define START_EXTERN_C extern "C" {
267 #  define END_EXTERN_C }
268 #  define EXTERN_C extern "C"
269 #else
270 #  define START_EXTERN_C
271 #  define END_EXTERN_C
272 #  define EXTERN_C extern
273 #endif
274
275 #if { VERSION < 5.004 } || defined(PERL_GCC_PEDANTIC)
276 #  ifndef PERL_GCC_BRACE_GROUPS_FORBIDDEN
277 __UNDEF_NOT_PROVIDED__  PERL_GCC_BRACE_GROUPS_FORBIDDEN
278 #  endif
279 #endif
280
281 #if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) && !defined(__cplusplus)
282 #  ifndef PERL_USE_GCC_BRACE_GROUPS
283 #    define PERL_USE_GCC_BRACE_GROUPS
284 #  endif
285 #endif
286
287 #undef STMT_START
288 #undef STMT_END
289 #ifdef PERL_USE_GCC_BRACE_GROUPS
290 #  define STMT_START    (void)( /* gcc supports ``({ STATEMENTS; })'' */
291 #  define STMT_END      )
292 #else
293 #  if defined(VOIDFLAGS) && (VOIDFLAGS) && (defined(sun) || defined(__sun__)) && !defined(__GNUC__)
294 #    define STMT_START  if (1)
295 #    define STMT_END    else (void)0
296 #  else
297 #    define STMT_START  do
298 #    define STMT_END    while (0)
299 #  endif
300 #endif
301
302 __UNDEFINED__  boolSV(b)    ((b) ? &PL_sv_yes : &PL_sv_no)
303
304 /* DEFSV appears first in 5.004_56 */
305 __UNDEFINED__  DEFSV        GvSV(PL_defgv)
306 __UNDEFINED__  SAVE_DEFSV   SAVESPTR(GvSV(PL_defgv))
307 __UNDEFINED__  DEFSV_set(sv) (DEFSV = (sv))
308
309 /* Older perls (<=5.003) lack AvFILLp */
310 __UNDEFINED__  AvFILLp      AvFILL
311
312 __UNDEFINED__  av_tindex    AvFILL
313 __UNDEFINED__  av_top_index AvFILL
314 __UNDEFINED__  av_count(av) (AvFILL(av)+1)
315
316 __UNDEFINED__  ERRSV        get_sv("@",FALSE)
317
318 /* Hint: gv_stashpvn
319  * This function's backport doesn't support the length parameter, but
320  * rather ignores it. Portability can only be ensured if the length
321  * parameter is used for speed reasons, but the length can always be
322  * correctly computed from the string argument.
323  */
324
325 __UNDEFINED__  gv_stashpvn(str,len,create)  gv_stashpv(str,create)
326
327 /* Replace: 1 */
328 __UNDEFINED__  get_cv          perl_get_cv
329 __UNDEFINED__  get_sv          perl_get_sv
330 __UNDEFINED__  get_av          perl_get_av
331 __UNDEFINED__  get_hv          perl_get_hv
332 /* Replace: 0 */
333
334 __UNDEFINED__  dUNDERBAR       dNOOP
335 __UNDEFINED__  UNDERBAR        DEFSV
336
337 __UNDEFINED__  dAX             I32 ax = MARK - PL_stack_base + 1
338 __UNDEFINED__  dITEMS          I32 items = SP - MARK
339
340 __UNDEFINED__  dXSTARG         SV * targ = sv_newmortal()
341
342 __UNDEFINED__  dAXMARK         I32 ax = POPMARK; \
343                                SV ** const mark = PL_stack_base + ax++
344
345
346 __UNDEFINED__  XSprePUSH       (sp = PL_stack_base + ax - 1)
347
348 #if { VERSION < 5.005 }
349 #  undef XSRETURN
350 #  define XSRETURN(off)                                   \
351       STMT_START {                                        \
352           PL_stack_sp = PL_stack_base + ax + ((off) - 1); \
353           return;                                         \
354       } STMT_END
355 #endif
356
357 __UNDEFINED__  XSPROTO(name)   void name(pTHX_ CV* cv)
358 __UNDEFINED__  SVfARG(p)       ((void*)(p))
359
360 __UNDEFINED__  PERL_ABS(x)     ((x) < 0 ? -(x) : (x))
361
362 __UNDEFINED__  dVAR            dNOOP
363
364 __UNDEFINED__  SVf             "_"
365
366 __UNDEFINED__  CPERLscope(x)   x
367
368 __UNDEFINED__  PERL_HASH(hash,str,len) \
369      STMT_START { \
370         const char *s_PeRlHaSh = str; \
371         I32 i_PeRlHaSh = len; \
372         U32 hash_PeRlHaSh = 0; \
373         while (i_PeRlHaSh--) \
374             hash_PeRlHaSh = hash_PeRlHaSh * 33 + *s_PeRlHaSh++; \
375         (hash) = hash_PeRlHaSh; \
376     } STMT_END
377
378 #ifndef PERLIO_FUNCS_DECL
379 # ifdef PERLIO_FUNCS_CONST
380 #  define PERLIO_FUNCS_DECL(funcs) const PerlIO_funcs funcs
381 #  define PERLIO_FUNCS_CAST(funcs) (PerlIO_funcs*)(funcs)
382 # else
383 #  define PERLIO_FUNCS_DECL(funcs) PerlIO_funcs funcs
384 #  define PERLIO_FUNCS_CAST(funcs) (funcs)
385 # endif
386 #endif
387
388 /* provide these typedefs for older perls */
389 #if { VERSION < 5.9.3 }
390
391 # ifdef ARGSproto
392 typedef OP* (CPERLscope(*Perl_ppaddr_t))(ARGSproto);
393 # else
394 typedef OP* (CPERLscope(*Perl_ppaddr_t))(pTHX);
395 # endif
396
397 typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
398
399 #endif
400
401 /* On versions without NATIVE_TO_ASCII, only ASCII is supported */
402 #if defined(EBCDIC) && defined(NATIVE_TO_ASCI)
403 __UNDEFINED__ NATIVE_TO_LATIN1(c) NATIVE_TO_ASCII(c)
404 __UNDEFINED__ LATIN1_TO_NATIVE(c) ASCII_TO_NATIVE(c)
405 __UNDEFINED__ NATIVE_TO_UNI(c) ((c) > 255 ? (c) : NATIVE_TO_LATIN1(c))
406 __UNDEFINED__ UNI_TO_NATIVE(c) ((c) > 255 ? (c) : LATIN1_TO_NATIVE(c))
407 #else
408 __UNDEFINED__ NATIVE_TO_LATIN1(c) (c)
409 __UNDEFINED__ LATIN1_TO_NATIVE(c) (c)
410 __UNDEFINED__ NATIVE_TO_UNI(c) (c)
411 __UNDEFINED__ UNI_TO_NATIVE(c) (c)
412 #endif
413
414 /* Warning: LATIN1_TO_NATIVE, NATIVE_TO_LATIN1 NATIVE_TO_UNI UNI_TO_NATIVE
415    EBCDIC is not supported on versions earlier than 5.7.1
416  */
417
418 /* The meaning of this changed; use the modern version */
419 #undef isPSXSPC
420 #undef isPSXSPC_A
421 #undef isPSXSPC_L1
422
423 /* Hint: isPSXSPC, isPSXSPC_A, isPSXSPC_L1, isPSXSPC_utf8_safe
424     This is equivalent to the corresponding isSPACE-type macro.  On perls
425     before 5.18, this matched a vertical tab and SPACE didn't.  But the
426     ppport.h SPACE version does match VT in all perl releases.  Since VT's are
427     extremely rarely found in real-life files, this difference effectively
428     doesn't matter */
429
430 /* Hint: isSPACE, isSPACE_A, isSPACE_L1, isSPACE_utf8_safe
431     Until Perl 5.18, this did not match the vertical tab (VT).  The ppport.h
432     version does match it in all perl releases. Since VT's are extremely rarely
433     found in real-life files, this difference effectively doesn't matter */
434
435 #ifdef EBCDIC
436
437 /* This is the first version where these macros are fully correct on EBCDIC
438  * platforms.  Relying on the C library functions, as earlier releases did,
439  * causes problems with locales */
440 # if { VERSION < 5.22.0 }
441 #  undef isALNUM
442 #  undef isALNUM_A
443 #  undef isALNUM_L1
444 #  undef isALNUMC
445 #  undef isALNUMC_A
446 #  undef isALNUMC_L1
447 #  undef isALPHA
448 #  undef isALPHA_A
449 #  undef isALPHA_L1
450 #  undef isALPHANUMERIC
451 #  undef isALPHANUMERIC_A
452 #  undef isALPHANUMERIC_L1
453 #  undef isASCII
454 #  undef isASCII_A
455 #  undef isASCII_L1
456 #  undef isBLANK
457 #  undef isBLANK_A
458 #  undef isBLANK_L1
459 #  undef isCNTRL
460 #  undef isCNTRL_A
461 #  undef isCNTRL_L1
462 #  undef isDIGIT
463 #  undef isDIGIT_A
464 #  undef isDIGIT_L1
465 #  undef isGRAPH
466 #  undef isGRAPH_A
467 #  undef isGRAPH_L1
468 #  undef isIDCONT
469 #  undef isIDCONT_A
470 #  undef isIDCONT_L1
471 #  undef isIDFIRST
472 #  undef isIDFIRST_A
473 #  undef isIDFIRST_L1
474 #  undef isLOWER
475 #  undef isLOWER_A
476 #  undef isLOWER_L1
477 #  undef isOCTAL
478 #  undef isOCTAL_A
479 #  undef isOCTAL_L1
480 #  undef isPRINT
481 #  undef isPRINT_A
482 #  undef isPRINT_L1
483 #  undef isPUNCT
484 #  undef isPUNCT_A
485 #  undef isPUNCT_L1
486 #  undef isSPACE
487 #  undef isSPACE_A
488 #  undef isSPACE_L1
489 #  undef isUPPER
490 #  undef isUPPER_A
491 #  undef isUPPER_L1
492 #  undef isWORDCHAR
493 #  undef isWORDCHAR_A
494 #  undef isWORDCHAR_L1
495 #  undef isXDIGIT
496 #  undef isXDIGIT_A
497 #  undef isXDIGIT_L1
498 # endif
499
500 __UNDEFINED__ isASCII(c)    (isCNTRL(c) || isPRINT(c))
501
502         /* The below is accurate for all EBCDIC code pages supported by
503          * all the versions of Perl overridden by this */
504 __UNDEFINED__ isCNTRL(c)    (    (c) == '\0' || (c) == '\a' || (c) == '\b'      \
505                              ||  (c) == '\f' || (c) == '\n' || (c) == '\r'      \
506                              ||  (c) == '\t' || (c) == '\v'                     \
507                              || ((c) <= 3 && (c) >= 1) /* SOH, STX, ETX */      \
508                              ||  (c) == 7    /* U+7F DEL */                     \
509                              || ((c) <= 0x13 && (c) >= 0x0E) /* SO, SI */       \
510                                                       /* DLE, DC[1-3] */        \
511                              ||  (c) == 0x18 /* U+18 CAN */                     \
512                              ||  (c) == 0x19 /* U+19 EOM */                     \
513                              || ((c) <= 0x1F && (c) >= 0x1C) /* [FGRU]S */      \
514                              ||  (c) == 0x26 /* U+17 ETB */                     \
515                              ||  (c) == 0x27 /* U+1B ESC */                     \
516                              ||  (c) == 0x2D /* U+05 ENQ */                     \
517                              ||  (c) == 0x2E /* U+06 ACK */                     \
518                              ||  (c) == 0x32 /* U+16 SYN */                     \
519                              ||  (c) == 0x37 /* U+04 EOT */                     \
520                              ||  (c) == 0x3C /* U+14 DC4 */                     \
521                              ||  (c) == 0x3D /* U+15 NAK */                     \
522                              ||  (c) == 0x3F /* U+1A SUB */                     \
523                             )
524
525 #if '^' == 106    /* EBCDIC POSIX-BC */
526 #  define D_PPP_OUTLIER_CONTROL 0x5F
527 #else   /* EBCDIC 1047 037 */
528 #  define D_PPP_OUTLIER_CONTROL 0xFF
529 #endif
530
531 /* The controls are everything below blank, plus one outlier */
532 __UNDEFINED__ isCNTRL_L1(c) ((WIDEST_UTYPE) (c) < ' '                           \
533                           || (WIDEST_UTYPE) (c) == D_PPP_OUTLIER_CONTROL)
534 /* The ordering of the tests in this and isUPPER are to exclude most characters
535  * early */
536 __UNDEFINED__ isLOWER(c)    (        (c) >= 'a' && (c) <= 'z'                   \
537                              &&  (   (c) <= 'i'                                 \
538                                  || ((c) >= 'j' && (c) <= 'r')                  \
539                                  ||  (c) >= 's'))
540 __UNDEFINED__ isUPPER(c)    (        (c) >= 'A' && (c) <= 'Z'                   \
541                              && (    (c) <= 'I'                                 \
542                                  || ((c) >= 'J' && (c) <= 'R')                  \
543                                  ||  (c) >= 'S'))
544
545 #else   /* Above is EBCDIC; below is ASCII */
546
547 # if { VERSION < 5.4.0 }
548 /* The implementation of these in older perl versions can give wrong results if
549  * the C program locale is set to other than the C locale */
550 #  undef isALNUM
551 #  undef isALNUM_A
552 #  undef isALPHA
553 #  undef isALPHA_A
554 #  undef isDIGIT
555 #  undef isDIGIT_A
556 #  undef isIDFIRST
557 #  undef isIDFIRST_A
558 #  undef isLOWER
559 #  undef isLOWER_A
560 #  undef isUPPER
561 #  undef isUPPER_A
562 # endif
563
564 #  if { VERSION == 5.7.0 } /* this perl made space GRAPH */
565 #    undef isGRAPH
566 #  endif
567
568 # if { VERSION < 5.8.0 } /* earlier perls omitted DEL */
569 #  undef isCNTRL
570 # endif
571
572 # if { VERSION < 5.10.0 }
573 /* earlier perls included all of the isSPACE() characters, which is wrong. The
574  * version provided by Devel::PPPort always overrides an existing buggy
575  * version. */
576 #  undef isPRINT
577 #  undef isPRINT_A
578 # endif
579
580 # if { VERSION < 5.14.0 }
581 /* earlier perls always returned true if the parameter was a signed char */
582 #  undef isASCII
583 #  undef isASCII_A
584 # endif
585
586 # if { VERSION < 5.17.8 } /* earlier perls didn't include PILCROW, SECTION SIGN */
587 #  undef isPUNCT_L1
588 # endif
589
590 # if { VERSION < 5.13.7 } /* khw didn't investigate why this failed */
591 #  undef isALNUMC_L1
592 #endif
593
594 # if { VERSION < 5.20.0 } /* earlier perls didn't include \v */
595 #  undef isSPACE
596 #  undef isSPACE_A
597 #  undef isSPACE_L1
598
599 # endif
600
601 __UNDEFINED__ isASCII(c)        ((WIDEST_UTYPE) (c) <= 127)
602 __UNDEFINED__ isCNTRL(c)        ((WIDEST_UTYPE) (c) < ' ' || (c) == 127)
603 __UNDEFINED__ isCNTRL_L1(c)     (   (WIDEST_UTYPE) (c) < ' '                \
604                                  || inRANGE((c), 0x7F, 0x9F))
605 __UNDEFINED__ isLOWER(c)        inRANGE((c), 'a', 'z')
606 __UNDEFINED__ isUPPER(c)        inRANGE((c), 'A', 'Z')
607
608 #endif /* Below are definitions common to EBCDIC and ASCII */
609
610 __UNDEFINED__ isASCII_L1(c)     isASCII(c)
611 __UNDEFINED__ isASCII_LC(c)     isASCII(c)
612 __UNDEFINED__ isALNUM(c)        isWORDCHAR(c)
613 __UNDEFINED__ isALNUMC(c)       isALPHANUMERIC(c)
614 __UNDEFINED__ isALNUMC_L1(c)    isALPHANUMERIC_L1(c)
615 __UNDEFINED__ isALPHA(c)        (isUPPER(c) || isLOWER(c))
616 __UNDEFINED__ isALPHA_L1(c)     (isUPPER_L1(c) || isLOWER_L1(c))
617 __UNDEFINED__ isALPHANUMERIC(c) (isALPHA(c) || isDIGIT(c))
618 __UNDEFINED__ isALPHANUMERIC_L1(c) (isALPHA_L1(c) || isDIGIT(c))
619 __UNDEFINED__ isALPHANUMERIC_LC(c) (isALPHA_LC(c) || isDIGIT_LC(c))
620 __UNDEFINED__ isBLANK(c)        ((c) == ' ' || (c) == '\t')
621 __UNDEFINED__ isBLANK_L1(c) (    isBLANK(c)                                    \
622                              || (   FITS_IN_8_BITS(c)                          \
623                                  && NATIVE_TO_LATIN1((U8) c) == 0xA0))
624 __UNDEFINED__ isBLANK_LC(c)     isBLANK(c)
625 __UNDEFINED__ isDIGIT(c)        inRANGE(c, '0', '9')
626 __UNDEFINED__ isDIGIT_L1(c)     isDIGIT(c)
627 __UNDEFINED__ isGRAPH(c)        (isWORDCHAR(c) || isPUNCT(c))
628 __UNDEFINED__ isGRAPH_L1(c)     (   isPRINT_L1(c)                              \
629                                  && (c) != ' '                                 \
630                                  && NATIVE_TO_LATIN1((U8) c) != 0xA0)
631 __UNDEFINED__ isIDCONT(c)       isWORDCHAR(c)
632 __UNDEFINED__ isIDCONT_L1(c)    isWORDCHAR_L1(c)
633 __UNDEFINED__ isIDCONT_LC(c)    isWORDCHAR_LC(c)
634 __UNDEFINED__ isIDFIRST(c)      (isALPHA(c) || (c) == '_')
635 __UNDEFINED__ isIDFIRST_L1(c)   (isALPHA_L1(c) || (U8) (c) == '_')
636 __UNDEFINED__ isIDFIRST_LC(c)   (isALPHA_LC(c) || (U8) (c) == '_')
637 __UNDEFINED__ isLOWER_L1(c) (    isLOWER(c)                                    \
638                              || (   FITS_IN_8_BITS(c)                          \
639                                  && (  (   NATIVE_TO_LATIN1((U8) c) >= 0xDF    \
640                                         && NATIVE_TO_LATIN1((U8) c) != 0xF7)   \
641                                      || NATIVE_TO_LATIN1((U8) c) == 0xAA       \
642                                      || NATIVE_TO_LATIN1((U8) c) == 0xBA       \
643                                      || NATIVE_TO_LATIN1((U8) c) == 0xB5)))
644 __UNDEFINED__ isOCTAL(c)        (((WIDEST_UTYPE)((c)) & ~7) == '0')
645 __UNDEFINED__ isOCTAL_L1(c)     isOCTAL(c)
646 __UNDEFINED__ isPRINT(c)        (isGRAPH(c) || (c) == ' ')
647 __UNDEFINED__ isPRINT_L1(c)     (FITS_IN_8_BITS(c) && ! isCNTRL_L1(c))
648 __UNDEFINED__ isPSXSPC(c)       isSPACE(c)
649 __UNDEFINED__ isPSXSPC_L1(c)    isSPACE_L1(c)
650 __UNDEFINED__ isPUNCT(c)    (   (c) == '-' || (c) == '!' || (c) == '"'         \
651                              || (c) == '#' || (c) == '$' || (c) == '%'         \
652                              || (c) == '&' || (c) == '\'' || (c) == '('        \
653                              || (c) == ')' || (c) == '*' || (c) == '+'         \
654                              || (c) == ',' || (c) == '.' || (c) == '/'         \
655                              || (c) == ':' || (c) == ';' || (c) == '<'         \
656                              || (c) == '=' || (c) == '>' || (c) == '?'         \
657                              || (c) == '@' || (c) == '[' || (c) == '\\'        \
658                              || (c) == ']' || (c) == '^' || (c) == '_'         \
659                              || (c) == '`' || (c) == '{' || (c) == '|'         \
660                              || (c) == '}' || (c) == '~')
661 __UNDEFINED__ isPUNCT_L1(c)  (    isPUNCT(c)                                   \
662                               || (  FITS_IN_8_BITS(c)                          \
663                                   && (   NATIVE_TO_LATIN1((U8) c) == 0xA1      \
664                                       || NATIVE_TO_LATIN1((U8) c) == 0xA7      \
665                                       || NATIVE_TO_LATIN1((U8) c) == 0xAB      \
666                                       || NATIVE_TO_LATIN1((U8) c) == 0xB6      \
667                                       || NATIVE_TO_LATIN1((U8) c) == 0xB7      \
668                                       || NATIVE_TO_LATIN1((U8) c) == 0xBB      \
669                                       || NATIVE_TO_LATIN1((U8) c) == 0xBF)))
670 __UNDEFINED__ isSPACE(c)        (   isBLANK(c) || (c) == '\n' || (c) == '\r'   \
671                                  || (c) == '\v' || (c) == '\f')
672 __UNDEFINED__ isSPACE_L1(c) (    isSPACE(c)                                    \
673                              || (FITS_IN_8_BITS(c)                             \
674                                  && (   NATIVE_TO_LATIN1((U8) c) == 0x85       \
675                                      || NATIVE_TO_LATIN1((U8) c) == 0xA0)))
676 __UNDEFINED__ isUPPER_L1(c) (   isUPPER(c)                                     \
677                              || (FITS_IN_8_BITS(c)                             \
678                                  && (   NATIVE_TO_LATIN1((U8) c) >= 0xC0       \
679                                      && NATIVE_TO_LATIN1((U8) c) <= 0xDE       \
680                                      && NATIVE_TO_LATIN1((U8) c) != 0xD7)))
681 __UNDEFINED__ isWORDCHAR(c)     (isALPHANUMERIC(c) || (c) == '_')
682 __UNDEFINED__ isWORDCHAR_L1(c)  (isIDFIRST_L1(c) || isDIGIT(c))
683 __UNDEFINED__ isWORDCHAR_LC(c)  (isIDFIRST_LC(c) || isDIGIT_LC(c))
684 __UNDEFINED__ isXDIGIT(c)       (   isDIGIT(c)                                 \
685                                  || inRANGE((c), 'a', 'f')                     \
686                                  || inRANGE((c), 'A', 'F'))
687 __UNDEFINED__ isXDIGIT_L1(c)    isXDIGIT(c)
688 __UNDEFINED__ isXDIGIT_LC(c)    isxdigit(c)
689
690 __UNDEFINED__ isALNUM_A(c)         isALNUM(c)
691 __UNDEFINED__ isALNUMC_A(c)        isALNUMC(c)
692 __UNDEFINED__ isALPHA_A(c)         isALPHA(c)
693 __UNDEFINED__ isALPHANUMERIC_A(c)  isALPHANUMERIC(c)
694 __UNDEFINED__ isASCII_A(c)         isASCII(c)
695 __UNDEFINED__ isBLANK_A(c)         isBLANK(c)
696 __UNDEFINED__ isCNTRL_A(c)         isCNTRL(c)
697 __UNDEFINED__ isDIGIT_A(c)         isDIGIT(c)
698 __UNDEFINED__ isGRAPH_A(c)         isGRAPH(c)
699 __UNDEFINED__ isIDCONT_A(c)        isIDCONT(c)
700 __UNDEFINED__ isIDFIRST_A(c)       isIDFIRST(c)
701 __UNDEFINED__ isLOWER_A(c)         isLOWER(c)
702 __UNDEFINED__ isOCTAL_A(c)         isOCTAL(c)
703 __UNDEFINED__ isPRINT_A(c)         isPRINT(c)
704 __UNDEFINED__ isPSXSPC_A(c)        isPSXSPC(c)
705 __UNDEFINED__ isPUNCT_A(c)         isPUNCT(c)
706 __UNDEFINED__ isSPACE_A(c)         isSPACE(c)
707 __UNDEFINED__ isUPPER_A(c)         isUPPER(c)
708 __UNDEFINED__ isWORDCHAR_A(c)      isWORDCHAR(c)
709 __UNDEFINED__ isXDIGIT_A(c)        isXDIGIT(c)
710
711 __UNDEFINED__ isASCII_utf8_safe(s,e)  (((e) - (s)) <= 0 ? 0 : isASCII(*(s)))
712 __UNDEFINED__ isASCII_uvchr(c)    (FITS_IN_8_BITS(c) ? isASCII_L1(c) : 0)
713
714 #if { VERSION >= 5.006 }
715 #  ifdef isALPHA_uni    /* If one defined, all are; this is just an exemplar */
716 #    define D_PPP_is_ctype(upper, lower, c)                                 \
717         (FITS_IN_8_BITS(c)                                                  \
718         ? is ## upper ## _L1(c)                                             \
719         : is ## upper ## _uni((UV) (c)))    /* _uni is old synonym */
720 #  else
721 #    define D_PPP_is_ctype(upper, lower, c)                                 \
722         (FITS_IN_8_BITS(c)                                                  \
723         ? is ## upper ## _L1(c)                                             \
724         : is_uni_ ## lower((UV) (c)))     /* is_uni_ is even older */
725 #  endif
726
727 __UNDEFINED__ isALPHA_uvchr(c)    D_PPP_is_ctype(ALPHA, alpha, c)
728 __UNDEFINED__ isALPHANUMERIC_uvchr(c) (isALPHA_uvchr(c) || isDIGIT_uvchr(c))
729 #  ifdef is_uni_blank
730 __UNDEFINED__ isBLANK_uvchr(c)    D_PPP_is_ctype(BLANK, blank, c)
731 #  else
732 __UNDEFINED__ isBLANK_uvchr(c)  (FITS_IN_8_BITS(c)                          \
733                                  ? isBLANK_L1(c)                            \
734                                  : (   (UV) (c) == 0x1680 /* Unicode 3.0 */ \
735                                     || inRANGE((UV) (c), 0x2000, 0x200A)    \
736                                     || (UV) (c) == 0x202F  /* Unicode 3.0 */\
737                                     || (UV) (c) == 0x205F  /* Unicode 3.2 */\
738                                     || (UV) (c) == 0x3000))
739 #  endif
740 __UNDEFINED__ isCNTRL_uvchr(c)    D_PPP_is_ctype(CNTRL, cntrl, c)
741 __UNDEFINED__ isDIGIT_uvchr(c)    D_PPP_is_ctype(DIGIT, digit, c)
742 __UNDEFINED__ isGRAPH_uvchr(c)    D_PPP_is_ctype(GRAPH, graph, c)
743 __UNDEFINED__ isIDCONT_uvchr(c)   isWORDCHAR_uvchr(c)
744 __UNDEFINED__ isIDFIRST_uvchr(c)  D_PPP_is_ctype(IDFIRST, idfirst, c)
745 __UNDEFINED__ isLOWER_uvchr(c)    D_PPP_is_ctype(LOWER, lower, c)
746 __UNDEFINED__ isPRINT_uvchr(c)    D_PPP_is_ctype(PRINT, print, c)
747 __UNDEFINED__ isPSXSPC_uvchr(c)   isSPACE_uvchr(c)
748 __UNDEFINED__ isPUNCT_uvchr(c)    D_PPP_is_ctype(PUNCT, punct, c)
749 __UNDEFINED__ isSPACE_uvchr(c)    D_PPP_is_ctype(SPACE, space, c)
750 __UNDEFINED__ isUPPER_uvchr(c)    D_PPP_is_ctype(UPPER, upper, c)
751 __UNDEFINED__ isXDIGIT_uvchr(c)   D_PPP_is_ctype(XDIGIT, xdigit, c)
752 __UNDEFINED__ isWORDCHAR_uvchr(c) (FITS_IN_8_BITS(c)                        \
753                                ? isWORDCHAR_L1(c) : isALPHANUMERIC_uvchr(c))
754
755 __UNDEFINED__ isALPHA_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHA)
756 #  ifdef isALPHANUMERIC_utf8
757 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                 \
758                                 D_PPP_IS_GENERIC_UTF8_SAFE(s, e, ALPHANUMERIC)
759 #  else
760 __UNDEFINED__ isALPHANUMERIC_utf8_safe(s,e)                                 \
761                         (isALPHA_utf8_safe(s,e) || isDIGIT_utf8_safe(s,e))
762 #  endif
763
764 /* This was broken before 5.18, and just use this instead of worrying about
765  * which releases the official works on */
766 #  if 'A' == 65
767 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
768 ( ( LIKELY((e) > (s)) ) ?   /* Machine generated */                         \
769     ( ( 0x09 == ((const U8*)s)[0] || 0x20 == ((const U8*)s)[0] ) ? 1        \
770     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
771             ( ( 0xC2 == ((const U8*)s)[0] ) ?                               \
772                 ( ( 0xA0 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
773             : ( 0xE1 == ((const U8*)s)[0] ) ?                               \
774                 ( ( ( 0x9A == ((const U8*)s)[1] ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
775             : ( 0xE2 == ((const U8*)s)[0] ) ?                               \
776                 ( ( 0x80 == ((const U8*)s)[1] ) ?                           \
777                     ( ( inRANGE(((const U8*)s)[2], 0x80, 0x8A ) || 0xAF == ((const U8*)s)[2] ) ? 3 : 0 )\
778                 : ( ( 0x81 == ((const U8*)s)[1] ) && ( 0x9F == ((const U8*)s)[2] ) ) ? 3 : 0 )\
779             : ( ( ( 0xE3 == ((const U8*)s)[0] ) && ( 0x80 == ((const U8*)s)[1] ) ) && ( 0x80 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
780         : 0 )                                                               \
781  : 0 )
782
783 #  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
784
785 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
786 ( ( LIKELY((e) > (s)) ) ?                                                   \
787     ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
788     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
789             ( ( 0x80 == ((const U8*)s)[0] ) ?                               \
790                 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
791             : ( 0xBC == ((const U8*)s)[0] ) ?                               \
792                 ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
793             : ( 0xCA == ((const U8*)s)[0] ) ?                               \
794                 ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
795                     ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
796                 : ( 0x42 == ((const U8*)s)[1] ) ?                           \
797                     ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
798                 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
799             : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
800         : 0 )                                                               \
801 : 0 )
802
803 #  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
804
805 __UNDEFINED__  isBLANK_utf8_safe(s,e)                                       \
806 ( ( LIKELY((e) > (s)) ) ?                                                   \
807     ( ( 0x05 == ((const U8*)s)[0] || 0x40 == ((const U8*)s)[0] ) ? 1        \
808     : ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ?                              \
809             ( ( 0x78 == ((const U8*)s)[0] ) ?                               \
810                 ( ( 0x41 == ((const U8*)s)[1] ) ? 2 : 0 )                   \
811             : ( 0xBD == ((const U8*)s)[0] ) ?                               \
812                 ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
813             : ( 0xCA == ((const U8*)s)[0] ) ?                               \
814                 ( ( 0x41 == ((const U8*)s)[1] ) ?                           \
815                     ( ( inRANGE(((const U8*)s)[2], 0x41, 0x4A ) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
816                 : ( 0x42 == ((const U8*)s)[1] ) ?                           \
817                     ( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )               \
818                 : ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
819             : ( ( ( 0xCE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] ) ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
820         : 0 )                                                               \
821 : 0 )
822
823 #  else
824 #    error Unknown character set
825 #  endif
826
827 __UNDEFINED__ isCNTRL_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, CNTRL)
828 __UNDEFINED__ isDIGIT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, DIGIT)
829 __UNDEFINED__ isGRAPH_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, GRAPH)
830 #  ifdef isIDCONT_utf8
831 __UNDEFINED__ isIDCONT_utf8_safe(s,e)   D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDCONT)
832 #  else
833 __UNDEFINED__ isIDCONT_utf8_safe(s,e)   isWORDCHAR_utf8_safe(s,e)
834 #  endif
835
836 __UNDEFINED__ isIDFIRST_utf8_safe(s,e)  D_PPP_IS_GENERIC_UTF8_SAFE(s, e, IDFIRST)
837 __UNDEFINED__ isLOWER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, LOWER)
838 __UNDEFINED__ isPRINT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PRINT)
839
840 #  undef isPSXSPC_utf8_safe   /* Use the modern definition */
841 __UNDEFINED__ isPSXSPC_utf8_safe(s,e)   isSPACE_utf8_safe(s,e)
842
843 __UNDEFINED__ isPUNCT_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, PUNCT)
844 __UNDEFINED__ isSPACE_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, SPACE)
845 __UNDEFINED__ isUPPER_utf8_safe(s,e)    D_PPP_IS_GENERIC_UTF8_SAFE(s, e, UPPER)
846
847 #  ifdef isWORDCHAR_utf8
848 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e) D_PPP_IS_GENERIC_UTF8_SAFE(s, e, WORDCHAR)
849 #  else
850 __UNDEFINED__ isWORDCHAR_utf8_safe(s,e)                                        \
851                                (isALPHANUMERIC_utf8_safe(s,e) || (*(s)) == '_')
852 #  endif
853
854 /* This was broken before 5.12, and just use this instead of worrying about
855  * which releases the official works on */
856 #  if 'A' == 65
857 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
858 ( ( LIKELY((e) > (s)) ) ?                                                   \
859     ( ( inRANGE(((const U8*)s)[0], 0x30, 0x39 ) || inRANGE(((const U8*)s)[0], 0x41, 0x46 ) || inRANGE(((const U8*)s)[0], 0x61, 0x66 ) ) ? 1\
860     : ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xEF == ((const U8*)s)[0] ) ) ? ( ( 0xBC == ((const U8*)s)[1] ) ?\
861                     ( ( inRANGE(((const U8*)s)[2], 0x90, 0x99 ) || inRANGE(((const U8*)s)[2], 0xA1, 0xA6 ) ) ? 3 : 0 )\
862                 : ( ( 0xBD == ((const U8*)s)[1] ) && ( inRANGE(((const U8*)s)[2], 0x81, 0x86 ) ) ) ? 3 : 0 ) : 0 )\
863 : 0 )
864
865 #  elif 'A' == 193  && '^' == 95 /* EBCDIC 1047 */
866
867 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
868 ( ( LIKELY((e) > (s)) ) ?                                                   \
869     ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
870     : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
871                         ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || inRANGE(((const U8*)s)[3], 0x62, 0x68 ) ) ? 4 : 0 )\
872                     : ( ( inRANGE(((const U8*)s)[2], 0x68, 0x69 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
873 : 0 )
874
875 #  elif 'A' == 193  && '^' == 176 /* EBCDIC 037 */
876
877 __UNDEFINED__  isXDIGIT_utf8_safe(s,e)                                       \
878 ( ( LIKELY((e) > (s)) ) ?                                                   \
879     ( ( inRANGE(((const U8*)s)[0], 0x81, 0x86 ) || inRANGE(((const U8*)s)[0], 0xC1, 0xC6 ) || inRANGE(((const U8*)s)[0], 0xF0, 0xF9 ) ) ? 1\
880     : ( ( ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
881                         ( ( inRANGE(((const U8*)s)[3], 0x57, 0x59 ) || 0x5F == ((const U8*)s)[3] || inRANGE(((const U8*)s)[3], 0x62, 0x67 ) ) ? 4 : 0 )\
882                     : ( ( inRANGE(((const U8*)s)[2], 0x67, 0x68 ) ) && ( inRANGE(((const U8*)s)[3], 0x42, 0x47 ) ) ) ? 4 : 0 ) : 0 )\
883 : 0 )
884
885 #  else
886 #    error Unknown character set
887 #  endif
888
889 __UNDEFINED__ isALPHA_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHA)
890 #  ifdef isALPHANUMERIC_utf8
891 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e)                                 \
892                                 D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, ALPHANUMERIC)
893 #  else
894 __UNDEFINED__ isALPHANUMERIC_LC_utf8_safe(s,e)                              \
895                         (isALPHA_LC_utf8_safe(s,e) || isDIGIT_LC_utf8_safe(s,e))
896 #  endif
897
898 __UNDEFINED__  isBLANK_LC_utf8_safe(s,e)                                       \
899                             D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, BLANK)
900 __UNDEFINED__ isCNTRL_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, CNTRL)
901 __UNDEFINED__ isDIGIT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, DIGIT)
902 __UNDEFINED__ isGRAPH_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, GRAPH)
903 #  ifdef isIDCONT_utf8
904 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e)   D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDCONT)
905 #  else
906 __UNDEFINED__ isIDCONT_LC_utf8_safe(s,e)   isWORDCHAR_LC_utf8_safe(s,e)
907 #  endif
908
909 __UNDEFINED__ isIDFIRST_LC_utf8_safe(s,e)  D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, IDFIRST)
910 __UNDEFINED__ isLOWER_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, LOWER)
911 __UNDEFINED__ isPRINT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PRINT)
912
913 #  undef isPSXSPC_LC_utf8_safe   /* Use the modern definition */
914 __UNDEFINED__ isPSXSPC_LC_utf8_safe(s,e)   isSPACE_LC_utf8_safe(s,e)
915
916 __UNDEFINED__ isPUNCT_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, PUNCT)
917 __UNDEFINED__ isSPACE_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, SPACE)
918 __UNDEFINED__ isUPPER_LC_utf8_safe(s,e)    D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, UPPER)
919
920 #  ifdef isWORDCHAR_utf8
921 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e) D_PPP_IS_GENERIC_LC_UTF8_SAFE(s, e, WORDCHAR)
922 #  else
923 __UNDEFINED__ isWORDCHAR_LC_utf8_safe(s,e)                                             \
924                                (isALPHANUMERIC_LC_utf8_safe(s,e) || (*(s)) == '_')
925 #  endif
926
927 __UNDEFINED__  isXDIGIT_LC_utf8_safe(s,e)                                       \
928                             D_PPP_IS_GENERIC_LC_UTF8_SAFE_BROKEN(s, e, XDIGIT)
929
930 /* Warning: isALPHANUMERIC_utf8_safe, isALPHA_utf8_safe, isASCII_utf8_safe,
931  * isBLANK_utf8_safe, isCNTRL_utf8_safe, isDIGIT_utf8_safe, isGRAPH_utf8_safe,
932  * isIDCONT_utf8_safe, isIDFIRST_utf8_safe, isLOWER_utf8_safe,
933  * isPRINT_utf8_safe, isPSXSPC_utf8_safe, isPUNCT_utf8_safe, isSPACE_utf8_safe,
934  * isUPPER_utf8_safe, isWORDCHAR_utf8_safe, isWORDCHAR_utf8_safe,
935  * isXDIGIT_utf8_safe,
936  * isALPHANUMERIC_LC_utf8_safe, isALPHA_LC_utf8_safe, isASCII_LC_utf8_safe,
937  * isBLANK_LC_utf8_safe, isCNTRL_LC_utf8_safe, isDIGIT_LC_utf8_safe,
938  * isGRAPH_LC_utf8_safe, isIDCONT_LC_utf8_safe, isIDFIRST_LC_utf8_safe,
939  * isLOWER_LC_utf8_safe, isPRINT_LC_utf8_safe, isPSXSPC_LC_utf8_safe,
940  * isPUNCT_LC_utf8_safe, isSPACE_LC_utf8_safe, isUPPER_LC_utf8_safe,
941  * isWORDCHAR_LC_utf8_safe, isWORDCHAR_LC_utf8_safe, isXDIGIT_LC_utf8_safe,
942  * isALPHANUMERIC_uvchr, isALPHA_uvchr, isASCII_uvchr, isBLANK_uvchr,
943  * isCNTRL_uvchr, isDIGIT_uvchr, isGRAPH_uvchr, isIDCONT_uvchr,
944  * isIDFIRST_uvchr, isLOWER_uvchr, isPRINT_uvchr, isPSXSPC_uvchr,
945  * isPUNCT_uvchr, isSPACE_uvchr, isUPPER_uvchr, isWORDCHAR_uvchr,
946  * isWORDCHAR_uvchr, isXDIGIT_uvchr
947  *
948  * The UTF-8 handling is buggy in early Perls, and this can give inaccurate
949  * results for code points above 0xFF, until the implementation started
950  * settling down in 5.12 and 5.14 */
951
952 #endif
953
954 #define D_PPP_TOO_SHORT_MSG  "Malformed UTF-8 character starting with:"      \
955                              " \\x%02x (too short; %d bytes available, need" \
956                              " %d)\n"
957 /* Perls starting here had a new API which handled multi-character results */
958 #if { VERSION >= 5.7.3 }
959
960 __UNDEFINED__ toLOWER_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_lower(NATIVE_TO_UNI(c), s, l))
961 __UNDEFINED__ toUPPER_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_upper(NATIVE_TO_UNI(c), s, l))
962 __UNDEFINED__ toTITLE_uvchr(c, s, l)  UNI_TO_NATIVE(to_uni_title(NATIVE_TO_UNI(c), s, l))
963 __UNDEFINED__ toFOLD_uvchr(c, s, l)   UNI_TO_NATIVE(to_uni_fold( NATIVE_TO_UNI(c), s, l))
964
965 #  if { VERSION != 5.15.6 }     /* Just this version is broken */
966
967       /* Prefer the macro to the function */
968 #    if defined toLOWER_utf8
969 #      define D_PPP_TO_LOWER_CALLEE(s,r,l)    toLOWER_utf8(s,r,l)
970 #    else
971 #      define D_PPP_TO_LOWER_CALLEE(s,r,l)    to_utf8_lower(s,r,l)
972 #    endif
973 #    if defined toTITLE_utf8
974 #      define D_PPP_TO_TITLE_CALLEE(s,r,l)    toTITLE_utf8(s,r,l)
975 #    else
976 #      define D_PPP_TO_TITLE_CALLEE(s,r,l)    to_utf8_title(s,r,l)
977 #    endif
978 #    if defined toUPPER_utf8
979 #      define D_PPP_TO_UPPER_CALLEE(s,r,l)    toUPPER_utf8(s,r,l)
980 #    else
981 #      define D_PPP_TO_UPPER_CALLEE(s,r,l)    to_utf8_upper(s,r,l)
982 #    endif
983 #    if defined toFOLD_utf8
984 #      define D_PPP_TO_FOLD_CALLEE(s,r,l)     toFOLD_utf8(s,r,l)
985 #    else
986 #      define D_PPP_TO_FOLD_CALLEE(s,r,l)     to_utf8_fold(s,r,l)
987 #    endif
988 #  else     /* Below is 5.15.6, which failed to make the macros available
989 #              outside of core, so we have to use the 'Perl_' form.  khw
990 #              decided it was easier to just handle this case than have to
991 #              document the exception, and make an exception in the tests below
992 #              */
993 #    define D_PPP_TO_LOWER_CALLEE(s,r,l)                                    \
994                         Perl__to_utf8_lower_flags(aTHX_ s, r, l, 0, NULL)
995 #    define D_PPP_TO_TITLE_CALLEE(s,r,l)                                    \
996                         Perl__to_utf8_title_flags(aTHX_ s, r, l, 0, NULL)
997 #    define D_PPP_TO_UPPER_CALLEE(s,r,l)                                    \
998                         Perl__to_utf8_upper_flags(aTHX_ s, r, l, 0, NULL)
999 #    define D_PPP_TO_FOLD_CALLEE(s,r,l)                                     \
1000             Perl__to_utf8_fold_flags(aTHX_ s, r, l, FOLD_FLAGS_FULL, NULL)
1001 #  endif
1002
1003 /* The actual implementation of the backported macros.  If too short, croak,
1004  * otherwise call the original that doesn't have an upper limit parameter */
1005 #  define D_PPP_GENERIC_MULTI_ARG_TO(name, s, e,r,l)                        \
1006     (((((e) - (s)) <= 0)                                                    \
1007          /* We could just do nothing, but modern perls croak */             \
1008       ? (croak("Attempting case change on zero length string"),             \
1009          0) /* So looks like it returns something, and will compile */      \
1010       : ((e) - (s)) < UTF8SKIP(s))                                          \
1011         ? (croak(D_PPP_TOO_SHORT_MSG,                                       \
1012                                s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1013            0)                                                               \
1014         : D_PPP_TO_ ## name ## _CALLEE(s,r,l))
1015
1016 __UNDEFINED__  toUPPER_utf8_safe(s,e,r,l)                                   \
1017                         D_PPP_GENERIC_MULTI_ARG_TO(UPPER,s,e,r,l)
1018 __UNDEFINED__  toLOWER_utf8_safe(s,e,r,l)                                   \
1019                         D_PPP_GENERIC_MULTI_ARG_TO(LOWER,s,e,r,l)
1020 __UNDEFINED__  toTITLE_utf8_safe(s,e,r,l)                                   \
1021                         D_PPP_GENERIC_MULTI_ARG_TO(TITLE,s,e,r,l)
1022 __UNDEFINED__  toFOLD_utf8_safe(s,e,r,l)                                    \
1023                         D_PPP_GENERIC_MULTI_ARG_TO(FOLD,s,e,r,l)
1024
1025 #elif { VERSION >= 5.006 }
1026
1027 /* Here we have UTF-8 support, but using the original API where the case
1028  * changing functions merely returned the changed code point; hence they
1029  * couldn't handle multi-character results. */
1030
1031 #  ifdef uvchr_to_utf8
1032 #    define D_PPP_UV_TO_UTF8 uvchr_to_utf8
1033 #  else
1034 #    define D_PPP_UV_TO_UTF8 uv_to_utf8
1035 #  endif
1036
1037    /* Get the utf8 of the case changed value, and store its length; then have
1038     * to re-calculate the changed case value in order to return it */
1039 #  define D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(name, c, s, l)                  \
1040         (*(l) = (D_PPP_UV_TO_UTF8(s,                                        \
1041                  UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c)))) - (s)),  \
1042         UNI_TO_NATIVE(to_uni_ ## name(NATIVE_TO_UNI(c))))
1043
1044 __UNDEFINED__ toLOWER_uvchr(c, s, l)                                        \
1045                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(lower, c, s, l)
1046 __UNDEFINED__ toUPPER_uvchr(c, s, l)                                        \
1047                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(upper, c, s, l)
1048 __UNDEFINED__ toTITLE_uvchr(c, s, l)                                        \
1049                         D_PPP_GENERIC_SINGLE_ARG_TO_UVCHR(title, c, s, l)
1050 __UNDEFINED__ toFOLD_uvchr(c, s, l)   toLOWER_uvchr(c, s, l)
1051
1052 #  define D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(name, s, e, r, l)                \
1053     (((((e) - (s)) <= 0)                                                    \
1054       ? (croak("Attempting case change on zero length string"),             \
1055          0) /* So looks like it returns something, and will compile */      \
1056       : ((e) - (s)) < UTF8SKIP(s))                                          \
1057         ? (croak(D_PPP_TOO_SHORT_MSG,                                       \
1058                                s[0], (int) ((e) - (s)), (int) UTF8SKIP(s)), \
1059            0)                                                               \
1060           /* Get the changed code point and store its UTF-8 */              \
1061         : D_PPP_UV_TO_UTF8(r, to_utf8_ ## name(s)),                         \
1062             /* Then store its length, and re-get code point for return */   \
1063             *(l) = UTF8SKIP(r), to_utf8_ ## name(r))
1064
1065 /* Warning: toUPPER_utf8_safe, toLOWER_utf8_safe, toTITLE_utf8_safe,
1066  * toUPPER_uvchr, toLOWER_uvchr, toTITLE_uvchr
1067     The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1068     this backport does not correct them.
1069
1070     In perls before 7.3, multi-character case changing is not implemented; this
1071     backport uses the simple case changes available in those perls. */
1072
1073 __UNDEFINED__  toUPPER_utf8_safe(s,e,r,l)                                   \
1074                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(upper, s, e, r, l)
1075 __UNDEFINED__  toLOWER_utf8_safe(s,e,r,l)                                   \
1076                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(lower, s, e, r, l)
1077 __UNDEFINED__  toTITLE_utf8_safe(s,e,r,l)                                   \
1078                         D_PPP_GENERIC_SINGLE_ARG_TO_UTF8(title, s, e, r, l)
1079
1080  /* Warning: toFOLD_utf8_safe, toFOLD_uvchr
1081     The UTF-8 case changing operations had bugs before around 5.12 or 5.14;
1082     this backport does not correct them.
1083
1084     In perls before 7.3, case folding is not implemented; instead, this
1085     backport substitutes simple (not multi-character, which isn't available)
1086     lowercasing.  This gives the correct result in most, but not all, instances
1087     */
1088
1089 __UNDEFINED__  toFOLD_utf8_safe(s,e,r,l)  toLOWER_utf8_safe(s,e,r,l)
1090
1091 #endif
1092
1093 /* Until we figure out how to support this in older perls... */
1094 #if { VERSION >= 5.8.0 }
1095
1096 __UNDEFINED__ HeUTF8(he)        ((HeKLEN(he) == HEf_SVKEY) ?            \
1097                                  SvUTF8(HeKEY_sv(he)) :                 \
1098                                  (U32)HeKUTF8(he))
1099
1100 #endif
1101
1102 __UNDEFINED__ C_ARRAY_LENGTH(a)         (sizeof(a)/sizeof((a)[0]))
1103 __UNDEFINED__ C_ARRAY_END(a)            ((a) + C_ARRAY_LENGTH(a))
1104
1105 __UNDEFINED__ LIKELY(x) (x)
1106 __UNDEFINED__ UNLIKELY(x) (x)
1107
1108 #ifndef MUTABLE_PTR
1109 #if defined(PERL_USE_GCC_BRACE_GROUPS)
1110 #  define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
1111 #else
1112 #  define MUTABLE_PTR(p) ((void *) (p))
1113 #endif
1114 #endif
1115
1116 __UNDEFINED__ MUTABLE_AV(p)   ((AV *)MUTABLE_PTR(p))
1117 __UNDEFINED__ MUTABLE_CV(p)   ((CV *)MUTABLE_PTR(p))
1118 __UNDEFINED__ MUTABLE_GV(p)   ((GV *)MUTABLE_PTR(p))
1119 __UNDEFINED__ MUTABLE_HV(p)   ((HV *)MUTABLE_PTR(p))
1120 __UNDEFINED__ MUTABLE_IO(p)   ((IO *)MUTABLE_PTR(p))
1121 __UNDEFINED__ MUTABLE_SV(p)   ((SV *)MUTABLE_PTR(p))
1122
1123 =xsmisc
1124
1125 typedef XSPROTO(XSPROTO_test_t);
1126 typedef XSPROTO_test_t *XSPROTO_test_t_ptr;
1127
1128 XS(XS_Devel__PPPort_dXSTARG);  /* prototype */
1129 XS(XS_Devel__PPPort_dXSTARG)
1130 {
1131   dXSARGS;
1132   dXSTARG;
1133   IV iv;
1134
1135   PERL_UNUSED_VAR(cv);
1136   SP -= items;
1137   iv = SvIV(ST(0)) + 1;
1138   PUSHi(iv);
1139   XSRETURN(1);
1140 }
1141
1142 XS(XS_Devel__PPPort_dAXMARK);  /* prototype */
1143 XS(XS_Devel__PPPort_dAXMARK)
1144 {
1145   dSP;
1146   dAXMARK;
1147   dITEMS;
1148   IV iv;
1149
1150   PERL_UNUSED_VAR(cv);
1151   SP -= items;
1152   iv = SvIV(ST(0)) - 1;
1153   mPUSHi(iv);
1154   XSRETURN(1);
1155 }
1156
1157 =xsboot
1158
1159 {
1160   XSPROTO_test_t_ptr p = &XS_Devel__PPPort_dXSTARG;
1161   newXS("Devel::PPPort::dXSTARG", *p, file);
1162 }
1163 newXS("Devel::PPPort::dAXMARK", XS_Devel__PPPort_dAXMARK, file);
1164
1165 =xsubs
1166
1167 int
1168 OpSIBLING_tests()
1169         PREINIT:
1170                 OP *x;
1171                 OP *kid;
1172                 OP *middlekid;
1173                 OP *lastkid;
1174                 int count = 0;
1175                 int failures = 0;
1176                 int i;
1177         CODE:
1178                 x = newOP(OP_PUSHMARK, 0);
1179
1180                 /* No siblings yet! */
1181                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1182                         failures++; warn("Op should not have had a sib");
1183                 }
1184
1185
1186                 /* Add 2 siblings */
1187                 kid = x;
1188
1189                 for (i = 0; i < 2; i++) {
1190                         OP *newsib = newOP(OP_PUSHMARK, 0);
1191                         OpMORESIB_set(kid, newsib);
1192
1193                         kid = OpSIBLING(kid);
1194                         lastkid = kid;
1195                 }
1196                 middlekid = OpSIBLING(x);
1197
1198                 /* Should now have a sibling */
1199                 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1200                         failures++; warn("Op should have had a sib after moresib_set");
1201                 }
1202
1203                 /* Count the siblings */
1204                 for (kid = OpSIBLING(x); kid; kid = OpSIBLING(kid)) {
1205                         count++;
1206                 }
1207
1208                 if (count != 2) {
1209                         failures++; warn("Kid had %d sibs, expected 2", count);
1210                 }
1211
1212                 if (OpHAS_SIBLING(lastkid) || OpSIBLING(lastkid)) {
1213                         failures++; warn("Last kid should not have a sib");
1214                 }
1215
1216                 /* Really sets the parent, and says 'no more siblings' */
1217                 OpLASTSIB_set(x, lastkid);
1218
1219                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1220                         failures++; warn("OpLASTSIB_set failed?");
1221                 }
1222
1223                 /* Restore the kid */
1224                 OpMORESIB_set(x, lastkid);
1225
1226                 /* Try to remove it again */
1227                 OpLASTSIB_set(x, NULL);
1228
1229                 if (OpHAS_SIBLING(x) || OpSIBLING(x)) {
1230                         failures++; warn("OpLASTSIB_set with NULL failed?");
1231                 }
1232
1233                 /* Try to restore with maybesib_set */
1234                 OpMAYBESIB_set(x, lastkid, NULL);
1235
1236                 if (! OpHAS_SIBLING(x) || ! OpSIBLING(x) ) {
1237                         failures++; warn("Op should have had a sib after maybesibset");
1238                 }
1239
1240                 op_free(lastkid);
1241                 op_free(middlekid);
1242                 op_free(x);
1243                 RETVAL = failures;
1244         OUTPUT:
1245                 RETVAL
1246
1247 int
1248 SvRXOK(sv)
1249         SV *sv
1250         CODE:
1251                 RETVAL = SvRXOK(sv);
1252         OUTPUT:
1253                 RETVAL
1254
1255 int
1256 ptrtests()
1257         PREINIT:
1258                 int var, *p = &var;
1259
1260         CODE:
1261                 RETVAL = 0;
1262                 RETVAL += PTR2nat(p) != 0       ?  1 : 0;
1263                 RETVAL += PTR2ul(p) != 0UL      ?  2 : 0;
1264                 RETVAL += PTR2UV(p) != (UV) 0   ?  4 : 0;
1265                 RETVAL += PTR2IV(p) != (IV) 0   ?  8 : 0;
1266                 RETVAL += PTR2NV(p) != (NV) 0   ? 16 : 0;
1267                 RETVAL += p > NUM2PTR(int *, 0) ? 32 : 0;
1268
1269         OUTPUT:
1270                 RETVAL
1271
1272 int
1273 gv_stashpvn(name, create)
1274         char *name
1275         I32 create
1276         CODE:
1277                 RETVAL = gv_stashpvn(name, strlen(name), create) != NULL;
1278         OUTPUT:
1279                 RETVAL
1280
1281 int
1282 get_sv(name, create)
1283         char *name
1284         I32 create
1285         CODE:
1286                 RETVAL = get_sv(name, create) != NULL;
1287         OUTPUT:
1288                 RETVAL
1289
1290 int
1291 get_av(name, create)
1292         char *name
1293         I32 create
1294         CODE:
1295                 RETVAL = get_av(name, create) != NULL;
1296         OUTPUT:
1297                 RETVAL
1298
1299 int
1300 get_hv(name, create)
1301         char *name
1302         I32 create
1303         CODE:
1304                 RETVAL = get_hv(name, create) != NULL;
1305         OUTPUT:
1306                 RETVAL
1307
1308 int
1309 get_cv(name, create)
1310         char *name
1311         I32 create
1312         CODE:
1313                 RETVAL = get_cv(name, create) != NULL;
1314         OUTPUT:
1315                 RETVAL
1316
1317 void
1318 xsreturn(two)
1319         int two
1320         PPCODE:
1321                 mXPUSHp("test1", 5);
1322                 if (two)
1323                   mXPUSHp("test2", 5);
1324                 if (two)
1325                   XSRETURN(2);
1326                 else
1327                   XSRETURN(1);
1328
1329 SV*
1330 boolSV(value)
1331         int value
1332         CODE:
1333                 RETVAL = newSVsv(boolSV(value));
1334         OUTPUT:
1335                 RETVAL
1336
1337 SV*
1338 DEFSV()
1339         CODE:
1340                 RETVAL = newSVsv(DEFSV);
1341         OUTPUT:
1342                 RETVAL
1343
1344 void
1345 DEFSV_modify()
1346         PPCODE:
1347                 XPUSHs(sv_mortalcopy(DEFSV));
1348                 ENTER;
1349                 SAVE_DEFSV;
1350                 DEFSV_set(newSVpvs("DEFSV"));
1351                 XPUSHs(sv_mortalcopy(DEFSV));
1352                 /* Yes, this leaks the above scalar; 5.005 with threads for some reason */
1353                 /* frees it upon LEAVE, thus mortalizing it causes "attempt to free..." */
1354                 /* sv_2mortal(DEFSV); */
1355                 LEAVE;
1356                 XPUSHs(sv_mortalcopy(DEFSV));
1357                 XSRETURN(3);
1358
1359 int
1360 ERRSV()
1361         CODE:
1362                 RETVAL = SvTRUEx(ERRSV);
1363         OUTPUT:
1364                 RETVAL
1365
1366 SV*
1367 UNDERBAR()
1368         CODE:
1369                 {
1370                   dUNDERBAR;
1371                   RETVAL = newSVsv(UNDERBAR);
1372                 }
1373         OUTPUT:
1374                 RETVAL
1375
1376 void
1377 prepush()
1378         CODE:
1379                 {
1380                   dXSTARG;
1381                   XSprePUSH;
1382                   PUSHi(42);
1383                   XSRETURN(1);
1384                 }
1385
1386 int
1387 PERL_ABS(a)
1388         int a
1389
1390 void
1391 SVf(x)
1392         SV *x
1393         PPCODE:
1394 #if { VERSION >= 5.004 }
1395                 x = sv_2mortal(newSVpvf("[%" SVf "]", SVfARG(x)));
1396 #endif
1397                 XPUSHs(x);
1398                 XSRETURN(1);
1399
1400 void
1401 Perl_ppaddr_t(string)
1402         char *string
1403         PREINIT:
1404                 Perl_ppaddr_t lower;
1405         PPCODE:
1406                 lower = PL_ppaddr[OP_LC];
1407                 mXPUSHs(newSVpv(string, 0));
1408                 PUTBACK;
1409                 ENTER;
1410                 (void)*(lower)(aTHXR);
1411                 SPAGAIN;
1412                 LEAVE;
1413                 XSRETURN(1);
1414
1415 #if { VERSION >= 5.8.0 }
1416
1417 void
1418 check_HeUTF8(utf8_key)
1419         SV *utf8_key;
1420         PREINIT:
1421                 HV *hash;
1422                 HE *ent;
1423                 STRLEN klen;
1424                 char *key;
1425         PPCODE:
1426                 hash = newHV();
1427
1428                 key = SvPV(utf8_key, klen);
1429                 if (SvUTF8(utf8_key)) klen *= -1;
1430                 hv_store(hash, key, klen, newSVpvs("string"), 0);
1431                 hv_iterinit(hash);
1432                 ent = hv_iternext(hash);
1433                 assert(ent);
1434                 mXPUSHp((HeUTF8(ent) == 0 ? "norm" : "utf8"), 4);
1435                 hv_undef(hash);
1436
1437
1438 #endif
1439
1440 void
1441 check_c_array()
1442         PREINIT:
1443                 int x[] = { 10, 11, 12, 13 };
1444         PPCODE:
1445                 mXPUSHi(C_ARRAY_LENGTH(x));  /* 4 */
1446                 mXPUSHi(*(C_ARRAY_END(x)-1)); /* 13 */
1447
1448 bool
1449 isBLANK(ord)
1450     UV ord
1451     CODE:
1452         RETVAL = isBLANK(ord);
1453     OUTPUT:
1454         RETVAL
1455
1456 bool
1457 isBLANK_A(ord)
1458     UV ord
1459     CODE:
1460         RETVAL = isBLANK_A(ord);
1461     OUTPUT:
1462         RETVAL
1463
1464 bool
1465 isBLANK_L1(ord)
1466     UV ord
1467     CODE:
1468         RETVAL = isBLANK_L1(ord);
1469     OUTPUT:
1470         RETVAL
1471
1472 bool
1473 isUPPER(ord)
1474     UV ord
1475     CODE:
1476         RETVAL = isUPPER(ord);
1477     OUTPUT:
1478         RETVAL
1479
1480 bool
1481 isUPPER_A(ord)
1482     UV ord
1483     CODE:
1484         RETVAL = isUPPER_A(ord);
1485     OUTPUT:
1486         RETVAL
1487
1488 bool
1489 isUPPER_L1(ord)
1490     UV ord
1491     CODE:
1492         RETVAL = isUPPER_L1(ord);
1493     OUTPUT:
1494         RETVAL
1495
1496 bool
1497 isLOWER(ord)
1498     UV ord
1499     CODE:
1500         RETVAL = isLOWER(ord);
1501     OUTPUT:
1502         RETVAL
1503
1504 bool
1505 isLOWER_A(ord)
1506     UV ord
1507     CODE:
1508         RETVAL = isLOWER_A(ord);
1509     OUTPUT:
1510         RETVAL
1511
1512 bool
1513 isLOWER_L1(ord)
1514     UV ord
1515     CODE:
1516         RETVAL = isLOWER_L1(ord);
1517     OUTPUT:
1518         RETVAL
1519
1520 bool
1521 isALPHA(ord)
1522     UV ord
1523     CODE:
1524         RETVAL = isALPHA(ord);
1525     OUTPUT:
1526         RETVAL
1527
1528 bool
1529 isALPHA_A(ord)
1530     UV ord
1531     CODE:
1532         RETVAL = isALPHA_A(ord);
1533     OUTPUT:
1534         RETVAL
1535
1536 bool
1537 isALPHA_L1(ord)
1538     UV ord
1539     CODE:
1540         RETVAL = isALPHA_L1(ord);
1541     OUTPUT:
1542         RETVAL
1543
1544 bool
1545 isWORDCHAR(ord)
1546     UV ord
1547     CODE:
1548         RETVAL = isWORDCHAR(ord);
1549     OUTPUT:
1550         RETVAL
1551
1552 bool
1553 isWORDCHAR_A(ord)
1554     UV ord
1555     CODE:
1556         RETVAL = isWORDCHAR_A(ord);
1557     OUTPUT:
1558         RETVAL
1559
1560 bool
1561 isWORDCHAR_L1(ord)
1562     UV ord
1563     CODE:
1564         RETVAL = isWORDCHAR_L1(ord);
1565     OUTPUT:
1566         RETVAL
1567
1568 bool
1569 isALPHANUMERIC(ord)
1570     UV ord
1571     CODE:
1572         RETVAL = isALPHANUMERIC(ord);
1573     OUTPUT:
1574         RETVAL
1575
1576 bool
1577 isALPHANUMERIC_A(ord)
1578     UV ord
1579     CODE:
1580         RETVAL = isALPHANUMERIC_A(ord);
1581     OUTPUT:
1582         RETVAL
1583
1584 bool
1585 isALNUM(ord)
1586     UV ord
1587     CODE:
1588         RETVAL = isALNUM(ord);
1589     OUTPUT:
1590         RETVAL
1591
1592 bool
1593 isALNUM_A(ord)
1594     UV ord
1595     CODE:
1596         RETVAL = isALNUM_A(ord);
1597     OUTPUT:
1598         RETVAL
1599
1600 bool
1601 isDIGIT(ord)
1602     UV ord
1603     CODE:
1604         RETVAL = isDIGIT(ord);
1605     OUTPUT:
1606         RETVAL
1607
1608 bool
1609 isDIGIT_A(ord)
1610     UV ord
1611     CODE:
1612         RETVAL = isDIGIT_A(ord);
1613     OUTPUT:
1614         RETVAL
1615
1616 bool
1617 isOCTAL(ord)
1618     UV ord
1619     CODE:
1620         RETVAL = isOCTAL(ord);
1621     OUTPUT:
1622         RETVAL
1623
1624 bool
1625 isOCTAL_A(ord)
1626     UV ord
1627     CODE:
1628         RETVAL = isOCTAL_A(ord);
1629     OUTPUT:
1630         RETVAL
1631
1632 bool
1633 isIDFIRST(ord)
1634     UV ord
1635     CODE:
1636         RETVAL = isIDFIRST(ord);
1637     OUTPUT:
1638         RETVAL
1639
1640 bool
1641 isIDFIRST_A(ord)
1642     UV ord
1643     CODE:
1644         RETVAL = isIDFIRST_A(ord);
1645     OUTPUT:
1646         RETVAL
1647
1648 bool
1649 isIDCONT(ord)
1650     UV ord
1651     CODE:
1652         RETVAL = isIDCONT(ord);
1653     OUTPUT:
1654         RETVAL
1655
1656 bool
1657 isIDCONT_A(ord)
1658     UV ord
1659     CODE:
1660         RETVAL = isIDCONT_A(ord);
1661     OUTPUT:
1662         RETVAL
1663
1664 bool
1665 isSPACE(ord)
1666     UV ord
1667     CODE:
1668         RETVAL = isSPACE(ord);
1669     OUTPUT:
1670         RETVAL
1671
1672 bool
1673 isSPACE_A(ord)
1674     UV ord
1675     CODE:
1676         RETVAL = isSPACE_A(ord);
1677     OUTPUT:
1678         RETVAL
1679
1680 bool
1681 isASCII(ord)
1682     UV ord
1683     CODE:
1684         RETVAL = isASCII(ord);
1685     OUTPUT:
1686         RETVAL
1687
1688 bool
1689 isASCII_A(ord)
1690     UV ord
1691     CODE:
1692         RETVAL = isASCII_A(ord);
1693     OUTPUT:
1694         RETVAL
1695
1696 bool
1697 isCNTRL(ord)
1698     UV ord
1699     CODE:
1700         RETVAL = isCNTRL(ord);
1701     OUTPUT:
1702         RETVAL
1703
1704 bool
1705 isCNTRL_A(ord)
1706     UV ord
1707     CODE:
1708         RETVAL = isCNTRL_A(ord);
1709     OUTPUT:
1710         RETVAL
1711
1712 bool
1713 isPRINT(ord)
1714     UV ord
1715     CODE:
1716         RETVAL = isPRINT(ord);
1717     OUTPUT:
1718         RETVAL
1719
1720 bool
1721 isPRINT_A(ord)
1722     UV ord
1723     CODE:
1724         RETVAL = isPRINT_A(ord);
1725     OUTPUT:
1726         RETVAL
1727
1728 bool
1729 isGRAPH(ord)
1730     UV ord
1731     CODE:
1732         RETVAL = isGRAPH(ord);
1733     OUTPUT:
1734         RETVAL
1735
1736 bool
1737 isGRAPH_A(ord)
1738     UV ord
1739     CODE:
1740         RETVAL = isGRAPH_A(ord);
1741     OUTPUT:
1742         RETVAL
1743
1744 bool
1745 isPUNCT(ord)
1746     UV ord
1747     CODE:
1748         RETVAL = isPUNCT(ord);
1749     OUTPUT:
1750         RETVAL
1751
1752 bool
1753 isPUNCT_A(ord)
1754     UV ord
1755     CODE:
1756         RETVAL = isPUNCT_A(ord);
1757     OUTPUT:
1758         RETVAL
1759
1760 bool
1761 isXDIGIT(ord)
1762     UV ord
1763     CODE:
1764         RETVAL = isXDIGIT(ord);
1765     OUTPUT:
1766         RETVAL
1767
1768 bool
1769 isXDIGIT_A(ord)
1770     UV ord
1771     CODE:
1772         RETVAL = isXDIGIT_A(ord);
1773     OUTPUT:
1774         RETVAL
1775
1776 bool
1777 isPSXSPC(ord)
1778     UV ord
1779     CODE:
1780         RETVAL = isPSXSPC(ord);
1781     OUTPUT:
1782         RETVAL
1783
1784 bool
1785 isPSXSPC_A(ord)
1786     UV ord
1787     CODE:
1788         RETVAL = isPSXSPC_A(ord);
1789     OUTPUT:
1790         RETVAL
1791
1792 bool
1793 isALPHANUMERIC_L1(ord)
1794     UV ord
1795     CODE:
1796         RETVAL = isALPHANUMERIC_L1(ord);
1797     OUTPUT:
1798         RETVAL
1799
1800 bool
1801 isALNUMC_L1(ord)
1802     UV ord
1803     CODE:
1804         RETVAL = isALNUMC_L1(ord);
1805     OUTPUT:
1806         RETVAL
1807
1808 bool
1809 isDIGIT_L1(ord)
1810     UV ord
1811     CODE:
1812         RETVAL = isDIGIT_L1(ord);
1813     OUTPUT:
1814         RETVAL
1815
1816 bool
1817 isOCTAL_L1(ord)
1818     UV ord
1819     CODE:
1820         RETVAL = isOCTAL_L1(ord);
1821     OUTPUT:
1822         RETVAL
1823
1824 bool
1825 isIDFIRST_L1(ord)
1826     UV ord
1827     CODE:
1828         RETVAL = isIDFIRST_L1(ord);
1829     OUTPUT:
1830         RETVAL
1831
1832 bool
1833 isIDCONT_L1(ord)
1834     UV ord
1835     CODE:
1836         RETVAL = isIDCONT_L1(ord);
1837     OUTPUT:
1838         RETVAL
1839
1840 bool
1841 isSPACE_L1(ord)
1842     UV ord
1843     CODE:
1844         RETVAL = isSPACE_L1(ord);
1845     OUTPUT:
1846         RETVAL
1847
1848 bool
1849 isASCII_L1(ord)
1850     UV ord
1851     CODE:
1852         RETVAL = isASCII_L1(ord);
1853     OUTPUT:
1854         RETVAL
1855
1856 bool
1857 isCNTRL_L1(ord)
1858     UV ord
1859     CODE:
1860         RETVAL = isCNTRL_L1(ord);
1861     OUTPUT:
1862         RETVAL
1863
1864 bool
1865 isPRINT_L1(ord)
1866     UV ord
1867     CODE:
1868         RETVAL = isPRINT_L1(ord);
1869     OUTPUT:
1870         RETVAL
1871
1872 bool
1873 isGRAPH_L1(ord)
1874     UV ord
1875     CODE:
1876         RETVAL = isGRAPH_L1(ord);
1877     OUTPUT:
1878         RETVAL
1879
1880 bool
1881 isPUNCT_L1(ord)
1882     UV ord
1883     CODE:
1884         RETVAL = isPUNCT_L1(ord);
1885     OUTPUT:
1886         RETVAL
1887
1888 bool
1889 isXDIGIT_L1(ord)
1890     UV ord
1891     CODE:
1892         RETVAL = isXDIGIT_L1(ord);
1893     OUTPUT:
1894         RETVAL
1895
1896 bool
1897 isPSXSPC_L1(ord)
1898     UV ord
1899     CODE:
1900         RETVAL = isPSXSPC_L1(ord);
1901     OUTPUT:
1902         RETVAL
1903
1904 bool
1905 isASCII_uvchr(ord)
1906     UV ord
1907     CODE:
1908         RETVAL = isASCII_uvchr(ord);
1909     OUTPUT:
1910         RETVAL
1911
1912 bool
1913 isASCII_utf8_safe(s, offset)
1914     unsigned char * s
1915     int offset
1916     CODE:
1917         PERL_UNUSED_ARG(offset);
1918         RETVAL = isASCII_utf8_safe(s, s + 1 + offset);
1919     OUTPUT:
1920         RETVAL
1921
1922 #if { VERSION >= 5.006 }
1923
1924 bool
1925 isBLANK_uvchr(ord)
1926     UV ord
1927     CODE:
1928         RETVAL = isBLANK_uvchr(ord);
1929     OUTPUT:
1930         RETVAL
1931
1932 bool
1933 isALPHA_uvchr(ord)
1934     UV ord
1935     CODE:
1936         RETVAL = isALPHA_uvchr(ord);
1937     OUTPUT:
1938         RETVAL
1939
1940 bool
1941 isALPHANUMERIC_uvchr(ord)
1942     UV ord
1943     CODE:
1944         RETVAL = isALPHANUMERIC_uvchr(ord);
1945     OUTPUT:
1946         RETVAL
1947
1948 bool
1949 isCNTRL_uvchr(ord)
1950     UV ord
1951     CODE:
1952         RETVAL = isCNTRL_uvchr(ord);
1953     OUTPUT:
1954         RETVAL
1955
1956 bool
1957 isDIGIT_uvchr(ord)
1958     UV ord
1959     CODE:
1960         RETVAL = isDIGIT_uvchr(ord);
1961     OUTPUT:
1962         RETVAL
1963
1964 bool
1965 isIDFIRST_uvchr(ord)
1966     UV ord
1967     CODE:
1968         RETVAL = isIDFIRST_uvchr(ord);
1969     OUTPUT:
1970         RETVAL
1971
1972 bool
1973 isIDCONT_uvchr(ord)
1974     UV ord
1975     CODE:
1976         RETVAL = isIDCONT_uvchr(ord);
1977     OUTPUT:
1978         RETVAL
1979
1980 bool
1981 isGRAPH_uvchr(ord)
1982     UV ord
1983     CODE:
1984         RETVAL = isGRAPH_uvchr(ord);
1985     OUTPUT:
1986         RETVAL
1987
1988 bool
1989 isLOWER_uvchr(ord)
1990     UV ord
1991     CODE:
1992         RETVAL = isLOWER_uvchr(ord);
1993     OUTPUT:
1994         RETVAL
1995
1996 bool
1997 isPRINT_uvchr(ord)
1998     UV ord
1999     CODE:
2000         RETVAL = isPRINT_uvchr(ord);
2001     OUTPUT:
2002         RETVAL
2003
2004 bool
2005 isPSXSPC_uvchr(ord)
2006     UV ord
2007     CODE:
2008         RETVAL = isPSXSPC_uvchr(ord);
2009     OUTPUT:
2010         RETVAL
2011
2012 bool
2013 isPUNCT_uvchr(ord)
2014     UV ord
2015     CODE:
2016         RETVAL = isPUNCT_uvchr(ord);
2017     OUTPUT:
2018         RETVAL
2019
2020 bool
2021 isSPACE_uvchr(ord)
2022     UV ord
2023     CODE:
2024         RETVAL = isSPACE_uvchr(ord);
2025     OUTPUT:
2026         RETVAL
2027
2028 bool
2029 isUPPER_uvchr(ord)
2030     UV ord
2031     CODE:
2032         RETVAL = isUPPER_uvchr(ord);
2033     OUTPUT:
2034         RETVAL
2035
2036 bool
2037 isWORDCHAR_uvchr(ord)
2038     UV ord
2039     CODE:
2040         RETVAL = isWORDCHAR_uvchr(ord);
2041     OUTPUT:
2042         RETVAL
2043
2044 bool
2045 isXDIGIT_uvchr(ord)
2046     UV ord
2047     CODE:
2048         RETVAL = isXDIGIT_uvchr(ord);
2049     OUTPUT:
2050         RETVAL
2051
2052 bool
2053 isALPHA_utf8_safe(s, offset)
2054     unsigned char * s
2055     int offset
2056     CODE:
2057         RETVAL = isALPHA_utf8_safe(s, s + UTF8SKIP(s) + offset);
2058     OUTPUT:
2059         RETVAL
2060
2061 bool
2062 isALPHANUMERIC_utf8_safe(s, offset)
2063     unsigned char * s
2064     int offset
2065     CODE:
2066         RETVAL = isALPHANUMERIC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2067     OUTPUT:
2068         RETVAL
2069
2070 bool
2071 isBLANK_utf8_safe(s, offset)
2072     unsigned char * s
2073     int offset
2074     CODE:
2075         RETVAL = isBLANK_utf8_safe(s, s + UTF8SKIP(s) + offset);
2076     OUTPUT:
2077         RETVAL
2078
2079 bool
2080 isCNTRL_utf8_safe(s, offset)
2081     unsigned char * s
2082     int offset
2083     CODE:
2084         RETVAL = isCNTRL_utf8_safe(s, s + UTF8SKIP(s) + offset);
2085     OUTPUT:
2086         RETVAL
2087
2088 bool
2089 isDIGIT_utf8_safe(s, offset)
2090     unsigned char * s
2091     int offset
2092     CODE:
2093         RETVAL = isDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2094     OUTPUT:
2095         RETVAL
2096
2097 bool
2098 isGRAPH_utf8_safe(s, offset)
2099     unsigned char * s
2100     int offset
2101     CODE:
2102         RETVAL = isGRAPH_utf8_safe(s, s + UTF8SKIP(s) + offset);
2103     OUTPUT:
2104         RETVAL
2105
2106 bool
2107 isIDCONT_utf8_safe(s, offset)
2108     unsigned char * s
2109     int offset
2110     CODE:
2111         RETVAL = isIDCONT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2112     OUTPUT:
2113         RETVAL
2114
2115 bool
2116 isIDFIRST_utf8_safe(s, offset)
2117     unsigned char * s
2118     int offset
2119     CODE:
2120         RETVAL = isIDFIRST_utf8_safe(s, s + UTF8SKIP(s) + offset);
2121     OUTPUT:
2122         RETVAL
2123
2124 bool
2125 isLOWER_utf8_safe(s, offset)
2126     unsigned char * s
2127     int offset
2128     CODE:
2129         RETVAL = isLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2130     OUTPUT:
2131         RETVAL
2132
2133 bool
2134 isPRINT_utf8_safe(s, offset)
2135     unsigned char * s
2136     int offset
2137     CODE:
2138         RETVAL = isPRINT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2139     OUTPUT:
2140         RETVAL
2141
2142 bool
2143 isPSXSPC_utf8_safe(s, offset)
2144     unsigned char * s
2145     int offset
2146     CODE:
2147         RETVAL = isPSXSPC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2148     OUTPUT:
2149         RETVAL
2150
2151 bool
2152 isPUNCT_utf8_safe(s, offset)
2153     unsigned char * s
2154     int offset
2155     CODE:
2156         RETVAL = isPUNCT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2157     OUTPUT:
2158         RETVAL
2159
2160 bool
2161 isSPACE_utf8_safe(s, offset)
2162     unsigned char * s
2163     int offset
2164     CODE:
2165         RETVAL = isSPACE_utf8_safe(s, s + UTF8SKIP(s) + offset);
2166     OUTPUT:
2167         RETVAL
2168
2169 bool
2170 isUPPER_utf8_safe(s, offset)
2171     unsigned char * s
2172     int offset
2173     CODE:
2174         RETVAL = isUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset);
2175     OUTPUT:
2176         RETVAL
2177
2178 bool
2179 isWORDCHAR_utf8_safe(s, offset)
2180     unsigned char * s
2181     int offset
2182     CODE:
2183         RETVAL = isWORDCHAR_utf8_safe(s, s + UTF8SKIP(s) + offset);
2184     OUTPUT:
2185         RETVAL
2186
2187 bool
2188 isXDIGIT_utf8_safe(s, offset)
2189     unsigned char * s
2190     int offset
2191     CODE:
2192         RETVAL = isXDIGIT_utf8_safe(s, s + UTF8SKIP(s) + offset);
2193     OUTPUT:
2194         RETVAL
2195
2196 bool
2197 isALPHA_LC_utf8_safe(s, offset)
2198     unsigned char * s
2199     int offset
2200     CODE:
2201         RETVAL = isALPHA_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2202     OUTPUT:
2203         RETVAL
2204
2205 bool
2206 isALPHANUMERIC_LC_utf8_safe(s, offset)
2207     unsigned char * s
2208     int offset
2209     CODE:
2210         RETVAL = isALPHANUMERIC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2211     OUTPUT:
2212         RETVAL
2213
2214 bool
2215 isASCII_LC_utf8_safe(s, offset)
2216     unsigned char * s
2217     int offset
2218     CODE:
2219         PERL_UNUSED_ARG(offset);
2220         RETVAL = isASCII_utf8_safe(s, s + UTF8SKIP(s) + offset);
2221     OUTPUT:
2222         RETVAL
2223
2224 bool
2225 isBLANK_LC_utf8_safe(s, offset)
2226     unsigned char * s
2227     int offset
2228     CODE:
2229         RETVAL = isBLANK_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2230     OUTPUT:
2231         RETVAL
2232
2233 bool
2234 isCNTRL_LC_utf8_safe(s, offset)
2235     unsigned char * s
2236     int offset
2237     CODE:
2238         RETVAL = isCNTRL_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2239     OUTPUT:
2240         RETVAL
2241
2242 bool
2243 isDIGIT_LC_utf8_safe(s, offset)
2244     unsigned char * s
2245     int offset
2246     CODE:
2247         RETVAL = isDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2248     OUTPUT:
2249         RETVAL
2250
2251 bool
2252 isGRAPH_LC_utf8_safe(s, offset)
2253     unsigned char * s
2254     int offset
2255     CODE:
2256         RETVAL = isGRAPH_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2257     OUTPUT:
2258         RETVAL
2259
2260 bool
2261 isIDCONT_LC_utf8_safe(s, offset)
2262     unsigned char * s
2263     int offset
2264     CODE:
2265         RETVAL = isIDCONT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2266     OUTPUT:
2267         RETVAL
2268
2269 bool
2270 isIDFIRST_LC_utf8_safe(s, offset)
2271     unsigned char * s
2272     int offset
2273     CODE:
2274         RETVAL = isIDFIRST_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2275     OUTPUT:
2276         RETVAL
2277
2278 bool
2279 isLOWER_LC_utf8_safe(s, offset)
2280     unsigned char * s
2281     int offset
2282     CODE:
2283         RETVAL = isLOWER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2284     OUTPUT:
2285         RETVAL
2286
2287 bool
2288 isPRINT_LC_utf8_safe(s, offset)
2289     unsigned char * s
2290     int offset
2291     CODE:
2292         RETVAL = isPRINT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2293     OUTPUT:
2294         RETVAL
2295
2296 bool
2297 isPSXSPC_LC_utf8_safe(s, offset)
2298     unsigned char * s
2299     int offset
2300     CODE:
2301         RETVAL = isPSXSPC_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2302     OUTPUT:
2303         RETVAL
2304
2305 bool
2306 isPUNCT_LC_utf8_safe(s, offset)
2307     unsigned char * s
2308     int offset
2309     CODE:
2310         RETVAL = isPUNCT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2311     OUTPUT:
2312         RETVAL
2313
2314 bool
2315 isSPACE_LC_utf8_safe(s, offset)
2316     unsigned char * s
2317     int offset
2318     CODE:
2319         RETVAL = isSPACE_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2320     OUTPUT:
2321         RETVAL
2322
2323 bool
2324 isUPPER_LC_utf8_safe(s, offset)
2325     unsigned char * s
2326     int offset
2327     CODE:
2328         RETVAL = isUPPER_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2329     OUTPUT:
2330         RETVAL
2331
2332 bool
2333 isWORDCHAR_LC_utf8_safe(s, offset)
2334     unsigned char * s
2335     int offset
2336     CODE:
2337         RETVAL = isWORDCHAR_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2338     OUTPUT:
2339         RETVAL
2340
2341 bool
2342 isXDIGIT_LC_utf8_safe(s, offset)
2343     unsigned char * s
2344     int offset
2345     CODE:
2346         RETVAL = isXDIGIT_LC_utf8_safe(s, s + UTF8SKIP(s) + offset);
2347     OUTPUT:
2348         RETVAL
2349
2350 AV *
2351 toLOWER_utf8_safe(s, offset)
2352     unsigned char * s
2353     int offset
2354     PREINIT:
2355         U8 u[UTF8_MAXBYTES+1];
2356         Size_t len;
2357         UV ret;
2358         SV* utf8;
2359         AV * av;
2360     CODE:
2361         av = newAV();
2362         ret = toLOWER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2363         av_push(av, newSVuv(ret));
2364
2365         utf8 = newSVpvn((char *) u, len);
2366         SvUTF8_on(utf8);
2367         av_push(av, utf8);
2368
2369         av_push(av, newSVuv(len));
2370         RETVAL = av;
2371     OUTPUT:
2372         RETVAL
2373
2374 AV *
2375 toTITLE_utf8_safe(s, offset)
2376     unsigned char * s
2377     int offset
2378     PREINIT:
2379         U8 u[UTF8_MAXBYTES+1];
2380         Size_t len;
2381         UV ret;
2382         SV* utf8;
2383         AV * av;
2384     CODE:
2385         av = newAV();
2386         ret = toTITLE_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2387         av_push(av, newSVuv(ret));
2388
2389         utf8 = newSVpvn((char *) u, len);
2390         SvUTF8_on(utf8);
2391         av_push(av, utf8);
2392
2393         av_push(av, newSVuv(len));
2394         RETVAL = av;
2395     OUTPUT:
2396         RETVAL
2397
2398 AV *
2399 toUPPER_utf8_safe(s, offset)
2400     unsigned char * s
2401     int offset
2402     PREINIT:
2403         U8 u[UTF8_MAXBYTES+1];
2404         Size_t len;
2405         UV ret;
2406         SV* utf8;
2407         AV * av;
2408     CODE:
2409         av = newAV();
2410         ret = toUPPER_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2411         av_push(av, newSVuv(ret));
2412
2413         utf8 = newSVpvn((char *) u, len);
2414         SvUTF8_on(utf8);
2415         av_push(av, utf8);
2416
2417         av_push(av, newSVuv(len));
2418         RETVAL = av;
2419     OUTPUT:
2420         RETVAL
2421
2422 AV *
2423 toFOLD_utf8_safe(s, offset)
2424     unsigned char * s
2425     int offset
2426     PREINIT:
2427         U8 u[UTF8_MAXBYTES+1];
2428         Size_t len;
2429         UV ret;
2430         SV* utf8;
2431         AV * av;
2432     CODE:
2433         av = newAV();
2434         ret = toFOLD_utf8_safe(s, s + UTF8SKIP(s) + offset, u, &len);
2435         av_push(av, newSVuv(ret));
2436
2437         utf8 = newSVpvn((char *) u, len);
2438         SvUTF8_on(utf8);
2439         av_push(av, utf8);
2440
2441         av_push(av, newSVuv(len));
2442         RETVAL = av;
2443     OUTPUT:
2444         RETVAL
2445
2446 AV *
2447 toLOWER_uvchr(c)
2448     UV c
2449     PREINIT:
2450         U8 u[UTF8_MAXBYTES+1];
2451         Size_t len;
2452         UV ret;
2453         SV* utf8;
2454         AV * av;
2455     CODE:
2456         av = newAV();
2457         ret = toLOWER_uvchr(c, u, &len);
2458         av_push(av, newSVuv(ret));
2459
2460         utf8 = newSVpvn((char *) u, len);
2461         SvUTF8_on(utf8);
2462         av_push(av, utf8);
2463
2464         av_push(av, newSVuv(len));
2465         RETVAL = av;
2466     OUTPUT:
2467         RETVAL
2468
2469 AV *
2470 toTITLE_uvchr(c)
2471     UV c
2472     PREINIT:
2473         U8 u[UTF8_MAXBYTES+1];
2474         Size_t len;
2475         UV ret;
2476         SV* utf8;
2477         AV * av;
2478     CODE:
2479         av = newAV();
2480         ret = toTITLE_uvchr(c, u, &len);
2481         av_push(av, newSVuv(ret));
2482
2483         utf8 = newSVpvn((char *) u, len);
2484         SvUTF8_on(utf8);
2485         av_push(av, utf8);
2486
2487         av_push(av, newSVuv(len));
2488         RETVAL = av;
2489     OUTPUT:
2490         RETVAL
2491
2492 AV *
2493 toUPPER_uvchr(c)
2494     UV c
2495     PREINIT:
2496         U8 u[UTF8_MAXBYTES+1];
2497         Size_t len;
2498         UV ret;
2499         SV* utf8;
2500         AV * av;
2501     CODE:
2502         av = newAV();
2503         ret = toUPPER_uvchr(c, u, &len);
2504         av_push(av, newSVuv(ret));
2505
2506         utf8 = newSVpvn((char *) u, len);
2507         SvUTF8_on(utf8);
2508         av_push(av, utf8);
2509
2510         av_push(av, newSVuv(len));
2511         RETVAL = av;
2512     OUTPUT:
2513         RETVAL
2514
2515 AV *
2516 toFOLD_uvchr(c)
2517     UV c
2518     PREINIT:
2519         U8 u[UTF8_MAXBYTES+1];
2520         Size_t len;
2521         UV ret;
2522         SV* utf8;
2523         AV * av;
2524     CODE:
2525         av = newAV();
2526         ret = toFOLD_uvchr(c, u, &len);
2527         av_push(av, newSVuv(ret));
2528
2529         utf8 = newSVpvn((char *) u, len);
2530         SvUTF8_on(utf8);
2531         av_push(av, utf8);
2532
2533         av_push(av, newSVuv(len));
2534         RETVAL = av;
2535     OUTPUT:
2536         RETVAL
2537
2538 #endif
2539
2540 UV
2541 LATIN1_TO_NATIVE(cp)
2542         UV cp
2543         CODE:
2544                 if (cp > 255) RETVAL= cp;
2545                 else RETVAL= LATIN1_TO_NATIVE(cp);
2546         OUTPUT:
2547                 RETVAL
2548
2549 UV
2550 NATIVE_TO_LATIN1(cp)
2551         UV cp
2552         CODE:
2553                 RETVAL= NATIVE_TO_LATIN1(cp);
2554         OUTPUT:
2555                 RETVAL
2556
2557 STRLEN
2558 av_tindex(av)
2559         SV *av
2560         CODE:
2561                 RETVAL = av_tindex((AV*)SvRV(av));
2562         OUTPUT:
2563                 RETVAL
2564
2565 STRLEN
2566 av_top_index(av)
2567         SV *av
2568         CODE:
2569                 RETVAL = av_top_index((AV*)SvRV(av));
2570         OUTPUT:
2571                 RETVAL
2572
2573 STRLEN
2574 av_count(av)
2575         SV *av
2576         CODE:
2577                 RETVAL = av_count((AV*)SvRV(av));
2578         OUTPUT:
2579                 RETVAL
2580
2581 =tests plan => 26827
2582
2583 use vars qw($my_sv @my_av %my_hv);
2584
2585 ok(&Devel::PPPort::boolSV(1), "Verify boolSV(1) is true");
2586 ok(!&Devel::PPPort::boolSV(0), "Verify boolSV(0) is false");
2587
2588 $_ = "Fred";
2589 is(&Devel::PPPort::DEFSV(), "Fred", '$_ is FRED; Verify DEFSV is FRED');
2590 is(&Devel::PPPort::UNDERBAR(), "Fred", 'And verify UNDERBAR is FRED');
2591
2592 if (ivers($]) >= ivers(5.9.2) && ivers($]) < ivers(5.23)) {
2593   eval q{
2594     no warnings "deprecated";
2595     no if $^V >= v5.17.9, warnings => "experimental::lexical_topic";
2596     my $_ = "Tony";
2597     is(&Devel::PPPort::DEFSV(), "Fred", 'lexical_topic eval: $_ is Tony; Verify DEFSV is Fred');
2598     is(&Devel::PPPort::UNDERBAR(), "Tony", 'And verify UNDERBAR is Tony');
2599   };
2600   die __FILE__ . __LINE__ . ": $@" if $@;
2601 }
2602 else {
2603   skip("perl version outside testing range of lexical_topic", 2);
2604 }
2605
2606 my @r = &Devel::PPPort::DEFSV_modify();
2607
2608 ok(@r == 3, "Verify got 3 elements");
2609 is($r[0], 'Fred');
2610 is($r[1], 'DEFSV');
2611 is($r[2], 'Fred');
2612
2613 is(&Devel::PPPort::DEFSV(), "Fred");
2614
2615 eval { 1 };
2616 ok(!&Devel::PPPort::ERRSV(), "Verify ERRSV on true is false");
2617 eval { cannot_call_this_one() };
2618 ok(&Devel::PPPort::ERRSV(), "Verify ERRSV on false is true");
2619
2620 ok(&Devel::PPPort::gv_stashpvn('Devel::PPPort', 0));
2621 ok(!&Devel::PPPort::gv_stashpvn('does::not::exist', 0));
2622 ok(&Devel::PPPort::gv_stashpvn('does::not::exist', 1));
2623
2624 $my_sv = 1;
2625 ok(&Devel::PPPort::get_sv('my_sv', 0));
2626 ok(!&Devel::PPPort::get_sv('not_my_sv', 0));
2627 ok(&Devel::PPPort::get_sv('not_my_sv', 1));
2628
2629 @my_av = (1);
2630 ok(&Devel::PPPort::get_av('my_av', 0));
2631 ok(!&Devel::PPPort::get_av('not_my_av', 0));
2632 ok(&Devel::PPPort::get_av('not_my_av', 1));
2633
2634 %my_hv = (a=>1);
2635 ok(&Devel::PPPort::get_hv('my_hv', 0));
2636 ok(!&Devel::PPPort::get_hv('not_my_hv', 0));
2637 ok(&Devel::PPPort::get_hv('not_my_hv', 1));
2638
2639 sub my_cv { 1 };
2640 ok(&Devel::PPPort::get_cv('my_cv', 0));
2641 ok(!&Devel::PPPort::get_cv('not_my_cv', 0));
2642 ok(&Devel::PPPort::get_cv('not_my_cv', 1));
2643
2644 is(Devel::PPPort::dXSTARG(42), 43);
2645 is(Devel::PPPort::dAXMARK(4711), 4710);
2646
2647 is(Devel::PPPort::prepush(), 42);
2648
2649 is(join(':', Devel::PPPort::xsreturn(0)), 'test1');
2650 is(join(':', Devel::PPPort::xsreturn(1)), 'test1:test2');
2651
2652 is(Devel::PPPort::PERL_ABS(42), 42, "Verify PERL_ABS(42) is 42");
2653 is(Devel::PPPort::PERL_ABS(-13), 13, "Verify PERL_ABS(-13) is 13");
2654
2655 is(Devel::PPPort::SVf(42), ivers($]) >= ivers(5.4) ? '[42]' : '42');
2656 is(Devel::PPPort::SVf('abc'), ivers($]) >= ivers(5.4) ? '[abc]' : 'abc');
2657
2658 is(&Devel::PPPort::Perl_ppaddr_t("FOO"), "foo");
2659
2660 is(&Devel::PPPort::ptrtests(), 63);
2661
2662 is(&Devel::PPPort::OpSIBLING_tests(), 0);
2663
2664 if (ivers($]) >= ivers(5.9)) {
2665   eval q{
2666     is(&Devel::PPPort::check_HeUTF8("hello"), "norm");
2667     is(&Devel::PPPort::check_HeUTF8("\N{U+263a}"), "utf8");
2668   };
2669 } else {
2670   skip("Too early perl version", 2);
2671 }
2672
2673 @r = &Devel::PPPort::check_c_array();
2674 is($r[0], 4);
2675 is($r[1], "13");
2676
2677 ok(!Devel::PPPort::SvRXOK(""));
2678 ok(!Devel::PPPort::SvRXOK(bless [], "Regexp"));
2679
2680 if (ivers($]) < ivers(5.5)) {
2681         skip 'no qr// objects in this perl', 2;
2682 } else {
2683         my $qr = eval 'qr/./';
2684         ok(Devel::PPPort::SvRXOK($qr), "SVRXOK(qr) is true");
2685         ok(Devel::PPPort::SvRXOK(bless $qr, "Surprise"));
2686 }
2687
2688 ok( Devel::PPPort::NATIVE_TO_LATIN1(0xB6) == 0xB6);
2689 ok( Devel::PPPort::NATIVE_TO_LATIN1(0x1) == 0x1);
2690 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("A")) == 0x41);
2691 ok( Devel::PPPort::NATIVE_TO_LATIN1(ord("0")) == 0x30);
2692
2693 ok( Devel::PPPort::LATIN1_TO_NATIVE(0xB6) == 0xB6, "Verify LATIN1_TO_NATIVE(0xB6) is 0xB6");
2694 if (ord("A") == 65) {
2695     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0x41);
2696     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0x30);
2697 }
2698 else {
2699     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x41) == 0xC1);
2700     ok( Devel::PPPort::LATIN1_TO_NATIVE(0x30) == 0xF0);
2701 }
2702
2703 ok(  Devel::PPPort::isALNUMC_L1(ord("5")));
2704 ok(  Devel::PPPort::isALNUMC_L1(0xFC));
2705 ok(! Devel::PPPort::isALNUMC_L1(0xB6));
2706
2707 ok(  Devel::PPPort::isOCTAL(ord("7")), "Verify '7' is OCTAL");
2708 ok(! Devel::PPPort::isOCTAL(ord("8")), "Verify '8' isn't OCTAL");
2709
2710 ok(  Devel::PPPort::isOCTAL_A(ord("0")), "Verify '0' is OCTAL_A");
2711 ok(! Devel::PPPort::isOCTAL_A(ord("9")), "Verify '9' isn't OCTAL_A");
2712
2713 ok(  Devel::PPPort::isOCTAL_L1(ord("2")), "Verify '2' is OCTAL_L1");
2714 ok(! Devel::PPPort::isOCTAL_L1(ord("8")), "Verify '8' isn't OCTAL_L1");
2715
2716 my $way_too_early_msg = 'UTF-8 not implemented on this perl';
2717
2718 # For the other properties, we test every code point from 0.255, and a
2719 # smattering of higher ones.  First populate a hash with keys like '65:ALPHA'
2720 # to indicate that the code point there is alphabetic
2721 my $i;
2722 my %types;
2723 for $i (0x41..0x5A, 0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xC0..0xD6, 0xD8..0xF6,
2724         0xF8..0x101)
2725 {
2726     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2727     $types{"$native:ALPHA"} = 1;
2728     $types{"$native:ALPHANUMERIC"} = 1;
2729     $types{"$native:IDFIRST"} = 1;
2730     $types{"$native:IDCONT"} = 1;
2731     $types{"$native:PRINT"} = 1;
2732     $types{"$native:WORDCHAR"} = 1;
2733 }
2734 for $i (0x30..0x39, 0x660, 0xFF19) {
2735     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2736     $types{"$native:ALPHANUMERIC"} = 1;
2737     $types{"$native:DIGIT"} = 1;
2738     $types{"$native:IDCONT"} = 1;
2739     $types{"$native:WORDCHAR"} = 1;
2740     $types{"$native:GRAPH"} = 1;
2741     $types{"$native:PRINT"} = 1;
2742     $types{"$native:XDIGIT"} = 1 if $i < 255 || ($i >= 0xFF10 && $i <= 0xFF19);
2743 }
2744
2745 for $i (0..0x7F) {
2746     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2747     $types{"$native:ASCII"} = 1;
2748 }
2749 for $i (0..0x1f, 0x7F..0x9F) {
2750     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2751     $types{"$native:CNTRL"} = 1;
2752 }
2753 for $i (0x21..0x7E, 0xA1..0x101, 0x660) {
2754     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2755     $types{"$native:GRAPH"} = 1;
2756     $types{"$native:PRINT"} = 1;
2757 }
2758 for $i (0x09, 0x20, 0xA0) {
2759     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2760     $types{"$native:BLANK"} = 1;
2761     $types{"$native:SPACE"} = 1;
2762     $types{"$native:PSXSPC"} = 1;
2763     $types{"$native:PRINT"} = 1 if $i > 0x09;
2764 }
2765 for $i (0x09..0x0D, 0x85, 0x2029) {
2766     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2767     $types{"$native:SPACE"} = 1;
2768     $types{"$native:PSXSPC"} = 1;
2769 }
2770 for $i (0x41..0x5A, 0xC0..0xD6, 0xD8..0xDE, 0x100) {
2771     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2772     $types{"$native:UPPER"} = 1;
2773     $types{"$native:XDIGIT"} = 1 if $i < 0x47;
2774 }
2775 for $i (0x61..0x7A, 0xAA, 0xB5, 0xBA, 0xDF..0xF6, 0xF8..0xFF, 0x101) {
2776     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2777     $types{"$native:LOWER"} = 1;
2778     $types{"$native:XDIGIT"} = 1 if $i < 0x67;
2779 }
2780 for $i (0x21..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7E, 0xB6, 0xA1, 0xA7, 0xAB,
2781         0xB7, 0xBB, 0xBF, 0x5BE)
2782 {
2783     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2784     $types{"$native:PUNCT"} = 1;
2785     $types{"$native:GRAPH"} = 1;
2786     $types{"$native:PRINT"} = 1;
2787 }
2788
2789 $i = ord('_');
2790 $types{"$i:WORDCHAR"} = 1;
2791 $types{"$i:IDFIRST"} = 1;
2792 $types{"$i:IDCONT"} = 1;
2793
2794 # Now find all the unique code points included above.
2795 my %code_points_to_test;
2796 my $key;
2797 for $key (keys %types) {
2798     $key =~ s/:.*//;
2799     $code_points_to_test{$key} = 1;
2800 }
2801
2802 # And test each one
2803 for $i (sort { $a <=> $b } keys %code_points_to_test) {
2804     my $native = Devel::PPPort::LATIN1_TO_NATIVE($i);
2805     my $hex = sprintf("0x%02X", $native);
2806
2807     # And for each code point test each of the classes
2808     my $class;
2809     for $class (qw(ALPHA ALPHANUMERIC ASCII BLANK CNTRL DIGIT GRAPH IDCONT
2810                    IDFIRST LOWER PRINT PSXSPC PUNCT SPACE UPPER WORDCHAR
2811                    XDIGIT))
2812     {
2813         if ($i < 256) {  # For the ones that can fit in a byte, test each of
2814                          # three macros.
2815             my $suffix;
2816             for $suffix ("", "_A", "_L1", "_uvchr") {
2817                 my $should_be = ($i > 0x7F && $suffix !~ /_(uvchr|L1)/)
2818                                 ? 0     # Fail on non-ASCII unless unicode
2819                                 : ($types{"$native:$class"} || 0);
2820                 if (ivers($]) < ivers(5.6) && $suffix eq '_uvchr') {
2821                     skip("No UTF-8 on this perl", 1);
2822                     next;
2823                 }
2824
2825                 my $eval_string = "Devel::PPPort::is${class}$suffix($hex)";
2826                 local $SIG{__WARN__} = sub {};
2827                 my $is = eval $eval_string || 0;
2828                 die "eval 'For $i: $eval_string' gave $@" if $@;
2829                 is($is, $should_be, "'$eval_string'");
2830             }
2831         }
2832
2833         # For all code points, test the '_utf8' macros
2834         my $sub_fcn;
2835         for $sub_fcn ("", "_LC") {
2836             my $skip = "";
2837             if (ivers($]) < ivers(5.6)) {
2838                 $skip = $way_too_early_msg;
2839             }
2840             elsif (ivers($]) < ivers(5.7) && $native > 255) {
2841                 $skip = "Perls earlier than 5.7 give wrong answers for above Latin1 code points";
2842             }
2843             elsif (ivers($]) <= ivers(5.11.3) && $native == 0x2029 && ($class eq 'PRINT' || $class eq 'GRAPH')) {
2844                 $skip = "Perls earlier than 5.11.3 considered high space characters as isPRINT and isGRAPH";
2845             }
2846             elsif ($sub_fcn eq '_LC' && $i < 256) {
2847                 $skip = "Testing of code points whose results depend on locale is skipped ";
2848             }
2849             my $fcn = "Devel::PPPort::is${class}${sub_fcn}_utf8_safe";
2850             my $utf8;
2851
2852             if ($skip) {
2853                 skip $skip, 1;
2854             }
2855             else {
2856                 $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($native);
2857                 my $should_be = $types{"$native:$class"} || 0;
2858                 my $eval_string = "$fcn(\"$utf8\", 0)";
2859                 local $SIG{__WARN__} = sub {};
2860                 my $is = eval $eval_string || 0;
2861                 die "eval 'For $i, $eval_string' gave $@" if $@;
2862                 is($is, $should_be, sprintf("For U+%04X '%s'", $native, $eval_string));
2863             }
2864
2865             # And for the high code points, test that a too short malformation (the
2866             # -1) causes it to fail
2867             if ($i > 255) {
2868                 if ($skip) {
2869                     skip $skip, 1;
2870                 }
2871                 elsif (ivers($]) >= ivers(5.25.9)) {
2872                     skip("Prints an annoying error message that khw doesn't know how to easily suppress", 1);
2873                 }
2874                 else {
2875                     my $eval_string = "$fcn(\"$utf8\", -1)";
2876                     local $SIG{__WARN__} = sub {};
2877                     my $is = eval "$eval_string" || 0;
2878                     die "eval '$eval_string' gave $@" if $@;
2879                     is($is, 0, sprintf("For U+%04X '%s'", $native, $eval_string));
2880                 }
2881             }
2882         }
2883     }
2884 }
2885
2886 my %case_changing = ( 'LOWER' => [ [ ord('A'), ord('a') ],
2887                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2888                                      Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2889                                    [ 0x100, 0x101 ],
2890                                  ],
2891                       'FOLD'  => [ [ ord('C'), ord('c') ],
2892                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xC0),
2893                                      Devel::PPPort::LATIN1_TO_NATIVE(0xE0) ],
2894                                    [ 0x104, 0x105 ],
2895                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2896                                      'ss' ],
2897                                  ],
2898                       'UPPER' => [ [ ord('a'), ord('A'),  ],
2899                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xE0),
2900                                      Devel::PPPort::LATIN1_TO_NATIVE(0xC0) ],
2901                                    [ 0x101, 0x100 ],
2902                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2903                                      'SS' ],
2904                                  ],
2905                       'TITLE' => [ [ ord('c'), ord('C'),  ],
2906                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xE2),
2907                                      Devel::PPPort::LATIN1_TO_NATIVE(0xC2) ],
2908                                    [ 0x103, 0x102 ],
2909                                    [ Devel::PPPort::LATIN1_TO_NATIVE(0xDF),
2910                                      'Ss' ],
2911                                  ],
2912                     );
2913
2914 my $name;
2915 for $name (keys %case_changing) {
2916     my @code_points_to_test = @{$case_changing{$name}};
2917     my $unchanged;
2918     for $unchanged (@code_points_to_test) {
2919         my @pair = @$unchanged;
2920         my $original = $pair[0];
2921         my $changed = $pair[1];
2922         my $utf8_changed = $changed;
2923         my $is_cp = $utf8_changed =~ /^\d+$/;
2924         my $should_be_bytes;
2925         if (ivers($]) >= ivers(5.6)) {
2926             if ($is_cp) {
2927                 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2928                 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2929             }
2930             else {
2931                 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /[[:^ascii:]]/';
2932                 $should_be_bytes = length $utf8_changed;
2933             }
2934         }
2935
2936         my $fcn = "to${name}_uvchr";
2937         my $skip = "";
2938
2939         if (ivers($]) < ivers(5.6)) {
2940             $skip = $way_too_early_msg;
2941         }
2942         elsif (! $is_cp) {
2943             $skip = "Can't do uvchr on a multi-char string";
2944         }
2945         if ($skip) {
2946             skip $skip, 4;
2947         }
2948         else {
2949             if ($is_cp) {
2950                 $utf8_changed = Devel::PPPort::uvchr_to_utf8($changed);
2951                 $should_be_bytes = Devel::PPPort::UTF8_SAFE_SKIP($utf8_changed, 0);
2952             }
2953             else {
2954                 my $non_ascii_re = (ivers($]) >= ivers(5.6)) ? '[[:^ascii:]]' : '[^\x00-\x7F]';
2955                 die("Test currently doesn't work for non-ASCII multi-char case changes") if eval '$utf8_changed =~ /$non_ascii_re/';
2956                 $should_be_bytes = length $utf8_changed;
2957             }
2958
2959             my $ret = eval "Devel::PPPort::$fcn($original)";
2960             my $fail = $@;  # Have to save $@, as it gets destroyed
2961             is ($fail, "", "$fcn($original) didn't fail");
2962             my $first = (ivers($]) != ivers(5.6))
2963                         ? substr($utf8_changed, 0, 1)
2964                         : $utf8_changed, 0, 1;
2965             is($ret->[0], ord $first,
2966                "ord of $fcn($original) is $changed");
2967             is($ret->[1], $utf8_changed,
2968                "UTF-8 of of $fcn($original) is correct");
2969             is($ret->[2], $should_be_bytes,
2970                "Length of $fcn($original) is $should_be_bytes");
2971         }
2972
2973         my $truncate;
2974         for $truncate (0..2) {
2975             my $skip;
2976             if (ivers($]) < ivers(5.6)) {
2977                 $skip = $way_too_early_msg;
2978             }
2979             elsif (! $is_cp && ivers($]) < ivers(5.7.3)) {
2980                 $skip = "Multi-character case change not implemented until 5.7.3";
2981             }
2982             elsif ($truncate == 2 && ivers($]) > ivers(5.25.8)) {
2983                 $skip = "Zero length inputs cause assertion failure; test dies in modern perls";
2984             }
2985             elsif ($truncate > 0 && length $changed > 1) {
2986                 $skip = "Don't test shortened multi-char case changes";
2987             }
2988             elsif ($truncate > 0 && Devel::PPPort::UVCHR_IS_INVARIANT($original)) {
2989                 $skip = "Don't try to test shortened single bytes";
2990             }
2991             if ($skip) {
2992                 skip $skip, 4;
2993             }
2994             else {
2995                 my $fcn = "to${name}_utf8_safe";
2996                 my $utf8 = quotemeta Devel::PPPort::uvchr_to_utf8($original);
2997                 my $real_truncate = ($truncate < 2)
2998                                     ? $truncate : $should_be_bytes;
2999                 my $eval_string = "Devel::PPPort::$fcn(\"$utf8\", $real_truncate)";
3000                 my $ret = eval "no warnings; $eval_string" || 0;
3001                 my $fail = $@;  # Have to save $@, as it gets destroyed
3002                 if ($truncate == 0) {
3003                     is ($fail, "", "Didn't fail on full length input");
3004                     my $first = (ivers($]) != ivers(5.6))
3005                                 ? substr($utf8_changed, 0, 1)
3006                                 : $utf8_changed, 0, 1;
3007                     is($ret->[0], ord $first,
3008                        "ord of $fcn($original) is $changed");
3009                     is($ret->[1], $utf8_changed,
3010                        "UTF-8 of of $fcn($original) is correct");
3011                     is($ret->[2], $should_be_bytes,
3012                     "Length of $fcn($original) is $should_be_bytes");
3013                 }
3014                 else {
3015                     is ($fail, eval 'qr/Malformed UTF-8 character/',
3016                         "Gave appropriate error for short char: $original");
3017                     skip("Expected failure means remaining tests for"
3018                        . " this aren't relevant", 3);
3019                 }
3020             }
3021         }
3022     }
3023 }
3024
3025 is(&Devel::PPPort::av_top_index([1,2,3]), 2);
3026 is(&Devel::PPPort::av_tindex([1,2,3,4]), 3);
3027 is(&Devel::PPPort::av_count([1,2,3,4]), 4);