inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     XPV* const p = (XPV*)SvANY(re);
 157     assert(isREGEXP(re));
 158     return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
 159                                    : (struct regexp *)p;
 160 }
 161
 162 /* ------------------------------- sv.h ------------------------------- */
 163
 164 PERL_STATIC_INLINE SV *
 165 S_SvREFCNT_inc(SV *sv)
 166 {
 167     if (LIKELY(sv != NULL))
 168         SvREFCNT(sv)++;
 169     return sv;
 170 }
 171 PERL_STATIC_INLINE SV *
 172 S_SvREFCNT_inc_NN(SV *sv)
 173 {
 174     SvREFCNT(sv)++;
 175     return sv;
 176 }
 177 PERL_STATIC_INLINE void
 178 S_SvREFCNT_inc_void(SV *sv)
 179 {
 180     if (LIKELY(sv != NULL))
 181         SvREFCNT(sv)++;
 182 }
 183 PERL_STATIC_INLINE void
 184 S_SvREFCNT_dec(pTHX_ SV *sv)
 185 {
 186     if (LIKELY(sv != NULL)) {
 187         U32 rc = SvREFCNT(sv);
 188         if (LIKELY(rc > 1))
 189             SvREFCNT(sv) = rc - 1;
 190         else
 191             Perl_sv_free2(aTHX_ sv, rc);
 192     }
 193 }
 194
 195 PERL_STATIC_INLINE void
 196 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 197 {
 198     U32 rc = SvREFCNT(sv);
 199     if (LIKELY(rc > 1))
 200         SvREFCNT(sv) = rc - 1;
 201     else
 202         Perl_sv_free2(aTHX_ sv, rc);
 203 }
 204
 205 PERL_STATIC_INLINE void
 206 SvAMAGIC_on(SV *sv)
 207 {
 208     assert(SvROK(sv));
 209     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 210 }
 211 PERL_STATIC_INLINE void
 212 SvAMAGIC_off(SV *sv)
 213 {
 214     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 215         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 216 }
 217
 218 PERL_STATIC_INLINE U32
 219 S_SvPADSTALE_on(SV *sv)
 220 {
 221     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 222     return SvFLAGS(sv) |= SVs_PADSTALE;
 223 }
 224 PERL_STATIC_INLINE U32
 225 S_SvPADSTALE_off(SV *sv)
 226 {
 227     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 228     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 229 }
 230 #if defined(PERL_CORE) || defined (PERL_EXT)
 231 PERL_STATIC_INLINE STRLEN
 232 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 233 {
 234     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 235     if (SvGAMAGIC(sv)) {
 236         U8 *hopped = utf8_hop((U8 *)pv, pos);
 237         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 238         return (STRLEN)(hopped - (U8 *)pv);
 239     }
 240     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 241 }
 242 #endif
 243
 244 /* ------------------------------- handy.h ------------------------------- */
 245
 246 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 247 GCC_DIAG_IGNORE_DECL(-Wunused-function);
 248 static void
 249 S_croak_memory_wrap(void)
 250 {
 251     Perl_croak_nocontext("%s",PL_memory_wrap);
 252 }
 253 GCC_DIAG_RESTORE_DECL;
 254
 255 /* ------------------------------- utf8.h ------------------------------- */
 256
 257 /*
 258 =head1 Unicode Support
 259 */
 260
 261 PERL_STATIC_INLINE void
 262 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 263 {
 264     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 265      * encoded string at '*dest', updating '*dest' to include it */
 266
 267     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 268
 269     if (NATIVE_BYTE_IS_INVARIANT(byte))
 270         *((*dest)++) = byte;
 271     else {
 272         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 273         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 274     }
 275 }
 276
 277 /*
 278 =for apidoc valid_utf8_to_uvchr
 279 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 280 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 281 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 282 non-Unicode code points are allowed.
 283
 284 =cut
 285
 286  */
 287
 288 PERL_STATIC_INLINE UV
 289 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 290 {
 291     const UV expectlen = UTF8SKIP(s);
 292     const U8* send = s + expectlen;
 293     UV uv = *s;
 294
 295     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 296
 297     if (retlen) {
 298         *retlen = expectlen;
 299     }
 300
 301     /* An invariant is trivially returned */
 302     if (expectlen == 1) {
 303         return uv;
 304     }
 305
 306     /* Remove the leading bits that indicate the number of bytes, leaving just
 307      * the bits that are part of the value */
 308     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 309
 310     /* Now, loop through the remaining bytes, accumulating each into the
 311      * working total as we go.  (I khw tried unrolling the loop for up to 4
 312      * bytes, but there was no performance improvement) */
 313     for (++s; s < send; s++) {
 314         uv = UTF8_ACCUMULATE(uv, *s);
 315     }
 316
 317     return UNI_TO_NATIVE(uv);
 318
 319 }
 320
 321 /*
 322 =for apidoc is_utf8_invariant_string
 323
 324 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 325 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 326 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 327 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 328 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 329 characters are invariant, but so also are the C1 controls.
 330
 331 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 332 use this option, that C<s> can't have embedded C<NUL> characters and has to
 333 have a terminating C<NUL> byte).
 334
 335 See also
 336 C<L</is_utf8_string>>,
 337 C<L</is_utf8_string_flags>>,
 338 C<L</is_utf8_string_loc>>,
 339 C<L</is_utf8_string_loc_flags>>,
 340 C<L</is_utf8_string_loclen>>,
 341 C<L</is_utf8_string_loclen_flags>>,
 342 C<L</is_utf8_fixed_width_buf_flags>>,
 343 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 344 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 345 C<L</is_strict_utf8_string>>,
 346 C<L</is_strict_utf8_string_loc>>,
 347 C<L</is_strict_utf8_string_loclen>>,
 348 C<L</is_c9strict_utf8_string>>,
 349 C<L</is_c9strict_utf8_string_loc>>,
 350 and
 351 C<L</is_c9strict_utf8_string_loclen>>.
 352
 353 =cut
 354
 355 */
 356
 357 #define is_utf8_invariant_string(s, len)                                    \
 358                                 is_utf8_invariant_string_loc(s, len, NULL)
 359
 360 /*
 361 =for apidoc is_utf8_invariant_string_loc
 362
 363 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
 364 the first UTF-8 variant character in the C<ep> pointer; if all characters are
 365 UTF-8 invariant, this function does not change the contents of C<*ep>.
 366
 367 =cut
 368
 369 */
 370
 371 PERL_STATIC_INLINE bool
 372 S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 373 {
 374     const U8* send;
 375     const U8* x = s;
 376
 377     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 378
 379     if (len == 0) {
 380         len = strlen((const char *)s);
 381     }
 382
 383     send = s + len;
 384
 385 #ifndef EBCDIC
 386
 387 /* This looks like 0x010101... */
 388 #define PERL_COUNT_MULTIPLIER   (~ (UINTMAX_C(0)) / 0xFF)
 389
 390 /* This looks like 0x808080... */
 391 #define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
 392 #define PERL_WORDSIZE            sizeof(PERL_COUNT_MULTIPLIER)
 393 #define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
 394
 395 /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
 396  * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
 397  * optimized out completely on a 32-bit system, and its mask gets optimized out
 398  * on a 64-bit system */
 399 #define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                       \
 400                                       |   (  PTR2nat(x) >> 1)                 \
 401                                       | ( ( (PTR2nat(x)                       \
 402                                            & PERL_WORD_BOUNDARY_MASK) >> 2))))
 403
 404     /* Do the word-at-a-time iff there is at least one usable full word.  That
 405      * means that after advancing to a word boundary, there still is at least a
 406      * full word left.  The number of bytes needed to advance is 'wordsize -
 407      * offset' unless offset is 0. */
 408     if ((STRLEN) (send - x) >= PERL_WORDSIZE
 409
 410                             /* This term is wordsize if subword; 0 if not */
 411                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 412
 413                             /* 'offset' */
 414                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 415     {
 416
 417         /* Process per-byte until reach word boundary.  XXX This loop could be
 418          * eliminated if we knew that this platform had fast unaligned reads */
 419         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 420             if (! UTF8_IS_INVARIANT(*x)) {
 421                 if (ep) {
 422                     *ep = x;
 423                 }
 424
 425                 return FALSE;
 426             }
 427             x++;
 428         }
 429
 430         /* Here, we know we have at least one full word to process.  Process
 431          * per-word as long as we have at least a full word left */
 432         do {
 433             if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK)  {
 434
 435                 /* Found a variant.  Just return if caller doesn't want its
 436                  * exact position */
 437                 if (! ep) {
 438                     return FALSE;
 439                 }
 440
 441                 /* Otherwise fall into final loop to find which byte it is */
 442                 break;
 443             }
 444             x += PERL_WORDSIZE;
 445         } while (x + PERL_WORDSIZE <= send);
 446     }
 447
 448 #endif
 449
 450     /* Process per-byte */
 451     while (x < send) {
 452         if (! UTF8_IS_INVARIANT(*x)) {
 453             if (ep) {
 454                 *ep = x;
 455             }
 456
 457             return FALSE;
 458         }
 459
 460         x++;
 461     }
 462
 463     return TRUE;
 464 }
 465
 466 #if defined(PERL_CORE) || defined(PERL_EXT)
 467
 468 /*
 469 =for apidoc variant_under_utf8_count
 470
 471 This function looks at the sequence of bytes between C<s> and C<e>, which are
 472 assumed to be encoded in ASCII/Latin1, and returns how many of them would
 473 change should the string be translated into UTF-8.  Due to the nature of UTF-8,
 474 each of these would occupy two bytes instead of the single one in the input
 475 string.  Thus, this function returns the precise number of bytes the string
 476 would expand by when translated to UTF-8.
 477
 478 Unlike most of the other functions that have C<utf8> in their name, the input
 479 to this function is NOT a UTF-8-encoded string.  The function name is slightly
 480 I<odd> to emphasize this.
 481
 482 This function is internal to Perl because khw thinks that any XS code that
 483 would want this is probably operating too close to the internals.  Presenting a
 484 valid use case could change that.
 485
 486 See also
 487 C<L<perlapi/is_utf8_invariant_string>>
 488 and
 489 C<L<perlapi/is_utf8_invariant_string_loc>>,
 490
 491 =cut
 492
 493 */
 494
 495 PERL_STATIC_INLINE Size_t
 496 S_variant_under_utf8_count(const U8* const s, const U8* const e)
 497 {
 498     const U8* x = s;
 499     Size_t count = 0;
 500
 501     PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
 502
 503 #  ifndef EBCDIC
 504
 505     if ((STRLEN) (e - x) >= PERL_WORDSIZE
 506                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 507                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 508     {
 509
 510         /* Process per-byte until reach word boundary.  XXX This loop could be
 511          * eliminated if we knew that this platform had fast unaligned reads */
 512         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 513             count += ! UTF8_IS_INVARIANT(*x++);
 514         }
 515
 516         /* Process per-word as long as we have at least a full word left */
 517         do {    /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
 518                    explanation of how this works */
 519             count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
 520                       * PERL_COUNT_MULTIPLIER)
 521                     >> ((PERL_WORDSIZE - 1) * CHARBITS);
 522             x += PERL_WORDSIZE;
 523         } while (x + PERL_WORDSIZE <= e);
 524     }
 525
 526 #  endif
 527
 528     /* Process per-byte */
 529     while (x < e) {
 530         if (! UTF8_IS_INVARIANT(*x)) {
 531             count++;
 532         }
 533
 534         x++;
 535     }
 536
 537     return count;
 538 }
 539
 540 #endif
 541
 542 #ifndef PERL_IN_REGEXEC_C   /* Keep  these around for that file */
 543 #  undef PERL_WORDSIZE
 544 #  undef PERL_COUNT_MULTIPLIER
 545 #  undef PERL_WORD_BOUNDARY_MASK
 546 #  undef PERL_VARIANTS_WORD_MASK
 547 #endif
 548
 549 /*
 550 =for apidoc is_utf8_string
 551
 552 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 553 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 554 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 555 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 556 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 557
 558 This function considers Perl's extended UTF-8 to be valid.  That means that
 559 code points above Unicode, surrogates, and non-character code points are
 560 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 561 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 562 code points are considered valid.
 563
 564 See also
 565 C<L</is_utf8_invariant_string>>,
 566 C<L</is_utf8_invariant_string_loc>>,
 567 C<L</is_utf8_string_loc>>,
 568 C<L</is_utf8_string_loclen>>,
 569 C<L</is_utf8_fixed_width_buf_flags>>,
 570 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 571 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 572
 573 =cut
 574 */
 575
 576 #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
 577
 578 #if defined(PERL_CORE) || defined (PERL_EXT)
 579
 580 /*
 581 =for apidoc is_utf8_non_invariant_string
 582
 583 Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
 584 C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
 585 UTF-8; otherwise returns FALSE.
 586
 587 A TRUE return means that at least one code point represented by the sequence
 588 either is a wide character not representable as a single byte, or the
 589 representation differs depending on whether the sequence is encoded in UTF-8 or
 590 not.
 591
 592 See also
 593 C<L<perlapi/is_utf8_invariant_string>>,
 594 C<L<perlapi/is_utf8_string>>
 595
 596 =cut
 597
 598 This is commonly used to determine if a SV's UTF-8 flag should be turned on.
 599 It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
 600 it otherwise contains invalid UTF-8.
 601
 602 It is an internal function because khw thinks that XS code shouldn't be working
 603 at this low a level.  A valid use case could change that.
 604
 605 */
 606
 607 PERL_STATIC_INLINE bool
 608 S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
 609 {
 610     const U8 * first_variant;
 611
 612     PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
 613
 614     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 615         return FALSE;
 616     }
 617
 618     return is_utf8_string(first_variant, len - (first_variant - s));
 619 }
 620
 621 #endif
 622
 623 /*
 624 =for apidoc is_strict_utf8_string
 625
 626 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 627 UTF-8-encoded string that is fully interchangeable by any application using
 628 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 629 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 630 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 631 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 632
 633 This function returns FALSE for strings containing any
 634 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 635 non-character code points.
 636
 637 See also
 638 C<L</is_utf8_invariant_string>>,
 639 C<L</is_utf8_invariant_string_loc>>,
 640 C<L</is_utf8_string>>,
 641 C<L</is_utf8_string_flags>>,
 642 C<L</is_utf8_string_loc>>,
 643 C<L</is_utf8_string_loc_flags>>,
 644 C<L</is_utf8_string_loclen>>,
 645 C<L</is_utf8_string_loclen_flags>>,
 646 C<L</is_utf8_fixed_width_buf_flags>>,
 647 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 648 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 649 C<L</is_strict_utf8_string_loc>>,
 650 C<L</is_strict_utf8_string_loclen>>,
 651 C<L</is_c9strict_utf8_string>>,
 652 C<L</is_c9strict_utf8_string_loc>>,
 653 and
 654 C<L</is_c9strict_utf8_string_loclen>>.
 655
 656 =cut
 657 */
 658
 659 #define is_strict_utf8_string(s, len)  is_strict_utf8_string_loclen(s, len, NULL, NULL)
 660
 661 /*
 662 =for apidoc is_c9strict_utf8_string
 663
 664 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 665 UTF-8-encoded string that conforms to
 666 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 667 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 668 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 669 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 670 characters being ASCII constitute 'a valid UTF-8 string'.
 671
 672 This function returns FALSE for strings containing any code points above the
 673 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 674 code points per
 675 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 676
 677 See also
 678 C<L</is_utf8_invariant_string>>,
 679 C<L</is_utf8_invariant_string_loc>>,
 680 C<L</is_utf8_string>>,
 681 C<L</is_utf8_string_flags>>,
 682 C<L</is_utf8_string_loc>>,
 683 C<L</is_utf8_string_loc_flags>>,
 684 C<L</is_utf8_string_loclen>>,
 685 C<L</is_utf8_string_loclen_flags>>,
 686 C<L</is_utf8_fixed_width_buf_flags>>,
 687 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 688 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 689 C<L</is_strict_utf8_string>>,
 690 C<L</is_strict_utf8_string_loc>>,
 691 C<L</is_strict_utf8_string_loclen>>,
 692 C<L</is_c9strict_utf8_string_loc>>,
 693 and
 694 C<L</is_c9strict_utf8_string_loclen>>.
 695
 696 =cut
 697 */
 698
 699 #define is_c9strict_utf8_string(s, len)  is_c9strict_utf8_string_loclen(s, len, NULL, 0)
 700
 701 /*
 702 =for apidoc is_utf8_string_flags
 703
 704 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 705 UTF-8 string, subject to the restrictions imposed by C<flags>;
 706 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 707 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 708 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 709 that all characters being ASCII constitute 'a valid UTF-8 string'.
 710
 711 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 712 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 713 as C<L</is_strict_utf8_string>>; and if C<flags> is
 714 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 715 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 716 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 717 C<L</utf8n_to_uvchr>>, with the same meanings.
 718
 719 See also
 720 C<L</is_utf8_invariant_string>>,
 721 C<L</is_utf8_invariant_string_loc>>,
 722 C<L</is_utf8_string>>,
 723 C<L</is_utf8_string_loc>>,
 724 C<L</is_utf8_string_loc_flags>>,
 725 C<L</is_utf8_string_loclen>>,
 726 C<L</is_utf8_string_loclen_flags>>,
 727 C<L</is_utf8_fixed_width_buf_flags>>,
 728 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 729 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 730 C<L</is_strict_utf8_string>>,
 731 C<L</is_strict_utf8_string_loc>>,
 732 C<L</is_strict_utf8_string_loclen>>,
 733 C<L</is_c9strict_utf8_string>>,
 734 C<L</is_c9strict_utf8_string_loc>>,
 735 and
 736 C<L</is_c9strict_utf8_string_loclen>>.
 737
 738 =cut
 739 */
 740
 741 PERL_STATIC_INLINE bool
 742 S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
 743 {
 744     const U8 * first_variant;
 745
 746     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 747     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 748                           |UTF8_DISALLOW_PERL_EXTENDED)));
 749
 750     if (len == 0) {
 751         len = strlen((const char *)s);
 752     }
 753
 754     if (flags == 0) {
 755         return is_utf8_string(s, len);
 756     }
 757
 758     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 759                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 760     {
 761         return is_strict_utf8_string(s, len);
 762     }
 763
 764     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 765                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 766     {
 767         return is_c9strict_utf8_string(s, len);
 768     }
 769
 770     if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
 771         const U8* const send = s + len;
 772         const U8* x = first_variant;
 773
 774         while (x < send) {
 775             STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 776             if (UNLIKELY(! cur_len)) {
 777                 return FALSE;
 778             }
 779             x += cur_len;
 780         }
 781     }
 782
 783     return TRUE;
 784 }
 785
 786 /*
 787
 788 =for apidoc is_utf8_string_loc
 789
 790 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 791 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 792 "utf8ness success") in the C<ep> pointer.
 793
 794 See also C<L</is_utf8_string_loclen>>.
 795
 796 =cut
 797 */
 798
 799 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 800
 801 /*
 802
 803 =for apidoc is_utf8_string_loclen
 804
 805 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 806 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 807 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 808 encoded characters in the C<el> pointer.
 809
 810 See also C<L</is_utf8_string_loc>>.
 811
 812 =cut
 813 */
 814
 815 PERL_STATIC_INLINE bool
 816 Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 817 {
 818     const U8 * first_variant;
 819
 820     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 821
 822     if (len == 0) {
 823         len = strlen((const char *) s);
 824     }
 825
 826     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 827         if (el)
 828             *el = len;
 829
 830         if (ep) {
 831             *ep = s + len;
 832         }
 833
 834         return TRUE;
 835     }
 836
 837     {
 838         const U8* const send = s + len;
 839         const U8* x = first_variant;
 840         STRLEN outlen = first_variant - s;
 841
 842         while (x < send) {
 843             const STRLEN cur_len = isUTF8_CHAR(x, send);
 844             if (UNLIKELY(! cur_len)) {
 845                 break;
 846             }
 847             x += cur_len;
 848             outlen++;
 849         }
 850
 851         if (el)
 852             *el = outlen;
 853
 854         if (ep) {
 855             *ep = x;
 856         }
 857
 858         return (x == send);
 859     }
 860 }
 861
 862 /*
 863
 864 =for apidoc is_strict_utf8_string_loc
 865
 866 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 867 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 868 "utf8ness success") in the C<ep> pointer.
 869
 870 See also C<L</is_strict_utf8_string_loclen>>.
 871
 872 =cut
 873 */
 874
 875 #define is_strict_utf8_string_loc(s, len, ep)                               \
 876                                 is_strict_utf8_string_loclen(s, len, ep, 0)
 877
 878 /*
 879
 880 =for apidoc is_strict_utf8_string_loclen
 881
 882 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 883 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 884 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 885 encoded characters in the C<el> pointer.
 886
 887 See also C<L</is_strict_utf8_string_loc>>.
 888
 889 =cut
 890 */
 891
 892 PERL_STATIC_INLINE bool
 893 S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 894 {
 895     const U8 * first_variant;
 896
 897     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
 898
 899     if (len == 0) {
 900         len = strlen((const char *) s);
 901     }
 902
 903     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 904         if (el)
 905             *el = len;
 906
 907         if (ep) {
 908             *ep = s + len;
 909         }
 910
 911         return TRUE;
 912     }
 913
 914     {
 915         const U8* const send = s + len;
 916         const U8* x = first_variant;
 917         STRLEN outlen = first_variant - s;
 918
 919         while (x < send) {
 920             const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 921             if (UNLIKELY(! cur_len)) {
 922                 break;
 923             }
 924             x += cur_len;
 925             outlen++;
 926         }
 927
 928         if (el)
 929             *el = outlen;
 930
 931         if (ep) {
 932             *ep = x;
 933         }
 934
 935         return (x == send);
 936     }
 937 }
 938
 939 /*
 940
 941 =for apidoc is_c9strict_utf8_string_loc
 942
 943 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 944 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 945 "utf8ness success") in the C<ep> pointer.
 946
 947 See also C<L</is_c9strict_utf8_string_loclen>>.
 948
 949 =cut
 950 */
 951
 952 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
 953                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
 954
 955 /*
 956
 957 =for apidoc is_c9strict_utf8_string_loclen
 958
 959 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 960 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 961 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
 962 characters in the C<el> pointer.
 963
 964 See also C<L</is_c9strict_utf8_string_loc>>.
 965
 966 =cut
 967 */
 968
 969 PERL_STATIC_INLINE bool
 970 S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 971 {
 972     const U8 * first_variant;
 973
 974     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
 975
 976     if (len == 0) {
 977         len = strlen((const char *) s);
 978     }
 979
 980     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 981         if (el)
 982             *el = len;
 983
 984         if (ep) {
 985             *ep = s + len;
 986         }
 987
 988         return TRUE;
 989     }
 990
 991     {
 992         const U8* const send = s + len;
 993         const U8* x = first_variant;
 994         STRLEN outlen = first_variant - s;
 995
 996         while (x < send) {
 997             const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 998             if (UNLIKELY(! cur_len)) {
 999                 break;
1000             }
1001             x += cur_len;
1002             outlen++;
1003         }
1004
1005         if (el)
1006             *el = outlen;
1007
1008         if (ep) {
1009             *ep = x;
1010         }
1011
1012         return (x == send);
1013     }
1014 }
1015
1016 /*
1017
1018 =for apidoc is_utf8_string_loc_flags
1019
1020 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1021 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1022 "utf8ness success") in the C<ep> pointer.
1023
1024 See also C<L</is_utf8_string_loclen_flags>>.
1025
1026 =cut
1027 */
1028
1029 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
1030                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1031
1032
1033 /* The above 3 actual functions could have been moved into the more general one
1034  * just below, and made #defines that call it with the right 'flags'.  They are
1035  * currently kept separate to increase their chances of getting inlined */
1036
1037 /*
1038
1039 =for apidoc is_utf8_string_loclen_flags
1040
1041 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1042 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1043 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
1044 encoded characters in the C<el> pointer.
1045
1046 See also C<L</is_utf8_string_loc_flags>>.
1047
1048 =cut
1049 */
1050
1051 PERL_STATIC_INLINE bool
1052 S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
1053 {
1054     const U8 * first_variant;
1055
1056     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1057     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1058                           |UTF8_DISALLOW_PERL_EXTENDED)));
1059
1060     if (len == 0) {
1061         len = strlen((const char *) s);
1062     }
1063
1064     if (flags == 0) {
1065         return is_utf8_string_loclen(s, len, ep, el);
1066     }
1067
1068     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1069                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1070     {
1071         return is_strict_utf8_string_loclen(s, len, ep, el);
1072     }
1073
1074     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1075                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1076     {
1077         return is_c9strict_utf8_string_loclen(s, len, ep, el);
1078     }
1079
1080     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1081         if (el)
1082             *el = len;
1083
1084         if (ep) {
1085             *ep = s + len;
1086         }
1087
1088         return TRUE;
1089     }
1090
1091     {
1092         const U8* send = s + len;
1093         const U8* x = first_variant;
1094         STRLEN outlen = first_variant - s;
1095
1096         while (x < send) {
1097             const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1098             if (UNLIKELY(! cur_len)) {
1099                 break;
1100             }
1101             x += cur_len;
1102             outlen++;
1103         }
1104
1105         if (el)
1106             *el = outlen;
1107
1108         if (ep) {
1109             *ep = x;
1110         }
1111
1112         return (x == send);
1113     }
1114 }
1115
1116 /*
1117 =for apidoc utf8_distance
1118
1119 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1120 and C<b>.
1121
1122 WARNING: use only if you *know* that the pointers point inside the
1123 same UTF-8 buffer.
1124
1125 =cut
1126 */
1127
1128 PERL_STATIC_INLINE IV
1129 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1130 {
1131     PERL_ARGS_ASSERT_UTF8_DISTANCE;
1132
1133     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1134 }
1135
1136 /*
1137 =for apidoc utf8_hop
1138
1139 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1140 forward or backward.
1141
1142 WARNING: do not use the following unless you *know* C<off> is within
1143 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1144 on the first byte of character or just after the last byte of a character.
1145
1146 =cut
1147 */
1148
1149 PERL_STATIC_INLINE U8 *
1150 Perl_utf8_hop(const U8 *s, SSize_t off)
1151 {
1152     PERL_ARGS_ASSERT_UTF8_HOP;
1153
1154     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1155      * the bitops (especially ~) can create illegal UTF-8.
1156      * In other words: in Perl UTF-8 is not just for Unicode. */
1157
1158     if (off >= 0) {
1159         while (off--)
1160             s += UTF8SKIP(s);
1161     }
1162     else {
1163         while (off++) {
1164             s--;
1165             while (UTF8_IS_CONTINUATION(*s))
1166                 s--;
1167         }
1168     }
1169     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1170     return (U8 *)s;
1171     GCC_DIAG_RESTORE_STMT;
1172 }
1173
1174 /*
1175 =for apidoc utf8_hop_forward
1176
1177 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1178 forward.
1179
1180 C<off> must be non-negative.
1181
1182 C<s> must be before or equal to C<end>.
1183
1184 When moving forward it will not move beyond C<end>.
1185
1186 Will not exceed this limit even if the string is not valid "UTF-8".
1187
1188 =cut
1189 */
1190
1191 PERL_STATIC_INLINE U8 *
1192 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1193 {
1194     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1195
1196     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1197      * the bitops (especially ~) can create illegal UTF-8.
1198      * In other words: in Perl UTF-8 is not just for Unicode. */
1199
1200     assert(s <= end);
1201     assert(off >= 0);
1202
1203     while (off--) {
1204         STRLEN skip = UTF8SKIP(s);
1205         if ((STRLEN)(end - s) <= skip) {
1206             GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1207             return (U8 *)end;
1208             GCC_DIAG_RESTORE_STMT;
1209         }
1210         s += skip;
1211     }
1212
1213     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1214     return (U8 *)s;
1215     GCC_DIAG_RESTORE_STMT;
1216 }
1217
1218 /*
1219 =for apidoc utf8_hop_back
1220
1221 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1222 backward.
1223
1224 C<off> must be non-positive.
1225
1226 C<s> must be after or equal to C<start>.
1227
1228 When moving backward it will not move before C<start>.
1229
1230 Will not exceed this limit even if the string is not valid "UTF-8".
1231
1232 =cut
1233 */
1234
1235 PERL_STATIC_INLINE U8 *
1236 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1237 {
1238     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1239
1240     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1241      * the bitops (especially ~) can create illegal UTF-8.
1242      * In other words: in Perl UTF-8 is not just for Unicode. */
1243
1244     assert(start <= s);
1245     assert(off <= 0);
1246
1247     while (off++ && s > start) {
1248         s--;
1249         while (UTF8_IS_CONTINUATION(*s) && s > start)
1250             s--;
1251     }
1252
1253     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1254     return (U8 *)s;
1255     GCC_DIAG_RESTORE_STMT;
1256 }
1257
1258 /*
1259 =for apidoc utf8_hop_safe
1260
1261 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1262 either forward or backward.
1263
1264 When moving backward it will not move before C<start>.
1265
1266 When moving forward it will not move beyond C<end>.
1267
1268 Will not exceed those limits even if the string is not valid "UTF-8".
1269
1270 =cut
1271 */
1272
1273 PERL_STATIC_INLINE U8 *
1274 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1275 {
1276     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1277
1278     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1279      * the bitops (especially ~) can create illegal UTF-8.
1280      * In other words: in Perl UTF-8 is not just for Unicode. */
1281
1282     assert(start <= s && s <= end);
1283
1284     if (off >= 0) {
1285         return utf8_hop_forward(s, off, end);
1286     }
1287     else {
1288         return utf8_hop_back(s, off, start);
1289     }
1290 }
1291
1292 /*
1293
1294 =for apidoc is_utf8_valid_partial_char
1295
1296 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1297 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1298 points.  Otherwise, it returns 1 if there exists at least one non-empty
1299 sequence of bytes that when appended to sequence C<s>, starting at position
1300 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1301 otherwise returns 0.
1302
1303 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1304 point.
1305
1306 This is useful when a fixed-length buffer is being tested for being well-formed
1307 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1308 it is split somewhere in the middle of the final code point's UTF-8
1309 representation.  (Presumably when the buffer is refreshed with the next chunk
1310 of data, the new first bytes will complete the partial code point.)   This
1311 function is used to verify that the final bytes in the current buffer are in
1312 fact the legal beginning of some code point, so that if they aren't, the
1313 failure can be signalled without having to wait for the next read.
1314
1315 =cut
1316 */
1317 #define is_utf8_valid_partial_char(s, e)                                    \
1318                                 is_utf8_valid_partial_char_flags(s, e, 0)
1319
1320 /*
1321
1322 =for apidoc is_utf8_valid_partial_char_flags
1323
1324 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1325 or not the input is a valid UTF-8 encoded partial character, but it takes an
1326 extra parameter, C<flags>, which can further restrict which code points are
1327 considered valid.
1328
1329 If C<flags> is 0, this behaves identically to
1330 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1331 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1332 there is any sequence of bytes that can complete the input partial character in
1333 such a way that a non-prohibited character is formed, the function returns
1334 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1335 partial character input.  But many  of the other possible excluded types can be
1336 determined from just the first one or two bytes.
1337
1338 =cut
1339  */
1340
1341 PERL_STATIC_INLINE bool
1342 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1343 {
1344     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1345
1346     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1347                           |UTF8_DISALLOW_PERL_EXTENDED)));
1348
1349     if (s >= e || s + UTF8SKIP(s) <= e) {
1350         return FALSE;
1351     }
1352
1353     return cBOOL(_is_utf8_char_helper(s, e, flags));
1354 }
1355
1356 /*
1357
1358 =for apidoc is_utf8_fixed_width_buf_flags
1359
1360 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1361 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1362 otherwise it returns FALSE.
1363
1364 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1365 without restriction.  If the final few bytes of the buffer do not form a
1366 complete code point, this will return TRUE anyway, provided that
1367 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1368
1369 If C<flags> in non-zero, it can be any combination of the
1370 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1371 same meanings.
1372
1373 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1374 returns FALSE if the final few bytes of the string don't form a complete code
1375 point.
1376
1377 =cut
1378  */
1379 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1380                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1381
1382 /*
1383
1384 =for apidoc is_utf8_fixed_width_buf_loc_flags
1385
1386 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1387 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1388 to the beginning of any partial character at the end of the buffer; if there is
1389 no partial character C<*ep> will contain C<s>+C<len>.
1390
1391 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1392
1393 =cut
1394 */
1395
1396 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1397                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1398
1399 /*
1400
1401 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1402
1403 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1404 complete, valid characters found in the C<el> pointer.
1405
1406 =cut
1407 */
1408
1409 PERL_STATIC_INLINE bool
1410 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1411                                        STRLEN len,
1412                                        const U8 **ep,
1413                                        STRLEN *el,
1414                                        const U32 flags)
1415 {
1416     const U8 * maybe_partial;
1417
1418     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1419
1420     if (! ep) {
1421         ep  = &maybe_partial;
1422     }
1423
1424     /* If it's entirely valid, return that; otherwise see if the only error is
1425      * that the final few bytes are for a partial character */
1426     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1427            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1428 }
1429
1430 /* ------------------------------- perl.h ----------------------------- */
1431
1432 /*
1433 =head1 Miscellaneous Functions
1434
1435 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1436
1437 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1438 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1439
1440 Return TRUE if the name is safe.
1441
1442 Used by the C<IS_SAFE_SYSCALL()> macro.
1443
1444 =cut
1445 */
1446
1447 PERL_STATIC_INLINE bool
1448 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1449     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1450      * perl itself uses xce*() functions which accept 8-bit strings.
1451      */
1452
1453     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1454
1455     if (len > 1) {
1456         char *null_at;
1457         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1458                 SETERRNO(ENOENT, LIB_INVARG);
1459                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1460                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1461                                    what, op_name, pv, null_at+1);
1462                 return FALSE;
1463         }
1464     }
1465
1466     return TRUE;
1467 }
1468
1469 /*
1470
1471 Return true if the supplied filename has a newline character
1472 immediately before the first (hopefully only) NUL.
1473
1474 My original look at this incorrectly used the len from SvPV(), but
1475 that's incorrect, since we allow for a NUL in pv[len-1].
1476
1477 So instead, strlen() and work from there.
1478
1479 This allow for the user reading a filename, forgetting to chomp it,
1480 then calling:
1481
1482   open my $foo, "$file\0";
1483
1484 */
1485
1486 #ifdef PERL_CORE
1487
1488 PERL_STATIC_INLINE bool
1489 S_should_warn_nl(const char *pv) {
1490     STRLEN len;
1491
1492     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1493
1494     len = strlen(pv);
1495
1496     return len > 0 && pv[len-1] == '\n';
1497 }
1498
1499 #endif
1500
1501 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1502
1503 #define MAX_CHARSET_NAME_LENGTH 2
1504
1505 PERL_STATIC_INLINE const char *
1506 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1507 {
1508     /* Returns a string that corresponds to the name of the regex character set
1509      * given by 'flags', and *lenp is set the length of that string, which
1510      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1511
1512     *lenp = 1;
1513     switch (get_regex_charset(flags)) {
1514         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1515         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1516         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1517         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1518         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1519             *lenp = 2;
1520             return ASCII_MORE_RESTRICT_PAT_MODS;
1521     }
1522     /* The NOT_REACHED; hides an assert() which has a rather complex
1523      * definition in perl.h. */
1524     NOT_REACHED; /* NOTREACHED */
1525     return "?";     /* Unknown */
1526 }
1527
1528 /*
1529
1530 Return false if any get magic is on the SV other than taint magic.
1531
1532 */
1533
1534 PERL_STATIC_INLINE bool
1535 S_sv_only_taint_gmagic(SV *sv) {
1536     MAGIC *mg = SvMAGIC(sv);
1537
1538     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1539
1540     while (mg) {
1541         if (mg->mg_type != PERL_MAGIC_taint
1542             && !(mg->mg_flags & MGf_GSKIP)
1543             && mg->mg_virtual->svt_get) {
1544             return FALSE;
1545         }
1546         mg = mg->mg_moremagic;
1547     }
1548
1549     return TRUE;
1550 }
1551
1552 /* ------------------ cop.h ------------------------------------------- */
1553
1554
1555 /* Enter a block. Push a new base context and return its address. */
1556
1557 PERL_STATIC_INLINE PERL_CONTEXT *
1558 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1559 {
1560     PERL_CONTEXT * cx;
1561
1562     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1563
1564     CXINC;
1565     cx = CX_CUR();
1566     cx->cx_type        = type;
1567     cx->blk_gimme      = gimme;
1568     cx->blk_oldsaveix  = saveix;
1569     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1570     cx->blk_oldcop     = PL_curcop;
1571     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1572     cx->blk_oldscopesp = PL_scopestack_ix;
1573     cx->blk_oldpm      = PL_curpm;
1574     cx->blk_old_tmpsfloor = PL_tmps_floor;
1575
1576     PL_tmps_floor        = PL_tmps_ix;
1577     CX_DEBUG(cx, "PUSH");
1578     return cx;
1579 }
1580
1581
1582 /* Exit a block (RETURN and LAST). */
1583
1584 PERL_STATIC_INLINE void
1585 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1586 {
1587     PERL_ARGS_ASSERT_CX_POPBLOCK;
1588
1589     CX_DEBUG(cx, "POP");
1590     /* these 3 are common to cx_popblock and cx_topblock */
1591     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1592     PL_scopestack_ix = cx->blk_oldscopesp;
1593     PL_curpm         = cx->blk_oldpm;
1594
1595     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1596      * and leaves a CX entry lying around for repeated use, so
1597      * skip for multicall */                  \
1598     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1599             || PL_savestack_ix == cx->blk_oldsaveix);
1600     PL_curcop     = cx->blk_oldcop;
1601     PL_tmps_floor = cx->blk_old_tmpsfloor;
1602 }
1603
1604 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1605  * Whereas cx_popblock() restores the state to the point just before
1606  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1607  * *after* cx_pushblock() was called. */
1608
1609 PERL_STATIC_INLINE void
1610 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1611 {
1612     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1613
1614     CX_DEBUG(cx, "TOP");
1615     /* these 3 are common to cx_popblock and cx_topblock */
1616     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1617     PL_scopestack_ix = cx->blk_oldscopesp;
1618     PL_curpm         = cx->blk_oldpm;
1619
1620     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1621 }
1622
1623
1624 PERL_STATIC_INLINE void
1625 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1626 {
1627     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1628
1629     PERL_ARGS_ASSERT_CX_PUSHSUB;
1630
1631     PERL_DTRACE_PROBE_ENTRY(cv);
1632     cx->blk_sub.cv = cv;
1633     cx->blk_sub.olddepth = CvDEPTH(cv);
1634     cx->blk_sub.prevcomppad = PL_comppad;
1635     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1636     cx->blk_sub.retop = retop;
1637     SvREFCNT_inc_simple_void_NN(cv);
1638     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1639 }
1640
1641
1642 /* subsets of cx_popsub() */
1643
1644 PERL_STATIC_INLINE void
1645 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1646 {
1647     CV *cv;
1648
1649     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1650     assert(CxTYPE(cx) == CXt_SUB);
1651
1652     PL_comppad = cx->blk_sub.prevcomppad;
1653     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1654     cv = cx->blk_sub.cv;
1655     CvDEPTH(cv) = cx->blk_sub.olddepth;
1656     cx->blk_sub.cv = NULL;
1657     SvREFCNT_dec(cv);
1658 }
1659
1660
1661 /* handle the @_ part of leaving a sub */
1662
1663 PERL_STATIC_INLINE void
1664 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1665 {
1666     AV *av;
1667
1668     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1669     assert(CxTYPE(cx) == CXt_SUB);
1670     assert(AvARRAY(MUTABLE_AV(
1671         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1672                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1673
1674     CX_POP_SAVEARRAY(cx);
1675     av = MUTABLE_AV(PAD_SVl(0));
1676     if (UNLIKELY(AvREAL(av)))
1677         /* abandon @_ if it got reified */
1678         clear_defarray(av, 0);
1679     else {
1680         CLEAR_ARGARRAY(av);
1681     }
1682 }
1683
1684
1685 PERL_STATIC_INLINE void
1686 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1687 {
1688     PERL_ARGS_ASSERT_CX_POPSUB;
1689     assert(CxTYPE(cx) == CXt_SUB);
1690
1691     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1692
1693     if (CxHASARGS(cx))
1694         cx_popsub_args(cx);
1695     cx_popsub_common(cx);
1696 }
1697
1698
1699 PERL_STATIC_INLINE void
1700 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1701 {
1702     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1703
1704     cx->blk_format.cv          = cv;
1705     cx->blk_format.retop       = retop;
1706     cx->blk_format.gv          = gv;
1707     cx->blk_format.dfoutgv     = PL_defoutgv;
1708     cx->blk_format.prevcomppad = PL_comppad;
1709     cx->blk_u16                = 0;
1710
1711     SvREFCNT_inc_simple_void_NN(cv);
1712     CvDEPTH(cv)++;
1713     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1714 }
1715
1716
1717 PERL_STATIC_INLINE void
1718 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1719 {
1720     CV *cv;
1721     GV *dfout;
1722
1723     PERL_ARGS_ASSERT_CX_POPFORMAT;
1724     assert(CxTYPE(cx) == CXt_FORMAT);
1725
1726     dfout = cx->blk_format.dfoutgv;
1727     setdefout(dfout);
1728     cx->blk_format.dfoutgv = NULL;
1729     SvREFCNT_dec_NN(dfout);
1730
1731     PL_comppad = cx->blk_format.prevcomppad;
1732     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1733     cv = cx->blk_format.cv;
1734     cx->blk_format.cv = NULL;
1735     --CvDEPTH(cv);
1736     SvREFCNT_dec_NN(cv);
1737 }
1738
1739
1740 PERL_STATIC_INLINE void
1741 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1742 {
1743     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1744
1745     cx->blk_eval.retop         = retop;
1746     cx->blk_eval.old_namesv    = namesv;
1747     cx->blk_eval.old_eval_root = PL_eval_root;
1748     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1749     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1750     cx->blk_eval.cur_top_env   = PL_top_env;
1751
1752     assert(!(PL_in_eval     & ~ 0x3F));
1753     assert(!(PL_op->op_type & ~0x1FF));
1754     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1755 }
1756
1757
1758 PERL_STATIC_INLINE void
1759 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1760 {
1761     SV *sv;
1762
1763     PERL_ARGS_ASSERT_CX_POPEVAL;
1764     assert(CxTYPE(cx) == CXt_EVAL);
1765
1766     PL_in_eval = CxOLD_IN_EVAL(cx);
1767     assert(!(PL_in_eval & 0xc0));
1768     PL_eval_root = cx->blk_eval.old_eval_root;
1769     sv = cx->blk_eval.cur_text;
1770     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1771         cx->blk_eval.cur_text = NULL;
1772         SvREFCNT_dec_NN(sv);
1773     }
1774
1775     sv = cx->blk_eval.old_namesv;
1776     if (sv) {
1777         cx->blk_eval.old_namesv = NULL;
1778         SvREFCNT_dec_NN(sv);
1779     }
1780 }
1781
1782
1783 /* push a plain loop, i.e.
1784  *     { block }
1785  *     while (cond) { block }
1786  *     for (init;cond;continue) { block }
1787  * This loop can be last/redo'ed etc.
1788  */
1789
1790 PERL_STATIC_INLINE void
1791 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1792 {
1793     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1794     cx->blk_loop.my_op = cLOOP;
1795 }
1796
1797
1798 /* push a true for loop, i.e.
1799  *     for var (list) { block }
1800  */
1801
1802 PERL_STATIC_INLINE void
1803 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1804 {
1805     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1806
1807     /* this one line is common with cx_pushloop_plain */
1808     cx->blk_loop.my_op = cLOOP;
1809
1810     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1811     cx->blk_loop.itersave      = itersave;
1812 #ifdef USE_ITHREADS
1813     cx->blk_loop.oldcomppad = PL_comppad;
1814 #endif
1815 }
1816
1817
1818 /* pop all loop types, including plain */
1819
1820 PERL_STATIC_INLINE void
1821 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1822 {
1823     PERL_ARGS_ASSERT_CX_POPLOOP;
1824
1825     assert(CxTYPE_is_LOOP(cx));
1826     if (  CxTYPE(cx) == CXt_LOOP_ARY
1827        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1828     {
1829         /* Free ary or cur. This assumes that state_u.ary.ary
1830          * aligns with state_u.lazysv.cur. See cx_dup() */
1831         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1832         cx->blk_loop.state_u.lazysv.cur = NULL;
1833         SvREFCNT_dec_NN(sv);
1834         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1835             sv = cx->blk_loop.state_u.lazysv.end;
1836             cx->blk_loop.state_u.lazysv.end = NULL;
1837             SvREFCNT_dec_NN(sv);
1838         }
1839     }
1840     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1841         SV *cursv;
1842         SV **svp = (cx)->blk_loop.itervar_u.svp;
1843         if ((cx->cx_type & CXp_FOR_GV))
1844             svp = &GvSV((GV*)svp);
1845         cursv = *svp;
1846         *svp = cx->blk_loop.itersave;
1847         cx->blk_loop.itersave = NULL;
1848         SvREFCNT_dec(cursv);
1849     }
1850 }
1851
1852
1853 PERL_STATIC_INLINE void
1854 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1855 {
1856     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1857
1858     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1859 }
1860
1861
1862 PERL_STATIC_INLINE void
1863 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1864 {
1865     PERL_ARGS_ASSERT_CX_POPWHEN;
1866     assert(CxTYPE(cx) == CXt_WHEN);
1867
1868     PERL_UNUSED_ARG(cx);
1869     PERL_UNUSED_CONTEXT;
1870     /* currently NOOP */
1871 }
1872
1873
1874 PERL_STATIC_INLINE void
1875 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1876 {
1877     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1878
1879     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1880     cx->blk_givwhen.defsv_save = orig_defsv;
1881 }
1882
1883
1884 PERL_STATIC_INLINE void
1885 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1886 {
1887     SV *sv;
1888
1889     PERL_ARGS_ASSERT_CX_POPGIVEN;
1890     assert(CxTYPE(cx) == CXt_GIVEN);
1891
1892     sv = GvSV(PL_defgv);
1893     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1894     cx->blk_givwhen.defsv_save = NULL;
1895     SvREFCNT_dec(sv);
1896 }
1897
1898 /* ------------------ util.h ------------------------------------------- */
1899
1900 /*
1901 =head1 Miscellaneous Functions
1902
1903 =for apidoc foldEQ
1904
1905 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1906 same
1907 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
1908 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
1909 range bytes match only themselves.
1910
1911 =cut
1912 */
1913
1914 PERL_STATIC_INLINE I32
1915 Perl_foldEQ(const char *s1, const char *s2, I32 len)
1916 {
1917     const U8 *a = (const U8 *)s1;
1918     const U8 *b = (const U8 *)s2;
1919
1920     PERL_ARGS_ASSERT_FOLDEQ;
1921
1922     assert(len >= 0);
1923
1924     while (len--) {
1925         if (*a != *b && *a != PL_fold[*b])
1926             return 0;
1927         a++,b++;
1928     }
1929     return 1;
1930 }
1931
1932 PERL_STATIC_INLINE I32
1933 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1934 {
1935     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
1936      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1937      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
1938      * does it check that the strings each have at least 'len' characters */
1939
1940     const U8 *a = (const U8 *)s1;
1941     const U8 *b = (const U8 *)s2;
1942
1943     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1944
1945     assert(len >= 0);
1946
1947     while (len--) {
1948         if (*a != *b && *a != PL_fold_latin1[*b]) {
1949             return 0;
1950         }
1951         a++, b++;
1952     }
1953     return 1;
1954 }
1955
1956 /*
1957 =for apidoc foldEQ_locale
1958
1959 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1960 same case-insensitively in the current locale; false otherwise.
1961
1962 =cut
1963 */
1964
1965 PERL_STATIC_INLINE I32
1966 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1967 {
1968     dVAR;
1969     const U8 *a = (const U8 *)s1;
1970     const U8 *b = (const U8 *)s2;
1971
1972     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1973
1974     assert(len >= 0);
1975
1976     while (len--) {
1977         if (*a != *b && *a != PL_fold_locale[*b])
1978             return 0;
1979         a++,b++;
1980     }
1981     return 1;
1982 }
1983
1984 #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1985
1986 PERL_STATIC_INLINE void *
1987 S_my_memrchr(const char * s, const char c, const STRLEN len)
1988 {
1989     /* memrchr(), since many platforms lack it */
1990
1991     const char * t = s + len - 1;
1992
1993     PERL_ARGS_ASSERT_MY_MEMRCHR;
1994
1995     while (t >= s) {
1996         if (*t == c) {
1997             return (void *) t;
1998         }
1999         t--;
2000     }
2001
2002     return NULL;
2003 }
2004
2005 #endif
2006
2007 /*
2008  * ex: set ts=8 sts=4 sw=4 et:
2009  */