inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     XPV* const p = (XPV*)SvANY(re);
 157     assert(isREGEXP(re));
 158     return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
 159                                    : (struct regexp *)p;
 160 }
 161
 162 /* ------------------------------- sv.h ------------------------------- */
 163
 164 PERL_STATIC_INLINE SV *
 165 S_SvREFCNT_inc(SV *sv)
 166 {
 167     if (LIKELY(sv != NULL))
 168         SvREFCNT(sv)++;
 169     return sv;
 170 }
 171 PERL_STATIC_INLINE SV *
 172 S_SvREFCNT_inc_NN(SV *sv)
 173 {
 174     SvREFCNT(sv)++;
 175     return sv;
 176 }
 177 PERL_STATIC_INLINE void
 178 S_SvREFCNT_inc_void(SV *sv)
 179 {
 180     if (LIKELY(sv != NULL))
 181         SvREFCNT(sv)++;
 182 }
 183 PERL_STATIC_INLINE void
 184 S_SvREFCNT_dec(pTHX_ SV *sv)
 185 {
 186     if (LIKELY(sv != NULL)) {
 187         U32 rc = SvREFCNT(sv);
 188         if (LIKELY(rc > 1))
 189             SvREFCNT(sv) = rc - 1;
 190         else
 191             Perl_sv_free2(aTHX_ sv, rc);
 192     }
 193 }
 194
 195 PERL_STATIC_INLINE void
 196 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 197 {
 198     U32 rc = SvREFCNT(sv);
 199     if (LIKELY(rc > 1))
 200         SvREFCNT(sv) = rc - 1;
 201     else
 202         Perl_sv_free2(aTHX_ sv, rc);
 203 }
 204
 205 PERL_STATIC_INLINE void
 206 SvAMAGIC_on(SV *sv)
 207 {
 208     assert(SvROK(sv));
 209     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 210 }
 211 PERL_STATIC_INLINE void
 212 SvAMAGIC_off(SV *sv)
 213 {
 214     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 215         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 216 }
 217
 218 PERL_STATIC_INLINE U32
 219 S_SvPADSTALE_on(SV *sv)
 220 {
 221     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 222     return SvFLAGS(sv) |= SVs_PADSTALE;
 223 }
 224 PERL_STATIC_INLINE U32
 225 S_SvPADSTALE_off(SV *sv)
 226 {
 227     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 228     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 229 }
 230 #if defined(PERL_CORE) || defined (PERL_EXT)
 231 PERL_STATIC_INLINE STRLEN
 232 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 233 {
 234     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 235     if (SvGAMAGIC(sv)) {
 236         U8 *hopped = utf8_hop((U8 *)pv, pos);
 237         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 238         return (STRLEN)(hopped - (U8 *)pv);
 239     }
 240     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 241 }
 242 #endif
 243
 244 /* ------------------------------- handy.h ------------------------------- */
 245
 246 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 247 GCC_DIAG_IGNORE_DECL(-Wunused-function);
 248 static void
 249 S_croak_memory_wrap(void)
 250 {
 251     Perl_croak_nocontext("%s",PL_memory_wrap);
 252 }
 253 GCC_DIAG_RESTORE_DECL;
 254
 255 /* ------------------------------- utf8.h ------------------------------- */
 256
 257 /*
 258 =head1 Unicode Support
 259 */
 260
 261 PERL_STATIC_INLINE void
 262 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 263 {
 264     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 265      * encoded string at '*dest', updating '*dest' to include it */
 266
 267     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 268
 269     if (NATIVE_BYTE_IS_INVARIANT(byte))
 270         *((*dest)++) = byte;
 271     else {
 272         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 273         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 274     }
 275 }
 276
 277 /*
 278 =for apidoc valid_utf8_to_uvchr
 279 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 280 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 281 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 282 non-Unicode code points are allowed.
 283
 284 =cut
 285
 286  */
 287
 288 PERL_STATIC_INLINE UV
 289 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 290 {
 291     const UV expectlen = UTF8SKIP(s);
 292     const U8* send = s + expectlen;
 293     UV uv = *s;
 294
 295     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 296
 297     if (retlen) {
 298         *retlen = expectlen;
 299     }
 300
 301     /* An invariant is trivially returned */
 302     if (expectlen == 1) {
 303         return uv;
 304     }
 305
 306     /* Remove the leading bits that indicate the number of bytes, leaving just
 307      * the bits that are part of the value */
 308     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 309
 310     /* Now, loop through the remaining bytes, accumulating each into the
 311      * working total as we go.  (I khw tried unrolling the loop for up to 4
 312      * bytes, but there was no performance improvement) */
 313     for (++s; s < send; s++) {
 314         uv = UTF8_ACCUMULATE(uv, *s);
 315     }
 316
 317     return UNI_TO_NATIVE(uv);
 318
 319 }
 320
 321 /*
 322 =for apidoc is_utf8_invariant_string
 323
 324 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 325 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 326 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 327 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 328 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 329 characters are invariant, but so also are the C1 controls.
 330
 331 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 332 use this option, that C<s> can't have embedded C<NUL> characters and has to
 333 have a terminating C<NUL> byte).
 334
 335 See also
 336 C<L</is_utf8_string>>,
 337 C<L</is_utf8_string_flags>>,
 338 C<L</is_utf8_string_loc>>,
 339 C<L</is_utf8_string_loc_flags>>,
 340 C<L</is_utf8_string_loclen>>,
 341 C<L</is_utf8_string_loclen_flags>>,
 342 C<L</is_utf8_fixed_width_buf_flags>>,
 343 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 344 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 345 C<L</is_strict_utf8_string>>,
 346 C<L</is_strict_utf8_string_loc>>,
 347 C<L</is_strict_utf8_string_loclen>>,
 348 C<L</is_c9strict_utf8_string>>,
 349 C<L</is_c9strict_utf8_string_loc>>,
 350 and
 351 C<L</is_c9strict_utf8_string_loclen>>.
 352
 353 =cut
 354
 355 */
 356
 357 #define is_utf8_invariant_string(s, len)                                    \
 358                                 is_utf8_invariant_string_loc(s, len, NULL)
 359
 360 /*
 361 =for apidoc is_utf8_invariant_string_loc
 362
 363 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
 364 the first UTF-8 variant character in the C<ep> pointer; if all characters are
 365 UTF-8 invariant, this function does not change the contents of C<*ep>.
 366
 367 =cut
 368
 369 */
 370
 371 PERL_STATIC_INLINE bool
 372 S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 373 {
 374     const U8* send;
 375     const U8* x = s;
 376
 377     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 378
 379     if (len == 0) {
 380         len = strlen((const char *)s);
 381     }
 382
 383     send = s + len;
 384
 385 /* This looks like 0x010101... */
 386 #  define PERL_COUNT_MULTIPLIER   (~ (UINTMAX_C(0)) / 0xFF)
 387
 388 /* This looks like 0x808080... */
 389 #  define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
 390 #  define PERL_WORDSIZE            sizeof(PERL_COUNT_MULTIPLIER)
 391 #  define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
 392
 393 /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
 394  * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
 395  * optimized out completely on a 32-bit system, and its mask gets optimized out
 396  * on a 64-bit system */
 397 #  define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                     \
 398                                       |   (  PTR2nat(x) >> 1)                 \
 399                                       | ( ( (PTR2nat(x)                       \
 400                                            & PERL_WORD_BOUNDARY_MASK) >> 2))))
 401
 402 #ifndef EBCDIC
 403
 404     /* Do the word-at-a-time iff there is at least one usable full word.  That
 405      * means that after advancing to a word boundary, there still is at least a
 406      * full word left.  The number of bytes needed to advance is 'wordsize -
 407      * offset' unless offset is 0. */
 408     if ((STRLEN) (send - x) >= PERL_WORDSIZE
 409
 410                             /* This term is wordsize if subword; 0 if not */
 411                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 412
 413                             /* 'offset' */
 414                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 415     {
 416
 417         /* Process per-byte until reach word boundary.  XXX This loop could be
 418          * eliminated if we knew that this platform had fast unaligned reads */
 419         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 420             if (! UTF8_IS_INVARIANT(*x)) {
 421                 if (ep) {
 422                     *ep = x;
 423                 }
 424
 425                 return FALSE;
 426             }
 427             x++;
 428         }
 429
 430         /* Here, we know we have at least one full word to process.  Process
 431          * per-word as long as we have at least a full word left */
 432         do {
 433             if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK)  {
 434
 435                 /* Found a variant.  Just return if caller doesn't want its
 436                  * exact position */
 437                 if (! ep) {
 438                     return FALSE;
 439                 }
 440
 441 #  if   BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    \
 442      || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 443
 444                 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
 445                 assert(*ep >= s && *ep < send);
 446
 447                 return FALSE;
 448
 449 #  else   /* If weird byte order, drop into next loop to do byte-at-a-time
 450            checks. */
 451
 452                 break;
 453 #  endif
 454             }
 455
 456             x += PERL_WORDSIZE;
 457
 458         } while (x + PERL_WORDSIZE <= send);
 459     }
 460
 461 #endif      /* End of ! EBCDIC */
 462
 463     /* Process per-byte */
 464     while (x < send) {
 465         if (! UTF8_IS_INVARIANT(*x)) {
 466             if (ep) {
 467                 *ep = x;
 468             }
 469
 470             return FALSE;
 471         }
 472
 473         x++;
 474     }
 475
 476     return TRUE;
 477 }
 478
 479 #ifndef EBCDIC
 480
 481 PERL_STATIC_INLINE unsigned int
 482 S__variant_byte_number(PERL_UINTMAX_T word)
 483 {
 484
 485     /* This returns the position in a word (0..7) of the first variant byte in
 486      * it.  This is a helper function.  Note that there are no branches */
 487
 488     assert(word);
 489
 490     /* Get just the msb bits of each byte */
 491     word &= PERL_VARIANTS_WORD_MASK;
 492
 493 #  ifdef USING_MSVC6    /* VC6 has some issues with the normal code, and the
 494                            easiest thing is to hide that from the callers */
 495     {
 496         unsigned int i;
 497         const U8 * s = (U8 *) &word;
 498         dTHX;
 499
 500         for (i = 0; i < sizeof(word); i++ ) {
 501             if (s[i]) {
 502                 return i;
 503             }
 504         }
 505
 506         Perl_croak(aTHX_ "panic: %s: %d: unexpected zero word\n",
 507                                  __FILE__, __LINE__);
 508     }
 509
 510 #  elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
 511
 512     /* Bytes are stored like
 513      *  Byte8 ... Byte2 Byte1
 514      *  63..56...15...8 7...0
 515      *
 516      *  Isolate the lsb;
 517      * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
 518      *
 519      * The word will look this this, with a rightmost set bit in position 's':
 520      * ('x's are don't cares)
 521      *      s
 522      *  x..x100..0
 523      *  x..xx10..0      Right shift (rightmost 0 is shifted off)
 524      *  x..xx01..1      Subtract 1, turns all the trailing zeros into 1's and
 525      *                  the 1 just to their left into a 0; the remainder is
 526      *                  untouched
 527      *  0..0011..1      The xor with x..xx10..0 clears that remainder, sets
 528      *                  bottom to all 1
 529      *  0..0100..0      Add 1 to clear the word except for the bit in 's'
 530      *
 531      * Another method is to do 'word &= -word'; but it generates a compiler
 532      * message on some platforms about taking the negative of an unsigned */
 533
 534     word >>= 1;
 535     word = 1 + (word ^ (word - 1));
 536
 537 #  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 538
 539     /* Bytes are stored like
 540      *  Byte1 Byte2  ... Byte8
 541      * 63..56 55..47 ... 7...0
 542      *
 543      * Isolate the msb; http://codeforces.com/blog/entry/10330
 544      *
 545      * Only the most significant set bit matters.  Or'ing word with its right
 546      * shift of 1 makes that bit and the next one to its right both 1.  Then
 547      * right shifting by 2 makes for 4 1-bits in a row. ...  We end with the
 548      * msb and all to the right being 1. */
 549     word |= word >>  1;
 550     word |= word >>  2;
 551     word |= word >>  4;
 552     word |= word >>  8;
 553     word |= word >> 16;
 554     word |= word >> 32;  /* This should get optimized out on 32-bit systems. */
 555
 556     /* Then subtracting the right shift by 1 clears all but the left-most of
 557      * the 1 bits, which is our desired result */
 558     word -= (word >> 1);
 559
 560 #  else
 561 #    error Unexpected byte order
 562 #  endif
 563
 564     /* Here 'word' has a single bit set: the  msb of the first byte in which it
 565      * is set.  Calculate that position in the word.  We can use this
 566      * specialized solution: https://stackoverflow.com/a/32339674/1626653,
 567      * assumes an 8-bit byte.  (On a 32-bit machine, the larger numbers should
 568      * just get shifted off at compile time) */
 569     word = (word >> 7) * ((UINTMAX_C( 7) << 56) | (UINTMAX_C(15) << 48)
 570                         | (UINTMAX_C(23) << 40) | (UINTMAX_C(31) << 32)
 571                         |           (39 <<  24) |           (47 <<  16)
 572                         |           (55 <<   8) |           (63 <<   0));
 573     word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
 574
 575     /* Here, word contains the position 7..63 of that bit.  Convert to 0..7 */
 576     word = ((word + 1) >> 3) - 1;
 577
 578 #  if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 579
 580     /* And invert the result */
 581     word = CHARBITS - word - 1;
 582
 583 #  endif
 584
 585     return (unsigned int) word;
 586 }
 587
 588 #endif
 589 #if defined(PERL_CORE) || defined(PERL_EXT)
 590
 591 /*
 592 =for apidoc variant_under_utf8_count
 593
 594 This function looks at the sequence of bytes between C<s> and C<e>, which are
 595 assumed to be encoded in ASCII/Latin1, and returns how many of them would
 596 change should the string be translated into UTF-8.  Due to the nature of UTF-8,
 597 each of these would occupy two bytes instead of the single one in the input
 598 string.  Thus, this function returns the precise number of bytes the string
 599 would expand by when translated to UTF-8.
 600
 601 Unlike most of the other functions that have C<utf8> in their name, the input
 602 to this function is NOT a UTF-8-encoded string.  The function name is slightly
 603 I<odd> to emphasize this.
 604
 605 This function is internal to Perl because khw thinks that any XS code that
 606 would want this is probably operating too close to the internals.  Presenting a
 607 valid use case could change that.
 608
 609 See also
 610 C<L<perlapi/is_utf8_invariant_string>>
 611 and
 612 C<L<perlapi/is_utf8_invariant_string_loc>>,
 613
 614 =cut
 615
 616 */
 617
 618 PERL_STATIC_INLINE Size_t
 619 S_variant_under_utf8_count(const U8* const s, const U8* const e)
 620 {
 621     const U8* x = s;
 622     Size_t count = 0;
 623
 624     PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
 625
 626 #  ifndef EBCDIC
 627
 628     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
 629      * same as for is_utf8_invariant_string()) */
 630     if ((STRLEN) (e - x) >= PERL_WORDSIZE
 631                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 632                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 633     {
 634
 635         /* Process per-byte until reach word boundary.  XXX This loop could be
 636          * eliminated if we knew that this platform had fast unaligned reads */
 637         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 638             count += ! UTF8_IS_INVARIANT(*x++);
 639         }
 640
 641         /* Process per-word as long as we have at least a full word left */
 642         do {    /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
 643                    explanation of how this works */
 644             count += (Size_t)
 645                     ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
 646                       * PERL_COUNT_MULTIPLIER)
 647                     >> ((PERL_WORDSIZE - 1) * CHARBITS);
 648             x += PERL_WORDSIZE;
 649         } while (x + PERL_WORDSIZE <= e);
 650     }
 651
 652 #  endif
 653
 654     /* Process per-byte */
 655     while (x < e) {
 656         if (! UTF8_IS_INVARIANT(*x)) {
 657             count++;
 658         }
 659
 660         x++;
 661     }
 662
 663     return count;
 664 }
 665
 666 #endif
 667
 668 #ifndef PERL_IN_REGEXEC_C   /* Keep  these around for that file */
 669 #  undef PERL_WORDSIZE
 670 #  undef PERL_COUNT_MULTIPLIER
 671 #  undef PERL_WORD_BOUNDARY_MASK
 672 #  undef PERL_VARIANTS_WORD_MASK
 673 #endif
 674
 675 /*
 676 =for apidoc is_utf8_string
 677
 678 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 679 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 680 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 681 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 682 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 683
 684 This function considers Perl's extended UTF-8 to be valid.  That means that
 685 code points above Unicode, surrogates, and non-character code points are
 686 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 687 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 688 code points are considered valid.
 689
 690 See also
 691 C<L</is_utf8_invariant_string>>,
 692 C<L</is_utf8_invariant_string_loc>>,
 693 C<L</is_utf8_string_loc>>,
 694 C<L</is_utf8_string_loclen>>,
 695 C<L</is_utf8_fixed_width_buf_flags>>,
 696 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 697 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 698
 699 =cut
 700 */
 701
 702 #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
 703
 704 #if defined(PERL_CORE) || defined (PERL_EXT)
 705
 706 /*
 707 =for apidoc is_utf8_non_invariant_string
 708
 709 Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
 710 C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
 711 UTF-8; otherwise returns FALSE.
 712
 713 A TRUE return means that at least one code point represented by the sequence
 714 either is a wide character not representable as a single byte, or the
 715 representation differs depending on whether the sequence is encoded in UTF-8 or
 716 not.
 717
 718 See also
 719 C<L<perlapi/is_utf8_invariant_string>>,
 720 C<L<perlapi/is_utf8_string>>
 721
 722 =cut
 723
 724 This is commonly used to determine if a SV's UTF-8 flag should be turned on.
 725 It generally needn't be if its string is entirely UTF-8 invariant, and it
 726 shouldn't be if it otherwise contains invalid UTF-8.
 727
 728 It is an internal function because khw thinks that XS code shouldn't be working
 729 at this low a level.  A valid use case could change that.
 730
 731 */
 732
 733 PERL_STATIC_INLINE bool
 734 S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
 735 {
 736     const U8 * first_variant;
 737
 738     PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
 739
 740     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 741         return FALSE;
 742     }
 743
 744     return is_utf8_string(first_variant, len - (first_variant - s));
 745 }
 746
 747 #endif
 748
 749 /*
 750 =for apidoc is_strict_utf8_string
 751
 752 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 753 UTF-8-encoded string that is fully interchangeable by any application using
 754 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 755 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 756 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 757 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 758
 759 This function returns FALSE for strings containing any
 760 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 761 non-character code points.
 762
 763 See also
 764 C<L</is_utf8_invariant_string>>,
 765 C<L</is_utf8_invariant_string_loc>>,
 766 C<L</is_utf8_string>>,
 767 C<L</is_utf8_string_flags>>,
 768 C<L</is_utf8_string_loc>>,
 769 C<L</is_utf8_string_loc_flags>>,
 770 C<L</is_utf8_string_loclen>>,
 771 C<L</is_utf8_string_loclen_flags>>,
 772 C<L</is_utf8_fixed_width_buf_flags>>,
 773 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 774 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 775 C<L</is_strict_utf8_string_loc>>,
 776 C<L</is_strict_utf8_string_loclen>>,
 777 C<L</is_c9strict_utf8_string>>,
 778 C<L</is_c9strict_utf8_string_loc>>,
 779 and
 780 C<L</is_c9strict_utf8_string_loclen>>.
 781
 782 =cut
 783 */
 784
 785 #define is_strict_utf8_string(s, len)  is_strict_utf8_string_loclen(s, len, NULL, NULL)
 786
 787 /*
 788 =for apidoc is_c9strict_utf8_string
 789
 790 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 791 UTF-8-encoded string that conforms to
 792 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 793 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 794 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 795 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 796 characters being ASCII constitute 'a valid UTF-8 string'.
 797
 798 This function returns FALSE for strings containing any code points above the
 799 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 800 code points per
 801 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 802
 803 See also
 804 C<L</is_utf8_invariant_string>>,
 805 C<L</is_utf8_invariant_string_loc>>,
 806 C<L</is_utf8_string>>,
 807 C<L</is_utf8_string_flags>>,
 808 C<L</is_utf8_string_loc>>,
 809 C<L</is_utf8_string_loc_flags>>,
 810 C<L</is_utf8_string_loclen>>,
 811 C<L</is_utf8_string_loclen_flags>>,
 812 C<L</is_utf8_fixed_width_buf_flags>>,
 813 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 814 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 815 C<L</is_strict_utf8_string>>,
 816 C<L</is_strict_utf8_string_loc>>,
 817 C<L</is_strict_utf8_string_loclen>>,
 818 C<L</is_c9strict_utf8_string_loc>>,
 819 and
 820 C<L</is_c9strict_utf8_string_loclen>>.
 821
 822 =cut
 823 */
 824
 825 #define is_c9strict_utf8_string(s, len)  is_c9strict_utf8_string_loclen(s, len, NULL, 0)
 826
 827 /*
 828 =for apidoc is_utf8_string_flags
 829
 830 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 831 UTF-8 string, subject to the restrictions imposed by C<flags>;
 832 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 833 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 834 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 835 that all characters being ASCII constitute 'a valid UTF-8 string'.
 836
 837 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 838 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 839 as C<L</is_strict_utf8_string>>; and if C<flags> is
 840 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 841 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 842 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 843 C<L</utf8n_to_uvchr>>, with the same meanings.
 844
 845 See also
 846 C<L</is_utf8_invariant_string>>,
 847 C<L</is_utf8_invariant_string_loc>>,
 848 C<L</is_utf8_string>>,
 849 C<L</is_utf8_string_loc>>,
 850 C<L</is_utf8_string_loc_flags>>,
 851 C<L</is_utf8_string_loclen>>,
 852 C<L</is_utf8_string_loclen_flags>>,
 853 C<L</is_utf8_fixed_width_buf_flags>>,
 854 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 855 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 856 C<L</is_strict_utf8_string>>,
 857 C<L</is_strict_utf8_string_loc>>,
 858 C<L</is_strict_utf8_string_loclen>>,
 859 C<L</is_c9strict_utf8_string>>,
 860 C<L</is_c9strict_utf8_string_loc>>,
 861 and
 862 C<L</is_c9strict_utf8_string_loclen>>.
 863
 864 =cut
 865 */
 866
 867 PERL_STATIC_INLINE bool
 868 S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
 869 {
 870     const U8 * first_variant;
 871
 872     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 873     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 874                           |UTF8_DISALLOW_PERL_EXTENDED)));
 875
 876     if (len == 0) {
 877         len = strlen((const char *)s);
 878     }
 879
 880     if (flags == 0) {
 881         return is_utf8_string(s, len);
 882     }
 883
 884     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 885                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 886     {
 887         return is_strict_utf8_string(s, len);
 888     }
 889
 890     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 891                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 892     {
 893         return is_c9strict_utf8_string(s, len);
 894     }
 895
 896     if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
 897         const U8* const send = s + len;
 898         const U8* x = first_variant;
 899
 900         while (x < send) {
 901             STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 902             if (UNLIKELY(! cur_len)) {
 903                 return FALSE;
 904             }
 905             x += cur_len;
 906         }
 907     }
 908
 909     return TRUE;
 910 }
 911
 912 /*
 913
 914 =for apidoc is_utf8_string_loc
 915
 916 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 917 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 918 "utf8ness success") in the C<ep> pointer.
 919
 920 See also C<L</is_utf8_string_loclen>>.
 921
 922 =cut
 923 */
 924
 925 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 926
 927 /*
 928
 929 =for apidoc is_utf8_string_loclen
 930
 931 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 932 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 933 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 934 encoded characters in the C<el> pointer.
 935
 936 See also C<L</is_utf8_string_loc>>.
 937
 938 =cut
 939 */
 940
 941 PERL_STATIC_INLINE bool
 942 Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 943 {
 944     const U8 * first_variant;
 945
 946     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 947
 948     if (len == 0) {
 949         len = strlen((const char *) s);
 950     }
 951
 952     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 953         if (el)
 954             *el = len;
 955
 956         if (ep) {
 957             *ep = s + len;
 958         }
 959
 960         return TRUE;
 961     }
 962
 963     {
 964         const U8* const send = s + len;
 965         const U8* x = first_variant;
 966         STRLEN outlen = first_variant - s;
 967
 968         while (x < send) {
 969             const STRLEN cur_len = isUTF8_CHAR(x, send);
 970             if (UNLIKELY(! cur_len)) {
 971                 break;
 972             }
 973             x += cur_len;
 974             outlen++;
 975         }
 976
 977         if (el)
 978             *el = outlen;
 979
 980         if (ep) {
 981             *ep = x;
 982         }
 983
 984         return (x == send);
 985     }
 986 }
 987
 988 /*
 989
 990 =for apidoc is_strict_utf8_string_loc
 991
 992 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 993 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 994 "utf8ness success") in the C<ep> pointer.
 995
 996 See also C<L</is_strict_utf8_string_loclen>>.
 997
 998 =cut
 999 */
1000
1001 #define is_strict_utf8_string_loc(s, len, ep)                               \
1002                                 is_strict_utf8_string_loclen(s, len, ep, 0)
1003
1004 /*
1005
1006 =for apidoc is_strict_utf8_string_loclen
1007
1008 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
1009 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1010 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
1011 encoded characters in the C<el> pointer.
1012
1013 See also C<L</is_strict_utf8_string_loc>>.
1014
1015 =cut
1016 */
1017
1018 PERL_STATIC_INLINE bool
1019 S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1020 {
1021     const U8 * first_variant;
1022
1023     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1024
1025     if (len == 0) {
1026         len = strlen((const char *) s);
1027     }
1028
1029     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1030         if (el)
1031             *el = len;
1032
1033         if (ep) {
1034             *ep = s + len;
1035         }
1036
1037         return TRUE;
1038     }
1039
1040     {
1041         const U8* const send = s + len;
1042         const U8* x = first_variant;
1043         STRLEN outlen = first_variant - s;
1044
1045         while (x < send) {
1046             const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1047             if (UNLIKELY(! cur_len)) {
1048                 break;
1049             }
1050             x += cur_len;
1051             outlen++;
1052         }
1053
1054         if (el)
1055             *el = outlen;
1056
1057         if (ep) {
1058             *ep = x;
1059         }
1060
1061         return (x == send);
1062     }
1063 }
1064
1065 /*
1066
1067 =for apidoc is_c9strict_utf8_string_loc
1068
1069 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1070 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1071 "utf8ness success") in the C<ep> pointer.
1072
1073 See also C<L</is_c9strict_utf8_string_loclen>>.
1074
1075 =cut
1076 */
1077
1078 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
1079                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
1080
1081 /*
1082
1083 =for apidoc is_c9strict_utf8_string_loclen
1084
1085 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1086 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1087 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1088 characters in the C<el> pointer.
1089
1090 See also C<L</is_c9strict_utf8_string_loc>>.
1091
1092 =cut
1093 */
1094
1095 PERL_STATIC_INLINE bool
1096 S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1097 {
1098     const U8 * first_variant;
1099
1100     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1101
1102     if (len == 0) {
1103         len = strlen((const char *) s);
1104     }
1105
1106     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1107         if (el)
1108             *el = len;
1109
1110         if (ep) {
1111             *ep = s + len;
1112         }
1113
1114         return TRUE;
1115     }
1116
1117     {
1118         const U8* const send = s + len;
1119         const U8* x = first_variant;
1120         STRLEN outlen = first_variant - s;
1121
1122         while (x < send) {
1123             const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1124             if (UNLIKELY(! cur_len)) {
1125                 break;
1126             }
1127             x += cur_len;
1128             outlen++;
1129         }
1130
1131         if (el)
1132             *el = outlen;
1133
1134         if (ep) {
1135             *ep = x;
1136         }
1137
1138         return (x == send);
1139     }
1140 }
1141
1142 /*
1143
1144 =for apidoc is_utf8_string_loc_flags
1145
1146 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1147 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1148 "utf8ness success") in the C<ep> pointer.
1149
1150 See also C<L</is_utf8_string_loclen_flags>>.
1151
1152 =cut
1153 */
1154
1155 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
1156                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1157
1158
1159 /* The above 3 actual functions could have been moved into the more general one
1160  * just below, and made #defines that call it with the right 'flags'.  They are
1161  * currently kept separate to increase their chances of getting inlined */
1162
1163 /*
1164
1165 =for apidoc is_utf8_string_loclen_flags
1166
1167 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1168 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1169 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
1170 encoded characters in the C<el> pointer.
1171
1172 See also C<L</is_utf8_string_loc_flags>>.
1173
1174 =cut
1175 */
1176
1177 PERL_STATIC_INLINE bool
1178 S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
1179 {
1180     const U8 * first_variant;
1181
1182     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1183     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1184                           |UTF8_DISALLOW_PERL_EXTENDED)));
1185
1186     if (len == 0) {
1187         len = strlen((const char *) s);
1188     }
1189
1190     if (flags == 0) {
1191         return is_utf8_string_loclen(s, len, ep, el);
1192     }
1193
1194     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1195                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1196     {
1197         return is_strict_utf8_string_loclen(s, len, ep, el);
1198     }
1199
1200     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1201                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1202     {
1203         return is_c9strict_utf8_string_loclen(s, len, ep, el);
1204     }
1205
1206     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1207         if (el)
1208             *el = len;
1209
1210         if (ep) {
1211             *ep = s + len;
1212         }
1213
1214         return TRUE;
1215     }
1216
1217     {
1218         const U8* send = s + len;
1219         const U8* x = first_variant;
1220         STRLEN outlen = first_variant - s;
1221
1222         while (x < send) {
1223             const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1224             if (UNLIKELY(! cur_len)) {
1225                 break;
1226             }
1227             x += cur_len;
1228             outlen++;
1229         }
1230
1231         if (el)
1232             *el = outlen;
1233
1234         if (ep) {
1235             *ep = x;
1236         }
1237
1238         return (x == send);
1239     }
1240 }
1241
1242 /*
1243 =for apidoc utf8_distance
1244
1245 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1246 and C<b>.
1247
1248 WARNING: use only if you *know* that the pointers point inside the
1249 same UTF-8 buffer.
1250
1251 =cut
1252 */
1253
1254 PERL_STATIC_INLINE IV
1255 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1256 {
1257     PERL_ARGS_ASSERT_UTF8_DISTANCE;
1258
1259     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1260 }
1261
1262 /*
1263 =for apidoc utf8_hop
1264
1265 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1266 forward or backward.
1267
1268 WARNING: do not use the following unless you *know* C<off> is within
1269 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1270 on the first byte of character or just after the last byte of a character.
1271
1272 =cut
1273 */
1274
1275 PERL_STATIC_INLINE U8 *
1276 Perl_utf8_hop(const U8 *s, SSize_t off)
1277 {
1278     PERL_ARGS_ASSERT_UTF8_HOP;
1279
1280     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1281      * the bitops (especially ~) can create illegal UTF-8.
1282      * In other words: in Perl UTF-8 is not just for Unicode. */
1283
1284     if (off >= 0) {
1285         while (off--)
1286             s += UTF8SKIP(s);
1287     }
1288     else {
1289         while (off++) {
1290             s--;
1291             while (UTF8_IS_CONTINUATION(*s))
1292                 s--;
1293         }
1294     }
1295     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1296     return (U8 *)s;
1297     GCC_DIAG_RESTORE_STMT;
1298 }
1299
1300 /*
1301 =for apidoc utf8_hop_forward
1302
1303 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1304 forward.
1305
1306 C<off> must be non-negative.
1307
1308 C<s> must be before or equal to C<end>.
1309
1310 When moving forward it will not move beyond C<end>.
1311
1312 Will not exceed this limit even if the string is not valid "UTF-8".
1313
1314 =cut
1315 */
1316
1317 PERL_STATIC_INLINE U8 *
1318 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1319 {
1320     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1321
1322     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1323      * the bitops (especially ~) can create illegal UTF-8.
1324      * In other words: in Perl UTF-8 is not just for Unicode. */
1325
1326     assert(s <= end);
1327     assert(off >= 0);
1328
1329     while (off--) {
1330         STRLEN skip = UTF8SKIP(s);
1331         if ((STRLEN)(end - s) <= skip) {
1332             GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1333             return (U8 *)end;
1334             GCC_DIAG_RESTORE_STMT;
1335         }
1336         s += skip;
1337     }
1338
1339     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1340     return (U8 *)s;
1341     GCC_DIAG_RESTORE_STMT;
1342 }
1343
1344 /*
1345 =for apidoc utf8_hop_back
1346
1347 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1348 backward.
1349
1350 C<off> must be non-positive.
1351
1352 C<s> must be after or equal to C<start>.
1353
1354 When moving backward it will not move before C<start>.
1355
1356 Will not exceed this limit even if the string is not valid "UTF-8".
1357
1358 =cut
1359 */
1360
1361 PERL_STATIC_INLINE U8 *
1362 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1363 {
1364     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1365
1366     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1367      * the bitops (especially ~) can create illegal UTF-8.
1368      * In other words: in Perl UTF-8 is not just for Unicode. */
1369
1370     assert(start <= s);
1371     assert(off <= 0);
1372
1373     while (off++ && s > start) {
1374         s--;
1375         while (UTF8_IS_CONTINUATION(*s) && s > start)
1376             s--;
1377     }
1378
1379     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1380     return (U8 *)s;
1381     GCC_DIAG_RESTORE_STMT;
1382 }
1383
1384 /*
1385 =for apidoc utf8_hop_safe
1386
1387 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1388 either forward or backward.
1389
1390 When moving backward it will not move before C<start>.
1391
1392 When moving forward it will not move beyond C<end>.
1393
1394 Will not exceed those limits even if the string is not valid "UTF-8".
1395
1396 =cut
1397 */
1398
1399 PERL_STATIC_INLINE U8 *
1400 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1401 {
1402     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1403
1404     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1405      * the bitops (especially ~) can create illegal UTF-8.
1406      * In other words: in Perl UTF-8 is not just for Unicode. */
1407
1408     assert(start <= s && s <= end);
1409
1410     if (off >= 0) {
1411         return utf8_hop_forward(s, off, end);
1412     }
1413     else {
1414         return utf8_hop_back(s, off, start);
1415     }
1416 }
1417
1418 /*
1419
1420 =for apidoc is_utf8_valid_partial_char
1421
1422 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1423 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1424 points.  Otherwise, it returns 1 if there exists at least one non-empty
1425 sequence of bytes that when appended to sequence C<s>, starting at position
1426 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1427 otherwise returns 0.
1428
1429 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1430 point.
1431
1432 This is useful when a fixed-length buffer is being tested for being well-formed
1433 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1434 it is split somewhere in the middle of the final code point's UTF-8
1435 representation.  (Presumably when the buffer is refreshed with the next chunk
1436 of data, the new first bytes will complete the partial code point.)   This
1437 function is used to verify that the final bytes in the current buffer are in
1438 fact the legal beginning of some code point, so that if they aren't, the
1439 failure can be signalled without having to wait for the next read.
1440
1441 =cut
1442 */
1443 #define is_utf8_valid_partial_char(s, e)                                    \
1444                                 is_utf8_valid_partial_char_flags(s, e, 0)
1445
1446 /*
1447
1448 =for apidoc is_utf8_valid_partial_char_flags
1449
1450 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1451 or not the input is a valid UTF-8 encoded partial character, but it takes an
1452 extra parameter, C<flags>, which can further restrict which code points are
1453 considered valid.
1454
1455 If C<flags> is 0, this behaves identically to
1456 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1457 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1458 there is any sequence of bytes that can complete the input partial character in
1459 such a way that a non-prohibited character is formed, the function returns
1460 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1461 partial character input.  But many  of the other possible excluded types can be
1462 determined from just the first one or two bytes.
1463
1464 =cut
1465  */
1466
1467 PERL_STATIC_INLINE bool
1468 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1469 {
1470     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1471
1472     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1473                           |UTF8_DISALLOW_PERL_EXTENDED)));
1474
1475     if (s >= e || s + UTF8SKIP(s) <= e) {
1476         return FALSE;
1477     }
1478
1479     return cBOOL(_is_utf8_char_helper(s, e, flags));
1480 }
1481
1482 /*
1483
1484 =for apidoc is_utf8_fixed_width_buf_flags
1485
1486 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1487 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1488 otherwise it returns FALSE.
1489
1490 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1491 without restriction.  If the final few bytes of the buffer do not form a
1492 complete code point, this will return TRUE anyway, provided that
1493 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1494
1495 If C<flags> in non-zero, it can be any combination of the
1496 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1497 same meanings.
1498
1499 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1500 returns FALSE if the final few bytes of the string don't form a complete code
1501 point.
1502
1503 =cut
1504  */
1505 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1506                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1507
1508 /*
1509
1510 =for apidoc is_utf8_fixed_width_buf_loc_flags
1511
1512 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1513 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1514 to the beginning of any partial character at the end of the buffer; if there is
1515 no partial character C<*ep> will contain C<s>+C<len>.
1516
1517 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1518
1519 =cut
1520 */
1521
1522 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1523                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1524
1525 /*
1526
1527 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1528
1529 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1530 complete, valid characters found in the C<el> pointer.
1531
1532 =cut
1533 */
1534
1535 PERL_STATIC_INLINE bool
1536 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1537                                        STRLEN len,
1538                                        const U8 **ep,
1539                                        STRLEN *el,
1540                                        const U32 flags)
1541 {
1542     const U8 * maybe_partial;
1543
1544     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1545
1546     if (! ep) {
1547         ep  = &maybe_partial;
1548     }
1549
1550     /* If it's entirely valid, return that; otherwise see if the only error is
1551      * that the final few bytes are for a partial character */
1552     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1553            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1554 }
1555
1556 /* ------------------------------- perl.h ----------------------------- */
1557
1558 /*
1559 =head1 Miscellaneous Functions
1560
1561 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1562
1563 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1564 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1565
1566 Return TRUE if the name is safe.
1567
1568 Used by the C<IS_SAFE_SYSCALL()> macro.
1569
1570 =cut
1571 */
1572
1573 PERL_STATIC_INLINE bool
1574 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1575     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1576      * perl itself uses xce*() functions which accept 8-bit strings.
1577      */
1578
1579     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1580
1581     if (len > 1) {
1582         char *null_at;
1583         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1584                 SETERRNO(ENOENT, LIB_INVARG);
1585                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1586                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1587                                    what, op_name, pv, null_at+1);
1588                 return FALSE;
1589         }
1590     }
1591
1592     return TRUE;
1593 }
1594
1595 /*
1596
1597 Return true if the supplied filename has a newline character
1598 immediately before the first (hopefully only) NUL.
1599
1600 My original look at this incorrectly used the len from SvPV(), but
1601 that's incorrect, since we allow for a NUL in pv[len-1].
1602
1603 So instead, strlen() and work from there.
1604
1605 This allow for the user reading a filename, forgetting to chomp it,
1606 then calling:
1607
1608   open my $foo, "$file\0";
1609
1610 */
1611
1612 #ifdef PERL_CORE
1613
1614 PERL_STATIC_INLINE bool
1615 S_should_warn_nl(const char *pv) {
1616     STRLEN len;
1617
1618     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1619
1620     len = strlen(pv);
1621
1622     return len > 0 && pv[len-1] == '\n';
1623 }
1624
1625 #endif
1626
1627 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1628
1629 #define MAX_CHARSET_NAME_LENGTH 2
1630
1631 PERL_STATIC_INLINE const char *
1632 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1633 {
1634     /* Returns a string that corresponds to the name of the regex character set
1635      * given by 'flags', and *lenp is set the length of that string, which
1636      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1637
1638     *lenp = 1;
1639     switch (get_regex_charset(flags)) {
1640         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1641         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1642         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1643         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1644         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1645             *lenp = 2;
1646             return ASCII_MORE_RESTRICT_PAT_MODS;
1647     }
1648     /* The NOT_REACHED; hides an assert() which has a rather complex
1649      * definition in perl.h. */
1650     NOT_REACHED; /* NOTREACHED */
1651     return "?";     /* Unknown */
1652 }
1653
1654 /*
1655
1656 Return false if any get magic is on the SV other than taint magic.
1657
1658 */
1659
1660 PERL_STATIC_INLINE bool
1661 S_sv_only_taint_gmagic(SV *sv) {
1662     MAGIC *mg = SvMAGIC(sv);
1663
1664     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1665
1666     while (mg) {
1667         if (mg->mg_type != PERL_MAGIC_taint
1668             && !(mg->mg_flags & MGf_GSKIP)
1669             && mg->mg_virtual->svt_get) {
1670             return FALSE;
1671         }
1672         mg = mg->mg_moremagic;
1673     }
1674
1675     return TRUE;
1676 }
1677
1678 /* ------------------ cop.h ------------------------------------------- */
1679
1680
1681 /* Enter a block. Push a new base context and return its address. */
1682
1683 PERL_STATIC_INLINE PERL_CONTEXT *
1684 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1685 {
1686     PERL_CONTEXT * cx;
1687
1688     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1689
1690     CXINC;
1691     cx = CX_CUR();
1692     cx->cx_type        = type;
1693     cx->blk_gimme      = gimme;
1694     cx->blk_oldsaveix  = saveix;
1695     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1696     cx->blk_oldcop     = PL_curcop;
1697     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1698     cx->blk_oldscopesp = PL_scopestack_ix;
1699     cx->blk_oldpm      = PL_curpm;
1700     cx->blk_old_tmpsfloor = PL_tmps_floor;
1701
1702     PL_tmps_floor        = PL_tmps_ix;
1703     CX_DEBUG(cx, "PUSH");
1704     return cx;
1705 }
1706
1707
1708 /* Exit a block (RETURN and LAST). */
1709
1710 PERL_STATIC_INLINE void
1711 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1712 {
1713     PERL_ARGS_ASSERT_CX_POPBLOCK;
1714
1715     CX_DEBUG(cx, "POP");
1716     /* these 3 are common to cx_popblock and cx_topblock */
1717     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1718     PL_scopestack_ix = cx->blk_oldscopesp;
1719     PL_curpm         = cx->blk_oldpm;
1720
1721     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1722      * and leaves a CX entry lying around for repeated use, so
1723      * skip for multicall */                  \
1724     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1725             || PL_savestack_ix == cx->blk_oldsaveix);
1726     PL_curcop     = cx->blk_oldcop;
1727     PL_tmps_floor = cx->blk_old_tmpsfloor;
1728 }
1729
1730 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1731  * Whereas cx_popblock() restores the state to the point just before
1732  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1733  * *after* cx_pushblock() was called. */
1734
1735 PERL_STATIC_INLINE void
1736 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1737 {
1738     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1739
1740     CX_DEBUG(cx, "TOP");
1741     /* these 3 are common to cx_popblock and cx_topblock */
1742     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1743     PL_scopestack_ix = cx->blk_oldscopesp;
1744     PL_curpm         = cx->blk_oldpm;
1745
1746     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1747 }
1748
1749
1750 PERL_STATIC_INLINE void
1751 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1752 {
1753     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1754
1755     PERL_ARGS_ASSERT_CX_PUSHSUB;
1756
1757     PERL_DTRACE_PROBE_ENTRY(cv);
1758     cx->blk_sub.cv = cv;
1759     cx->blk_sub.olddepth = CvDEPTH(cv);
1760     cx->blk_sub.prevcomppad = PL_comppad;
1761     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1762     cx->blk_sub.retop = retop;
1763     SvREFCNT_inc_simple_void_NN(cv);
1764     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1765 }
1766
1767
1768 /* subsets of cx_popsub() */
1769
1770 PERL_STATIC_INLINE void
1771 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1772 {
1773     CV *cv;
1774
1775     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1776     assert(CxTYPE(cx) == CXt_SUB);
1777
1778     PL_comppad = cx->blk_sub.prevcomppad;
1779     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1780     cv = cx->blk_sub.cv;
1781     CvDEPTH(cv) = cx->blk_sub.olddepth;
1782     cx->blk_sub.cv = NULL;
1783     SvREFCNT_dec(cv);
1784 }
1785
1786
1787 /* handle the @_ part of leaving a sub */
1788
1789 PERL_STATIC_INLINE void
1790 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1791 {
1792     AV *av;
1793
1794     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1795     assert(CxTYPE(cx) == CXt_SUB);
1796     assert(AvARRAY(MUTABLE_AV(
1797         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1798                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1799
1800     CX_POP_SAVEARRAY(cx);
1801     av = MUTABLE_AV(PAD_SVl(0));
1802     if (UNLIKELY(AvREAL(av)))
1803         /* abandon @_ if it got reified */
1804         clear_defarray(av, 0);
1805     else {
1806         CLEAR_ARGARRAY(av);
1807     }
1808 }
1809
1810
1811 PERL_STATIC_INLINE void
1812 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1813 {
1814     PERL_ARGS_ASSERT_CX_POPSUB;
1815     assert(CxTYPE(cx) == CXt_SUB);
1816
1817     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1818
1819     if (CxHASARGS(cx))
1820         cx_popsub_args(cx);
1821     cx_popsub_common(cx);
1822 }
1823
1824
1825 PERL_STATIC_INLINE void
1826 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1827 {
1828     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1829
1830     cx->blk_format.cv          = cv;
1831     cx->blk_format.retop       = retop;
1832     cx->blk_format.gv          = gv;
1833     cx->blk_format.dfoutgv     = PL_defoutgv;
1834     cx->blk_format.prevcomppad = PL_comppad;
1835     cx->blk_u16                = 0;
1836
1837     SvREFCNT_inc_simple_void_NN(cv);
1838     CvDEPTH(cv)++;
1839     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1840 }
1841
1842
1843 PERL_STATIC_INLINE void
1844 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1845 {
1846     CV *cv;
1847     GV *dfout;
1848
1849     PERL_ARGS_ASSERT_CX_POPFORMAT;
1850     assert(CxTYPE(cx) == CXt_FORMAT);
1851
1852     dfout = cx->blk_format.dfoutgv;
1853     setdefout(dfout);
1854     cx->blk_format.dfoutgv = NULL;
1855     SvREFCNT_dec_NN(dfout);
1856
1857     PL_comppad = cx->blk_format.prevcomppad;
1858     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1859     cv = cx->blk_format.cv;
1860     cx->blk_format.cv = NULL;
1861     --CvDEPTH(cv);
1862     SvREFCNT_dec_NN(cv);
1863 }
1864
1865
1866 PERL_STATIC_INLINE void
1867 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1868 {
1869     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1870
1871     cx->blk_eval.retop         = retop;
1872     cx->blk_eval.old_namesv    = namesv;
1873     cx->blk_eval.old_eval_root = PL_eval_root;
1874     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1875     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1876     cx->blk_eval.cur_top_env   = PL_top_env;
1877
1878     assert(!(PL_in_eval     & ~ 0x3F));
1879     assert(!(PL_op->op_type & ~0x1FF));
1880     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1881 }
1882
1883
1884 PERL_STATIC_INLINE void
1885 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1886 {
1887     SV *sv;
1888
1889     PERL_ARGS_ASSERT_CX_POPEVAL;
1890     assert(CxTYPE(cx) == CXt_EVAL);
1891
1892     PL_in_eval = CxOLD_IN_EVAL(cx);
1893     assert(!(PL_in_eval & 0xc0));
1894     PL_eval_root = cx->blk_eval.old_eval_root;
1895     sv = cx->blk_eval.cur_text;
1896     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1897         cx->blk_eval.cur_text = NULL;
1898         SvREFCNT_dec_NN(sv);
1899     }
1900
1901     sv = cx->blk_eval.old_namesv;
1902     if (sv) {
1903         cx->blk_eval.old_namesv = NULL;
1904         SvREFCNT_dec_NN(sv);
1905     }
1906 }
1907
1908
1909 /* push a plain loop, i.e.
1910  *     { block }
1911  *     while (cond) { block }
1912  *     for (init;cond;continue) { block }
1913  * This loop can be last/redo'ed etc.
1914  */
1915
1916 PERL_STATIC_INLINE void
1917 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1918 {
1919     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1920     cx->blk_loop.my_op = cLOOP;
1921 }
1922
1923
1924 /* push a true for loop, i.e.
1925  *     for var (list) { block }
1926  */
1927
1928 PERL_STATIC_INLINE void
1929 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1930 {
1931     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1932
1933     /* this one line is common with cx_pushloop_plain */
1934     cx->blk_loop.my_op = cLOOP;
1935
1936     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1937     cx->blk_loop.itersave      = itersave;
1938 #ifdef USE_ITHREADS
1939     cx->blk_loop.oldcomppad = PL_comppad;
1940 #endif
1941 }
1942
1943
1944 /* pop all loop types, including plain */
1945
1946 PERL_STATIC_INLINE void
1947 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1948 {
1949     PERL_ARGS_ASSERT_CX_POPLOOP;
1950
1951     assert(CxTYPE_is_LOOP(cx));
1952     if (  CxTYPE(cx) == CXt_LOOP_ARY
1953        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1954     {
1955         /* Free ary or cur. This assumes that state_u.ary.ary
1956          * aligns with state_u.lazysv.cur. See cx_dup() */
1957         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1958         cx->blk_loop.state_u.lazysv.cur = NULL;
1959         SvREFCNT_dec_NN(sv);
1960         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1961             sv = cx->blk_loop.state_u.lazysv.end;
1962             cx->blk_loop.state_u.lazysv.end = NULL;
1963             SvREFCNT_dec_NN(sv);
1964         }
1965     }
1966     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1967         SV *cursv;
1968         SV **svp = (cx)->blk_loop.itervar_u.svp;
1969         if ((cx->cx_type & CXp_FOR_GV))
1970             svp = &GvSV((GV*)svp);
1971         cursv = *svp;
1972         *svp = cx->blk_loop.itersave;
1973         cx->blk_loop.itersave = NULL;
1974         SvREFCNT_dec(cursv);
1975     }
1976 }
1977
1978
1979 PERL_STATIC_INLINE void
1980 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1981 {
1982     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1983
1984     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1985 }
1986
1987
1988 PERL_STATIC_INLINE void
1989 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1990 {
1991     PERL_ARGS_ASSERT_CX_POPWHEN;
1992     assert(CxTYPE(cx) == CXt_WHEN);
1993
1994     PERL_UNUSED_ARG(cx);
1995     PERL_UNUSED_CONTEXT;
1996     /* currently NOOP */
1997 }
1998
1999
2000 PERL_STATIC_INLINE void
2001 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
2002 {
2003     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
2004
2005     cx->blk_givwhen.leave_op = cLOGOP->op_other;
2006     cx->blk_givwhen.defsv_save = orig_defsv;
2007 }
2008
2009
2010 PERL_STATIC_INLINE void
2011 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
2012 {
2013     SV *sv;
2014
2015     PERL_ARGS_ASSERT_CX_POPGIVEN;
2016     assert(CxTYPE(cx) == CXt_GIVEN);
2017
2018     sv = GvSV(PL_defgv);
2019     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2020     cx->blk_givwhen.defsv_save = NULL;
2021     SvREFCNT_dec(sv);
2022 }
2023
2024 /* ------------------ util.h ------------------------------------------- */
2025
2026 /*
2027 =head1 Miscellaneous Functions
2028
2029 =for apidoc foldEQ
2030
2031 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2032 same
2033 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
2034 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
2035 range bytes match only themselves.
2036
2037 =cut
2038 */
2039
2040 PERL_STATIC_INLINE I32
2041 Perl_foldEQ(const char *s1, const char *s2, I32 len)
2042 {
2043     const U8 *a = (const U8 *)s1;
2044     const U8 *b = (const U8 *)s2;
2045
2046     PERL_ARGS_ASSERT_FOLDEQ;
2047
2048     assert(len >= 0);
2049
2050     while (len--) {
2051         if (*a != *b && *a != PL_fold[*b])
2052             return 0;
2053         a++,b++;
2054     }
2055     return 1;
2056 }
2057
2058 PERL_STATIC_INLINE I32
2059 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2060 {
2061     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
2062      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2063      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
2064      * does it check that the strings each have at least 'len' characters */
2065
2066     const U8 *a = (const U8 *)s1;
2067     const U8 *b = (const U8 *)s2;
2068
2069     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2070
2071     assert(len >= 0);
2072
2073     while (len--) {
2074         if (*a != *b && *a != PL_fold_latin1[*b]) {
2075             return 0;
2076         }
2077         a++, b++;
2078     }
2079     return 1;
2080 }
2081
2082 /*
2083 =for apidoc foldEQ_locale
2084
2085 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2086 same case-insensitively in the current locale; false otherwise.
2087
2088 =cut
2089 */
2090
2091 PERL_STATIC_INLINE I32
2092 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2093 {
2094     dVAR;
2095     const U8 *a = (const U8 *)s1;
2096     const U8 *b = (const U8 *)s2;
2097
2098     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2099
2100     assert(len >= 0);
2101
2102     while (len--) {
2103         if (*a != *b && *a != PL_fold_locale[*b])
2104             return 0;
2105         a++,b++;
2106     }
2107     return 1;
2108 }
2109
2110 #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2111
2112 PERL_STATIC_INLINE void *
2113 S_my_memrchr(const char * s, const char c, const STRLEN len)
2114 {
2115     /* memrchr(), since many platforms lack it */
2116
2117     const char * t = s + len - 1;
2118
2119     PERL_ARGS_ASSERT_MY_MEMRCHR;
2120
2121     while (t >= s) {
2122         if (*t == c) {
2123             return (void *) t;
2124         }
2125         t--;
2126     }
2127
2128     return NULL;
2129 }
2130
2131 #endif
2132
2133 /*
2134  * ex: set ts=8 sts=4 sw=4 et:
2135  */