inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     XPV* const p = (XPV*)SvANY(re);
 157     assert(isREGEXP(re));
 158     return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
 159                                    : (struct regexp *)p;
 160 }
 161
 162 /* ------------------------------- sv.h ------------------------------- */
 163
 164 PERL_STATIC_INLINE SV *
 165 S_SvREFCNT_inc(SV *sv)
 166 {
 167     if (LIKELY(sv != NULL))
 168         SvREFCNT(sv)++;
 169     return sv;
 170 }
 171 PERL_STATIC_INLINE SV *
 172 S_SvREFCNT_inc_NN(SV *sv)
 173 {
 174     SvREFCNT(sv)++;
 175     return sv;
 176 }
 177 PERL_STATIC_INLINE void
 178 S_SvREFCNT_inc_void(SV *sv)
 179 {
 180     if (LIKELY(sv != NULL))
 181         SvREFCNT(sv)++;
 182 }
 183 PERL_STATIC_INLINE void
 184 S_SvREFCNT_dec(pTHX_ SV *sv)
 185 {
 186     if (LIKELY(sv != NULL)) {
 187         U32 rc = SvREFCNT(sv);
 188         if (LIKELY(rc > 1))
 189             SvREFCNT(sv) = rc - 1;
 190         else
 191             Perl_sv_free2(aTHX_ sv, rc);
 192     }
 193 }
 194
 195 PERL_STATIC_INLINE void
 196 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 197 {
 198     U32 rc = SvREFCNT(sv);
 199     if (LIKELY(rc > 1))
 200         SvREFCNT(sv) = rc - 1;
 201     else
 202         Perl_sv_free2(aTHX_ sv, rc);
 203 }
 204
 205 PERL_STATIC_INLINE void
 206 SvAMAGIC_on(SV *sv)
 207 {
 208     assert(SvROK(sv));
 209     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 210 }
 211 PERL_STATIC_INLINE void
 212 SvAMAGIC_off(SV *sv)
 213 {
 214     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 215         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 216 }
 217
 218 PERL_STATIC_INLINE U32
 219 S_SvPADSTALE_on(SV *sv)
 220 {
 221     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 222     return SvFLAGS(sv) |= SVs_PADSTALE;
 223 }
 224 PERL_STATIC_INLINE U32
 225 S_SvPADSTALE_off(SV *sv)
 226 {
 227     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 228     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 229 }
 230 #if defined(PERL_CORE) || defined (PERL_EXT)
 231 PERL_STATIC_INLINE STRLEN
 232 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 233 {
 234     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 235     if (SvGAMAGIC(sv)) {
 236         U8 *hopped = utf8_hop((U8 *)pv, pos);
 237         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 238         return (STRLEN)(hopped - (U8 *)pv);
 239     }
 240     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 241 }
 242 #endif
 243
 244 /* ------------------------------- handy.h ------------------------------- */
 245
 246 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 247 GCC_DIAG_IGNORE_DECL(-Wunused-function);
 248 static void
 249 S_croak_memory_wrap(void)
 250 {
 251     Perl_croak_nocontext("%s",PL_memory_wrap);
 252 }
 253 GCC_DIAG_RESTORE_DECL;
 254
 255 /* ------------------------------- utf8.h ------------------------------- */
 256
 257 /*
 258 =head1 Unicode Support
 259 */
 260
 261 PERL_STATIC_INLINE void
 262 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 263 {
 264     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 265      * encoded string at '*dest', updating '*dest' to include it */
 266
 267     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 268
 269     if (NATIVE_BYTE_IS_INVARIANT(byte))
 270         *((*dest)++) = byte;
 271     else {
 272         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 273         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 274     }
 275 }
 276
 277 /*
 278 =for apidoc valid_utf8_to_uvchr
 279 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 280 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 281 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 282 non-Unicode code points are allowed.
 283
 284 =cut
 285
 286  */
 287
 288 PERL_STATIC_INLINE UV
 289 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 290 {
 291     const UV expectlen = UTF8SKIP(s);
 292     const U8* send = s + expectlen;
 293     UV uv = *s;
 294
 295     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 296
 297     if (retlen) {
 298         *retlen = expectlen;
 299     }
 300
 301     /* An invariant is trivially returned */
 302     if (expectlen == 1) {
 303         return uv;
 304     }
 305
 306     /* Remove the leading bits that indicate the number of bytes, leaving just
 307      * the bits that are part of the value */
 308     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 309
 310     /* Now, loop through the remaining bytes, accumulating each into the
 311      * working total as we go.  (I khw tried unrolling the loop for up to 4
 312      * bytes, but there was no performance improvement) */
 313     for (++s; s < send; s++) {
 314         uv = UTF8_ACCUMULATE(uv, *s);
 315     }
 316
 317     return UNI_TO_NATIVE(uv);
 318
 319 }
 320
 321 /*
 322 =for apidoc is_utf8_invariant_string
 323
 324 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 325 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 326 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 327 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 328 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 329 characters are invariant, but so also are the C1 controls.
 330
 331 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 332 use this option, that C<s> can't have embedded C<NUL> characters and has to
 333 have a terminating C<NUL> byte).
 334
 335 See also
 336 C<L</is_utf8_string>>,
 337 C<L</is_utf8_string_flags>>,
 338 C<L</is_utf8_string_loc>>,
 339 C<L</is_utf8_string_loc_flags>>,
 340 C<L</is_utf8_string_loclen>>,
 341 C<L</is_utf8_string_loclen_flags>>,
 342 C<L</is_utf8_fixed_width_buf_flags>>,
 343 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 344 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 345 C<L</is_strict_utf8_string>>,
 346 C<L</is_strict_utf8_string_loc>>,
 347 C<L</is_strict_utf8_string_loclen>>,
 348 C<L</is_c9strict_utf8_string>>,
 349 C<L</is_c9strict_utf8_string_loc>>,
 350 and
 351 C<L</is_c9strict_utf8_string_loclen>>.
 352
 353 =cut
 354
 355 */
 356
 357 #define is_utf8_invariant_string(s, len)                                    \
 358                                 is_utf8_invariant_string_loc(s, len, NULL)
 359
 360 /*
 361 =for apidoc is_utf8_invariant_string_loc
 362
 363 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
 364 the first UTF-8 variant character in the C<ep> pointer; if all characters are
 365 UTF-8 invariant, this function does not change the contents of C<*ep>.
 366
 367 =cut
 368
 369 */
 370
 371 PERL_STATIC_INLINE bool
 372 S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
 373 {
 374     const U8* send;
 375     const U8* x = s;
 376
 377     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 378
 379     if (len == 0) {
 380         len = strlen((const char *)s);
 381     }
 382
 383     send = s + len;
 384
 385 #ifndef EBCDIC
 386
 387 /* This looks like 0x010101... */
 388 #define PERL_COUNT_MULTIPLIER   (~ (UINTMAX_C(0)) / 0xFF)
 389
 390 /* This looks like 0x808080... */
 391 #define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
 392 #define PERL_WORDSIZE            sizeof(PERL_COUNT_MULTIPLIER)
 393 #define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
 394
 395 /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
 396  * or'ing together the lowest bits of 'x'.  Hopefully the final term gets
 397  * optimized out completely on a 32-bit system, and its mask gets optimized out
 398  * on a 64-bit system */
 399 #define PERL_IS_SUBWORD_ADDR(x) (1 & (       PTR2nat(x)                       \
 400                                       |   (  PTR2nat(x) >> 1)                 \
 401                                       | ( ( (PTR2nat(x)                       \
 402                                            & PERL_WORD_BOUNDARY_MASK) >> 2))))
 403
 404     /* Do the word-at-a-time iff there is at least one usable full word.  That
 405      * means that after advancing to a word boundary, there still is at least a
 406      * full word left.  The number of bytes needed to advance is 'wordsize -
 407      * offset' unless offset is 0. */
 408     if ((STRLEN) (send - x) >= PERL_WORDSIZE
 409
 410                             /* This term is wordsize if subword; 0 if not */
 411                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 412
 413                             /* 'offset' */
 414                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 415     {
 416
 417         /* Process per-byte until reach word boundary.  XXX This loop could be
 418          * eliminated if we knew that this platform had fast unaligned reads */
 419         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 420             if (! UTF8_IS_INVARIANT(*x)) {
 421                 if (ep) {
 422                     *ep = x;
 423                 }
 424
 425                 return FALSE;
 426             }
 427             x++;
 428         }
 429
 430         /* Here, we know we have at least one full word to process.  Process
 431          * per-word as long as we have at least a full word left */
 432         do {
 433             if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK)  {
 434
 435                 /* Found a variant.  Just return if caller doesn't want its
 436                  * exact position */
 437                 if (! ep) {
 438                     return FALSE;
 439                 }
 440
 441 #if   BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    \
 442    || BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 443
 444                 *ep = x + _variant_byte_number(* (PERL_UINTMAX_T *) x);
 445                 assert(*ep >= s && *ep < send);
 446
 447                 return FALSE;
 448
 449 #else   /* If weird byte order, drop into next loop to do byte-at-a-time
 450            checks. */
 451
 452                 break;
 453 #endif
 454             }
 455
 456             x += PERL_WORDSIZE;
 457
 458         } while (x + PERL_WORDSIZE <= send);
 459     }
 460
 461 #endif
 462
 463     /* Process per-byte */
 464     while (x < send) {
 465         if (! UTF8_IS_INVARIANT(*x)) {
 466             if (ep) {
 467                 *ep = x;
 468             }
 469
 470             return FALSE;
 471         }
 472
 473         x++;
 474     }
 475
 476     return TRUE;
 477 }
 478
 479 #ifndef EBCDIC
 480
 481 PERL_STATIC_INLINE unsigned int
 482 S__variant_byte_number(PERL_UINTMAX_T word)
 483 {
 484
 485     /* This returns the position in a word (0..7) of the first variant byte in
 486      * it.  This is a helper function.  Note that there are no branches */
 487
 488     assert(word);
 489
 490     /* Get just the msb bits of each byte */
 491     word &= PERL_VARIANTS_WORD_MASK;
 492
 493 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678
 494
 495     /* Bytes are stored like
 496      *  Byte8 ... Byte2 Byte1
 497      *  63..56...15...8 7...0
 498      *
 499      *  Isolate the lsb;
 500      * https://stackoverflow.com/questions/757059/position-of-least-significant-bit-that-is-set
 501      *
 502      * The word will look this this, with a rightmost set bit in position 's':
 503      * ('x's are don't cares)
 504      *      s
 505      *  x..x100..0
 506      *  x..xx10..0      Right shift (rightmost 0 is shifted off)
 507      *  x..xx01..1      Subtract 1, turns all the trailing zeros into 1's and
 508      *                  the 1 just to their left into a 0; the remainder is
 509      *                  untouched
 510      *  0..0011..1      The xor with x..xx10..0 clears that remainder, sets
 511      *                  bottom to all 1
 512      *  0..0100..0      Add 1 to clear the word except for the bit in 's'
 513      *
 514      * Another method is to do 'word &= -word'; but it generates a compiler
 515      * message on some platforms about taking the negative of an unsigned */
 516
 517     word >>= 1;
 518     word = 1 + (word ^ (word - 1));
 519
 520 #  elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 521
 522     /* Bytes are stored like
 523      *  Byte1 Byte2  ... Byte8
 524      * 63..56 55..47 ... 7...0
 525      *
 526      * Isolate the msb; http://codeforces.com/blog/entry/10330
 527      *
 528      * Only the most significant set bit matters.  Or'ing word with its right
 529      * shift of 1 makes that bit and the next one to its right both 1.  Then
 530      * right shifting by 2 makes for 4 1-bits in a row. ...  We end with the
 531      * msb and all to the right being 1. */
 532     word |= word >>  1;
 533     word |= word >>  2;
 534     word |= word >>  4;
 535     word |= word >>  8;
 536     word |= word >> 16;
 537     word |= word >> 32;  /* This should get optimized out on 32-bit systems. */
 538
 539     /* Then subtracting the right shift by 1 clears all but the left-most of
 540      * the 1 bits, which is our desired result */
 541     word -= (word >> 1);
 542
 543 #  else
 544 #    error Unexpected byte order
 545 #  endif
 546
 547     /* Here 'word' has a single bit set, the  msb is of the first byte which
 548      * has it set.  Calculate that position in the word.  We can use this
 549      * specialized solution: https://stackoverflow.com/a/32339674/1626653,
 550      * assumes an 8-bit byte */
 551     word = (word >> 7) * (( 7ULL << 56) | (15ULL << 48) | (23ULL << 40)
 552                         | (31ULL << 32) | (39ULL << 24) | (47ULL << 16)
 553                         | (55ULL <<  8) | (63ULL <<  0));
 554     word >>= PERL_WORDSIZE * 7; /* >> by either 56 or 24 */
 555
 556     /* Here, word contains the position 7..63 of that bit.  Convert to 0..7 */
 557     word = ((word + 1) >> 3) - 1;
 558
 559 #  if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321
 560
 561     /* And invert the result */
 562     word = CHARBITS - word - 1;
 563
 564 #  endif
 565
 566     return (unsigned int) word;
 567 }
 568
 569 #endif /* ! EBCDIC */
 570 #if defined(PERL_CORE) || defined(PERL_EXT)
 571
 572 /*
 573 =for apidoc variant_under_utf8_count
 574
 575 This function looks at the sequence of bytes between C<s> and C<e>, which are
 576 assumed to be encoded in ASCII/Latin1, and returns how many of them would
 577 change should the string be translated into UTF-8.  Due to the nature of UTF-8,
 578 each of these would occupy two bytes instead of the single one in the input
 579 string.  Thus, this function returns the precise number of bytes the string
 580 would expand by when translated to UTF-8.
 581
 582 Unlike most of the other functions that have C<utf8> in their name, the input
 583 to this function is NOT a UTF-8-encoded string.  The function name is slightly
 584 I<odd> to emphasize this.
 585
 586 This function is internal to Perl because khw thinks that any XS code that
 587 would want this is probably operating too close to the internals.  Presenting a
 588 valid use case could change that.
 589
 590 See also
 591 C<L<perlapi/is_utf8_invariant_string>>
 592 and
 593 C<L<perlapi/is_utf8_invariant_string_loc>>,
 594
 595 =cut
 596
 597 */
 598
 599 PERL_STATIC_INLINE Size_t
 600 S_variant_under_utf8_count(const U8* const s, const U8* const e)
 601 {
 602     const U8* x = s;
 603     Size_t count = 0;
 604
 605     PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
 606
 607 #  ifndef EBCDIC
 608
 609     /* Test if the string is long enough to use word-at-a-time.  (Logic is the
 610      * same as for is_utf8_invariant_string()) */
 611     if ((STRLEN) (e - x) >= PERL_WORDSIZE
 612                           + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
 613                           - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
 614     {
 615
 616         /* Process per-byte until reach word boundary.  XXX This loop could be
 617          * eliminated if we knew that this platform had fast unaligned reads */
 618         while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
 619             count += ! UTF8_IS_INVARIANT(*x++);
 620         }
 621
 622         /* Process per-word as long as we have at least a full word left */
 623         do {    /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
 624                    explanation of how this works */
 625             count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
 626                       * PERL_COUNT_MULTIPLIER)
 627                     >> ((PERL_WORDSIZE - 1) * CHARBITS);
 628             x += PERL_WORDSIZE;
 629         } while (x + PERL_WORDSIZE <= e);
 630     }
 631
 632 #  endif
 633
 634     /* Process per-byte */
 635     while (x < e) {
 636         if (! UTF8_IS_INVARIANT(*x)) {
 637             count++;
 638         }
 639
 640         x++;
 641     }
 642
 643     return count;
 644 }
 645
 646 #endif
 647
 648 #ifndef PERL_IN_REGEXEC_C   /* Keep  these around for that file */
 649 #  undef PERL_WORDSIZE
 650 #  undef PERL_COUNT_MULTIPLIER
 651 #  undef PERL_WORD_BOUNDARY_MASK
 652 #  undef PERL_VARIANTS_WORD_MASK
 653 #endif
 654
 655 /*
 656 =for apidoc is_utf8_string
 657
 658 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 659 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 660 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 661 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 662 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 663
 664 This function considers Perl's extended UTF-8 to be valid.  That means that
 665 code points above Unicode, surrogates, and non-character code points are
 666 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 667 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 668 code points are considered valid.
 669
 670 See also
 671 C<L</is_utf8_invariant_string>>,
 672 C<L</is_utf8_invariant_string_loc>>,
 673 C<L</is_utf8_string_loc>>,
 674 C<L</is_utf8_string_loclen>>,
 675 C<L</is_utf8_fixed_width_buf_flags>>,
 676 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 677 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 678
 679 =cut
 680 */
 681
 682 #define is_utf8_string(s, len)  is_utf8_string_loclen(s, len, NULL, NULL)
 683
 684 #if defined(PERL_CORE) || defined (PERL_EXT)
 685
 686 /*
 687 =for apidoc is_utf8_non_invariant_string
 688
 689 Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
 690 C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
 691 UTF-8; otherwise returns FALSE.
 692
 693 A TRUE return means that at least one code point represented by the sequence
 694 either is a wide character not representable as a single byte, or the
 695 representation differs depending on whether the sequence is encoded in UTF-8 or
 696 not.
 697
 698 See also
 699 C<L<perlapi/is_utf8_invariant_string>>,
 700 C<L<perlapi/is_utf8_string>>
 701
 702 =cut
 703
 704 This is commonly used to determine if a SV's UTF-8 flag should be turned on.
 705 It generally needn't be if its string is entirely UTF-8 invariant, and it
 706 shouldn't be if it otherwise contains invalid UTF-8.
 707
 708 It is an internal function because khw thinks that XS code shouldn't be working
 709 at this low a level.  A valid use case could change that.
 710
 711 */
 712
 713 PERL_STATIC_INLINE bool
 714 S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
 715 {
 716     const U8 * first_variant;
 717
 718     PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
 719
 720     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 721         return FALSE;
 722     }
 723
 724     return is_utf8_string(first_variant, len - (first_variant - s));
 725 }
 726
 727 #endif
 728
 729 /*
 730 =for apidoc is_strict_utf8_string
 731
 732 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 733 UTF-8-encoded string that is fully interchangeable by any application using
 734 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 735 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 736 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 737 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 738
 739 This function returns FALSE for strings containing any
 740 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 741 non-character code points.
 742
 743 See also
 744 C<L</is_utf8_invariant_string>>,
 745 C<L</is_utf8_invariant_string_loc>>,
 746 C<L</is_utf8_string>>,
 747 C<L</is_utf8_string_flags>>,
 748 C<L</is_utf8_string_loc>>,
 749 C<L</is_utf8_string_loc_flags>>,
 750 C<L</is_utf8_string_loclen>>,
 751 C<L</is_utf8_string_loclen_flags>>,
 752 C<L</is_utf8_fixed_width_buf_flags>>,
 753 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 754 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 755 C<L</is_strict_utf8_string_loc>>,
 756 C<L</is_strict_utf8_string_loclen>>,
 757 C<L</is_c9strict_utf8_string>>,
 758 C<L</is_c9strict_utf8_string_loc>>,
 759 and
 760 C<L</is_c9strict_utf8_string_loclen>>.
 761
 762 =cut
 763 */
 764
 765 #define is_strict_utf8_string(s, len)  is_strict_utf8_string_loclen(s, len, NULL, NULL)
 766
 767 /*
 768 =for apidoc is_c9strict_utf8_string
 769
 770 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 771 UTF-8-encoded string that conforms to
 772 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 773 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 774 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 775 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 776 characters being ASCII constitute 'a valid UTF-8 string'.
 777
 778 This function returns FALSE for strings containing any code points above the
 779 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 780 code points per
 781 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 782
 783 See also
 784 C<L</is_utf8_invariant_string>>,
 785 C<L</is_utf8_invariant_string_loc>>,
 786 C<L</is_utf8_string>>,
 787 C<L</is_utf8_string_flags>>,
 788 C<L</is_utf8_string_loc>>,
 789 C<L</is_utf8_string_loc_flags>>,
 790 C<L</is_utf8_string_loclen>>,
 791 C<L</is_utf8_string_loclen_flags>>,
 792 C<L</is_utf8_fixed_width_buf_flags>>,
 793 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 794 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 795 C<L</is_strict_utf8_string>>,
 796 C<L</is_strict_utf8_string_loc>>,
 797 C<L</is_strict_utf8_string_loclen>>,
 798 C<L</is_c9strict_utf8_string_loc>>,
 799 and
 800 C<L</is_c9strict_utf8_string_loclen>>.
 801
 802 =cut
 803 */
 804
 805 #define is_c9strict_utf8_string(s, len)  is_c9strict_utf8_string_loclen(s, len, NULL, 0)
 806
 807 /*
 808 =for apidoc is_utf8_string_flags
 809
 810 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 811 UTF-8 string, subject to the restrictions imposed by C<flags>;
 812 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 813 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 814 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 815 that all characters being ASCII constitute 'a valid UTF-8 string'.
 816
 817 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 818 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 819 as C<L</is_strict_utf8_string>>; and if C<flags> is
 820 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 821 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 822 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 823 C<L</utf8n_to_uvchr>>, with the same meanings.
 824
 825 See also
 826 C<L</is_utf8_invariant_string>>,
 827 C<L</is_utf8_invariant_string_loc>>,
 828 C<L</is_utf8_string>>,
 829 C<L</is_utf8_string_loc>>,
 830 C<L</is_utf8_string_loc_flags>>,
 831 C<L</is_utf8_string_loclen>>,
 832 C<L</is_utf8_string_loclen_flags>>,
 833 C<L</is_utf8_fixed_width_buf_flags>>,
 834 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 835 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 836 C<L</is_strict_utf8_string>>,
 837 C<L</is_strict_utf8_string_loc>>,
 838 C<L</is_strict_utf8_string_loclen>>,
 839 C<L</is_c9strict_utf8_string>>,
 840 C<L</is_c9strict_utf8_string_loc>>,
 841 and
 842 C<L</is_c9strict_utf8_string_loclen>>.
 843
 844 =cut
 845 */
 846
 847 PERL_STATIC_INLINE bool
 848 S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
 849 {
 850     const U8 * first_variant;
 851
 852     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 853     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 854                           |UTF8_DISALLOW_PERL_EXTENDED)));
 855
 856     if (len == 0) {
 857         len = strlen((const char *)s);
 858     }
 859
 860     if (flags == 0) {
 861         return is_utf8_string(s, len);
 862     }
 863
 864     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 865                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 866     {
 867         return is_strict_utf8_string(s, len);
 868     }
 869
 870     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 871                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 872     {
 873         return is_c9strict_utf8_string(s, len);
 874     }
 875
 876     if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
 877         const U8* const send = s + len;
 878         const U8* x = first_variant;
 879
 880         while (x < send) {
 881             STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 882             if (UNLIKELY(! cur_len)) {
 883                 return FALSE;
 884             }
 885             x += cur_len;
 886         }
 887     }
 888
 889     return TRUE;
 890 }
 891
 892 /*
 893
 894 =for apidoc is_utf8_string_loc
 895
 896 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 897 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 898 "utf8ness success") in the C<ep> pointer.
 899
 900 See also C<L</is_utf8_string_loclen>>.
 901
 902 =cut
 903 */
 904
 905 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 906
 907 /*
 908
 909 =for apidoc is_utf8_string_loclen
 910
 911 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 912 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 913 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 914 encoded characters in the C<el> pointer.
 915
 916 See also C<L</is_utf8_string_loc>>.
 917
 918 =cut
 919 */
 920
 921 PERL_STATIC_INLINE bool
 922 Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 923 {
 924     const U8 * first_variant;
 925
 926     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 927
 928     if (len == 0) {
 929         len = strlen((const char *) s);
 930     }
 931
 932     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
 933         if (el)
 934             *el = len;
 935
 936         if (ep) {
 937             *ep = s + len;
 938         }
 939
 940         return TRUE;
 941     }
 942
 943     {
 944         const U8* const send = s + len;
 945         const U8* x = first_variant;
 946         STRLEN outlen = first_variant - s;
 947
 948         while (x < send) {
 949             const STRLEN cur_len = isUTF8_CHAR(x, send);
 950             if (UNLIKELY(! cur_len)) {
 951                 break;
 952             }
 953             x += cur_len;
 954             outlen++;
 955         }
 956
 957         if (el)
 958             *el = outlen;
 959
 960         if (ep) {
 961             *ep = x;
 962         }
 963
 964         return (x == send);
 965     }
 966 }
 967
 968 /*
 969
 970 =for apidoc is_strict_utf8_string_loc
 971
 972 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 973 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 974 "utf8ness success") in the C<ep> pointer.
 975
 976 See also C<L</is_strict_utf8_string_loclen>>.
 977
 978 =cut
 979 */
 980
 981 #define is_strict_utf8_string_loc(s, len, ep)                               \
 982                                 is_strict_utf8_string_loclen(s, len, ep, 0)
 983
 984 /*
 985
 986 =for apidoc is_strict_utf8_string_loclen
 987
 988 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 989 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 990 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 991 encoded characters in the C<el> pointer.
 992
 993 See also C<L</is_strict_utf8_string_loc>>.
 994
 995 =cut
 996 */
 997
 998 PERL_STATIC_INLINE bool
 999 S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1000 {
1001     const U8 * first_variant;
1002
1003     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
1004
1005     if (len == 0) {
1006         len = strlen((const char *) s);
1007     }
1008
1009     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1010         if (el)
1011             *el = len;
1012
1013         if (ep) {
1014             *ep = s + len;
1015         }
1016
1017         return TRUE;
1018     }
1019
1020     {
1021         const U8* const send = s + len;
1022         const U8* x = first_variant;
1023         STRLEN outlen = first_variant - s;
1024
1025         while (x < send) {
1026             const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
1027             if (UNLIKELY(! cur_len)) {
1028                 break;
1029             }
1030             x += cur_len;
1031             outlen++;
1032         }
1033
1034         if (el)
1035             *el = outlen;
1036
1037         if (ep) {
1038             *ep = x;
1039         }
1040
1041         return (x == send);
1042     }
1043 }
1044
1045 /*
1046
1047 =for apidoc is_c9strict_utf8_string_loc
1048
1049 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1050 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1051 "utf8ness success") in the C<ep> pointer.
1052
1053 See also C<L</is_c9strict_utf8_string_loclen>>.
1054
1055 =cut
1056 */
1057
1058 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
1059                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
1060
1061 /*
1062
1063 =for apidoc is_c9strict_utf8_string_loclen
1064
1065 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
1066 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1067 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
1068 characters in the C<el> pointer.
1069
1070 See also C<L</is_c9strict_utf8_string_loc>>.
1071
1072 =cut
1073 */
1074
1075 PERL_STATIC_INLINE bool
1076 S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
1077 {
1078     const U8 * first_variant;
1079
1080     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
1081
1082     if (len == 0) {
1083         len = strlen((const char *) s);
1084     }
1085
1086     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1087         if (el)
1088             *el = len;
1089
1090         if (ep) {
1091             *ep = s + len;
1092         }
1093
1094         return TRUE;
1095     }
1096
1097     {
1098         const U8* const send = s + len;
1099         const U8* x = first_variant;
1100         STRLEN outlen = first_variant - s;
1101
1102         while (x < send) {
1103             const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
1104             if (UNLIKELY(! cur_len)) {
1105                 break;
1106             }
1107             x += cur_len;
1108             outlen++;
1109         }
1110
1111         if (el)
1112             *el = outlen;
1113
1114         if (ep) {
1115             *ep = x;
1116         }
1117
1118         return (x == send);
1119     }
1120 }
1121
1122 /*
1123
1124 =for apidoc is_utf8_string_loc_flags
1125
1126 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1127 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1128 "utf8ness success") in the C<ep> pointer.
1129
1130 See also C<L</is_utf8_string_loclen_flags>>.
1131
1132 =cut
1133 */
1134
1135 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
1136                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
1137
1138
1139 /* The above 3 actual functions could have been moved into the more general one
1140  * just below, and made #defines that call it with the right 'flags'.  They are
1141  * currently kept separate to increase their chances of getting inlined */
1142
1143 /*
1144
1145 =for apidoc is_utf8_string_loclen_flags
1146
1147 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
1148 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
1149 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
1150 encoded characters in the C<el> pointer.
1151
1152 See also C<L</is_utf8_string_loc_flags>>.
1153
1154 =cut
1155 */
1156
1157 PERL_STATIC_INLINE bool
1158 S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
1159 {
1160     const U8 * first_variant;
1161
1162     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
1163     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1164                           |UTF8_DISALLOW_PERL_EXTENDED)));
1165
1166     if (len == 0) {
1167         len = strlen((const char *) s);
1168     }
1169
1170     if (flags == 0) {
1171         return is_utf8_string_loclen(s, len, ep, el);
1172     }
1173
1174     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1175                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
1176     {
1177         return is_strict_utf8_string_loclen(s, len, ep, el);
1178     }
1179
1180     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
1181                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1182     {
1183         return is_c9strict_utf8_string_loclen(s, len, ep, el);
1184     }
1185
1186     if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1187         if (el)
1188             *el = len;
1189
1190         if (ep) {
1191             *ep = s + len;
1192         }
1193
1194         return TRUE;
1195     }
1196
1197     {
1198         const U8* send = s + len;
1199         const U8* x = first_variant;
1200         STRLEN outlen = first_variant - s;
1201
1202         while (x < send) {
1203             const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1204             if (UNLIKELY(! cur_len)) {
1205                 break;
1206             }
1207             x += cur_len;
1208             outlen++;
1209         }
1210
1211         if (el)
1212             *el = outlen;
1213
1214         if (ep) {
1215             *ep = x;
1216         }
1217
1218         return (x == send);
1219     }
1220 }
1221
1222 /*
1223 =for apidoc utf8_distance
1224
1225 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1226 and C<b>.
1227
1228 WARNING: use only if you *know* that the pointers point inside the
1229 same UTF-8 buffer.
1230
1231 =cut
1232 */
1233
1234 PERL_STATIC_INLINE IV
1235 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1236 {
1237     PERL_ARGS_ASSERT_UTF8_DISTANCE;
1238
1239     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1240 }
1241
1242 /*
1243 =for apidoc utf8_hop
1244
1245 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1246 forward or backward.
1247
1248 WARNING: do not use the following unless you *know* C<off> is within
1249 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1250 on the first byte of character or just after the last byte of a character.
1251
1252 =cut
1253 */
1254
1255 PERL_STATIC_INLINE U8 *
1256 Perl_utf8_hop(const U8 *s, SSize_t off)
1257 {
1258     PERL_ARGS_ASSERT_UTF8_HOP;
1259
1260     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1261      * the bitops (especially ~) can create illegal UTF-8.
1262      * In other words: in Perl UTF-8 is not just for Unicode. */
1263
1264     if (off >= 0) {
1265         while (off--)
1266             s += UTF8SKIP(s);
1267     }
1268     else {
1269         while (off++) {
1270             s--;
1271             while (UTF8_IS_CONTINUATION(*s))
1272                 s--;
1273         }
1274     }
1275     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1276     return (U8 *)s;
1277     GCC_DIAG_RESTORE_STMT;
1278 }
1279
1280 /*
1281 =for apidoc utf8_hop_forward
1282
1283 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1284 forward.
1285
1286 C<off> must be non-negative.
1287
1288 C<s> must be before or equal to C<end>.
1289
1290 When moving forward it will not move beyond C<end>.
1291
1292 Will not exceed this limit even if the string is not valid "UTF-8".
1293
1294 =cut
1295 */
1296
1297 PERL_STATIC_INLINE U8 *
1298 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1299 {
1300     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1301
1302     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1303      * the bitops (especially ~) can create illegal UTF-8.
1304      * In other words: in Perl UTF-8 is not just for Unicode. */
1305
1306     assert(s <= end);
1307     assert(off >= 0);
1308
1309     while (off--) {
1310         STRLEN skip = UTF8SKIP(s);
1311         if ((STRLEN)(end - s) <= skip) {
1312             GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1313             return (U8 *)end;
1314             GCC_DIAG_RESTORE_STMT;
1315         }
1316         s += skip;
1317     }
1318
1319     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1320     return (U8 *)s;
1321     GCC_DIAG_RESTORE_STMT;
1322 }
1323
1324 /*
1325 =for apidoc utf8_hop_back
1326
1327 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1328 backward.
1329
1330 C<off> must be non-positive.
1331
1332 C<s> must be after or equal to C<start>.
1333
1334 When moving backward it will not move before C<start>.
1335
1336 Will not exceed this limit even if the string is not valid "UTF-8".
1337
1338 =cut
1339 */
1340
1341 PERL_STATIC_INLINE U8 *
1342 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1343 {
1344     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1345
1346     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1347      * the bitops (especially ~) can create illegal UTF-8.
1348      * In other words: in Perl UTF-8 is not just for Unicode. */
1349
1350     assert(start <= s);
1351     assert(off <= 0);
1352
1353     while (off++ && s > start) {
1354         s--;
1355         while (UTF8_IS_CONTINUATION(*s) && s > start)
1356             s--;
1357     }
1358
1359     GCC_DIAG_IGNORE_STMT(-Wcast-qual);
1360     return (U8 *)s;
1361     GCC_DIAG_RESTORE_STMT;
1362 }
1363
1364 /*
1365 =for apidoc utf8_hop_safe
1366
1367 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1368 either forward or backward.
1369
1370 When moving backward it will not move before C<start>.
1371
1372 When moving forward it will not move beyond C<end>.
1373
1374 Will not exceed those limits even if the string is not valid "UTF-8".
1375
1376 =cut
1377 */
1378
1379 PERL_STATIC_INLINE U8 *
1380 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1381 {
1382     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1383
1384     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1385      * the bitops (especially ~) can create illegal UTF-8.
1386      * In other words: in Perl UTF-8 is not just for Unicode. */
1387
1388     assert(start <= s && s <= end);
1389
1390     if (off >= 0) {
1391         return utf8_hop_forward(s, off, end);
1392     }
1393     else {
1394         return utf8_hop_back(s, off, start);
1395     }
1396 }
1397
1398 /*
1399
1400 =for apidoc is_utf8_valid_partial_char
1401
1402 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1403 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1404 points.  Otherwise, it returns 1 if there exists at least one non-empty
1405 sequence of bytes that when appended to sequence C<s>, starting at position
1406 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1407 otherwise returns 0.
1408
1409 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1410 point.
1411
1412 This is useful when a fixed-length buffer is being tested for being well-formed
1413 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1414 it is split somewhere in the middle of the final code point's UTF-8
1415 representation.  (Presumably when the buffer is refreshed with the next chunk
1416 of data, the new first bytes will complete the partial code point.)   This
1417 function is used to verify that the final bytes in the current buffer are in
1418 fact the legal beginning of some code point, so that if they aren't, the
1419 failure can be signalled without having to wait for the next read.
1420
1421 =cut
1422 */
1423 #define is_utf8_valid_partial_char(s, e)                                    \
1424                                 is_utf8_valid_partial_char_flags(s, e, 0)
1425
1426 /*
1427
1428 =for apidoc is_utf8_valid_partial_char_flags
1429
1430 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1431 or not the input is a valid UTF-8 encoded partial character, but it takes an
1432 extra parameter, C<flags>, which can further restrict which code points are
1433 considered valid.
1434
1435 If C<flags> is 0, this behaves identically to
1436 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1437 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1438 there is any sequence of bytes that can complete the input partial character in
1439 such a way that a non-prohibited character is formed, the function returns
1440 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1441 partial character input.  But many  of the other possible excluded types can be
1442 determined from just the first one or two bytes.
1443
1444 =cut
1445  */
1446
1447 PERL_STATIC_INLINE bool
1448 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1449 {
1450     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1451
1452     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1453                           |UTF8_DISALLOW_PERL_EXTENDED)));
1454
1455     if (s >= e || s + UTF8SKIP(s) <= e) {
1456         return FALSE;
1457     }
1458
1459     return cBOOL(_is_utf8_char_helper(s, e, flags));
1460 }
1461
1462 /*
1463
1464 =for apidoc is_utf8_fixed_width_buf_flags
1465
1466 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1467 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1468 otherwise it returns FALSE.
1469
1470 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1471 without restriction.  If the final few bytes of the buffer do not form a
1472 complete code point, this will return TRUE anyway, provided that
1473 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1474
1475 If C<flags> in non-zero, it can be any combination of the
1476 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1477 same meanings.
1478
1479 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1480 returns FALSE if the final few bytes of the string don't form a complete code
1481 point.
1482
1483 =cut
1484  */
1485 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1486                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1487
1488 /*
1489
1490 =for apidoc is_utf8_fixed_width_buf_loc_flags
1491
1492 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1493 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1494 to the beginning of any partial character at the end of the buffer; if there is
1495 no partial character C<*ep> will contain C<s>+C<len>.
1496
1497 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1498
1499 =cut
1500 */
1501
1502 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1503                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1504
1505 /*
1506
1507 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1508
1509 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1510 complete, valid characters found in the C<el> pointer.
1511
1512 =cut
1513 */
1514
1515 PERL_STATIC_INLINE bool
1516 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1517                                        STRLEN len,
1518                                        const U8 **ep,
1519                                        STRLEN *el,
1520                                        const U32 flags)
1521 {
1522     const U8 * maybe_partial;
1523
1524     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1525
1526     if (! ep) {
1527         ep  = &maybe_partial;
1528     }
1529
1530     /* If it's entirely valid, return that; otherwise see if the only error is
1531      * that the final few bytes are for a partial character */
1532     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1533            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1534 }
1535
1536 /* ------------------------------- perl.h ----------------------------- */
1537
1538 /*
1539 =head1 Miscellaneous Functions
1540
1541 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1542
1543 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1544 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1545
1546 Return TRUE if the name is safe.
1547
1548 Used by the C<IS_SAFE_SYSCALL()> macro.
1549
1550 =cut
1551 */
1552
1553 PERL_STATIC_INLINE bool
1554 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1555     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1556      * perl itself uses xce*() functions which accept 8-bit strings.
1557      */
1558
1559     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1560
1561     if (len > 1) {
1562         char *null_at;
1563         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1564                 SETERRNO(ENOENT, LIB_INVARG);
1565                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1566                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1567                                    what, op_name, pv, null_at+1);
1568                 return FALSE;
1569         }
1570     }
1571
1572     return TRUE;
1573 }
1574
1575 /*
1576
1577 Return true if the supplied filename has a newline character
1578 immediately before the first (hopefully only) NUL.
1579
1580 My original look at this incorrectly used the len from SvPV(), but
1581 that's incorrect, since we allow for a NUL in pv[len-1].
1582
1583 So instead, strlen() and work from there.
1584
1585 This allow for the user reading a filename, forgetting to chomp it,
1586 then calling:
1587
1588   open my $foo, "$file\0";
1589
1590 */
1591
1592 #ifdef PERL_CORE
1593
1594 PERL_STATIC_INLINE bool
1595 S_should_warn_nl(const char *pv) {
1596     STRLEN len;
1597
1598     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1599
1600     len = strlen(pv);
1601
1602     return len > 0 && pv[len-1] == '\n';
1603 }
1604
1605 #endif
1606
1607 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1608
1609 #define MAX_CHARSET_NAME_LENGTH 2
1610
1611 PERL_STATIC_INLINE const char *
1612 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1613 {
1614     /* Returns a string that corresponds to the name of the regex character set
1615      * given by 'flags', and *lenp is set the length of that string, which
1616      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1617
1618     *lenp = 1;
1619     switch (get_regex_charset(flags)) {
1620         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1621         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1622         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1623         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1624         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1625             *lenp = 2;
1626             return ASCII_MORE_RESTRICT_PAT_MODS;
1627     }
1628     /* The NOT_REACHED; hides an assert() which has a rather complex
1629      * definition in perl.h. */
1630     NOT_REACHED; /* NOTREACHED */
1631     return "?";     /* Unknown */
1632 }
1633
1634 /*
1635
1636 Return false if any get magic is on the SV other than taint magic.
1637
1638 */
1639
1640 PERL_STATIC_INLINE bool
1641 S_sv_only_taint_gmagic(SV *sv) {
1642     MAGIC *mg = SvMAGIC(sv);
1643
1644     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1645
1646     while (mg) {
1647         if (mg->mg_type != PERL_MAGIC_taint
1648             && !(mg->mg_flags & MGf_GSKIP)
1649             && mg->mg_virtual->svt_get) {
1650             return FALSE;
1651         }
1652         mg = mg->mg_moremagic;
1653     }
1654
1655     return TRUE;
1656 }
1657
1658 /* ------------------ cop.h ------------------------------------------- */
1659
1660
1661 /* Enter a block. Push a new base context and return its address. */
1662
1663 PERL_STATIC_INLINE PERL_CONTEXT *
1664 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1665 {
1666     PERL_CONTEXT * cx;
1667
1668     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1669
1670     CXINC;
1671     cx = CX_CUR();
1672     cx->cx_type        = type;
1673     cx->blk_gimme      = gimme;
1674     cx->blk_oldsaveix  = saveix;
1675     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1676     cx->blk_oldcop     = PL_curcop;
1677     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1678     cx->blk_oldscopesp = PL_scopestack_ix;
1679     cx->blk_oldpm      = PL_curpm;
1680     cx->blk_old_tmpsfloor = PL_tmps_floor;
1681
1682     PL_tmps_floor        = PL_tmps_ix;
1683     CX_DEBUG(cx, "PUSH");
1684     return cx;
1685 }
1686
1687
1688 /* Exit a block (RETURN and LAST). */
1689
1690 PERL_STATIC_INLINE void
1691 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1692 {
1693     PERL_ARGS_ASSERT_CX_POPBLOCK;
1694
1695     CX_DEBUG(cx, "POP");
1696     /* these 3 are common to cx_popblock and cx_topblock */
1697     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1698     PL_scopestack_ix = cx->blk_oldscopesp;
1699     PL_curpm         = cx->blk_oldpm;
1700
1701     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1702      * and leaves a CX entry lying around for repeated use, so
1703      * skip for multicall */                  \
1704     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1705             || PL_savestack_ix == cx->blk_oldsaveix);
1706     PL_curcop     = cx->blk_oldcop;
1707     PL_tmps_floor = cx->blk_old_tmpsfloor;
1708 }
1709
1710 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1711  * Whereas cx_popblock() restores the state to the point just before
1712  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1713  * *after* cx_pushblock() was called. */
1714
1715 PERL_STATIC_INLINE void
1716 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1717 {
1718     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1719
1720     CX_DEBUG(cx, "TOP");
1721     /* these 3 are common to cx_popblock and cx_topblock */
1722     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1723     PL_scopestack_ix = cx->blk_oldscopesp;
1724     PL_curpm         = cx->blk_oldpm;
1725
1726     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1727 }
1728
1729
1730 PERL_STATIC_INLINE void
1731 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1732 {
1733     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1734
1735     PERL_ARGS_ASSERT_CX_PUSHSUB;
1736
1737     PERL_DTRACE_PROBE_ENTRY(cv);
1738     cx->blk_sub.cv = cv;
1739     cx->blk_sub.olddepth = CvDEPTH(cv);
1740     cx->blk_sub.prevcomppad = PL_comppad;
1741     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1742     cx->blk_sub.retop = retop;
1743     SvREFCNT_inc_simple_void_NN(cv);
1744     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1745 }
1746
1747
1748 /* subsets of cx_popsub() */
1749
1750 PERL_STATIC_INLINE void
1751 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1752 {
1753     CV *cv;
1754
1755     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1756     assert(CxTYPE(cx) == CXt_SUB);
1757
1758     PL_comppad = cx->blk_sub.prevcomppad;
1759     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1760     cv = cx->blk_sub.cv;
1761     CvDEPTH(cv) = cx->blk_sub.olddepth;
1762     cx->blk_sub.cv = NULL;
1763     SvREFCNT_dec(cv);
1764 }
1765
1766
1767 /* handle the @_ part of leaving a sub */
1768
1769 PERL_STATIC_INLINE void
1770 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1771 {
1772     AV *av;
1773
1774     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1775     assert(CxTYPE(cx) == CXt_SUB);
1776     assert(AvARRAY(MUTABLE_AV(
1777         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1778                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1779
1780     CX_POP_SAVEARRAY(cx);
1781     av = MUTABLE_AV(PAD_SVl(0));
1782     if (UNLIKELY(AvREAL(av)))
1783         /* abandon @_ if it got reified */
1784         clear_defarray(av, 0);
1785     else {
1786         CLEAR_ARGARRAY(av);
1787     }
1788 }
1789
1790
1791 PERL_STATIC_INLINE void
1792 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1793 {
1794     PERL_ARGS_ASSERT_CX_POPSUB;
1795     assert(CxTYPE(cx) == CXt_SUB);
1796
1797     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1798
1799     if (CxHASARGS(cx))
1800         cx_popsub_args(cx);
1801     cx_popsub_common(cx);
1802 }
1803
1804
1805 PERL_STATIC_INLINE void
1806 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1807 {
1808     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1809
1810     cx->blk_format.cv          = cv;
1811     cx->blk_format.retop       = retop;
1812     cx->blk_format.gv          = gv;
1813     cx->blk_format.dfoutgv     = PL_defoutgv;
1814     cx->blk_format.prevcomppad = PL_comppad;
1815     cx->blk_u16                = 0;
1816
1817     SvREFCNT_inc_simple_void_NN(cv);
1818     CvDEPTH(cv)++;
1819     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1820 }
1821
1822
1823 PERL_STATIC_INLINE void
1824 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1825 {
1826     CV *cv;
1827     GV *dfout;
1828
1829     PERL_ARGS_ASSERT_CX_POPFORMAT;
1830     assert(CxTYPE(cx) == CXt_FORMAT);
1831
1832     dfout = cx->blk_format.dfoutgv;
1833     setdefout(dfout);
1834     cx->blk_format.dfoutgv = NULL;
1835     SvREFCNT_dec_NN(dfout);
1836
1837     PL_comppad = cx->blk_format.prevcomppad;
1838     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1839     cv = cx->blk_format.cv;
1840     cx->blk_format.cv = NULL;
1841     --CvDEPTH(cv);
1842     SvREFCNT_dec_NN(cv);
1843 }
1844
1845
1846 PERL_STATIC_INLINE void
1847 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1848 {
1849     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1850
1851     cx->blk_eval.retop         = retop;
1852     cx->blk_eval.old_namesv    = namesv;
1853     cx->blk_eval.old_eval_root = PL_eval_root;
1854     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1855     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1856     cx->blk_eval.cur_top_env   = PL_top_env;
1857
1858     assert(!(PL_in_eval     & ~ 0x3F));
1859     assert(!(PL_op->op_type & ~0x1FF));
1860     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1861 }
1862
1863
1864 PERL_STATIC_INLINE void
1865 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1866 {
1867     SV *sv;
1868
1869     PERL_ARGS_ASSERT_CX_POPEVAL;
1870     assert(CxTYPE(cx) == CXt_EVAL);
1871
1872     PL_in_eval = CxOLD_IN_EVAL(cx);
1873     assert(!(PL_in_eval & 0xc0));
1874     PL_eval_root = cx->blk_eval.old_eval_root;
1875     sv = cx->blk_eval.cur_text;
1876     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1877         cx->blk_eval.cur_text = NULL;
1878         SvREFCNT_dec_NN(sv);
1879     }
1880
1881     sv = cx->blk_eval.old_namesv;
1882     if (sv) {
1883         cx->blk_eval.old_namesv = NULL;
1884         SvREFCNT_dec_NN(sv);
1885     }
1886 }
1887
1888
1889 /* push a plain loop, i.e.
1890  *     { block }
1891  *     while (cond) { block }
1892  *     for (init;cond;continue) { block }
1893  * This loop can be last/redo'ed etc.
1894  */
1895
1896 PERL_STATIC_INLINE void
1897 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1898 {
1899     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1900     cx->blk_loop.my_op = cLOOP;
1901 }
1902
1903
1904 /* push a true for loop, i.e.
1905  *     for var (list) { block }
1906  */
1907
1908 PERL_STATIC_INLINE void
1909 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1910 {
1911     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1912
1913     /* this one line is common with cx_pushloop_plain */
1914     cx->blk_loop.my_op = cLOOP;
1915
1916     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1917     cx->blk_loop.itersave      = itersave;
1918 #ifdef USE_ITHREADS
1919     cx->blk_loop.oldcomppad = PL_comppad;
1920 #endif
1921 }
1922
1923
1924 /* pop all loop types, including plain */
1925
1926 PERL_STATIC_INLINE void
1927 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1928 {
1929     PERL_ARGS_ASSERT_CX_POPLOOP;
1930
1931     assert(CxTYPE_is_LOOP(cx));
1932     if (  CxTYPE(cx) == CXt_LOOP_ARY
1933        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1934     {
1935         /* Free ary or cur. This assumes that state_u.ary.ary
1936          * aligns with state_u.lazysv.cur. See cx_dup() */
1937         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1938         cx->blk_loop.state_u.lazysv.cur = NULL;
1939         SvREFCNT_dec_NN(sv);
1940         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1941             sv = cx->blk_loop.state_u.lazysv.end;
1942             cx->blk_loop.state_u.lazysv.end = NULL;
1943             SvREFCNT_dec_NN(sv);
1944         }
1945     }
1946     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1947         SV *cursv;
1948         SV **svp = (cx)->blk_loop.itervar_u.svp;
1949         if ((cx->cx_type & CXp_FOR_GV))
1950             svp = &GvSV((GV*)svp);
1951         cursv = *svp;
1952         *svp = cx->blk_loop.itersave;
1953         cx->blk_loop.itersave = NULL;
1954         SvREFCNT_dec(cursv);
1955     }
1956 }
1957
1958
1959 PERL_STATIC_INLINE void
1960 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1961 {
1962     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1963
1964     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1965 }
1966
1967
1968 PERL_STATIC_INLINE void
1969 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1970 {
1971     PERL_ARGS_ASSERT_CX_POPWHEN;
1972     assert(CxTYPE(cx) == CXt_WHEN);
1973
1974     PERL_UNUSED_ARG(cx);
1975     PERL_UNUSED_CONTEXT;
1976     /* currently NOOP */
1977 }
1978
1979
1980 PERL_STATIC_INLINE void
1981 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1982 {
1983     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1984
1985     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1986     cx->blk_givwhen.defsv_save = orig_defsv;
1987 }
1988
1989
1990 PERL_STATIC_INLINE void
1991 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1992 {
1993     SV *sv;
1994
1995     PERL_ARGS_ASSERT_CX_POPGIVEN;
1996     assert(CxTYPE(cx) == CXt_GIVEN);
1997
1998     sv = GvSV(PL_defgv);
1999     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
2000     cx->blk_givwhen.defsv_save = NULL;
2001     SvREFCNT_dec(sv);
2002 }
2003
2004 /* ------------------ util.h ------------------------------------------- */
2005
2006 /*
2007 =head1 Miscellaneous Functions
2008
2009 =for apidoc foldEQ
2010
2011 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2012 same
2013 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
2014 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
2015 range bytes match only themselves.
2016
2017 =cut
2018 */
2019
2020 PERL_STATIC_INLINE I32
2021 Perl_foldEQ(const char *s1, const char *s2, I32 len)
2022 {
2023     const U8 *a = (const U8 *)s1;
2024     const U8 *b = (const U8 *)s2;
2025
2026     PERL_ARGS_ASSERT_FOLDEQ;
2027
2028     assert(len >= 0);
2029
2030     while (len--) {
2031         if (*a != *b && *a != PL_fold[*b])
2032             return 0;
2033         a++,b++;
2034     }
2035     return 1;
2036 }
2037
2038 PERL_STATIC_INLINE I32
2039 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
2040 {
2041     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
2042      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
2043      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
2044      * does it check that the strings each have at least 'len' characters */
2045
2046     const U8 *a = (const U8 *)s1;
2047     const U8 *b = (const U8 *)s2;
2048
2049     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
2050
2051     assert(len >= 0);
2052
2053     while (len--) {
2054         if (*a != *b && *a != PL_fold_latin1[*b]) {
2055             return 0;
2056         }
2057         a++, b++;
2058     }
2059     return 1;
2060 }
2061
2062 /*
2063 =for apidoc foldEQ_locale
2064
2065 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
2066 same case-insensitively in the current locale; false otherwise.
2067
2068 =cut
2069 */
2070
2071 PERL_STATIC_INLINE I32
2072 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
2073 {
2074     dVAR;
2075     const U8 *a = (const U8 *)s1;
2076     const U8 *b = (const U8 *)s2;
2077
2078     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
2079
2080     assert(len >= 0);
2081
2082     while (len--) {
2083         if (*a != *b && *a != PL_fold_locale[*b])
2084             return 0;
2085         a++,b++;
2086     }
2087     return 1;
2088 }
2089
2090 #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
2091
2092 PERL_STATIC_INLINE void *
2093 S_my_memrchr(const char * s, const char c, const STRLEN len)
2094 {
2095     /* memrchr(), since many platforms lack it */
2096
2097     const char * t = s + len - 1;
2098
2099     PERL_ARGS_ASSERT_MY_MEMRCHR;
2100
2101     while (t >= s) {
2102         if (*t == c) {
2103             return (void *) t;
2104         }
2105         t--;
2106     }
2107
2108     return NULL;
2109 }
2110
2111 #endif
2112
2113 /*
2114  * ex: set ts=8 sts=4 sw=4 et:
2115  */