inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     assert(isREGEXP(re));
 157     return re->sv_u.svu_rx;
 158 }
 159
 160 /* ------------------------------- sv.h ------------------------------- */
 161
 162 PERL_STATIC_INLINE SV *
 163 S_SvREFCNT_inc(SV *sv)
 164 {
 165     if (LIKELY(sv != NULL))
 166         SvREFCNT(sv)++;
 167     return sv;
 168 }
 169 PERL_STATIC_INLINE SV *
 170 S_SvREFCNT_inc_NN(SV *sv)
 171 {
 172     SvREFCNT(sv)++;
 173     return sv;
 174 }
 175 PERL_STATIC_INLINE void
 176 S_SvREFCNT_inc_void(SV *sv)
 177 {
 178     if (LIKELY(sv != NULL))
 179         SvREFCNT(sv)++;
 180 }
 181 PERL_STATIC_INLINE void
 182 S_SvREFCNT_dec(pTHX_ SV *sv)
 183 {
 184     if (LIKELY(sv != NULL)) {
 185         U32 rc = SvREFCNT(sv);
 186         if (LIKELY(rc > 1))
 187             SvREFCNT(sv) = rc - 1;
 188         else
 189             Perl_sv_free2(aTHX_ sv, rc);
 190     }
 191 }
 192
 193 PERL_STATIC_INLINE void
 194 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 195 {
 196     U32 rc = SvREFCNT(sv);
 197     if (LIKELY(rc > 1))
 198         SvREFCNT(sv) = rc - 1;
 199     else
 200         Perl_sv_free2(aTHX_ sv, rc);
 201 }
 202
 203 PERL_STATIC_INLINE void
 204 SvAMAGIC_on(SV *sv)
 205 {
 206     assert(SvROK(sv));
 207     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 208 }
 209 PERL_STATIC_INLINE void
 210 SvAMAGIC_off(SV *sv)
 211 {
 212     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 213         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 214 }
 215
 216 PERL_STATIC_INLINE U32
 217 S_SvPADSTALE_on(SV *sv)
 218 {
 219     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 220     return SvFLAGS(sv) |= SVs_PADSTALE;
 221 }
 222 PERL_STATIC_INLINE U32
 223 S_SvPADSTALE_off(SV *sv)
 224 {
 225     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 226     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 227 }
 228 #if defined(PERL_CORE) || defined (PERL_EXT)
 229 PERL_STATIC_INLINE STRLEN
 230 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 231 {
 232     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 233     if (SvGAMAGIC(sv)) {
 234         U8 *hopped = utf8_hop((U8 *)pv, pos);
 235         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 236         return (STRLEN)(hopped - (U8 *)pv);
 237     }
 238     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 239 }
 240 #endif
 241
 242 /* ------------------------------- handy.h ------------------------------- */
 243
 244 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 245 #ifdef GCC_DIAG_PRAGMA
 246 GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
 247 #endif
 248 static void
 249 S_croak_memory_wrap(void)
 250 {
 251     Perl_croak_nocontext("%s",PL_memory_wrap);
 252 }
 253 #ifdef GCC_DIAG_PRAGMA
 254 GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
 255 #endif
 256
 257 /* ------------------------------- utf8.h ------------------------------- */
 258
 259 /*
 260 =head1 Unicode Support
 261 */
 262
 263 PERL_STATIC_INLINE void
 264 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 265 {
 266     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 267      * encoded string at '*dest', updating '*dest' to include it */
 268
 269     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 270
 271     if (NATIVE_BYTE_IS_INVARIANT(byte))
 272         *((*dest)++) = byte;
 273     else {
 274         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 275         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 276     }
 277 }
 278
 279 /*
 280 =for apidoc valid_utf8_to_uvchr
 281 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 282 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 283 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 284 non-Unicode code points are allowed.
 285
 286 =cut
 287
 288  */
 289
 290 PERL_STATIC_INLINE UV
 291 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 292 {
 293     const UV expectlen = UTF8SKIP(s);
 294     const U8* send = s + expectlen;
 295     UV uv = *s;
 296
 297     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 298
 299     if (retlen) {
 300         *retlen = expectlen;
 301     }
 302
 303     /* An invariant is trivially returned */
 304     if (expectlen == 1) {
 305         return uv;
 306     }
 307
 308     /* Remove the leading bits that indicate the number of bytes, leaving just
 309      * the bits that are part of the value */
 310     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 311
 312     /* Now, loop through the remaining bytes, accumulating each into the
 313      * working total as we go.  (I khw tried unrolling the loop for up to 4
 314      * bytes, but there was no performance improvement) */
 315     for (++s; s < send; s++) {
 316         uv = UTF8_ACCUMULATE(uv, *s);
 317     }
 318
 319     return UNI_TO_NATIVE(uv);
 320
 321 }
 322
 323 /*
 324 =for apidoc is_utf8_invariant_string
 325
 326 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 327 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 328 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 329 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 330 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 331 characters are invariant, but so also are the C1 controls.
 332
 333 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 334 use this option, that C<s> can't have embedded C<NUL> characters and has to
 335 have a terminating C<NUL> byte).
 336
 337 See also
 338 C<L</is_utf8_string>>,
 339 C<L</is_utf8_string_flags>>,
 340 C<L</is_utf8_string_loc>>,
 341 C<L</is_utf8_string_loc_flags>>,
 342 C<L</is_utf8_string_loclen>>,
 343 C<L</is_utf8_string_loclen_flags>>,
 344 C<L</is_utf8_fixed_width_buf_flags>>,
 345 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 346 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 347 C<L</is_strict_utf8_string>>,
 348 C<L</is_strict_utf8_string_loc>>,
 349 C<L</is_strict_utf8_string_loclen>>,
 350 C<L</is_c9strict_utf8_string>>,
 351 C<L</is_c9strict_utf8_string_loc>>,
 352 and
 353 C<L</is_c9strict_utf8_string_loclen>>.
 354
 355 =cut
 356 */
 357
 358 PERL_STATIC_INLINE bool
 359 S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
 360 {
 361     const U8* const send = s + (len ? len : strlen((const char *)s));
 362     const U8* x = s;
 363
 364     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
 365
 366     for (; x < send; ++x) {
 367         if (!UTF8_IS_INVARIANT(*x))
 368             return FALSE;
 369     }
 370
 371     return TRUE;
 372 }
 373
 374 /*
 375 =for apidoc is_utf8_string
 376
 377 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 378 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 379 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 380 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 381 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 382
 383 This function considers Perl's extended UTF-8 to be valid.  That means that
 384 code points above Unicode, surrogates, and non-character code points are
 385 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 386 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 387 code points are considered valid.
 388
 389 See also
 390 C<L</is_utf8_invariant_string>>,
 391 C<L</is_utf8_string_loc>>,
 392 C<L</is_utf8_string_loclen>>,
 393 C<L</is_utf8_fixed_width_buf_flags>>,
 394 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 395 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 396
 397 =cut
 398 */
 399
 400 PERL_STATIC_INLINE bool
 401 Perl_is_utf8_string(const U8 *s, const STRLEN len)
 402 {
 403     /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
 404      * Be aware of possible changes to that */
 405
 406     const U8* const send = s + (len ? len : strlen((const char *)s));
 407     const U8* x = s;
 408
 409     PERL_ARGS_ASSERT_IS_UTF8_STRING;
 410
 411     while (x < send) {
 412         const STRLEN cur_len = isUTF8_CHAR(x, send);
 413         if (UNLIKELY(! cur_len)) {
 414             return FALSE;
 415         }
 416         x += cur_len;
 417     }
 418
 419     return TRUE;
 420 }
 421
 422 /*
 423 =for apidoc is_strict_utf8_string
 424
 425 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 426 UTF-8-encoded string that is fully interchangeable by any application using
 427 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 428 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 429 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 430 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 431
 432 This function returns FALSE for strings containing any
 433 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 434 non-character code points.
 435
 436 See also
 437 C<L</is_utf8_invariant_string>>,
 438 C<L</is_utf8_string>>,
 439 C<L</is_utf8_string_flags>>,
 440 C<L</is_utf8_string_loc>>,
 441 C<L</is_utf8_string_loc_flags>>,
 442 C<L</is_utf8_string_loclen>>,
 443 C<L</is_utf8_string_loclen_flags>>,
 444 C<L</is_utf8_fixed_width_buf_flags>>,
 445 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 446 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 447 C<L</is_strict_utf8_string_loc>>,
 448 C<L</is_strict_utf8_string_loclen>>,
 449 C<L</is_c9strict_utf8_string>>,
 450 C<L</is_c9strict_utf8_string_loc>>,
 451 and
 452 C<L</is_c9strict_utf8_string_loclen>>.
 453
 454 =cut
 455 */
 456
 457 PERL_STATIC_INLINE bool
 458 S_is_strict_utf8_string(const U8 *s, const STRLEN len)
 459 {
 460     const U8* const send = s + (len ? len : strlen((const char *)s));
 461     const U8* x = s;
 462
 463     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
 464
 465     while (x < send) {
 466         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 467         if (UNLIKELY(! cur_len)) {
 468             return FALSE;
 469         }
 470         x += cur_len;
 471     }
 472
 473     return TRUE;
 474 }
 475
 476 /*
 477 =for apidoc is_c9strict_utf8_string
 478
 479 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 480 UTF-8-encoded string that conforms to
 481 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 482 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 483 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 484 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 485 characters being ASCII constitute 'a valid UTF-8 string'.
 486
 487 This function returns FALSE for strings containing any code points above the
 488 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 489 code points per
 490 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 491
 492 See also
 493 C<L</is_utf8_invariant_string>>,
 494 C<L</is_utf8_string>>,
 495 C<L</is_utf8_string_flags>>,
 496 C<L</is_utf8_string_loc>>,
 497 C<L</is_utf8_string_loc_flags>>,
 498 C<L</is_utf8_string_loclen>>,
 499 C<L</is_utf8_string_loclen_flags>>,
 500 C<L</is_utf8_fixed_width_buf_flags>>,
 501 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 502 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 503 C<L</is_strict_utf8_string>>,
 504 C<L</is_strict_utf8_string_loc>>,
 505 C<L</is_strict_utf8_string_loclen>>,
 506 C<L</is_c9strict_utf8_string_loc>>,
 507 and
 508 C<L</is_c9strict_utf8_string_loclen>>.
 509
 510 =cut
 511 */
 512
 513 PERL_STATIC_INLINE bool
 514 S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
 515 {
 516     const U8* const send = s + (len ? len : strlen((const char *)s));
 517     const U8* x = s;
 518
 519     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
 520
 521     while (x < send) {
 522         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 523         if (UNLIKELY(! cur_len)) {
 524             return FALSE;
 525         }
 526         x += cur_len;
 527     }
 528
 529     return TRUE;
 530 }
 531
 532 /* The above 3 functions could have been moved into the more general one just
 533  * below, and made #defines that call it with the right 'flags'.  They are
 534  * currently kept separate to increase their chances of getting inlined */
 535
 536 /*
 537 =for apidoc is_utf8_string_flags
 538
 539 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 540 UTF-8 string, subject to the restrictions imposed by C<flags>;
 541 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 542 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 543 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 544 that all characters being ASCII constitute 'a valid UTF-8 string'.
 545
 546 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 547 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 548 as C<L</is_strict_utf8_string>>; and if C<flags> is
 549 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 550 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 551 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 552 C<L</utf8n_to_uvchr>>, with the same meanings.
 553
 554 See also
 555 C<L</is_utf8_invariant_string>>,
 556 C<L</is_utf8_string>>,
 557 C<L</is_utf8_string_loc>>,
 558 C<L</is_utf8_string_loc_flags>>,
 559 C<L</is_utf8_string_loclen>>,
 560 C<L</is_utf8_string_loclen_flags>>,
 561 C<L</is_utf8_fixed_width_buf_flags>>,
 562 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 563 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 564 C<L</is_strict_utf8_string>>,
 565 C<L</is_strict_utf8_string_loc>>,
 566 C<L</is_strict_utf8_string_loclen>>,
 567 C<L</is_c9strict_utf8_string>>,
 568 C<L</is_c9strict_utf8_string_loc>>,
 569 and
 570 C<L</is_c9strict_utf8_string_loclen>>.
 571
 572 =cut
 573 */
 574
 575 PERL_STATIC_INLINE bool
 576 S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
 577 {
 578     const U8* const send = s + (len ? len : strlen((const char *)s));
 579     const U8* x = s;
 580
 581     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 582     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 583                           |UTF8_DISALLOW_ABOVE_31_BIT)));
 584
 585     if (flags == 0) {
 586         return is_utf8_string(s, len);
 587     }
 588
 589     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 590                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 591     {
 592         return is_strict_utf8_string(s, len);
 593     }
 594
 595     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 596                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 597     {
 598         return is_c9strict_utf8_string(s, len);
 599     }
 600
 601     while (x < send) {
 602         STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 603         if (UNLIKELY(! cur_len)) {
 604             return FALSE;
 605         }
 606         x += cur_len;
 607     }
 608
 609     return TRUE;
 610 }
 611
 612 /*
 613
 614 =for apidoc is_utf8_string_loc
 615
 616 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 617 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 618 "utf8ness success") in the C<ep> pointer.
 619
 620 See also C<L</is_utf8_string_loclen>>.
 621
 622 =cut
 623 */
 624
 625 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 626
 627 /*
 628
 629 =for apidoc is_utf8_string_loclen
 630
 631 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 632 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 633 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 634 encoded characters in the C<el> pointer.
 635
 636 See also C<L</is_utf8_string_loc>>.
 637
 638 =cut
 639 */
 640
 641 PERL_STATIC_INLINE bool
 642 Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 643 {
 644     const U8* const send = s + (len ? len : strlen((const char *)s));
 645     const U8* x = s;
 646     STRLEN outlen = 0;
 647
 648     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 649
 650     while (x < send) {
 651         const STRLEN cur_len = isUTF8_CHAR(x, send);
 652         if (UNLIKELY(! cur_len)) {
 653             break;
 654         }
 655         x += cur_len;
 656         outlen++;
 657     }
 658
 659     if (el)
 660         *el = outlen;
 661
 662     if (ep) {
 663         *ep = x;
 664     }
 665
 666     return (x == send);
 667 }
 668
 669 /*
 670
 671 =for apidoc is_strict_utf8_string_loc
 672
 673 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 674 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 675 "utf8ness success") in the C<ep> pointer.
 676
 677 See also C<L</is_strict_utf8_string_loclen>>.
 678
 679 =cut
 680 */
 681
 682 #define is_strict_utf8_string_loc(s, len, ep)                               \
 683                                 is_strict_utf8_string_loclen(s, len, ep, 0)
 684
 685 /*
 686
 687 =for apidoc is_strict_utf8_string_loclen
 688
 689 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 690 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 691 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 692 encoded characters in the C<el> pointer.
 693
 694 See also C<L</is_strict_utf8_string_loc>>.
 695
 696 =cut
 697 */
 698
 699 PERL_STATIC_INLINE bool
 700 S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 701 {
 702     const U8* const send = s + (len ? len : strlen((const char *)s));
 703     const U8* x = s;
 704     STRLEN outlen = 0;
 705
 706     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
 707
 708     while (x < send) {
 709         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 710         if (UNLIKELY(! cur_len)) {
 711             break;
 712         }
 713         x += cur_len;
 714         outlen++;
 715     }
 716
 717     if (el)
 718         *el = outlen;
 719
 720     if (ep) {
 721         *ep = x;
 722     }
 723
 724     return (x == send);
 725 }
 726
 727 /*
 728
 729 =for apidoc is_c9strict_utf8_string_loc
 730
 731 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 732 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 733 "utf8ness success") in the C<ep> pointer.
 734
 735 See also C<L</is_c9strict_utf8_string_loclen>>.
 736
 737 =cut
 738 */
 739
 740 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
 741                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
 742
 743 /*
 744
 745 =for apidoc is_c9strict_utf8_string_loclen
 746
 747 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 748 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 749 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
 750 characters in the C<el> pointer.
 751
 752 See also C<L</is_c9strict_utf8_string_loc>>.
 753
 754 =cut
 755 */
 756
 757 PERL_STATIC_INLINE bool
 758 S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 759 {
 760     const U8* const send = s + (len ? len : strlen((const char *)s));
 761     const U8* x = s;
 762     STRLEN outlen = 0;
 763
 764     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
 765
 766     while (x < send) {
 767         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 768         if (UNLIKELY(! cur_len)) {
 769             break;
 770         }
 771         x += cur_len;
 772         outlen++;
 773     }
 774
 775     if (el)
 776         *el = outlen;
 777
 778     if (ep) {
 779         *ep = x;
 780     }
 781
 782     return (x == send);
 783 }
 784
 785 /*
 786
 787 =for apidoc is_utf8_string_loc_flags
 788
 789 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 790 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 791 "utf8ness success") in the C<ep> pointer.
 792
 793 See also C<L</is_utf8_string_loclen_flags>>.
 794
 795 =cut
 796 */
 797
 798 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
 799                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
 800
 801
 802 /* The above 3 actual functions could have been moved into the more general one
 803  * just below, and made #defines that call it with the right 'flags'.  They are
 804  * currently kept separate to increase their chances of getting inlined */
 805
 806 /*
 807
 808 =for apidoc is_utf8_string_loclen_flags
 809
 810 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 811 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 812 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 813 encoded characters in the C<el> pointer.
 814
 815 See also C<L</is_utf8_string_loc_flags>>.
 816
 817 =cut
 818 */
 819
 820 PERL_STATIC_INLINE bool
 821 S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
 822 {
 823     const U8* const send = s + (len ? len : strlen((const char *)s));
 824     const U8* x = s;
 825     STRLEN outlen = 0;
 826
 827     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
 828     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 829                           |UTF8_DISALLOW_ABOVE_31_BIT)));
 830
 831     if (flags == 0) {
 832         return is_utf8_string_loclen(s, len, ep, el);
 833     }
 834
 835     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 836                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 837     {
 838         return is_strict_utf8_string_loclen(s, len, ep, el);
 839     }
 840
 841     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 842                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 843     {
 844         return is_c9strict_utf8_string_loclen(s, len, ep, el);
 845     }
 846
 847     while (x < send) {
 848         const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 849         if (UNLIKELY(! cur_len)) {
 850             break;
 851         }
 852         x += cur_len;
 853         outlen++;
 854     }
 855
 856     if (el)
 857         *el = outlen;
 858
 859     if (ep) {
 860         *ep = x;
 861     }
 862
 863     return (x == send);
 864 }
 865
 866 /*
 867 =for apidoc utf8_distance
 868
 869 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
 870 and C<b>.
 871
 872 WARNING: use only if you *know* that the pointers point inside the
 873 same UTF-8 buffer.
 874
 875 =cut
 876 */
 877
 878 PERL_STATIC_INLINE IV
 879 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
 880 {
 881     PERL_ARGS_ASSERT_UTF8_DISTANCE;
 882
 883     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
 884 }
 885
 886 /*
 887 =for apidoc utf8_hop
 888
 889 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
 890 forward or backward.
 891
 892 WARNING: do not use the following unless you *know* C<off> is within
 893 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
 894 on the first byte of character or just after the last byte of a character.
 895
 896 =cut
 897 */
 898
 899 PERL_STATIC_INLINE U8 *
 900 Perl_utf8_hop(const U8 *s, SSize_t off)
 901 {
 902     PERL_ARGS_ASSERT_UTF8_HOP;
 903
 904     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 905      * the bitops (especially ~) can create illegal UTF-8.
 906      * In other words: in Perl UTF-8 is not just for Unicode. */
 907
 908     if (off >= 0) {
 909         while (off--)
 910             s += UTF8SKIP(s);
 911     }
 912     else {
 913         while (off++) {
 914             s--;
 915             while (UTF8_IS_CONTINUATION(*s))
 916                 s--;
 917         }
 918     }
 919     GCC_DIAG_IGNORE(-Wcast-qual);
 920     return (U8 *)s;
 921     GCC_DIAG_RESTORE;
 922 }
 923
 924 /*
 925 =for apidoc utf8_hop_forward
 926
 927 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
 928 forward.
 929
 930 C<off> must be non-negative.
 931
 932 C<s> must be before or equal to C<end>.
 933
 934 When moving forward it will not move beyond C<end>.
 935
 936 Will not exceed this limit even if the string is not valid "UTF-8".
 937
 938 =cut
 939 */
 940
 941 PERL_STATIC_INLINE U8 *
 942 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
 943 {
 944     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
 945
 946     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 947      * the bitops (especially ~) can create illegal UTF-8.
 948      * In other words: in Perl UTF-8 is not just for Unicode. */
 949
 950     assert(s <= end);
 951     assert(off >= 0);
 952
 953     while (off--) {
 954         STRLEN skip = UTF8SKIP(s);
 955         if ((STRLEN)(end - s) <= skip) {
 956             GCC_DIAG_IGNORE(-Wcast-qual);
 957             return (U8 *)end;
 958             GCC_DIAG_RESTORE;
 959         }
 960         s += skip;
 961     }
 962
 963     GCC_DIAG_IGNORE(-Wcast-qual);
 964     return (U8 *)s;
 965     GCC_DIAG_RESTORE;
 966 }
 967
 968 /*
 969 =for apidoc utf8_hop_back
 970
 971 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
 972 backward.
 973
 974 C<off> must be non-positive.
 975
 976 C<s> must be after or equal to C<start>.
 977
 978 When moving backward it will not move before C<start>.
 979
 980 Will not exceed this limit even if the string is not valid "UTF-8".
 981
 982 =cut
 983 */
 984
 985 PERL_STATIC_INLINE U8 *
 986 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
 987 {
 988     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
 989
 990     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 991      * the bitops (especially ~) can create illegal UTF-8.
 992      * In other words: in Perl UTF-8 is not just for Unicode. */
 993
 994     assert(start <= s);
 995     assert(off <= 0);
 996
 997     while (off++ && s > start) {
 998         s--;
 999         while (UTF8_IS_CONTINUATION(*s) && s > start)
1000             s--;
1001     }
1002
1003     GCC_DIAG_IGNORE(-Wcast-qual);
1004     return (U8 *)s;
1005     GCC_DIAG_RESTORE;
1006 }
1007
1008 /*
1009 =for apidoc utf8_hop_safe
1010
1011 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1012 either forward or backward.
1013
1014 When moving backward it will not move before C<start>.
1015
1016 When moving forward it will not move beyond C<end>.
1017
1018 Will not exceed those limits even if the string is not valid "UTF-8".
1019
1020 =cut
1021 */
1022
1023 PERL_STATIC_INLINE U8 *
1024 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1025 {
1026     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1027
1028     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1029      * the bitops (especially ~) can create illegal UTF-8.
1030      * In other words: in Perl UTF-8 is not just for Unicode. */
1031
1032     assert(start <= s && s <= end);
1033
1034     if (off >= 0) {
1035         return utf8_hop_forward(s, off, end);
1036     }
1037     else {
1038         return utf8_hop_back(s, off, start);
1039     }
1040 }
1041
1042 /*
1043
1044 =for apidoc is_utf8_valid_partial_char
1045
1046 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1047 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1048 points.  Otherwise, it returns 1 if there exists at least one non-empty
1049 sequence of bytes that when appended to sequence C<s>, starting at position
1050 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1051 otherwise returns 0.
1052
1053 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1054 point.
1055
1056 This is useful when a fixed-length buffer is being tested for being well-formed
1057 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1058 it is split somewhere in the middle of the final code point's UTF-8
1059 representation.  (Presumably when the buffer is refreshed with the next chunk
1060 of data, the new first bytes will complete the partial code point.)   This
1061 function is used to verify that the final bytes in the current buffer are in
1062 fact the legal beginning of some code point, so that if they aren't, the
1063 failure can be signalled without having to wait for the next read.
1064
1065 =cut
1066 */
1067 #define is_utf8_valid_partial_char(s, e)                                    \
1068                                 is_utf8_valid_partial_char_flags(s, e, 0)
1069
1070 /*
1071
1072 =for apidoc is_utf8_valid_partial_char_flags
1073
1074 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1075 or not the input is a valid UTF-8 encoded partial character, but it takes an
1076 extra parameter, C<flags>, which can further restrict which code points are
1077 considered valid.
1078
1079 If C<flags> is 0, this behaves identically to
1080 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1081 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1082 there is any sequence of bytes that can complete the input partial character in
1083 such a way that a non-prohibited character is formed, the function returns
1084 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1085 partial character input.  But many  of the other possible excluded types can be
1086 determined from just the first one or two bytes.
1087
1088 =cut
1089  */
1090
1091 PERL_STATIC_INLINE bool
1092 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1093 {
1094     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1095
1096     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1097                           |UTF8_DISALLOW_ABOVE_31_BIT)));
1098
1099     if (s >= e || s + UTF8SKIP(s) <= e) {
1100         return FALSE;
1101     }
1102
1103     return cBOOL(_is_utf8_char_helper(s, e, flags));
1104 }
1105
1106 /*
1107
1108 =for apidoc is_utf8_fixed_width_buf_flags
1109
1110 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1111 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1112 otherwise it returns FALSE.
1113
1114 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1115 without restriction.  If the final few bytes of the buffer do not form a
1116 complete code point, this will return TRUE anyway, provided that
1117 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1118
1119 If C<flags> in non-zero, it can be any combination of the
1120 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1121 same meanings.
1122
1123 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1124 returns FALSE if the final few bytes of the string don't form a complete code
1125 point.
1126
1127 =cut
1128  */
1129 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1130                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1131
1132 /*
1133
1134 =for apidoc is_utf8_fixed_width_buf_loc_flags
1135
1136 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1137 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1138 to the beginning of any partial character at the end of the buffer; if there is
1139 no partial character C<*ep> will contain C<s>+C<len>.
1140
1141 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1142
1143 =cut
1144 */
1145
1146 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1147                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1148
1149 /*
1150
1151 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1152
1153 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1154 complete, valid characters found in the C<el> pointer.
1155
1156 =cut
1157 */
1158
1159 PERL_STATIC_INLINE bool
1160 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1161                                        const STRLEN len,
1162                                        const U8 **ep,
1163                                        STRLEN *el,
1164                                        const U32 flags)
1165 {
1166     const U8 * maybe_partial;
1167
1168     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1169
1170     if (! ep) {
1171         ep  = &maybe_partial;
1172     }
1173
1174     /* If it's entirely valid, return that; otherwise see if the only error is
1175      * that the final few bytes are for a partial character */
1176     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1177            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1178 }
1179
1180 /* ------------------------------- perl.h ----------------------------- */
1181
1182 /*
1183 =head1 Miscellaneous Functions
1184
1185 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1186
1187 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1188 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1189
1190 Return TRUE if the name is safe.
1191
1192 Used by the C<IS_SAFE_SYSCALL()> macro.
1193
1194 =cut
1195 */
1196
1197 PERL_STATIC_INLINE bool
1198 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1199     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1200      * perl itself uses xce*() functions which accept 8-bit strings.
1201      */
1202
1203     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1204
1205     if (len > 1) {
1206         char *null_at;
1207         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1208                 SETERRNO(ENOENT, LIB_INVARG);
1209                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1210                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1211                                    what, op_name, pv, null_at+1);
1212                 return FALSE;
1213         }
1214     }
1215
1216     return TRUE;
1217 }
1218
1219 /*
1220
1221 Return true if the supplied filename has a newline character
1222 immediately before the first (hopefully only) NUL.
1223
1224 My original look at this incorrectly used the len from SvPV(), but
1225 that's incorrect, since we allow for a NUL in pv[len-1].
1226
1227 So instead, strlen() and work from there.
1228
1229 This allow for the user reading a filename, forgetting to chomp it,
1230 then calling:
1231
1232   open my $foo, "$file\0";
1233
1234 */
1235
1236 #ifdef PERL_CORE
1237
1238 PERL_STATIC_INLINE bool
1239 S_should_warn_nl(const char *pv) {
1240     STRLEN len;
1241
1242     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1243
1244     len = strlen(pv);
1245
1246     return len > 0 && pv[len-1] == '\n';
1247 }
1248
1249 #endif
1250
1251 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1252
1253 #define MAX_CHARSET_NAME_LENGTH 2
1254
1255 PERL_STATIC_INLINE const char *
1256 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1257 {
1258     /* Returns a string that corresponds to the name of the regex character set
1259      * given by 'flags', and *lenp is set the length of that string, which
1260      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1261
1262     *lenp = 1;
1263     switch (get_regex_charset(flags)) {
1264         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1265         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1266         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1267         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1268         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1269             *lenp = 2;
1270             return ASCII_MORE_RESTRICT_PAT_MODS;
1271     }
1272     /* The NOT_REACHED; hides an assert() which has a rather complex
1273      * definition in perl.h. */
1274     NOT_REACHED; /* NOTREACHED */
1275     return "?";     /* Unknown */
1276 }
1277
1278 /*
1279
1280 Return false if any get magic is on the SV other than taint magic.
1281
1282 */
1283
1284 PERL_STATIC_INLINE bool
1285 S_sv_only_taint_gmagic(SV *sv) {
1286     MAGIC *mg = SvMAGIC(sv);
1287
1288     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1289
1290     while (mg) {
1291         if (mg->mg_type != PERL_MAGIC_taint
1292             && !(mg->mg_flags & MGf_GSKIP)
1293             && mg->mg_virtual->svt_get) {
1294             return FALSE;
1295         }
1296         mg = mg->mg_moremagic;
1297     }
1298
1299     return TRUE;
1300 }
1301
1302 /* ------------------ cop.h ------------------------------------------- */
1303
1304
1305 /* Enter a block. Push a new base context and return its address. */
1306
1307 PERL_STATIC_INLINE PERL_CONTEXT *
1308 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1309 {
1310     PERL_CONTEXT * cx;
1311
1312     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1313
1314     CXINC;
1315     cx = CX_CUR();
1316     cx->cx_type        = type;
1317     cx->blk_gimme      = gimme;
1318     cx->blk_oldsaveix  = saveix;
1319     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1320     cx->blk_oldcop     = PL_curcop;
1321     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1322     cx->blk_oldscopesp = PL_scopestack_ix;
1323     cx->blk_oldpm      = PL_curpm;
1324     cx->blk_old_tmpsfloor = PL_tmps_floor;
1325
1326     PL_tmps_floor        = PL_tmps_ix;
1327     CX_DEBUG(cx, "PUSH");
1328     return cx;
1329 }
1330
1331
1332 /* Exit a block (RETURN and LAST). */
1333
1334 PERL_STATIC_INLINE void
1335 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1336 {
1337     PERL_ARGS_ASSERT_CX_POPBLOCK;
1338
1339     CX_DEBUG(cx, "POP");
1340     /* these 3 are common to cx_popblock and cx_topblock */
1341     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1342     PL_scopestack_ix = cx->blk_oldscopesp;
1343     PL_curpm         = cx->blk_oldpm;
1344
1345     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1346      * and leaves a CX entry lying around for repeated use, so
1347      * skip for multicall */                  \
1348     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1349             || PL_savestack_ix == cx->blk_oldsaveix);
1350     PL_curcop     = cx->blk_oldcop;
1351     PL_tmps_floor = cx->blk_old_tmpsfloor;
1352 }
1353
1354 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1355  * Whereas cx_popblock() restores the state to the point just before
1356  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1357  * *after* cx_pushblock() was called. */
1358
1359 PERL_STATIC_INLINE void
1360 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1361 {
1362     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1363
1364     CX_DEBUG(cx, "TOP");
1365     /* these 3 are common to cx_popblock and cx_topblock */
1366     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1367     PL_scopestack_ix = cx->blk_oldscopesp;
1368     PL_curpm         = cx->blk_oldpm;
1369
1370     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1371 }
1372
1373
1374 PERL_STATIC_INLINE void
1375 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1376 {
1377     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1378
1379     PERL_ARGS_ASSERT_CX_PUSHSUB;
1380
1381     PERL_DTRACE_PROBE_ENTRY(cv);
1382     cx->blk_sub.cv = cv;
1383     cx->blk_sub.olddepth = CvDEPTH(cv);
1384     cx->blk_sub.prevcomppad = PL_comppad;
1385     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1386     cx->blk_sub.retop = retop;
1387     SvREFCNT_inc_simple_void_NN(cv);
1388     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1389 }
1390
1391
1392 /* subsets of cx_popsub() */
1393
1394 PERL_STATIC_INLINE void
1395 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1396 {
1397     CV *cv;
1398
1399     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1400     assert(CxTYPE(cx) == CXt_SUB);
1401
1402     PL_comppad = cx->blk_sub.prevcomppad;
1403     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1404     cv = cx->blk_sub.cv;
1405     CvDEPTH(cv) = cx->blk_sub.olddepth;
1406     cx->blk_sub.cv = NULL;
1407     SvREFCNT_dec(cv);
1408 }
1409
1410
1411 /* handle the @_ part of leaving a sub */
1412
1413 PERL_STATIC_INLINE void
1414 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1415 {
1416     AV *av;
1417
1418     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1419     assert(CxTYPE(cx) == CXt_SUB);
1420     assert(AvARRAY(MUTABLE_AV(
1421         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1422                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1423
1424     CX_POP_SAVEARRAY(cx);
1425     av = MUTABLE_AV(PAD_SVl(0));
1426     if (UNLIKELY(AvREAL(av)))
1427         /* abandon @_ if it got reified */
1428         clear_defarray(av, 0);
1429     else {
1430         CLEAR_ARGARRAY(av);
1431     }
1432 }
1433
1434
1435 PERL_STATIC_INLINE void
1436 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1437 {
1438     PERL_ARGS_ASSERT_CX_POPSUB;
1439     assert(CxTYPE(cx) == CXt_SUB);
1440
1441     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1442
1443     if (CxHASARGS(cx))
1444         cx_popsub_args(cx);
1445     cx_popsub_common(cx);
1446 }
1447
1448
1449 PERL_STATIC_INLINE void
1450 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1451 {
1452     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1453
1454     cx->blk_format.cv          = cv;
1455     cx->blk_format.retop       = retop;
1456     cx->blk_format.gv          = gv;
1457     cx->blk_format.dfoutgv     = PL_defoutgv;
1458     cx->blk_format.prevcomppad = PL_comppad;
1459     cx->blk_u16                = 0;
1460
1461     SvREFCNT_inc_simple_void_NN(cv);
1462     CvDEPTH(cv)++;
1463     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1464 }
1465
1466
1467 PERL_STATIC_INLINE void
1468 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1469 {
1470     CV *cv;
1471     GV *dfout;
1472
1473     PERL_ARGS_ASSERT_CX_POPFORMAT;
1474     assert(CxTYPE(cx) == CXt_FORMAT);
1475
1476     dfout = cx->blk_format.dfoutgv;
1477     setdefout(dfout);
1478     cx->blk_format.dfoutgv = NULL;
1479     SvREFCNT_dec_NN(dfout);
1480
1481     PL_comppad = cx->blk_format.prevcomppad;
1482     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1483     cv = cx->blk_format.cv;
1484     cx->blk_format.cv = NULL;
1485     --CvDEPTH(cv);
1486     SvREFCNT_dec_NN(cv);
1487 }
1488
1489
1490 PERL_STATIC_INLINE void
1491 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1492 {
1493     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1494
1495     cx->blk_eval.retop         = retop;
1496     cx->blk_eval.old_namesv    = namesv;
1497     cx->blk_eval.old_eval_root = PL_eval_root;
1498     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1499     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1500     cx->blk_eval.cur_top_env   = PL_top_env;
1501
1502     assert(!(PL_in_eval     & ~ 0x3F));
1503     assert(!(PL_op->op_type & ~0x1FF));
1504     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1505 }
1506
1507
1508 PERL_STATIC_INLINE void
1509 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1510 {
1511     SV *sv;
1512
1513     PERL_ARGS_ASSERT_CX_POPEVAL;
1514     assert(CxTYPE(cx) == CXt_EVAL);
1515
1516     PL_in_eval = CxOLD_IN_EVAL(cx);
1517     assert(!(PL_in_eval & 0xc0));
1518     PL_eval_root = cx->blk_eval.old_eval_root;
1519     sv = cx->blk_eval.cur_text;
1520     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1521         cx->blk_eval.cur_text = NULL;
1522         SvREFCNT_dec_NN(sv);
1523     }
1524
1525     sv = cx->blk_eval.old_namesv;
1526     if (sv) {
1527         cx->blk_eval.old_namesv = NULL;
1528         SvREFCNT_dec_NN(sv);
1529     }
1530 }
1531
1532
1533 /* push a plain loop, i.e.
1534  *     { block }
1535  *     while (cond) { block }
1536  *     for (init;cond;continue) { block }
1537  * This loop can be last/redo'ed etc.
1538  */
1539
1540 PERL_STATIC_INLINE void
1541 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1542 {
1543     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1544     cx->blk_loop.my_op = cLOOP;
1545 }
1546
1547
1548 /* push a true for loop, i.e.
1549  *     for var (list) { block }
1550  */
1551
1552 PERL_STATIC_INLINE void
1553 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1554 {
1555     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1556
1557     /* this one line is common with cx_pushloop_plain */
1558     cx->blk_loop.my_op = cLOOP;
1559
1560     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1561     cx->blk_loop.itersave      = itersave;
1562 #ifdef USE_ITHREADS
1563     cx->blk_loop.oldcomppad = PL_comppad;
1564 #endif
1565 }
1566
1567
1568 /* pop all loop types, including plain */
1569
1570 PERL_STATIC_INLINE void
1571 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1572 {
1573     PERL_ARGS_ASSERT_CX_POPLOOP;
1574
1575     assert(CxTYPE_is_LOOP(cx));
1576     if (  CxTYPE(cx) == CXt_LOOP_ARY
1577        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1578     {
1579         /* Free ary or cur. This assumes that state_u.ary.ary
1580          * aligns with state_u.lazysv.cur. See cx_dup() */
1581         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1582         cx->blk_loop.state_u.lazysv.cur = NULL;
1583         SvREFCNT_dec_NN(sv);
1584         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1585             sv = cx->blk_loop.state_u.lazysv.end;
1586             cx->blk_loop.state_u.lazysv.end = NULL;
1587             SvREFCNT_dec_NN(sv);
1588         }
1589     }
1590     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1591         SV *cursv;
1592         SV **svp = (cx)->blk_loop.itervar_u.svp;
1593         if ((cx->cx_type & CXp_FOR_GV))
1594             svp = &GvSV((GV*)svp);
1595         cursv = *svp;
1596         *svp = cx->blk_loop.itersave;
1597         cx->blk_loop.itersave = NULL;
1598         SvREFCNT_dec(cursv);
1599     }
1600 }
1601
1602
1603 PERL_STATIC_INLINE void
1604 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1605 {
1606     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1607
1608     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1609 }
1610
1611
1612 PERL_STATIC_INLINE void
1613 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1614 {
1615     PERL_ARGS_ASSERT_CX_POPWHEN;
1616     assert(CxTYPE(cx) == CXt_WHEN);
1617
1618     PERL_UNUSED_ARG(cx);
1619     PERL_UNUSED_CONTEXT;
1620     /* currently NOOP */
1621 }
1622
1623
1624 PERL_STATIC_INLINE void
1625 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1626 {
1627     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1628
1629     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1630     cx->blk_givwhen.defsv_save = orig_defsv;
1631 }
1632
1633
1634 PERL_STATIC_INLINE void
1635 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1636 {
1637     SV *sv;
1638
1639     PERL_ARGS_ASSERT_CX_POPGIVEN;
1640     assert(CxTYPE(cx) == CXt_GIVEN);
1641
1642     sv = GvSV(PL_defgv);
1643     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1644     cx->blk_givwhen.defsv_save = NULL;
1645     SvREFCNT_dec(sv);
1646 }
1647
1648 /* ------------------ util.h ------------------------------------------- */
1649
1650 /*
1651 =head1 Miscellaneous Functions
1652
1653 =for apidoc foldEQ
1654
1655 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1656 same
1657 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
1658 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
1659 range bytes match only themselves.
1660
1661 =cut
1662 */
1663
1664 PERL_STATIC_INLINE I32
1665 Perl_foldEQ(const char *s1, const char *s2, I32 len)
1666 {
1667     const U8 *a = (const U8 *)s1;
1668     const U8 *b = (const U8 *)s2;
1669
1670     PERL_ARGS_ASSERT_FOLDEQ;
1671
1672     assert(len >= 0);
1673
1674     while (len--) {
1675         if (*a != *b && *a != PL_fold[*b])
1676             return 0;
1677         a++,b++;
1678     }
1679     return 1;
1680 }
1681
1682 PERL_STATIC_INLINE I32
1683 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1684 {
1685     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
1686      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1687      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
1688      * does it check that the strings each have at least 'len' characters */
1689
1690     const U8 *a = (const U8 *)s1;
1691     const U8 *b = (const U8 *)s2;
1692
1693     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1694
1695     assert(len >= 0);
1696
1697     while (len--) {
1698         if (*a != *b && *a != PL_fold_latin1[*b]) {
1699             return 0;
1700         }
1701         a++, b++;
1702     }
1703     return 1;
1704 }
1705
1706 /*
1707 =for apidoc foldEQ_locale
1708
1709 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1710 same case-insensitively in the current locale; false otherwise.
1711
1712 =cut
1713 */
1714
1715 PERL_STATIC_INLINE I32
1716 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1717 {
1718     dVAR;
1719     const U8 *a = (const U8 *)s1;
1720     const U8 *b = (const U8 *)s2;
1721
1722     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1723
1724     assert(len >= 0);
1725
1726     while (len--) {
1727         if (*a != *b && *a != PL_fold_locale[*b])
1728             return 0;
1729         a++,b++;
1730     }
1731     return 1;
1732 }
1733
1734 /*
1735  * ex: set ts=8 sts=4 sw=4 et:
1736  */