inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     XPV* const p = (XPV*)SvANY(re);
 157     assert(isREGEXP(re));
 158     return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
 159                                    : (struct regexp *)p;
 160 }
 161
 162 /* ------------------------------- sv.h ------------------------------- */
 163
 164 PERL_STATIC_INLINE SV *
 165 S_SvREFCNT_inc(SV *sv)
 166 {
 167     if (LIKELY(sv != NULL))
 168         SvREFCNT(sv)++;
 169     return sv;
 170 }
 171 PERL_STATIC_INLINE SV *
 172 S_SvREFCNT_inc_NN(SV *sv)
 173 {
 174     SvREFCNT(sv)++;
 175     return sv;
 176 }
 177 PERL_STATIC_INLINE void
 178 S_SvREFCNT_inc_void(SV *sv)
 179 {
 180     if (LIKELY(sv != NULL))
 181         SvREFCNT(sv)++;
 182 }
 183 PERL_STATIC_INLINE void
 184 S_SvREFCNT_dec(pTHX_ SV *sv)
 185 {
 186     if (LIKELY(sv != NULL)) {
 187         U32 rc = SvREFCNT(sv);
 188         if (LIKELY(rc > 1))
 189             SvREFCNT(sv) = rc - 1;
 190         else
 191             Perl_sv_free2(aTHX_ sv, rc);
 192     }
 193 }
 194
 195 PERL_STATIC_INLINE void
 196 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 197 {
 198     U32 rc = SvREFCNT(sv);
 199     if (LIKELY(rc > 1))
 200         SvREFCNT(sv) = rc - 1;
 201     else
 202         Perl_sv_free2(aTHX_ sv, rc);
 203 }
 204
 205 PERL_STATIC_INLINE void
 206 SvAMAGIC_on(SV *sv)
 207 {
 208     assert(SvROK(sv));
 209     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 210 }
 211 PERL_STATIC_INLINE void
 212 SvAMAGIC_off(SV *sv)
 213 {
 214     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 215         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 216 }
 217
 218 PERL_STATIC_INLINE U32
 219 S_SvPADSTALE_on(SV *sv)
 220 {
 221     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 222     return SvFLAGS(sv) |= SVs_PADSTALE;
 223 }
 224 PERL_STATIC_INLINE U32
 225 S_SvPADSTALE_off(SV *sv)
 226 {
 227     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 228     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 229 }
 230 #if defined(PERL_CORE) || defined (PERL_EXT)
 231 PERL_STATIC_INLINE STRLEN
 232 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 233 {
 234     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 235     if (SvGAMAGIC(sv)) {
 236         U8 *hopped = utf8_hop((U8 *)pv, pos);
 237         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 238         return (STRLEN)(hopped - (U8 *)pv);
 239     }
 240     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 241 }
 242 #endif
 243
 244 /* ------------------------------- handy.h ------------------------------- */
 245
 246 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 247 #ifdef GCC_DIAG_PRAGMA
 248 GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
 249 #endif
 250 static void
 251 S_croak_memory_wrap(void)
 252 {
 253     Perl_croak_nocontext("%s",PL_memory_wrap);
 254 }
 255 #ifdef GCC_DIAG_PRAGMA
 256 GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
 257 #endif
 258
 259 /* ------------------------------- utf8.h ------------------------------- */
 260
 261 /*
 262 =head1 Unicode Support
 263 */
 264
 265 PERL_STATIC_INLINE void
 266 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 267 {
 268     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 269      * encoded string at '*dest', updating '*dest' to include it */
 270
 271     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 272
 273     if (NATIVE_BYTE_IS_INVARIANT(byte))
 274         *((*dest)++) = byte;
 275     else {
 276         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 277         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 278     }
 279 }
 280
 281 /*
 282 =for apidoc valid_utf8_to_uvchr
 283 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 284 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 285 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 286 non-Unicode code points are allowed.
 287
 288 =cut
 289
 290  */
 291
 292 PERL_STATIC_INLINE UV
 293 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 294 {
 295     const UV expectlen = UTF8SKIP(s);
 296     const U8* send = s + expectlen;
 297     UV uv = *s;
 298
 299     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 300
 301     if (retlen) {
 302         *retlen = expectlen;
 303     }
 304
 305     /* An invariant is trivially returned */
 306     if (expectlen == 1) {
 307         return uv;
 308     }
 309
 310     /* Remove the leading bits that indicate the number of bytes, leaving just
 311      * the bits that are part of the value */
 312     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 313
 314     /* Now, loop through the remaining bytes, accumulating each into the
 315      * working total as we go.  (I khw tried unrolling the loop for up to 4
 316      * bytes, but there was no performance improvement) */
 317     for (++s; s < send; s++) {
 318         uv = UTF8_ACCUMULATE(uv, *s);
 319     }
 320
 321     return UNI_TO_NATIVE(uv);
 322
 323 }
 324
 325 /*
 326 =for apidoc is_utf8_invariant_string
 327
 328 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 329 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 330 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 331 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 332 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 333 characters are invariant, but so also are the C1 controls.
 334
 335 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 336 use this option, that C<s> can't have embedded C<NUL> characters and has to
 337 have a terminating C<NUL> byte).
 338
 339 See also
 340 C<L</is_utf8_string>>,
 341 C<L</is_utf8_string_flags>>,
 342 C<L</is_utf8_string_loc>>,
 343 C<L</is_utf8_string_loc_flags>>,
 344 C<L</is_utf8_string_loclen>>,
 345 C<L</is_utf8_string_loclen_flags>>,
 346 C<L</is_utf8_fixed_width_buf_flags>>,
 347 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 348 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 349 C<L</is_strict_utf8_string>>,
 350 C<L</is_strict_utf8_string_loc>>,
 351 C<L</is_strict_utf8_string_loclen>>,
 352 C<L</is_c9strict_utf8_string>>,
 353 C<L</is_c9strict_utf8_string_loc>>,
 354 and
 355 C<L</is_c9strict_utf8_string_loclen>>.
 356
 357 =cut
 358
 359 */
 360
 361 #define is_utf8_invariant_string(s, len)                                    \
 362                                 is_utf8_invariant_string_loc(s, len, NULL)
 363
 364 /*
 365 =for apidoc is_utf8_invariant_string_loc
 366
 367 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
 368 the first UTF-8 variant character in the C<ep> pointer; if all characters are
 369 UTF-8 invariant, this function does not change the contents of C<*ep>.
 370
 371 =cut
 372
 373 XXX On ASCII machines this could be sped up by doing word-at-a-time operations
 374
 375 */
 376
 377 PERL_STATIC_INLINE bool
 378 S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
 379 {
 380     const U8* const send = s + (len ? len : strlen((const char *)s));
 381     const U8* x = s;
 382
 383     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 384
 385     while (x < send) {
 386         if (UTF8_IS_INVARIANT(*x)) {
 387             x++;
 388             continue;
 389         }
 390
 391         if (ep) {
 392             *ep = x;
 393         }
 394
 395         return FALSE;
 396     }
 397
 398     return TRUE;
 399 }
 400
 401 /*
 402 =for apidoc is_utf8_string
 403
 404 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 405 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 406 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 407 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 408 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 409
 410 This function considers Perl's extended UTF-8 to be valid.  That means that
 411 code points above Unicode, surrogates, and non-character code points are
 412 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 413 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 414 code points are considered valid.
 415
 416 See also
 417 C<L</is_utf8_invariant_string>>,
 418 C<L</is_utf8_invariant_string_loc>>,
 419 C<L</is_utf8_string_loc>>,
 420 C<L</is_utf8_string_loclen>>,
 421 C<L</is_utf8_fixed_width_buf_flags>>,
 422 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 423 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 424
 425 =cut
 426 */
 427
 428 PERL_STATIC_INLINE bool
 429 Perl_is_utf8_string(const U8 *s, const STRLEN len)
 430 {
 431     /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
 432      * Be aware of possible changes to that */
 433
 434     const U8* const send = s + (len ? len : strlen((const char *)s));
 435     const U8* x = s;
 436
 437     PERL_ARGS_ASSERT_IS_UTF8_STRING;
 438
 439     while (x < send) {
 440         const STRLEN cur_len = isUTF8_CHAR(x, send);
 441         if (UNLIKELY(! cur_len)) {
 442             return FALSE;
 443         }
 444         x += cur_len;
 445     }
 446
 447     return TRUE;
 448 }
 449
 450 /*
 451 =for apidoc is_strict_utf8_string
 452
 453 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 454 UTF-8-encoded string that is fully interchangeable by any application using
 455 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 456 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 457 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 458 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 459
 460 This function returns FALSE for strings containing any
 461 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 462 non-character code points.
 463
 464 See also
 465 C<L</is_utf8_invariant_string>>,
 466 C<L</is_utf8_invariant_string_loc>>,
 467 C<L</is_utf8_string>>,
 468 C<L</is_utf8_string_flags>>,
 469 C<L</is_utf8_string_loc>>,
 470 C<L</is_utf8_string_loc_flags>>,
 471 C<L</is_utf8_string_loclen>>,
 472 C<L</is_utf8_string_loclen_flags>>,
 473 C<L</is_utf8_fixed_width_buf_flags>>,
 474 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 475 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 476 C<L</is_strict_utf8_string_loc>>,
 477 C<L</is_strict_utf8_string_loclen>>,
 478 C<L</is_c9strict_utf8_string>>,
 479 C<L</is_c9strict_utf8_string_loc>>,
 480 and
 481 C<L</is_c9strict_utf8_string_loclen>>.
 482
 483 =cut
 484 */
 485
 486 PERL_STATIC_INLINE bool
 487 S_is_strict_utf8_string(const U8 *s, const STRLEN len)
 488 {
 489     const U8* const send = s + (len ? len : strlen((const char *)s));
 490     const U8* x = s;
 491
 492     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
 493
 494     while (x < send) {
 495         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 496         if (UNLIKELY(! cur_len)) {
 497             return FALSE;
 498         }
 499         x += cur_len;
 500     }
 501
 502     return TRUE;
 503 }
 504
 505 /*
 506 =for apidoc is_c9strict_utf8_string
 507
 508 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 509 UTF-8-encoded string that conforms to
 510 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 511 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 512 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 513 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 514 characters being ASCII constitute 'a valid UTF-8 string'.
 515
 516 This function returns FALSE for strings containing any code points above the
 517 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 518 code points per
 519 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 520
 521 See also
 522 C<L</is_utf8_invariant_string>>,
 523 C<L</is_utf8_invariant_string_loc>>,
 524 C<L</is_utf8_string>>,
 525 C<L</is_utf8_string_flags>>,
 526 C<L</is_utf8_string_loc>>,
 527 C<L</is_utf8_string_loc_flags>>,
 528 C<L</is_utf8_string_loclen>>,
 529 C<L</is_utf8_string_loclen_flags>>,
 530 C<L</is_utf8_fixed_width_buf_flags>>,
 531 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 532 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 533 C<L</is_strict_utf8_string>>,
 534 C<L</is_strict_utf8_string_loc>>,
 535 C<L</is_strict_utf8_string_loclen>>,
 536 C<L</is_c9strict_utf8_string_loc>>,
 537 and
 538 C<L</is_c9strict_utf8_string_loclen>>.
 539
 540 =cut
 541 */
 542
 543 PERL_STATIC_INLINE bool
 544 S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
 545 {
 546     const U8* const send = s + (len ? len : strlen((const char *)s));
 547     const U8* x = s;
 548
 549     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
 550
 551     while (x < send) {
 552         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 553         if (UNLIKELY(! cur_len)) {
 554             return FALSE;
 555         }
 556         x += cur_len;
 557     }
 558
 559     return TRUE;
 560 }
 561
 562 /* The above 3 functions could have been moved into the more general one just
 563  * below, and made #defines that call it with the right 'flags'.  They are
 564  * currently kept separate to increase their chances of getting inlined */
 565
 566 /*
 567 =for apidoc is_utf8_string_flags
 568
 569 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 570 UTF-8 string, subject to the restrictions imposed by C<flags>;
 571 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 572 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 573 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 574 that all characters being ASCII constitute 'a valid UTF-8 string'.
 575
 576 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 577 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 578 as C<L</is_strict_utf8_string>>; and if C<flags> is
 579 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 580 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 581 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 582 C<L</utf8n_to_uvchr>>, with the same meanings.
 583
 584 See also
 585 C<L</is_utf8_invariant_string>>,
 586 C<L</is_utf8_invariant_string_loc>>,
 587 C<L</is_utf8_string>>,
 588 C<L</is_utf8_string_loc>>,
 589 C<L</is_utf8_string_loc_flags>>,
 590 C<L</is_utf8_string_loclen>>,
 591 C<L</is_utf8_string_loclen_flags>>,
 592 C<L</is_utf8_fixed_width_buf_flags>>,
 593 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 594 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 595 C<L</is_strict_utf8_string>>,
 596 C<L</is_strict_utf8_string_loc>>,
 597 C<L</is_strict_utf8_string_loclen>>,
 598 C<L</is_c9strict_utf8_string>>,
 599 C<L</is_c9strict_utf8_string_loc>>,
 600 and
 601 C<L</is_c9strict_utf8_string_loclen>>.
 602
 603 =cut
 604 */
 605
 606 PERL_STATIC_INLINE bool
 607 S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
 608 {
 609     const U8* const send = s + (len ? len : strlen((const char *)s));
 610     const U8* x = s;
 611
 612     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 613     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 614                           |UTF8_DISALLOW_PERL_EXTENDED)));
 615
 616     if (flags == 0) {
 617         return is_utf8_string(s, len);
 618     }
 619
 620     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 621                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 622     {
 623         return is_strict_utf8_string(s, len);
 624     }
 625
 626     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 627                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 628     {
 629         return is_c9strict_utf8_string(s, len);
 630     }
 631
 632     while (x < send) {
 633         STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 634         if (UNLIKELY(! cur_len)) {
 635             return FALSE;
 636         }
 637         x += cur_len;
 638     }
 639
 640     return TRUE;
 641 }
 642
 643 /*
 644
 645 =for apidoc is_utf8_string_loc
 646
 647 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 648 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 649 "utf8ness success") in the C<ep> pointer.
 650
 651 See also C<L</is_utf8_string_loclen>>.
 652
 653 =cut
 654 */
 655
 656 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 657
 658 /*
 659
 660 =for apidoc is_utf8_string_loclen
 661
 662 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 663 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 664 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 665 encoded characters in the C<el> pointer.
 666
 667 See also C<L</is_utf8_string_loc>>.
 668
 669 =cut
 670 */
 671
 672 PERL_STATIC_INLINE bool
 673 Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 674 {
 675     const U8* const send = s + (len ? len : strlen((const char *)s));
 676     const U8* x = s;
 677     STRLEN outlen = 0;
 678
 679     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 680
 681     while (x < send) {
 682         const STRLEN cur_len = isUTF8_CHAR(x, send);
 683         if (UNLIKELY(! cur_len)) {
 684             break;
 685         }
 686         x += cur_len;
 687         outlen++;
 688     }
 689
 690     if (el)
 691         *el = outlen;
 692
 693     if (ep) {
 694         *ep = x;
 695     }
 696
 697     return (x == send);
 698 }
 699
 700 /*
 701
 702 =for apidoc is_strict_utf8_string_loc
 703
 704 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 705 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 706 "utf8ness success") in the C<ep> pointer.
 707
 708 See also C<L</is_strict_utf8_string_loclen>>.
 709
 710 =cut
 711 */
 712
 713 #define is_strict_utf8_string_loc(s, len, ep)                               \
 714                                 is_strict_utf8_string_loclen(s, len, ep, 0)
 715
 716 /*
 717
 718 =for apidoc is_strict_utf8_string_loclen
 719
 720 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 721 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 722 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 723 encoded characters in the C<el> pointer.
 724
 725 See also C<L</is_strict_utf8_string_loc>>.
 726
 727 =cut
 728 */
 729
 730 PERL_STATIC_INLINE bool
 731 S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 732 {
 733     const U8* const send = s + (len ? len : strlen((const char *)s));
 734     const U8* x = s;
 735     STRLEN outlen = 0;
 736
 737     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
 738
 739     while (x < send) {
 740         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 741         if (UNLIKELY(! cur_len)) {
 742             break;
 743         }
 744         x += cur_len;
 745         outlen++;
 746     }
 747
 748     if (el)
 749         *el = outlen;
 750
 751     if (ep) {
 752         *ep = x;
 753     }
 754
 755     return (x == send);
 756 }
 757
 758 /*
 759
 760 =for apidoc is_c9strict_utf8_string_loc
 761
 762 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 763 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 764 "utf8ness success") in the C<ep> pointer.
 765
 766 See also C<L</is_c9strict_utf8_string_loclen>>.
 767
 768 =cut
 769 */
 770
 771 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
 772                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
 773
 774 /*
 775
 776 =for apidoc is_c9strict_utf8_string_loclen
 777
 778 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 779 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 780 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
 781 characters in the C<el> pointer.
 782
 783 See also C<L</is_c9strict_utf8_string_loc>>.
 784
 785 =cut
 786 */
 787
 788 PERL_STATIC_INLINE bool
 789 S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 790 {
 791     const U8* const send = s + (len ? len : strlen((const char *)s));
 792     const U8* x = s;
 793     STRLEN outlen = 0;
 794
 795     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
 796
 797     while (x < send) {
 798         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 799         if (UNLIKELY(! cur_len)) {
 800             break;
 801         }
 802         x += cur_len;
 803         outlen++;
 804     }
 805
 806     if (el)
 807         *el = outlen;
 808
 809     if (ep) {
 810         *ep = x;
 811     }
 812
 813     return (x == send);
 814 }
 815
 816 /*
 817
 818 =for apidoc is_utf8_string_loc_flags
 819
 820 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 821 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 822 "utf8ness success") in the C<ep> pointer.
 823
 824 See also C<L</is_utf8_string_loclen_flags>>.
 825
 826 =cut
 827 */
 828
 829 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
 830                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
 831
 832
 833 /* The above 3 actual functions could have been moved into the more general one
 834  * just below, and made #defines that call it with the right 'flags'.  They are
 835  * currently kept separate to increase their chances of getting inlined */
 836
 837 /*
 838
 839 =for apidoc is_utf8_string_loclen_flags
 840
 841 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 842 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 843 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 844 encoded characters in the C<el> pointer.
 845
 846 See also C<L</is_utf8_string_loc_flags>>.
 847
 848 =cut
 849 */
 850
 851 PERL_STATIC_INLINE bool
 852 S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
 853 {
 854     const U8* const send = s + (len ? len : strlen((const char *)s));
 855     const U8* x = s;
 856     STRLEN outlen = 0;
 857
 858     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
 859     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 860                           |UTF8_DISALLOW_PERL_EXTENDED)));
 861
 862     if (flags == 0) {
 863         return is_utf8_string_loclen(s, len, ep, el);
 864     }
 865
 866     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 867                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 868     {
 869         return is_strict_utf8_string_loclen(s, len, ep, el);
 870     }
 871
 872     if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
 873                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 874     {
 875         return is_c9strict_utf8_string_loclen(s, len, ep, el);
 876     }
 877
 878     while (x < send) {
 879         const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 880         if (UNLIKELY(! cur_len)) {
 881             break;
 882         }
 883         x += cur_len;
 884         outlen++;
 885     }
 886
 887     if (el)
 888         *el = outlen;
 889
 890     if (ep) {
 891         *ep = x;
 892     }
 893
 894     return (x == send);
 895 }
 896
 897 /*
 898 =for apidoc utf8_distance
 899
 900 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
 901 and C<b>.
 902
 903 WARNING: use only if you *know* that the pointers point inside the
 904 same UTF-8 buffer.
 905
 906 =cut
 907 */
 908
 909 PERL_STATIC_INLINE IV
 910 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
 911 {
 912     PERL_ARGS_ASSERT_UTF8_DISTANCE;
 913
 914     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
 915 }
 916
 917 /*
 918 =for apidoc utf8_hop
 919
 920 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
 921 forward or backward.
 922
 923 WARNING: do not use the following unless you *know* C<off> is within
 924 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
 925 on the first byte of character or just after the last byte of a character.
 926
 927 =cut
 928 */
 929
 930 PERL_STATIC_INLINE U8 *
 931 Perl_utf8_hop(const U8 *s, SSize_t off)
 932 {
 933     PERL_ARGS_ASSERT_UTF8_HOP;
 934
 935     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 936      * the bitops (especially ~) can create illegal UTF-8.
 937      * In other words: in Perl UTF-8 is not just for Unicode. */
 938
 939     if (off >= 0) {
 940         while (off--)
 941             s += UTF8SKIP(s);
 942     }
 943     else {
 944         while (off++) {
 945             s--;
 946             while (UTF8_IS_CONTINUATION(*s))
 947                 s--;
 948         }
 949     }
 950     GCC_DIAG_IGNORE(-Wcast-qual);
 951     return (U8 *)s;
 952     GCC_DIAG_RESTORE;
 953 }
 954
 955 /*
 956 =for apidoc utf8_hop_forward
 957
 958 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
 959 forward.
 960
 961 C<off> must be non-negative.
 962
 963 C<s> must be before or equal to C<end>.
 964
 965 When moving forward it will not move beyond C<end>.
 966
 967 Will not exceed this limit even if the string is not valid "UTF-8".
 968
 969 =cut
 970 */
 971
 972 PERL_STATIC_INLINE U8 *
 973 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
 974 {
 975     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
 976
 977     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 978      * the bitops (especially ~) can create illegal UTF-8.
 979      * In other words: in Perl UTF-8 is not just for Unicode. */
 980
 981     assert(s <= end);
 982     assert(off >= 0);
 983
 984     while (off--) {
 985         STRLEN skip = UTF8SKIP(s);
 986         if ((STRLEN)(end - s) <= skip) {
 987             GCC_DIAG_IGNORE(-Wcast-qual);
 988             return (U8 *)end;
 989             GCC_DIAG_RESTORE;
 990         }
 991         s += skip;
 992     }
 993
 994     GCC_DIAG_IGNORE(-Wcast-qual);
 995     return (U8 *)s;
 996     GCC_DIAG_RESTORE;
 997 }
 998
 999 /*
1000 =for apidoc utf8_hop_back
1001
1002 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1003 backward.
1004
1005 C<off> must be non-positive.
1006
1007 C<s> must be after or equal to C<start>.
1008
1009 When moving backward it will not move before C<start>.
1010
1011 Will not exceed this limit even if the string is not valid "UTF-8".
1012
1013 =cut
1014 */
1015
1016 PERL_STATIC_INLINE U8 *
1017 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1018 {
1019     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1020
1021     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1022      * the bitops (especially ~) can create illegal UTF-8.
1023      * In other words: in Perl UTF-8 is not just for Unicode. */
1024
1025     assert(start <= s);
1026     assert(off <= 0);
1027
1028     while (off++ && s > start) {
1029         s--;
1030         while (UTF8_IS_CONTINUATION(*s) && s > start)
1031             s--;
1032     }
1033
1034     GCC_DIAG_IGNORE(-Wcast-qual);
1035     return (U8 *)s;
1036     GCC_DIAG_RESTORE;
1037 }
1038
1039 /*
1040 =for apidoc utf8_hop_safe
1041
1042 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1043 either forward or backward.
1044
1045 When moving backward it will not move before C<start>.
1046
1047 When moving forward it will not move beyond C<end>.
1048
1049 Will not exceed those limits even if the string is not valid "UTF-8".
1050
1051 =cut
1052 */
1053
1054 PERL_STATIC_INLINE U8 *
1055 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1056 {
1057     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1058
1059     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1060      * the bitops (especially ~) can create illegal UTF-8.
1061      * In other words: in Perl UTF-8 is not just for Unicode. */
1062
1063     assert(start <= s && s <= end);
1064
1065     if (off >= 0) {
1066         return utf8_hop_forward(s, off, end);
1067     }
1068     else {
1069         return utf8_hop_back(s, off, start);
1070     }
1071 }
1072
1073 /*
1074
1075 =for apidoc is_utf8_valid_partial_char
1076
1077 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1078 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1079 points.  Otherwise, it returns 1 if there exists at least one non-empty
1080 sequence of bytes that when appended to sequence C<s>, starting at position
1081 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1082 otherwise returns 0.
1083
1084 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1085 point.
1086
1087 This is useful when a fixed-length buffer is being tested for being well-formed
1088 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1089 it is split somewhere in the middle of the final code point's UTF-8
1090 representation.  (Presumably when the buffer is refreshed with the next chunk
1091 of data, the new first bytes will complete the partial code point.)   This
1092 function is used to verify that the final bytes in the current buffer are in
1093 fact the legal beginning of some code point, so that if they aren't, the
1094 failure can be signalled without having to wait for the next read.
1095
1096 =cut
1097 */
1098 #define is_utf8_valid_partial_char(s, e)                                    \
1099                                 is_utf8_valid_partial_char_flags(s, e, 0)
1100
1101 /*
1102
1103 =for apidoc is_utf8_valid_partial_char_flags
1104
1105 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1106 or not the input is a valid UTF-8 encoded partial character, but it takes an
1107 extra parameter, C<flags>, which can further restrict which code points are
1108 considered valid.
1109
1110 If C<flags> is 0, this behaves identically to
1111 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1112 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1113 there is any sequence of bytes that can complete the input partial character in
1114 such a way that a non-prohibited character is formed, the function returns
1115 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1116 partial character input.  But many  of the other possible excluded types can be
1117 determined from just the first one or two bytes.
1118
1119 =cut
1120  */
1121
1122 PERL_STATIC_INLINE bool
1123 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1124 {
1125     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1126
1127     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1128                           |UTF8_DISALLOW_PERL_EXTENDED)));
1129
1130     if (s >= e || s + UTF8SKIP(s) <= e) {
1131         return FALSE;
1132     }
1133
1134     return cBOOL(_is_utf8_char_helper(s, e, flags));
1135 }
1136
1137 /*
1138
1139 =for apidoc is_utf8_fixed_width_buf_flags
1140
1141 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1142 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1143 otherwise it returns FALSE.
1144
1145 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1146 without restriction.  If the final few bytes of the buffer do not form a
1147 complete code point, this will return TRUE anyway, provided that
1148 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1149
1150 If C<flags> in non-zero, it can be any combination of the
1151 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1152 same meanings.
1153
1154 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1155 returns FALSE if the final few bytes of the string don't form a complete code
1156 point.
1157
1158 =cut
1159  */
1160 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1161                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1162
1163 /*
1164
1165 =for apidoc is_utf8_fixed_width_buf_loc_flags
1166
1167 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1168 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1169 to the beginning of any partial character at the end of the buffer; if there is
1170 no partial character C<*ep> will contain C<s>+C<len>.
1171
1172 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1173
1174 =cut
1175 */
1176
1177 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1178                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1179
1180 /*
1181
1182 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1183
1184 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1185 complete, valid characters found in the C<el> pointer.
1186
1187 =cut
1188 */
1189
1190 PERL_STATIC_INLINE bool
1191 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1192                                        const STRLEN len,
1193                                        const U8 **ep,
1194                                        STRLEN *el,
1195                                        const U32 flags)
1196 {
1197     const U8 * maybe_partial;
1198
1199     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1200
1201     if (! ep) {
1202         ep  = &maybe_partial;
1203     }
1204
1205     /* If it's entirely valid, return that; otherwise see if the only error is
1206      * that the final few bytes are for a partial character */
1207     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1208            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1209 }
1210
1211 /* ------------------------------- perl.h ----------------------------- */
1212
1213 /*
1214 =head1 Miscellaneous Functions
1215
1216 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1217
1218 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1219 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1220
1221 Return TRUE if the name is safe.
1222
1223 Used by the C<IS_SAFE_SYSCALL()> macro.
1224
1225 =cut
1226 */
1227
1228 PERL_STATIC_INLINE bool
1229 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1230     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1231      * perl itself uses xce*() functions which accept 8-bit strings.
1232      */
1233
1234     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1235
1236     if (len > 1) {
1237         char *null_at;
1238         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1239                 SETERRNO(ENOENT, LIB_INVARG);
1240                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1241                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1242                                    what, op_name, pv, null_at+1);
1243                 return FALSE;
1244         }
1245     }
1246
1247     return TRUE;
1248 }
1249
1250 /*
1251
1252 Return true if the supplied filename has a newline character
1253 immediately before the first (hopefully only) NUL.
1254
1255 My original look at this incorrectly used the len from SvPV(), but
1256 that's incorrect, since we allow for a NUL in pv[len-1].
1257
1258 So instead, strlen() and work from there.
1259
1260 This allow for the user reading a filename, forgetting to chomp it,
1261 then calling:
1262
1263   open my $foo, "$file\0";
1264
1265 */
1266
1267 #ifdef PERL_CORE
1268
1269 PERL_STATIC_INLINE bool
1270 S_should_warn_nl(const char *pv) {
1271     STRLEN len;
1272
1273     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1274
1275     len = strlen(pv);
1276
1277     return len > 0 && pv[len-1] == '\n';
1278 }
1279
1280 #endif
1281
1282 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1283
1284 #define MAX_CHARSET_NAME_LENGTH 2
1285
1286 PERL_STATIC_INLINE const char *
1287 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1288 {
1289     /* Returns a string that corresponds to the name of the regex character set
1290      * given by 'flags', and *lenp is set the length of that string, which
1291      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1292
1293     *lenp = 1;
1294     switch (get_regex_charset(flags)) {
1295         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1296         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1297         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1298         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1299         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1300             *lenp = 2;
1301             return ASCII_MORE_RESTRICT_PAT_MODS;
1302     }
1303     /* The NOT_REACHED; hides an assert() which has a rather complex
1304      * definition in perl.h. */
1305     NOT_REACHED; /* NOTREACHED */
1306     return "?";     /* Unknown */
1307 }
1308
1309 /*
1310
1311 Return false if any get magic is on the SV other than taint magic.
1312
1313 */
1314
1315 PERL_STATIC_INLINE bool
1316 S_sv_only_taint_gmagic(SV *sv) {
1317     MAGIC *mg = SvMAGIC(sv);
1318
1319     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1320
1321     while (mg) {
1322         if (mg->mg_type != PERL_MAGIC_taint
1323             && !(mg->mg_flags & MGf_GSKIP)
1324             && mg->mg_virtual->svt_get) {
1325             return FALSE;
1326         }
1327         mg = mg->mg_moremagic;
1328     }
1329
1330     return TRUE;
1331 }
1332
1333 /* ------------------ cop.h ------------------------------------------- */
1334
1335
1336 /* Enter a block. Push a new base context and return its address. */
1337
1338 PERL_STATIC_INLINE PERL_CONTEXT *
1339 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1340 {
1341     PERL_CONTEXT * cx;
1342
1343     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1344
1345     CXINC;
1346     cx = CX_CUR();
1347     cx->cx_type        = type;
1348     cx->blk_gimme      = gimme;
1349     cx->blk_oldsaveix  = saveix;
1350     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1351     cx->blk_oldcop     = PL_curcop;
1352     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1353     cx->blk_oldscopesp = PL_scopestack_ix;
1354     cx->blk_oldpm      = PL_curpm;
1355     cx->blk_old_tmpsfloor = PL_tmps_floor;
1356
1357     PL_tmps_floor        = PL_tmps_ix;
1358     CX_DEBUG(cx, "PUSH");
1359     return cx;
1360 }
1361
1362
1363 /* Exit a block (RETURN and LAST). */
1364
1365 PERL_STATIC_INLINE void
1366 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1367 {
1368     PERL_ARGS_ASSERT_CX_POPBLOCK;
1369
1370     CX_DEBUG(cx, "POP");
1371     /* these 3 are common to cx_popblock and cx_topblock */
1372     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1373     PL_scopestack_ix = cx->blk_oldscopesp;
1374     PL_curpm         = cx->blk_oldpm;
1375
1376     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1377      * and leaves a CX entry lying around for repeated use, so
1378      * skip for multicall */                  \
1379     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1380             || PL_savestack_ix == cx->blk_oldsaveix);
1381     PL_curcop     = cx->blk_oldcop;
1382     PL_tmps_floor = cx->blk_old_tmpsfloor;
1383 }
1384
1385 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1386  * Whereas cx_popblock() restores the state to the point just before
1387  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1388  * *after* cx_pushblock() was called. */
1389
1390 PERL_STATIC_INLINE void
1391 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1392 {
1393     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1394
1395     CX_DEBUG(cx, "TOP");
1396     /* these 3 are common to cx_popblock and cx_topblock */
1397     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1398     PL_scopestack_ix = cx->blk_oldscopesp;
1399     PL_curpm         = cx->blk_oldpm;
1400
1401     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1402 }
1403
1404
1405 PERL_STATIC_INLINE void
1406 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1407 {
1408     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1409
1410     PERL_ARGS_ASSERT_CX_PUSHSUB;
1411
1412     PERL_DTRACE_PROBE_ENTRY(cv);
1413     cx->blk_sub.cv = cv;
1414     cx->blk_sub.olddepth = CvDEPTH(cv);
1415     cx->blk_sub.prevcomppad = PL_comppad;
1416     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1417     cx->blk_sub.retop = retop;
1418     SvREFCNT_inc_simple_void_NN(cv);
1419     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1420 }
1421
1422
1423 /* subsets of cx_popsub() */
1424
1425 PERL_STATIC_INLINE void
1426 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1427 {
1428     CV *cv;
1429
1430     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1431     assert(CxTYPE(cx) == CXt_SUB);
1432
1433     PL_comppad = cx->blk_sub.prevcomppad;
1434     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1435     cv = cx->blk_sub.cv;
1436     CvDEPTH(cv) = cx->blk_sub.olddepth;
1437     cx->blk_sub.cv = NULL;
1438     SvREFCNT_dec(cv);
1439 }
1440
1441
1442 /* handle the @_ part of leaving a sub */
1443
1444 PERL_STATIC_INLINE void
1445 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1446 {
1447     AV *av;
1448
1449     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1450     assert(CxTYPE(cx) == CXt_SUB);
1451     assert(AvARRAY(MUTABLE_AV(
1452         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1453                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1454
1455     CX_POP_SAVEARRAY(cx);
1456     av = MUTABLE_AV(PAD_SVl(0));
1457     if (UNLIKELY(AvREAL(av)))
1458         /* abandon @_ if it got reified */
1459         clear_defarray(av, 0);
1460     else {
1461         CLEAR_ARGARRAY(av);
1462     }
1463 }
1464
1465
1466 PERL_STATIC_INLINE void
1467 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1468 {
1469     PERL_ARGS_ASSERT_CX_POPSUB;
1470     assert(CxTYPE(cx) == CXt_SUB);
1471
1472     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1473
1474     if (CxHASARGS(cx))
1475         cx_popsub_args(cx);
1476     cx_popsub_common(cx);
1477 }
1478
1479
1480 PERL_STATIC_INLINE void
1481 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1482 {
1483     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1484
1485     cx->blk_format.cv          = cv;
1486     cx->blk_format.retop       = retop;
1487     cx->blk_format.gv          = gv;
1488     cx->blk_format.dfoutgv     = PL_defoutgv;
1489     cx->blk_format.prevcomppad = PL_comppad;
1490     cx->blk_u16                = 0;
1491
1492     SvREFCNT_inc_simple_void_NN(cv);
1493     CvDEPTH(cv)++;
1494     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1495 }
1496
1497
1498 PERL_STATIC_INLINE void
1499 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1500 {
1501     CV *cv;
1502     GV *dfout;
1503
1504     PERL_ARGS_ASSERT_CX_POPFORMAT;
1505     assert(CxTYPE(cx) == CXt_FORMAT);
1506
1507     dfout = cx->blk_format.dfoutgv;
1508     setdefout(dfout);
1509     cx->blk_format.dfoutgv = NULL;
1510     SvREFCNT_dec_NN(dfout);
1511
1512     PL_comppad = cx->blk_format.prevcomppad;
1513     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1514     cv = cx->blk_format.cv;
1515     cx->blk_format.cv = NULL;
1516     --CvDEPTH(cv);
1517     SvREFCNT_dec_NN(cv);
1518 }
1519
1520
1521 PERL_STATIC_INLINE void
1522 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1523 {
1524     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1525
1526     cx->blk_eval.retop         = retop;
1527     cx->blk_eval.old_namesv    = namesv;
1528     cx->blk_eval.old_eval_root = PL_eval_root;
1529     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1530     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1531     cx->blk_eval.cur_top_env   = PL_top_env;
1532
1533     assert(!(PL_in_eval     & ~ 0x3F));
1534     assert(!(PL_op->op_type & ~0x1FF));
1535     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1536 }
1537
1538
1539 PERL_STATIC_INLINE void
1540 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1541 {
1542     SV *sv;
1543
1544     PERL_ARGS_ASSERT_CX_POPEVAL;
1545     assert(CxTYPE(cx) == CXt_EVAL);
1546
1547     PL_in_eval = CxOLD_IN_EVAL(cx);
1548     assert(!(PL_in_eval & 0xc0));
1549     PL_eval_root = cx->blk_eval.old_eval_root;
1550     sv = cx->blk_eval.cur_text;
1551     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1552         cx->blk_eval.cur_text = NULL;
1553         SvREFCNT_dec_NN(sv);
1554     }
1555
1556     sv = cx->blk_eval.old_namesv;
1557     if (sv) {
1558         cx->blk_eval.old_namesv = NULL;
1559         SvREFCNT_dec_NN(sv);
1560     }
1561 }
1562
1563
1564 /* push a plain loop, i.e.
1565  *     { block }
1566  *     while (cond) { block }
1567  *     for (init;cond;continue) { block }
1568  * This loop can be last/redo'ed etc.
1569  */
1570
1571 PERL_STATIC_INLINE void
1572 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1573 {
1574     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1575     cx->blk_loop.my_op = cLOOP;
1576 }
1577
1578
1579 /* push a true for loop, i.e.
1580  *     for var (list) { block }
1581  */
1582
1583 PERL_STATIC_INLINE void
1584 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1585 {
1586     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1587
1588     /* this one line is common with cx_pushloop_plain */
1589     cx->blk_loop.my_op = cLOOP;
1590
1591     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1592     cx->blk_loop.itersave      = itersave;
1593 #ifdef USE_ITHREADS
1594     cx->blk_loop.oldcomppad = PL_comppad;
1595 #endif
1596 }
1597
1598
1599 /* pop all loop types, including plain */
1600
1601 PERL_STATIC_INLINE void
1602 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1603 {
1604     PERL_ARGS_ASSERT_CX_POPLOOP;
1605
1606     assert(CxTYPE_is_LOOP(cx));
1607     if (  CxTYPE(cx) == CXt_LOOP_ARY
1608        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1609     {
1610         /* Free ary or cur. This assumes that state_u.ary.ary
1611          * aligns with state_u.lazysv.cur. See cx_dup() */
1612         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1613         cx->blk_loop.state_u.lazysv.cur = NULL;
1614         SvREFCNT_dec_NN(sv);
1615         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1616             sv = cx->blk_loop.state_u.lazysv.end;
1617             cx->blk_loop.state_u.lazysv.end = NULL;
1618             SvREFCNT_dec_NN(sv);
1619         }
1620     }
1621     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1622         SV *cursv;
1623         SV **svp = (cx)->blk_loop.itervar_u.svp;
1624         if ((cx->cx_type & CXp_FOR_GV))
1625             svp = &GvSV((GV*)svp);
1626         cursv = *svp;
1627         *svp = cx->blk_loop.itersave;
1628         cx->blk_loop.itersave = NULL;
1629         SvREFCNT_dec(cursv);
1630     }
1631 }
1632
1633
1634 PERL_STATIC_INLINE void
1635 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1636 {
1637     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1638
1639     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1640 }
1641
1642
1643 PERL_STATIC_INLINE void
1644 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1645 {
1646     PERL_ARGS_ASSERT_CX_POPWHEN;
1647     assert(CxTYPE(cx) == CXt_WHEN);
1648
1649     PERL_UNUSED_ARG(cx);
1650     PERL_UNUSED_CONTEXT;
1651     /* currently NOOP */
1652 }
1653
1654
1655 PERL_STATIC_INLINE void
1656 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1657 {
1658     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1659
1660     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1661     cx->blk_givwhen.defsv_save = orig_defsv;
1662 }
1663
1664
1665 PERL_STATIC_INLINE void
1666 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1667 {
1668     SV *sv;
1669
1670     PERL_ARGS_ASSERT_CX_POPGIVEN;
1671     assert(CxTYPE(cx) == CXt_GIVEN);
1672
1673     sv = GvSV(PL_defgv);
1674     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1675     cx->blk_givwhen.defsv_save = NULL;
1676     SvREFCNT_dec(sv);
1677 }
1678
1679 /* ------------------ util.h ------------------------------------------- */
1680
1681 /*
1682 =head1 Miscellaneous Functions
1683
1684 =for apidoc foldEQ
1685
1686 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1687 same
1688 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
1689 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
1690 range bytes match only themselves.
1691
1692 =cut
1693 */
1694
1695 PERL_STATIC_INLINE I32
1696 Perl_foldEQ(const char *s1, const char *s2, I32 len)
1697 {
1698     const U8 *a = (const U8 *)s1;
1699     const U8 *b = (const U8 *)s2;
1700
1701     PERL_ARGS_ASSERT_FOLDEQ;
1702
1703     assert(len >= 0);
1704
1705     while (len--) {
1706         if (*a != *b && *a != PL_fold[*b])
1707             return 0;
1708         a++,b++;
1709     }
1710     return 1;
1711 }
1712
1713 PERL_STATIC_INLINE I32
1714 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1715 {
1716     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
1717      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1718      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
1719      * does it check that the strings each have at least 'len' characters */
1720
1721     const U8 *a = (const U8 *)s1;
1722     const U8 *b = (const U8 *)s2;
1723
1724     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1725
1726     assert(len >= 0);
1727
1728     while (len--) {
1729         if (*a != *b && *a != PL_fold_latin1[*b]) {
1730             return 0;
1731         }
1732         a++, b++;
1733     }
1734     return 1;
1735 }
1736
1737 /*
1738 =for apidoc foldEQ_locale
1739
1740 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1741 same case-insensitively in the current locale; false otherwise.
1742
1743 =cut
1744 */
1745
1746 PERL_STATIC_INLINE I32
1747 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1748 {
1749     dVAR;
1750     const U8 *a = (const U8 *)s1;
1751     const U8 *b = (const U8 *)s2;
1752
1753     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1754
1755     assert(len >= 0);
1756
1757     while (len--) {
1758         if (*a != *b && *a != PL_fold_locale[*b])
1759             return 0;
1760         a++,b++;
1761     }
1762     return 1;
1763 }
1764
1765 #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1766
1767 PERL_STATIC_INLINE void *
1768 S_my_memrchr(const char * s, const char c, const STRLEN len)
1769 {
1770     /* memrchr(), since many platforms lack it */
1771
1772     const char * t = s + len - 1;
1773
1774     PERL_ARGS_ASSERT_MY_MEMRCHR;
1775
1776     while (t >= s) {
1777         if (*t == c) {
1778             return (void *) t;
1779         }
1780         t--;
1781     }
1782
1783     return NULL;
1784 }
1785
1786 #endif
1787
1788 /*
1789  * ex: set ts=8 sts=4 sw=4 et:
1790  */