inline.h

   1 /*    inline.h
   2  *
   3  *    Copyright (C) 2012 by Larry Wall and others
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  * This file is a home for static inline functions that cannot go in other
   9  * headers files, because they depend on proto.h (included after most other
  10  * headers) or struct definitions.
  11  *
  12  * Each section names the header file that the functions "belong" to.
  13  */
  14
  15 /* ------------------------------- av.h ------------------------------- */
  16
  17 PERL_STATIC_INLINE SSize_t
  18 S_av_top_index(pTHX_ AV *av)
  19 {
  20     PERL_ARGS_ASSERT_AV_TOP_INDEX;
  21     assert(SvTYPE(av) == SVt_PVAV);
  22
  23     return AvFILL(av);
  24 }
  25
  26 /* ------------------------------- cv.h ------------------------------- */
  27
  28 PERL_STATIC_INLINE GV *
  29 S_CvGV(pTHX_ CV *sv)
  30 {
  31     return CvNAMED(sv)
  32         ? Perl_cvgv_from_hek(aTHX_ sv)
  33         : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
  34 }
  35
  36 PERL_STATIC_INLINE I32 *
  37 S_CvDEPTHp(const CV * const sv)
  38 {
  39     assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
  40     return &((XPVCV*)SvANY(sv))->xcv_depth;
  41 }
  42
  43 /*
  44  CvPROTO returns the prototype as stored, which is not necessarily what
  45  the interpreter should be using. Specifically, the interpreter assumes
  46  that spaces have been stripped, which has been the case if the prototype
  47  was added by toke.c, but is generally not the case if it was added elsewhere.
  48  Since we can't enforce the spacelessness at assignment time, this routine
  49  provides a temporary copy at parse time with spaces removed.
  50  I<orig> is the start of the original buffer, I<len> is the length of the
  51  prototype and will be updated when this returns.
  52  */
  53
  54 #ifdef PERL_CORE
  55 PERL_STATIC_INLINE char *
  56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
  57 {
  58     SV * tmpsv;
  59     char * tmps;
  60     tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
  61     tmps = SvPVX(tmpsv);
  62     while ((*len)--) {
  63         if (!isSPACE(*orig))
  64             *tmps++ = *orig;
  65         orig++;
  66     }
  67     *tmps = '\0';
  68     *len = tmps - SvPVX(tmpsv);
  69                 return SvPVX(tmpsv);
  70 }
  71 #endif
  72
  73 /* ------------------------------- mg.h ------------------------------- */
  74
  75 #if defined(PERL_CORE) || defined(PERL_EXT)
  76 /* assumes get-magic and stringification have already occurred */
  77 PERL_STATIC_INLINE STRLEN
  78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
  79 {
  80     assert(mg->mg_type == PERL_MAGIC_regex_global);
  81     assert(mg->mg_len != -1);
  82     if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
  83         return (STRLEN)mg->mg_len;
  84     else {
  85         const STRLEN pos = (STRLEN)mg->mg_len;
  86         /* Without this check, we may read past the end of the buffer: */
  87         if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
  88         return sv_or_pv_pos_u2b(sv, s, pos, NULL);
  89     }
  90 }
  91 #endif
  92
  93 /* ------------------------------- pad.h ------------------------------ */
  94
  95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
  96 PERL_STATIC_INLINE bool
  97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
  98 {
  99     /* is seq within the range _LOW to _HIGH ?
 100      * This is complicated by the fact that PL_cop_seqmax
 101      * may have wrapped around at some point */
 102     if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
 103         return FALSE; /* not yet introduced */
 104
 105     if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
 106     /* in compiling scope */
 107         if (
 108             (seq >  COP_SEQ_RANGE_LOW(pn))
 109             ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
 110             : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
 111         )
 112             return TRUE;
 113     }
 114     else if (
 115         (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
 116         ?
 117             (  seq >  COP_SEQ_RANGE_LOW(pn)
 118             || seq <= COP_SEQ_RANGE_HIGH(pn))
 119
 120         :    (  seq >  COP_SEQ_RANGE_LOW(pn)
 121              && seq <= COP_SEQ_RANGE_HIGH(pn))
 122     )
 123         return TRUE;
 124     return FALSE;
 125 }
 126 #endif
 127
 128 /* ------------------------------- pp.h ------------------------------- */
 129
 130 PERL_STATIC_INLINE I32
 131 S_TOPMARK(pTHX)
 132 {
 133     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 134                                  "MARK top  %p %" IVdf "\n",
 135                                   PL_markstack_ptr,
 136                                   (IV)*PL_markstack_ptr)));
 137     return *PL_markstack_ptr;
 138 }
 139
 140 PERL_STATIC_INLINE I32
 141 S_POPMARK(pTHX)
 142 {
 143     DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
 144                                  "MARK pop  %p %" IVdf "\n",
 145                                   (PL_markstack_ptr-1),
 146                                   (IV)*(PL_markstack_ptr-1))));
 147     assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
 148     return *PL_markstack_ptr--;
 149 }
 150
 151 /* ----------------------------- regexp.h ----------------------------- */
 152
 153 PERL_STATIC_INLINE struct regexp *
 154 S_ReANY(const REGEXP * const re)
 155 {
 156     assert(isREGEXP(re));
 157     return re->sv_u.svu_rx;
 158 }
 159
 160 /* ------------------------------- sv.h ------------------------------- */
 161
 162 PERL_STATIC_INLINE SV *
 163 S_SvREFCNT_inc(SV *sv)
 164 {
 165     if (LIKELY(sv != NULL))
 166         SvREFCNT(sv)++;
 167     return sv;
 168 }
 169 PERL_STATIC_INLINE SV *
 170 S_SvREFCNT_inc_NN(SV *sv)
 171 {
 172     SvREFCNT(sv)++;
 173     return sv;
 174 }
 175 PERL_STATIC_INLINE void
 176 S_SvREFCNT_inc_void(SV *sv)
 177 {
 178     if (LIKELY(sv != NULL))
 179         SvREFCNT(sv)++;
 180 }
 181 PERL_STATIC_INLINE void
 182 S_SvREFCNT_dec(pTHX_ SV *sv)
 183 {
 184     if (LIKELY(sv != NULL)) {
 185         U32 rc = SvREFCNT(sv);
 186         if (LIKELY(rc > 1))
 187             SvREFCNT(sv) = rc - 1;
 188         else
 189             Perl_sv_free2(aTHX_ sv, rc);
 190     }
 191 }
 192
 193 PERL_STATIC_INLINE void
 194 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
 195 {
 196     U32 rc = SvREFCNT(sv);
 197     if (LIKELY(rc > 1))
 198         SvREFCNT(sv) = rc - 1;
 199     else
 200         Perl_sv_free2(aTHX_ sv, rc);
 201 }
 202
 203 PERL_STATIC_INLINE void
 204 SvAMAGIC_on(SV *sv)
 205 {
 206     assert(SvROK(sv));
 207     if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
 208 }
 209 PERL_STATIC_INLINE void
 210 SvAMAGIC_off(SV *sv)
 211 {
 212     if (SvROK(sv) && SvOBJECT(SvRV(sv)))
 213         HvAMAGIC_off(SvSTASH(SvRV(sv)));
 214 }
 215
 216 PERL_STATIC_INLINE U32
 217 S_SvPADSTALE_on(SV *sv)
 218 {
 219     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 220     return SvFLAGS(sv) |= SVs_PADSTALE;
 221 }
 222 PERL_STATIC_INLINE U32
 223 S_SvPADSTALE_off(SV *sv)
 224 {
 225     assert(!(SvFLAGS(sv) & SVs_PADTMP));
 226     return SvFLAGS(sv) &= ~SVs_PADSTALE;
 227 }
 228 #if defined(PERL_CORE) || defined (PERL_EXT)
 229 PERL_STATIC_INLINE STRLEN
 230 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
 231 {
 232     PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
 233     if (SvGAMAGIC(sv)) {
 234         U8 *hopped = utf8_hop((U8 *)pv, pos);
 235         if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
 236         return (STRLEN)(hopped - (U8 *)pv);
 237     }
 238     return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
 239 }
 240 #endif
 241
 242 /* ------------------------------- handy.h ------------------------------- */
 243
 244 /* saves machine code for a common noreturn idiom typically used in Newx*() */
 245 #ifdef GCC_DIAG_PRAGMA
 246 GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
 247 #endif
 248 static void
 249 S_croak_memory_wrap(void)
 250 {
 251     Perl_croak_nocontext("%s",PL_memory_wrap);
 252 }
 253 #ifdef GCC_DIAG_PRAGMA
 254 GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
 255 #endif
 256
 257 /* ------------------------------- utf8.h ------------------------------- */
 258
 259 /*
 260 =head1 Unicode Support
 261 */
 262
 263 PERL_STATIC_INLINE void
 264 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
 265 {
 266     /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
 267      * encoded string at '*dest', updating '*dest' to include it */
 268
 269     PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
 270
 271     if (NATIVE_BYTE_IS_INVARIANT(byte))
 272         *((*dest)++) = byte;
 273     else {
 274         *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
 275         *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
 276     }
 277 }
 278
 279 /*
 280 =for apidoc valid_utf8_to_uvchr
 281 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
 282 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
 283 it passes C<L</isUTF8_CHAR>>.  Surrogates, non-character code points, and
 284 non-Unicode code points are allowed.
 285
 286 =cut
 287
 288  */
 289
 290 PERL_STATIC_INLINE UV
 291 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
 292 {
 293     const UV expectlen = UTF8SKIP(s);
 294     const U8* send = s + expectlen;
 295     UV uv = *s;
 296
 297     PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
 298
 299     if (retlen) {
 300         *retlen = expectlen;
 301     }
 302
 303     /* An invariant is trivially returned */
 304     if (expectlen == 1) {
 305         return uv;
 306     }
 307
 308     /* Remove the leading bits that indicate the number of bytes, leaving just
 309      * the bits that are part of the value */
 310     uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 311
 312     /* Now, loop through the remaining bytes, accumulating each into the
 313      * working total as we go.  (I khw tried unrolling the loop for up to 4
 314      * bytes, but there was no performance improvement) */
 315     for (++s; s < send; s++) {
 316         uv = UTF8_ACCUMULATE(uv, *s);
 317     }
 318
 319     return UNI_TO_NATIVE(uv);
 320
 321 }
 322
 323 /*
 324 =for apidoc is_utf8_invariant_string
 325
 326 Returns TRUE if the first C<len> bytes of the string C<s> are the same
 327 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
 328 EBCDIC machines); otherwise it returns FALSE.  That is, it returns TRUE if they
 329 are UTF-8 invariant.  On ASCII-ish machines, all the ASCII characters and only
 330 the ASCII characters fit this definition.  On EBCDIC machines, the ASCII-range
 331 characters are invariant, but so also are the C1 controls.
 332
 333 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
 334 use this option, that C<s> can't have embedded C<NUL> characters and has to
 335 have a terminating C<NUL> byte).
 336
 337 See also
 338 C<L</is_utf8_string>>,
 339 C<L</is_utf8_string_flags>>,
 340 C<L</is_utf8_string_loc>>,
 341 C<L</is_utf8_string_loc_flags>>,
 342 C<L</is_utf8_string_loclen>>,
 343 C<L</is_utf8_string_loclen_flags>>,
 344 C<L</is_utf8_fixed_width_buf_flags>>,
 345 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 346 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 347 C<L</is_strict_utf8_string>>,
 348 C<L</is_strict_utf8_string_loc>>,
 349 C<L</is_strict_utf8_string_loclen>>,
 350 C<L</is_c9strict_utf8_string>>,
 351 C<L</is_c9strict_utf8_string_loc>>,
 352 and
 353 C<L</is_c9strict_utf8_string_loclen>>.
 354
 355 =cut
 356
 357 */
 358
 359 #define is_utf8_invariant_string(s, len)                                    \
 360                                 is_utf8_invariant_string_loc(s, len, NULL)
 361
 362 /*
 363 =for apidoc is_utf8_invariant_string_loc
 364
 365 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
 366 the first UTF-8 variant character in the C<ep> pointer; if all characters are
 367 UTF-8 invariant, this function does not change the contents of C<*ep>.
 368
 369 =cut
 370
 371 XXX On ASCII machines this could be sped up by doing word-at-a-time operations
 372
 373 */
 374
 375 PERL_STATIC_INLINE bool
 376 S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
 377 {
 378     const U8* const send = s + (len ? len : strlen((const char *)s));
 379     const U8* x = s;
 380
 381     PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
 382
 383     while (x < send) {
 384         if (UTF8_IS_INVARIANT(*x)) {
 385             x++;
 386             continue;
 387         }
 388
 389         if (ep) {
 390             *ep = x;
 391         }
 392
 393         return FALSE;
 394     }
 395
 396     return TRUE;
 397 }
 398
 399 /*
 400 =for apidoc is_utf8_string
 401
 402 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 403 Perl-extended-UTF-8 string; returns FALSE otherwise.  If C<len> is 0, it will
 404 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
 405 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 406 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 407
 408 This function considers Perl's extended UTF-8 to be valid.  That means that
 409 code points above Unicode, surrogates, and non-character code points are
 410 considered valid by this function.  Use C<L</is_strict_utf8_string>>,
 411 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
 412 code points are considered valid.
 413
 414 See also
 415 C<L</is_utf8_invariant_string>>,
 416 C<L</is_utf8_invariant_string_loc>>,
 417 C<L</is_utf8_string_loc>>,
 418 C<L</is_utf8_string_loclen>>,
 419 C<L</is_utf8_fixed_width_buf_flags>>,
 420 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 421 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 422
 423 =cut
 424 */
 425
 426 PERL_STATIC_INLINE bool
 427 Perl_is_utf8_string(const U8 *s, const STRLEN len)
 428 {
 429     /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
 430      * Be aware of possible changes to that */
 431
 432     const U8* const send = s + (len ? len : strlen((const char *)s));
 433     const U8* x = s;
 434
 435     PERL_ARGS_ASSERT_IS_UTF8_STRING;
 436
 437     while (x < send) {
 438         const STRLEN cur_len = isUTF8_CHAR(x, send);
 439         if (UNLIKELY(! cur_len)) {
 440             return FALSE;
 441         }
 442         x += cur_len;
 443     }
 444
 445     return TRUE;
 446 }
 447
 448 /*
 449 =for apidoc is_strict_utf8_string
 450
 451 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 452 UTF-8-encoded string that is fully interchangeable by any application using
 453 Unicode rules; otherwise it returns FALSE.  If C<len> is 0, it will be
 454 calculated using C<strlen(s)> (which means if you use this option, that C<s>
 455 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
 456 byte).  Note that all characters being ASCII constitute 'a valid UTF-8 string'.
 457
 458 This function returns FALSE for strings containing any
 459 code points above the Unicode max of 0x10FFFF, surrogate code points, or
 460 non-character code points.
 461
 462 See also
 463 C<L</is_utf8_invariant_string>>,
 464 C<L</is_utf8_invariant_string_loc>>,
 465 C<L</is_utf8_string>>,
 466 C<L</is_utf8_string_flags>>,
 467 C<L</is_utf8_string_loc>>,
 468 C<L</is_utf8_string_loc_flags>>,
 469 C<L</is_utf8_string_loclen>>,
 470 C<L</is_utf8_string_loclen_flags>>,
 471 C<L</is_utf8_fixed_width_buf_flags>>,
 472 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 473 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 474 C<L</is_strict_utf8_string_loc>>,
 475 C<L</is_strict_utf8_string_loclen>>,
 476 C<L</is_c9strict_utf8_string>>,
 477 C<L</is_c9strict_utf8_string_loc>>,
 478 and
 479 C<L</is_c9strict_utf8_string_loclen>>.
 480
 481 =cut
 482 */
 483
 484 PERL_STATIC_INLINE bool
 485 S_is_strict_utf8_string(const U8 *s, const STRLEN len)
 486 {
 487     const U8* const send = s + (len ? len : strlen((const char *)s));
 488     const U8* x = s;
 489
 490     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
 491
 492     while (x < send) {
 493         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 494         if (UNLIKELY(! cur_len)) {
 495             return FALSE;
 496         }
 497         x += cur_len;
 498     }
 499
 500     return TRUE;
 501 }
 502
 503 /*
 504 =for apidoc is_c9strict_utf8_string
 505
 506 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 507 UTF-8-encoded string that conforms to
 508 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
 509 otherwise it returns FALSE.  If C<len> is 0, it will be calculated using
 510 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
 511 C<NUL> characters and has to have a terminating C<NUL> byte).  Note that all
 512 characters being ASCII constitute 'a valid UTF-8 string'.
 513
 514 This function returns FALSE for strings containing any code points above the
 515 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
 516 code points per
 517 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
 518
 519 See also
 520 C<L</is_utf8_invariant_string>>,
 521 C<L</is_utf8_invariant_string_loc>>,
 522 C<L</is_utf8_string>>,
 523 C<L</is_utf8_string_flags>>,
 524 C<L</is_utf8_string_loc>>,
 525 C<L</is_utf8_string_loc_flags>>,
 526 C<L</is_utf8_string_loclen>>,
 527 C<L</is_utf8_string_loclen_flags>>,
 528 C<L</is_utf8_fixed_width_buf_flags>>,
 529 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 530 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 531 C<L</is_strict_utf8_string>>,
 532 C<L</is_strict_utf8_string_loc>>,
 533 C<L</is_strict_utf8_string_loclen>>,
 534 C<L</is_c9strict_utf8_string_loc>>,
 535 and
 536 C<L</is_c9strict_utf8_string_loclen>>.
 537
 538 =cut
 539 */
 540
 541 PERL_STATIC_INLINE bool
 542 S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
 543 {
 544     const U8* const send = s + (len ? len : strlen((const char *)s));
 545     const U8* x = s;
 546
 547     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
 548
 549     while (x < send) {
 550         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 551         if (UNLIKELY(! cur_len)) {
 552             return FALSE;
 553         }
 554         x += cur_len;
 555     }
 556
 557     return TRUE;
 558 }
 559
 560 /* The above 3 functions could have been moved into the more general one just
 561  * below, and made #defines that call it with the right 'flags'.  They are
 562  * currently kept separate to increase their chances of getting inlined */
 563
 564 /*
 565 =for apidoc is_utf8_string_flags
 566
 567 Returns TRUE if the first C<len> bytes of string C<s> form a valid
 568 UTF-8 string, subject to the restrictions imposed by C<flags>;
 569 returns FALSE otherwise.  If C<len> is 0, it will be calculated
 570 using C<strlen(s)> (which means if you use this option, that C<s> can't have
 571 embedded C<NUL> characters and has to have a terminating C<NUL> byte).  Note
 572 that all characters being ASCII constitute 'a valid UTF-8 string'.
 573
 574 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
 575 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
 576 as C<L</is_strict_utf8_string>>; and if C<flags> is
 577 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
 578 C<L</is_c9strict_utf8_string>>.  Otherwise C<flags> may be any
 579 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
 580 C<L</utf8n_to_uvchr>>, with the same meanings.
 581
 582 See also
 583 C<L</is_utf8_invariant_string>>,
 584 C<L</is_utf8_invariant_string_loc>>,
 585 C<L</is_utf8_string>>,
 586 C<L</is_utf8_string_loc>>,
 587 C<L</is_utf8_string_loc_flags>>,
 588 C<L</is_utf8_string_loclen>>,
 589 C<L</is_utf8_string_loclen_flags>>,
 590 C<L</is_utf8_fixed_width_buf_flags>>,
 591 C<L</is_utf8_fixed_width_buf_loc_flags>>,
 592 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
 593 C<L</is_strict_utf8_string>>,
 594 C<L</is_strict_utf8_string_loc>>,
 595 C<L</is_strict_utf8_string_loclen>>,
 596 C<L</is_c9strict_utf8_string>>,
 597 C<L</is_c9strict_utf8_string_loc>>,
 598 and
 599 C<L</is_c9strict_utf8_string_loclen>>.
 600
 601 =cut
 602 */
 603
 604 PERL_STATIC_INLINE bool
 605 S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
 606 {
 607     const U8* const send = s + (len ? len : strlen((const char *)s));
 608     const U8* x = s;
 609
 610     PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
 611     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 612                           |UTF8_DISALLOW_ABOVE_31_BIT)));
 613
 614     if (flags == 0) {
 615         return is_utf8_string(s, len);
 616     }
 617
 618     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 619                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 620     {
 621         return is_strict_utf8_string(s, len);
 622     }
 623
 624     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 625                                        == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 626     {
 627         return is_c9strict_utf8_string(s, len);
 628     }
 629
 630     while (x < send) {
 631         STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 632         if (UNLIKELY(! cur_len)) {
 633             return FALSE;
 634         }
 635         x += cur_len;
 636     }
 637
 638     return TRUE;
 639 }
 640
 641 /*
 642
 643 =for apidoc is_utf8_string_loc
 644
 645 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 646 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 647 "utf8ness success") in the C<ep> pointer.
 648
 649 See also C<L</is_utf8_string_loclen>>.
 650
 651 =cut
 652 */
 653
 654 #define is_utf8_string_loc(s, len, ep)  is_utf8_string_loclen(s, len, ep, 0)
 655
 656 /*
 657
 658 =for apidoc is_utf8_string_loclen
 659
 660 Like C<L</is_utf8_string>> but stores the location of the failure (in the
 661 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 662 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 663 encoded characters in the C<el> pointer.
 664
 665 See also C<L</is_utf8_string_loc>>.
 666
 667 =cut
 668 */
 669
 670 PERL_STATIC_INLINE bool
 671 Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 672 {
 673     const U8* const send = s + (len ? len : strlen((const char *)s));
 674     const U8* x = s;
 675     STRLEN outlen = 0;
 676
 677     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
 678
 679     while (x < send) {
 680         const STRLEN cur_len = isUTF8_CHAR(x, send);
 681         if (UNLIKELY(! cur_len)) {
 682             break;
 683         }
 684         x += cur_len;
 685         outlen++;
 686     }
 687
 688     if (el)
 689         *el = outlen;
 690
 691     if (ep) {
 692         *ep = x;
 693     }
 694
 695     return (x == send);
 696 }
 697
 698 /*
 699
 700 =for apidoc is_strict_utf8_string_loc
 701
 702 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 703 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 704 "utf8ness success") in the C<ep> pointer.
 705
 706 See also C<L</is_strict_utf8_string_loclen>>.
 707
 708 =cut
 709 */
 710
 711 #define is_strict_utf8_string_loc(s, len, ep)                               \
 712                                 is_strict_utf8_string_loclen(s, len, ep, 0)
 713
 714 /*
 715
 716 =for apidoc is_strict_utf8_string_loclen
 717
 718 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
 719 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 720 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 721 encoded characters in the C<el> pointer.
 722
 723 See also C<L</is_strict_utf8_string_loc>>.
 724
 725 =cut
 726 */
 727
 728 PERL_STATIC_INLINE bool
 729 S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 730 {
 731     const U8* const send = s + (len ? len : strlen((const char *)s));
 732     const U8* x = s;
 733     STRLEN outlen = 0;
 734
 735     PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
 736
 737     while (x < send) {
 738         const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
 739         if (UNLIKELY(! cur_len)) {
 740             break;
 741         }
 742         x += cur_len;
 743         outlen++;
 744     }
 745
 746     if (el)
 747         *el = outlen;
 748
 749     if (ep) {
 750         *ep = x;
 751     }
 752
 753     return (x == send);
 754 }
 755
 756 /*
 757
 758 =for apidoc is_c9strict_utf8_string_loc
 759
 760 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 761 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 762 "utf8ness success") in the C<ep> pointer.
 763
 764 See also C<L</is_c9strict_utf8_string_loclen>>.
 765
 766 =cut
 767 */
 768
 769 #define is_c9strict_utf8_string_loc(s, len, ep)                             \
 770                             is_c9strict_utf8_string_loclen(s, len, ep, 0)
 771
 772 /*
 773
 774 =for apidoc is_c9strict_utf8_string_loclen
 775
 776 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
 777 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 778 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
 779 characters in the C<el> pointer.
 780
 781 See also C<L</is_c9strict_utf8_string_loc>>.
 782
 783 =cut
 784 */
 785
 786 PERL_STATIC_INLINE bool
 787 S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
 788 {
 789     const U8* const send = s + (len ? len : strlen((const char *)s));
 790     const U8* x = s;
 791     STRLEN outlen = 0;
 792
 793     PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
 794
 795     while (x < send) {
 796         const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
 797         if (UNLIKELY(! cur_len)) {
 798             break;
 799         }
 800         x += cur_len;
 801         outlen++;
 802     }
 803
 804     if (el)
 805         *el = outlen;
 806
 807     if (ep) {
 808         *ep = x;
 809     }
 810
 811     return (x == send);
 812 }
 813
 814 /*
 815
 816 =for apidoc is_utf8_string_loc_flags
 817
 818 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 819 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 820 "utf8ness success") in the C<ep> pointer.
 821
 822 See also C<L</is_utf8_string_loclen_flags>>.
 823
 824 =cut
 825 */
 826
 827 #define is_utf8_string_loc_flags(s, len, ep, flags)                         \
 828                         is_utf8_string_loclen_flags(s, len, ep, 0, flags)
 829
 830
 831 /* The above 3 actual functions could have been moved into the more general one
 832  * just below, and made #defines that call it with the right 'flags'.  They are
 833  * currently kept separate to increase their chances of getting inlined */
 834
 835 /*
 836
 837 =for apidoc is_utf8_string_loclen_flags
 838
 839 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
 840 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
 841 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
 842 encoded characters in the C<el> pointer.
 843
 844 See also C<L</is_utf8_string_loc_flags>>.
 845
 846 =cut
 847 */
 848
 849 PERL_STATIC_INLINE bool
 850 S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
 851 {
 852     const U8* const send = s + (len ? len : strlen((const char *)s));
 853     const U8* x = s;
 854     STRLEN outlen = 0;
 855
 856     PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
 857     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
 858                           |UTF8_DISALLOW_ABOVE_31_BIT)));
 859
 860     if (flags == 0) {
 861         return is_utf8_string_loclen(s, len, ep, el);
 862     }
 863
 864     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 865                                         == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
 866     {
 867         return is_strict_utf8_string_loclen(s, len, ep, el);
 868     }
 869
 870     if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
 871                                     == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
 872     {
 873         return is_c9strict_utf8_string_loclen(s, len, ep, el);
 874     }
 875
 876     while (x < send) {
 877         const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
 878         if (UNLIKELY(! cur_len)) {
 879             break;
 880         }
 881         x += cur_len;
 882         outlen++;
 883     }
 884
 885     if (el)
 886         *el = outlen;
 887
 888     if (ep) {
 889         *ep = x;
 890     }
 891
 892     return (x == send);
 893 }
 894
 895 /*
 896 =for apidoc utf8_distance
 897
 898 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
 899 and C<b>.
 900
 901 WARNING: use only if you *know* that the pointers point inside the
 902 same UTF-8 buffer.
 903
 904 =cut
 905 */
 906
 907 PERL_STATIC_INLINE IV
 908 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
 909 {
 910     PERL_ARGS_ASSERT_UTF8_DISTANCE;
 911
 912     return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
 913 }
 914
 915 /*
 916 =for apidoc utf8_hop
 917
 918 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
 919 forward or backward.
 920
 921 WARNING: do not use the following unless you *know* C<off> is within
 922 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
 923 on the first byte of character or just after the last byte of a character.
 924
 925 =cut
 926 */
 927
 928 PERL_STATIC_INLINE U8 *
 929 Perl_utf8_hop(const U8 *s, SSize_t off)
 930 {
 931     PERL_ARGS_ASSERT_UTF8_HOP;
 932
 933     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 934      * the bitops (especially ~) can create illegal UTF-8.
 935      * In other words: in Perl UTF-8 is not just for Unicode. */
 936
 937     if (off >= 0) {
 938         while (off--)
 939             s += UTF8SKIP(s);
 940     }
 941     else {
 942         while (off++) {
 943             s--;
 944             while (UTF8_IS_CONTINUATION(*s))
 945                 s--;
 946         }
 947     }
 948     GCC_DIAG_IGNORE(-Wcast-qual);
 949     return (U8 *)s;
 950     GCC_DIAG_RESTORE;
 951 }
 952
 953 /*
 954 =for apidoc utf8_hop_forward
 955
 956 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
 957 forward.
 958
 959 C<off> must be non-negative.
 960
 961 C<s> must be before or equal to C<end>.
 962
 963 When moving forward it will not move beyond C<end>.
 964
 965 Will not exceed this limit even if the string is not valid "UTF-8".
 966
 967 =cut
 968 */
 969
 970 PERL_STATIC_INLINE U8 *
 971 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
 972 {
 973     PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
 974
 975     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
 976      * the bitops (especially ~) can create illegal UTF-8.
 977      * In other words: in Perl UTF-8 is not just for Unicode. */
 978
 979     assert(s <= end);
 980     assert(off >= 0);
 981
 982     while (off--) {
 983         STRLEN skip = UTF8SKIP(s);
 984         if ((STRLEN)(end - s) <= skip) {
 985             GCC_DIAG_IGNORE(-Wcast-qual);
 986             return (U8 *)end;
 987             GCC_DIAG_RESTORE;
 988         }
 989         s += skip;
 990     }
 991
 992     GCC_DIAG_IGNORE(-Wcast-qual);
 993     return (U8 *)s;
 994     GCC_DIAG_RESTORE;
 995 }
 996
 997 /*
 998 =for apidoc utf8_hop_back
 999
1000 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1001 backward.
1002
1003 C<off> must be non-positive.
1004
1005 C<s> must be after or equal to C<start>.
1006
1007 When moving backward it will not move before C<start>.
1008
1009 Will not exceed this limit even if the string is not valid "UTF-8".
1010
1011 =cut
1012 */
1013
1014 PERL_STATIC_INLINE U8 *
1015 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1016 {
1017     PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1018
1019     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1020      * the bitops (especially ~) can create illegal UTF-8.
1021      * In other words: in Perl UTF-8 is not just for Unicode. */
1022
1023     assert(start <= s);
1024     assert(off <= 0);
1025
1026     while (off++ && s > start) {
1027         s--;
1028         while (UTF8_IS_CONTINUATION(*s) && s > start)
1029             s--;
1030     }
1031
1032     GCC_DIAG_IGNORE(-Wcast-qual);
1033     return (U8 *)s;
1034     GCC_DIAG_RESTORE;
1035 }
1036
1037 /*
1038 =for apidoc utf8_hop_safe
1039
1040 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1041 either forward or backward.
1042
1043 When moving backward it will not move before C<start>.
1044
1045 When moving forward it will not move beyond C<end>.
1046
1047 Will not exceed those limits even if the string is not valid "UTF-8".
1048
1049 =cut
1050 */
1051
1052 PERL_STATIC_INLINE U8 *
1053 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1054 {
1055     PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1056
1057     /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1058      * the bitops (especially ~) can create illegal UTF-8.
1059      * In other words: in Perl UTF-8 is not just for Unicode. */
1060
1061     assert(start <= s && s <= end);
1062
1063     if (off >= 0) {
1064         return utf8_hop_forward(s, off, end);
1065     }
1066     else {
1067         return utf8_hop_back(s, off, start);
1068     }
1069 }
1070
1071 /*
1072
1073 =for apidoc is_utf8_valid_partial_char
1074
1075 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1076 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1077 points.  Otherwise, it returns 1 if there exists at least one non-empty
1078 sequence of bytes that when appended to sequence C<s>, starting at position
1079 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1080 otherwise returns 0.
1081
1082 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1083 point.
1084
1085 This is useful when a fixed-length buffer is being tested for being well-formed
1086 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1087 it is split somewhere in the middle of the final code point's UTF-8
1088 representation.  (Presumably when the buffer is refreshed with the next chunk
1089 of data, the new first bytes will complete the partial code point.)   This
1090 function is used to verify that the final bytes in the current buffer are in
1091 fact the legal beginning of some code point, so that if they aren't, the
1092 failure can be signalled without having to wait for the next read.
1093
1094 =cut
1095 */
1096 #define is_utf8_valid_partial_char(s, e)                                    \
1097                                 is_utf8_valid_partial_char_flags(s, e, 0)
1098
1099 /*
1100
1101 =for apidoc is_utf8_valid_partial_char_flags
1102
1103 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1104 or not the input is a valid UTF-8 encoded partial character, but it takes an
1105 extra parameter, C<flags>, which can further restrict which code points are
1106 considered valid.
1107
1108 If C<flags> is 0, this behaves identically to
1109 C<L</is_utf8_valid_partial_char>>.  Otherwise C<flags> can be any combination
1110 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>.  If
1111 there is any sequence of bytes that can complete the input partial character in
1112 such a way that a non-prohibited character is formed, the function returns
1113 TRUE; otherwise FALSE.  Non character code points cannot be determined based on
1114 partial character input.  But many  of the other possible excluded types can be
1115 determined from just the first one or two bytes.
1116
1117 =cut
1118  */
1119
1120 PERL_STATIC_INLINE bool
1121 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1122 {
1123     PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1124
1125     assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1126                           |UTF8_DISALLOW_ABOVE_31_BIT)));
1127
1128     if (s >= e || s + UTF8SKIP(s) <= e) {
1129         return FALSE;
1130     }
1131
1132     return cBOOL(_is_utf8_char_helper(s, e, flags));
1133 }
1134
1135 /*
1136
1137 =for apidoc is_utf8_fixed_width_buf_flags
1138
1139 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1140 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1141 otherwise it returns FALSE.
1142
1143 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1144 without restriction.  If the final few bytes of the buffer do not form a
1145 complete code point, this will return TRUE anyway, provided that
1146 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1147
1148 If C<flags> in non-zero, it can be any combination of the
1149 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1150 same meanings.
1151
1152 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1153 returns FALSE if the final few bytes of the string don't form a complete code
1154 point.
1155
1156 =cut
1157  */
1158 #define is_utf8_fixed_width_buf_flags(s, len, flags)                        \
1159                 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1160
1161 /*
1162
1163 =for apidoc is_utf8_fixed_width_buf_loc_flags
1164
1165 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1166 failure in the C<ep> pointer.  If the function returns TRUE, C<*ep> will point
1167 to the beginning of any partial character at the end of the buffer; if there is
1168 no partial character C<*ep> will contain C<s>+C<len>.
1169
1170 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1171
1172 =cut
1173 */
1174
1175 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags)               \
1176                 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1177
1178 /*
1179
1180 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1181
1182 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1183 complete, valid characters found in the C<el> pointer.
1184
1185 =cut
1186 */
1187
1188 PERL_STATIC_INLINE bool
1189 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1190                                        const STRLEN len,
1191                                        const U8 **ep,
1192                                        STRLEN *el,
1193                                        const U32 flags)
1194 {
1195     const U8 * maybe_partial;
1196
1197     PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1198
1199     if (! ep) {
1200         ep  = &maybe_partial;
1201     }
1202
1203     /* If it's entirely valid, return that; otherwise see if the only error is
1204      * that the final few bytes are for a partial character */
1205     return    is_utf8_string_loclen_flags(s, len, ep, el, flags)
1206            || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1207 }
1208
1209 /* ------------------------------- perl.h ----------------------------- */
1210
1211 /*
1212 =head1 Miscellaneous Functions
1213
1214 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1215
1216 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1217 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1218
1219 Return TRUE if the name is safe.
1220
1221 Used by the C<IS_SAFE_SYSCALL()> macro.
1222
1223 =cut
1224 */
1225
1226 PERL_STATIC_INLINE bool
1227 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1228     /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1229      * perl itself uses xce*() functions which accept 8-bit strings.
1230      */
1231
1232     PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1233
1234     if (len > 1) {
1235         char *null_at;
1236         if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1237                 SETERRNO(ENOENT, LIB_INVARG);
1238                 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1239                                    "Invalid \\0 character in %s for %s: %s\\0%s",
1240                                    what, op_name, pv, null_at+1);
1241                 return FALSE;
1242         }
1243     }
1244
1245     return TRUE;
1246 }
1247
1248 /*
1249
1250 Return true if the supplied filename has a newline character
1251 immediately before the first (hopefully only) NUL.
1252
1253 My original look at this incorrectly used the len from SvPV(), but
1254 that's incorrect, since we allow for a NUL in pv[len-1].
1255
1256 So instead, strlen() and work from there.
1257
1258 This allow for the user reading a filename, forgetting to chomp it,
1259 then calling:
1260
1261   open my $foo, "$file\0";
1262
1263 */
1264
1265 #ifdef PERL_CORE
1266
1267 PERL_STATIC_INLINE bool
1268 S_should_warn_nl(const char *pv) {
1269     STRLEN len;
1270
1271     PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1272
1273     len = strlen(pv);
1274
1275     return len > 0 && pv[len-1] == '\n';
1276 }
1277
1278 #endif
1279
1280 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1281
1282 #define MAX_CHARSET_NAME_LENGTH 2
1283
1284 PERL_STATIC_INLINE const char *
1285 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1286 {
1287     /* Returns a string that corresponds to the name of the regex character set
1288      * given by 'flags', and *lenp is set the length of that string, which
1289      * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1290
1291     *lenp = 1;
1292     switch (get_regex_charset(flags)) {
1293         case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1294         case REGEX_LOCALE_CHARSET:  return LOCALE_PAT_MODS;
1295         case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1296         case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1297         case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1298             *lenp = 2;
1299             return ASCII_MORE_RESTRICT_PAT_MODS;
1300     }
1301     /* The NOT_REACHED; hides an assert() which has a rather complex
1302      * definition in perl.h. */
1303     NOT_REACHED; /* NOTREACHED */
1304     return "?";     /* Unknown */
1305 }
1306
1307 /*
1308
1309 Return false if any get magic is on the SV other than taint magic.
1310
1311 */
1312
1313 PERL_STATIC_INLINE bool
1314 S_sv_only_taint_gmagic(SV *sv) {
1315     MAGIC *mg = SvMAGIC(sv);
1316
1317     PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1318
1319     while (mg) {
1320         if (mg->mg_type != PERL_MAGIC_taint
1321             && !(mg->mg_flags & MGf_GSKIP)
1322             && mg->mg_virtual->svt_get) {
1323             return FALSE;
1324         }
1325         mg = mg->mg_moremagic;
1326     }
1327
1328     return TRUE;
1329 }
1330
1331 /* ------------------ cop.h ------------------------------------------- */
1332
1333
1334 /* Enter a block. Push a new base context and return its address. */
1335
1336 PERL_STATIC_INLINE PERL_CONTEXT *
1337 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1338 {
1339     PERL_CONTEXT * cx;
1340
1341     PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1342
1343     CXINC;
1344     cx = CX_CUR();
1345     cx->cx_type        = type;
1346     cx->blk_gimme      = gimme;
1347     cx->blk_oldsaveix  = saveix;
1348     cx->blk_oldsp      = (I32)(sp - PL_stack_base);
1349     cx->blk_oldcop     = PL_curcop;
1350     cx->blk_oldmarksp  = (I32)(PL_markstack_ptr - PL_markstack);
1351     cx->blk_oldscopesp = PL_scopestack_ix;
1352     cx->blk_oldpm      = PL_curpm;
1353     cx->blk_old_tmpsfloor = PL_tmps_floor;
1354
1355     PL_tmps_floor        = PL_tmps_ix;
1356     CX_DEBUG(cx, "PUSH");
1357     return cx;
1358 }
1359
1360
1361 /* Exit a block (RETURN and LAST). */
1362
1363 PERL_STATIC_INLINE void
1364 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1365 {
1366     PERL_ARGS_ASSERT_CX_POPBLOCK;
1367
1368     CX_DEBUG(cx, "POP");
1369     /* these 3 are common to cx_popblock and cx_topblock */
1370     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1371     PL_scopestack_ix = cx->blk_oldscopesp;
1372     PL_curpm         = cx->blk_oldpm;
1373
1374     /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1375      * and leaves a CX entry lying around for repeated use, so
1376      * skip for multicall */                  \
1377     assert(   (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1378             || PL_savestack_ix == cx->blk_oldsaveix);
1379     PL_curcop     = cx->blk_oldcop;
1380     PL_tmps_floor = cx->blk_old_tmpsfloor;
1381 }
1382
1383 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1384  * Whereas cx_popblock() restores the state to the point just before
1385  * cx_pushblock() was called,  cx_topblock() restores it to the point just
1386  * *after* cx_pushblock() was called. */
1387
1388 PERL_STATIC_INLINE void
1389 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1390 {
1391     PERL_ARGS_ASSERT_CX_TOPBLOCK;
1392
1393     CX_DEBUG(cx, "TOP");
1394     /* these 3 are common to cx_popblock and cx_topblock */
1395     PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1396     PL_scopestack_ix = cx->blk_oldscopesp;
1397     PL_curpm         = cx->blk_oldpm;
1398
1399     PL_stack_sp      = PL_stack_base + cx->blk_oldsp;
1400 }
1401
1402
1403 PERL_STATIC_INLINE void
1404 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1405 {
1406     U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1407
1408     PERL_ARGS_ASSERT_CX_PUSHSUB;
1409
1410     PERL_DTRACE_PROBE_ENTRY(cv);
1411     cx->blk_sub.cv = cv;
1412     cx->blk_sub.olddepth = CvDEPTH(cv);
1413     cx->blk_sub.prevcomppad = PL_comppad;
1414     cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1415     cx->blk_sub.retop = retop;
1416     SvREFCNT_inc_simple_void_NN(cv);
1417     cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1418 }
1419
1420
1421 /* subsets of cx_popsub() */
1422
1423 PERL_STATIC_INLINE void
1424 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1425 {
1426     CV *cv;
1427
1428     PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1429     assert(CxTYPE(cx) == CXt_SUB);
1430
1431     PL_comppad = cx->blk_sub.prevcomppad;
1432     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1433     cv = cx->blk_sub.cv;
1434     CvDEPTH(cv) = cx->blk_sub.olddepth;
1435     cx->blk_sub.cv = NULL;
1436     SvREFCNT_dec(cv);
1437 }
1438
1439
1440 /* handle the @_ part of leaving a sub */
1441
1442 PERL_STATIC_INLINE void
1443 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1444 {
1445     AV *av;
1446
1447     PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1448     assert(CxTYPE(cx) == CXt_SUB);
1449     assert(AvARRAY(MUTABLE_AV(
1450         PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1451                 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1452
1453     CX_POP_SAVEARRAY(cx);
1454     av = MUTABLE_AV(PAD_SVl(0));
1455     if (UNLIKELY(AvREAL(av)))
1456         /* abandon @_ if it got reified */
1457         clear_defarray(av, 0);
1458     else {
1459         CLEAR_ARGARRAY(av);
1460     }
1461 }
1462
1463
1464 PERL_STATIC_INLINE void
1465 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1466 {
1467     PERL_ARGS_ASSERT_CX_POPSUB;
1468     assert(CxTYPE(cx) == CXt_SUB);
1469
1470     PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1471
1472     if (CxHASARGS(cx))
1473         cx_popsub_args(cx);
1474     cx_popsub_common(cx);
1475 }
1476
1477
1478 PERL_STATIC_INLINE void
1479 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1480 {
1481     PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1482
1483     cx->blk_format.cv          = cv;
1484     cx->blk_format.retop       = retop;
1485     cx->blk_format.gv          = gv;
1486     cx->blk_format.dfoutgv     = PL_defoutgv;
1487     cx->blk_format.prevcomppad = PL_comppad;
1488     cx->blk_u16                = 0;
1489
1490     SvREFCNT_inc_simple_void_NN(cv);
1491     CvDEPTH(cv)++;
1492     SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1493 }
1494
1495
1496 PERL_STATIC_INLINE void
1497 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1498 {
1499     CV *cv;
1500     GV *dfout;
1501
1502     PERL_ARGS_ASSERT_CX_POPFORMAT;
1503     assert(CxTYPE(cx) == CXt_FORMAT);
1504
1505     dfout = cx->blk_format.dfoutgv;
1506     setdefout(dfout);
1507     cx->blk_format.dfoutgv = NULL;
1508     SvREFCNT_dec_NN(dfout);
1509
1510     PL_comppad = cx->blk_format.prevcomppad;
1511     PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1512     cv = cx->blk_format.cv;
1513     cx->blk_format.cv = NULL;
1514     --CvDEPTH(cv);
1515     SvREFCNT_dec_NN(cv);
1516 }
1517
1518
1519 PERL_STATIC_INLINE void
1520 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1521 {
1522     PERL_ARGS_ASSERT_CX_PUSHEVAL;
1523
1524     cx->blk_eval.retop         = retop;
1525     cx->blk_eval.old_namesv    = namesv;
1526     cx->blk_eval.old_eval_root = PL_eval_root;
1527     cx->blk_eval.cur_text      = PL_parser ? PL_parser->linestr : NULL;
1528     cx->blk_eval.cv            = NULL; /* later set by doeval_compile() */
1529     cx->blk_eval.cur_top_env   = PL_top_env;
1530
1531     assert(!(PL_in_eval     & ~ 0x3F));
1532     assert(!(PL_op->op_type & ~0x1FF));
1533     cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1534 }
1535
1536
1537 PERL_STATIC_INLINE void
1538 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1539 {
1540     SV *sv;
1541
1542     PERL_ARGS_ASSERT_CX_POPEVAL;
1543     assert(CxTYPE(cx) == CXt_EVAL);
1544
1545     PL_in_eval = CxOLD_IN_EVAL(cx);
1546     assert(!(PL_in_eval & 0xc0));
1547     PL_eval_root = cx->blk_eval.old_eval_root;
1548     sv = cx->blk_eval.cur_text;
1549     if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1550         cx->blk_eval.cur_text = NULL;
1551         SvREFCNT_dec_NN(sv);
1552     }
1553
1554     sv = cx->blk_eval.old_namesv;
1555     if (sv) {
1556         cx->blk_eval.old_namesv = NULL;
1557         SvREFCNT_dec_NN(sv);
1558     }
1559 }
1560
1561
1562 /* push a plain loop, i.e.
1563  *     { block }
1564  *     while (cond) { block }
1565  *     for (init;cond;continue) { block }
1566  * This loop can be last/redo'ed etc.
1567  */
1568
1569 PERL_STATIC_INLINE void
1570 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1571 {
1572     PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1573     cx->blk_loop.my_op = cLOOP;
1574 }
1575
1576
1577 /* push a true for loop, i.e.
1578  *     for var (list) { block }
1579  */
1580
1581 PERL_STATIC_INLINE void
1582 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1583 {
1584     PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1585
1586     /* this one line is common with cx_pushloop_plain */
1587     cx->blk_loop.my_op = cLOOP;
1588
1589     cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1590     cx->blk_loop.itersave      = itersave;
1591 #ifdef USE_ITHREADS
1592     cx->blk_loop.oldcomppad = PL_comppad;
1593 #endif
1594 }
1595
1596
1597 /* pop all loop types, including plain */
1598
1599 PERL_STATIC_INLINE void
1600 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1601 {
1602     PERL_ARGS_ASSERT_CX_POPLOOP;
1603
1604     assert(CxTYPE_is_LOOP(cx));
1605     if (  CxTYPE(cx) == CXt_LOOP_ARY
1606        || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1607     {
1608         /* Free ary or cur. This assumes that state_u.ary.ary
1609          * aligns with state_u.lazysv.cur. See cx_dup() */
1610         SV *sv = cx->blk_loop.state_u.lazysv.cur;
1611         cx->blk_loop.state_u.lazysv.cur = NULL;
1612         SvREFCNT_dec_NN(sv);
1613         if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1614             sv = cx->blk_loop.state_u.lazysv.end;
1615             cx->blk_loop.state_u.lazysv.end = NULL;
1616             SvREFCNT_dec_NN(sv);
1617         }
1618     }
1619     if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1620         SV *cursv;
1621         SV **svp = (cx)->blk_loop.itervar_u.svp;
1622         if ((cx->cx_type & CXp_FOR_GV))
1623             svp = &GvSV((GV*)svp);
1624         cursv = *svp;
1625         *svp = cx->blk_loop.itersave;
1626         cx->blk_loop.itersave = NULL;
1627         SvREFCNT_dec(cursv);
1628     }
1629 }
1630
1631
1632 PERL_STATIC_INLINE void
1633 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1634 {
1635     PERL_ARGS_ASSERT_CX_PUSHWHEN;
1636
1637     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1638 }
1639
1640
1641 PERL_STATIC_INLINE void
1642 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1643 {
1644     PERL_ARGS_ASSERT_CX_POPWHEN;
1645     assert(CxTYPE(cx) == CXt_WHEN);
1646
1647     PERL_UNUSED_ARG(cx);
1648     PERL_UNUSED_CONTEXT;
1649     /* currently NOOP */
1650 }
1651
1652
1653 PERL_STATIC_INLINE void
1654 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1655 {
1656     PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1657
1658     cx->blk_givwhen.leave_op = cLOGOP->op_other;
1659     cx->blk_givwhen.defsv_save = orig_defsv;
1660 }
1661
1662
1663 PERL_STATIC_INLINE void
1664 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1665 {
1666     SV *sv;
1667
1668     PERL_ARGS_ASSERT_CX_POPGIVEN;
1669     assert(CxTYPE(cx) == CXt_GIVEN);
1670
1671     sv = GvSV(PL_defgv);
1672     GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1673     cx->blk_givwhen.defsv_save = NULL;
1674     SvREFCNT_dec(sv);
1675 }
1676
1677 /* ------------------ util.h ------------------------------------------- */
1678
1679 /*
1680 =head1 Miscellaneous Functions
1681
1682 =for apidoc foldEQ
1683
1684 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1685 same
1686 case-insensitively; false otherwise.  Uppercase and lowercase ASCII range bytes
1687 match themselves and their opposite case counterparts.  Non-cased and non-ASCII
1688 range bytes match only themselves.
1689
1690 =cut
1691 */
1692
1693 PERL_STATIC_INLINE I32
1694 Perl_foldEQ(const char *s1, const char *s2, I32 len)
1695 {
1696     const U8 *a = (const U8 *)s1;
1697     const U8 *b = (const U8 *)s2;
1698
1699     PERL_ARGS_ASSERT_FOLDEQ;
1700
1701     assert(len >= 0);
1702
1703     while (len--) {
1704         if (*a != *b && *a != PL_fold[*b])
1705             return 0;
1706         a++,b++;
1707     }
1708     return 1;
1709 }
1710
1711 PERL_STATIC_INLINE I32
1712 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1713 {
1714     /* Compare non-utf8 using Unicode (Latin1) semantics.  Does not work on
1715      * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1716      * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these.  Nor
1717      * does it check that the strings each have at least 'len' characters */
1718
1719     const U8 *a = (const U8 *)s1;
1720     const U8 *b = (const U8 *)s2;
1721
1722     PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1723
1724     assert(len >= 0);
1725
1726     while (len--) {
1727         if (*a != *b && *a != PL_fold_latin1[*b]) {
1728             return 0;
1729         }
1730         a++, b++;
1731     }
1732     return 1;
1733 }
1734
1735 /*
1736 =for apidoc foldEQ_locale
1737
1738 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1739 same case-insensitively in the current locale; false otherwise.
1740
1741 =cut
1742 */
1743
1744 PERL_STATIC_INLINE I32
1745 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1746 {
1747     dVAR;
1748     const U8 *a = (const U8 *)s1;
1749     const U8 *b = (const U8 *)s2;
1750
1751     PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1752
1753     assert(len >= 0);
1754
1755     while (len--) {
1756         if (*a != *b && *a != PL_fold_locale[*b])
1757             return 0;
1758         a++,b++;
1759     }
1760     return 1;
1761 }
1762
1763 /*
1764  * ex: set ts=8 sts=4 sw=4 et:
1765  */