3 * Copyright (C) 2012 by Larry Wall and others
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
12 * Each section names the header file that the functions "belong" to.
15 /* ------------------------------- av.h ------------------------------- */
17 PERL_STATIC_INLINE SSize_t
18 S_av_top_index(pTHX_ AV *av)
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
26 /* ------------------------------- cv.h ------------------------------- */
28 PERL_STATIC_INLINE GV *
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
36 PERL_STATIC_INLINE I32 *
37 S_CvDEPTHp(const CV * const sv)
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
40 return &((XPVCV*)SvANY(sv))->xcv_depth;
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
55 PERL_STATIC_INLINE char *
56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
68 *len = tmps - SvPVX(tmpsv);
73 /* ------------------------------- mg.h ------------------------------- */
75 #if defined(PERL_CORE) || defined(PERL_EXT)
76 /* assumes get-magic and stringification have already occurred */
77 PERL_STATIC_INLINE STRLEN
78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
93 /* ------------------------------- pad.h ------------------------------ */
95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96 PERL_STATIC_INLINE bool
97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
128 /* ------------------------------- pp.h ------------------------------- */
130 PERL_STATIC_INLINE I32
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
134 "MARK top %p %"IVdf"\n",
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
140 PERL_STATIC_INLINE I32
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
144 "MARK pop %p %"IVdf"\n",
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
151 /* ----------------------------- regexp.h ----------------------------- */
153 PERL_STATIC_INLINE struct regexp *
154 S_ReANY(const REGEXP * const re)
156 assert(isREGEXP(re));
157 return re->sv_u.svu_rx;
160 /* ------------------------------- sv.h ------------------------------- */
162 PERL_STATIC_INLINE SV *
163 S_SvREFCNT_inc(SV *sv)
165 if (LIKELY(sv != NULL))
169 PERL_STATIC_INLINE SV *
170 S_SvREFCNT_inc_NN(SV *sv)
175 PERL_STATIC_INLINE void
176 S_SvREFCNT_inc_void(SV *sv)
178 if (LIKELY(sv != NULL))
181 PERL_STATIC_INLINE void
182 S_SvREFCNT_dec(pTHX_ SV *sv)
184 if (LIKELY(sv != NULL)) {
185 U32 rc = SvREFCNT(sv);
187 SvREFCNT(sv) = rc - 1;
189 Perl_sv_free2(aTHX_ sv, rc);
193 PERL_STATIC_INLINE void
194 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
196 U32 rc = SvREFCNT(sv);
198 SvREFCNT(sv) = rc - 1;
200 Perl_sv_free2(aTHX_ sv, rc);
203 PERL_STATIC_INLINE void
207 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
209 PERL_STATIC_INLINE void
212 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
213 HvAMAGIC_off(SvSTASH(SvRV(sv)));
216 PERL_STATIC_INLINE U32
217 S_SvPADSTALE_on(SV *sv)
219 assert(!(SvFLAGS(sv) & SVs_PADTMP));
220 return SvFLAGS(sv) |= SVs_PADSTALE;
222 PERL_STATIC_INLINE U32
223 S_SvPADSTALE_off(SV *sv)
225 assert(!(SvFLAGS(sv) & SVs_PADTMP));
226 return SvFLAGS(sv) &= ~SVs_PADSTALE;
228 #if defined(PERL_CORE) || defined (PERL_EXT)
229 PERL_STATIC_INLINE STRLEN
230 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
232 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
234 U8 *hopped = utf8_hop((U8 *)pv, pos);
235 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
236 return (STRLEN)(hopped - (U8 *)pv);
238 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
242 /* ------------------------------- handy.h ------------------------------- */
244 /* saves machine code for a common noreturn idiom typically used in Newx*() */
245 #ifdef GCC_DIAG_PRAGMA
246 GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
249 S_croak_memory_wrap(void)
251 Perl_croak_nocontext("%s",PL_memory_wrap);
253 #ifdef GCC_DIAG_PRAGMA
254 GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
257 /* ------------------------------- utf8.h ------------------------------- */
260 =head1 Unicode Support
263 PERL_STATIC_INLINE void
264 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
266 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
267 * encoded string at '*dest', updating '*dest' to include it */
269 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
271 if (NATIVE_BYTE_IS_INVARIANT(byte))
274 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
275 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
280 =for apidoc valid_utf8_to_uvchr
281 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
282 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
283 it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
284 non-Unicode code points are allowed.
290 PERL_STATIC_INLINE UV
291 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
293 const UV expectlen = UTF8SKIP(s);
294 const U8* send = s + expectlen;
297 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
303 /* An invariant is trivially returned */
304 if (expectlen == 1) {
308 /* Remove the leading bits that indicate the number of bytes, leaving just
309 * the bits that are part of the value */
310 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
312 /* Now, loop through the remaining bytes, accumulating each into the
313 * working total as we go. (I khw tried unrolling the loop for up to 4
314 * bytes, but there was no performance improvement) */
315 for (++s; s < send; s++) {
316 uv = UTF8_ACCUMULATE(uv, *s);
319 return UNI_TO_NATIVE(uv);
324 =for apidoc is_utf8_invariant_string
326 Returns TRUE if the first C<len> bytes of the string C<s> are the same
327 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
328 EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
329 are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
330 the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
331 characters are invariant, but so also are the C1 controls.
333 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
334 use this option, that C<s> can't have embedded C<NUL> characters and has to
335 have a terminating C<NUL> byte).
338 C<L</is_utf8_string>>,
339 C<L</is_utf8_string_flags>>,
340 C<L</is_utf8_string_loc>>,
341 C<L</is_utf8_string_loc_flags>>,
342 C<L</is_utf8_string_loclen>>,
343 C<L</is_utf8_string_loclen_flags>>,
344 C<L</is_strict_utf8_string>>,
345 C<L</is_strict_utf8_string_loc>>,
346 C<L</is_strict_utf8_string_loclen>>,
347 C<L</is_c9strict_utf8_string>>,
348 C<L</is_c9strict_utf8_string_loc>>,
350 C<L</is_c9strict_utf8_string_loclen>>.
355 PERL_STATIC_INLINE bool
356 S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
358 const U8* const send = s + (len ? len : strlen((const char *)s));
361 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
363 for (; x < send; ++x) {
364 if (!UTF8_IS_INVARIANT(*x))
372 =for apidoc is_utf8_string
374 Returns TRUE if the first C<len> bytes of string C<s> form a valid
375 Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
376 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
377 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
378 byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
380 This function considers Perl's extended UTF-8 to be valid. That means that
381 code points above Unicode, surrogates, and non-character code points are
382 considered valid by this function. Use C<L</is_strict_utf8_string>>,
383 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
384 code points are considered valid.
387 C<L</is_utf8_invariant_string>>,
388 C<L</is_utf8_string_loc>>,
389 C<L</is_utf8_string_loclen>>,
394 PERL_STATIC_INLINE bool
395 Perl_is_utf8_string(const U8 *s, const STRLEN len)
397 /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
398 * Be aware of possible changes to that */
400 const U8* const send = s + (len ? len : strlen((const char *)s));
403 PERL_ARGS_ASSERT_IS_UTF8_STRING;
406 const STRLEN cur_len = isUTF8_CHAR(x, send);
407 if (UNLIKELY(! cur_len)) {
417 =for apidoc is_strict_utf8_string
419 Returns TRUE if the first C<len> bytes of string C<s> form a valid
420 UTF-8-encoded string that is fully interchangeable by any application using
421 Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
422 calculated using C<strlen(s)> (which means if you use this option, that C<s>
423 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
424 byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
426 This function returns FALSE for strings containing any
427 code points above the Unicode max of 0x10FFFF, surrogate code points, or
428 non-character code points.
431 C<L</is_utf8_invariant_string>>,
432 C<L</is_utf8_string>>,
433 C<L</is_utf8_string_flags>>,
434 C<L</is_utf8_string_loc>>,
435 C<L</is_utf8_string_loc_flags>>,
436 C<L</is_utf8_string_loclen>>,
437 C<L</is_utf8_string_loclen_flags>>,
438 C<L</is_strict_utf8_string_loc>>,
439 C<L</is_strict_utf8_string_loclen>>,
440 C<L</is_c9strict_utf8_string>>,
441 C<L</is_c9strict_utf8_string_loc>>,
443 C<L</is_c9strict_utf8_string_loclen>>.
448 PERL_STATIC_INLINE bool
449 S_is_strict_utf8_string(const U8 *s, const STRLEN len)
451 const U8* const send = s + (len ? len : strlen((const char *)s));
454 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
457 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
458 if (UNLIKELY(! cur_len)) {
468 =for apidoc is_c9strict_utf8_string
470 Returns TRUE if the first C<len> bytes of string C<s> form a valid
471 UTF-8-encoded string that conforms to
472 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
473 otherwise it returns FALSE. If C<len> is 0, it will be calculated using
474 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
475 C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
476 characters being ASCII constitute 'a valid UTF-8 string'.
478 This function returns FALSE for strings containing any code points above the
479 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
481 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
484 C<L</is_utf8_invariant_string>>,
485 C<L</is_utf8_string>>,
486 C<L</is_utf8_string_flags>>,
487 C<L</is_utf8_string_loc>>,
488 C<L</is_utf8_string_loc_flags>>,
489 C<L</is_utf8_string_loclen>>,
490 C<L</is_utf8_string_loclen_flags>>,
491 C<L</is_strict_utf8_string>>,
492 C<L</is_strict_utf8_string_loc>>,
493 C<L</is_strict_utf8_string_loclen>>,
494 C<L</is_c9strict_utf8_string_loc>>,
496 C<L</is_c9strict_utf8_string_loclen>>.
501 PERL_STATIC_INLINE bool
502 S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
504 const U8* const send = s + (len ? len : strlen((const char *)s));
507 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
510 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
511 if (UNLIKELY(! cur_len)) {
520 /* The above 3 functions could have been moved into the more general one just
521 * below, and made #defines that call it with the right 'flags'. They are
522 * currently kept separate to increase their chances of getting inlined */
525 =for apidoc is_utf8_string_flags
527 Returns TRUE if the first C<len> bytes of string C<s> form a valid
528 UTF-8 string, subject to the restrictions imposed by C<flags>;
529 returns FALSE otherwise. If C<len> is 0, it will be calculated
530 using C<strlen(s)> (which means if you use this option, that C<s> can't have
531 embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
532 that all characters being ASCII constitute 'a valid UTF-8 string'.
534 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
535 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
536 as C<L</is_strict_utf8_string>>; and if C<flags> is
537 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
538 C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
539 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
540 C<L</utf8n_to_uvchr>>, with the same meanings.
543 C<L</is_utf8_invariant_string>>,
544 C<L</is_utf8_string>>,
545 C<L</is_utf8_string_loc>>,
546 C<L</is_utf8_string_loc_flags>>,
547 C<L</is_utf8_string_loclen>>,
548 C<L</is_utf8_string_loclen_flags>>,
549 C<L</is_strict_utf8_string>>,
550 C<L</is_strict_utf8_string_loc>>,
551 C<L</is_strict_utf8_string_loclen>>,
552 C<L</is_c9strict_utf8_string>>,
553 C<L</is_c9strict_utf8_string_loc>>,
555 C<L</is_c9strict_utf8_string_loclen>>.
560 PERL_STATIC_INLINE bool
561 S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
563 const U8* const send = s + (len ? len : strlen((const char *)s));
566 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
567 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
568 |UTF8_DISALLOW_ABOVE_31_BIT)));
571 return is_utf8_string(s, len);
574 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
575 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
577 return is_strict_utf8_string(s, len);
580 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
581 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
583 return is_c9strict_utf8_string(s, len);
587 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
588 if (UNLIKELY(! cur_len)) {
599 =for apidoc is_utf8_string_loc
601 Like C<L</is_utf8_string>> but stores the location of the failure (in the
602 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
603 "utf8ness success") in the C<ep> pointer.
605 See also C<L</is_utf8_string_loclen>>.
610 #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
614 =for apidoc is_utf8_string_loclen
616 Like C<L</is_utf8_string>> but stores the location of the failure (in the
617 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
618 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
619 encoded characters in the C<el> pointer.
621 See also C<L</is_utf8_string_loc>>.
626 PERL_STATIC_INLINE bool
627 Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
629 const U8* const send = s + (len ? len : strlen((const char *)s));
633 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
636 const STRLEN cur_len = isUTF8_CHAR(x, send);
637 if (UNLIKELY(! cur_len)) {
656 =for apidoc is_strict_utf8_string_loc
658 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
659 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
660 "utf8ness success") in the C<ep> pointer.
662 See also C<L</is_strict_utf8_string_loclen>>.
667 #define is_strict_utf8_string_loc(s, len, ep) \
668 is_strict_utf8_string_loclen(s, len, ep, 0)
672 =for apidoc is_strict_utf8_string_loclen
674 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
675 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
676 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
677 encoded characters in the C<el> pointer.
679 See also C<L</is_strict_utf8_string_loc>>.
684 PERL_STATIC_INLINE bool
685 S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
687 const U8* const send = s + (len ? len : strlen((const char *)s));
691 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
694 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
695 if (UNLIKELY(! cur_len)) {
714 =for apidoc is_c9strict_utf8_string_loc
716 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
717 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
718 "utf8ness success") in the C<ep> pointer.
720 See also C<L</is_c9strict_utf8_string_loclen>>.
725 #define is_c9strict_utf8_string_loc(s, len, ep) \
726 is_c9strict_utf8_string_loclen(s, len, ep, 0)
730 =for apidoc is_c9strict_utf8_string_loclen
732 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
733 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
734 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
735 characters in the C<el> pointer.
737 See also C<L</is_c9strict_utf8_string_loc>>.
742 PERL_STATIC_INLINE bool
743 S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
745 const U8* const send = s + (len ? len : strlen((const char *)s));
749 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
752 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
753 if (UNLIKELY(! cur_len)) {
772 =for apidoc is_utf8_string_loc_flags
774 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
775 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
776 "utf8ness success") in the C<ep> pointer.
778 See also C<L</is_utf8_string_loclen_flags>>.
783 #define is_utf8_string_loc_flags(s, len, ep, flags) \
784 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
787 /* The above 3 actual functions could have been moved into the more general one
788 * just below, and made #defines that call it with the right 'flags'. They are
789 * currently kept separate to increase their chances of getting inlined */
793 =for apidoc is_utf8_string_loclen_flags
795 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
796 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
797 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
798 encoded characters in the C<el> pointer.
800 See also C<L</is_utf8_string_loc_flags>>.
805 PERL_STATIC_INLINE bool
806 S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
808 const U8* const send = s + (len ? len : strlen((const char *)s));
812 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
813 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
814 |UTF8_DISALLOW_ABOVE_31_BIT)));
817 return is_utf8_string_loclen(s, len, ep, el);
820 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
821 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
823 return is_strict_utf8_string_loclen(s, len, ep, el);
826 if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
827 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
829 return is_c9strict_utf8_string_loclen(s, len, ep, el);
833 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
834 if (UNLIKELY(! cur_len)) {
852 =for apidoc utf8_distance
854 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
857 WARNING: use only if you *know* that the pointers point inside the
863 PERL_STATIC_INLINE IV
864 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
866 PERL_ARGS_ASSERT_UTF8_DISTANCE;
868 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
874 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
877 WARNING: do not use the following unless you *know* C<off> is within
878 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
879 on the first byte of character or just after the last byte of a character.
884 PERL_STATIC_INLINE U8 *
885 Perl_utf8_hop(const U8 *s, SSize_t off)
887 PERL_ARGS_ASSERT_UTF8_HOP;
889 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
890 * the bitops (especially ~) can create illegal UTF-8.
891 * In other words: in Perl UTF-8 is not just for Unicode. */
900 while (UTF8_IS_CONTINUATION(*s))
909 =for apidoc is_utf8_valid_partial_char
911 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
912 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
913 points. Otherwise, it returns 1 if there exists at least one non-empty
914 sequence of bytes that when appended to sequence C<s>, starting at position
915 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
918 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
921 This is useful when a fixed-length buffer is being tested for being well-formed
922 UTF-8, but the final few bytes in it don't comprise a full character; that is,
923 it is split somewhere in the middle of the final code point's UTF-8
924 representation. (Presumably when the buffer is refreshed with the next chunk
925 of data, the new first bytes will complete the partial code point.) This
926 function is used to verify that the final bytes in the current buffer are in
927 fact the legal beginning of some code point, so that if they aren't, the
928 failure can be signalled without having to wait for the next read.
932 #define is_utf8_valid_partial_char(s, e) \
933 is_utf8_valid_partial_char_flags(s, e, 0)
937 =for apidoc is_utf8_valid_partial_char_flags
939 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
940 or not the input is a valid UTF-8 encoded partial character, but it takes an
941 extra parameter, C<flags>, which can further restrict which code points are
944 If C<flags> is 0, this behaves identically to
945 C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
946 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
947 there is any sequence of bytes that can complete the input partial character in
948 such a way that a non-prohibited character is formed, the function returns
949 TRUE; otherwise FALSE. Non character code points cannot be determined based on
950 partial character input. But many of the other possible excluded types can be
951 determined from just the first one or two bytes.
956 PERL_STATIC_INLINE bool
957 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
959 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
961 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
962 |UTF8_DISALLOW_ABOVE_31_BIT)));
964 if (s >= e || s + UTF8SKIP(s) <= e) {
968 return cBOOL(_is_utf8_char_helper(s, e, flags));
971 /* ------------------------------- perl.h ----------------------------- */
974 =head1 Miscellaneous Functions
976 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
978 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
979 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
981 Return TRUE if the name is safe.
983 Used by the C<IS_SAFE_SYSCALL()> macro.
988 PERL_STATIC_INLINE bool
989 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
990 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
991 * perl itself uses xce*() functions which accept 8-bit strings.
994 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
998 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
999 SETERRNO(ENOENT, LIB_INVARG);
1000 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1001 "Invalid \\0 character in %s for %s: %s\\0%s",
1002 what, op_name, pv, null_at+1);
1012 Return true if the supplied filename has a newline character
1013 immediately before the first (hopefully only) NUL.
1015 My original look at this incorrectly used the len from SvPV(), but
1016 that's incorrect, since we allow for a NUL in pv[len-1].
1018 So instead, strlen() and work from there.
1020 This allow for the user reading a filename, forgetting to chomp it,
1023 open my $foo, "$file\0";
1029 PERL_STATIC_INLINE bool
1030 S_should_warn_nl(const char *pv) {
1033 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1037 return len > 0 && pv[len-1] == '\n';
1042 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1044 #define MAX_CHARSET_NAME_LENGTH 2
1046 PERL_STATIC_INLINE const char *
1047 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1049 /* Returns a string that corresponds to the name of the regex character set
1050 * given by 'flags', and *lenp is set the length of that string, which
1051 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1054 switch (get_regex_charset(flags)) {
1055 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1056 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1057 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1058 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1059 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1061 return ASCII_MORE_RESTRICT_PAT_MODS;
1063 /* The NOT_REACHED; hides an assert() which has a rather complex
1064 * definition in perl.h. */
1065 NOT_REACHED; /* NOTREACHED */
1066 return "?"; /* Unknown */
1071 Return false if any get magic is on the SV other than taint magic.
1075 PERL_STATIC_INLINE bool
1076 S_sv_only_taint_gmagic(SV *sv) {
1077 MAGIC *mg = SvMAGIC(sv);
1079 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1082 if (mg->mg_type != PERL_MAGIC_taint
1083 && !(mg->mg_flags & MGf_GSKIP)
1084 && mg->mg_virtual->svt_get) {
1087 mg = mg->mg_moremagic;
1093 /* ------------------ cop.h ------------------------------------------- */
1096 /* Enter a block. Push a new base context and return its address. */
1098 PERL_STATIC_INLINE PERL_CONTEXT *
1099 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1103 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1108 cx->blk_gimme = gimme;
1109 cx->blk_oldsaveix = saveix;
1110 cx->blk_oldsp = (I32)(sp - PL_stack_base);
1111 cx->blk_oldcop = PL_curcop;
1112 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
1113 cx->blk_oldscopesp = PL_scopestack_ix;
1114 cx->blk_oldpm = PL_curpm;
1115 cx->blk_old_tmpsfloor = PL_tmps_floor;
1117 PL_tmps_floor = PL_tmps_ix;
1118 CX_DEBUG(cx, "PUSH");
1123 /* Exit a block (RETURN and LAST). */
1125 PERL_STATIC_INLINE void
1126 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1128 PERL_ARGS_ASSERT_CX_POPBLOCK;
1130 CX_DEBUG(cx, "POP");
1131 /* these 3 are common to cx_popblock and cx_topblock */
1132 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1133 PL_scopestack_ix = cx->blk_oldscopesp;
1134 PL_curpm = cx->blk_oldpm;
1136 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1137 * and leaves a CX entry lying around for repeated use, so
1138 * skip for multicall */ \
1139 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1140 || PL_savestack_ix == cx->blk_oldsaveix);
1141 PL_curcop = cx->blk_oldcop;
1142 PL_tmps_floor = cx->blk_old_tmpsfloor;
1145 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1146 * Whereas cx_popblock() restores the state to the point just before
1147 * cx_pushblock() was called, cx_topblock() restores it to the point just
1148 * *after* cx_pushblock() was called. */
1150 PERL_STATIC_INLINE void
1151 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1153 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1155 CX_DEBUG(cx, "TOP");
1156 /* these 3 are common to cx_popblock and cx_topblock */
1157 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1158 PL_scopestack_ix = cx->blk_oldscopesp;
1159 PL_curpm = cx->blk_oldpm;
1161 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1165 PERL_STATIC_INLINE void
1166 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1168 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1170 PERL_ARGS_ASSERT_CX_PUSHSUB;
1172 PERL_DTRACE_PROBE_ENTRY(cv);
1173 cx->blk_sub.cv = cv;
1174 cx->blk_sub.olddepth = CvDEPTH(cv);
1175 cx->blk_sub.prevcomppad = PL_comppad;
1176 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1177 cx->blk_sub.retop = retop;
1178 SvREFCNT_inc_simple_void_NN(cv);
1179 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1183 /* subsets of cx_popsub() */
1185 PERL_STATIC_INLINE void
1186 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1190 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1191 assert(CxTYPE(cx) == CXt_SUB);
1193 PL_comppad = cx->blk_sub.prevcomppad;
1194 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1195 cv = cx->blk_sub.cv;
1196 CvDEPTH(cv) = cx->blk_sub.olddepth;
1197 cx->blk_sub.cv = NULL;
1202 /* handle the @_ part of leaving a sub */
1204 PERL_STATIC_INLINE void
1205 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1209 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1210 assert(CxTYPE(cx) == CXt_SUB);
1211 assert(AvARRAY(MUTABLE_AV(
1212 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1213 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1215 CX_POP_SAVEARRAY(cx);
1216 av = MUTABLE_AV(PAD_SVl(0));
1217 if (UNLIKELY(AvREAL(av)))
1218 /* abandon @_ if it got reified */
1219 clear_defarray(av, 0);
1226 PERL_STATIC_INLINE void
1227 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1229 PERL_ARGS_ASSERT_CX_POPSUB;
1230 assert(CxTYPE(cx) == CXt_SUB);
1232 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1236 cx_popsub_common(cx);
1240 PERL_STATIC_INLINE void
1241 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1243 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1245 cx->blk_format.cv = cv;
1246 cx->blk_format.retop = retop;
1247 cx->blk_format.gv = gv;
1248 cx->blk_format.dfoutgv = PL_defoutgv;
1249 cx->blk_format.prevcomppad = PL_comppad;
1252 SvREFCNT_inc_simple_void_NN(cv);
1254 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1258 PERL_STATIC_INLINE void
1259 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1264 PERL_ARGS_ASSERT_CX_POPFORMAT;
1265 assert(CxTYPE(cx) == CXt_FORMAT);
1267 dfout = cx->blk_format.dfoutgv;
1269 cx->blk_format.dfoutgv = NULL;
1270 SvREFCNT_dec_NN(dfout);
1272 PL_comppad = cx->blk_format.prevcomppad;
1273 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1274 cv = cx->blk_format.cv;
1275 cx->blk_format.cv = NULL;
1277 SvREFCNT_dec_NN(cv);
1281 PERL_STATIC_INLINE void
1282 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1284 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1286 cx->blk_eval.retop = retop;
1287 cx->blk_eval.old_namesv = namesv;
1288 cx->blk_eval.old_eval_root = PL_eval_root;
1289 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1290 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1291 cx->blk_eval.cur_top_env = PL_top_env;
1293 assert(!(PL_in_eval & ~ 0x7F));
1294 assert(!(PL_op->op_type & ~0x1FF));
1295 cx->blk_u16 = (PL_in_eval & 0x7F) | ((U16)PL_op->op_type << 7);
1299 PERL_STATIC_INLINE void
1300 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1304 PERL_ARGS_ASSERT_CX_POPEVAL;
1305 assert(CxTYPE(cx) == CXt_EVAL);
1307 PL_in_eval = CxOLD_IN_EVAL(cx);
1308 PL_eval_root = cx->blk_eval.old_eval_root;
1309 sv = cx->blk_eval.cur_text;
1310 if (sv && SvSCREAM(sv)) {
1311 cx->blk_eval.cur_text = NULL;
1312 SvREFCNT_dec_NN(sv);
1315 sv = cx->blk_eval.old_namesv;
1317 cx->blk_eval.old_namesv = NULL;
1318 SvREFCNT_dec_NN(sv);
1323 /* push a plain loop, i.e.
1325 * while (cond) { block }
1326 * for (init;cond;continue) { block }
1327 * This loop can be last/redo'ed etc.
1330 PERL_STATIC_INLINE void
1331 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1333 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1334 cx->blk_loop.my_op = cLOOP;
1338 /* push a true for loop, i.e.
1339 * for var (list) { block }
1342 PERL_STATIC_INLINE void
1343 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1345 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1347 /* this one line is common with cx_pushloop_plain */
1348 cx->blk_loop.my_op = cLOOP;
1350 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1351 cx->blk_loop.itersave = itersave;
1353 cx->blk_loop.oldcomppad = PL_comppad;
1358 /* pop all loop types, including plain */
1360 PERL_STATIC_INLINE void
1361 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1363 PERL_ARGS_ASSERT_CX_POPLOOP;
1365 assert(CxTYPE_is_LOOP(cx));
1366 if ( CxTYPE(cx) == CXt_LOOP_ARY
1367 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1369 /* Free ary or cur. This assumes that state_u.ary.ary
1370 * aligns with state_u.lazysv.cur. See cx_dup() */
1371 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1372 cx->blk_loop.state_u.lazysv.cur = NULL;
1373 SvREFCNT_dec_NN(sv);
1374 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1375 sv = cx->blk_loop.state_u.lazysv.end;
1376 cx->blk_loop.state_u.lazysv.end = NULL;
1377 SvREFCNT_dec_NN(sv);
1380 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1382 SV **svp = (cx)->blk_loop.itervar_u.svp;
1383 if ((cx->cx_type & CXp_FOR_GV))
1384 svp = &GvSV((GV*)svp);
1386 *svp = cx->blk_loop.itersave;
1387 cx->blk_loop.itersave = NULL;
1388 SvREFCNT_dec(cursv);
1393 PERL_STATIC_INLINE void
1394 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1396 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1398 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1402 PERL_STATIC_INLINE void
1403 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1405 PERL_ARGS_ASSERT_CX_POPWHEN;
1406 assert(CxTYPE(cx) == CXt_WHEN);
1408 PERL_UNUSED_ARG(cx);
1409 PERL_UNUSED_CONTEXT;
1410 /* currently NOOP */
1414 PERL_STATIC_INLINE void
1415 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1417 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1419 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1420 cx->blk_givwhen.defsv_save = orig_defsv;
1424 PERL_STATIC_INLINE void
1425 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1429 PERL_ARGS_ASSERT_CX_POPGIVEN;
1430 assert(CxTYPE(cx) == CXt_GIVEN);
1432 sv = GvSV(PL_defgv);
1433 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1434 cx->blk_givwhen.defsv_save = NULL;
1439 * ex: set ts=8 sts=4 sw=4 et: