3 * Copyright (C) 2012 by Larry Wall and others
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
8 * This file is a home for static inline functions that cannot go in other
9 * headers files, because they depend on proto.h (included after most other
10 * headers) or struct definitions.
12 * Each section names the header file that the functions "belong" to.
15 /* ------------------------------- av.h ------------------------------- */
17 PERL_STATIC_INLINE SSize_t
18 S_av_top_index(pTHX_ AV *av)
20 PERL_ARGS_ASSERT_AV_TOP_INDEX;
21 assert(SvTYPE(av) == SVt_PVAV);
26 /* ------------------------------- cv.h ------------------------------- */
28 PERL_STATIC_INLINE GV *
32 ? Perl_cvgv_from_hek(aTHX_ sv)
33 : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv;
36 PERL_STATIC_INLINE I32 *
37 S_CvDEPTHp(const CV * const sv)
39 assert(SvTYPE(sv) == SVt_PVCV || SvTYPE(sv) == SVt_PVFM);
40 return &((XPVCV*)SvANY(sv))->xcv_depth;
44 CvPROTO returns the prototype as stored, which is not necessarily what
45 the interpreter should be using. Specifically, the interpreter assumes
46 that spaces have been stripped, which has been the case if the prototype
47 was added by toke.c, but is generally not the case if it was added elsewhere.
48 Since we can't enforce the spacelessness at assignment time, this routine
49 provides a temporary copy at parse time with spaces removed.
50 I<orig> is the start of the original buffer, I<len> is the length of the
51 prototype and will be updated when this returns.
55 PERL_STATIC_INLINE char *
56 S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
60 tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
68 *len = tmps - SvPVX(tmpsv);
73 /* ------------------------------- mg.h ------------------------------- */
75 #if defined(PERL_CORE) || defined(PERL_EXT)
76 /* assumes get-magic and stringification have already occurred */
77 PERL_STATIC_INLINE STRLEN
78 S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len)
80 assert(mg->mg_type == PERL_MAGIC_regex_global);
81 assert(mg->mg_len != -1);
82 if (mg->mg_flags & MGf_BYTES || !DO_UTF8(sv))
83 return (STRLEN)mg->mg_len;
85 const STRLEN pos = (STRLEN)mg->mg_len;
86 /* Without this check, we may read past the end of the buffer: */
87 if (pos > sv_or_pv_len_utf8(sv, s, len)) return len+1;
88 return sv_or_pv_pos_u2b(sv, s, pos, NULL);
93 /* ------------------------------- pad.h ------------------------------ */
95 #if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C)
96 PERL_STATIC_INLINE bool
97 PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq)
99 /* is seq within the range _LOW to _HIGH ?
100 * This is complicated by the fact that PL_cop_seqmax
101 * may have wrapped around at some point */
102 if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO)
103 return FALSE; /* not yet introduced */
105 if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) {
106 /* in compiling scope */
108 (seq > COP_SEQ_RANGE_LOW(pn))
109 ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1))
110 : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1))
115 (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn))
117 ( seq > COP_SEQ_RANGE_LOW(pn)
118 || seq <= COP_SEQ_RANGE_HIGH(pn))
120 : ( seq > COP_SEQ_RANGE_LOW(pn)
121 && seq <= COP_SEQ_RANGE_HIGH(pn))
128 /* ------------------------------- pp.h ------------------------------- */
130 PERL_STATIC_INLINE I32
133 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
134 "MARK top %p %" IVdf "\n",
136 (IV)*PL_markstack_ptr)));
137 return *PL_markstack_ptr;
140 PERL_STATIC_INLINE I32
143 DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
144 "MARK pop %p %" IVdf "\n",
145 (PL_markstack_ptr-1),
146 (IV)*(PL_markstack_ptr-1))));
147 assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
148 return *PL_markstack_ptr--;
151 /* ----------------------------- regexp.h ----------------------------- */
153 PERL_STATIC_INLINE struct regexp *
154 S_ReANY(const REGEXP * const re)
156 XPV* const p = (XPV*)SvANY(re);
157 assert(isREGEXP(re));
158 return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
159 : (struct regexp *)p;
162 /* ------------------------------- sv.h ------------------------------- */
164 PERL_STATIC_INLINE SV *
165 S_SvREFCNT_inc(SV *sv)
167 if (LIKELY(sv != NULL))
171 PERL_STATIC_INLINE SV *
172 S_SvREFCNT_inc_NN(SV *sv)
177 PERL_STATIC_INLINE void
178 S_SvREFCNT_inc_void(SV *sv)
180 if (LIKELY(sv != NULL))
183 PERL_STATIC_INLINE void
184 S_SvREFCNT_dec(pTHX_ SV *sv)
186 if (LIKELY(sv != NULL)) {
187 U32 rc = SvREFCNT(sv);
189 SvREFCNT(sv) = rc - 1;
191 Perl_sv_free2(aTHX_ sv, rc);
195 PERL_STATIC_INLINE void
196 S_SvREFCNT_dec_NN(pTHX_ SV *sv)
198 U32 rc = SvREFCNT(sv);
200 SvREFCNT(sv) = rc - 1;
202 Perl_sv_free2(aTHX_ sv, rc);
205 PERL_STATIC_INLINE void
209 if (SvOBJECT(SvRV(sv))) HvAMAGIC_on(SvSTASH(SvRV(sv)));
211 PERL_STATIC_INLINE void
214 if (SvROK(sv) && SvOBJECT(SvRV(sv)))
215 HvAMAGIC_off(SvSTASH(SvRV(sv)));
218 PERL_STATIC_INLINE U32
219 S_SvPADSTALE_on(SV *sv)
221 assert(!(SvFLAGS(sv) & SVs_PADTMP));
222 return SvFLAGS(sv) |= SVs_PADSTALE;
224 PERL_STATIC_INLINE U32
225 S_SvPADSTALE_off(SV *sv)
227 assert(!(SvFLAGS(sv) & SVs_PADTMP));
228 return SvFLAGS(sv) &= ~SVs_PADSTALE;
230 #if defined(PERL_CORE) || defined (PERL_EXT)
231 PERL_STATIC_INLINE STRLEN
232 S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp)
234 PERL_ARGS_ASSERT_SV_OR_PV_POS_U2B;
236 U8 *hopped = utf8_hop((U8 *)pv, pos);
237 if (lenp) *lenp = (STRLEN)(utf8_hop(hopped, *lenp) - hopped);
238 return (STRLEN)(hopped - (U8 *)pv);
240 return sv_pos_u2b_flags(sv,pos,lenp,SV_CONST_RETURN);
244 /* ------------------------------- handy.h ------------------------------- */
246 /* saves machine code for a common noreturn idiom typically used in Newx*() */
247 #ifdef GCC_DIAG_PRAGMA
248 GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
251 S_croak_memory_wrap(void)
253 Perl_croak_nocontext("%s",PL_memory_wrap);
255 #ifdef GCC_DIAG_PRAGMA
256 GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
259 /* ------------------------------- utf8.h ------------------------------- */
262 =head1 Unicode Support
265 PERL_STATIC_INLINE void
266 S_append_utf8_from_native_byte(const U8 byte, U8** dest)
268 /* Takes an input 'byte' (Latin1 or EBCDIC) and appends it to the UTF-8
269 * encoded string at '*dest', updating '*dest' to include it */
271 PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE;
273 if (NATIVE_BYTE_IS_INVARIANT(byte))
276 *((*dest)++) = UTF8_EIGHT_BIT_HI(byte);
277 *((*dest)++) = UTF8_EIGHT_BIT_LO(byte);
282 =for apidoc valid_utf8_to_uvchr
283 Like C<L</utf8_to_uvchr_buf>>, but should only be called when it is known that
284 the next character in the input UTF-8 string C<s> is well-formed (I<e.g.>,
285 it passes C<L</isUTF8_CHAR>>. Surrogates, non-character code points, and
286 non-Unicode code points are allowed.
292 PERL_STATIC_INLINE UV
293 Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
295 const UV expectlen = UTF8SKIP(s);
296 const U8* send = s + expectlen;
299 PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
305 /* An invariant is trivially returned */
306 if (expectlen == 1) {
310 /* Remove the leading bits that indicate the number of bytes, leaving just
311 * the bits that are part of the value */
312 uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
314 /* Now, loop through the remaining bytes, accumulating each into the
315 * working total as we go. (I khw tried unrolling the loop for up to 4
316 * bytes, but there was no performance improvement) */
317 for (++s; s < send; s++) {
318 uv = UTF8_ACCUMULATE(uv, *s);
321 return UNI_TO_NATIVE(uv);
326 =for apidoc is_utf8_invariant_string
328 Returns TRUE if the first C<len> bytes of the string C<s> are the same
329 regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on
330 EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they
331 are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only
332 the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range
333 characters are invariant, but so also are the C1 controls.
335 If C<len> is 0, it will be calculated using C<strlen(s)>, (which means if you
336 use this option, that C<s> can't have embedded C<NUL> characters and has to
337 have a terminating C<NUL> byte).
340 C<L</is_utf8_string>>,
341 C<L</is_utf8_string_flags>>,
342 C<L</is_utf8_string_loc>>,
343 C<L</is_utf8_string_loc_flags>>,
344 C<L</is_utf8_string_loclen>>,
345 C<L</is_utf8_string_loclen_flags>>,
346 C<L</is_utf8_fixed_width_buf_flags>>,
347 C<L</is_utf8_fixed_width_buf_loc_flags>>,
348 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
349 C<L</is_strict_utf8_string>>,
350 C<L</is_strict_utf8_string_loc>>,
351 C<L</is_strict_utf8_string_loclen>>,
352 C<L</is_c9strict_utf8_string>>,
353 C<L</is_c9strict_utf8_string_loc>>,
355 C<L</is_c9strict_utf8_string_loclen>>.
361 #define is_utf8_invariant_string(s, len) \
362 is_utf8_invariant_string_loc(s, len, NULL)
365 =for apidoc is_utf8_invariant_string_loc
367 Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
368 the first UTF-8 variant character in the C<ep> pointer; if all characters are
369 UTF-8 invariant, this function does not change the contents of C<*ep>.
375 PERL_STATIC_INLINE bool
376 S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
381 PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
384 len = strlen((const char *)s);
391 /* This looks like 0x010101... */
392 #define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
394 /* This looks like 0x808080... */
395 #define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
396 #define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
397 #define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
399 /* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
400 * or'ing together the lowest bits of 'x'. Hopefully the final term gets
401 * optimized out completely on a 32-bit system, and its mask gets optimized out
402 * on a 64-bit system */
403 #define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
404 | (PTR2nat(x) >> 1) \
405 | ( (PTR2nat(x) >> 2) \
406 & PERL_WORD_BOUNDARY_MASK)))
408 /* Do the word-at-a-time iff there is at least one usable full word. That
409 * means that after advancing to a word boundary, there still is at least a
410 * full word left. The number of bytes needed to advance is 'wordsize -
411 * offset' unless offset is 0. */
412 if ((STRLEN) (send - x) >= PERL_WORDSIZE
414 /* This term is wordsize if subword; 0 if not */
415 + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
418 - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
421 /* Process per-byte until reach word boundary. XXX This loop could be
422 * eliminated if we knew that this platform had fast unaligned reads */
423 while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
424 if (! UTF8_IS_INVARIANT(*x)) {
434 /* Here, we know we have at least one full word to process. Process
435 * per-word as long as we have at least a full word left */
437 if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
439 /* Found a variant. Just return if caller doesn't want its
445 /* Otherwise fall into final loop to find which byte it is */
449 } while (x + PERL_WORDSIZE <= send);
452 # undef PERL_WORDSIZE
453 # undef PERL_WORD_BOUNDARY_MASK
454 # undef PERL_VARIANTS_WORD_MASK
457 /* Process per-byte */
459 if (! UTF8_IS_INVARIANT(*x)) {
474 =for apidoc is_utf8_string
476 Returns TRUE if the first C<len> bytes of string C<s> form a valid
477 Perl-extended-UTF-8 string; returns FALSE otherwise. If C<len> is 0, it will
478 be calculated using C<strlen(s)> (which means if you use this option, that C<s>
479 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
480 byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
482 This function considers Perl's extended UTF-8 to be valid. That means that
483 code points above Unicode, surrogates, and non-character code points are
484 considered valid by this function. Use C<L</is_strict_utf8_string>>,
485 C<L</is_c9strict_utf8_string>>, or C<L</is_utf8_string_flags>> to restrict what
486 code points are considered valid.
489 C<L</is_utf8_invariant_string>>,
490 C<L</is_utf8_invariant_string_loc>>,
491 C<L</is_utf8_string_loc>>,
492 C<L</is_utf8_string_loclen>>,
493 C<L</is_utf8_fixed_width_buf_flags>>,
494 C<L</is_utf8_fixed_width_buf_loc_flags>>,
495 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
500 #define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
502 #if defined(PERL_CORE) || defined (PERL_EXT)
505 =for apidoc is_utf8_non_invariant_string
507 Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
508 C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
509 UTF-8; otherwise returns FALSE.
511 A TRUE return means that at least one code point represented by the sequence
512 either is a wide character not representable as a single byte, or the
513 representation differs depending on whether the sequence is encoded in UTF-8 or
517 C<L<perlapi/is_utf8_invariant_string>>,
518 C<L<perlapi/is_utf8_string>>
522 This is commonly used to determine if a SV's UTF-8 flag should be turned on.
523 It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
524 it otherwise contains invalid UTF-8.
526 It is an internal function because khw thinks that XS code shouldn't be working
527 at this low a level. A valid use case could change that.
531 PERL_STATIC_INLINE bool
532 S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
534 const U8 * first_variant;
536 PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
538 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
542 return is_utf8_string(first_variant, len - (first_variant - s));
548 =for apidoc is_strict_utf8_string
550 Returns TRUE if the first C<len> bytes of string C<s> form a valid
551 UTF-8-encoded string that is fully interchangeable by any application using
552 Unicode rules; otherwise it returns FALSE. If C<len> is 0, it will be
553 calculated using C<strlen(s)> (which means if you use this option, that C<s>
554 can't have embedded C<NUL> characters and has to have a terminating C<NUL>
555 byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'.
557 This function returns FALSE for strings containing any
558 code points above the Unicode max of 0x10FFFF, surrogate code points, or
559 non-character code points.
562 C<L</is_utf8_invariant_string>>,
563 C<L</is_utf8_invariant_string_loc>>,
564 C<L</is_utf8_string>>,
565 C<L</is_utf8_string_flags>>,
566 C<L</is_utf8_string_loc>>,
567 C<L</is_utf8_string_loc_flags>>,
568 C<L</is_utf8_string_loclen>>,
569 C<L</is_utf8_string_loclen_flags>>,
570 C<L</is_utf8_fixed_width_buf_flags>>,
571 C<L</is_utf8_fixed_width_buf_loc_flags>>,
572 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
573 C<L</is_strict_utf8_string_loc>>,
574 C<L</is_strict_utf8_string_loclen>>,
575 C<L</is_c9strict_utf8_string>>,
576 C<L</is_c9strict_utf8_string_loc>>,
578 C<L</is_c9strict_utf8_string_loclen>>.
583 #define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
586 =for apidoc is_c9strict_utf8_string
588 Returns TRUE if the first C<len> bytes of string C<s> form a valid
589 UTF-8-encoded string that conforms to
590 L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>;
591 otherwise it returns FALSE. If C<len> is 0, it will be calculated using
592 C<strlen(s)> (which means if you use this option, that C<s> can't have embedded
593 C<NUL> characters and has to have a terminating C<NUL> byte). Note that all
594 characters being ASCII constitute 'a valid UTF-8 string'.
596 This function returns FALSE for strings containing any code points above the
597 Unicode max of 0x10FFFF or surrogate code points, but accepts non-character
599 L<Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
602 C<L</is_utf8_invariant_string>>,
603 C<L</is_utf8_invariant_string_loc>>,
604 C<L</is_utf8_string>>,
605 C<L</is_utf8_string_flags>>,
606 C<L</is_utf8_string_loc>>,
607 C<L</is_utf8_string_loc_flags>>,
608 C<L</is_utf8_string_loclen>>,
609 C<L</is_utf8_string_loclen_flags>>,
610 C<L</is_utf8_fixed_width_buf_flags>>,
611 C<L</is_utf8_fixed_width_buf_loc_flags>>,
612 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
613 C<L</is_strict_utf8_string>>,
614 C<L</is_strict_utf8_string_loc>>,
615 C<L</is_strict_utf8_string_loclen>>,
616 C<L</is_c9strict_utf8_string_loc>>,
618 C<L</is_c9strict_utf8_string_loclen>>.
623 #define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
626 =for apidoc is_utf8_string_flags
628 Returns TRUE if the first C<len> bytes of string C<s> form a valid
629 UTF-8 string, subject to the restrictions imposed by C<flags>;
630 returns FALSE otherwise. If C<len> is 0, it will be calculated
631 using C<strlen(s)> (which means if you use this option, that C<s> can't have
632 embedded C<NUL> characters and has to have a terminating C<NUL> byte). Note
633 that all characters being ASCII constitute 'a valid UTF-8 string'.
635 If C<flags> is 0, this gives the same results as C<L</is_utf8_string>>; if
636 C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
637 as C<L</is_strict_utf8_string>>; and if C<flags> is
638 C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives the same results as
639 C<L</is_c9strict_utf8_string>>. Otherwise C<flags> may be any
640 combination of the C<UTF8_DISALLOW_I<foo>> flags understood by
641 C<L</utf8n_to_uvchr>>, with the same meanings.
644 C<L</is_utf8_invariant_string>>,
645 C<L</is_utf8_invariant_string_loc>>,
646 C<L</is_utf8_string>>,
647 C<L</is_utf8_string_loc>>,
648 C<L</is_utf8_string_loc_flags>>,
649 C<L</is_utf8_string_loclen>>,
650 C<L</is_utf8_string_loclen_flags>>,
651 C<L</is_utf8_fixed_width_buf_flags>>,
652 C<L</is_utf8_fixed_width_buf_loc_flags>>,
653 C<L</is_utf8_fixed_width_buf_loclen_flags>>,
654 C<L</is_strict_utf8_string>>,
655 C<L</is_strict_utf8_string_loc>>,
656 C<L</is_strict_utf8_string_loclen>>,
657 C<L</is_c9strict_utf8_string>>,
658 C<L</is_c9strict_utf8_string_loc>>,
660 C<L</is_c9strict_utf8_string_loclen>>.
665 PERL_STATIC_INLINE bool
666 S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
668 const U8 * first_variant;
670 PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
671 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
672 |UTF8_DISALLOW_PERL_EXTENDED)));
675 len = strlen((const char *)s);
679 return is_utf8_string(s, len);
682 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
683 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
685 return is_strict_utf8_string(s, len);
688 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
689 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
691 return is_c9strict_utf8_string(s, len);
694 if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
695 const U8* const send = s + len;
696 const U8* x = first_variant;
699 STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
700 if (UNLIKELY(! cur_len)) {
712 =for apidoc is_utf8_string_loc
714 Like C<L</is_utf8_string>> but stores the location of the failure (in the
715 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
716 "utf8ness success") in the C<ep> pointer.
718 See also C<L</is_utf8_string_loclen>>.
723 #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
727 =for apidoc is_utf8_string_loclen
729 Like C<L</is_utf8_string>> but stores the location of the failure (in the
730 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
731 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
732 encoded characters in the C<el> pointer.
734 See also C<L</is_utf8_string_loc>>.
739 PERL_STATIC_INLINE bool
740 Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
742 const U8 * first_variant;
744 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
747 len = strlen((const char *) s);
750 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
762 const U8* const send = s + len;
763 const U8* x = first_variant;
764 STRLEN outlen = first_variant - s;
767 const STRLEN cur_len = isUTF8_CHAR(x, send);
768 if (UNLIKELY(! cur_len)) {
788 =for apidoc is_strict_utf8_string_loc
790 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
791 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
792 "utf8ness success") in the C<ep> pointer.
794 See also C<L</is_strict_utf8_string_loclen>>.
799 #define is_strict_utf8_string_loc(s, len, ep) \
800 is_strict_utf8_string_loclen(s, len, ep, 0)
804 =for apidoc is_strict_utf8_string_loclen
806 Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
807 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
808 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
809 encoded characters in the C<el> pointer.
811 See also C<L</is_strict_utf8_string_loc>>.
816 PERL_STATIC_INLINE bool
817 S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
819 const U8 * first_variant;
821 PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
824 len = strlen((const char *) s);
827 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
839 const U8* const send = s + len;
840 const U8* x = first_variant;
841 STRLEN outlen = first_variant - s;
844 const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
845 if (UNLIKELY(! cur_len)) {
865 =for apidoc is_c9strict_utf8_string_loc
867 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
868 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
869 "utf8ness success") in the C<ep> pointer.
871 See also C<L</is_c9strict_utf8_string_loclen>>.
876 #define is_c9strict_utf8_string_loc(s, len, ep) \
877 is_c9strict_utf8_string_loclen(s, len, ep, 0)
881 =for apidoc is_c9strict_utf8_string_loclen
883 Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
884 the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
885 "utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
886 characters in the C<el> pointer.
888 See also C<L</is_c9strict_utf8_string_loc>>.
893 PERL_STATIC_INLINE bool
894 S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
896 const U8 * first_variant;
898 PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
901 len = strlen((const char *) s);
904 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
916 const U8* const send = s + len;
917 const U8* x = first_variant;
918 STRLEN outlen = first_variant - s;
921 const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
922 if (UNLIKELY(! cur_len)) {
942 =for apidoc is_utf8_string_loc_flags
944 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
945 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
946 "utf8ness success") in the C<ep> pointer.
948 See also C<L</is_utf8_string_loclen_flags>>.
953 #define is_utf8_string_loc_flags(s, len, ep, flags) \
954 is_utf8_string_loclen_flags(s, len, ep, 0, flags)
957 /* The above 3 actual functions could have been moved into the more general one
958 * just below, and made #defines that call it with the right 'flags'. They are
959 * currently kept separate to increase their chances of getting inlined */
963 =for apidoc is_utf8_string_loclen_flags
965 Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
966 case of "utf8ness failure") or the location C<s>+C<len> (in the case of
967 "utf8ness success") in the C<ep> pointer, and the number of UTF-8
968 encoded characters in the C<el> pointer.
970 See also C<L</is_utf8_string_loc_flags>>.
975 PERL_STATIC_INLINE bool
976 S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
978 const U8 * first_variant;
980 PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
981 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
982 |UTF8_DISALLOW_PERL_EXTENDED)));
985 len = strlen((const char *) s);
989 return is_utf8_string_loclen(s, len, ep, el);
992 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
993 == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
995 return is_strict_utf8_string_loclen(s, len, ep, el);
998 if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
999 == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
1001 return is_c9strict_utf8_string_loclen(s, len, ep, el);
1004 if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
1016 const U8* send = s + len;
1017 const U8* x = first_variant;
1018 STRLEN outlen = first_variant - s;
1021 const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
1022 if (UNLIKELY(! cur_len)) {
1041 =for apidoc utf8_distance
1043 Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
1046 WARNING: use only if you *know* that the pointers point inside the
1052 PERL_STATIC_INLINE IV
1053 Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
1055 PERL_ARGS_ASSERT_UTF8_DISTANCE;
1057 return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
1061 =for apidoc utf8_hop
1063 Return the UTF-8 pointer C<s> displaced by C<off> characters, either
1064 forward or backward.
1066 WARNING: do not use the following unless you *know* C<off> is within
1067 the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
1068 on the first byte of character or just after the last byte of a character.
1073 PERL_STATIC_INLINE U8 *
1074 Perl_utf8_hop(const U8 *s, SSize_t off)
1076 PERL_ARGS_ASSERT_UTF8_HOP;
1078 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1079 * the bitops (especially ~) can create illegal UTF-8.
1080 * In other words: in Perl UTF-8 is not just for Unicode. */
1089 while (UTF8_IS_CONTINUATION(*s))
1093 GCC_DIAG_IGNORE(-Wcast-qual);
1099 =for apidoc utf8_hop_forward
1101 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1104 C<off> must be non-negative.
1106 C<s> must be before or equal to C<end>.
1108 When moving forward it will not move beyond C<end>.
1110 Will not exceed this limit even if the string is not valid "UTF-8".
1115 PERL_STATIC_INLINE U8 *
1116 Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
1118 PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
1120 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1121 * the bitops (especially ~) can create illegal UTF-8.
1122 * In other words: in Perl UTF-8 is not just for Unicode. */
1128 STRLEN skip = UTF8SKIP(s);
1129 if ((STRLEN)(end - s) <= skip) {
1130 GCC_DIAG_IGNORE(-Wcast-qual);
1137 GCC_DIAG_IGNORE(-Wcast-qual);
1143 =for apidoc utf8_hop_back
1145 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1148 C<off> must be non-positive.
1150 C<s> must be after or equal to C<start>.
1152 When moving backward it will not move before C<start>.
1154 Will not exceed this limit even if the string is not valid "UTF-8".
1159 PERL_STATIC_INLINE U8 *
1160 Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
1162 PERL_ARGS_ASSERT_UTF8_HOP_BACK;
1164 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1165 * the bitops (especially ~) can create illegal UTF-8.
1166 * In other words: in Perl UTF-8 is not just for Unicode. */
1171 while (off++ && s > start) {
1173 while (UTF8_IS_CONTINUATION(*s) && s > start)
1177 GCC_DIAG_IGNORE(-Wcast-qual);
1183 =for apidoc utf8_hop_safe
1185 Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
1186 either forward or backward.
1188 When moving backward it will not move before C<start>.
1190 When moving forward it will not move beyond C<end>.
1192 Will not exceed those limits even if the string is not valid "UTF-8".
1197 PERL_STATIC_INLINE U8 *
1198 Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
1200 PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
1202 /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
1203 * the bitops (especially ~) can create illegal UTF-8.
1204 * In other words: in Perl UTF-8 is not just for Unicode. */
1206 assert(start <= s && s <= end);
1209 return utf8_hop_forward(s, off, end);
1212 return utf8_hop_back(s, off, start);
1218 =for apidoc is_utf8_valid_partial_char
1220 Returns 0 if the sequence of bytes starting at C<s> and looking no further than
1221 S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
1222 points. Otherwise, it returns 1 if there exists at least one non-empty
1223 sequence of bytes that when appended to sequence C<s>, starting at position
1224 C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
1225 otherwise returns 0.
1227 In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
1230 This is useful when a fixed-length buffer is being tested for being well-formed
1231 UTF-8, but the final few bytes in it don't comprise a full character; that is,
1232 it is split somewhere in the middle of the final code point's UTF-8
1233 representation. (Presumably when the buffer is refreshed with the next chunk
1234 of data, the new first bytes will complete the partial code point.) This
1235 function is used to verify that the final bytes in the current buffer are in
1236 fact the legal beginning of some code point, so that if they aren't, the
1237 failure can be signalled without having to wait for the next read.
1241 #define is_utf8_valid_partial_char(s, e) \
1242 is_utf8_valid_partial_char_flags(s, e, 0)
1246 =for apidoc is_utf8_valid_partial_char_flags
1248 Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
1249 or not the input is a valid UTF-8 encoded partial character, but it takes an
1250 extra parameter, C<flags>, which can further restrict which code points are
1253 If C<flags> is 0, this behaves identically to
1254 C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
1255 of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
1256 there is any sequence of bytes that can complete the input partial character in
1257 such a way that a non-prohibited character is formed, the function returns
1258 TRUE; otherwise FALSE. Non character code points cannot be determined based on
1259 partial character input. But many of the other possible excluded types can be
1260 determined from just the first one or two bytes.
1265 PERL_STATIC_INLINE bool
1266 S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
1268 PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
1270 assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
1271 |UTF8_DISALLOW_PERL_EXTENDED)));
1273 if (s >= e || s + UTF8SKIP(s) <= e) {
1277 return cBOOL(_is_utf8_char_helper(s, e, flags));
1282 =for apidoc is_utf8_fixed_width_buf_flags
1284 Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
1285 is entirely valid UTF-8, subject to the restrictions given by C<flags>;
1286 otherwise it returns FALSE.
1288 If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
1289 without restriction. If the final few bytes of the buffer do not form a
1290 complete code point, this will return TRUE anyway, provided that
1291 C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
1293 If C<flags> in non-zero, it can be any combination of the
1294 C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
1297 This function differs from C<L</is_utf8_string_flags>> only in that the latter
1298 returns FALSE if the final few bytes of the string don't form a complete code
1303 #define is_utf8_fixed_width_buf_flags(s, len, flags) \
1304 is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
1308 =for apidoc is_utf8_fixed_width_buf_loc_flags
1310 Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
1311 failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
1312 to the beginning of any partial character at the end of the buffer; if there is
1313 no partial character C<*ep> will contain C<s>+C<len>.
1315 See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
1320 #define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
1321 is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
1325 =for apidoc is_utf8_fixed_width_buf_loclen_flags
1327 Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
1328 complete, valid characters found in the C<el> pointer.
1333 PERL_STATIC_INLINE bool
1334 S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
1340 const U8 * maybe_partial;
1342 PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
1345 ep = &maybe_partial;
1348 /* If it's entirely valid, return that; otherwise see if the only error is
1349 * that the final few bytes are for a partial character */
1350 return is_utf8_string_loclen_flags(s, len, ep, el, flags)
1351 || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
1354 /* ------------------------------- perl.h ----------------------------- */
1357 =head1 Miscellaneous Functions
1359 =for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name
1361 Test that the given C<pv> doesn't contain any internal C<NUL> characters.
1362 If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
1364 Return TRUE if the name is safe.
1366 Used by the C<IS_SAFE_SYSCALL()> macro.
1371 PERL_STATIC_INLINE bool
1372 S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
1373 /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
1374 * perl itself uses xce*() functions which accept 8-bit strings.
1377 PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
1381 if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
1382 SETERRNO(ENOENT, LIB_INVARG);
1383 Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
1384 "Invalid \\0 character in %s for %s: %s\\0%s",
1385 what, op_name, pv, null_at+1);
1395 Return true if the supplied filename has a newline character
1396 immediately before the first (hopefully only) NUL.
1398 My original look at this incorrectly used the len from SvPV(), but
1399 that's incorrect, since we allow for a NUL in pv[len-1].
1401 So instead, strlen() and work from there.
1403 This allow for the user reading a filename, forgetting to chomp it,
1406 open my $foo, "$file\0";
1412 PERL_STATIC_INLINE bool
1413 S_should_warn_nl(const char *pv) {
1416 PERL_ARGS_ASSERT_SHOULD_WARN_NL;
1420 return len > 0 && pv[len-1] == '\n';
1425 /* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
1427 #define MAX_CHARSET_NAME_LENGTH 2
1429 PERL_STATIC_INLINE const char *
1430 get_regex_charset_name(const U32 flags, STRLEN* const lenp)
1432 /* Returns a string that corresponds to the name of the regex character set
1433 * given by 'flags', and *lenp is set the length of that string, which
1434 * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
1437 switch (get_regex_charset(flags)) {
1438 case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
1439 case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
1440 case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
1441 case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
1442 case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
1444 return ASCII_MORE_RESTRICT_PAT_MODS;
1446 /* The NOT_REACHED; hides an assert() which has a rather complex
1447 * definition in perl.h. */
1448 NOT_REACHED; /* NOTREACHED */
1449 return "?"; /* Unknown */
1454 Return false if any get magic is on the SV other than taint magic.
1458 PERL_STATIC_INLINE bool
1459 S_sv_only_taint_gmagic(SV *sv) {
1460 MAGIC *mg = SvMAGIC(sv);
1462 PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
1465 if (mg->mg_type != PERL_MAGIC_taint
1466 && !(mg->mg_flags & MGf_GSKIP)
1467 && mg->mg_virtual->svt_get) {
1470 mg = mg->mg_moremagic;
1476 /* ------------------ cop.h ------------------------------------------- */
1479 /* Enter a block. Push a new base context and return its address. */
1481 PERL_STATIC_INLINE PERL_CONTEXT *
1482 S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
1486 PERL_ARGS_ASSERT_CX_PUSHBLOCK;
1491 cx->blk_gimme = gimme;
1492 cx->blk_oldsaveix = saveix;
1493 cx->blk_oldsp = (I32)(sp - PL_stack_base);
1494 cx->blk_oldcop = PL_curcop;
1495 cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
1496 cx->blk_oldscopesp = PL_scopestack_ix;
1497 cx->blk_oldpm = PL_curpm;
1498 cx->blk_old_tmpsfloor = PL_tmps_floor;
1500 PL_tmps_floor = PL_tmps_ix;
1501 CX_DEBUG(cx, "PUSH");
1506 /* Exit a block (RETURN and LAST). */
1508 PERL_STATIC_INLINE void
1509 S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
1511 PERL_ARGS_ASSERT_CX_POPBLOCK;
1513 CX_DEBUG(cx, "POP");
1514 /* these 3 are common to cx_popblock and cx_topblock */
1515 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1516 PL_scopestack_ix = cx->blk_oldscopesp;
1517 PL_curpm = cx->blk_oldpm;
1519 /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
1520 * and leaves a CX entry lying around for repeated use, so
1521 * skip for multicall */ \
1522 assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
1523 || PL_savestack_ix == cx->blk_oldsaveix);
1524 PL_curcop = cx->blk_oldcop;
1525 PL_tmps_floor = cx->blk_old_tmpsfloor;
1528 /* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
1529 * Whereas cx_popblock() restores the state to the point just before
1530 * cx_pushblock() was called, cx_topblock() restores it to the point just
1531 * *after* cx_pushblock() was called. */
1533 PERL_STATIC_INLINE void
1534 S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
1536 PERL_ARGS_ASSERT_CX_TOPBLOCK;
1538 CX_DEBUG(cx, "TOP");
1539 /* these 3 are common to cx_popblock and cx_topblock */
1540 PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
1541 PL_scopestack_ix = cx->blk_oldscopesp;
1542 PL_curpm = cx->blk_oldpm;
1544 PL_stack_sp = PL_stack_base + cx->blk_oldsp;
1548 PERL_STATIC_INLINE void
1549 S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
1551 U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
1553 PERL_ARGS_ASSERT_CX_PUSHSUB;
1555 PERL_DTRACE_PROBE_ENTRY(cv);
1556 cx->blk_sub.cv = cv;
1557 cx->blk_sub.olddepth = CvDEPTH(cv);
1558 cx->blk_sub.prevcomppad = PL_comppad;
1559 cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
1560 cx->blk_sub.retop = retop;
1561 SvREFCNT_inc_simple_void_NN(cv);
1562 cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
1566 /* subsets of cx_popsub() */
1568 PERL_STATIC_INLINE void
1569 S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
1573 PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
1574 assert(CxTYPE(cx) == CXt_SUB);
1576 PL_comppad = cx->blk_sub.prevcomppad;
1577 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1578 cv = cx->blk_sub.cv;
1579 CvDEPTH(cv) = cx->blk_sub.olddepth;
1580 cx->blk_sub.cv = NULL;
1585 /* handle the @_ part of leaving a sub */
1587 PERL_STATIC_INLINE void
1588 S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
1592 PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
1593 assert(CxTYPE(cx) == CXt_SUB);
1594 assert(AvARRAY(MUTABLE_AV(
1595 PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
1596 CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
1598 CX_POP_SAVEARRAY(cx);
1599 av = MUTABLE_AV(PAD_SVl(0));
1600 if (UNLIKELY(AvREAL(av)))
1601 /* abandon @_ if it got reified */
1602 clear_defarray(av, 0);
1609 PERL_STATIC_INLINE void
1610 S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
1612 PERL_ARGS_ASSERT_CX_POPSUB;
1613 assert(CxTYPE(cx) == CXt_SUB);
1615 PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
1619 cx_popsub_common(cx);
1623 PERL_STATIC_INLINE void
1624 S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
1626 PERL_ARGS_ASSERT_CX_PUSHFORMAT;
1628 cx->blk_format.cv = cv;
1629 cx->blk_format.retop = retop;
1630 cx->blk_format.gv = gv;
1631 cx->blk_format.dfoutgv = PL_defoutgv;
1632 cx->blk_format.prevcomppad = PL_comppad;
1635 SvREFCNT_inc_simple_void_NN(cv);
1637 SvREFCNT_inc_void(cx->blk_format.dfoutgv);
1641 PERL_STATIC_INLINE void
1642 S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
1647 PERL_ARGS_ASSERT_CX_POPFORMAT;
1648 assert(CxTYPE(cx) == CXt_FORMAT);
1650 dfout = cx->blk_format.dfoutgv;
1652 cx->blk_format.dfoutgv = NULL;
1653 SvREFCNT_dec_NN(dfout);
1655 PL_comppad = cx->blk_format.prevcomppad;
1656 PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
1657 cv = cx->blk_format.cv;
1658 cx->blk_format.cv = NULL;
1660 SvREFCNT_dec_NN(cv);
1664 PERL_STATIC_INLINE void
1665 S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
1667 PERL_ARGS_ASSERT_CX_PUSHEVAL;
1669 cx->blk_eval.retop = retop;
1670 cx->blk_eval.old_namesv = namesv;
1671 cx->blk_eval.old_eval_root = PL_eval_root;
1672 cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
1673 cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
1674 cx->blk_eval.cur_top_env = PL_top_env;
1676 assert(!(PL_in_eval & ~ 0x3F));
1677 assert(!(PL_op->op_type & ~0x1FF));
1678 cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
1682 PERL_STATIC_INLINE void
1683 S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
1687 PERL_ARGS_ASSERT_CX_POPEVAL;
1688 assert(CxTYPE(cx) == CXt_EVAL);
1690 PL_in_eval = CxOLD_IN_EVAL(cx);
1691 assert(!(PL_in_eval & 0xc0));
1692 PL_eval_root = cx->blk_eval.old_eval_root;
1693 sv = cx->blk_eval.cur_text;
1694 if (sv && CxEVAL_TXT_REFCNTED(cx)) {
1695 cx->blk_eval.cur_text = NULL;
1696 SvREFCNT_dec_NN(sv);
1699 sv = cx->blk_eval.old_namesv;
1701 cx->blk_eval.old_namesv = NULL;
1702 SvREFCNT_dec_NN(sv);
1707 /* push a plain loop, i.e.
1709 * while (cond) { block }
1710 * for (init;cond;continue) { block }
1711 * This loop can be last/redo'ed etc.
1714 PERL_STATIC_INLINE void
1715 S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
1717 PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
1718 cx->blk_loop.my_op = cLOOP;
1722 /* push a true for loop, i.e.
1723 * for var (list) { block }
1726 PERL_STATIC_INLINE void
1727 S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
1729 PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
1731 /* this one line is common with cx_pushloop_plain */
1732 cx->blk_loop.my_op = cLOOP;
1734 cx->blk_loop.itervar_u.svp = (SV**)itervarp;
1735 cx->blk_loop.itersave = itersave;
1737 cx->blk_loop.oldcomppad = PL_comppad;
1742 /* pop all loop types, including plain */
1744 PERL_STATIC_INLINE void
1745 S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
1747 PERL_ARGS_ASSERT_CX_POPLOOP;
1749 assert(CxTYPE_is_LOOP(cx));
1750 if ( CxTYPE(cx) == CXt_LOOP_ARY
1751 || CxTYPE(cx) == CXt_LOOP_LAZYSV)
1753 /* Free ary or cur. This assumes that state_u.ary.ary
1754 * aligns with state_u.lazysv.cur. See cx_dup() */
1755 SV *sv = cx->blk_loop.state_u.lazysv.cur;
1756 cx->blk_loop.state_u.lazysv.cur = NULL;
1757 SvREFCNT_dec_NN(sv);
1758 if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
1759 sv = cx->blk_loop.state_u.lazysv.end;
1760 cx->blk_loop.state_u.lazysv.end = NULL;
1761 SvREFCNT_dec_NN(sv);
1764 if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
1766 SV **svp = (cx)->blk_loop.itervar_u.svp;
1767 if ((cx->cx_type & CXp_FOR_GV))
1768 svp = &GvSV((GV*)svp);
1770 *svp = cx->blk_loop.itersave;
1771 cx->blk_loop.itersave = NULL;
1772 SvREFCNT_dec(cursv);
1777 PERL_STATIC_INLINE void
1778 S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
1780 PERL_ARGS_ASSERT_CX_PUSHWHEN;
1782 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1786 PERL_STATIC_INLINE void
1787 S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
1789 PERL_ARGS_ASSERT_CX_POPWHEN;
1790 assert(CxTYPE(cx) == CXt_WHEN);
1792 PERL_UNUSED_ARG(cx);
1793 PERL_UNUSED_CONTEXT;
1794 /* currently NOOP */
1798 PERL_STATIC_INLINE void
1799 S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
1801 PERL_ARGS_ASSERT_CX_PUSHGIVEN;
1803 cx->blk_givwhen.leave_op = cLOGOP->op_other;
1804 cx->blk_givwhen.defsv_save = orig_defsv;
1808 PERL_STATIC_INLINE void
1809 S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
1813 PERL_ARGS_ASSERT_CX_POPGIVEN;
1814 assert(CxTYPE(cx) == CXt_GIVEN);
1816 sv = GvSV(PL_defgv);
1817 GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
1818 cx->blk_givwhen.defsv_save = NULL;
1822 /* ------------------ util.h ------------------------------------------- */
1825 =head1 Miscellaneous Functions
1829 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1831 case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
1832 match themselves and their opposite case counterparts. Non-cased and non-ASCII
1833 range bytes match only themselves.
1838 PERL_STATIC_INLINE I32
1839 Perl_foldEQ(const char *s1, const char *s2, I32 len)
1841 const U8 *a = (const U8 *)s1;
1842 const U8 *b = (const U8 *)s2;
1844 PERL_ARGS_ASSERT_FOLDEQ;
1849 if (*a != *b && *a != PL_fold[*b])
1856 PERL_STATIC_INLINE I32
1857 Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
1859 /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
1860 * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
1861 * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
1862 * does it check that the strings each have at least 'len' characters */
1864 const U8 *a = (const U8 *)s1;
1865 const U8 *b = (const U8 *)s2;
1867 PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
1872 if (*a != *b && *a != PL_fold_latin1[*b]) {
1881 =for apidoc foldEQ_locale
1883 Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
1884 same case-insensitively in the current locale; false otherwise.
1889 PERL_STATIC_INLINE I32
1890 Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
1893 const U8 *a = (const U8 *)s1;
1894 const U8 *b = (const U8 *)s2;
1896 PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
1901 if (*a != *b && *a != PL_fold_locale[*b])
1908 #if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
1910 PERL_STATIC_INLINE void *
1911 S_my_memrchr(const char * s, const char c, const STRLEN len)
1913 /* memrchr(), since many platforms lack it */
1915 const char * t = s + len - 1;
1917 PERL_ARGS_ASSERT_MY_MEMRCHR;
1932 * ex: set ts=8 sts=4 sw=4 et: