X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7cb3f9598b37fcd8b30ea273d668c8b48d5f4c76..62805098c7dbcf459cf5674ecbe5961ba4807178:/inline.h diff --git a/inline.h b/inline.h index 86e005d..98d9edc 100644 --- a/inline.h +++ b/inline.h @@ -25,6 +25,14 @@ S_av_top_index(pTHX_ AV *av) /* ------------------------------- cv.h ------------------------------- */ +PERL_STATIC_INLINE GV * +S_CvGV(pTHX_ CV *sv) +{ + return CvNAMED(sv) + ? Perl_cvgv_from_hek(aTHX_ sv) + : ((XPVCV*)MUTABLE_PTR(SvANY(sv)))->xcv_gv_u.xcv_gv; +} + PERL_STATIC_INLINE I32 * S_CvDEPTHp(const CV * const sv) { @@ -82,13 +90,73 @@ S_MgBYTEPOS(pTHX_ MAGIC *mg, SV *sv, const char *s, STRLEN len) } #endif +/* ------------------------------- pad.h ------------------------------ */ + +#if defined(PERL_IN_PAD_C) || defined(PERL_IN_OP_C) +PERL_STATIC_INLINE bool +PadnameIN_SCOPE(const PADNAME * const pn, const U32 seq) +{ + /* is seq within the range _LOW to _HIGH ? + * This is complicated by the fact that PL_cop_seqmax + * may have wrapped around at some point */ + if (COP_SEQ_RANGE_LOW(pn) == PERL_PADSEQ_INTRO) + return FALSE; /* not yet introduced */ + + if (COP_SEQ_RANGE_HIGH(pn) == PERL_PADSEQ_INTRO) { + /* in compiling scope */ + if ( + (seq > COP_SEQ_RANGE_LOW(pn)) + ? (seq - COP_SEQ_RANGE_LOW(pn) < (U32_MAX >> 1)) + : (COP_SEQ_RANGE_LOW(pn) - seq > (U32_MAX >> 1)) + ) + return TRUE; + } + else if ( + (COP_SEQ_RANGE_LOW(pn) > COP_SEQ_RANGE_HIGH(pn)) + ? + ( seq > COP_SEQ_RANGE_LOW(pn) + || seq <= COP_SEQ_RANGE_HIGH(pn)) + + : ( seq > COP_SEQ_RANGE_LOW(pn) + && seq <= COP_SEQ_RANGE_HIGH(pn)) + ) + return TRUE; + return FALSE; +} +#endif + +/* ------------------------------- pp.h ------------------------------- */ + +PERL_STATIC_INLINE I32 +S_TOPMARK(pTHX) +{ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, + "MARK top %p %" IVdf "\n", + PL_markstack_ptr, + (IV)*PL_markstack_ptr))); + return *PL_markstack_ptr; +} + +PERL_STATIC_INLINE I32 +S_POPMARK(pTHX) +{ + DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, + "MARK pop %p %" IVdf "\n", + (PL_markstack_ptr-1), + (IV)*(PL_markstack_ptr-1)))); + assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow"); + return *PL_markstack_ptr--; +} + /* ----------------------------- regexp.h ----------------------------- */ PERL_STATIC_INLINE struct regexp * S_ReANY(const REGEXP * const re) { + XPV* const p = (XPV*)SvANY(re); assert(isREGEXP(re)); - return re->sv_u.svu_rx; + return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx + : (struct regexp *)p; } /* ------------------------------- sv.h ------------------------------- */ @@ -148,27 +216,15 @@ SvAMAGIC_off(SV *sv) } PERL_STATIC_INLINE U32 -S_SvPADTMP_on(SV *sv) -{ - assert(!(SvFLAGS(sv) & SVs_PADMY)); - return SvFLAGS(sv) |= SVs_PADTMP; -} -PERL_STATIC_INLINE U32 -S_SvPADTMP_off(SV *sv) -{ - assert(!(SvFLAGS(sv) & SVs_PADMY)); - return SvFLAGS(sv) &= ~SVs_PADTMP; -} -PERL_STATIC_INLINE U32 S_SvPADSTALE_on(SV *sv) { - assert(SvFLAGS(sv) & SVs_PADMY); + assert(!(SvFLAGS(sv) & SVs_PADTMP)); return SvFLAGS(sv) |= SVs_PADSTALE; } PERL_STATIC_INLINE U32 S_SvPADSTALE_off(SV *sv) { - assert(SvFLAGS(sv) & SVs_PADMY); + assert(!(SvFLAGS(sv) & SVs_PADTMP)); return SvFLAGS(sv) &= ~SVs_PADSTALE; } #if defined(PERL_CORE) || defined (PERL_EXT) @@ -188,68 +244,24 @@ S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp) /* ------------------------------- handy.h ------------------------------- */ /* saves machine code for a common noreturn idiom typically used in Newx*() */ -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-function" +#ifdef GCC_DIAG_PRAGMA +GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */ #endif static void S_croak_memory_wrap(void) { Perl_croak_nocontext("%s",PL_memory_wrap); } -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#ifdef BOOTSTRAP_CHARSET -static bool -S_bootstrap_ctype(U8 character, UV classnum, bool full_Latin1) -{ - /* See comments in handy.h. This is placed in this file primarily to avoid - * having to have an entry for it in embed.fnc */ - - dTHX; - - if (! full_Latin1 && ! isASCII(character)) { - return FALSE; - } - - switch (classnum) { - case _CC_ALPHANUMERIC: return isALPHANUMERIC_L1(character); - case _CC_ALPHA: return isALPHA_L1(character); - case _CC_ASCII: return isASCII_L1(character); - case _CC_BLANK: return isBLANK_L1(character); - case _CC_CASED: return isLOWER_L1(character) - || isUPPER_L1(character); - case _CC_CNTRL: return isCNTRL_L1(character); - case _CC_DIGIT: return isDIGIT_L1(character); - case _CC_GRAPH: return isGRAPH_L1(character); - case _CC_LOWER: return isLOWER_L1(character); - case _CC_PRINT: return isPRINT_L1(character); - case _CC_PSXSPC: return isPSXSPC_L1(character); - case _CC_PUNCT: return isPUNCT_L1(character); - case _CC_SPACE: return isSPACE_L1(character); - case _CC_UPPER: return isUPPER_L1(character); - case _CC_WORDCHAR: return isWORDCHAR_L1(character); - case _CC_XDIGIT: return isXDIGIT_L1(character); - case _CC_VERTSPACE: return isSPACE_L1(character) && ! isBLANK_L1(character); - case _CC_IDFIRST: return isIDFIRST_L1(character); - case _CC_QUOTEMETA: return _isQUOTEMETA(character); - case _CC_CHARNAME_CONT: return isCHARNAME_CONT(character); - case _CC_NONLATIN1_FOLD: return _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(character); - case _CC_NON_FINAL_FOLD: return _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(character); - case _CC_IS_IN_SOME_FOLD: return _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(character); - case _CC_BACKSLASH_FOO_LBRACE_IS_META: return 0; - - - default: break; - } - Perl_croak(aTHX_ "panic: bootstrap_ctype() has an unexpected character class '%" UVxf "'", classnum); -} +#ifdef GCC_DIAG_PRAGMA +GCC_DIAG_RESTORE /* Intentionally left semicolonless. */ #endif /* ------------------------------- utf8.h ------------------------------- */ +/* +=head1 Unicode Support +*/ + PERL_STATIC_INLINE void S_append_utf8_from_native_byte(const U8 byte, U8** dest) { @@ -259,63 +271,423 @@ S_append_utf8_from_native_byte(const U8 byte, U8** dest) PERL_ARGS_ASSERT_APPEND_UTF8_FROM_NATIVE_BYTE; if (NATIVE_BYTE_IS_INVARIANT(byte)) - *(*dest)++ = byte; + *((*dest)++) = byte; else { - *(*dest)++ = UTF8_EIGHT_BIT_HI(byte); - *(*dest)++ = UTF8_EIGHT_BIT_LO(byte); + *((*dest)++) = UTF8_EIGHT_BIT_HI(byte); + *((*dest)++) = UTF8_EIGHT_BIT_LO(byte); + } +} + +/* +=for apidoc valid_utf8_to_uvchr +Like C>, but should only be called when it is known that +the next character in the input UTF-8 string C is well-formed (I, +it passes C>. Surrogates, non-character code points, and +non-Unicode code points are allowed. + +=cut + + */ + +PERL_STATIC_INLINE UV +Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen) +{ + const UV expectlen = UTF8SKIP(s); + const U8* send = s + expectlen; + UV uv = *s; + + PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR; + + if (retlen) { + *retlen = expectlen; + } + + /* An invariant is trivially returned */ + if (expectlen == 1) { + return uv; + } + + /* Remove the leading bits that indicate the number of bytes, leaving just + * the bits that are part of the value */ + uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen); + + /* Now, loop through the remaining bytes, accumulating each into the + * working total as we go. (I khw tried unrolling the loop for up to 4 + * bytes, but there was no performance improvement) */ + for (++s; s < send; s++) { + uv = UTF8_ACCUMULATE(uv, *s); } + + return UNI_TO_NATIVE(uv); + } -/* These two exist only to replace the macros they formerly were so that their - * use can be deprecated */ +/* +=for apidoc is_utf8_invariant_string + +Returns TRUE if the first C bytes of the string C are the same +regardless of the UTF-8 encoding of the string (or UTF-EBCDIC encoding on +EBCDIC machines); otherwise it returns FALSE. That is, it returns TRUE if they +are UTF-8 invariant. On ASCII-ish machines, all the ASCII characters and only +the ASCII characters fit this definition. On EBCDIC machines, the ASCII-range +characters are invariant, but so also are the C1 controls. + +If C is 0, it will be calculated using C, (which means if you +use this option, that C can't have embedded C characters and has to +have a terminating C byte). + +See also +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +and +C>. + +=cut + +*/ + +#define is_utf8_invariant_string(s, len) \ + is_utf8_invariant_string_loc(s, len, NULL) + +/* +=for apidoc is_utf8_invariant_string_loc + +Like C> but upon failure, stores the location of +the first UTF-8 variant character in the C pointer; if all characters are +UTF-8 invariant, this function does not change the contents of C<*ep>. + +=cut + +*/ PERL_STATIC_INLINE bool -S_isIDFIRST_lazy(pTHX_ const char* p) +S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep) { - PERL_ARGS_ASSERT_ISIDFIRST_LAZY; + const U8* send; + const U8* x = s; + + PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC; + + if (len == 0) { + len = strlen((const char *)s); + } + + send = s + len; + +#ifndef EBCDIC + /* Try to get the widest word on this platform */ +# ifdef HAS_LONG_LONG +# define PERL_WORDCAST unsigned long long +# define PERL_WORDSIZE LONGLONGSIZE +# else +# define PERL_WORDCAST UV +# define PERL_WORDSIZE UVSIZE +# endif + +# if PERL_WORDSIZE == 4 +# define PERL_VARIANTS_WORD_MASK 0x80808080 +# define PERL_WORD_BOUNDARY_MASK 0x3 +# elif PERL_WORDSIZE == 8 +# define PERL_VARIANTS_WORD_MASK UINT64_C(0x8080808080808080) +# define PERL_WORD_BOUNDARY_MASK 0x7 +# else +# error Unexpected word size +# endif + + /* Process per-byte until reach word boundary. XXX This loop could be + * eliminated if we knew that this platform had fast unaligned reads */ + while (x < send && (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) { + if (! UTF8_IS_INVARIANT(*x)) { + if (ep) { + *ep = x; + } + + return FALSE; + } + x++; + } + + /* Process per-word as long as we have at least a full word left */ + while (x + PERL_WORDSIZE <= send) { + if ((* (PERL_WORDCAST *) x) & PERL_VARIANTS_WORD_MASK) { + + /* Found a variant. Just return if caller doesn't want its exact + * position */ + if (! ep) { + return FALSE; + } + + /* Otherwise fall into final loop to find which byte it is */ + break; + } + x += PERL_WORDSIZE; + } - return isIDFIRST_lazy_if(p,1); +# undef PERL_WORDCAST +# undef PERL_WORDSIZE +# undef PERL_WORD_BOUNDARY_MASK +# undef PERL_VARIANTS_WORD_MASK +#endif + + /* Process per-byte */ + while (x < send) { + if (! UTF8_IS_INVARIANT(*x)) { + if (ep) { + *ep = x; + } + + return FALSE; + } + + x++; + } + + return TRUE; } +/* +=for apidoc is_utf8_string + +Returns TRUE if the first C bytes of string C form a valid +Perl-extended-UTF-8 string; returns FALSE otherwise. If C is 0, it will +be calculated using C (which means if you use this option, that C +can't have embedded C characters and has to have a terminating C +byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'. + +This function considers Perl's extended UTF-8 to be valid. That means that +code points above Unicode, surrogates, and non-character code points are +considered valid by this function. Use C>, +C>, or C> to restrict what +code points are considered valid. + +See also +C>, +C>, +C>, +C>, +C>, +C>, +C>, + +=cut +*/ + +#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL) + +#if defined(PERL_CORE) || defined (PERL_EXT) + +/* +=for apidoc is_utf8_non_invariant_string + +Returns TRUE if L returns FALSE for the first +C bytes of the string C, but they are, nonetheless, legal Perl-extended +UTF-8; otherwise returns FALSE. + +A TRUE return means that at least one code point represented by the sequence +either is a wide character not representable as a single byte, or the +representation differs depending on whether the sequence is encoded in UTF-8 or +not. + +See also +C>, +C> + +=cut + +This is commonly used to determine if a SV's UTF-8 flag should be turned on. +It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if +it otherwise contains invalid UTF-8. + +It is an internal function because khw thinks that XS code shouldn't be working +at this low a level. A valid use case could change that. + +*/ + PERL_STATIC_INLINE bool -S_isALNUM_lazy(pTHX_ const char* p) +S_is_utf8_non_invariant_string(const U8* const s, STRLEN len) { - PERL_ARGS_ASSERT_ISALNUM_LAZY; + const U8 * first_variant; + + PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING; + + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + return FALSE; + } - return isALNUM_lazy_if(p,1); + return is_utf8_string(first_variant, len - (first_variant - s)); } -/* ------------------------------- perl.h ----------------------------- */ +#endif /* -=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name +=for apidoc is_strict_utf8_string -Test that the given C doesn't contain any internal NUL characters. -If it does, set C to ENOENT, optionally warn, and return FALSE. +Returns TRUE if the first C bytes of string C form a valid +UTF-8-encoded string that is fully interchangeable by any application using +Unicode rules; otherwise it returns FALSE. If C is 0, it will be +calculated using C (which means if you use this option, that C +can't have embedded C characters and has to have a terminating C +byte). Note that all characters being ASCII constitute 'a valid UTF-8 string'. -Return TRUE if the name is safe. +This function returns FALSE for strings containing any +code points above the Unicode max of 0x10FFFF, surrogate code points, or +non-character code points. + +See also +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +and +C>. + +=cut +*/ + +#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL) + +/* +=for apidoc is_c9strict_utf8_string + +Returns TRUE if the first C bytes of string C form a valid +UTF-8-encoded string that conforms to +L; +otherwise it returns FALSE. If C is 0, it will be calculated using +C (which means if you use this option, that C can't have embedded +C characters and has to have a terminating C byte). Note that all +characters being ASCII constitute 'a valid UTF-8 string'. + +This function returns FALSE for strings containing any code points above the +Unicode max of 0x10FFFF or surrogate code points, but accepts non-character +code points per +L. + +See also +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +and +C>. + +=cut +*/ + +#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0) + +/* +=for apidoc is_utf8_string_flags + +Returns TRUE if the first C bytes of string C form a valid +UTF-8 string, subject to the restrictions imposed by C; +returns FALSE otherwise. If C is 0, it will be calculated +using C (which means if you use this option, that C can't have +embedded C characters and has to have a terminating C byte). Note +that all characters being ASCII constitute 'a valid UTF-8 string'. + +If C is 0, this gives the same results as C>; if +C is C, this gives the same results +as C>; and if C is +C, this gives the same results as +C>. Otherwise C may be any +combination of the C> flags understood by +C>, with the same meanings. -Used by the IS_SAFE_SYSCALL() macro. +See also +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +C>, +and +C>. =cut */ PERL_STATIC_INLINE bool -S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) { - /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs - * perl itself uses xce*() functions which accept 8-bit strings. - */ +S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags) +{ + const U8 * first_variant; - PERL_ARGS_ASSERT_IS_SAFE_SYSCALL; + PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS; + assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE + |UTF8_DISALLOW_PERL_EXTENDED))); - if (pv && len > 1) { - char *null_at; - if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) { - SETERRNO(ENOENT, LIB_INVARG); - Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS), - "Invalid \\0 character in %s for %s: %s\\0%s", - what, op_name, pv, null_at+1); + if (len == 0) { + len = strlen((const char *)s); + } + + if (flags == 0) { + return is_utf8_string(s, len); + } + + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) + == UTF8_DISALLOW_ILLEGAL_INTERCHANGE) + { + return is_strict_utf8_string(s, len); + } + + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) + == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) + { + return is_c9strict_utf8_string(s, len); + } + + if (! is_utf8_invariant_string_loc(s, len, &first_variant)) { + const U8* const send = s + len; + const U8* x = first_variant; + + while (x < send) { + STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); + if (UNLIKELY(! cur_len)) { return FALSE; + } + x += cur_len; } } @@ -324,42 +696,1225 @@ S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char /* -Return true if the supplied filename has a newline character -immediately before the final NUL. +=for apidoc is_utf8_string_loc -My original look at this incorrectly used the len from SvPV(), but -that's incorrect, since we allow for a NUL in pv[len-1]. +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer. -So instead, strlen() and work from there. +See also C>. -This allow for the user reading a filename, forgetting to chomp it, -then calling: +=cut +*/ - open my $foo, "$file\0"; +#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0) + +/* +=for apidoc is_utf8_string_loclen + +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer, and the number of UTF-8 +encoded characters in the C pointer. + +See also C>. + +=cut */ -#ifdef PERL_CORE +PERL_STATIC_INLINE bool +Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) +{ + const U8 * first_variant; + + PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN; + + if (len == 0) { + len = strlen((const char *) s); + } + + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } + + return TRUE; + } + + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isUTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } +} + +/* + +=for apidoc is_strict_utf8_string_loc + +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer. + +See also C>. + +=cut +*/ + +#define is_strict_utf8_string_loc(s, len, ep) \ + is_strict_utf8_string_loclen(s, len, ep, 0) + +/* + +=for apidoc is_strict_utf8_string_loclen + +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer, and the number of UTF-8 +encoded characters in the C pointer. + +See also C>. + +=cut +*/ PERL_STATIC_INLINE bool -S_should_warn_nl(const char *pv) { - STRLEN len; +S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) +{ + const U8 * first_variant; - PERL_ARGS_ASSERT_SHOULD_WARN_NL; + PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN; - len = strlen(pv); + if (len == 0) { + len = strlen((const char *) s); + } - return len > 0 && pv[len-1] == '\n'; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } + + return TRUE; + } + + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } } -#endif +/* + +=for apidoc is_c9strict_utf8_string_loc + +Like C> but stores the location of the failure (in +the case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer. + +See also C>. + +=cut +*/ + +#define is_c9strict_utf8_string_loc(s, len, ep) \ + is_c9strict_utf8_string_loclen(s, len, ep, 0) + +/* + +=for apidoc is_c9strict_utf8_string_loclen + +Like C> but stores the location of the failure (in +the case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer, and the number of UTF-8 encoded +characters in the C pointer. + +See also C>. + +=cut +*/ + +PERL_STATIC_INLINE bool +S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) +{ + const U8 * first_variant; + + PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN; + + if (len == 0) { + len = strlen((const char *) s); + } + + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } + + return TRUE; + } + + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } +} + +/* + +=for apidoc is_utf8_string_loc_flags + +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer. + +See also C>. + +=cut +*/ + +#define is_utf8_string_loc_flags(s, len, ep, flags) \ + is_utf8_string_loclen_flags(s, len, ep, 0, flags) + + +/* The above 3 actual functions could have been moved into the more general one + * just below, and made #defines that call it with the right 'flags'. They are + * currently kept separate to increase their chances of getting inlined */ + +/* + +=for apidoc is_utf8_string_loclen_flags + +Like C> but stores the location of the failure (in the +case of "utf8ness failure") or the location C+C (in the case of +"utf8ness success") in the C pointer, and the number of UTF-8 +encoded characters in the C pointer. + +See also C>. + +=cut +*/ + +PERL_STATIC_INLINE bool +S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags) +{ + const U8 * first_variant; + + PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS; + assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE + |UTF8_DISALLOW_PERL_EXTENDED))); + + if (len == 0) { + len = strlen((const char *) s); + } + + if (flags == 0) { + return is_utf8_string_loclen(s, len, ep, el); + } + + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) + == UTF8_DISALLOW_ILLEGAL_INTERCHANGE) + { + return is_strict_utf8_string_loclen(s, len, ep, el); + } + + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) + == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) + { + return is_c9strict_utf8_string_loclen(s, len, ep, el); + } + + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } + + return TRUE; + } + + { + const U8* send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } +} + +/* +=for apidoc utf8_distance + +Returns the number of UTF-8 characters between the UTF-8 pointers C +and C. + +WARNING: use only if you *know* that the pointers point inside the +same UTF-8 buffer. + +=cut +*/ + +PERL_STATIC_INLINE IV +Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b) +{ + PERL_ARGS_ASSERT_UTF8_DISTANCE; + + return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a); +} + +/* +=for apidoc utf8_hop + +Return the UTF-8 pointer C displaced by C characters, either +forward or backward. + +WARNING: do not use the following unless you *know* C is within +the UTF-8 data pointed to by C *and* that on entry C is aligned +on the first byte of character or just after the last byte of a character. + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop(const U8 *s, SSize_t off) +{ + PERL_ARGS_ASSERT_UTF8_HOP; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + if (off >= 0) { + while (off--) + s += UTF8SKIP(s); + } + else { + while (off++) { + s--; + while (UTF8_IS_CONTINUATION(*s)) + s--; + } + } + GCC_DIAG_IGNORE(-Wcast-qual); + return (U8 *)s; + GCC_DIAG_RESTORE; +} + +/* +=for apidoc utf8_hop_forward + +Return the UTF-8 pointer C displaced by up to C characters, +forward. + +C must be non-negative. + +C must be before or equal to C. + +When moving forward it will not move beyond C. + +Will not exceed this limit even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end) +{ + PERL_ARGS_ASSERT_UTF8_HOP_FORWARD; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(s <= end); + assert(off >= 0); + + while (off--) { + STRLEN skip = UTF8SKIP(s); + if ((STRLEN)(end - s) <= skip) { + GCC_DIAG_IGNORE(-Wcast-qual); + return (U8 *)end; + GCC_DIAG_RESTORE; + } + s += skip; + } + + GCC_DIAG_IGNORE(-Wcast-qual); + return (U8 *)s; + GCC_DIAG_RESTORE; +} + +/* +=for apidoc utf8_hop_back + +Return the UTF-8 pointer C displaced by up to C characters, +backward. + +C must be non-positive. + +C must be after or equal to C. + +When moving backward it will not move before C. + +Will not exceed this limit even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start) +{ + PERL_ARGS_ASSERT_UTF8_HOP_BACK; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(start <= s); + assert(off <= 0); + + while (off++ && s > start) { + s--; + while (UTF8_IS_CONTINUATION(*s) && s > start) + s--; + } + + GCC_DIAG_IGNORE(-Wcast-qual); + return (U8 *)s; + GCC_DIAG_RESTORE; +} + +/* +=for apidoc utf8_hop_safe + +Return the UTF-8 pointer C displaced by up to C characters, +either forward or backward. + +When moving backward it will not move before C. + +When moving forward it will not move beyond C. + +Will not exceed those limits even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end) +{ + PERL_ARGS_ASSERT_UTF8_HOP_SAFE; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(start <= s && s <= end); + + if (off >= 0) { + return utf8_hop_forward(s, off, end); + } + else { + return utf8_hop_back(s, off, start); + } +} + +/* + +=for apidoc is_utf8_valid_partial_char + +Returns 0 if the sequence of bytes starting at C and looking no further than +S> is the UTF-8 encoding, as extended by Perl, for one or more code +points. Otherwise, it returns 1 if there exists at least one non-empty +sequence of bytes that when appended to sequence C, starting at position +C causes the entire sequence to be the well-formed UTF-8 of some code point; +otherwise returns 0. + +In other words this returns TRUE if C points to a partial UTF-8-encoded code +point. + +This is useful when a fixed-length buffer is being tested for being well-formed +UTF-8, but the final few bytes in it don't comprise a full character; that is, +it is split somewhere in the middle of the final code point's UTF-8 +representation. (Presumably when the buffer is refreshed with the next chunk +of data, the new first bytes will complete the partial code point.) This +function is used to verify that the final bytes in the current buffer are in +fact the legal beginning of some code point, so that if they aren't, the +failure can be signalled without having to wait for the next read. + +=cut +*/ +#define is_utf8_valid_partial_char(s, e) \ + is_utf8_valid_partial_char_flags(s, e, 0) + +/* + +=for apidoc is_utf8_valid_partial_char_flags + +Like C>, it returns a boolean giving whether +or not the input is a valid UTF-8 encoded partial character, but it takes an +extra parameter, C, which can further restrict which code points are +considered valid. + +If C is 0, this behaves identically to +C>. Otherwise C can be any combination +of the C> flags accepted by C>. If +there is any sequence of bytes that can complete the input partial character in +such a way that a non-prohibited character is formed, the function returns +TRUE; otherwise FALSE. Non character code points cannot be determined based on +partial character input. But many of the other possible excluded types can be +determined from just the first one or two bytes. + +=cut + */ + +PERL_STATIC_INLINE bool +S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) +{ + PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS; + + assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE + |UTF8_DISALLOW_PERL_EXTENDED))); + + if (s >= e || s + UTF8SKIP(s) <= e) { + return FALSE; + } + + return cBOOL(_is_utf8_char_helper(s, e, flags)); +} + +/* + +=for apidoc is_utf8_fixed_width_buf_flags + +Returns TRUE if the fixed-width buffer starting at C with length C +is entirely valid UTF-8, subject to the restrictions given by C; +otherwise it returns FALSE. + +If C is 0, any well-formed UTF-8, as extended by Perl, is accepted +without restriction. If the final few bytes of the buffer do not form a +complete code point, this will return TRUE anyway, provided that +C> returns TRUE for them. + +If C in non-zero, it can be any combination of the +C> flags accepted by C>, and with the +same meanings. + +This function differs from C> only in that the latter +returns FALSE if the final few bytes of the string don't form a complete code +point. + +=cut + */ +#define is_utf8_fixed_width_buf_flags(s, len, flags) \ + is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags) + +/* + +=for apidoc is_utf8_fixed_width_buf_loc_flags + +Like C> but stores the location of the +failure in the C pointer. If the function returns TRUE, C<*ep> will point +to the beginning of any partial character at the end of the buffer; if there is +no partial character C<*ep> will contain C+C. + +See also C>. + +=cut +*/ + +#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \ + is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags) + +/* + +=for apidoc is_utf8_fixed_width_buf_loclen_flags + +Like C> but stores the number of +complete, valid characters found in the C pointer. + +=cut +*/ + +PERL_STATIC_INLINE bool +S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s, + STRLEN len, + const U8 **ep, + STRLEN *el, + const U32 flags) +{ + const U8 * maybe_partial; + + PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS; + + if (! ep) { + ep = &maybe_partial; + } + + /* If it's entirely valid, return that; otherwise see if the only error is + * that the final few bytes are for a partial character */ + return is_utf8_string_loclen_flags(s, len, ep, el, flags) + || is_utf8_valid_partial_char_flags(*ep, s + len, flags); +} + +/* ------------------------------- perl.h ----------------------------- */ + +/* +=head1 Miscellaneous Functions + +=for apidoc AiR|bool|is_safe_syscall|const char *pv|STRLEN len|const char *what|const char *op_name + +Test that the given C doesn't contain any internal C characters. +If it does, set C to C, optionally warn, and return FALSE. + +Return TRUE if the name is safe. + +Used by the C macro. + +=cut +*/ + +PERL_STATIC_INLINE bool +S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) { + /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs + * perl itself uses xce*() functions which accept 8-bit strings. + */ + + PERL_ARGS_ASSERT_IS_SAFE_SYSCALL; + + if (len > 1) { + char *null_at; + if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) { + SETERRNO(ENOENT, LIB_INVARG); + Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS), + "Invalid \\0 character in %s for %s: %s\\0%s", + what, op_name, pv, null_at+1); + return FALSE; + } + } + + return TRUE; +} + +/* + +Return true if the supplied filename has a newline character +immediately before the first (hopefully only) NUL. + +My original look at this incorrectly used the len from SvPV(), but +that's incorrect, since we allow for a NUL in pv[len-1]. + +So instead, strlen() and work from there. + +This allow for the user reading a filename, forgetting to chomp it, +then calling: + + open my $foo, "$file\0"; + +*/ + +#ifdef PERL_CORE + +PERL_STATIC_INLINE bool +S_should_warn_nl(const char *pv) { + STRLEN len; + + PERL_ARGS_ASSERT_SHOULD_WARN_NL; + + len = strlen(pv); + + return len > 0 && pv[len-1] == '\n'; +} + +#endif + +/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */ + +#define MAX_CHARSET_NAME_LENGTH 2 + +PERL_STATIC_INLINE const char * +get_regex_charset_name(const U32 flags, STRLEN* const lenp) +{ + /* Returns a string that corresponds to the name of the regex character set + * given by 'flags', and *lenp is set the length of that string, which + * cannot exceed MAX_CHARSET_NAME_LENGTH characters */ + + *lenp = 1; + switch (get_regex_charset(flags)) { + case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS; + case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS; + case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS; + case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS; + case REGEX_ASCII_MORE_RESTRICTED_CHARSET: + *lenp = 2; + return ASCII_MORE_RESTRICT_PAT_MODS; + } + /* The NOT_REACHED; hides an assert() which has a rather complex + * definition in perl.h. */ + NOT_REACHED; /* NOTREACHED */ + return "?"; /* Unknown */ +} + +/* + +Return false if any get magic is on the SV other than taint magic. + +*/ + +PERL_STATIC_INLINE bool +S_sv_only_taint_gmagic(SV *sv) { + MAGIC *mg = SvMAGIC(sv); + + PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC; + + while (mg) { + if (mg->mg_type != PERL_MAGIC_taint + && !(mg->mg_flags & MGf_GSKIP) + && mg->mg_virtual->svt_get) { + return FALSE; + } + mg = mg->mg_moremagic; + } + + return TRUE; +} + +/* ------------------ cop.h ------------------------------------------- */ + + +/* Enter a block. Push a new base context and return its address. */ + +PERL_STATIC_INLINE PERL_CONTEXT * +S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix) +{ + PERL_CONTEXT * cx; + + PERL_ARGS_ASSERT_CX_PUSHBLOCK; + + CXINC; + cx = CX_CUR(); + cx->cx_type = type; + cx->blk_gimme = gimme; + cx->blk_oldsaveix = saveix; + cx->blk_oldsp = (I32)(sp - PL_stack_base); + cx->blk_oldcop = PL_curcop; + cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack); + cx->blk_oldscopesp = PL_scopestack_ix; + cx->blk_oldpm = PL_curpm; + cx->blk_old_tmpsfloor = PL_tmps_floor; + + PL_tmps_floor = PL_tmps_ix; + CX_DEBUG(cx, "PUSH"); + return cx; +} + + +/* Exit a block (RETURN and LAST). */ + +PERL_STATIC_INLINE void +S_cx_popblock(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_POPBLOCK; + + CX_DEBUG(cx, "POP"); + /* these 3 are common to cx_popblock and cx_topblock */ + PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp; + PL_scopestack_ix = cx->blk_oldscopesp; + PL_curpm = cx->blk_oldpm; + + /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats + * and leaves a CX entry lying around for repeated use, so + * skip for multicall */ \ + assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx)) + || PL_savestack_ix == cx->blk_oldsaveix); + PL_curcop = cx->blk_oldcop; + PL_tmps_floor = cx->blk_old_tmpsfloor; +} + +/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO). + * Whereas cx_popblock() restores the state to the point just before + * cx_pushblock() was called, cx_topblock() restores it to the point just + * *after* cx_pushblock() was called. */ + +PERL_STATIC_INLINE void +S_cx_topblock(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_TOPBLOCK; + + CX_DEBUG(cx, "TOP"); + /* these 3 are common to cx_popblock and cx_topblock */ + PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp; + PL_scopestack_ix = cx->blk_oldscopesp; + PL_curpm = cx->blk_oldpm; + + PL_stack_sp = PL_stack_base + cx->blk_oldsp; +} + + +PERL_STATIC_INLINE void +S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs) +{ + U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub); + + PERL_ARGS_ASSERT_CX_PUSHSUB; + + PERL_DTRACE_PROBE_ENTRY(cv); + cx->blk_sub.cv = cv; + cx->blk_sub.olddepth = CvDEPTH(cv); + cx->blk_sub.prevcomppad = PL_comppad; + cx->cx_type |= (hasargs) ? CXp_HASARGS : 0; + cx->blk_sub.retop = retop; + SvREFCNT_inc_simple_void_NN(cv); + cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF); +} + + +/* subsets of cx_popsub() */ + +PERL_STATIC_INLINE void +S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx) +{ + CV *cv; + + PERL_ARGS_ASSERT_CX_POPSUB_COMMON; + assert(CxTYPE(cx) == CXt_SUB); + + PL_comppad = cx->blk_sub.prevcomppad; + PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL; + cv = cx->blk_sub.cv; + CvDEPTH(cv) = cx->blk_sub.olddepth; + cx->blk_sub.cv = NULL; + SvREFCNT_dec(cv); +} + + +/* handle the @_ part of leaving a sub */ + +PERL_STATIC_INLINE void +S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx) +{ + AV *av; + + PERL_ARGS_ASSERT_CX_POPSUB_ARGS; + assert(CxTYPE(cx) == CXt_SUB); + assert(AvARRAY(MUTABLE_AV( + PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[ + CvDEPTH(cx->blk_sub.cv)])) == PL_curpad); + + CX_POP_SAVEARRAY(cx); + av = MUTABLE_AV(PAD_SVl(0)); + if (UNLIKELY(AvREAL(av))) + /* abandon @_ if it got reified */ + clear_defarray(av, 0); + else { + CLEAR_ARGARRAY(av); + } +} + + +PERL_STATIC_INLINE void +S_cx_popsub(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_POPSUB; + assert(CxTYPE(cx) == CXt_SUB); + + PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv); + + if (CxHASARGS(cx)) + cx_popsub_args(cx); + cx_popsub_common(cx); +} + + +PERL_STATIC_INLINE void +S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv) +{ + PERL_ARGS_ASSERT_CX_PUSHFORMAT; + + cx->blk_format.cv = cv; + cx->blk_format.retop = retop; + cx->blk_format.gv = gv; + cx->blk_format.dfoutgv = PL_defoutgv; + cx->blk_format.prevcomppad = PL_comppad; + cx->blk_u16 = 0; + + SvREFCNT_inc_simple_void_NN(cv); + CvDEPTH(cv)++; + SvREFCNT_inc_void(cx->blk_format.dfoutgv); +} + + +PERL_STATIC_INLINE void +S_cx_popformat(pTHX_ PERL_CONTEXT *cx) +{ + CV *cv; + GV *dfout; + + PERL_ARGS_ASSERT_CX_POPFORMAT; + assert(CxTYPE(cx) == CXt_FORMAT); + + dfout = cx->blk_format.dfoutgv; + setdefout(dfout); + cx->blk_format.dfoutgv = NULL; + SvREFCNT_dec_NN(dfout); + + PL_comppad = cx->blk_format.prevcomppad; + PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL; + cv = cx->blk_format.cv; + cx->blk_format.cv = NULL; + --CvDEPTH(cv); + SvREFCNT_dec_NN(cv); +} + + +PERL_STATIC_INLINE void +S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv) +{ + PERL_ARGS_ASSERT_CX_PUSHEVAL; + + cx->blk_eval.retop = retop; + cx->blk_eval.old_namesv = namesv; + cx->blk_eval.old_eval_root = PL_eval_root; + cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL; + cx->blk_eval.cv = NULL; /* later set by doeval_compile() */ + cx->blk_eval.cur_top_env = PL_top_env; + + assert(!(PL_in_eval & ~ 0x3F)); + assert(!(PL_op->op_type & ~0x1FF)); + cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7); +} + + +PERL_STATIC_INLINE void +S_cx_popeval(pTHX_ PERL_CONTEXT *cx) +{ + SV *sv; + + PERL_ARGS_ASSERT_CX_POPEVAL; + assert(CxTYPE(cx) == CXt_EVAL); + + PL_in_eval = CxOLD_IN_EVAL(cx); + assert(!(PL_in_eval & 0xc0)); + PL_eval_root = cx->blk_eval.old_eval_root; + sv = cx->blk_eval.cur_text; + if (sv && CxEVAL_TXT_REFCNTED(cx)) { + cx->blk_eval.cur_text = NULL; + SvREFCNT_dec_NN(sv); + } + + sv = cx->blk_eval.old_namesv; + if (sv) { + cx->blk_eval.old_namesv = NULL; + SvREFCNT_dec_NN(sv); + } +} + + +/* push a plain loop, i.e. + * { block } + * while (cond) { block } + * for (init;cond;continue) { block } + * This loop can be last/redo'ed etc. + */ + +PERL_STATIC_INLINE void +S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN; + cx->blk_loop.my_op = cLOOP; +} + + +/* push a true for loop, i.e. + * for var (list) { block } + */ + +PERL_STATIC_INLINE void +S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave) +{ + PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR; + + /* this one line is common with cx_pushloop_plain */ + cx->blk_loop.my_op = cLOOP; + + cx->blk_loop.itervar_u.svp = (SV**)itervarp; + cx->blk_loop.itersave = itersave; +#ifdef USE_ITHREADS + cx->blk_loop.oldcomppad = PL_comppad; +#endif +} + + +/* pop all loop types, including plain */ + +PERL_STATIC_INLINE void +S_cx_poploop(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_POPLOOP; + + assert(CxTYPE_is_LOOP(cx)); + if ( CxTYPE(cx) == CXt_LOOP_ARY + || CxTYPE(cx) == CXt_LOOP_LAZYSV) + { + /* Free ary or cur. This assumes that state_u.ary.ary + * aligns with state_u.lazysv.cur. See cx_dup() */ + SV *sv = cx->blk_loop.state_u.lazysv.cur; + cx->blk_loop.state_u.lazysv.cur = NULL; + SvREFCNT_dec_NN(sv); + if (CxTYPE(cx) == CXt_LOOP_LAZYSV) { + sv = cx->blk_loop.state_u.lazysv.end; + cx->blk_loop.state_u.lazysv.end = NULL; + SvREFCNT_dec_NN(sv); + } + } + if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) { + SV *cursv; + SV **svp = (cx)->blk_loop.itervar_u.svp; + if ((cx->cx_type & CXp_FOR_GV)) + svp = &GvSV((GV*)svp); + cursv = *svp; + *svp = cx->blk_loop.itersave; + cx->blk_loop.itersave = NULL; + SvREFCNT_dec(cursv); + } +} + + +PERL_STATIC_INLINE void +S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_PUSHWHEN; + + cx->blk_givwhen.leave_op = cLOGOP->op_other; +} + + +PERL_STATIC_INLINE void +S_cx_popwhen(pTHX_ PERL_CONTEXT *cx) +{ + PERL_ARGS_ASSERT_CX_POPWHEN; + assert(CxTYPE(cx) == CXt_WHEN); + + PERL_UNUSED_ARG(cx); + PERL_UNUSED_CONTEXT; + /* currently NOOP */ +} + + +PERL_STATIC_INLINE void +S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv) +{ + PERL_ARGS_ASSERT_CX_PUSHGIVEN; + + cx->blk_givwhen.leave_op = cLOGOP->op_other; + cx->blk_givwhen.defsv_save = orig_defsv; +} + + +PERL_STATIC_INLINE void +S_cx_popgiven(pTHX_ PERL_CONTEXT *cx) +{ + SV *sv; + + PERL_ARGS_ASSERT_CX_POPGIVEN; + assert(CxTYPE(cx) == CXt_GIVEN); + + sv = GvSV(PL_defgv); + GvSV(PL_defgv) = cx->blk_givwhen.defsv_save; + cx->blk_givwhen.defsv_save = NULL; + SvREFCNT_dec(sv); +} + +/* ------------------ util.h ------------------------------------------- */ + +/* +=head1 Miscellaneous Functions + +=for apidoc foldEQ + +Returns true if the leading C bytes of the strings C and C are the +same +case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes +match themselves and their opposite case counterparts. Non-cased and non-ASCII +range bytes match only themselves. + +=cut +*/ + +PERL_STATIC_INLINE I32 +Perl_foldEQ(const char *s1, const char *s2, I32 len) +{ + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold[*b]) + return 0; + a++,b++; + } + return 1; +} + +PERL_STATIC_INLINE I32 +Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len) +{ + /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on + * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor + * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor + * does it check that the strings each have at least 'len' characters */ + + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LATIN1; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_latin1[*b]) { + return 0; + } + a++, b++; + } + return 1; +} + +/* +=for apidoc foldEQ_locale + +Returns true if the leading C bytes of the strings C and C are the +same case-insensitively in the current locale; false otherwise. + +=cut +*/ + +PERL_STATIC_INLINE I32 +Perl_foldEQ_locale(const char *s1, const char *s2, I32 len) +{ + dVAR; + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LOCALE; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_locale[*b]) + return 0; + a++,b++; + } + return 1; +} + +#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT)) + +PERL_STATIC_INLINE void * +S_my_memrchr(const char * s, const char c, const STRLEN len) +{ + /* memrchr(), since many platforms lack it */ + + const char * t = s + len - 1; + + PERL_ARGS_ASSERT_MY_MEMRCHR; + + while (t >= s) { + if (*t == c) { + return (void *) t; + } + t--; + } + + return NULL; +} + +#endif /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */