X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8bc127bf58304a1e46a3e33d30b0b8b6f21abb07..5149e17891a64b8b39440a943d065e188e327008:/inline.h?ds=sidebyside diff --git a/inline.h b/inline.h index 66ba348..1abee4f 100644 --- a/inline.h +++ b/inline.h @@ -131,7 +131,7 @@ PERL_STATIC_INLINE I32 S_TOPMARK(pTHX) { DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, - "MARK top %p %"IVdf"\n", + "MARK top %p %" IVdf "\n", PL_markstack_ptr, (IV)*PL_markstack_ptr))); return *PL_markstack_ptr; @@ -141,7 +141,7 @@ PERL_STATIC_INLINE I32 S_POPMARK(pTHX) { DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log, - "MARK pop %p %"IVdf"\n", + "MARK pop %p %" IVdf "\n", (PL_markstack_ptr-1), (IV)*(PL_markstack_ptr-1)))); assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow"); @@ -153,8 +153,10 @@ S_POPMARK(pTHX) PERL_STATIC_INLINE struct regexp * S_ReANY(const REGEXP * const re) { + XPV* const p = (XPV*)SvANY(re); assert(isREGEXP(re)); - return re->sv_u.svu_rx; + return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx + : (struct regexp *)p; } /* ------------------------------- sv.h ------------------------------- */ @@ -242,17 +244,13 @@ S_sv_or_pv_pos_u2b(pTHX_ SV *sv, const char *pv, STRLEN pos, STRLEN *lenp) /* ------------------------------- handy.h ------------------------------- */ /* saves machine code for a common noreturn idiom typically used in Newx*() */ -#ifdef GCC_DIAG_PRAGMA -GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */ -#endif +GCC_DIAG_IGNORE_DECL(-Wunused-function); static void S_croak_memory_wrap(void) { Perl_croak_nocontext("%s",PL_memory_wrap); } -#ifdef GCC_DIAG_PRAGMA -GCC_DIAG_RESTORE /* Intentionally left semicolonless. */ -#endif +GCC_DIAG_RESTORE_DECL; /* ------------------------------- utf8.h ------------------------------- */ @@ -353,24 +351,201 @@ and C>. =cut + +*/ + +#define is_utf8_invariant_string(s, len) \ + is_utf8_invariant_string_loc(s, len, NULL) + +/* +=for apidoc is_utf8_invariant_string_loc + +Like C> but upon failure, stores the location of +the first UTF-8 variant character in the C pointer; if all characters are +UTF-8 invariant, this function does not change the contents of C<*ep>. + +=cut + */ PERL_STATIC_INLINE bool -S_is_utf8_invariant_string(const U8* const s, const STRLEN len) +S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep) { - const U8* const send = s + (len ? len : strlen((const char *)s)); + const U8* send; const U8* x = s; - PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING; + PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC; + + if (len == 0) { + len = strlen((const char *)s); + } + + send = s + len; + +#ifndef EBCDIC + +/* This looks like 0x010101... */ +#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF) + +/* This looks like 0x808080... */ +#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80) +#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER) +#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1) + +/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by + * or'ing together the lowest bits of 'x'. Hopefully the final term gets + * optimized out completely on a 32-bit system, and its mask gets optimized out + * on a 64-bit system */ +#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \ + | ( PTR2nat(x) >> 1) \ + | ( ( (PTR2nat(x) \ + & PERL_WORD_BOUNDARY_MASK) >> 2)))) + + /* Do the word-at-a-time iff there is at least one usable full word. That + * means that after advancing to a word boundary, there still is at least a + * full word left. The number of bytes needed to advance is 'wordsize - + * offset' unless offset is 0. */ + if ((STRLEN) (send - x) >= PERL_WORDSIZE - for (; x < send; ++x) { - if (!UTF8_IS_INVARIANT(*x)) - return FALSE; + /* This term is wordsize if subword; 0 if not */ + + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x) + + /* 'offset' */ + - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) + { + + /* Process per-byte until reach word boundary. XXX This loop could be + * eliminated if we knew that this platform had fast unaligned reads */ + while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) { + if (! UTF8_IS_INVARIANT(*x)) { + if (ep) { + *ep = x; + } + + return FALSE; + } + x++; + } + + /* Here, we know we have at least one full word to process. Process + * per-word as long as we have at least a full word left */ + do { + if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) { + + /* Found a variant. Just return if caller doesn't want its + * exact position */ + if (! ep) { + return FALSE; + } + + /* Otherwise fall into final loop to find which byte it is */ + break; + } + x += PERL_WORDSIZE; + } while (x + PERL_WORDSIZE <= send); + } + +#endif + + /* Process per-byte */ + while (x < send) { + if (! UTF8_IS_INVARIANT(*x)) { + if (ep) { + *ep = x; + } + + return FALSE; + } + + x++; } return TRUE; } +#if defined(PERL_CORE) || defined(PERL_EXT) + +/* +=for apidoc variant_under_utf8_count + +This function looks at the sequence of bytes between C and C, which are +assumed to be encoded in ASCII/Latin1, and returns how many of them would +change should the string be translated into UTF-8. Due to the nature of UTF-8, +each of these would occupy two bytes instead of the single one in the input +string. Thus, this function returns the precise number of bytes the string +would expand by when translated to UTF-8. + +Unlike most of the other functions that have C in their name, the input +to this function is NOT a UTF-8-encoded string. The function name is slightly +I to emphasize this. + +This function is internal to Perl because khw thinks that any XS code that +would want this is probably operating too close to the internals. Presenting a +valid use case could change that. + +See also +C> +and +C>, + +=cut + +*/ + +PERL_STATIC_INLINE Size_t +S_variant_under_utf8_count(const U8* const s, const U8* const e) +{ + const U8* x = s; + Size_t count = 0; + + PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT; + +# ifndef EBCDIC + + if ((STRLEN) (e - x) >= PERL_WORDSIZE + + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x) + - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK)) + { + + /* Process per-byte until reach word boundary. XXX This loop could be + * eliminated if we knew that this platform had fast unaligned reads */ + while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) { + count += ! UTF8_IS_INVARIANT(*x++); + } + + /* Process per-word as long as we have at least a full word left */ + do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an + explanation of how this works */ + count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7) + * PERL_COUNT_MULTIPLIER) + >> ((PERL_WORDSIZE - 1) * CHARBITS); + x += PERL_WORDSIZE; + } while (x + PERL_WORDSIZE <= e); + } + +# endif + + /* Process per-byte */ + while (x < e) { + if (! UTF8_IS_INVARIANT(*x)) { + count++; + } + + x++; + } + + return count; +} + +#endif + +#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */ +# undef PERL_WORDSIZE +# undef PERL_COUNT_MULTIPLIER +# undef PERL_WORD_BOUNDARY_MASK +# undef PERL_VARIANTS_WORD_MASK +#endif + /* =for apidoc is_utf8_string @@ -388,6 +563,7 @@ code points are considered valid. See also C>, +C>, C>, C>, C>, @@ -397,28 +573,53 @@ C>, =cut */ +#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL) + +#if defined(PERL_CORE) || defined (PERL_EXT) + +/* +=for apidoc is_utf8_non_invariant_string + +Returns TRUE if L returns FALSE for the first +C bytes of the string C, but they are, nonetheless, legal Perl-extended +UTF-8; otherwise returns FALSE. + +A TRUE return means that at least one code point represented by the sequence +either is a wide character not representable as a single byte, or the +representation differs depending on whether the sequence is encoded in UTF-8 or +not. + +See also +C>, +C> + +=cut + +This is commonly used to determine if a SV's UTF-8 flag should be turned on. +It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if +it otherwise contains invalid UTF-8. + +It is an internal function because khw thinks that XS code shouldn't be working +at this low a level. A valid use case could change that. + +*/ + PERL_STATIC_INLINE bool -Perl_is_utf8_string(const U8 *s, const STRLEN len) +S_is_utf8_non_invariant_string(const U8* const s, STRLEN len) { - /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure. - * Be aware of possible changes to that */ - - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; + const U8 * first_variant; - PERL_ARGS_ASSERT_IS_UTF8_STRING; + PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING; - while (x < send) { - const STRLEN cur_len = isUTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - return FALSE; - } - x += cur_len; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + return FALSE; } - return TRUE; + return is_utf8_string(first_variant, len - (first_variant - s)); } +#endif + /* =for apidoc is_strict_utf8_string @@ -435,6 +636,7 @@ non-character code points. See also C>, +C>, C>, C>, C>, @@ -454,24 +656,7 @@ C>. =cut */ -PERL_STATIC_INLINE bool -S_is_strict_utf8_string(const U8 *s, const STRLEN len) -{ - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - - PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING; - - while (x < send) { - const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - return FALSE; - } - x += cur_len; - } - - return TRUE; -} +#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL) /* =for apidoc is_c9strict_utf8_string @@ -491,6 +676,7 @@ L. See also C>, +C>, C>, C>, C>, @@ -510,28 +696,7 @@ C>. =cut */ -PERL_STATIC_INLINE bool -S_is_c9strict_utf8_string(const U8 *s, const STRLEN len) -{ - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - - PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING; - - while (x < send) { - const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - return FALSE; - } - x += cur_len; - } - - return TRUE; -} - -/* The above 3 functions could have been moved into the more general one just - * below, and made #defines that call it with the right 'flags'. They are - * currently kept separate to increase their chances of getting inlined */ +#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0) /* =for apidoc is_utf8_string_flags @@ -553,6 +718,7 @@ C>, with the same meanings. See also C>, +C>, C>, C>, C>, @@ -573,37 +739,45 @@ C>. */ PERL_STATIC_INLINE bool -S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags) +S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags) { - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; + const U8 * first_variant; PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS; assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE - |UTF8_DISALLOW_ABOVE_31_BIT))); + |UTF8_DISALLOW_PERL_EXTENDED))); + + if (len == 0) { + len = strlen((const char *)s); + } if (flags == 0) { return is_utf8_string(s, len); } - if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT) + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) == UTF8_DISALLOW_ILLEGAL_INTERCHANGE) { return is_strict_utf8_string(s, len); } - if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT) + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) { return is_c9strict_utf8_string(s, len); } - while (x < send) { - STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); - if (UNLIKELY(! cur_len)) { - return FALSE; + if (! is_utf8_invariant_string_loc(s, len, &first_variant)) { + const U8* const send = s + len; + const U8* x = first_variant; + + while (x < send) { + STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); + if (UNLIKELY(! cur_len)) { + return FALSE; + } + x += cur_len; } - x += cur_len; } return TRUE; @@ -639,31 +813,50 @@ See also C>. */ PERL_STATIC_INLINE bool -Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el) +Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) { - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - STRLEN outlen = 0; + const U8 * first_variant; PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN; - while (x < send) { - const STRLEN cur_len = isUTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - break; - } - x += cur_len; - outlen++; + if (len == 0) { + len = strlen((const char *) s); } - if (el) - *el = outlen; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; - if (ep) { - *ep = x; + if (ep) { + *ep = s + len; + } + + return TRUE; } - return (x == send); + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isUTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } } /* @@ -697,31 +890,50 @@ See also C>. */ PERL_STATIC_INLINE bool -S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el) +S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) { - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - STRLEN outlen = 0; + const U8 * first_variant; PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN; - while (x < send) { - const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - break; - } - x += cur_len; - outlen++; + if (len == 0) { + len = strlen((const char *) s); } - if (el) - *el = outlen; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } - if (ep) { - *ep = x; + return TRUE; } - return (x == send); + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } } /* @@ -755,31 +967,50 @@ See also C>. */ PERL_STATIC_INLINE bool -S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el) +S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el) { - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - STRLEN outlen = 0; + const U8 * first_variant; PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN; - while (x < send) { - const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send); - if (UNLIKELY(! cur_len)) { - break; - } - x += cur_len; - outlen++; + if (len == 0) { + len = strlen((const char *) s); } - if (el) - *el = outlen; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; + } - if (ep) { - *ep = x; + return TRUE; } - return (x == send); + { + const U8* const send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } + + if (el) + *el = outlen; + + if (ep) { + *ep = x; + } + + return (x == send); + } } /* @@ -818,49 +1049,68 @@ See also C>. */ PERL_STATIC_INLINE bool -S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags) +S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags) { - const U8* const send = s + (len ? len : strlen((const char *)s)); - const U8* x = s; - STRLEN outlen = 0; + const U8 * first_variant; PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS; assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE - |UTF8_DISALLOW_ABOVE_31_BIT))); + |UTF8_DISALLOW_PERL_EXTENDED))); + + if (len == 0) { + len = strlen((const char *) s); + } if (flags == 0) { return is_utf8_string_loclen(s, len, ep, el); } - if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT) + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) == UTF8_DISALLOW_ILLEGAL_INTERCHANGE) { return is_strict_utf8_string_loclen(s, len, ep, el); } - if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT) + if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED) == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE) { return is_c9strict_utf8_string_loclen(s, len, ep, el); } - while (x < send) { - const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); - if (UNLIKELY(! cur_len)) { - break; + if (is_utf8_invariant_string_loc(s, len, &first_variant)) { + if (el) + *el = len; + + if (ep) { + *ep = s + len; } - x += cur_len; - outlen++; + + return TRUE; } - if (el) - *el = outlen; + { + const U8* send = s + len; + const U8* x = first_variant; + STRLEN outlen = first_variant - s; + + while (x < send) { + const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags); + if (UNLIKELY(! cur_len)) { + break; + } + x += cur_len; + outlen++; + } - if (ep) { - *ep = x; - } + if (el) + *el = outlen; - return (x == send); + if (ep) { + *ep = x; + } + + return (x == send); + } } /* @@ -916,7 +1166,127 @@ Perl_utf8_hop(const U8 *s, SSize_t off) s--; } } + GCC_DIAG_IGNORE_STMT(-Wcast-qual); return (U8 *)s; + GCC_DIAG_RESTORE_STMT; +} + +/* +=for apidoc utf8_hop_forward + +Return the UTF-8 pointer C displaced by up to C characters, +forward. + +C must be non-negative. + +C must be before or equal to C. + +When moving forward it will not move beyond C. + +Will not exceed this limit even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end) +{ + PERL_ARGS_ASSERT_UTF8_HOP_FORWARD; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(s <= end); + assert(off >= 0); + + while (off--) { + STRLEN skip = UTF8SKIP(s); + if ((STRLEN)(end - s) <= skip) { + GCC_DIAG_IGNORE_STMT(-Wcast-qual); + return (U8 *)end; + GCC_DIAG_RESTORE_STMT; + } + s += skip; + } + + GCC_DIAG_IGNORE_STMT(-Wcast-qual); + return (U8 *)s; + GCC_DIAG_RESTORE_STMT; +} + +/* +=for apidoc utf8_hop_back + +Return the UTF-8 pointer C displaced by up to C characters, +backward. + +C must be non-positive. + +C must be after or equal to C. + +When moving backward it will not move before C. + +Will not exceed this limit even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start) +{ + PERL_ARGS_ASSERT_UTF8_HOP_BACK; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(start <= s); + assert(off <= 0); + + while (off++ && s > start) { + s--; + while (UTF8_IS_CONTINUATION(*s) && s > start) + s--; + } + + GCC_DIAG_IGNORE_STMT(-Wcast-qual); + return (U8 *)s; + GCC_DIAG_RESTORE_STMT; +} + +/* +=for apidoc utf8_hop_safe + +Return the UTF-8 pointer C displaced by up to C characters, +either forward or backward. + +When moving backward it will not move before C. + +When moving forward it will not move beyond C. + +Will not exceed those limits even if the string is not valid "UTF-8". + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end) +{ + PERL_ARGS_ASSERT_UTF8_HOP_SAFE; + + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + assert(start <= s && s <= end); + + if (off >= 0) { + return utf8_hop_forward(s, off, end); + } + else { + return utf8_hop_back(s, off, start); + } } /* @@ -974,7 +1344,7 @@ S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS; assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE - |UTF8_DISALLOW_ABOVE_31_BIT))); + |UTF8_DISALLOW_PERL_EXTENDED))); if (s >= e || s + UTF8SKIP(s) <= e) { return FALSE; @@ -1038,7 +1408,7 @@ complete, valid characters found in the C pointer. PERL_STATIC_INLINE bool S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s, - const STRLEN len, + STRLEN len, const U8 **ep, STRLEN *el, const U32 flags) @@ -1379,9 +1749,9 @@ S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv) cx->blk_eval.cv = NULL; /* later set by doeval_compile() */ cx->blk_eval.cur_top_env = PL_top_env; - assert(!(PL_in_eval & ~ 0x7F)); + assert(!(PL_in_eval & ~ 0x3F)); assert(!(PL_op->op_type & ~0x1FF)); - cx->blk_u16 = (PL_in_eval & 0x7F) | ((U16)PL_op->op_type << 7); + cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7); } @@ -1394,9 +1764,10 @@ S_cx_popeval(pTHX_ PERL_CONTEXT *cx) assert(CxTYPE(cx) == CXt_EVAL); PL_in_eval = CxOLD_IN_EVAL(cx); + assert(!(PL_in_eval & 0xc0)); PL_eval_root = cx->blk_eval.old_eval_root; sv = cx->blk_eval.cur_text; - if (sv && SvSCREAM(sv)) { + if (sv && CxEVAL_TXT_REFCNTED(cx)) { cx->blk_eval.cur_text = NULL; SvREFCNT_dec_NN(sv); } @@ -1524,6 +1895,115 @@ S_cx_popgiven(pTHX_ PERL_CONTEXT *cx) SvREFCNT_dec(sv); } +/* ------------------ util.h ------------------------------------------- */ + +/* +=head1 Miscellaneous Functions + +=for apidoc foldEQ + +Returns true if the leading C bytes of the strings C and C are the +same +case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes +match themselves and their opposite case counterparts. Non-cased and non-ASCII +range bytes match only themselves. + +=cut +*/ + +PERL_STATIC_INLINE I32 +Perl_foldEQ(const char *s1, const char *s2, I32 len) +{ + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold[*b]) + return 0; + a++,b++; + } + return 1; +} + +PERL_STATIC_INLINE I32 +Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len) +{ + /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on + * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor + * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor + * does it check that the strings each have at least 'len' characters */ + + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LATIN1; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_latin1[*b]) { + return 0; + } + a++, b++; + } + return 1; +} + +/* +=for apidoc foldEQ_locale + +Returns true if the leading C bytes of the strings C and C are the +same case-insensitively in the current locale; false otherwise. + +=cut +*/ + +PERL_STATIC_INLINE I32 +Perl_foldEQ_locale(const char *s1, const char *s2, I32 len) +{ + dVAR; + const U8 *a = (const U8 *)s1; + const U8 *b = (const U8 *)s2; + + PERL_ARGS_ASSERT_FOLDEQ_LOCALE; + + assert(len >= 0); + + while (len--) { + if (*a != *b && *a != PL_fold_locale[*b]) + return 0; + a++,b++; + } + return 1; +} + +#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT)) + +PERL_STATIC_INLINE void * +S_my_memrchr(const char * s, const char c, const STRLEN len) +{ + /* memrchr(), since many platforms lack it */ + + const char * t = s + len - 1; + + PERL_ARGS_ASSERT_MY_MEMRCHR; + + while (t >= s) { + if (*t == c) { + return (void *) t; + } + t--; + } + + return NULL; +} + +#endif + /* * ex: set ts=8 sts=4 sw=4 et: */