S_TOPMARK(pTHX)
{
DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
- "MARK top %p %"IVdf"\n",
+ "MARK top %p %" IVdf "\n",
PL_markstack_ptr,
(IV)*PL_markstack_ptr)));
return *PL_markstack_ptr;
S_POPMARK(pTHX)
{
DEBUG_s(DEBUG_v(PerlIO_printf(Perl_debug_log,
- "MARK pop %p %"IVdf"\n",
+ "MARK pop %p %" IVdf "\n",
(PL_markstack_ptr-1),
(IV)*(PL_markstack_ptr-1))));
assert((PL_markstack_ptr > PL_markstack) || !"MARK underflow");
PERL_STATIC_INLINE struct regexp *
S_ReANY(const REGEXP * const re)
{
+ XPV* const p = (XPV*)SvANY(re);
assert(isREGEXP(re));
- return re->sv_u.svu_rx;
+ return SvTYPE(re) == SVt_PVLV ? p->xpv_len_u.xpvlenu_rx
+ : (struct regexp *)p;
}
/* ------------------------------- sv.h ------------------------------- */
/* ------------------------------- handy.h ------------------------------- */
/* saves machine code for a common noreturn idiom typically used in Newx*() */
-#ifdef GCC_DIAG_PRAGMA
-GCC_DIAG_IGNORE(-Wunused-function) /* Intentionally left semicolonless. */
-#endif
+GCC_DIAG_IGNORE_DECL(-Wunused-function);
static void
S_croak_memory_wrap(void)
{
Perl_croak_nocontext("%s",PL_memory_wrap);
}
-#ifdef GCC_DIAG_PRAGMA
-GCC_DIAG_RESTORE /* Intentionally left semicolonless. */
-#endif
+GCC_DIAG_RESTORE_DECL;
/* ------------------------------- utf8.h ------------------------------- */
C<L</is_c9strict_utf8_string_loclen>>.
=cut
+
+*/
+
+#define is_utf8_invariant_string(s, len) \
+ is_utf8_invariant_string_loc(s, len, NULL)
+
+/*
+=for apidoc is_utf8_invariant_string_loc
+
+Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
+the first UTF-8 variant character in the C<ep> pointer; if all characters are
+UTF-8 invariant, this function does not change the contents of C<*ep>.
+
+=cut
+
*/
PERL_STATIC_INLINE bool
-S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
+S_is_utf8_invariant_string_loc(const U8* const s, STRLEN len, const U8 ** ep)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
+ const U8* send;
const U8* x = s;
- PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
+ PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
+
+ if (len == 0) {
+ len = strlen((const char *)s);
+ }
+
+ send = s + len;
+
+#ifndef EBCDIC
+
+/* This looks like 0x010101... */
+#define PERL_COUNT_MULTIPLIER (~ (UINTMAX_C(0)) / 0xFF)
+
+/* This looks like 0x808080... */
+#define PERL_VARIANTS_WORD_MASK (PERL_COUNT_MULTIPLIER * 0x80)
+#define PERL_WORDSIZE sizeof(PERL_COUNT_MULTIPLIER)
+#define PERL_WORD_BOUNDARY_MASK (PERL_WORDSIZE - 1)
+
+/* Evaluates to 0 if 'x' is at a word boundary; otherwise evaluates to 1, by
+ * or'ing together the lowest bits of 'x'. Hopefully the final term gets
+ * optimized out completely on a 32-bit system, and its mask gets optimized out
+ * on a 64-bit system */
+#define PERL_IS_SUBWORD_ADDR(x) (1 & ( PTR2nat(x) \
+ | ( PTR2nat(x) >> 1) \
+ | ( ( (PTR2nat(x) \
+ & PERL_WORD_BOUNDARY_MASK) >> 2))))
+
+ /* Do the word-at-a-time iff there is at least one usable full word. That
+ * means that after advancing to a word boundary, there still is at least a
+ * full word left. The number of bytes needed to advance is 'wordsize -
+ * offset' unless offset is 0. */
+ if ((STRLEN) (send - x) >= PERL_WORDSIZE
- for (; x < send; ++x) {
- if (!UTF8_IS_INVARIANT(*x))
- return FALSE;
+ /* This term is wordsize if subword; 0 if not */
+ + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
+
+ /* 'offset' */
+ - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
+ {
+
+ /* Process per-byte until reach word boundary. XXX This loop could be
+ * eliminated if we knew that this platform had fast unaligned reads */
+ while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+ if (! UTF8_IS_INVARIANT(*x)) {
+ if (ep) {
+ *ep = x;
+ }
+
+ return FALSE;
+ }
+ x++;
+ }
+
+ /* Here, we know we have at least one full word to process. Process
+ * per-word as long as we have at least a full word left */
+ do {
+ if ((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) {
+
+ /* Found a variant. Just return if caller doesn't want its
+ * exact position */
+ if (! ep) {
+ return FALSE;
+ }
+
+ /* Otherwise fall into final loop to find which byte it is */
+ break;
+ }
+ x += PERL_WORDSIZE;
+ } while (x + PERL_WORDSIZE <= send);
+ }
+
+#endif
+
+ /* Process per-byte */
+ while (x < send) {
+ if (! UTF8_IS_INVARIANT(*x)) {
+ if (ep) {
+ *ep = x;
+ }
+
+ return FALSE;
+ }
+
+ x++;
}
return TRUE;
}
+#if defined(PERL_CORE) || defined(PERL_EXT)
+
+/*
+=for apidoc variant_under_utf8_count
+
+This function looks at the sequence of bytes between C<s> and C<e>, which are
+assumed to be encoded in ASCII/Latin1, and returns how many of them would
+change should the string be translated into UTF-8. Due to the nature of UTF-8,
+each of these would occupy two bytes instead of the single one in the input
+string. Thus, this function returns the precise number of bytes the string
+would expand by when translated to UTF-8.
+
+Unlike most of the other functions that have C<utf8> in their name, the input
+to this function is NOT a UTF-8-encoded string. The function name is slightly
+I<odd> to emphasize this.
+
+This function is internal to Perl because khw thinks that any XS code that
+would want this is probably operating too close to the internals. Presenting a
+valid use case could change that.
+
+See also
+C<L<perlapi/is_utf8_invariant_string>>
+and
+C<L<perlapi/is_utf8_invariant_string_loc>>,
+
+=cut
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_variant_under_utf8_count(const U8* const s, const U8* const e)
+{
+ const U8* x = s;
+ Size_t count = 0;
+
+ PERL_ARGS_ASSERT_VARIANT_UNDER_UTF8_COUNT;
+
+# ifndef EBCDIC
+
+ if ((STRLEN) (e - x) >= PERL_WORDSIZE
+ + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(x)
+ - (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK))
+ {
+
+ /* Process per-byte until reach word boundary. XXX This loop could be
+ * eliminated if we knew that this platform had fast unaligned reads */
+ while (PTR2nat(x) & PERL_WORD_BOUNDARY_MASK) {
+ count += ! UTF8_IS_INVARIANT(*x++);
+ }
+
+ /* Process per-word as long as we have at least a full word left */
+ do { /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
+ explanation of how this works */
+ count += ((((* (PERL_UINTMAX_T *) x) & PERL_VARIANTS_WORD_MASK) >> 7)
+ * PERL_COUNT_MULTIPLIER)
+ >> ((PERL_WORDSIZE - 1) * CHARBITS);
+ x += PERL_WORDSIZE;
+ } while (x + PERL_WORDSIZE <= e);
+ }
+
+# endif
+
+ /* Process per-byte */
+ while (x < e) {
+ if (! UTF8_IS_INVARIANT(*x)) {
+ count++;
+ }
+
+ x++;
+ }
+
+ return count;
+}
+
+#endif
+
+#ifndef PERL_IN_REGEXEC_C /* Keep these around for that file */
+# undef PERL_WORDSIZE
+# undef PERL_COUNT_MULTIPLIER
+# undef PERL_WORD_BOUNDARY_MASK
+# undef PERL_VARIANTS_WORD_MASK
+#endif
+
/*
=for apidoc is_utf8_string
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string_loc>>,
C<L</is_utf8_string_loclen>>,
C<L</is_utf8_fixed_width_buf_flags>>,
=cut
*/
+#define is_utf8_string(s, len) is_utf8_string_loclen(s, len, NULL, NULL)
+
+#if defined(PERL_CORE) || defined (PERL_EXT)
+
+/*
+=for apidoc is_utf8_non_invariant_string
+
+Returns TRUE if L<perlapi/is_utf8_invariant_string> returns FALSE for the first
+C<len> bytes of the string C<s>, but they are, nonetheless, legal Perl-extended
+UTF-8; otherwise returns FALSE.
+
+A TRUE return means that at least one code point represented by the sequence
+either is a wide character not representable as a single byte, or the
+representation differs depending on whether the sequence is encoded in UTF-8 or
+not.
+
+See also
+C<L<perlapi/is_utf8_invariant_string>>,
+C<L<perlapi/is_utf8_string>>
+
+=cut
+
+This is commonly used to determine if a SV's UTF-8 flag should be turned on.
+It needn't be if its string is entirely UTF-8 invariant, and it shouldn't be if
+it otherwise contains invalid UTF-8.
+
+It is an internal function because khw thinks that XS code shouldn't be working
+at this low a level. A valid use case could change that.
+
+*/
+
PERL_STATIC_INLINE bool
-Perl_is_utf8_string(const U8 *s, const STRLEN len)
+S_is_utf8_non_invariant_string(const U8* const s, STRLEN len)
{
- /* This is now marked pure in embed.fnc, because isUTF8_CHAR now is pure.
- * Be aware of possible changes to that */
-
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
+ const U8 * first_variant;
- PERL_ARGS_ASSERT_IS_UTF8_STRING;
+ PERL_ARGS_ASSERT_IS_UTF8_NON_INVARIANT_STRING;
- while (x < send) {
- const STRLEN cur_len = isUTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- return FALSE;
- }
- x += cur_len;
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ return FALSE;
}
- return TRUE;
+ return is_utf8_string(first_variant, len - (first_variant - s));
}
+#endif
+
/*
=for apidoc is_strict_utf8_string
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_flags>>,
C<L</is_utf8_string_loc>>,
=cut
*/
-PERL_STATIC_INLINE bool
-S_is_strict_utf8_string(const U8 *s, const STRLEN len)
-{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
-
- PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING;
-
- while (x < send) {
- const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- return FALSE;
- }
- x += cur_len;
- }
-
- return TRUE;
-}
+#define is_strict_utf8_string(s, len) is_strict_utf8_string_loclen(s, len, NULL, NULL)
/*
=for apidoc is_c9strict_utf8_string
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_flags>>,
C<L</is_utf8_string_loc>>,
=cut
*/
-PERL_STATIC_INLINE bool
-S_is_c9strict_utf8_string(const U8 *s, const STRLEN len)
-{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
-
- PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING;
-
- while (x < send) {
- const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- return FALSE;
- }
- x += cur_len;
- }
-
- return TRUE;
-}
-
-/* The above 3 functions could have been moved into the more general one just
- * below, and made #defines that call it with the right 'flags'. They are
- * currently kept separate to increase their chances of getting inlined */
+#define is_c9strict_utf8_string(s, len) is_c9strict_utf8_string_loclen(s, len, NULL, 0)
/*
=for apidoc is_utf8_string_flags
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_loc>>,
C<L</is_utf8_string_loc_flags>>,
*/
PERL_STATIC_INLINE bool
-S_is_utf8_string_flags(const U8 *s, const STRLEN len, const U32 flags)
+S_is_utf8_string_flags(const U8 *s, STRLEN len, const U32 flags)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
+ const U8 * first_variant;
PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
+
+ if (len == 0) {
+ len = strlen((const char *)s);
+ }
if (flags == 0) {
return is_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string(s, len);
}
- while (x < send) {
- STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
- if (UNLIKELY(! cur_len)) {
- return FALSE;
+ if (! is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+
+ while (x < send) {
+ STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+ if (UNLIKELY(! cur_len)) {
+ return FALSE;
+ }
+ x += cur_len;
}
- x += cur_len;
}
return TRUE;
*/
PERL_STATIC_INLINE bool
-Perl_is_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
- STRLEN outlen = 0;
+ const U8 * first_variant;
PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
- while (x < send) {
- const STRLEN cur_len = isUTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- break;
- }
- x += cur_len;
- outlen++;
+ if (len == 0) {
+ len = strlen((const char *) s);
}
- if (el)
- *el = outlen;
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
- if (ep) {
- *ep = x;
+ if (ep) {
+ *ep = s + len;
+ }
+
+ return TRUE;
}
- return (x == send);
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
}
/*
*/
PERL_STATIC_INLINE bool
-S_is_strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
- STRLEN outlen = 0;
+ const U8 * first_variant;
PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
- while (x < send) {
- const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- break;
- }
- x += cur_len;
- outlen++;
+ if (len == 0) {
+ len = strlen((const char *) s);
}
- if (el)
- *el = outlen;
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
- if (ep) {
- *ep = x;
+ return TRUE;
}
- return (x == send);
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
}
/*
*/
PERL_STATIC_INLINE bool
-S_is_c9strict_utf8_string_loclen(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el)
+S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
- STRLEN outlen = 0;
+ const U8 * first_variant;
PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
- while (x < send) {
- const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
- if (UNLIKELY(! cur_len)) {
- break;
- }
- x += cur_len;
- outlen++;
+ if (len == 0) {
+ len = strlen((const char *) s);
}
- if (el)
- *el = outlen;
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
- if (ep) {
- *ep = x;
+ return TRUE;
}
- return (x == send);
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
}
/*
*/
PERL_STATIC_INLINE bool
-S_is_utf8_string_loclen_flags(const U8 *s, const STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
+S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
{
- const U8* const send = s + (len ? len : strlen((const char *)s));
- const U8* x = s;
- STRLEN outlen = 0;
+ const U8 * first_variant;
PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
+
+ if (len == 0) {
+ len = strlen((const char *) s);
+ }
if (flags == 0) {
return is_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string_loclen(s, len, ep, el);
}
- while (x < send) {
- const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
- if (UNLIKELY(! cur_len)) {
- break;
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
}
- x += cur_len;
- outlen++;
+
+ return TRUE;
}
- if (el)
- *el = outlen;
+ {
+ const U8* send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
- if (ep) {
- *ep = x;
- }
+ if (el)
+ *el = outlen;
- return (x == send);
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
}
/*
s--;
}
}
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
return (U8 *)s;
+ GCC_DIAG_RESTORE_STMT;
+}
+
+/*
+=for apidoc utf8_hop_forward
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+forward.
+
+C<off> must be non-negative.
+
+C<s> must be before or equal to C<end>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(s <= end);
+ assert(off >= 0);
+
+ while (off--) {
+ STRLEN skip = UTF8SKIP(s);
+ if ((STRLEN)(end - s) <= skip) {
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
+ return (U8 *)end;
+ GCC_DIAG_RESTORE_STMT;
+ }
+ s += skip;
+ }
+
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
+ return (U8 *)s;
+ GCC_DIAG_RESTORE_STMT;
+}
+
+/*
+=for apidoc utf8_hop_back
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+backward.
+
+C<off> must be non-positive.
+
+C<s> must be after or equal to C<start>.
+
+When moving backward it will not move before C<start>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_BACK;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(start <= s);
+ assert(off <= 0);
+
+ while (off++ && s > start) {
+ s--;
+ while (UTF8_IS_CONTINUATION(*s) && s > start)
+ s--;
+ }
+
+ GCC_DIAG_IGNORE_STMT(-Wcast-qual);
+ return (U8 *)s;
+ GCC_DIAG_RESTORE_STMT;
+}
+
+/*
+=for apidoc utf8_hop_safe
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+either forward or backward.
+
+When moving backward it will not move before C<start>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed those limits even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(start <= s && s <= end);
+
+ if (off >= 0) {
+ return utf8_hop_forward(s, off, end);
+ }
+ else {
+ return utf8_hop_back(s, off, start);
+ }
}
/*
PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (s >= e || s + UTF8SKIP(s) <= e) {
return FALSE;
PERL_STATIC_INLINE bool
S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
- const STRLEN len,
+ STRLEN len,
const U8 **ep,
STRLEN *el,
const U32 flags)
cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
cx->blk_eval.cur_top_env = PL_top_env;
- assert(!(PL_in_eval & ~ 0x7F));
+ assert(!(PL_in_eval & ~ 0x3F));
assert(!(PL_op->op_type & ~0x1FF));
- cx->blk_u16 = (PL_in_eval & 0x7F) | ((U16)PL_op->op_type << 7);
+ cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
}
assert(CxTYPE(cx) == CXt_EVAL);
PL_in_eval = CxOLD_IN_EVAL(cx);
+ assert(!(PL_in_eval & 0xc0));
PL_eval_root = cx->blk_eval.old_eval_root;
sv = cx->blk_eval.cur_text;
- if (sv && SvSCREAM(sv)) {
+ if (sv && CxEVAL_TXT_REFCNTED(cx)) {
cx->blk_eval.cur_text = NULL;
SvREFCNT_dec_NN(sv);
}
SvREFCNT_dec(sv);
}
+/* ------------------ util.h ------------------------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc foldEQ
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same
+case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
+match themselves and their opposite case counterparts. Non-cased and non-ASCII
+range bytes match only themselves.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ(const char *s1, const char *s2, I32 len)
+{
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
+{
+ /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
+ * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
+ * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
+ * does it check that the strings each have at least 'len' characters */
+
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_latin1[*b]) {
+ return 0;
+ }
+ a++, b++;
+ }
+ return 1;
+}
+
+/*
+=for apidoc foldEQ_locale
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same case-insensitively in the current locale; false otherwise.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
+{
+ dVAR;
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_locale[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
+#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
+
+PERL_STATIC_INLINE void *
+S_my_memrchr(const char * s, const char c, const STRLEN len)
+{
+ /* memrchr(), since many platforms lack it */
+
+ const char * t = s + len - 1;
+
+ PERL_ARGS_ASSERT_MY_MEMRCHR;
+
+ while (t >= s) {
+ if (*t == c) {
+ return (void *) t;
+ }
+ t--;
+ }
+
+ return NULL;
+}
+
+#endif
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/