+
+=for apidoc is_utf8_string_loc
+
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_utf8_string_loclen
+
+Like C<L</is_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8 * first_variant;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN;
+
+ if (len == 0) {
+ len = strlen((const char *) s);
+ }
+
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
+
+ return TRUE;
+ }
+
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
+}
+
+/*
+
+=for apidoc isUTF8_CHAR
+
+Evaluates to non-zero if the first few bytes of the string starting at C<s> and
+looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl,
+that represents some code point; otherwise it evaluates to 0. If non-zero, the
+value gives how many bytes starting at C<s> comprise the code point's
+representation. Any bytes remaining before C<e>, but beyond the ones needed to
+form the first code point in C<s>, are not examined.
+
+The code point can be any that will fit in an IV on this machine, using Perl's
+extension to official UTF-8 to represent those higher than the Unicode maximum
+of 0x10FFFF. That means that this macro is used to efficiently decide if the
+next few bytes in C<s> is legal UTF-8 for a single character.
+
+Use C<L</isSTRICT_UTF8_CHAR>> to restrict the acceptable code points to those
+defined by Unicode to be fully interchangeable across applications;
+C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
+#9|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
+code points; and C<L</isUTF8_CHAR_flags>> for a more customized definition.
+
+Use C<L</is_utf8_string>>, C<L</is_utf8_string_loc>>, and
+C<L</is_utf8_string_loclen>> to check entire strings.
+
+Note also that a UTF-8 "invariant" character (i.e. ASCII on non-EBCDIC
+machines) is a valid UTF-8 character.
+
+=cut
+
+This uses an adaptation of the table and algorithm given in
+http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
+documentation of the original version. A copyright notice for the original
+version is given at the beginning of this file. The Perl adapation is
+documented at the definition of PL_extended_utf8_dfa_tab[].
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_isUTF8_CHAR(const U8 * const s0, const U8 * const e)
+{
+ const U8 * s = s0;
+ UV state = 0;
+
+ PERL_ARGS_ASSERT_ISUTF8_CHAR;
+
+ /* This dfa is fast. If it accepts the input, it was for a well-formed,
+ * code point, which can be returned immediately. Otherwise, it is either
+ * malformed, or for the start byte FF which the dfa doesn't handle (except
+ * on 32-bit ASCII platforms where it trivially is an error). Call a
+ * helper function for the other platforms. */
+
+ while (s < e && LIKELY(state != 1)) {
+ state = PL_extended_utf8_dfa_tab[256
+ + state
+ + PL_extended_utf8_dfa_tab[*s]];
+ if (state != 0) {
+ s++;
+ continue;
+ }
+
+ return s - s0 + 1;
+ }
+
+#if defined(UV_IS_QUAD) || defined(EBCDIC)
+
+ if (NATIVE_UTF8_TO_I8(*s0) == 0xFF && e - s0 >= UTF8_MAXBYTES) {
+ return _is_utf8_char_helper(s0, e, 0);
+ }
+
+#endif
+
+ return 0;
+}
+
+/*
+
+=for apidoc isSTRICT_UTF8_CHAR
+
+Evaluates to non-zero if the first few bytes of the string starting at C<s> and
+looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
+Unicode code point completely acceptable for open interchange between all
+applications; otherwise it evaluates to 0. If non-zero, the value gives how
+many bytes starting at C<s> comprise the code point's representation. Any
+bytes remaining before C<e>, but beyond the ones needed to form the first code
+point in C<s>, are not examined.
+
+The largest acceptable code point is the Unicode maximum 0x10FFFF, and must not
+be a surrogate nor a non-character code point. Thus this excludes any code
+point from Perl's extended UTF-8.
+
+This is used to efficiently decide if the next few bytes in C<s> is
+legal Unicode-acceptable UTF-8 for a single character.
+
+Use C<L</isC9_STRICT_UTF8_CHAR>> to use the L<Unicode Corrigendum
+#9|http://www.unicode.org/versions/corrigendum9.html> definition of allowable
+code points; C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8;
+and C<L</isUTF8_CHAR_flags>> for a more customized definition.
+
+Use C<L</is_strict_utf8_string>>, C<L</is_strict_utf8_string_loc>>, and
+C<L</is_strict_utf8_string_loclen>> to check entire strings.
+
+=cut
+
+This uses an adaptation of the tables and algorithm given in
+http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
+documentation of the original version. A copyright notice for the original
+version is given at the beginning of this file. The Perl adapation is
+documented at the definition of strict_extended_utf8_dfa_tab[].
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
+{
+ const U8 * s = s0;
+ UV state = 0;
+
+ PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR;
+
+ while (s < e && LIKELY(state != 1)) {
+ state = PL_strict_utf8_dfa_tab[256 + state + PL_strict_utf8_dfa_tab[*s]];
+
+ if (state != 0) {
+ s++;
+ continue;
+ }
+
+ return s - s0 + 1;
+ }
+
+#ifndef EBCDIC
+
+ /* The dfa above drops out for certain Hanguls; handle them specially */
+ if (is_HANGUL_ED_utf8_safe(s0, e)) {
+ return 3;
+ }
+
+#endif
+
+ return 0;
+}
+
+/*
+
+=for apidoc isC9_STRICT_UTF8_CHAR
+
+Evaluates to non-zero if the first few bytes of the string starting at C<s> and
+looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some
+Unicode non-surrogate code point; otherwise it evaluates to 0. If non-zero,
+the value gives how many bytes starting at C<s> comprise the code point's
+representation. Any bytes remaining before C<e>, but beyond the ones needed to
+form the first code point in C<s>, are not examined.
+
+The largest acceptable code point is the Unicode maximum 0x10FFFF. This
+differs from C<L</isSTRICT_UTF8_CHAR>> only in that it accepts non-character
+code points. This corresponds to
+L<Unicode Corrigendum #9|http://www.unicode.org/versions/corrigendum9.html>.
+which said that non-character code points are merely discouraged rather than
+completely forbidden in open interchange. See
+L<perlunicode/Noncharacter code points>.
+
+Use C<L</isUTF8_CHAR>> to check for Perl's extended UTF-8; and
+C<L</isUTF8_CHAR_flags>> for a more customized definition.
+
+Use C<L</is_c9strict_utf8_string>>, C<L</is_c9strict_utf8_string_loc>>, and
+C<L</is_c9strict_utf8_string_loclen>> to check entire strings.
+
+=cut
+
+This uses an adaptation of the tables and algorithm given in
+http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides comprehensive
+documentation of the original version. A copyright notice for the original
+version is given at the beginning of this file. The Perl adapation is
+documented at the definition of PL_c9_utf8_dfa_tab[].
+
+*/
+
+PERL_STATIC_INLINE Size_t
+S_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e)
+{
+ const U8 * s = s0;
+ UV state = 0;
+
+ PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR;
+
+ while (s < e && LIKELY(state != 1)) {
+ state = PL_c9_utf8_dfa_tab[256 + state + PL_c9_utf8_dfa_tab[*s]];
+
+ if (state != 0) {
+ s++;
+ continue;
+ }
+
+ return s - s0 + 1;
+ }
+
+ return 0;
+}
+
+/*
+
+=for apidoc is_strict_utf8_string_loc
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_strict_utf8_string_loc(s, len, ep) \
+ is_strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_strict_utf8_string_loclen
+
+Like C<L</is_strict_utf8_string>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_strict_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8 * first_variant;
+
+ PERL_ARGS_ASSERT_IS_STRICT_UTF8_STRING_LOCLEN;
+
+ if (len == 0) {
+ len = strlen((const char *) s);
+ }
+
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
+
+ return TRUE;
+ }
+
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isSTRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
+}
+
+/*
+
+=for apidoc is_c9strict_utf8_string_loc
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_c9strict_utf8_string_loclen>>.
+
+=cut
+*/
+
+#define is_c9strict_utf8_string_loc(s, len, ep) \
+ is_c9strict_utf8_string_loclen(s, len, ep, 0)
+
+/*
+
+=for apidoc is_c9strict_utf8_string_loclen
+
+Like C<L</is_c9strict_utf8_string>> but stores the location of the failure (in
+the case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8 encoded
+characters in the C<el> pointer.
+
+See also C<L</is_c9strict_utf8_string_loc>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
+{
+ const U8 * first_variant;
+
+ PERL_ARGS_ASSERT_IS_C9STRICT_UTF8_STRING_LOCLEN;
+
+ if (len == 0) {
+ len = strlen((const char *) s);
+ }
+
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
+
+ return TRUE;
+ }
+
+ {
+ const U8* const send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isC9_STRICT_UTF8_CHAR(x, send);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
+}
+
+/*
+
+=for apidoc is_utf8_string_loc_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer.
+
+See also C<L</is_utf8_string_loclen_flags>>.
+
+=cut
+*/
+
+#define is_utf8_string_loc_flags(s, len, ep, flags) \
+ is_utf8_string_loclen_flags(s, len, ep, 0, flags)
+
+
+/* The above 3 actual functions could have been moved into the more general one
+ * just below, and made #defines that call it with the right 'flags'. They are
+ * currently kept separate to increase their chances of getting inlined */
+
+/*
+
+=for apidoc is_utf8_string_loclen_flags
+
+Like C<L</is_utf8_string_flags>> but stores the location of the failure (in the
+case of "utf8ness failure") or the location C<s>+C<len> (in the case of
+"utf8ness success") in the C<ep> pointer, and the number of UTF-8
+encoded characters in the C<el> pointer.
+
+See also C<L</is_utf8_string_loc_flags>>.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_string_loclen_flags(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el, const U32 flags)
+{
+ const U8 * first_variant;
+
+ PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_PERL_EXTENDED)));
+
+ if (len == 0) {
+ len = strlen((const char *) s);
+ }
+
+ if (flags == 0) {
+ return is_utf8_string_loclen(s, len, ep, el);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+ == UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
+ {
+ return is_strict_utf8_string_loclen(s, len, ep, el);
+ }
+
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
+ == UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
+ {
+ return is_c9strict_utf8_string_loclen(s, len, ep, el);
+ }
+
+ if (is_utf8_invariant_string_loc(s, len, &first_variant)) {
+ if (el)
+ *el = len;
+
+ if (ep) {
+ *ep = s + len;
+ }
+
+ return TRUE;
+ }
+
+ {
+ const U8* send = s + len;
+ const U8* x = first_variant;
+ STRLEN outlen = first_variant - s;
+
+ while (x < send) {
+ const STRLEN cur_len = isUTF8_CHAR_flags(x, send, flags);
+ if (UNLIKELY(! cur_len)) {
+ break;
+ }
+ x += cur_len;
+ outlen++;
+ }
+
+ if (el)
+ *el = outlen;
+
+ if (ep) {
+ *ep = x;
+ }
+
+ return (x == send);
+ }
+}
+
+/*
+=for apidoc utf8_distance
+
+Returns the number of UTF-8 characters between the UTF-8 pointers C<a>
+and C<b>.
+
+WARNING: use only if you *know* that the pointers point inside the
+same UTF-8 buffer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE IV
+Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b)
+{
+ PERL_ARGS_ASSERT_UTF8_DISTANCE;
+
+ return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a);
+}
+
+/*
+=for apidoc utf8_hop
+
+Return the UTF-8 pointer C<s> displaced by C<off> characters, either
+forward or backward.
+
+WARNING: do not use the following unless you *know* C<off> is within
+the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned
+on the first byte of character or just after the last byte of a character.
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop(const U8 *s, SSize_t off)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ if (off >= 0) {
+ while (off--)
+ s += UTF8SKIP(s);
+ }
+ else {
+ while (off++) {
+ s--;
+ while (UTF8_IS_CONTINUATION(*s))
+ s--;
+ }
+ }
+ GCC_DIAG_IGNORE(-Wcast-qual)
+ return (U8 *)s;
+ GCC_DIAG_RESTORE
+}
+
+/*
+=for apidoc utf8_hop_forward
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+forward.
+
+C<off> must be non-negative.
+
+C<s> must be before or equal to C<end>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(s <= end);
+ assert(off >= 0);
+
+ while (off--) {
+ STRLEN skip = UTF8SKIP(s);
+ if ((STRLEN)(end - s) <= skip) {
+ GCC_DIAG_IGNORE(-Wcast-qual)
+ return (U8 *)end;
+ GCC_DIAG_RESTORE
+ }
+ s += skip;
+ }
+
+ GCC_DIAG_IGNORE(-Wcast-qual)
+ return (U8 *)s;
+ GCC_DIAG_RESTORE
+}
+
+/*
+=for apidoc utf8_hop_back
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+backward.
+
+C<off> must be non-positive.
+
+C<s> must be after or equal to C<start>.
+
+When moving backward it will not move before C<start>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_BACK;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(start <= s);
+ assert(off <= 0);
+
+ while (off++ && s > start) {
+ do {
+ s--;
+ } while (UTF8_IS_CONTINUATION(*s) && s > start);
+ }
+
+ GCC_DIAG_IGNORE(-Wcast-qual)
+ return (U8 *)s;
+ GCC_DIAG_RESTORE
+}
+
+/*
+=for apidoc utf8_hop_safe
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+either forward or backward.
+
+When moving backward it will not move before C<start>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed those limits even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
+{
+ PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
+
+ /* Note: cannot use UTF8_IS_...() too eagerly here since e.g
+ * the bitops (especially ~) can create illegal UTF-8.
+ * In other words: in Perl UTF-8 is not just for Unicode. */
+
+ assert(start <= s && s <= end);
+
+ if (off >= 0) {
+ return utf8_hop_forward(s, off, end);
+ }
+ else {
+ return utf8_hop_back(s, off, start);
+ }
+}
+
+/*
+
+=for apidoc is_utf8_valid_partial_char
+
+Returns 0 if the sequence of bytes starting at C<s> and looking no further than
+S<C<e - 1>> is the UTF-8 encoding, as extended by Perl, for one or more code
+points. Otherwise, it returns 1 if there exists at least one non-empty
+sequence of bytes that when appended to sequence C<s>, starting at position
+C<e> causes the entire sequence to be the well-formed UTF-8 of some code point;
+otherwise returns 0.
+
+In other words this returns TRUE if C<s> points to a partial UTF-8-encoded code
+point.
+
+This is useful when a fixed-length buffer is being tested for being well-formed
+UTF-8, but the final few bytes in it don't comprise a full character; that is,
+it is split somewhere in the middle of the final code point's UTF-8
+representation. (Presumably when the buffer is refreshed with the next chunk
+of data, the new first bytes will complete the partial code point.) This
+function is used to verify that the final bytes in the current buffer are in
+fact the legal beginning of some code point, so that if they aren't, the
+failure can be signalled without having to wait for the next read.
+
+=cut
+*/
+#define is_utf8_valid_partial_char(s, e) \
+ is_utf8_valid_partial_char_flags(s, e, 0)
+
+/*
+
+=for apidoc is_utf8_valid_partial_char_flags
+
+Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
+or not the input is a valid UTF-8 encoded partial character, but it takes an
+extra parameter, C<flags>, which can further restrict which code points are
+considered valid.
+
+If C<flags> is 0, this behaves identically to
+C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
+of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
+there is any sequence of bytes that can complete the input partial character in
+such a way that a non-prohibited character is formed, the function returns
+TRUE; otherwise FALSE. Non character code points cannot be determined based on
+partial character input. But many of the other possible excluded types can be
+determined from just the first one or two bytes.
+
+=cut
+ */
+
+PERL_STATIC_INLINE bool
+S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+{
+ PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
+
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_PERL_EXTENDED)));
+
+ if (s >= e || s + UTF8SKIP(s) <= e) {
+ return FALSE;
+ }
+
+ return cBOOL(_is_utf8_char_helper(s, e, flags));
+}
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_flags
+
+Returns TRUE if the fixed-width buffer starting at C<s> with length C<len>
+is entirely valid UTF-8, subject to the restrictions given by C<flags>;
+otherwise it returns FALSE.
+
+If C<flags> is 0, any well-formed UTF-8, as extended by Perl, is accepted
+without restriction. If the final few bytes of the buffer do not form a
+complete code point, this will return TRUE anyway, provided that
+C<L</is_utf8_valid_partial_char_flags>> returns TRUE for them.
+
+If C<flags> in non-zero, it can be any combination of the
+C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>, and with the
+same meanings.
+
+This function differs from C<L</is_utf8_string_flags>> only in that the latter
+returns FALSE if the final few bytes of the string don't form a complete code
+point.
+
+=cut
+ */
+#define is_utf8_fixed_width_buf_flags(s, len, flags) \
+ is_utf8_fixed_width_buf_loclen_flags(s, len, 0, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loc_flags
+
+Like C<L</is_utf8_fixed_width_buf_flags>> but stores the location of the
+failure in the C<ep> pointer. If the function returns TRUE, C<*ep> will point
+to the beginning of any partial character at the end of the buffer; if there is
+no partial character C<*ep> will contain C<s>+C<len>.
+
+See also C<L</is_utf8_fixed_width_buf_loclen_flags>>.
+
+=cut
+*/
+
+#define is_utf8_fixed_width_buf_loc_flags(s, len, loc, flags) \
+ is_utf8_fixed_width_buf_loclen_flags(s, len, loc, 0, flags)
+
+/*
+
+=for apidoc is_utf8_fixed_width_buf_loclen_flags
+
+Like C<L</is_utf8_fixed_width_buf_loc_flags>> but stores the number of
+complete, valid characters found in the C<el> pointer.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_utf8_fixed_width_buf_loclen_flags(const U8 * const s,
+ STRLEN len,
+ const U8 **ep,
+ STRLEN *el,
+ const U32 flags)
+{
+ const U8 * maybe_partial;
+
+ PERL_ARGS_ASSERT_IS_UTF8_FIXED_WIDTH_BUF_LOCLEN_FLAGS;
+
+ if (! ep) {
+ ep = &maybe_partial;
+ }
+
+ /* If it's entirely valid, return that; otherwise see if the only error is
+ * that the final few bytes are for a partial character */
+ return is_utf8_string_loclen_flags(s, len, ep, el, flags)
+ || is_utf8_valid_partial_char_flags(*ep, s + len, flags);
+}
+
+PERL_STATIC_INLINE UV
+S_utf8n_to_uvchr_msgs(const U8 *s,
+ STRLEN curlen,
+ STRLEN *retlen,
+ const U32 flags,
+ U32 * errors,
+ AV ** msgs)
+{
+ /* This is the inlined portion of utf8n_to_uvchr_msgs. It handles the
+ * simple cases, and, if necessary calls a helper function to deal with the
+ * more complex ones. Almost all well-formed non-problematic code points
+ * are considered simple, so that it's unlikely that the helper function
+ * will need to be called.
+ *
+ * This is an adaptation of the tables and algorithm given in
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
+ * comprehensive documentation of the original version. A copyright notice
+ * for the original version is given at the beginning of this file. The
+ * Perl adapation is documented at the definition of PL_strict_utf8_dfa_tab[].
+ */
+
+ const U8 * const s0 = s;
+ const U8 * send = s0 + curlen;
+ UV uv = 0; /* The 0 silences some stupid compilers */
+ UV state = 0;
+
+ PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
+
+ /* This dfa is fast. If it accepts the input, it was for a well-formed,
+ * non-problematic code point, which can be returned immediately.
+ * Otherwise we call a helper function to figure out the more complicated
+ * cases. */
+
+ while (s < send && LIKELY(state != 1)) {
+ UV type = PL_strict_utf8_dfa_tab[*s];
+
+ uv = (state == 0)
+ ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
+ : UTF8_ACCUMULATE(uv, *s);
+ state = PL_strict_utf8_dfa_tab[256 + state + type];
+
+ if (state != 0) {
+ s++;
+ continue;
+ }
+
+ if (retlen) {
+ *retlen = s - s0 + 1;
+ }
+ if (errors) {
+ *errors = 0;
+ }
+ if (msgs) {
+ *msgs = NULL;
+ }
+
+ return uv;
+ }
+
+ /* Here is potentially problematic. Use the full mechanism */
+ return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
+}
+
+PERL_STATIC_INLINE UV
+S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF;
+
+ assert(s < send);
+
+ if (! ckWARN_d(WARN_UTF8)) {
+ return utf8n_to_uvchr(s, send - s, retlen,
+ (UTF8_ALLOW_ANY & ~UTF8_ALLOW_EMPTY));
+ }
+ else {
+ UV ret = utf8n_to_uvchr(s, send - s, retlen, 0);
+ if (retlen && ret == 0 && *s != '\0') {
+ *retlen = (STRLEN) -1;
+ }
+
+ return ret;
+ }
+}
+
+/* ------------------------------- perl.h ----------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc is_safe_syscall
+
+Test that the given C<pv> doesn't contain any internal C<NUL> characters.
+If it does, set C<errno> to C<ENOENT>, optionally warn, and return FALSE.
+
+Return TRUE if the name is safe.
+
+Used by the C<IS_SAFE_SYSCALL()> macro.
+
+=cut
+*/
+
+PERL_STATIC_INLINE bool
+S_is_safe_syscall(pTHX_ const char *pv, STRLEN len, const char *what, const char *op_name) {
+ /* While the Windows CE API provides only UCS-16 (or UTF-16) APIs
+ * perl itself uses xce*() functions which accept 8-bit strings.
+ */
+
+ PERL_ARGS_ASSERT_IS_SAFE_SYSCALL;
+
+ if (len > 1) {
+ char *null_at;
+ if (UNLIKELY((null_at = (char *)memchr(pv, 0, len-1)) != NULL)) {
+ SETERRNO(ENOENT, LIB_INVARG);
+ Perl_ck_warner(aTHX_ packWARN(WARN_SYSCALLS),
+ "Invalid \\0 character in %s for %s: %s\\0%s",
+ what, op_name, pv, null_at+1);
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+/*
+
+Return true if the supplied filename has a newline character
+immediately before the first (hopefully only) NUL.
+
+My original look at this incorrectly used the len from SvPV(), but
+that's incorrect, since we allow for a NUL in pv[len-1].
+
+So instead, strlen() and work from there.
+
+This allow for the user reading a filename, forgetting to chomp it,
+then calling:
+
+ open my $foo, "$file\0";
+
+*/
+
+#ifdef PERL_CORE
+
+PERL_STATIC_INLINE bool
+S_should_warn_nl(const char *pv) {
+ STRLEN len;
+
+ PERL_ARGS_ASSERT_SHOULD_WARN_NL;
+
+ len = strlen(pv);
+
+ return len > 0 && pv[len-1] == '\n';
+}
+
+#endif
+
+#if defined(PERL_IN_PP_C) || defined(PERL_IN_PP_HOT_C)
+
+PERL_STATIC_INLINE bool
+S_lossless_NV_to_IV(const NV nv, IV *ivp)
+{
+ /* This function determines if the input NV 'nv' may be converted without
+ * loss of data to an IV. If not, it returns FALSE taking no other action.
+ * But if it is possible, it does the conversion, returning TRUE, and
+ * storing the converted result in '*ivp' */
+
+ PERL_ARGS_ASSERT_LOSSLESS_NV_TO_IV;
+
+# if defined(Perl_isnan)
+
+ if (UNLIKELY(Perl_isnan(nv))) {
+ return FALSE;
+ }
+
+# endif
+
+ if (UNLIKELY(nv < IV_MIN) || UNLIKELY(nv > IV_MAX)) {
+ return FALSE;
+ }
+
+ if ((IV) nv != nv) {
+ return FALSE;
+ }
+
+ *ivp = (IV) nv;
+ return TRUE;
+}
+
+#endif
+
+/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */
+
+#define MAX_CHARSET_NAME_LENGTH 2
+
+PERL_STATIC_INLINE const char *
+get_regex_charset_name(const U32 flags, STRLEN* const lenp)
+{
+ /* Returns a string that corresponds to the name of the regex character set
+ * given by 'flags', and *lenp is set the length of that string, which
+ * cannot exceed MAX_CHARSET_NAME_LENGTH characters */
+
+ *lenp = 1;
+ switch (get_regex_charset(flags)) {
+ case REGEX_DEPENDS_CHARSET: return DEPENDS_PAT_MODS;
+ case REGEX_LOCALE_CHARSET: return LOCALE_PAT_MODS;
+ case REGEX_UNICODE_CHARSET: return UNICODE_PAT_MODS;
+ case REGEX_ASCII_RESTRICTED_CHARSET: return ASCII_RESTRICT_PAT_MODS;
+ case REGEX_ASCII_MORE_RESTRICTED_CHARSET:
+ *lenp = 2;
+ return ASCII_MORE_RESTRICT_PAT_MODS;
+ }
+ /* The NOT_REACHED; hides an assert() which has a rather complex
+ * definition in perl.h. */
+ NOT_REACHED; /* NOTREACHED */
+ return "?"; /* Unknown */
+}
+
+/*
+
+Return false if any get magic is on the SV other than taint magic.
+
+*/
+
+PERL_STATIC_INLINE bool
+S_sv_only_taint_gmagic(SV *sv) {
+ MAGIC *mg = SvMAGIC(sv);
+
+ PERL_ARGS_ASSERT_SV_ONLY_TAINT_GMAGIC;
+
+ while (mg) {
+ if (mg->mg_type != PERL_MAGIC_taint
+ && !(mg->mg_flags & MGf_GSKIP)
+ && mg->mg_virtual->svt_get) {
+ return FALSE;
+ }
+ mg = mg->mg_moremagic;
+ }
+
+ return TRUE;
+}
+
+/* ------------------ cop.h ------------------------------------------- */
+
+
+/* Enter a block. Push a new base context and return its address. */
+
+PERL_STATIC_INLINE PERL_CONTEXT *
+S_cx_pushblock(pTHX_ U8 type, U8 gimme, SV** sp, I32 saveix)
+{
+ PERL_CONTEXT * cx;
+
+ PERL_ARGS_ASSERT_CX_PUSHBLOCK;
+
+ CXINC;
+ cx = CX_CUR();
+ cx->cx_type = type;
+ cx->blk_gimme = gimme;
+ cx->blk_oldsaveix = saveix;
+ cx->blk_oldsp = (I32)(sp - PL_stack_base);
+ cx->blk_oldcop = PL_curcop;
+ cx->blk_oldmarksp = (I32)(PL_markstack_ptr - PL_markstack);
+ cx->blk_oldscopesp = PL_scopestack_ix;
+ cx->blk_oldpm = PL_curpm;
+ cx->blk_old_tmpsfloor = PL_tmps_floor;
+
+ PL_tmps_floor = PL_tmps_ix;
+ CX_DEBUG(cx, "PUSH");
+ return cx;
+}
+
+
+/* Exit a block (RETURN and LAST). */
+
+PERL_STATIC_INLINE void
+S_cx_popblock(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPBLOCK;
+
+ CX_DEBUG(cx, "POP");
+ /* these 3 are common to cx_popblock and cx_topblock */
+ PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+ PL_scopestack_ix = cx->blk_oldscopesp;
+ PL_curpm = cx->blk_oldpm;
+
+ /* LEAVE_SCOPE() should have made this true. /(?{})/ cheats
+ * and leaves a CX entry lying around for repeated use, so
+ * skip for multicall */ \
+ assert( (CxTYPE(cx) == CXt_SUB && CxMULTICALL(cx))
+ || PL_savestack_ix == cx->blk_oldsaveix);
+ PL_curcop = cx->blk_oldcop;
+ PL_tmps_floor = cx->blk_old_tmpsfloor;
+}
+
+/* Continue a block elsewhere (e.g. NEXT, REDO, GOTO).
+ * Whereas cx_popblock() restores the state to the point just before
+ * cx_pushblock() was called, cx_topblock() restores it to the point just
+ * *after* cx_pushblock() was called. */
+
+PERL_STATIC_INLINE void
+S_cx_topblock(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_TOPBLOCK;
+
+ CX_DEBUG(cx, "TOP");
+ /* these 3 are common to cx_popblock and cx_topblock */
+ PL_markstack_ptr = PL_markstack + cx->blk_oldmarksp;
+ PL_scopestack_ix = cx->blk_oldscopesp;
+ PL_curpm = cx->blk_oldpm;
+
+ PL_stack_sp = PL_stack_base + cx->blk_oldsp;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushsub(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, bool hasargs)
+{
+ U8 phlags = CX_PUSHSUB_GET_LVALUE_MASK(Perl_was_lvalue_sub);
+
+ PERL_ARGS_ASSERT_CX_PUSHSUB;
+
+ PERL_DTRACE_PROBE_ENTRY(cv);
+ cx->blk_sub.cv = cv;
+ cx->blk_sub.olddepth = CvDEPTH(cv);
+ cx->blk_sub.prevcomppad = PL_comppad;
+ cx->cx_type |= (hasargs) ? CXp_HASARGS : 0;
+ cx->blk_sub.retop = retop;
+ SvREFCNT_inc_simple_void_NN(cv);
+ cx->blk_u16 = PL_op->op_private & (phlags|OPpDEREF);
+}
+
+
+/* subsets of cx_popsub() */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_common(pTHX_ PERL_CONTEXT *cx)
+{
+ CV *cv;
+
+ PERL_ARGS_ASSERT_CX_POPSUB_COMMON;
+ assert(CxTYPE(cx) == CXt_SUB);
+
+ PL_comppad = cx->blk_sub.prevcomppad;
+ PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+ cv = cx->blk_sub.cv;
+ CvDEPTH(cv) = cx->blk_sub.olddepth;
+ cx->blk_sub.cv = NULL;
+ SvREFCNT_dec(cv);
+}
+
+
+/* handle the @_ part of leaving a sub */
+
+PERL_STATIC_INLINE void
+S_cx_popsub_args(pTHX_ PERL_CONTEXT *cx)
+{
+ AV *av;
+
+ PERL_ARGS_ASSERT_CX_POPSUB_ARGS;
+ assert(CxTYPE(cx) == CXt_SUB);
+ assert(AvARRAY(MUTABLE_AV(
+ PadlistARRAY(CvPADLIST(cx->blk_sub.cv))[
+ CvDEPTH(cx->blk_sub.cv)])) == PL_curpad);
+
+ CX_POP_SAVEARRAY(cx);
+ av = MUTABLE_AV(PAD_SVl(0));
+ if (UNLIKELY(AvREAL(av)))
+ /* abandon @_ if it got reified */
+ clear_defarray(av, 0);
+ else {
+ CLEAR_ARGARRAY(av);
+ }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popsub(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPSUB;
+ assert(CxTYPE(cx) == CXt_SUB);
+
+ PERL_DTRACE_PROBE_RETURN(cx->blk_sub.cv);
+
+ if (CxHASARGS(cx))
+ cx_popsub_args(cx);
+ cx_popsub_common(cx);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushformat(pTHX_ PERL_CONTEXT *cx, CV *cv, OP *retop, GV *gv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHFORMAT;
+
+ cx->blk_format.cv = cv;
+ cx->blk_format.retop = retop;
+ cx->blk_format.gv = gv;
+ cx->blk_format.dfoutgv = PL_defoutgv;
+ cx->blk_format.prevcomppad = PL_comppad;
+ cx->blk_u16 = 0;
+
+ SvREFCNT_inc_simple_void_NN(cv);
+ CvDEPTH(cv)++;
+ SvREFCNT_inc_void(cx->blk_format.dfoutgv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popformat(pTHX_ PERL_CONTEXT *cx)
+{
+ CV *cv;
+ GV *dfout;
+
+ PERL_ARGS_ASSERT_CX_POPFORMAT;
+ assert(CxTYPE(cx) == CXt_FORMAT);
+
+ dfout = cx->blk_format.dfoutgv;
+ setdefout(dfout);
+ cx->blk_format.dfoutgv = NULL;
+ SvREFCNT_dec_NN(dfout);
+
+ PL_comppad = cx->blk_format.prevcomppad;
+ PL_curpad = LIKELY(PL_comppad) ? AvARRAY(PL_comppad) : NULL;
+ cv = cx->blk_format.cv;
+ cx->blk_format.cv = NULL;
+ --CvDEPTH(cv);
+ SvREFCNT_dec_NN(cv);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pusheval(pTHX_ PERL_CONTEXT *cx, OP *retop, SV *namesv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHEVAL;
+
+ cx->blk_eval.retop = retop;
+ cx->blk_eval.old_namesv = namesv;
+ cx->blk_eval.old_eval_root = PL_eval_root;
+ cx->blk_eval.cur_text = PL_parser ? PL_parser->linestr : NULL;
+ cx->blk_eval.cv = NULL; /* later set by doeval_compile() */
+ cx->blk_eval.cur_top_env = PL_top_env;
+
+ assert(!(PL_in_eval & ~ 0x3F));
+ assert(!(PL_op->op_type & ~0x1FF));
+ cx->blk_u16 = (PL_in_eval & 0x3F) | ((U16)PL_op->op_type << 7);
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popeval(pTHX_ PERL_CONTEXT *cx)
+{
+ SV *sv;
+
+ PERL_ARGS_ASSERT_CX_POPEVAL;
+ assert(CxTYPE(cx) == CXt_EVAL);
+
+ PL_in_eval = CxOLD_IN_EVAL(cx);
+ assert(!(PL_in_eval & 0xc0));
+ PL_eval_root = cx->blk_eval.old_eval_root;
+ sv = cx->blk_eval.cur_text;
+ if (sv && CxEVAL_TXT_REFCNTED(cx)) {
+ cx->blk_eval.cur_text = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+
+ sv = cx->blk_eval.old_namesv;
+ if (sv) {
+ cx->blk_eval.old_namesv = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+}
+
+
+/* push a plain loop, i.e.
+ * { block }
+ * while (cond) { block }
+ * for (init;cond;continue) { block }
+ * This loop can be last/redo'ed etc.
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_plain(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_PUSHLOOP_PLAIN;
+ cx->blk_loop.my_op = cLOOP;
+}
+
+
+/* push a true for loop, i.e.
+ * for var (list) { block }
+ */
+
+PERL_STATIC_INLINE void
+S_cx_pushloop_for(pTHX_ PERL_CONTEXT *cx, void *itervarp, SV* itersave)
+{
+ PERL_ARGS_ASSERT_CX_PUSHLOOP_FOR;
+
+ /* this one line is common with cx_pushloop_plain */
+ cx->blk_loop.my_op = cLOOP;
+
+ cx->blk_loop.itervar_u.svp = (SV**)itervarp;
+ cx->blk_loop.itersave = itersave;
+#ifdef USE_ITHREADS
+ cx->blk_loop.oldcomppad = PL_comppad;
+#endif
+}
+
+
+/* pop all loop types, including plain */
+
+PERL_STATIC_INLINE void
+S_cx_poploop(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPLOOP;
+
+ assert(CxTYPE_is_LOOP(cx));
+ if ( CxTYPE(cx) == CXt_LOOP_ARY
+ || CxTYPE(cx) == CXt_LOOP_LAZYSV)
+ {
+ /* Free ary or cur. This assumes that state_u.ary.ary
+ * aligns with state_u.lazysv.cur. See cx_dup() */
+ SV *sv = cx->blk_loop.state_u.lazysv.cur;
+ cx->blk_loop.state_u.lazysv.cur = NULL;
+ SvREFCNT_dec_NN(sv);
+ if (CxTYPE(cx) == CXt_LOOP_LAZYSV) {
+ sv = cx->blk_loop.state_u.lazysv.end;
+ cx->blk_loop.state_u.lazysv.end = NULL;
+ SvREFCNT_dec_NN(sv);
+ }
+ }
+ if (cx->cx_type & (CXp_FOR_PAD|CXp_FOR_GV)) {
+ SV *cursv;
+ SV **svp = (cx)->blk_loop.itervar_u.svp;
+ if ((cx->cx_type & CXp_FOR_GV))
+ svp = &GvSV((GV*)svp);
+ cursv = *svp;
+ *svp = cx->blk_loop.itersave;
+ cx->blk_loop.itersave = NULL;
+ SvREFCNT_dec(cursv);
+ }
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushwhen(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_PUSHWHEN;
+
+ cx->blk_givwhen.leave_op = cLOGOP->op_other;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popwhen(pTHX_ PERL_CONTEXT *cx)
+{
+ PERL_ARGS_ASSERT_CX_POPWHEN;
+ assert(CxTYPE(cx) == CXt_WHEN);
+
+ PERL_UNUSED_ARG(cx);
+ PERL_UNUSED_CONTEXT;
+ /* currently NOOP */
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_pushgiven(pTHX_ PERL_CONTEXT *cx, SV *orig_defsv)
+{
+ PERL_ARGS_ASSERT_CX_PUSHGIVEN;
+
+ cx->blk_givwhen.leave_op = cLOGOP->op_other;
+ cx->blk_givwhen.defsv_save = orig_defsv;
+}
+
+
+PERL_STATIC_INLINE void
+S_cx_popgiven(pTHX_ PERL_CONTEXT *cx)
+{
+ SV *sv;
+
+ PERL_ARGS_ASSERT_CX_POPGIVEN;
+ assert(CxTYPE(cx) == CXt_GIVEN);
+
+ sv = GvSV(PL_defgv);
+ GvSV(PL_defgv) = cx->blk_givwhen.defsv_save;
+ cx->blk_givwhen.defsv_save = NULL;
+ SvREFCNT_dec(sv);
+}
+
+/* ------------------ util.h ------------------------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc foldEQ
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same
+case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
+match themselves and their opposite case counterparts. Non-cased and non-ASCII
+range bytes match only themselves.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ(const char *s1, const char *s2, I32 len)
+{
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
+{
+ /* Compare non-UTF-8 using Unicode (Latin1) semantics. Works on all folds
+ * representable without UTF-8, except for LATIN_SMALL_LETTER_SHARP_S, and
+ * does not check for this. Nor does it check that the strings each have
+ * at least 'len' characters. */
+
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_latin1[*b]) {
+ return 0;
+ }
+ a++, b++;
+ }
+ return 1;
+}
+
+/*
+=for apidoc foldEQ_locale
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same case-insensitively in the current locale; false otherwise.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
+{
+ dVAR;
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_locale[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
+#if ! defined (HAS_MEMRCHR) && (defined(PERL_CORE) || defined(PERL_EXT))
+
+PERL_STATIC_INLINE void *
+S_my_memrchr(const char * s, const char c, const STRLEN len)
+{
+ /* memrchr(), since many platforms lack it */
+
+ const char * t = s + len - 1;
+
+ PERL_ARGS_ASSERT_MY_MEMRCHR;
+
+ while (t >= s) {
+ if (*t == c) {
+ return (void *) t;
+ }
+ t--;
+ }
+
+ return NULL;
+}
+
+#endif
+
+/*