C<L</is_c9strict_utf8_string_loclen>>.
=cut
+
+*/
+
+#define is_utf8_invariant_string(s, len) \
+ is_utf8_invariant_string_loc(s, len, NULL)
+
+/*
+=for apidoc is_utf8_invariant_string_loc
+
+Like C<L</is_utf8_invariant_string>> but upon failure, stores the location of
+the first UTF-8 variant character in the C<ep> pointer; if all characters are
+UTF-8 invariant, this function does not change the contents of C<*ep>.
+
+=cut
+
+XXX On ASCII machines this could be sped up by doing word-at-a-time operations
+
*/
PERL_STATIC_INLINE bool
-S_is_utf8_invariant_string(const U8* const s, const STRLEN len)
+S_is_utf8_invariant_string_loc(const U8* const s, const STRLEN len, const U8 ** ep)
{
const U8* const send = s + (len ? len : strlen((const char *)s));
const U8* x = s;
- PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING;
+ PERL_ARGS_ASSERT_IS_UTF8_INVARIANT_STRING_LOC;
+
+ while (x < send) {
+ if (UTF8_IS_INVARIANT(*x)) {
+ x++;
+ continue;
+ }
+
+ if (ep) {
+ *ep = x;
+ }
- for (; x < send; ++x) {
- if (!UTF8_IS_INVARIANT(*x))
- return FALSE;
+ return FALSE;
}
return TRUE;
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string_loc>>,
C<L</is_utf8_string_loclen>>,
C<L</is_utf8_fixed_width_buf_flags>>,
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_flags>>,
C<L</is_utf8_string_loc>>,
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_flags>>,
C<L</is_utf8_string_loc>>,
See also
C<L</is_utf8_invariant_string>>,
+C<L</is_utf8_invariant_string_loc>>,
C<L</is_utf8_string>>,
C<L</is_utf8_string_loc>>,
C<L</is_utf8_string_loc_flags>>,
PERL_ARGS_ASSERT_IS_UTF8_STRING_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (flags == 0) {
return is_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string(s, len);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string(s, len);
PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (flags == 0) {
return is_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_INTERCHANGE)
{
return is_strict_utf8_string_loclen(s, len, ep, el);
}
- if ((flags & ~UTF8_DISALLOW_ABOVE_31_BIT)
+ if ((flags & ~UTF8_DISALLOW_PERL_EXTENDED)
== UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE)
{
return is_c9strict_utf8_string_loclen(s, len, ep, el);
PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
- |UTF8_DISALLOW_ABOVE_31_BIT)));
+ |UTF8_DISALLOW_PERL_EXTENDED)));
if (s >= e || s + UTF8SKIP(s) <= e) {
return FALSE;
SvREFCNT_dec(sv);
}
+/* ------------------ util.h ------------------------------------------- */
+
+/*
+=head1 Miscellaneous Functions
+
+=for apidoc foldEQ
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same
+case-insensitively; false otherwise. Uppercase and lowercase ASCII range bytes
+match themselves and their opposite case counterparts. Non-cased and non-ASCII
+range bytes match only themselves.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ(const char *s1, const char *s2, I32 len)
+{
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_latin1(const char *s1, const char *s2, I32 len)
+{
+ /* Compare non-utf8 using Unicode (Latin1) semantics. Does not work on
+ * MICRO_SIGN, LATIN_SMALL_LETTER_SHARP_S, nor
+ * LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, and does not check for these. Nor
+ * does it check that the strings each have at least 'len' characters */
+
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LATIN1;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_latin1[*b]) {
+ return 0;
+ }
+ a++, b++;
+ }
+ return 1;
+}
+
+/*
+=for apidoc foldEQ_locale
+
+Returns true if the leading C<len> bytes of the strings C<s1> and C<s2> are the
+same case-insensitively in the current locale; false otherwise.
+
+=cut
+*/
+
+PERL_STATIC_INLINE I32
+Perl_foldEQ_locale(const char *s1, const char *s2, I32 len)
+{
+ dVAR;
+ const U8 *a = (const U8 *)s1;
+ const U8 *b = (const U8 *)s2;
+
+ PERL_ARGS_ASSERT_FOLDEQ_LOCALE;
+
+ assert(len >= 0);
+
+ while (len--) {
+ if (*a != *b && *a != PL_fold_locale[*b])
+ return 0;
+ a++,b++;
+ }
+ return 1;
+}
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/