}
/* Like L</utf8_to_uvuni_buf>(), but should only be called when it is known that
- * there are no malformations in the input UTF-8 string C<s>. surrogates,
+ * there are no malformations in the input UTF-8 string C<s>. Surrogates,
* non-character code points, and non-Unicode code points are allowed */
UV
}
bool
+Perl_is_uni_blank(pTHX_ UV c)
+{
+ U8 tmpbuf[UTF8_MAXBYTES+1];
+ uvchr_to_utf8(tmpbuf, c);
+ return is_utf8_blank(tmpbuf);
+}
+
+bool
Perl_is_uni_space(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
return 'S';
default:
Perl_croak(aTHX_ "panic: to_upper_title_latin1 did not expect '%c' to map to '%c'", c, LATIN_SMALL_LETTER_Y_WITH_DIAERESIS);
- /* NOTREACHED */
+ assert(0); /* NOTREACHED */
}
}
}
bool
+Perl_is_uni_blank_lc(pTHX_ UV c)
+{
+ return is_uni_blank(c); /* XXX no locale support yet */
+}
+
+bool
Perl_is_uni_space_lc(pTHX_ UV c)
{
return is_uni_space(c); /* XXX no locale support yet */
}
bool
+Perl_is_utf8_blank(pTHX_ const U8 *p)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_IS_UTF8_BLANK;
+
+ return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank");
+}
+
+bool
Perl_is_utf8_space(pTHX_ const U8 *p)
{
dVAR;
PERL_ARGS_ASSERT_IS_UTF8_X_PREPEND;
- return is_utf8_common(p, &PL_utf8_X_prepend, "GCB=Prepend");
+ return is_utf8_common(p, &PL_utf8_X_prepend, "_X_GCB_Prepend");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_NON_HANGUL;
- return is_utf8_common(p, &PL_utf8_X_non_hangul, "HST=Not_Applicable");
+ return is_utf8_common(p, &PL_utf8_X_non_hangul, "_X_HST_Not_Applicable");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_L;
- return is_utf8_common(p, &PL_utf8_X_L, "GCB=L");
+ return is_utf8_common(p, &PL_utf8_X_L, "_X_GCB_L");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_LV;
- return is_utf8_common(p, &PL_utf8_X_LV, "GCB=LV");
+ return is_utf8_common(p, &PL_utf8_X_LV, "_X_GCB_LV");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_LVT;
- return is_utf8_common(p, &PL_utf8_X_LVT, "GCB=LVT");
+ return is_utf8_common(p, &PL_utf8_X_LVT, "_X_GCB_LVT");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_T;
- return is_utf8_common(p, &PL_utf8_X_T, "GCB=T");
+ return is_utf8_common(p, &PL_utf8_X_T, "_X_GCB_T");
}
bool
PERL_ARGS_ASSERT_IS_UTF8_X_V;
- return is_utf8_common(p, &PL_utf8_X_V, "GCB=V");
+ return is_utf8_common(p, &PL_utf8_X_V, "_X_GCB_V");
}
bool
}
if (!len && *swashp) {
- const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE);
+ const UV uv2 = swash_fetch(*swashp, tmpbuf, TRUE /* => is utf8 */);
if (uv2) {
/* It was "normal" (a single character mapping). */
}
}
- if (!len) /* Neither: just copy. In other words, there was no mapping
- defined, which means that the code point maps to itself */
- len = uvchr_to_utf8(ustrp, uv0) - ustrp;
+ if (len) {
+ if (lenp) {
+ *lenp = len;
+ }
+ return valid_utf8_to_uvchr(ustrp, 0);
+ }
+
+ /* Here, there was no mapping defined, which means that the code point maps
+ * to itself. Return the inputs */
+ len = UTF8SKIP(p);
+ Copy(p, ustrp, len, U8);
if (lenp)
*lenp = len;
- return len ? valid_utf8_to_uvchr(ustrp, 0) : 0;
+ return uv0;
+
}
STATIC UV
Copy(ptr, PL_last_swash_key, klen, U8);
}
- if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) {
- SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
-
- /* This outputs warnings for binary properties only, assuming that
- * to_utf8_case() will output any for non-binary. Also, surrogates
- * aren't checked for, as that would warn on things like /\p{Gc=Cs}/ */
-
- if (! bitssvp || SvUV(*bitssvp) == 1) {
- /* User-defined properties can silently match above-Unicode */
- SV** const user_defined_svp = hv_fetchs(hv, "USER_DEFINED", FALSE);
- if (! user_defined_svp || ! SvUV(*user_defined_svp)) {
- const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0);
- Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
- "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", code_point);
- }
- }
- }
-
switch ((int)((slen << 3) / needents)) {
case 1:
bit = 1 << (off & 7);
(U8*) SvPVX(*entryp),
(U8*) SvPVX(*entryp) + SvCUR(*entryp),
0)));
- /*DEBUG_U(PerlIO_printf(Perl_debug_log, "Adding %"UVXf" to list for %"UVXf"\n", valid_utf8_to_uvchr((U8*) SvPVX(*entryp), 0), u));*/
+ /*DEBUG_U(PerlIO_printf(Perl_debug_log, "%s: %d: Adding %"UVXf" to list for %"UVXf"\n", __FILE__, __LINE__, valid_utf8_to_uvchr((U8*) SvPVX(*entryp), 0), u));*/
}
}
}
/* Make sure there is a mapping to itself on the list */
if (! found_key) {
av_push(list, newSVuv(val));
- /*DEBUG_U(PerlIO_printf(Perl_debug_log, "Adding %"UVXf" to list for %"UVXf"\n", val, val));*/
+ /*DEBUG_U(PerlIO_printf(Perl_debug_log, "%s: %d: Adding %"UVXf" to list for %"UVXf"\n", __FILE__, __LINE__, val, val));*/
}
/* Simply add the value to the list */
if (! found_inverse) {
av_push(list, newSVuv(inverse));
- /*DEBUG_U(PerlIO_printf(Perl_debug_log, "Adding %"UVXf" to list for %"UVXf"\n", inverse, val));*/
+ /*DEBUG_U(PerlIO_printf(Perl_debug_log, "%s: %d: Adding %"UVXf" to list for %"UVXf"\n", __FILE__, __LINE__, inverse, val));*/
}
/* swatch_get() increments the value of val for each element in the
return invlist;
}
+bool
+Perl__is_swash_user_defined(pTHX_ SV* const swash)
+{
+ SV** ptr = hv_fetchs(MUTABLE_HV(SvRV(swash)), "USER_DEFINED", FALSE);
+
+ PERL_ARGS_ASSERT__IS_SWASH_USER_DEFINED;
+
+ if (! ptr) {
+ return FALSE;
+ }
+ return cBOOL(SvUV(*ptr));
+}
+
+SV*
+Perl__get_swash_invlist(pTHX_ SV* const swash)
+{
+ SV** ptr = hv_fetchs(MUTABLE_HV(SvRV(swash)), "INVLIST", FALSE);
+
+ PERL_ARGS_ASSERT__GET_SWASH_INVLIST;
+
+ if (! ptr) {
+ return NULL;
+ }
+
+ return *ptr;
+}
+
/*
=for apidoc uvchr_to_utf8