*/
UV
-Perl_utf8n_to_uvchr(pTHX_ const U8 *s,
- STRLEN curlen,
- STRLEN *retlen,
- const U32 flags)
+Perl_utf8n_to_uvchr(const U8 *s,
+ STRLEN curlen,
+ STRLEN *retlen,
+ const U32 flags)
{
PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
*/
UV
-Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
+Perl_utf8n_to_uvchr_error(const U8 *s,
STRLEN curlen,
STRLEN *retlen,
const U32 flags,
*/
UV
-Perl_utf8n_to_uvchr_msgs(pTHX_ const U8 *s,
+Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
STRLEN curlen,
STRLEN *retlen,
const U32 flags,
U8 temp_char_buf[UTF8_MAXBYTES + 1]; /* Used to avoid a Newx in this
routine; see [perl #130921] */
UV uv_so_far;
- UV state = 0;
-
- PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
-
- /* Measurements show that this dfa is somewhat faster than the regular code
- * below, so use it first, dropping down for the non-normal cases. */
-
-#define PERL_UTF8_DECODE_REJECT 1
-
- while (s < send && LIKELY(state != PERL_UTF8_DECODE_REJECT)) {
- UV type = strict_utf8_dfa_tab[*s];
-
- uv = (state == 0)
- ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
- : UTF8_ACCUMULATE(uv, *s);
- state = strict_utf8_dfa_tab[256 + state + type];
-
- if (state == 0) {
- if (retlen) {
- *retlen = s - s0 + 1;
- }
- if (errors) {
- *errors = 0;
- }
- if (msgs) {
- *msgs = NULL;
- }
+ dTHX;
- return uv;
- }
-
- s++;
- }
+ PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER;
/* Here, is one of: a) malformed; b) a problematic code point (surrogate,
* non-unicode, or nonchar); or c) on ASCII platforms, one of the Hangul
return _to_upper_title_latin1((U8) c, p, lenp, 'S');
}
- uvchr_to_utf8(p, c);
- return CALL_UPPER_CASE(c, p, p, lenp);
+ return CALL_UPPER_CASE(c, NULL, p, lenp);
}
UV
return _to_upper_title_latin1((U8) c, p, lenp, 's');
}
- uvchr_to_utf8(p, c);
- return CALL_TITLE_CASE(c, p, p, lenp);
+ return CALL_TITLE_CASE(c, NULL, p, lenp);
}
STATIC U8
return to_lower_latin1((U8) c, p, lenp, 0 /* 0 is a dummy arg */ );
}
- uvchr_to_utf8(p, c);
- return CALL_LOWER_CASE(c, p, p, lenp);
+ return CALL_LOWER_CASE(c, NULL, p, lenp);
}
UV
/* Here, above 255. If no special needs, just use the macro */
if ( ! (flags & (FOLD_FLAGS_LOCALE|FOLD_FLAGS_NOMIX_ASCII))) {
- uvchr_to_utf8(p, c);
- return CALL_FOLD_CASE(c, p, p, lenp, flags & FOLD_FLAGS_FULL);
+ return CALL_FOLD_CASE(c, NULL, p, lenp, flags & FOLD_FLAGS_FULL);
}
else { /* Otherwise, _toFOLD_utf8_flags has the intelligence to deal with
the special flags. */
{
PERL_ARGS_ASSERT__IS_UTF8_MARK;
- return is_utf8_common(p, &PL_utf8_mark, "IsM", NULL);
+ return is_utf8_common(p, NULL, "IsM", PL_utf8_mark);
}
STATIC UV
/* Here, there was no mapping defined, which means that the code point maps
* to itself. Return the inputs */
cases_to_self:
- len = UTF8SKIP(p);
- if (p != ustrp) { /* Don't copy onto itself */
- Copy(p, ustrp, len, U8);
+ if (p) {
+ len = UTF8SKIP(p);
+ if (p != ustrp) { /* Don't copy onto itself */
+ Copy(p, ustrp, len, U8);
+ }
+ *lenp = len;
+ }
+ else {
+ *lenp = uvchr_to_utf8(ustrp, uv1) - ustrp;
}
-
- if (lenp)
- *lenp = len;
return uv1;
PL_utf8_tofold = _new_invlist_C_array(Case_Folding_invlist);
PL_utf8_tosimplefold = _new_invlist_C_array(Simple_Case_Folding_invlist);
PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
+ PL_utf8_mark = _new_invlist_C_array(PL_uni_prop_ptrs[PL_M]);
}
SV *