ApdD |UV |utf8_to_uvuni_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
pM |bool |check_utf8_print |NN const U8 *s|const STRLEN len
-Adop |UV |utf8n_to_uvchr |NN const U8 *s \
+Adnop |UV |utf8n_to_uvchr |NN const U8 *s \
|STRLEN curlen \
|NULLOK STRLEN *retlen \
|const U32 flags
-Adop |UV |utf8n_to_uvchr_error|NN const U8 *s \
+Adnop |UV |utf8n_to_uvchr_error|NN const U8 *s \
|STRLEN curlen \
|NULLOK STRLEN *retlen \
|const U32 flags \
|NULLOK U32 * errors
-AMdp |UV |utf8n_to_uvchr_msgs|NN const U8 *s \
+AMndi |UV |utf8n_to_uvchr_msgs|NN const U8 *s \
+ |STRLEN curlen \
+ |NULLOK STRLEN *retlen \
+ |const U32 flags \
+ |NULLOK U32 * errors \
+ |NULLOK AV ** msgs
+AMnpd |UV |_utf8n_to_uvchr_msgs_helper \
+ |NN const U8 *s \
|STRLEN curlen \
|NULLOK STRLEN *retlen \
|const U32 flags \
#define _to_utf8_lower_flags(a,b,c,d,e,f,g) Perl__to_utf8_lower_flags(aTHX_ a,b,c,d,e,f,g)
#define _to_utf8_title_flags(a,b,c,d,e,f,g) Perl__to_utf8_title_flags(aTHX_ a,b,c,d,e,f,g)
#define _to_utf8_upper_flags(a,b,c,d,e,f,g) Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e,f,g)
+#define _utf8n_to_uvchr_msgs_helper Perl__utf8n_to_uvchr_msgs_helper
#define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d)
#define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b)
#define apply_attrs_string(a,b,c,d) Perl_apply_attrs_string(aTHX_ a,b,c,d)
#define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b)
#endif
#define utf8_to_uvuni_buf(a,b,c) Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
-#define utf8n_to_uvchr_msgs(a,b,c,d,e,f) Perl_utf8n_to_uvchr_msgs(aTHX_ a,b,c,d,e,f)
+#define utf8n_to_uvchr_msgs S_utf8n_to_uvchr_msgs
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
#define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
#define uvuni_to_utf8(a,b) Perl_uvuni_to_utf8(aTHX_ a,b)
|| is_utf8_valid_partial_char_flags(*ep, s + len, flags);
}
+PERL_STATIC_INLINE UV
+S_utf8n_to_uvchr_msgs(const U8 *s,
+ STRLEN curlen,
+ STRLEN *retlen,
+ const U32 flags,
+ U32 * errors,
+ AV ** msgs)
+{
+ /* This is the inlined portion of utf8n_to_uvchr_msgs. It handles the
+ * simple cases, and, if necessary, calls a helper function to deal with
+ * the more complex ones. Almost all well-formed non-problematic code
+ * points are considered simple, so that it's unlikely that the helper
+ * function will need to be called.
+ *
+ * Parameters, as used by the code in this function:
+ *   s       first byte to examine
+ *   curlen  number of bytes available starting at s; nothing at or
+ *           beyond s + curlen is read here
+ *   retlen  if non-NULL and the fast path succeeds, set to the number of
+ *           bytes that made up the returned code point
+ *   flags   not looked at on the fast path; passed through to the helper
+ *   errors  if non-NULL and the fast path succeeds, set to 0
+ *   msgs    if non-NULL and the fast path succeeds, set to NULL
+ *
+ * Returns the decoded value when the dfa accepts the input; otherwise
+ * returns whatever _utf8n_to_uvchr_msgs_helper() returns, and in that case
+ * it is the helper that is handed retlen, errors, and msgs.
+ *
+ * This is an adaptation of the tables and algorithm given in
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/, which provides
+ * comprehensive documentation of the original version. A copyright notice
+ * for the original version is given at the beginning of this file. The
+ * Perl adaptation is documented at the definition of strict_utf8_dfa_tab[].
+ */
+
+ const U8 * const s0 = s;
+ const U8 * send = s0 + curlen;
+ UV uv; /* deliberately not initialized; see the note in the loop */
+ UV state = 0; /* 0 is both the dfa's start state and its accept state */
+
+ PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
+
+ /* This dfa is fast. If it accepts the input, it was for a well-formed,
+ * non-problematic code point, which can be returned immediately.
+ * Otherwise we call a helper function to figure out the more complicated
+ * cases.
+ *
+ * The literal 1 in the loop condition is the dfa's reject state (the
+ * version of this loop that used to live in utf8.c spelled it
+ * PERL_UTF8_DECODE_REJECT). The loop is not entered at all when curlen
+ * is 0. */
+
+ while (s < send && LIKELY(state != 1)) {
+ UV type = strict_utf8_dfa_tab[*s];
+ /* state is 0 only on the first iteration (a later 0 returns below), so
+ * the first arm always assigns uv before the second arm can read it */
+ uv = (state == 0)
+ ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
+ : UTF8_ACCUMULATE(uv, *s);
+ state = strict_utf8_dfa_tab[256 + state + type];
+
+ if (state != 0) { /* not accepted (yet): move on to the next byte */
+ s++;
+ continue;
+ }
+ /* Accepted. s has not been advanced past the final byte, hence the + 1 */
+ if (retlen) {
+ *retlen = s - s0 + 1;
+ }
+ if (errors) {
+ *errors = 0;
+ }
+ if (msgs) {
+ *msgs = NULL;
+ }
+
+ return uv;
+ }
+
+ /* Here, the dfa either rejected the input or ran out of bytes before
+ * accepting, so the input is potentially problematic. Use the full
+ * mechanism, restarting from the original start s0 with the caller's
+ * arguments otherwise unchanged. */
+ return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
+}
+
/* ------------------------------- perl.h ----------------------------- */
/*
PERL_CALLCONV UV Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags, const char * const file, const int line);
#define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS \
assert(p); assert(ustrp); assert(file)
+PERL_CALLCONV UV Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs);
+#define PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER \
+ assert(s)
PERL_CALLCONV void Perl__warn_problematic_locale(void);
PERL_CALLCONV_NO_RET void Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
__attribute__noreturn__;
#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI_BUF \
assert(s); assert(send)
-PERL_CALLCONV UV Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags);
+PERL_CALLCONV UV Perl_utf8n_to_uvchr(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR \
assert(s)
-PERL_CALLCONV UV Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors);
+PERL_CALLCONV UV Perl_utf8n_to_uvchr_error(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_ERROR \
assert(s)
-PERL_CALLCONV UV Perl_utf8n_to_uvchr_msgs(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs);
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE UV S_utf8n_to_uvchr_msgs(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS \
assert(s)
+#endif
PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags);
#define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI \
assert(s)
*/
UV
-Perl_utf8n_to_uvchr(pTHX_ const U8 *s,
- STRLEN curlen,
- STRLEN *retlen,
- const U32 flags)
+Perl_utf8n_to_uvchr(const U8 *s,
+ STRLEN curlen,
+ STRLEN *retlen,
+ const U32 flags)
{
PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
*/
UV
-Perl_utf8n_to_uvchr_error(pTHX_ const U8 *s,
+Perl_utf8n_to_uvchr_error(const U8 *s,
STRLEN curlen,
STRLEN *retlen,
const U32 flags,
*/
UV
-Perl_utf8n_to_uvchr_msgs(pTHX_ const U8 *s,
+Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
STRLEN curlen,
STRLEN *retlen,
const U32 flags,
U8 temp_char_buf[UTF8_MAXBYTES + 1]; /* Used to avoid a Newx in this
routine; see [perl #130921] */
UV uv_so_far;
- UV state = 0;
-
- PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
-
- /* Measurements show that this dfa is somewhat faster than the regular code
- * below, so use it first, dropping down for the non-normal cases. */
-
-#define PERL_UTF8_DECODE_REJECT 1
-
- while (s < send && LIKELY(state != PERL_UTF8_DECODE_REJECT)) {
- UV type = strict_utf8_dfa_tab[*s];
-
- uv = (state == 0)
- ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
- : UTF8_ACCUMULATE(uv, *s);
- state = strict_utf8_dfa_tab[256 + state + type];
-
- if (state == 0) {
- if (retlen) {
- *retlen = s - s0 + 1;
- }
- if (errors) {
- *errors = 0;
- }
- if (msgs) {
- *msgs = NULL;
- }
+ dTHX;
- return uv;
- }
-
- s++;
- }
+ PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER;
/* Here, is one of: a) malformed; b) a problematic code point (surrogate,
* non-unicode, or nonchar); or c) on ASCII platforms, one of the Hangul