#ifdef EBCDIC
Adp |UV |utf8n_to_uvchr |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
-ApM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
#else
Adpbm |UV |utf8n_to_uvchr |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
-ApbmM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
#endif
+ApM |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
Adp |UV |utf8n_to_uvuni |NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
#define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX_ a,b,c)
#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#define valid_utf8_to_uvchr(a,b) Perl_valid_utf8_to_uvchr(aTHX_ a,b)
#define valid_utf8_to_uvuni(a,b) Perl_valid_utf8_to_uvuni(aTHX_ a,b)
#define vcmp(a,b) Perl_vcmp(aTHX_ a,b)
#define vcroak(a,b) Perl_vcroak(aTHX_ a,b)
#if defined(EBCDIC)
#define utf8n_to_uvchr(a,b,c,d) Perl_utf8n_to_uvchr(aTHX_ a,b,c,d)
#define uvchr_to_utf8(a,b) Perl_uvchr_to_utf8(aTHX_ a,b)
-#define valid_utf8_to_uvchr(a,b) Perl_valid_utf8_to_uvchr(aTHX_ a,b)
#endif
#if defined(HAS_SIGACTION) && defined(SA_SIGINFO)
#define csighandler Perl_csighandler
#define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS \
assert(d)
+PERL_CALLCONV UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR \
+ assert(s)
+
PERL_CALLCONV UV Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI \
#define PERL_ARGS_ASSERT_UVCHR_TO_UTF8 \
assert(d)
-/* PERL_CALLCONV UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
- __attribute__nonnull__(pTHX_1); */
-#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR \
- assert(s)
-
#endif
#if !(defined(HAS_SIGACTION) && defined(SA_SIGINFO))
PERL_CALLCONV Signal_t Perl_csighandler(int sig);
#define PERL_ARGS_ASSERT_UVCHR_TO_UTF8 \
assert(d)
-PERL_CALLCONV UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR \
- assert(s)
-
#endif
#if defined(HAS_MSG) || defined(HAS_SEM) || defined(HAS_SHM)
PERL_CALLCONV I32 Perl_do_ipcctl(pTHX_ I32 optype, SV** mark, SV** sp)
/* Separate prototypes needed because in ASCII systems these are
* usually macros but they still are compiled as code, too. */
PERL_CALLCONV UV Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags);
-PERL_CALLCONV UV Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen);
PERL_CALLCONV U8* Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv);
#endif
UV
Perl_valid_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
{
- const UV uv = valid_utf8_to_uvuni(s, retlen);
+ UV expectlen = UTF8SKIP(s); /* byte count for the character at s; reported via *retlen and used to bound the loop below */
+ const U8* send = s + expectlen; /* exclusive end of the bytes that get accumulated */
+ UV uv = NATIVE_UTF8_TO_I8(*s); /* running value, seeded from the first byte. NOTE(review): s is dereferenced by these initializers before the argument assertion below runs */
PERL_ARGS_ASSERT_VALID_UTF8_TO_UVCHR;
+ if (retlen) { /* retlen may be NULL when the caller does not want the length */
+ *retlen = expectlen;
+ }
+
+ /* An invariant is trivially returned */
+ if (expectlen == 1) {
+ return LATIN1_TO_NATIVE(uv); /* single-byte case: no mask or accumulation needed */
+ }
+
+ /* Remove the leading bits that indicate the number of bytes, leaving just
+ * the bits that are part of the value */
+ uv &= UTF_START_MASK(expectlen);
+
+ /* Now, loop through the remaining bytes, accumulating each into the
+ * working total as we go. (I khw tried unrolling the loop for up to 4
+ * bytes, but there was no performance improvement) */
+ for (++s; s < send; s++) { /* starts at the second byte; the first was consumed above */
+ uv = UTF8_ACCUMULATE(uv, *s);
+ }
+
return UNI_TO_NATIVE(uv);
+
}
/*
UV
Perl_valid_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
{
- UV expectlen = UTF8SKIP(s);
- const U8* send = s + expectlen;
- UV uv = NATIVE_UTF8_TO_I8(*s);
-
PERL_ARGS_ASSERT_VALID_UTF8_TO_UVUNI;
- if (retlen) {
- *retlen = expectlen;
- }
-
- /* An invariant is trivially returned */
- if (expectlen == 1) {
- return uv;
- }
-
- /* Remove the leading bits that indicate the number of bytes, leaving just
- * the bits that are part of the value */
- uv &= UTF_START_MASK(expectlen);
-
- /* Now, loop through the remaining bytes, accumulating each into the
- * working total as we go. (I khw tried unrolling the loop for up to 4
- * bytes, but there was no performance improvement) */
- for (++s; s < send; s++) {
- uv = UTF8_ACCUMULATE(uv, *s);
- }
-
- return uv;
+ return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); /* thin wrapper: valid_utf8_to_uvchr() does the decoding and fills *retlen; its result is then passed through NATIVE_TO_UNI */
}
/*
/* As there are no translations, avoid the function wrapper */
#define utf8n_to_uvchr utf8n_to_uvuni
-#define valid_utf8_to_uvchr valid_utf8_to_uvuni
#define uvchr_to_utf8 uvuni_to_utf8
/*