This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8_to_uvchr_buf() make behavior match docs
authorKarl Williamson <khw@cpan.org>
Sat, 29 Jun 2019 05:57:31 +0000 (23:57 -0600)
committerKarl Williamson <khw@cpan.org>
Tue, 2 Jul 2019 01:38:00 +0000 (19:38 -0600)
For well formed input, there is no change.  But for malformed it wasn't
returning the documented length when warnings were enabled, and not
always the documented value when they were disabled.

This is implemented as an inline function, called from both the macro
and the Perl_ form.

Devel::PPPort has sufficient tests for this.

embed.fnc
embed.h
inline.h
proto.h
utf8.c
utf8.h

index 0cd0917..bfc9dca 100644 (file)
--- a/embed.fnc
+++ b/embed.fnc
@@ -1928,6 +1928,7 @@ ApdD      |UV     |utf8_to_uvchr  |NN const U8 *s|NULLOK STRLEN *retlen
 AbpdD  |UV     |utf8_to_uvuni  |NN const U8 *s|NULLOK STRLEN *retlen
 AbpxD  |UV     |valid_utf8_to_uvuni    |NN const U8 *s|NULLOK STRLEN *retlen
 AMpd   |UV     |utf8_to_uvchr_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
+Ai     |UV     |_utf8_to_uvchr_buf     |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 ApdD   |UV     |utf8_to_uvuni_buf      |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
 px     |bool   |check_utf8_print       |NN const U8 *s|const STRLEN len
 
diff --git a/embed.h b/embed.h
index 506327f..f72a7ee 100644 (file)
--- a/embed.h
+++ b/embed.h
@@ -46,6 +46,7 @@
 #define _to_utf8_lower_flags(a,b,c,d,e,f,g)    Perl__to_utf8_lower_flags(aTHX_ a,b,c,d,e,f,g)
 #define _to_utf8_title_flags(a,b,c,d,e,f,g)    Perl__to_utf8_title_flags(aTHX_ a,b,c,d,e,f,g)
 #define _to_utf8_upper_flags(a,b,c,d,e,f,g)    Perl__to_utf8_upper_flags(aTHX_ a,b,c,d,e,f,g)
+#define _utf8_to_uvchr_buf(a,b,c)      S__utf8_to_uvchr_buf(aTHX_ a,b,c)
 #define _utf8n_to_uvchr_msgs_helper    Perl__utf8n_to_uvchr_msgs_helper
 #define amagic_call(a,b,c,d)   Perl_amagic_call(aTHX_ a,b,c,d)
 #define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b)
index 442c08e..7d81da6 100644 (file)
--- a/inline.h
+++ b/inline.h
@@ -1842,6 +1842,27 @@ S_utf8n_to_uvchr_msgs(const U8 *s,
     return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
 }
 
+PERL_STATIC_INLINE UV
+S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
+{
+    PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF;
+
+    assert(s < send);
+
+    if (! ckWARN_d(WARN_UTF8)) {
+        return utf8n_to_uvchr(s, send - s, retlen,
+                              (UTF8_ALLOW_ANY & ~UTF8_ALLOW_EMPTY));
+    }
+    else {
+        UV ret = utf8n_to_uvchr(s, send - s, retlen, 0);
+        if (retlen && ret == 0 && *s != '\0') {
+            *retlen = (STRLEN) -1;
+        }
+
+        return ret;
+    }
+}
+
 /* ------------------------------- perl.h ----------------------------- */
 
 /*
diff --git a/proto.h b/proto.h
index 9359016..a708e14 100644 (file)
--- a/proto.h
+++ b/proto.h
@@ -143,6 +143,11 @@ PERL_CALLCONV UV   Perl__to_utf8_title_flags(pTHX_ const U8 *p, const U8* e, U8* u
 PERL_CALLCONV UV       Perl__to_utf8_upper_flags(pTHX_ const U8 *p, const U8 *e, U8* ustrp, STRLEN *lenp, bool flags, const char * const file, const int line);
 #define PERL_ARGS_ASSERT__TO_UTF8_UPPER_FLAGS  \
        assert(p); assert(ustrp); assert(file)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE UV  S__utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen);
+#define PERL_ARGS_ASSERT__UTF8_TO_UVCHR_BUF    \
+       assert(s); assert(send)
+#endif
 PERL_CALLCONV UV       Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 * errors, AV ** msgs);
 #define PERL_ARGS_ASSERT__UTF8N_TO_UVCHR_MSGS_HELPER   \
        assert(s)
diff --git a/utf8.c b/utf8.c
index 84db2f6..07e4df7 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -2271,10 +2271,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
 {
     PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF;
 
-    assert(s < send);
-
-    return utf8n_to_uvchr(s, send - s, retlen,
-                     ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+    return _utf8_to_uvchr_buf(s, send, retlen);
 }
 
 /* This is marked as deprecated
diff --git a/utf8.h b/utf8.h
index d0b8742..88e74de 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -69,9 +69,7 @@ the string is invariant.
 #define uvchr_to_utf8_flags_msgs(d,uv,flags,msgs)                              \
                 uvoffuni_to_utf8_flags_msgs(d,NATIVE_TO_UNI(uv),flags, msgs)
 #define utf8_to_uvchr_buf(s, e, lenp)                                          \
-                                (__ASSERT_((U8*) (e) > (U8*) (s))              \
-                                 utf8n_to_uvchr(s, (U8*)(e) - (U8*)(s), lenp,  \
-                                    ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY))
+                   _utf8_to_uvchr_buf((const U8 *) (s), (const U8 *) e, lenp)
 #define utf8n_to_uvchr(s, len, lenp, flags)                                    \
                                 utf8n_to_uvchr_error(s, len, lenp, flags, 0)
 #define utf8n_to_uvchr_error(s, len, lenp, flags, errors)                      \