This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Backport UVCHR_SKIP
authorKarl Williamson <khw@cpan.org>
Fri, 12 Jul 2019 18:42:26 +0000 (12:42 -0600)
committerNicolas R <atoomic@cpan.org>
Fri, 27 Sep 2019 22:51:29 +0000 (16:51 -0600)
(cherry picked from commit bfe660f9f9775fc1cbbf1c5fd7ed809b3e4dd369)
Signed-off-by: Nicolas R <atoomic@cpan.org>
dist/Devel-PPPort/parts/inc/utf8
dist/Devel-PPPort/t/utf8.t

index 02a4928..8a7271d 100644 (file)
@@ -47,6 +47,33 @@ __UNDEFINED__ UTF8_IS_INVARIANT(c)  (isASCII(c) || isCNTRL_L1(c))
 
 __UNDEFINED__ UVCHR_IS_INVARIANT(c)  UTF8_IS_INVARIANT(c)
 
+#ifdef UVCHR_IS_INVARIANT
+#  if 'A' == 65
+#    define D_PPP_BYTE_INFO_BITS 6  /* 6 bits meaningful in continuation bytes */
+#    ifdef QUADKIND
+#      define D_PPP_UVCHR_SKIP_UPPER(c)                                         \
+          (WIDEST_UTYPE) (c) <                                                  \
+        (((WIDEST_UTYPE) 1) << (6 * D_PPP_BYTE_INFO_BITS)) ? 7 : 13
+#    else
+#      define D_PPP_UVCHR_SKIP_UPPER(c) 7  /* 32 bit platform */
+#    endif
+#  else
+#    define D_PPP_BYTE_INFO_BITS 5      /* EBCDIC has only 5 meaningful bits */
+
+     /* In the releases this is backported to, UTF-EBCDIC had a max of 2**31-1 */
+#    define D_PPP_UVCHR_SKIP_UPPER(c) 7
+#  endif
+
+__UNDEFINED__ UVCHR_SKIP(c)                                                     \
+          UVCHR_IS_INVARIANT(c)                                          ? 1 :  \
+          (WIDEST_UTYPE) (c) < (32 * (1U << (    D_PPP_BYTE_INFO_BITS))) ? 2 :  \
+          (WIDEST_UTYPE) (c) < (16 * (1U << (2 * D_PPP_BYTE_INFO_BITS))) ? 3 :  \
+          (WIDEST_UTYPE) (c) < ( 8 * (1U << (3 * D_PPP_BYTE_INFO_BITS))) ? 4 :  \
+          (WIDEST_UTYPE) (c) < ( 4 * (1U << (4 * D_PPP_BYTE_INFO_BITS))) ? 5 :  \
+          (WIDEST_UTYPE) (c) < ( 2 * (1U << (5 * D_PPP_BYTE_INFO_BITS))) ? 6 :  \
+          D_PPP_UVCHR_SKIP_UPPER(c)
+#endif
+
 #ifdef is_ascii_string
 __UNDEFINED__ is_invariant_string(s,l) is_ascii_string(s,l)
 __UNDEFINED__ is_utf8_invariant_string(s,l) is_ascii_string(s,l)
@@ -491,13 +518,26 @@ UVCHR_IS_INVARIANT(c)
 
 #endif
 
-=tests plan => 84
+#ifdef UVCHR_SKIP
+
+STRLEN
+UVCHR_SKIP(c)
+        UV c
+        PREINIT:
+        CODE:
+            RETVAL = UVCHR_SKIP(c);
+        OUTPUT:
+            RETVAL
+
+#endif
+
+=tests plan => 93
 
 BEGIN { require warnings if "$]" > '5.006' }
 
 # skip tests on 5.6.0 and earlier, plus 7.0
 if ("$]" <= '5.006' || "$]" == '5.007' ) {
-    for (1..84) {
+    for (1..93) {
         skip 'skip: broken utf8 support', 0;
     }
     exit;
@@ -515,6 +555,28 @@ ok(&Devel::PPPort::UVCHR_IS_INVARIANT(ord("A")), 1);
 ok(! &Devel::PPPort::UVCHR_IS_INVARIANT(0xb6));
 ok(! &Devel::PPPort::UVCHR_IS_INVARIANT(0x100));
 
+if ("$]" < '5.006') {
+    for (1 ..9) {
+        ok(1, 1)
+    }
+}
+else {
+    ok(&Devel::PPPort::UVCHR_SKIP(ord("A")), 1);
+    ok(&Devel::PPPort::UVCHR_SKIP(0xb6),     2, "This is a test");
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FF),    2);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFF),   3);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFF),  4);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFFF), 5);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFFFF), ord("A") == 65 ? 5 : 6);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x4000000), ord("A") == 65 ? 6 : 7);
+    if (ord("A") != 65) {
+        ok(1, 1)
+    }
+    else {
+        ok(&Devel::PPPort::UVCHR_SKIP(0xFFFFFFFF), 7);
+    }
+}
+
 if ("$]" < '5.008') {
     for (1 ..3) {
         ok(1, 1)
index 397a7e7..2f164da 100644 (file)
@@ -30,9 +30,9 @@ BEGIN {
     require 'testutil.pl' if $@;
   }
 
-  if (84) {
+  if (93) {
     load();
-    plan(tests => 84);
+    plan(tests => 93);
   }
 }
 
@@ -52,7 +52,7 @@ BEGIN { require warnings if "$]" > '5.006' }
 
 # skip tests on 5.6.0 and earlier, plus 7.0
 if ("$]" <= '5.006' || "$]" == '5.007' ) {
-    for (1..84) {
+    for (1..93) {
         skip 'skip: broken utf8 support', 0;
     }
     exit;
@@ -70,6 +70,28 @@ ok(&Devel::PPPort::UVCHR_IS_INVARIANT(ord("A")), 1);
 ok(! &Devel::PPPort::UVCHR_IS_INVARIANT(0xb6));
 ok(! &Devel::PPPort::UVCHR_IS_INVARIANT(0x100));
 
+if ("$]" < '5.006') {
+    for (1 ..9) {
+        ok(1, 1)
+    }
+}
+else {
+    ok(&Devel::PPPort::UVCHR_SKIP(ord("A")), 1);
+    ok(&Devel::PPPort::UVCHR_SKIP(0xb6),     2, "This is a test");
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FF),    2);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFF),   3);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFF),  4);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFFF), 5);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x3FFFFFF), ord("A") == 65 ? 5 : 6);
+    ok(&Devel::PPPort::UVCHR_SKIP(0x4000000), ord("A") == 65 ? 6 : 7);
+    if (ord("A") != 65) {
+        ok(1, 1)
+    }
+    else {
+        ok(&Devel::PPPort::UVCHR_SKIP(0xFFFFFFFF), 7);
+    }
+}
+
 if ("$]" < '5.008') {
     for (1 ..3) {
         ok(1, 1)