This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Fix non-final-fold table
author Karl Williamson <khw@cpan.org>
Fri, 1 Nov 2019 03:30:34 +0000 (21:30 -0600)
committer Karl Williamson <khw@cpan.org>
Sat, 16 Nov 2019 18:37:01 +0000 (11:37 -0700)
This wasn't generating the correct values.  It is no longer used, and
the next commit will remove it, but I wanted to get it right, in case it
is ever needed again.

charclass_invlists.h
lib/unicore/mktables
lib/unicore/uni_keywords.pl
regcharclass.h
uni_keywords.h

index 05573bb..00f489c 100644 (file)
@@ -184036,49 +184036,67 @@ static const UV UNI__PERL_IS_IN_MULTI_CHAR_FOLD_invlist[] = {  /* for EBCDIC 037
 #  if 'A' == 65 /* ASCII/Latin1 */
 
 static const UV UNI__PERL_NON_FINAL_FOLDS_invlist[] = {  /* for ASCII/Latin1 */
-       39,     /* Number of elements */
+       57,     /* Number of elements */
        148565664, /* Version and data structure type */
        1,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
        0x0,
+       0x61,
+       0x62,
        0x66,
        0x67,
-       0x69,
-       0x6A,
-       0x6C,
-       0x6D,
-       0x6E,
-       0x6F,
+       0x68,
+       0x6B,
        0x73,
        0x75,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
+       0x77,
+       0x78,
+       0x79,
+       0x7A,
+       0x2BC,
+       0x2BD,
+       0x308,
        0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
        0x313,
        0x314,
-       0x331,
-       0x332,
        0x342,
        0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
        0x3B9,
        0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
        0x565,
        0x566,
-       0x56B,
-       0x56C,
-       0x56D,
-       0x56E,
-       0x576,
-       0x577,
-       0x582,
-       0x583
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
 };
 
 #  endif       /* ASCII/Latin1 */
@@ -184089,49 +184107,69 @@ static const UV UNI__PERL_NON_FINAL_FOLDS_invlist[] = {  /* for ASCII/Latin1 */
      && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21
 
 static const UV UNI__PERL_NON_FINAL_FOLDS_invlist[] = {  /* for EBCDIC 1047 */
-       39,     /* Number of elements */
+       59,     /* Number of elements */
        148565664, /* Version and data structure type */
        1,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
        0x0,
+       0x81,
+       0x82,
        0x86,
        0x87,
-       0x89,
+       0x88,
        0x8A,
-       0x93,
-       0x94,
-       0x95,
-       0x96,
+       0x91,
+       0x92,
        0xA2,
        0xA4,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
+       0xA6,
+       0xA7,
+       0xA8,
+       0xA9,
+       0x2BC,
+       0x2BD,
+       0x308,
        0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
        0x313,
        0x314,
-       0x331,
-       0x332,
        0x342,
        0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
        0x3B9,
        0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
        0x565,
        0x566,
-       0x56B,
-       0x56C,
-       0x56D,
-       0x56E,
-       0x576,
-       0x577,
-       0x582,
-       0x583
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
 };
 
 #  endif       /* EBCDIC 1047 */
@@ -184142,49 +184180,69 @@ static const UV UNI__PERL_NON_FINAL_FOLDS_invlist[] = {  /* for EBCDIC 1047 */
      && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 37
 
 static const UV UNI__PERL_NON_FINAL_FOLDS_invlist[] = {  /* for EBCDIC 037 */
-       39,     /* Number of elements */
+       59,     /* Number of elements */
        148565664, /* Version and data structure type */
        1,      /* 0 if the list starts at 0;
                   1 if it starts at the element beyond 0 */
        0x0,
+       0x81,
+       0x82,
        0x86,
        0x87,
-       0x89,
+       0x88,
        0x8A,
-       0x93,
-       0x94,
-       0x95,
-       0x96,
+       0x91,
+       0x92,
        0xA2,
        0xA4,
-       0x2BE,
-       0x2BF,
-       0x300,
-       0x302,
-       0x307,
+       0xA6,
+       0xA7,
+       0xA8,
+       0xA9,
+       0x2BC,
+       0x2BD,
+       0x308,
        0x309,
-       0x30A,
-       0x30B,
-       0x30C,
-       0x30D,
        0x313,
        0x314,
-       0x331,
-       0x332,
        0x342,
        0x343,
+       0x3AC,
+       0x3AD,
+       0x3AE,
+       0x3AF,
+       0x3B1,
+       0x3B2,
+       0x3B7,
+       0x3B8,
        0x3B9,
        0x3BA,
+       0x3C1,
+       0x3C2,
+       0x3C5,
+       0x3C6,
+       0x3C9,
+       0x3CA,
+       0x3CE,
+       0x3CF,
        0x565,
        0x566,
-       0x56B,
-       0x56C,
-       0x56D,
-       0x56E,
-       0x576,
-       0x577,
-       0x582,
-       0x583
+       0x574,
+       0x575,
+       0x57E,
+       0x57F,
+       0x1F00,
+       0x1F08,
+       0x1F20,
+       0x1F28,
+       0x1F60,
+       0x1F68,
+       0x1F70,
+       0x1F71,
+       0x1F74,
+       0x1F75,
+       0x1F7C,
+       0x1F7D
 };
 
 #  endif       /* EBCDIC 037 */
@@ -395307,7 +395365,7 @@ static const U8 WB_table[23][23] = {
  * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
  * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
  * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
- * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables
+ * 0f310085a69b19f991fa1d8cf6e066d6c89d840b6238ec3c7c6e1059dc5dbe8b lib/unicore/mktables
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl
index 5f032b4..2ed8ffd 100644 (file)
@@ -15038,9 +15038,14 @@ sub compile_perl() {
 
         my $found_locale_problematic = 0;
 
+        my $folded_count = @hex_folds;
+        if ($folded_count > 3) {
+            die Carp::my_carp("Maximum number of characters in a fold should be 3: Instead, it's  $folded_count for U+" . sprintf "%04X", $range->start);
+        }
+
         # Look at each of the folded-to characters...
-        foreach my $i (0 .. @hex_folds - 1) {
-            my $cp = hex $hex_folds[$i];
+        foreach my $i (1 .. $folded_count) {
+            my $cp = hex $hex_folds[$i-1];
             $any_folds->add_range($cp, $cp);
 
             # The fold is problematic if any of the folded-to characters is
@@ -15050,9 +15055,9 @@ sub compile_perl() {
                 $found_locale_problematic = 1;
             }
 
-            if (@hex_folds > 1) {
+            if ($folded_count > 1) {
                 $in_multi_fold->add_range($cp, $cp);
-                next if $i < @hex_folds - 1;
+                next if $i == $folded_count;    # In final position
                 $non_final_fold->add_range($cp, $cp);
             }
         }
index 45d2934..4cc9ab4 100644 (file)
 # 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
 # 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
 # 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
-# 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables
+# 0f310085a69b19f991fa1d8cf6e066d6c89d840b6238ec3c7c6e1059dc5dbe8b lib/unicore/mktables
 # a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
 # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
 # e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl
index 0f51ade..e036c47 100644 (file)
  * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
  * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
  * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
- * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables
+ * 0f310085a69b19f991fa1d8cf6e066d6c89d840b6238ec3c7c6e1059dc5dbe8b lib/unicore/mktables
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl
index eca8459..f790d82 100644 (file)
@@ -7284,7 +7284,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * 78e2600e24fa7d5ab62117de50b382f8b31b08401c37a0782c38dacb340b64e7 lib/unicore/extracted/DLineBreak.txt
  * 1bde4ad73e271c6349fbd1972e54f38bba5cc1900c28f678e79b9e8909b31793 lib/unicore/extracted/DNumType.txt
  * 6278722699123f3890e4b1cc42011e96d8960e4958a3b93484361530983d2611 lib/unicore/extracted/DNumValues.txt
- * 5214f368c189077a2a748b7ef0a5300abd0d012be568d18c1bbd8bede55818ae lib/unicore/mktables
+ * 0f310085a69b19f991fa1d8cf6e066d6c89d840b6238ec3c7c6e1059dc5dbe8b lib/unicore/mktables
  * a712c758275b460d18fa77a26ed3589689bb3f69dcc1ea99b913e32db92a5cd2 lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * e9283c761c5a95e3379384ca47c13a284f08d743c2be6e5091f1152b1b6b7a37 regen/mk_PL_charclass.pl