This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Prepare for Unicode 10.0
author Karl Williamson <khw@cpan.org>
Tue, 20 Jun 2017 22:49:27 +0000 (16:49 -0600)
committer Karl Williamson <khw@cpan.org>
Wed, 21 Jun 2017 03:34:58 +0000 (21:34 -0600)
This informs mktables of the new files in 10.0, and updates some
comments in other files to reflect new Unicode terminology.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h
regen/mk_invlists.pl
regexec.c

index 4b3d6cc..2f56895 100644 (file)
@@ -95407,8 +95407,8 @@ static const U8 WB_table[24][24] = {
  * 37f6186253da9824bdb27f4ad867bfe8c25d4dc6bdb2f05585e40a034675a348 lib/unicore/extracted/DLineBreak.txt
  * ef24061b5a5dc93d7e90c2e34530ec757180ee75d872cba65ffc946e52624ae8 lib/unicore/extracted/DNumType.txt
  * a197371fec9a1b517058b440841f60f9378d81682084eef8db22a88cb2f96e90 lib/unicore/extracted/DNumValues.txt
- * 717985d5fe0830f5b72ca21287a7d9f15ba6a383c29fbd3c47231d5f63edb68b lib/unicore/mktables
+ * fd5dda65f76b9209787c5d0bb99f4cde8708c0bf86bcefe344d2d474a0306e28 lib/unicore/mktables
  * cdecb300baad839a6f62791229f551a4fa33f3cbdca08e378dc976466354e778 lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
- * 9534d0cc3914fa1f5d574332c3199605c3d14f8691a0729d68d8498ac2b36280 regen/mk_invlists.pl
+ * 48418cbf454eb9ef35c73468ed5ef72ad8603490eabe74181ce4fae42ec72579 regen/mk_invlists.pl
  * ex: set ro: */
index d8ccd2b..0c8c83c 100644 (file)
@@ -45,7 +45,7 @@ sub NON_ASCII_PLATFORM { ord("A") != 65 }
 # expected, a warning will be generated.  If an older version is being
 # compiled, any bounds tests that fail in the generated test file (-maketest
 # option) will be marked as TODO.
-my $version_of_mk_invlist_bounds = v9.0.0;
+my $version_of_mk_invlist_bounds = v10.0.0;
 
 ##########################################################################
 #
@@ -19067,6 +19067,14 @@ my @input_file_objects = (
                     Property => 'Joining_Type',
                     Has_Missings_Defaults => $NOT_IGNORED,
                    ),
+    Input_file->new("${EXTRACTED}DName.txt", v10.0.0,
+                    Skip => 'This file adds no new information not already'
+                          . ' present in other files',
+                    # And it's unnecessary programmer work to handle this new
+                    # format.  Previous Derived files actually had bug fixes
+                    # in them that were useful, but that should not be the
+                    # case here.
+                   ),
     Input_file->new('Jamo.txt', v2.0.0,
                     Property => 'Jamo_Short_Name',
                     Each_Line_Handler => \&filter_jamo_line,
@@ -19505,6 +19513,13 @@ my @input_file_objects = (
                           . ' informative radical-stroke values that are used'
                           . ' internally by Unicode',
                    ),
+    Input_file->new('VerticalOrientation.txt', v10.0.0,
+                    Property => 'Vertical_Orientation',
+                    Has_Missings_Defaults => $NOT_IGNORED,
+                   ),
+    Input_file->new('NushuSources.txt', v10.0.0,
+                    Skip => 'Specifies source material for Nushu characters',
+                   ),
 );
 
 # End of all the preliminaries.
index 7879574..3ae0fc7 100644 (file)
  * 37f6186253da9824bdb27f4ad867bfe8c25d4dc6bdb2f05585e40a034675a348 lib/unicore/extracted/DLineBreak.txt
  * ef24061b5a5dc93d7e90c2e34530ec757180ee75d872cba65ffc946e52624ae8 lib/unicore/extracted/DNumType.txt
  * a197371fec9a1b517058b440841f60f9378d81682084eef8db22a88cb2f96e90 lib/unicore/extracted/DNumValues.txt
- * 717985d5fe0830f5b72ca21287a7d9f15ba6a383c29fbd3c47231d5f63edb68b lib/unicore/mktables
+ * fd5dda65f76b9209787c5d0bb99f4cde8708c0bf86bcefe344d2d474a0306e28 lib/unicore/mktables
  * cdecb300baad839a6f62791229f551a4fa33f3cbdca08e378dc976466354e778 lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 9ea6338945a7d70e5ea4b31ac7856c0b521df96be002e94b4b3b7d31debbf3ab regen/regcharclass.pl
index feac27e..6b0e900 100644 (file)
@@ -759,7 +759,7 @@ sub output_GCB_table() {
     # regional indicator (RI) symbols if there is an odd number of RI
     # characters before the break point.  Must be resolved in runtime code.
     #
-    # GB12 ^ (RI RI)* RI × RI
+    # GB12 sot (RI RI)* RI × RI
     # GB13 [^RI] (RI RI)* RI × RI
     $gcb_table[$gcb_enums{'Regional_Indicator'}]
               [$gcb_enums{'Regional_Indicator'}] = $gcb_actions{GCB_RI_then_RI};
@@ -1509,7 +1509,7 @@ sub output_WB_table() {
     # regional indicator (RI) symbols if there is an odd number of RI
     # characters before the break point.
     # WB16  [^RI] (RI RI)* RI × RI
-    # WB15   ^    (RI RI)* RI × RI
+    # WB15   sot    (RI RI)* RI × RI
     $wb_table[$wb_enums{'Regional_Indicator'}]
              [$wb_enums{'Regional_Indicator'}] = $wb_actions{'WB_RI_then_RI'};
 
index 134b196..05675ad 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -4345,7 +4345,7 @@ S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after, const U8 * const strb
                 /* Do not break within emoji flag sequences. That is, do not
                  * break between regional indicator (RI) symbols if there is an
                  * odd number of RI characters before the break point.
-                 *  GB12     ^ (RI RI)* RI × RI
+                 *  GB12   sot (RI RI)* RI × RI
                  *  GB13 [^RI] (RI RI)* RI × RI */
 
                 while (backup_one_GCB(strbeg,
@@ -4637,7 +4637,7 @@ S_isLB(pTHX_ LB_enum before,
                  * only if there are an even number of regional indicators
                  * preceding the position of the break.
                  *
-                 *  sot (RI RI)* RI × RI
+                 *    sot (RI RI)* RI × RI
                  *  [^RI] (RI RI)* RI × RI */
 
                 while (backup_one_LB(strbeg,
@@ -5167,7 +5167,7 @@ S_isWB(pTHX_ WB_enum previous,
                  * odd number of RI characters before the potential break
                  * point.
                  *
-                 * WB15     ^ (RI RI)* RI × RI
+                 * WB15   sot (RI RI)* RI × RI
                  * WB16 [^RI] (RI RI)* RI × RI */
 
                 while (backup_one_WB(&previous,