This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Handle early Unicodes' script names
author Karl Williamson <khw@cpan.org>
Mon, 22 Jun 2015 16:06:45 +0000 (10:06 -0600)
committer Karl Williamson <khw@cpan.org>
Wed, 29 Jul 2015 04:15:53 +0000 (22:15 -0600)
These were all caps in early releases

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index f8a413e..36c9835 100644 (file)
@@ -99521,7 +99521,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 234024b47cb9fd57ae95b60cd4e9087feea586b70c1243b84141534d1bca2918 lib/unicore/mktables
+ * 255e5cd690d01db3f4797adb31fd7955a47e33a1848e4ff4700609a292129bba lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * f199f92c0b5f87882b0198936ea8ef3dc43627b57a77ac3eb9250bd2664bbd88 regen/mk_invlists.pl
index 2f6fc5f..f7ceed8 100644 (file)
@@ -13040,6 +13040,22 @@ sub filter_early_version_name_alias_line {
     return;
 }
 
+sub filter_all_caps_script_names {
+    # Line filter: rewrites the current input line, held in $_, in place.
+    # Some early Unicode releases had the script names in all CAPS.  This
+    # converts them to just the first letter of each word being capital.
+    # Fields are ';'-separated; the second one is taken as the script name.
+    my ($range, $script, @remainder)
+        = split /\s*;\s*/, $_, -1; # -1 => retain trailing null fields
+    my @words = split "_", $script;     # Words are separated by '_'
+    for my $word (@words) {             # $word aliases the array element
+        $word =
+            ucfirst(lc($word)) if $word ne 'CJK';   # 'CJK' stays all caps
+    }
+    $script = join "_", @words;
+    $_ = join ";", $range, $script, @remainder; # No blanks around ';'
+}
+
 sub finish_Unicode() {
     # This routine should be called after all the Unicode files have been read
     # in.  It:
@@ -18152,6 +18168,9 @@ my @input_file_objects = (
                     ),
     Input_file->new('Scripts.txt', v3.1.0,
                     Property => 'Script',
+                    Each_Line_Handler => (($v_version le v4.0.0)
+                                          ? \&filter_all_caps_script_names
+                                          : undef),
                     Has_Missings_Defaults => $NOT_IGNORED,
                     ),
     Input_file->new('DNormalizationProps.txt', v3.1.0,
index a9409af..01922ef 100644 (file)
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 234024b47cb9fd57ae95b60cd4e9087feea586b70c1243b84141534d1bca2918 lib/unicore/mktables
+ * 255e5cd690d01db3f4797adb31fd7955a47e33a1848e4ff4700609a292129bba lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * 5e47f645eac3a918246254e19c06b604c8ea088cf62da5be84dcb953ef2bf16c regen/regcharclass.pl