This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Generate _Perl_SCX property
author Karl Williamson <khw@cpan.org>
Sat, 23 Dec 2017 22:08:45 +0000 (15:08 -0700)
committer Karl Williamson <khw@cpan.org>
Mon, 25 Dec 2017 00:20:45 +0000 (17:20 -0700)
charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index 1598b80..daf842d 100644 (file)
@@ -97430,7 +97430,7 @@ static const U8 WB_table[24][24] = {
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * 850e28e9ceab3cd75bab215575ab8beb7cca61b32b989b725ed55d51991394ad lib/unicore/mktables
+ * 5671c3de473b25e7ea47097e4906260624dfabe3e9b1739f490aecbc3d858459 lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 520051ffc8f534c956ec809456efdd79ba9708933c5f009f823e92b4eeb7a163 regen/mk_invlists.pl
index 8877535..c643672 100644 (file)
@@ -3554,7 +3554,7 @@ sub trace { return main::trace(@_); }
     main::set_access('end', \%end, 'r', 's');
 
     my %value;
-    main::set_access('value', \%value, 'r');
+    main::set_access('value', \%value, 'r', 's');
 
     my %type;
     main::set_access('type', \%type, 'r');
@@ -13418,8 +13418,8 @@ sub setup_script_extensions {
     # property.
 
     $scx = property_ref("Script_Extensions");
-    $scx = Property->new("scx", Full_Name => "Script_Extensions")
-                                                            if ! defined $scx;
+    return unless defined $scx;
+
     $scx->_set_format($STRING_WHITE_SPACE_LIST);
     $scx->initialize($script);
     $scx->set_default_map($script->default_map);
@@ -15607,6 +15607,71 @@ END
         }
     }
 
+    # This property is a modification of the scx property
+    my $perl_scx = Property->new('_Perl_SCX',
+                                 Fate => $INTERNAL_ONLY,
+                                 Perl_Extension => 1,
+                                 Directory => $map_directory,
+                                 Type => $ENUM);
+    my $source;
+
+    # Use scx if available; otherwise sc;  if neither is there (a very old
+    # Unicode version), just say that everything is 'Common'
+    if (defined $scx) {
+        $source = $scx;
+        $perl_scx->set_default_map('Unknown');
+    }
+    elsif (defined $script) {
+        $source = $script;
+
+        # Early versions of 'sc' had everything be 'Common'
+        if (defined $script->table('Unknown')) {
+            $perl_scx->set_default_map('Unknown');
+        }
+        else {
+            $perl_scx->set_default_map('Common');
+        }
+    } else {
+        $perl_scx->add_match_table('Common');
+        $perl_scx->add_map(0, $MAX_UNICODE_CODEPOINT, 'Common');
+
+        $perl_scx->add_match_table('Unknown');
+        $perl_scx->set_default_map('Unknown');
+    }
+
+    $perl_scx->_set_format($STRING_WHITE_SPACE_LIST);
+    $perl_scx->set_pre_declared_maps(0); # PropValueAliases doesn't list these
+
+    if (defined $source) {
+        $perl_scx->initialize($source);
+
+        # UTS 39 says that the scx property should be augmented with the
+        # combined values Hanb, Jpan, and Kore, as these mixes are common.
+        for my $range ($perl_scx->ranges) {
+            my $value = $range->value;
+            my $changed = $value =~ s/ ( \b Han i? \b ) /$1 Hanb Jpan Kore/xi;
+             $changed |=  $value =~ s/ ( \b Hira (gana)? \b ) /$1 Jpan/xi;
+             $changed |=  $value =~ s/ ( \b Kata (kana)? \b ) /$1 Jpan/xi;
+             $changed |=  $value =~ s{ ( \b Katakana_or_Hiragana \b ) }
+                                     {$1 Katakana Hiragana Jpan}xi;
+             $changed |=  $value =~ s/ ( \b Hang (ul)? \b ) /$1 Kore/xi;
+             $changed |=  $value =~ s/ ( \b Bopo (mofo)? \b ) /$1 Hanb/xi;
+
+            if ($changed) {
+                $value = join " ", uniques split " ", $value;
+                $range->set_value($value)
+            }
+        }
+
+        foreach my $table ($source->tables) {
+            my $scx_table = $perl_scx->add_match_table($table->name,
+                                    Full_Name => $table->full_name);
+            foreach my $alias ($table->aliases) {
+                $scx_table->add_alias($alias->name);
+            }
+        }
+    }
+
     # Here done with all the basic stuff.  Ready to populate the information
     # about each character if annotating them.
     if ($annotate) {
@@ -19834,6 +19899,9 @@ my @input_file_objects = (
                    ),
     Input_file->new('ScriptExtensions.txt', v6.0.0,
                     Property => 'Script_Extensions',
+                    Early => [ sub {} ], # Doesn't do anything but ensures
+                                         # that this isn't skipped for early
+                                         # versions
                     Pre_Handler => \&setup_script_extensions,
                     Each_Line_Handler => \&filter_script_extensions_line,
                     Has_Missings_Defaults => (($v_version le v6.0.0)
index b5b9934..b575bd8 100644 (file)
  * be0f129691d479aa38646e4ca0ec1ee576ae7f75b0300a5624a7fa862fa8abba lib/unicore/extracted/DLineBreak.txt
  * 92449d354d9f6b6f2f97a292ebb59f6344ffdeb83d120d7d23e569c43ba67cd5 lib/unicore/extracted/DNumType.txt
  * e3a319527153b0c6c0c549b40fc6f3a01a7a0dcd6620784391db25901df3b154 lib/unicore/extracted/DNumValues.txt
- * 850e28e9ceab3cd75bab215575ab8beb7cca61b32b989b725ed55d51991394ad lib/unicore/mktables
+ * 5671c3de473b25e7ea47097e4906260624dfabe3e9b1739f490aecbc3d858459 lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 9ea6338945a7d70e5ea4b31ac7856c0b521df96be002e94b4b3b7d31debbf3ab regen/regcharclass.pl