This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Get fold generation to work on early Unicodes
author: Karl Williamson <khw@cpan.org>
Fri, 26 Jun 2015 20:17:06 +0000 (14:17 -0600)
committer: Karl Williamson <khw@cpan.org>
Wed, 29 Jul 2015 04:15:55 +0000 (22:15 -0600)
CaseFolding.txt isn't present in early Unicode versions.  For these,
lowercasing is used instead.  But the format of the lowercasing table isn't
quite the same, so add code to handle the differences.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index bcdf064..4989969 100644 (file)
@@ -99521,7 +99521,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 0f5ec92c91c841a77738852e07df377be436e53c77b388f4a96500690cbdecf4 lib/unicore/mktables
+ * f80d9451fb07e23977f0a4d04b58e477f7b565f8897670a959a8f6395ab2c706 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * f199f92c0b5f87882b0198936ea8ef3dc43627b57a77ac3eb9250bd2664bbd88 regen/mk_invlists.pl
index 5963d1c..3cce20a 100644 (file)
@@ -14005,10 +14005,17 @@ sub compile_perl() {
             my $fold_range = $cf->containing_range($code_point);
             next unless defined $fold_range;
 
+            # Skip if folds to itself
+            next if $fold_range->value eq $CODE_POINT;
+
             my @hex_folds = split " ", $fold_range->value;
-            my $start_cp = hex $hex_folds[0];
+            my $start_cp = $hex_folds[0];
+            next if $start_cp eq $CODE_POINT;
+            $start_cp = hex $start_cp;
             foreach my $i (0 .. @hex_folds - 1) {
-                my $cp = hex $hex_folds[$i];
+                my $cp = $hex_folds[$i];
+                next if $cp eq $CODE_POINT;
+                $cp = hex $cp;
                 next unless $cp > 255;    # Already have the < 256 ones
 
                 $loc_problem_folds->add_range($cp, $cp);
@@ -14028,6 +14035,7 @@ sub compile_perl() {
 
     # Look through all the known folds to populate these tables.
     foreach my $range ($cf->ranges) {
+        next if $range->value eq $CODE_POINT;
         my $start = $range->start;
         my $end = $range->end;
         $any_folds->add_range($start, $end);
index 5e35498..baaa3ef 100644 (file)
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 0f5ec92c91c841a77738852e07df377be436e53c77b388f4a96500690cbdecf4 lib/unicore/mktables
+ * f80d9451fb07e23977f0a4d04b58e477f7b565f8897670a959a8f6395ab2c706 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * 8b29da548b7ad90659de234b5061a8c9fb0f40322a256d60fc5e9385ae4ece0e regen/regcharclass.pl