This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Refactor populating simple case folding tables
author Karl Williamson <public@khwilliamson.com>
Sat, 21 Jan 2012 19:57:41 +0000 (12:57 -0700)
committer Karl Williamson <public@khwilliamson.com>
Sat, 4 Feb 2012 23:29:29 +0000 (16:29 -0700)
These three tables are handled alike; this creates a loop to execute the
same instructions on each of them.  Currently there is so little to do
that it wouldn't be worth it, except that future commits will add
complications, and this makes those easier to handle.

There is now a test that the input data is sane, and instead of
overwriting a value in a table with a known identical value, we skip
that.  This doesn't save much effort, because most of the work is
looking up the value (which we can now check sanity for), but again will
be useful for future commits.

lib/unicore/mktables

index ab2d3a0..00a38ef 100644 (file)
@@ -10793,23 +10793,51 @@ END
             return;
         }
 
-        $_ = "$fields[0]; lc; $fields[1]";
-        $file->insert_adjusted_lines("$fields[0]; tc; $fields[2]");
-        $file->insert_adjusted_lines("$fields[0]; uc; $fields[3]");
-
-        # Copy any simple case change to the special tables constructed if
-        # being overridden by a multi-character case change.
-        if ($fields[1] ne $fields[0]
-            && (my $value = $lc->value_of(hex $fields[0])) ne $CODE_POINT)
-        {
-            $file->insert_adjusted_lines("$fields[0]; _slc; $value");
-        }
-        if ($fields[3] ne $fields[0]
-            && (my $value = $uc->value_of(hex $fields[0])) ne $CODE_POINT)
-        {
-            $file->insert_adjusted_lines("$fields[0]; _suc; $value");
+        my $decimal_code_point = hex $fields[0];
+
+        # Loop to handle each of the three mappings in the input line, in
+        # order, with $i indicating the current field number.
+        my $i = 0;
+        for my $object ($lc, $tc, $uc) {
+            $i++;   # First time through, $i = 1 ... 3rd time = 3
+
+            my $value = $object->value_of($decimal_code_point);
+            $value = ($value eq $CODE_POINT)
+                      ? $decimal_code_point
+                      : hex $value;
+
+            # If this isn't a multi-character mapping, it should already have
+            # been read in.
+            if ($fields[$i] !~ / /) {
+                if ($value != hex $fields[$i]) {
+                    Carp::my_carp("Bad news. UnicodeData.txt thinks "
+                                  . $object->name
+                                  . "(0x$fields[0]) is $value"
+                                  . " and SpecialCasing.txt thinks it is "
+                                  . hex $fields[$i]
+                                  . ".  Good luck.  Proceeding anyway.");
+                }
+            }
+            else {
+                $file->insert_adjusted_lines("$fields[0]; "
+                                             . $object->full_name
+                                             . "; $fields[$i]");
+
+                # Copy any simple case change to the special tables
+                # constructed if being overridden by a multi-character case
+                # change.
+                if ($value != $decimal_code_point) {
+                    $file->insert_adjusted_lines(sprintf("%s; _s%s; %04X",
+                                                 $fields[0],
+                                                 $object->name,
+                                                 $value));
+                }
+            }
         }
 
+        # Everything has been handled by the insert_adjusted_lines()
+        $_ = "";
+
         return;
     }
 }