This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Add code point ages to --annotate option
authorKarl Williamson <khw@cpan.org>
Tue, 18 Aug 2015 20:47:48 +0000 (14:47 -0600)
committerKarl Williamson <khw@cpan.org>
Tue, 18 Aug 2015 21:03:35 +0000 (15:03 -0600)
This can be useful information.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index 64406bf..8f14d31 100644 (file)
@@ -99537,7 +99537,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
+ * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl
index 27cb45a..8153936 100644 (file)
@@ -1441,6 +1441,7 @@ sub objaddr($) {
 # after all the input has been processed.  But most can be skipped, as they
 # have the same descriptive phrases, such as being unassigned
 my @viacode;            # Contains the 1 million character names
+my @age;                # And their ages ("" if none)
 my @printable;          # boolean: And are those characters printable?
 my @annotate_char_type; # Contains a type of those characters, specifically
                         # for the purposes of annotation.
@@ -1499,12 +1500,14 @@ sub populate_char_info ($) {
             $annotate_char_type[$i] = $ABOVE_UNICODE_TYPE;
             $printable[$i] = 0;
             $end = $MAX_WORKING_CODEPOINT;
+            $age[$i] = "";
         }
         elsif ($gc-> table('Private_use')->contains($i)) {
             $viacode[$i] = 'Private Use';
             $annotate_char_type[$i] = $PRIVATE_USE_TYPE;
             $printable[$i] = 0;
             $end = $gc->table('Private_Use')->containing_range($i)->end;
+            $age[$i] = property_ref("Age")->value_of($i);
         }
         elsif ((defined ($nonchar =
                             Property::property_ref('Noncharacter_Code_Point'))
@@ -1515,11 +1518,13 @@ sub populate_char_info ($) {
             $printable[$i] = 0;
             $end = property_ref('Noncharacter_Code_Point')->table('Y')->
                                                     containing_range($i)->end;
+            $age[$i] = property_ref("Age")->value_of($i);
         }
         elsif ($gc-> table('Control')->contains($i)) {
             $viacode[$i] = property_ref('Name_Alias')->value_of($i) || 'Control';
             $annotate_char_type[$i] = $CONTROL_TYPE;
             $printable[$i] = 0;
+            $age[$i] = property_ref("Age")->value_of($i);
         }
         elsif ($gc-> table('Unassigned')->contains($i)) {
             $annotate_char_type[$i] = $UNASSIGNED_TYPE;
@@ -1540,12 +1545,14 @@ sub populate_char_info ($) {
                            $unassigned_sans_noncharacters->
                                                     containing_range($i)->end);
             }
+            $age[$i] = property_ref("Age")->value_of($i);
         }
         elsif ($perl->table('_Perl_Surrogate')->contains($i)) {
             $viacode[$i] = 'Surrogate';
             $annotate_char_type[$i] = $SURROGATE_TYPE;
             $printable[$i] = 0;
             $end = $gc->table('Surrogate')->containing_range($i)->end;
+            $age[$i] = property_ref("Age")->value_of($i);
         }
         else {
             Carp::my_carp_bug("Can't figure out how to annotate "
@@ -1561,7 +1568,18 @@ sub populate_char_info ($) {
     # appended to the name, do that.
     elsif ($annotate_char_type[$i] == $CP_IN_NAME) {
         $viacode[$i] .= sprintf("-%04X", $i);
-        $end = $perl_charname->containing_range($i)->end;
+
+        # Do all these as groups of the same age, instead of individually,
+        # because their names are so meaningless, and there are typically
+        # large quantities of them.
+        my $Age = property_ref("Age");
+        $age[$i] = $Age->value_of($i);
+        my $limit = $perl_charname->containing_range($i)->end;
+        $end = $i + 1;
+        while ($end <= $limit && $Age->value_of($end) == $age[$i]) {
+            $end++;
+        }
+        $end--;
     }
 
     # And here, has a name, but if it's a hangul syllable one, replace it with
@@ -1574,8 +1592,12 @@ sub populate_char_info ($) {
         my $T = $TBase + $SIndex % $TCount;
         $viacode[$i] = "HANGUL SYLLABLE $Jamo{$L}$Jamo{$V}";
         $viacode[$i] .= $Jamo{$T} if $T != $TBase;
+        $age[$i] = property_ref("Age")->value_of($i);
         $end = $perl_charname->containing_range($i)->end;
     }
+    else {
+        $age[$i] = property_ref("Age")->value_of($i);
+    }
 
     return if ! defined wantarray;
     return $i if ! defined $end;    # If not a range, return the input
@@ -6341,7 +6363,10 @@ END
                                 else { # Indent if not displaying code points
                                     $annotation = " " x 4;
                                 }
-                                $annotation .= " $range_name" if $range_name;
+                                if ($range_name) {
+                                    $annotation .= " $age[$i]" if $age[$i];
+                                    $annotation .= " $range_name";
+                                }
 
                                 # Include the number of code points in the
                                 # range
@@ -6418,7 +6443,7 @@ END
                                 }
 
                                 if ($include_cp) {
-                                    $annotation = sprintf "%04X", $i;
+                                    $annotation = sprintf "%04X %s", $i, $age[$i];
                                     if ($use_adjustments) {
                                         $annotation .= " => $output_value";
                                     }
index f7cd2fd..f4b6298 100644 (file)
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
+ * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
  * d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl