This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Change named sequences to 5 digits
author Karl Williamson <khw@cpan.org>
Fri, 20 Mar 2020 04:02:38 +0000 (22:02 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 20 Mar 2020 13:44:30 +0000 (07:44 -0600)
This makes them correspond to names for single characters, and will make
parsing easier in the next commits.

charclass_invlists.h
lib/unicore/mktables
lib/unicore/uni_keywords.pl
regcharclass.h
uni_keywords.h

index 1e4ecd5..7acc01f 100644 (file)
@@ -419864,7 +419864,7 @@ static const U8 WB_table[23][23] = {
  * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
  * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
  * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
index b601d06..456e6d0 100644 (file)
@@ -12164,15 +12164,12 @@ sub process_NamedSequences($file) {
             next;
         }
 
-        # Code points below 0x0100 need to be converted to native
-        $sequence =~ s{ \b 00 ( [0-9A-F]{2} ) \b }
-                      { sprintf("%04X", utf8::unicode_to_native(hex $1)) }gxe
-                                                        if NON_ASCII_PLATFORM;
-
-        # Note single \t in keeping with special output format of
-        # Perl_charnames.  But it turns out that the code points don't have to
-        # be 5 digits long, like the rest, based on the internal workings of
-        # charnames.pm.  This could be easily changed for consistency.
+        # Code points need to be 5 digits long like the other entries in
+        # Name.pl, for regcomp.c parsing; and the ones below 0x0100 need to be
+        # converted to native
+        $sequence = join " ", map { sprintf("%05X",
+                                    utf8::unicode_to_native(hex $_))
+                                  } split / /, $sequence;
         push @named_sequences, "$sequence\n$name\n";
     }
     return;
index 4b42de2..0d3291e 100644 (file)
 # baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
 # 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
 # 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+# d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
 # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
 # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
 # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
index ce81ea5..40211cc 100644 (file)
  * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
  * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
  * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl
index c5744ae..ea20c36 100644 (file)
@@ -7537,7 +7537,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
  * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
  * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
  * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
  * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl