This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regen/mk_invlists.pl: Handle EBCDIC
author Karl Williamson <khw@cpan.org>
Thu, 22 Mar 2018 20:25:44 +0000 (14:25 -0600)
committer Karl Williamson <khw@cpan.org>
Mon, 26 Mar 2018 22:26:54 +0000 (16:26 -0600)
The code and comment here were out-of-date.  When we are constructing
EBCDIC we need to modify the code points that are the result of a case
change that expands to more than one code point.  This code is not
currently in use.

charclass_invlists.h
regen/mk_invlists.pl

index 1acfa85..9ff8d52 100644 (file)
@@ -109391,5 +109391,5 @@ static const U8 WB_table[24][24] = {
  * ea4dc61a00d2db9bd46f3ddec706b5b7b11e8fcf848fb384b54b507fb70d8e90 lib/unicore/mktables
  * 21653d2744fdd071f9ef138c805393901bb9547cf3e777ebf50215a191f986ea lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
- * a9bc2e7a8c910b4065e21e30dd2c2976b09a47108ba10cf5c01e8a0dd61efd7e regen/mk_invlists.pl
+ * 9f2870ea71fbbbca07abf885c9e9c5d555facf587ca105420f48ca3fb9bbfb15 regen/mk_invlists.pl
  * ex: set ro: */
index 38856e5..6327ed5 100644 (file)
@@ -1928,7 +1928,7 @@ for my $charset (get_supported_code_pages()) {
                     @invmap = @$map_ref;
                     $map_format = $format;
                     $map_default = $default;
-                    $maps_to_code_point = $map_format =~ /x/;
+                    $maps_to_code_point = $map_format =~ / a ($ | [^r] ) /x;
                     $to_adjust = $map_format =~ /a/;
                 }
             }
@@ -2032,12 +2032,9 @@ for my $charset (get_supported_code_pages()) {
                     # A hash key can't be a ref (we are only expecting arrays
                     # of scalars here), so convert any such to a string that
                     # will be converted back later (using a vertical tab as
-                    # the separator).  Even if the mapping is to code points,
-                    # we don't translate to native here because the code
-                    # output_invmap() calls to output these arrays assumes the
-                    # input is Unicode, not native.
+                    # the separator).
                     if (ref $invmap[0]) {
-                        $bucket = join "\cK", @{$invmap[0]};
+                        $bucket = join "\cK", map { a2n($_) }  @{$invmap[0]};
                     }
                     elsif ($maps_to_code_point && $invmap[0] =~ $numeric_re) {