This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
re/uniprops: Fix EBCDIC issue
author Karl Williamson <khw@cpan.org>
Fri, 29 Jan 2016 04:31:36 +0000 (21:31 -0700)
committer Karl Williamson <khw@cpan.org>
Wed, 3 Feb 2016 17:34:23 +0000 (10:34 -0700)
Things like qr/\s/ are expecting native code points, not Unicode ones.

charclass_invlists.h
lib/unicore/mktables
regcharclass.h

index d533154..a362530 100644 (file)
@@ -87887,7 +87887,7 @@ static const U8 WB_table[19][19] = {
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * b99d791e3e9edab7a3e2016943081585b069aa9c18447b3cdf3137b97f453f7c lib/unicore/mktables
+ * 7baa3c79b0ac81279720b4871737ab448d7ddd1bfad31b981437ce49c1292535 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * 5774f77d07a81945b6a679ecce07ad90cdb334f3fb402ff63bdbecd2ec67da05 regen/mk_invlists.pl
index 8cb5f0f..83333dc 100644 (file)
@@ -19689,10 +19689,11 @@ my $WB_Extend_or_Format_re = eval 'qr/[\p{WB=Extend}\p{WB=Format}]/';
 sub _test_break($$) {
     # Test various break property matches.  The 2nd parameter gives the
     # property name.  The input is a line from auxiliary/*Test.txt for the
-    # given property.  Each such line is a sequence of code points given by
-    # their hex numbers, separated by the two characters defined just before
-    # this subroutine that indicate that either there can or cannot be a break
-    # between the adjacent code points.  All these are tested.
+    # given property.  Each such line is a sequence of Unicode (not native)
+    # code points given by their hex numbers, separated by the two characters
+    # defined just before this subroutine that indicate that either there can
+    # or cannot be a break between the adjacent code points.  All these are
+    # tested.
     #
     # For the gcb property extra tests are made.  if there isn't a break, that
     # means the sequence forms an extended grapheme cluster, which means that
@@ -19736,8 +19737,8 @@ sub _test_break($$) {
             # non-breaks.
             next if $line[$i+1] =~ /$nobreak/;
 
-            my $lhs = chr hex $line[$i];
-            my $rhs = chr hex $line[$i+2];
+            my $lhs = chr utf8::unicode_to_native(hex $line[$i]);
+            my $rhs = chr utf8::unicode_to_native(hex $line[$i+2]);
 
             # And it only affects adjacent space characters.
             next if $lhs !~ /\s/u;
index 217837e..99b2fda 100644 (file)
  * 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
  * 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
  * a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * b99d791e3e9edab7a3e2016943081585b069aa9c18447b3cdf3137b97f453f7c lib/unicore/mktables
+ * 7baa3c79b0ac81279720b4871737ab448d7ddd1bfad31b981437ce49c1292535 lib/unicore/mktables
  * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
  * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
  * d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl