t/re/pat.t: Generalize for EBCDIC

author Karl Williamson <public@khwilliamson.com>

Thu, 4 Apr 2013 02:15:17 +0000 (20:15 -0600)

committer Karl Williamson <khw@cpan.org>

Fri, 6 Mar 2015 04:48:26 +0000 (21:48 -0700)
author Karl Williamson <public@khwilliamson.com>
Thu, 4 Apr 2013 02:15:17 +0000 (20:15 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 6 Mar 2015 04:48:26 +0000 (21:48 -0700)
diff --git a/t/re/pat.t b/t/re/pat.t

index 5dad5ef..a60bfe5 100644 (file)
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -17,7 +17,7 @@ BEGIN {
      chdir 't' if -d 't';
      @INC = ('../lib','.','../ext/re');
      require Config; import Config;
-    require './test.pl';
+    require './test.pl'; require './charset_tools.pl';
      skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
      skip_all_without_unicode_tables();
  }
@@ -1099,7 +1099,7 @@ sub run_tests {
      }
      {
          # we are actually testing that we dont die when executing these patterns
-        my $e = "B\x{f6}ck";
+        my $e = "B" . uni_to_native("\x{f6}") . "ck";
          ok(!utf8::is_utf8($e), "got a latin string - rt75680");
  
          ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
@@ -1126,13 +1126,10 @@ sub run_tests {
  
      }
  
-    SKIP: {   # Some constructs with Latin1 characters cause a utf8 string not
-              # to match itself in non-utf8
-        if ($::IS_EBCDIC) {
-            skip "Needs to be customized to run on EBCDIC", 6;
-        }
-        my $c = "\xc0";
-        my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
+    {   # Some constructs with Latin1 characters cause a utf8 string not
+        # to match itself in non-utf8
+        my $c = uni_to_native("\xc0");
+        my $pattern = my $utf8_pattern = qr/(($c)+,?)/;
          utf8::upgrade($utf8_pattern);
          ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
          ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
@@ -1145,13 +1142,10 @@ sub run_tests {
          ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
      }
  
-    SKIP: {   # Make sure can override the formatting
-        if ($::IS_EBCDIC) {
-            skip "Needs to be customized to run on EBCDIC", 2;
-        }
+    {   # Make sure can override the formatting
          use feature 'unicode_strings';
-        ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
-        ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
+        ok uni_to_native("\xc0") =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
+        ok uni_to_native("\xc0") !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
      }
  
      {
@@ -1294,7 +1288,8 @@ EOP
  
      { # [perl #111174]
          use re '/u';
-        like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u";
+        my $A_grave = uni_to_native("\xc0");
+        like uni_to_native("\xe0"), qr/(?i:$A_grave)/, "(?i: shouldn't lose the passed in /u";
          use re '/a';
          unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a";
          use re '/aa';
@@ -1306,7 +1301,7 @@ EOP
         # consider the UTF8ness of the previous and current pattern
         # string, as well as the physical bytes of the pattern string
  
-       for my $s ("\xc4\x80", "\x{100}") {
+       for my $s (byte_utf8a_to_utf8n("\xc4\x80"), "\x{100}") {
             ok($s =~ /^$s$/, "re-compile check is UTF8-aware");
         }
      }
@@ -1365,11 +1360,12 @@ EOP
          # EXACTish node
          ;
          ;
-        ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization");
+        my $sharp_s = uni_to_native("\xdf");
+        ok("\x{017F}\x{017F}" =~ qr/^[$sharp_s]?$/i, "[] to EXACTish optimization");
      }
  
      {
-        for my $char (":", "\x{f7}", "\x{2010}") {
+        for my $char (":", uni_to_native("\x{f7}"), "\x{2010}") {
              my $utf8_char = $char;
              utf8::upgrade($utf8_char);
              my $display = $char;
@@ -1518,10 +1514,10 @@ EOP
          # Need to use eval, because tries to compile on ASCII platforms even
          # though the tests are skipped, and fails because 0x89-j is an illegal
          # range there.
-        like("\x89", eval "qr/[\x{89}-j]/", '"\x89" should match [\x{89}-j]');
-        like("\x8A", eval "qr/[\x{89}-j]/", '"\x8A" should match [\x{89}-j]');
-        like("\x90", eval "qr/[\x{89}-j]/", '"\x90" should match [\x{89}-j]');
-        like("\x91", eval "qr/[\x{89}-j]/", '"\x91" should match [\x{89}-j]');
+        like("\x89", eval 'qr/[\x{89}-j]/', '"\x89" should match [\x{89}-j]');
+        like("\x8A", eval 'qr/[\x{89}-j]/', '"\x8A" should match [\x{89}-j]');
+        like("\x90", eval 'qr/[\x{89}-j]/', '"\x90" should match [\x{89}-j]');
+        like("\x91", eval 'qr/[\x{89}-j]/', '"\x91" should match [\x{89}-j]');
      }
  
      # These are based on looking at the code in regcomp.c
@@ -1549,7 +1545,9 @@ EOP
          like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
      }
  
-    {
+    SKIP: {
+        skip("Tests are ASCII-centric, some would fail on EBCDIC", 12) if $::IS_EBCDIC;
+
          # Verify that the very last Latin-1 U+00FF
          # (LATIN SMALL LETTER Y WITH DIAERESIS)
          # and its UPPER counterpart (U+0178 which is pure Unicode),
author	Karl Williamson <public@khwilliamson.com>
	Thu, 4 Apr 2013 02:15:17 +0000 (20:15 -0600)
committer	Karl Williamson <khw@cpan.org>
	Fri, 6 Mar 2015 04:48:26 +0000 (21:48 -0700)