t/re/fold_grind.t: Generalize for non-ASCII platforms

author Karl Williamson <public@khwilliamson.com>

Tue, 19 Mar 2013 17:32:55 +0000 (11:32 -0600)

committer Karl Williamson <khw@cpan.org>

Fri, 6 Mar 2015 04:48:24 +0000 (21:48 -0700)
author Karl Williamson <public@khwilliamson.com>
Tue, 19 Mar 2013 17:32:55 +0000 (11:32 -0600)
committer Karl Williamson <khw@cpan.org>
Fri, 6 Mar 2015 04:48:24 +0000 (21:48 -0700)
diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t

index a39affb..27f730d 100644 (file)
--- a/t/re/fold_grind.t
+++ b/t/re/fold_grind.t
@@ -26,7 +26,7 @@ use POSIX;
  
  # Special-cased characters in the .c's that we want to make sure get tested.
  my %be_sure_to_test = (
-        "\xDF" => 1, # LATIN_SMALL_LETTER_SHARP_S
+        chr utf8::unicode_to_native(0xDF) => 1, # LATIN_SMALL_LETTER_SHARP_S
          "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
          "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
          "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
@@ -71,7 +71,7 @@ my $skip_apparently_redundant = ! $ENV{PERL_RUN_SLOW_TESTS};
  sub range_type {
      my $ord = ord shift;
  
-    return $ASCII if $ord < 128;
+    return $ASCII if utf8::native_to_unicode($ord) < 128;
      return $Latin1 if $ord < 256;
      return $Unicode;
  }
@@ -260,7 +260,7 @@ my $file="../lib/unicore/CaseFolding.txt";
  # for), and it is in the modern format (starting in Unicode 3.1.0) and it is
  # available.  This avoids being affected by potential bugs introduced by other
  # layers of Perl
-if (ord('A') == 65
+if ($::IS_ASCII
      && pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0
      && open my $fh, "<", $file)
  {
@@ -304,7 +304,8 @@ else {  # Here, can't use the .txt file: read the Unicode rules file and
              $adjust++;
              my @to = map { $_ + $adjust } @{$invmap_ref->[$i]};
              push @{$folds{$j}}, @to;
-            my $folded_str = pack "U0U*", @to;
+            my $folded_str = join "", map { chr } @to;
+            utf8::upgrade($folded_str);
              #note (sprintf "%d: %04X: %s", __LINE__, $j, join " ",
              #    map { sprintf "%04X", $_  + $adjust } @{$invmap_ref->[$i]});
              push @{$inverse_folds{$folded_str}}, chr $j;
@@ -389,8 +390,8 @@ foreach my $to (sort { (length $a == length $b)
  }
  
  # For each range type, test additionally a character that folds to itself
-add_test(chr 0x3A, chr 0x3A);
-add_test(chr 0xF7, chr 0xF7);
+add_test(":", ":");
+add_test(chr utf8::unicode_to_native(0xF7), chr utf8::unicode_to_native(0xF7));
  add_test(chr 0x2C7, chr 0x2C7);
  
  # To cut down on the number of tests
@@ -417,7 +418,7 @@ sub prefix {
  # It doesn't return pairs like (a, a), (b, b).  Change the slice to an array
  # to do that.  This was just to have fewer tests.
  sub pairs (@) {
-    #print __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
+    #print STDERR __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
      map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
  }
  
@@ -434,7 +435,7 @@ if($Config{d_setlocale}) {
          # legal, but since we don't know what the right answers should be,
          # skip the locale tests in that situation.
          for my $i (128 .. 255) {
-            my $char = chr($i);
+            my $char = chr(utf8::unicode_to_native($i));
              goto skip_C_locale_tests if uc($char) ne $char || lc($char) ne $char;
          }
          push @charsets, 'l';
@@ -480,23 +481,21 @@ foreach my $test (sort { numerically } keys %tests) {
      # happens to generate multi/multi, skip.
      next if @target > 1 && @pattern > 1;
  
-    # Have to convert non-utf8 chars to native char set
-    @target = map { utf8::unicode_to_native($_) } @target;
-    @pattern = map { utf8::unicode_to_native($_) } @pattern;
-
      # Get in hex form.
      my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
      my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;
  
      my $target_above_latin1 = grep { $_ > 255 } @target;
      my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
-    my $target_has_ascii = grep { $_ < 128 } @target;
-    my $pattern_has_ascii = grep { $_ < 128 } @pattern;
-    my $target_only_ascii = ! grep { $_ > 127 } @target;
-    my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
+    my $target_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @target;
+    my $pattern_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @pattern;
+    my $target_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @target;
+    my $pattern_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @pattern;
      my $target_has_latin1 = grep { $_ < 256 } @target;
-    my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
-    my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
+    my $target_has_upper_latin1
+                = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @target;
+    my $pattern_has_upper_latin1
+                = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @pattern;
      my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
      my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
  
@@ -699,7 +698,7 @@ foreach my $test (sort { numerically } keys %tests) {
            my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
            my @rhs = @x_pattern;
            my $rhs = join "", @rhs;
-          my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
+          my $should_fail = (! $uni_semantics && $ord < 256 && ! $is_self && utf8::native_to_unicode($ord) >= 128)
                              || ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
                              || ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);
author	Karl Williamson <public@khwilliamson.com>
	Tue, 19 Mar 2013 17:32:55 +0000 (11:32 -0600)
committer	Karl Williamson <khw@cpan.org>
	Fri, 6 Mar 2015 04:48:24 +0000 (21:48 -0700)