# Special-cased characters in the .c's that we want to make sure get tested.
my %be_sure_to_test = (
- "\xDF" => 1, # LATIN_SMALL_LETTER_SHARP_S
+ chr utf8::unicode_to_native(0xDF) => 1, # LATIN_SMALL_LETTER_SHARP_S
"\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
"\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
"\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
sub range_type {
my $ord = ord shift;
- return $ASCII if $ord < 128;
+ return $ASCII if utf8::native_to_unicode($ord) < 128;
return $Latin1 if $ord < 256;
return $Unicode;
}
# for), and it is in the modern format (starting in Unicode 3.1.0) and it is
# available. This avoids being affected by potential bugs introduced by other
# layers of Perl
-if (ord('A') == 65
+if ($::IS_ASCII
&& pack("C*", split /\./, Unicode::UCD::UnicodeVersion()) ge v3.1.0
&& open my $fh, "<", $file)
{
$adjust++;
my @to = map { $_ + $adjust } @{$invmap_ref->[$i]};
push @{$folds{$j}}, @to;
- my $folded_str = pack "U0U*", @to;
+ my $folded_str = join "", map { chr } @to;
+ utf8::upgrade($folded_str);
#note (sprintf "%d: %04X: %s", __LINE__, $j, join " ",
# map { sprintf "%04X", $_ + $adjust } @{$invmap_ref->[$i]});
push @{$inverse_folds{$folded_str}}, chr $j;
}
# For each range type, test additionally a character that folds to itself
-add_test(chr 0x3A, chr 0x3A);
-add_test(chr 0xF7, chr 0xF7);
+add_test(":", ":");
+add_test(chr utf8::unicode_to_native(0xF7), chr utf8::unicode_to_native(0xF7));
add_test(chr 0x2C7, chr 0x2C7);
# To cut down on the number of tests
# It doesn't return pairs like (a, a), (b, b). Change the slice to an array
# to do that. This was just to have fewer tests.
sub pairs (@) {
- #print __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
+ #print STDERR __LINE__, ": ", join(" XXX ", map { sprintf "%04X", $_ } @_), "\n";
map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
}
# legal, but since we don't know what the right answers should be,
# skip the locale tests in that situation.
for my $i (128 .. 255) {
- my $char = chr($i);
+ my $char = chr(utf8::unicode_to_native($i));
goto skip_C_locale_tests if uc($char) ne $char || lc($char) ne $char;
}
push @charsets, 'l';
# happens to generate multi/multi, skip.
next if @target > 1 && @pattern > 1;
- # Have to convert non-utf8 chars to native char set
- @target = map { utf8::unicode_to_native($_) } @target;
- @pattern = map { utf8::unicode_to_native($_) } @pattern;
-
# Get in hex form.
my @x_target = map { sprintf "\\x{%04X}", $_ } @target;
my @x_pattern = map { sprintf "\\x{%04X}", $_ } @pattern;
my $target_above_latin1 = grep { $_ > 255 } @target;
my $pattern_above_latin1 = grep { $_ > 255 } @pattern;
- my $target_has_ascii = grep { $_ < 128 } @target;
- my $pattern_has_ascii = grep { $_ < 128 } @pattern;
- my $target_only_ascii = ! grep { $_ > 127 } @target;
- my $pattern_only_ascii = ! grep { $_ > 127 } @pattern;
+ my $target_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @target;
+ my $pattern_has_ascii = grep { utf8::native_to_unicode($_) < 128 } @pattern;
+ my $target_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @target;
+ my $pattern_only_ascii = ! grep { utf8::native_to_unicode($_) > 127 } @pattern;
my $target_has_latin1 = grep { $_ < 256 } @target;
- my $target_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @target;
- my $pattern_has_upper_latin1 = grep { $_ < 256 && $_ > 127 } @pattern;
+ my $target_has_upper_latin1
+ = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @target;
+ my $pattern_has_upper_latin1
+ = grep { $_ < 256 && utf8::native_to_unicode($_) > 127 } @pattern;
my $pattern_has_latin1 = grep { $_ < 256 } @pattern;
my $is_self = @target == 1 && @pattern == 1 && $target[0] == $pattern[0];
my $lhs_str = eval qq{"$lhs"}; fail($@) if $@;
my @rhs = @x_pattern;
my $rhs = join "", @rhs;
- my $should_fail = (! $uni_semantics && $ord >= 128 && $ord < 256 && ! $is_self)
+ my $should_fail = (! $uni_semantics && $ord < 256 && ! $is_self && utf8::native_to_unicode($ord) >= 128)
|| ($charset eq 'aa' && $target_has_ascii != $pattern_has_ascii)
|| ($charset eq 'l' && $target_has_latin1 != $pattern_has_latin1);