Commit | Line | Data |
---|---|---|
24df86f6 RGS |
1 | #!perl |
2 | ||
3 | BEGIN { | |
4 | chdir 't' if -d 't'; | |
5 | @INC = '../lib'; | |
0214bff6 | 6 | require './test.pl'; |
24df86f6 RGS |
7 | } |
8 | ||
a0a388a1 YO |
9 | use strict; |
10 | use warnings; | |
a0a388a1 | 11 | my @tests; |
a0a388a1 | 12 | |
2f7760b5 DM |
13 | my %todo_pass = map { $_ => 1 } |
14 | qw(00DF 1E9E FB00 FB01 FB02 FB03 FB04 FB05 FB06); | |
15 | ||
a0a388a1 | 16 | my $file="../lib/unicore/CaseFolding.txt"; |
24df86f6 | 17 | open my $fh,"<",$file or die "Failed to read '$file': $!"; |
a0a388a1 YO |
18 | while (<$fh>) { |
19 | chomp; | |
20 | my ($line,$comment)= split/\s+#\s+/, $_; | |
1443f10d | 21 | my ($cp,$type,@folded)=split/[\s;]+/,$line||''; |
a0a388a1 | 22 | next unless $type and ($type eq 'F' or $type eq 'C'); |
8bfc9fab | 23 | next if $type eq 'C'; # 'C' tests now done by fold_grind.t |
1443f10d KW |
24 | my $fold_above_latin1 = grep { hex("0x$_") > 255 } @folded; |
25 | $_="\\x{$_}" for @folded; | |
a0a388a1 | 26 | my $cpv=hex("0x$cp"); |
1443f10d | 27 | my $chr="\\x{$cp}"; |
a0a388a1 | 28 | my @str; |
1443f10d | 29 | foreach my $swap (0, 1) { # swap lhs and rhs, or not. |
3366dfc6 | 30 | foreach my $charclass (0) { # Put rhs in [...], or not |
1443f10d KW |
31 | my $lhs; |
32 | my $rhs; | |
33 | if ($swap) { | |
34 | $lhs = join "", @folded; | |
35 | $rhs = $chr; | |
36 | $rhs = "[$rhs]" if $charclass; | |
37 | } else { | |
38 | $lhs = $chr; | |
39 | $rhs = ""; | |
40 | foreach my $rhs_char (@folded) { | |
41 | $rhs .= '[' if $charclass; | |
42 | $rhs .= $rhs_char; | |
43 | $rhs .= ']' if $charclass; | |
44 | } | |
45 | } | |
46 | $lhs = "\"$lhs\""; | |
47 | $rhs = "/^$rhs\$/i"; | |
24df86f6 | 48 | |
1443f10d KW |
49 | # Try both Latin1 and Unicode for code points below 256 |
50 | foreach my $upgrade ("", 'utf8::upgrade($c); ') { | |
51 | if ($upgrade) { | |
52 | next if $swap && $fold_above_latin1; | |
53 | next if !$swap && $cpv > 255; | |
54 | } | |
55 | my $eval = "my \$c = $lhs; $upgrade\$c =~ $rhs"; | |
56 | #print __LINE__, ": $eval\n"; | |
57 | push @tests, qq[ok(eval '$eval', '$eval - $comment')]; | |
58 | if (! $swap && ($cp eq '0390' || $cp eq '03B0')) { | |
59 | $tests[-1]="TODO: { local \$::TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }" | |
60 | } elsif ($charclass && @folded > 1 && $swap && ! $upgrade && ! $fold_above_latin1) { | |
61 | $tests[-1]="TODO: { local \$::TODO='Multi-char, non-utf8 folded inside character class [ ] doesnt work';\n$tests[-1] }" | |
62 | } elsif (! $upgrade && $cpv >= 128 && $cpv <= 255 && $cpv != 0xb5) { | |
63 | $tests[-1]="TODO: { local \$::TODO='Most non-utf8 latin1 doesnt work';\n$tests[-1] }" | |
2f7760b5 DM |
64 | } elsif (! $swap && $charclass && @folded > 1 |
65 | && ! $todo_pass{$cp}) | |
66 | { | |
1443f10d KW |
67 | # There are a few of these that pass; most fail. |
68 | $tests[-1]="TODO: { local \$::TODO='Some multi-char, f8 folded inside character class [ ] doesnt work';\n$tests[-1] }" | |
69 | } | |
1443f10d KW |
70 | } |
71 | } | |
a0a388a1 | 72 | } |
24df86f6 | 73 | } |
2726813d | 74 | |
fad448f4 KW |
75 | # Now verify the case folding tables. First compute the mappings without |
76 | # resorting to the functions we're testing. | |
77 | ||
78 | # Initialize the array so each $i maps to itself. | |
79 | my @fold_ascii; | |
80 | for my $i (0 .. 255) { | |
81 | $fold_ascii[$i] = $i; | |
82 | } | |
83 | my @fold_latin1 = @fold_ascii; | |
84 | ||
85 | # Override the uppercase elements to fold to their lower case equivalents, | |
86 | # using the fact that 'A' in ASCII is 0x41, 'a' is 0x41+32, 'B' is 0x42, and | |
87 | # so on. The same paradigm applies for most of the Latin1 range cased | |
88 | # characters, but in posix anything outside ASCII maps to itself, as we've | |
89 | # already set up. | |
90 | for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) { | |
91 | my $upper_ord = ord_latin1_to_native($i); | |
92 | my $lower_ord = ord_latin1_to_native($i + 32); | |
93 | ||
94 | $fold_latin1[$upper_ord] = $lower_ord; | |
95 | ||
96 | next if $i > 127; | |
97 | $fold_ascii[$upper_ord] = $lower_ord; | |
98 | } | |
99 | ||
100 | # Same for folding lower to the upper equivalents | |
101 | for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) { | |
102 | my $lower_ord = ord_latin1_to_native($i); | |
103 | my $upper_ord = ord_latin1_to_native($i - 32); | |
104 | ||
105 | $fold_latin1[$lower_ord] = $upper_ord; | |
106 | ||
107 | next if $i > 127; | |
108 | $fold_ascii[$lower_ord] = $upper_ord; | |
109 | } | |
110 | ||
111 | # Test every latin1 character that the correct values in both /u and /d | |
112 | for my $i (0 .. 255) { | |
113 | my $chr = sprintf "\\x%02X", $i; | |
114 | my $hex_fold_ascii = sprintf "0x%02X", $fold_ascii[$i]; | |
115 | my $hex_fold_latin1 = sprintf "0x%02X", $fold_latin1[$i]; | |
116 | push @tests, qq[like chr($hex_fold_ascii), qr/(?d:$chr)/i, 'chr($hex_fold_ascii) =~ qr/(?d:$chr)/i']; | |
fad448f4 | 117 | push @tests, qq[like chr($hex_fold_latin1), qr/(?u:$chr)/i, 'chr($hex_fold_latin1) =~ qr/(?u:$chr)/i']; |
fad448f4 KW |
118 | } |
119 | ||
120 | ||
2726813d | 121 | push @tests, qq[like chr(0x0430), qr/[=\x{0410}-\x{0411}]/i, 'Bug #71752 Unicode /i char in a range']; |
2726813d | 122 | push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"]; |
8951c461 | 123 | push @tests, q[my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); like $c, $p, 'Bug #78994: my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); $c =~ $p']; |
2726813d | 124 | |
7b98bc43 KW |
125 | use charnames ":full"; |
126 | push @tests, q[my $re1 = "\N{WHITE SMILING FACE}";like "\xE8", qr/[\w$re1]/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/[\w$re]/']; | |
127 | push @tests, q[my $re2 = "\N{WHITE SMILING FACE}";like "\xE8", qr/\w|$re2/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/\w|$re/']; | |
128 | ||
b2a1b324 | 129 | eval join ";\n","plan tests=>". (scalar @tests), @tests, "1" |
a0a388a1 YO |
130 | or die $@; |
131 | __DATA__ |