This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Replaced 'unlink' with 'unlink_all' in t/op/magic.t
[perl5.git] / t / re / reg_fold.t
CommitLineData
24df86f6
RGS
1#!perl
2
3BEGIN {
4 chdir 't' if -d 't';
5 @INC = '../lib';
0214bff6 6 require './test.pl';
24df86f6
RGS
7}
8
a0a388a1
YO
9use strict;
10use warnings;
a0a388a1 11my @tests;
a0a388a1 12
2f7760b5
DM
13my %todo_pass = map { $_ => 1 }
14 qw(00DF 1E9E FB00 FB01 FB02 FB03 FB04 FB05 FB06);
15
a0a388a1 16my $file="../lib/unicore/CaseFolding.txt";
24df86f6 17open my $fh,"<",$file or die "Failed to read '$file': $!";
a0a388a1
YO
18while (<$fh>) {
19 chomp;
20 my ($line,$comment)= split/\s+#\s+/, $_;
1443f10d 21 my ($cp,$type,@folded)=split/[\s;]+/,$line||'';
a0a388a1 22 next unless $type and ($type eq 'F' or $type eq 'C');
8bfc9fab 23 next if $type eq 'C'; # 'C' tests now done by fold_grind.t
1443f10d
KW
24 my $fold_above_latin1 = grep { hex("0x$_") > 255 } @folded;
25 $_="\\x{$_}" for @folded;
a0a388a1 26 my $cpv=hex("0x$cp");
1443f10d 27 my $chr="\\x{$cp}";
a0a388a1 28 my @str;
1443f10d 29 foreach my $swap (0, 1) { # swap lhs and rhs, or not.
3366dfc6 30 foreach my $charclass (0) { # Put rhs in [...], or not
1443f10d
KW
31 my $lhs;
32 my $rhs;
33 if ($swap) {
34 $lhs = join "", @folded;
35 $rhs = $chr;
36 $rhs = "[$rhs]" if $charclass;
37 } else {
38 $lhs = $chr;
39 $rhs = "";
40 foreach my $rhs_char (@folded) {
41 $rhs .= '[' if $charclass;
42 $rhs .= $rhs_char;
43 $rhs .= ']' if $charclass;
44 }
45 }
46 $lhs = "\"$lhs\"";
47 $rhs = "/^$rhs\$/i";
24df86f6 48
1443f10d
KW
49 # Try both Latin1 and Unicode for code points below 256
50 foreach my $upgrade ("", 'utf8::upgrade($c); ') {
51 if ($upgrade) {
52 next if $swap && $fold_above_latin1;
53 next if !$swap && $cpv > 255;
54 }
55 my $eval = "my \$c = $lhs; $upgrade\$c =~ $rhs";
56 #print __LINE__, ": $eval\n";
57 push @tests, qq[ok(eval '$eval', '$eval - $comment')];
58 if (! $swap && ($cp eq '0390' || $cp eq '03B0')) {
59 $tests[-1]="TODO: { local \$::TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }"
60 } elsif ($charclass && @folded > 1 && $swap && ! $upgrade && ! $fold_above_latin1) {
61 $tests[-1]="TODO: { local \$::TODO='Multi-char, non-utf8 folded inside character class [ ] doesnt work';\n$tests[-1] }"
62 } elsif (! $upgrade && $cpv >= 128 && $cpv <= 255 && $cpv != 0xb5) {
63 $tests[-1]="TODO: { local \$::TODO='Most non-utf8 latin1 doesnt work';\n$tests[-1] }"
2f7760b5
DM
64 } elsif (! $swap && $charclass && @folded > 1
65 && ! $todo_pass{$cp})
66 {
1443f10d
KW
67 # There are a few of these that pass; most fail.
68 $tests[-1]="TODO: { local \$::TODO='Some multi-char, f8 folded inside character class [ ] doesnt work';\n$tests[-1] }"
69 }
1443f10d
KW
70 }
71 }
a0a388a1 72 }
24df86f6 73}
2726813d 74
fad448f4
KW
75# Now verify the case folding tables. First compute the mappings without
76# resorting to the functions we're testing.
77
78# Initialize the array so each $i maps to itself.
79my @fold_ascii;
80for my $i (0 .. 255) {
81 $fold_ascii[$i] = $i;
82}
83my @fold_latin1 = @fold_ascii;
84
85# Override the uppercase elements to fold to their lower case equivalents,
86# using the fact that 'A' in ASCII is 0x41, 'a' is 0x41+32, 'B' is 0x42, and
87# so on. The same paradigm applies for most of the Latin1 range cased
88# characters, but in posix anything outside ASCII maps to itself, as we've
89# already set up.
90for my $i (0x41 .. 0x5A, 0xC0 .. 0xD6, 0xD8 .. 0xDE) {
91 my $upper_ord = ord_latin1_to_native($i);
92 my $lower_ord = ord_latin1_to_native($i + 32);
93
94 $fold_latin1[$upper_ord] = $lower_ord;
95
96 next if $i > 127;
97 $fold_ascii[$upper_ord] = $lower_ord;
98}
99
100# Same for folding lower to the upper equivalents
101for my $i (0x61 .. 0x7A, 0xE0 .. 0xF6, 0xF8 .. 0xFE) {
102 my $lower_ord = ord_latin1_to_native($i);
103 my $upper_ord = ord_latin1_to_native($i - 32);
104
105 $fold_latin1[$lower_ord] = $upper_ord;
106
107 next if $i > 127;
108 $fold_ascii[$lower_ord] = $upper_ord;
109}
110
111# Test every latin1 character that the correct values in both /u and /d
112for my $i (0 .. 255) {
113 my $chr = sprintf "\\x%02X", $i;
114 my $hex_fold_ascii = sprintf "0x%02X", $fold_ascii[$i];
115 my $hex_fold_latin1 = sprintf "0x%02X", $fold_latin1[$i];
116 push @tests, qq[like chr($hex_fold_ascii), qr/(?d:$chr)/i, 'chr($hex_fold_ascii) =~ qr/(?d:$chr)/i'];
fad448f4 117 push @tests, qq[like chr($hex_fold_latin1), qr/(?u:$chr)/i, 'chr($hex_fold_latin1) =~ qr/(?u:$chr)/i'];
fad448f4
KW
118}
119
120
2726813d 121push @tests, qq[like chr(0x0430), qr/[=\x{0410}-\x{0411}]/i, 'Bug #71752 Unicode /i char in a range'];
2726813d 122push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"];
8951c461 123push @tests, q[my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); like $c, $p, 'Bug #78994: my $c = "\x{212A}"; my $p = qr/(?:^[\x{004B}_]+$)/i; utf8::upgrade($p); $c =~ $p'];
2726813d 124
7b98bc43
KW
125use charnames ":full";
126push @tests, q[my $re1 = "\N{WHITE SMILING FACE}";like "\xE8", qr/[\w$re1]/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/[\w$re]/'];
127push @tests, q[my $re2 = "\N{WHITE SMILING FACE}";like "\xE8", qr/\w|$re2/, 'my $re = "\N{WHITE SMILING FACE}"; "\xE8" =~ qr/\w|$re/'];
128
b2a1b324 129eval join ";\n","plan tests=>". (scalar @tests), @tests, "1"
a0a388a1
YO
130 or die $@;
131__DATA__