This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
reg_posixcc.t: display chr(\\xXX) should be chr(0xXX)
[perl5.git] / t / re / reg_posixcc.t
1 #!perl
2
3 BEGIN {
4     chdir 't' if -d 't';
5     @INC = '../lib';
6     require './test.pl';
7 }
8
9 use strict;
10 use warnings;
11 plan "no_plan";
12
13 my @pats=(
14             "\\w",
15             "\\W",
16             "\\s",
17             "\\S",
18             "\\d",
19             "\\D",
20             "\\h",
21             "\\H",
22             "\\v",
23             "\\V",
24             "[:alnum:]",
25             "[:^alnum:]",
26             "[:alpha:]",
27             "[:^alpha:]",
28             "[:ascii:]",
29             "[:^ascii:]",
30             "[:cntrl:]",
31             "[:^cntrl:]",
32             "[:graph:]",
33             "[:^graph:]",
34             "[:lower:]",
35             "[:^lower:]",
36             "[:print:]",
37             "[:^print:]",
38             "[:punct:]",
39             "[:^punct:]",
40             "[:upper:]",
41             "[:^upper:]",
42             "[:xdigit:]",
43             "[:^xdigit:]",
44             "[:space:]",
45             "[:^space:]",
46             "[:blank:]",
47             "[:^blank:]" );
48
49 sub rangify {
50     my $ary= shift;
51     my $fmt= shift || '%d';
52     my $sep= shift || ' ';
53     my $rng= shift || '..';
54     
55     
56     my $first= $ary->[0];
57     my $last= $ary->[0];
58     my $ret= sprintf $fmt, $first;
59     for my $idx (1..$#$ary) {
60         if ( $ary->[$idx] != $last + 1) {
61             if ($last!=$first) {
62                 $ret.=sprintf "%s$fmt",$rng, $last;
63             }             
64             $first= $last= $ary->[$idx];
65             $ret.=sprintf "%s$fmt",$sep,$first;
66          } else {
67             $last= $ary->[$idx];
68          }
69     }
70     if ( $last != $first) {
71         $ret.=sprintf "%s$fmt",$rng, $last;
72     }
73     return $ret;
74 }
75
76 # The bug is only fixed for /u
77 use feature 'unicode_strings';
78
79 my $description = "";
80 while (@pats) {
81     my ($yes,$no)= splice @pats,0,2;
82     
83     my %err_by_type;
84     my %singles;
85     my %complements;
86     foreach my $b (0..255) {
87         my %got;
88         my $display_b = sprintf("0x%02X", $b);
89         for my $type ('utf8','not-utf8') {
90             my $str=chr($b).chr($b);
91             if ($type eq 'utf8') {
92                 $str.=chr(256);
93                 chop $str;
94             }
95             if ($str=~/[$yes][$no]/){
96                 unlike($str,qr/[$yes][$no]/,
97                     "chr($display_b) X 2 =~/[$yes][$no]/ should not match under $type");
98                 push @{$err_by_type{$type}},$b;
99             }
100             $got{"[$yes]"}{$type} = $str=~/[$yes]/ ? 1 : 0;
101             $got{"[$no]"}{$type} = $str=~/[$no]/ ? 1 : 0;
102             $got{"[^$yes]"}{$type} = $str=~/[^$yes]/ ? 1 : 0;
103             $got{"[^$no]"}{$type} = $str=~/[^$no]/ ? 1 : 0;
104
105             # For \w, \s, and \d, \h, \v, also test without being in character
106             # classes.
107             next if $yes =~ /\[/;
108
109             # The rest of this .t was written when there were many test
110             # failures, so it goes to some lengths to summarize things.  Now
111             # those are fixed, so these missing tests just do standard
112             # procedures
113
114             my $chr = chr($b);
115             utf8::upgrade $chr if $type eq 'utf8';
116             ok (($chr =~ /$yes/) != ($chr =~ /$no/),
117                 "$type: chr($display_b) isn't both $yes and $no");
118         }
119         foreach my $which ("[$yes]","[$no]","[^$yes]","[^$no]") {
120             if ($got{$which}{'utf8'} != $got{$which}{'not-utf8'}){
121                 is($got{$which}{'utf8'},$got{$which}{'not-utf8'},
122                     "chr($display_b) X 2=~ /$which/ should have the same results regardless of internal string encoding");
123                 push @{$singles{$which}},$b;
124             }
125         }
126         foreach my $which ($yes,$no) {
127             foreach my $strtype ('utf8','not-utf8') {
128                 if ($got{"[$which]"}{$strtype} == $got{"[^$which]"}{$strtype}) {
129                     isnt($got{"[$which]"}{$strtype},$got{"[^$which]"}{$strtype},
130                         "chr($display_b) X 2 =~ /[$which]/ should not have the same result as chr($display_b)=~/[^$which]/");
131                     push @{$complements{$which}{$strtype}},$b;
132                 }
133             }
134         }
135     }
136     
137     
138     if (%err_by_type || %singles || %complements) {
139         $description||=" Error:\n";
140         $description .= "/[$yes][$no]/\n";
141         if (%err_by_type) {
142             foreach my $type (sort keys %err_by_type) {
143                 $description .= "\tmatches $type codepoints:\t";
144                 $description .= rangify($err_by_type{$type});
145                 $description .= "\n";
146             }
147             $description .= "\n";
148         }
149         if (%singles) {
150             $description .= "Unicode/Nonunicode mismatches:\n";
151             foreach my $type (sort keys %singles) {
152                 $description .= "\t$type:\t";
153                 $description .= rangify($singles{$type});
154                 $description .= "\n";
155             }
156             $description .= "\n";
157         }
158         if (%complements) {
159             foreach my $class (sort keys %complements) {
160                 foreach my $strtype (sort keys %{$complements{$class}}) {
161                     $description .= "\t$class has complement failures under $strtype for:\t";
162                     $description .= rangify($complements{$class}{$strtype});
163                     $description .= "\n";
164                 }
165             }
166         }
167     }
168 }
169 __DATA__