t/re/reg_posixcc.t

   1 #!perl
   2
   3 BEGIN {
   4     chdir 't' if -d 't';
   5     @INC = '../lib';
   6     require './test.pl';
   7 }
   8
   9 use strict;
  10 use warnings;
  11 plan "no_plan";
  12
  13 my @pats=(
  14             "\\w",
  15             "\\W",
  16             "\\s",
  17             "\\S",
  18             "\\d",
  19             "\\D",
  20             "[:alnum:]",
  21             "[:^alnum:]",
  22             "[:alpha:]",
  23             "[:^alpha:]",
  24             "[:ascii:]",
  25             "[:^ascii:]",
  26             "[:cntrl:]",
  27             "[:^cntrl:]",
  28             "[:graph:]",
  29             "[:^graph:]",
  30             "[:lower:]",
  31             "[:^lower:]",
  32             "[:print:]",
  33             "[:^print:]",
  34             "[:punct:]",
  35             "[:^punct:]",
  36             "[:upper:]",
  37             "[:^upper:]",
  38             "[:xdigit:]",
  39             "[:^xdigit:]",
  40             "[:space:]",
  41             "[:^space:]",
  42             "[:blank:]",
  43             "[:^blank:]" );
  44
  45 sub rangify {
  46     my $ary= shift;
  47     my $fmt= shift || '%d';
  48     my $sep= shift || ' ';
  49     my $rng= shift || '..';
  50
  51
  52     my $first= $ary->[0];
  53     my $last= $ary->[0];
  54     my $ret= sprintf $fmt, $first;
  55     for my $idx (1..$#$ary) {
  56         if ( $ary->[$idx] != $last + 1) {
  57             if ($last!=$first) {
  58                 $ret.=sprintf "%s$fmt",$rng, $last;
  59             }
  60             $first= $last= $ary->[$idx];
  61             $ret.=sprintf "%s$fmt",$sep,$first;
  62          } else {
  63             $last= $ary->[$idx];
  64          }
  65     }
  66     if ( $last != $first) {
  67         $ret.=sprintf "%s$fmt",$rng, $last;
  68     }
  69     return $ret;
  70 }
  71
  72 # The bug is only fixed for /u
  73 use feature 'unicode_strings';
  74
  75 my $description = "";
  76 while (@pats) {
  77     my ($yes,$no)= splice @pats,0,2;
  78
  79     my %err_by_type;
  80     my %singles;
  81     my %complements;
  82     foreach my $b (0..255) {
  83         my %got;
  84         my $display_b = sprintf("\\x%02X", $b);
  85         for my $type ('unicode','not-unicode') {
  86             my $str=chr($b).chr($b);
  87             if ($type eq 'unicode') {
  88                 $str.=chr(256);
  89                 chop $str;
  90             }
  91             if ($str=~/[$yes][$no]/){
  92                 unlike($str,qr/[$yes][$no]/,
  93                     "chr($display_b) X 2 =~/[$yes][$no]/ should not match under $type");
  94                 push @{$err_by_type{$type}},$b;
  95             }
  96             $got{"[$yes]"}{$type} = $str=~/[$yes]/ ? 1 : 0;
  97             $got{"[$no]"}{$type} = $str=~/[$no]/ ? 1 : 0;
  98             $got{"[^$yes]"}{$type} = $str=~/[^$yes]/ ? 1 : 0;
  99             $got{"[^$no]"}{$type} = $str=~/[^$no]/ ? 1 : 0;
 100
 101             # For \w, \s, and \d, also test without being in character
 102             # classes.
 103             next if $yes =~ /\[/;
 104
 105             # The rest of this .t was written when there were many test
 106             # failures, so it goes to some lengths to summarize things.  Now
 107             # those are fixed, so these missing tests just do standard
 108             # procedures
 109
 110             my $chr = chr($b);
 111             utf8::upgrade $chr if $type eq 'unicode';
 112             ok (($chr =~ /$yes/) != ($chr =~ /$no/),
 113                 "$type: chr($display_b) isn't both $yes and $no");
 114         }
 115         foreach my $which ("[$yes]","[$no]","[^$yes]","[^$no]") {
 116             if ($got{$which}{'unicode'} != $got{$which}{'not-unicode'}){
 117                 is($got{$which}{'unicode'},$got{$which}{'not-unicode'},
 118                     "chr($display_b) X 2=~ /$which/ should have the same results regardless of internal string encoding");
 119                 push @{$singles{$which}},$b;
 120             }
 121         }
 122         foreach my $which ($yes,$no) {
 123             foreach my $strtype ('unicode','not-unicode') {
 124                 if ($got{"[$which]"}{$strtype} == $got{"[^$which]"}{$strtype}) {
 125                     isnt($got{"[$which]"}{$strtype},$got{"[^$which]"}{$strtype},
 126                         "chr($display_b) X 2 =~ /[$which]/ should not have the same result as chr($display_b)=~/[^$which]/");
 127                     push @{$complements{$which}{$strtype}},$b;
 128                 }
 129             }
 130         }
 131     }
 132
 133
 134     if (%err_by_type || %singles || %complements) {
 135         $description||=" Error:\n";
 136         $description .= "/[$yes][$no]/\n";
 137         if (%err_by_type) {
 138             foreach my $type (sort keys %err_by_type) {
 139                 $description .= "\tmatches $type codepoints:\t";
 140                 $description .= rangify($err_by_type{$type});
 141                 $description .= "\n";
 142             }
 143             $description .= "\n";
 144         }
 145         if (%singles) {
 146             $description .= "Unicode/Nonunicode mismatches:\n";
 147             foreach my $type (sort keys %singles) {
 148                 $description .= "\t$type:\t";
 149                 $description .= rangify($singles{$type});
 150                 $description .= "\n";
 151             }
 152             $description .= "\n";
 153         }
 154         if (%complements) {
 155             foreach my $class (sort keys %complements) {
 156                 foreach my $strtype (sort keys %{$complements{$class}}) {
 157                     $description .= "\t$class has complement failures under $strtype for:\t";
 158                     $description .= rangify($complements{$class}{$strtype});
 159                     $description .= "\n";
 160                 }
 161             }
 162         }
 163     }
 164 }
 165 __DATA__