This is getting embarrassing.

[perl5.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index 4e91b62..19ec634 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,16 +4,16 @@
  # the format supported by op/regexp.t.  If you want to add a test
  # that does fit that format, add it to op/re_tests, not here.
  
-print "1..244\n";
+$| = 1;
+
+print "1..848\n";
  
  BEGIN {
      chdir 't' if -d 't';
      @INC = '../lib';
  }
-eval 'use Config';          #  Defaults assumed if this fails
  
-# XXX known to leak scalars
-$ENV{PERL_DESTRUCT_LEVEL} = 0 unless $ENV{PERL_DESTRUCT_LEVEL} > 3;
+eval 'use Config';          #  Defaults assumed if this fails
  
  $x = "abc\ndef\n";
  
@@ -292,7 +292,7 @@ for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
    print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/;
    print "ok $test\n";
    $test++;
-  
+
    print "not " if "b$a=" =~ /a$a=/;
    print "ok $test\n";
    $test++;
@@ -312,11 +312,11 @@ $long_var_len = join '|', 8120 .. 28645;
         );
  
  for ( keys %ans ) {
-  print "# const-len `$_' not =>  $ans{$_}\nnot " 
+  print "# const-len `$_' not =>  $ans{$_}\nnot "
      if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o;
    print "ok $test\n";
    $test++;
-  print "# var-len   `$_' not =>  $ans{$_}\nnot " 
+  print "# var-len   `$_' not =>  $ans{$_}\nnot "
      if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o;
    print "ok $test\n";
    $test++;
@@ -325,26 +325,26 @@ for ( keys %ans ) {
  $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
  $expect = "(bla()) ((l)u((e))) (l(e)e)";
  
-sub matchit { 
+sub matchit {
    m/
       (
-       \( 
+       \(
         (?{ $c = 1 })           # Initialize
         (?:
          (?(?{ $c == 0 })       # PREVIOUS iteration was OK, stop the loop
            (?!
            )                    # Fail: will unwind one iteration back
-        )          
+        )      
          (?:
            [^()]+               # Match a big chunk
            (?=
              [()]
            )                    # Do not try to match subchunks
          |
-          \( 
+          \(
            (?{ ++$c })
          |
-          \) 
+          \)
            (?{ --$c })
          )
         )+                      # This may not match with different subblocks
@@ -411,7 +411,7 @@ for $code ('{$blah = 45}','=xx') {
    if ($code eq '=xx') {
      print "#'$@','$res','$blah'\nnot " unless not $@ and $res;
    } else {
-    print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12;    
+    print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12;
    }
    print "ok $test\n";
    $test++;
@@ -510,9 +510,9 @@ foreach $ans ('', 'a', '') {
  }
  
  sub prefixify {
-  my($v,$a,$b,$res) = @_; 
-  $v =~ s/\Q$a\E/$b/; 
-  print "not " unless $res eq $v; 
+  my($v,$a,$b,$res) = @_;
+  $v =~ s/\Q$a\E/$b/;
+  print "not " unless $res eq $v;
    print "ok $test\n";
    $test++;
  }
@@ -525,23 +525,23 @@ print "not " unless $1 and /$1/;
  print "ok $test\n";
  $test++;
  
-$a=qr/(?{++$b})/; 
+$a=qr/(?{++$b})/;
  $b = 7;
-/$a$a/; 
-print "not " unless $b eq '9'; 
+/$a$a/;
+print "not " unless $b eq '9';
  print "ok $test\n";
  $test++;
  
-$c="$a"; 
-/$a$a/; 
-print "not " unless $b eq '11'; 
+$c="$a";
+/$a$a/;
+print "not " unless $b eq '11';
  print "ok $test\n";
  $test++;
  
  {
-  use re "eval"; 
-  /$a$c$a/; 
-  print "not " unless $b eq '14'; 
+  use re "eval";
+  /$a$c$a/;
+  print "not " unless $b eq '14';
    print "ok $test\n";
    $test++;
  
@@ -561,9 +561,9 @@ $test++;
    $test++;
  
  
-  no re "eval"; 
+  no re "eval";
    $match = eval { /$a$c$a/ };
-  print "not " 
+  print "not "
      unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match;
    print "ok $test\n";
    $test++;
@@ -597,8 +597,8 @@ print "ok $test\n";
  $test++;
  print "not " unless $c == 3;
  print "ok $test\n";
-$test++;  
-  
+$test++;
+
  sub must_warn_pat {
      my $warn_pat = shift;
      return sub { print "not " unless $_[0] =~ /$warn_pat/ }
@@ -659,7 +659,7 @@ print "not " if $+[0] != 2 or $-[0] != 1;
  print "ok $test\n";
  $test++;
  
-print "not " 
+print "not "
     if defined $+[1] or defined $-[1] or defined $+[2] or defined $-[2];
  print "ok $test\n";
  $test++;
@@ -681,7 +681,7 @@ print "not " if $+[2] != 3 or $-[2] != 2;
  print "ok $test\n";
  $test++;
  
-print "not " 
+print "not "
     if defined $+[3] or defined $-[3] or defined $+[4] or defined $-[4];
  print "ok $test\n";
  $test++;
@@ -703,7 +703,7 @@ print "not " if $+[3] != 3 or $-[3] != 2;
  print "ok $test\n";
  $test++;
  
-print "not " 
+print "not "
     if defined $+[2] or defined $-[2] or defined $+[4] or defined $-[4];
  print "ok $test\n";
  $test++;
@@ -721,31 +721,31 @@ print "not " if $+[1] != 2 or $-[1] != 1;
  print "ok $test\n";
  $test++;
  
-print "not " 
+print "not "
     if defined $+[2] or defined $-[2] or defined $+[3] or defined $-[3];
  print "ok $test\n";
  $test++;
  
  eval { $+[0] = 13; };
-print "not " 
+print "not "
     if $@ !~ /^Modification of a read-only value attempted/;
  print "ok $test\n";
  $test++;
  
  eval { $-[0] = 13; };
-print "not " 
+print "not "
     if $@ !~ /^Modification of a read-only value attempted/;
  print "ok $test\n";
  $test++;
  
  eval { @+ = (7, 6, 5); };
-print "not " 
+print "not "
     if $@ !~ /^Modification of a read-only value attempted/;
  print "ok $test\n";
  $test++;
  
  eval { @- = qw(foo bar); };
-print "not " 
+print "not "
     if $@ !~ /^Modification of a read-only value attempted/;
  print "ok $test\n";
  $test++;
@@ -791,7 +791,7 @@ $test++;
  
  undef $foo; undef $bar;
  print "#'$str','$foo','$bar'\nnot "
-    unless $str =~ /b(?{$foo = $_; $bar = pos})c/ 
+    unless $str =~ /b(?{$foo = $_; $bar = pos})c/
         and $foo eq 'abcde' and $bar eq 2;
  print "ok $test\n";
  $test++;
@@ -799,7 +799,7 @@ $test++;
  undef $foo; undef $bar;
  pos $str = undef;
  print "#'$str','$foo','$bar'\nnot "
-    unless $str =~ /b(?{$foo = $_; $bar = pos})c/g 
+    unless $str =~ /b(?{$foo = $_; $bar = pos})c/g
         and $foo eq 'abcde' and $bar eq 2 and pos $str eq 3;
  print "ok $test\n";
  $test++;
@@ -808,14 +808,14 @@ $_ = $str;
  
  undef $foo; undef $bar;
  print "#'$str','$foo','$bar'\nnot "
-    unless /b(?{$foo = $_; $bar = pos})c/ 
+    unless /b(?{$foo = $_; $bar = pos})c/
         and $foo eq 'abcde' and $bar eq 2;
  print "ok $test\n";
  $test++;
  
  undef $foo; undef $bar;
  print "#'$str','$foo','$bar'\nnot "
-    unless /b(?{$foo = $_; $bar = pos})c/g 
+    unless /b(?{$foo = $_; $bar = pos})c/g
         and $foo eq 'abcde' and $bar eq 2 and pos eq 3;
  print "ok $test\n";
  $test++;
@@ -831,7 +831,7 @@ $test++;
  undef $foo; undef $bar;
  $_ = 'abcde|abcde';
  print "#'$str','$foo','$bar','$_'\nnot "
-    unless s/b(?{$foo = $_; $bar = pos})c/x/g and $foo eq 'abcde|abcde' 
+    unless s/b(?{$foo = $_; $bar = pos})c/x/g and $foo eq 'abcde|abcde'
         and $bar eq 8 and $_ eq 'axde|axde';
  print "ok $test\n";
  $test++;
@@ -905,7 +905,7 @@ print "not " unless($1  eq 'cd');
  print "ok $test\n";
  $test++;
  
-$_='123x123'; 
+$_='123x123';
  @res = /(\d*|x)/g;
  print "not " unless('123||x|123|' eq join '|', @res);
  print "ok $test\n";
@@ -1117,7 +1117,7 @@ $test++;
  print "not " unless "@space2" eq "spc tab";
  print "ok $test # @space2\n";
  $test++;
- 
+
  # bugid 20001021.005 - this caused a SEGV
  print "not " unless undef =~ /^([^\/]*)(.*)$/;
  print "ok $test\n";
@@ -1129,6 +1129,8 @@ print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/;
  print "ok $test\n";
  $test++;
  
+my $ordA = ord('A');
+
  $_ = "a\x{100}b";
  if (/(.)(\C)(\C)(.)/) {
    print "ok 232\n";
@@ -1137,15 +1139,32 @@ if (/(.)(\C)(\C)(.)/) {
    } else {
      print "not ok 233\n";
    }
-  if ($2 eq "\xC4") {
-    print "ok 234\n";
-  } else {
-    print "not ok 234\n";
-  }
-  if ($3 eq "\x80") {
-    print "ok 235\n";
+  if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8
+      if ($2 eq "\xC4") {
+         print "ok 234\n";
+      } else {
+         print "not ok 234\n";
+      }
+      if ($3 eq "\x80") {
+         print "ok 235\n";
+      } else {
+         print "not ok 235\n";
+      }
+  } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC
+      if ($2 eq "\x8C") {
+         print "ok 234\n";
+      } else {
+         print "not ok 234\n";
+      }
+      if ($3 eq "\x41") {
+         print "ok 235\n";
+      } else {
+         print "not ok 235\n";
+      }
    } else {
-    print "not ok 235\n";
+      for (234..235) {
+         print "not ok $_ # ord('A') == $ordA\n";
+      }
    }
    if ($4 eq "b") {
      print "ok 236\n";
@@ -1160,10 +1179,21 @@ if (/(.)(\C)(\C)(.)/) {
  $_ = "\x{100}";
  if (/(\C)/g) {
    print "ok 237\n";
-  if ($1 eq "\xC4") {
-    print "ok 238\n";
+  # currently \C are still tagged as UTF-8
+  if ($ordA == 65) {
+      if ($1 eq "\xC4") {
+         print "ok 238\n";
+      } else {
+         print "not ok 238\n";
+      }
+  } elsif ($ordA == 193) {
+      if ($1 eq "\x8C") {
+         print "ok 238\n";
+      } else {
+         print "not ok 238\n";
+      }
    } else {
-    print "not ok 238\n";
+      print "not ok 238 # ord('A') == $ordA\n";
    }
  } else {
    for (237..238) {
@@ -1172,10 +1202,21 @@ if (/(\C)/g) {
  }
  if (/(\C)/g) {
    print "ok 239\n";
-  if ($1 eq "\x80") {
-    print "ok 240\n";
+  # currently \C are still tagged as UTF-8
+  if ($ordA == 65) {
+      if ($1 eq "\x80") {
+         print "ok 240\n";
+      } else {
+         print "not ok 240\n";
+      }
+  } elsif ($ordA == 193) {
+      if ($1 eq "\x41") {
+         print "ok 240\n";
+      } else {
+         print "not ok 240\n";
+      }
    } else {
-    print "not ok 240\n";
+      print "not ok 240 # ord('A') == $ordA\n";
    }
  } else {
    for (239..240) {
@@ -1183,49 +1224,1438 @@ if (/(\C)/g) {
    }
  }
  
-# Little background for 241..244 -- in EBCDIC:
-#
-# "\x89" eq 'i'
-# "\x91" eq 'i'
-# "\xc9" eq 'I'
-# "\xd1" eq 'J'
-#
-# If the character range is specified using explicit numeric endpoints,
-# non-characters (like \x8e and \xce) should match (241 and 242).
-#
-# If the character range is specified using alphabet endpoints,
-# non-characters (like \x8e and \xce) should not match (243 and 244).
-
-if ("\x8e" =~ /[\x89-\x91]/) {
+{
+  # japhy -- added 03/03/2001
+  () = (my $str = "abc") =~ /(...)/;
+  $str = "def";
+  print "not " if $1 ne "abc";
    print "ok 241\n";
-} else {
-  print "not ok 241\n";
  }
  
-if ("\xce" =~ /[\xc9-\xd1]/) {
+# Tests 242 and 243 go together with tests 244 and 245.
+# The trick is that in EBCDIC the explicit numeric range should match
+# (as it also does in non-EBCDIC) but the explicit alphabetic range should not.
+
+if ("\x8e" =~ /[\x89-\x91]/) {
    print "ok 242\n";
  } else {
    print "not ok 242\n";
  }
  
-if (ord('i') == 0x89 && ord('j') == 0x91) { # EBCDIC
-
-if ("\x8e" !~ /[i-j]/) {
+if ("\xce" =~ /[\xc9-\xd1]/) {
    print "ok 243\n";
  } else {
    print "not ok 243\n";
  }
  
-if ("\xce" !~ /[I-J]/) {
-  print "ok 244\n";
+# These tests would succeed almost everywhere, since in most character
+# sets \x8e matches neither 'i' nor 'j' and \xce matches neither 'I' nor
+# 'J'; strictly speaking, though, these tests exist for the good of
+# EBCDIC, so let's run them only there.
+if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC
+  if ("\x8e" !~ /[i-j]/) {
+    print "ok 244\n";
+  } else {
+    print "not ok 244\n";
+  }
+  if ("\xce" !~ /[I-J]/) {
+    print "ok 245\n";
+  } else {
+    print "not ok 245\n";
+  }
  } else {
-  print "not ok 244\n";
+  for (244..245) {
+    print "ok $_ # Skip: only in EBCDIC\n";
+  }
+}
+
+print "not " unless "\x{ab}" =~ /\x{ab}/;
+print "ok 246\n";
+
+print "not " unless "\x{abcd}" =~ /\x{abcd}/;
+print "ok 247\n";
+
+{
+    # bug id 20001008.001
+
+    my $test = 248;
+    my @x = ("stra\337e 138","stra\337e 138");
+    for (@x) {
+       s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
+       my($latin) = /^(.+)(?:\s+\d)/;
+       print $latin eq "stra\337e" ? "ok $test\n" :    # 248,249
+           "#latin[$latin]\nnot ok $test\n";
+       $test++;
+       $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
+       use utf8; # needed for the raw UTF-8
+       $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
+    }
+}
+
+{
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok 250\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok 251\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok 252\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok 253\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok 254\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok 255\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok 256\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok 257\n";
+}
+
+{
+    # the first half of 20001028.003
+
+    my $X = chr(1448);
+    my ($Y) = $X =~ /(.*)/;
+    print "not " unless $Y eq v1448 && length($Y) == 1;
+    print "ok 258\n";
+}
+
+{
+    # 20001108.001
+
+    my $X = "Szab\x{f3},Bal\x{e1}zs";
+    my $Y = $X;
+    $Y =~ s/(B)/$1/ for 0..3;
+    print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
+    print "ok 259\n";
+}
+
+{
+    # the second half of 20001028.003
+
+    my $X = '';
+    $X =~ s/^/chr(1488)/e;
+    print "not " unless length $X == 1 && ord($X) == 1488;
+    print "ok 260\n";
+}
+
+{
+    # 20000517.001
+
+    my $x = "\x{100}A";
+
+    $x =~ s/A/B/;
+
+    print "not " unless $x eq "\x{100}B" && length($x) == 2;
+    print "ok 261\n";
+}
+
+{
+    # bug id 20001230.002
+
+    print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
+    print "ok 262\n";
+
+    print "not " unless "École" =~ /^\C\C(c)/;
+    print "ok 263\n";
+}
+
+{
+    my $test = 264; # till 575
+
+    use charnames ':full';
+
+    # This is far from complete testing, there are dozens of character
+    # classes in Unicode.  The mixing of literals and \N{...} is
+    # intentional so that in non-Latin-1 places we test the native
+    # characters, not the Unicode code points.
+
+    my %s = (
+            "a"                                => 'Ll',
+            "\N{CYRILLIC SMALL LETTER A}"      => 'Ll',
+            "A"                                => 'Lu',
+            "\N{GREEK CAPITAL LETTER ALPHA}"   => 'Lu',
+            "\N{HIRAGANA LETTER SMALL A}"      => 'Lo',
+            "\N{COMBINING GRAVE ACCENT}"       => 'Mn',
+            "0"                                => 'Nd',
+            "\N{ARABIC-INDIC DIGIT ZERO}"      => 'Nd',
+            "_"                                => 'N',
+            "!"                                => 'P',
+            " "                                => 'Zs',
+            "\0"                               => 'Cc',
+            );
+       
+    for my $char (map { s/^\S+ //; $_ }
+                    sort map { sprintf("%06x", ord($_))." $_" } keys %s) {
+       my $class = $s{$char};
+       my $code  = sprintf("%06x", ord($char));
+       printf "#\n# 0x$code\n#\n";
+       print "# IsAlpha\n";
+       if ($class =~ /^[LM]/) {
+           print "not " unless $char =~ /\p{IsAlpha}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsAlpha}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsAlpha}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsAlpha}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsAlnum\n";
+       if ($class =~ /^[LMN]/ && $char ne "_") {
+           print "not " unless $char =~ /\p{IsAlnum}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsAlnum}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsAlnum}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsAlnum}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsASCII\n";
+       if ($code le '00007f') {
+           print "not " unless $char =~ /\p{IsASCII}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsASCII}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsASCII}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsASCII}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsCntrl\n";
+       if ($class =~ /^C/) {
+           print "not " unless $char =~ /\p{IsCntrl}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsCntrl}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsCntrl}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsCntrl}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsBlank\n";
+       if ($class =~ /^Z[lp]/ || $char eq " ") {
+           print "not " unless $char =~ /\p{IsBlank}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsBlank}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsBlank}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsBlank}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsDigit\n";
+       if ($class =~ /^Nd$/) {
+           print "not " unless $char =~ /\p{IsDigit}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsDigit}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsDigit}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsDigit}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsGraph\n";
+       if ($class =~ /^([LMNPS])|Co/) {
+           print "not " unless $char =~ /\p{IsGraph}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsGraph}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsGraph}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsGraph}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsLower\n";
+       if ($class =~ /^Ll$/) {
+           print "not " unless $char =~ /\p{IsLower}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsLower}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsLower}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsLower}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsPrint\n";
+       if ($class =~ /^([LMNPS])|Co|Zs/) {
+           print "not " unless $char =~ /\p{IsPrint}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsPrint}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsPrint}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsPrint}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsPunct\n";
+       if ($class =~ /^P/ || $char eq "_") {
+           print "not " unless $char =~ /\p{IsPunct}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsPunct}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsPunct}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsPunct}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsSpace\n";
+       if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) {
+           print "not " unless $char =~ /\p{IsSpace}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsSpace}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsSpace}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsSpace}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsUpper\n";
+       if ($class =~ /^L[ut]/) {
+           print "not " unless $char =~ /\p{IsUpper}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsUpper}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsUpper}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsUpper}/;
+           print "ok $test\n"; $test++;
+       }
+       print "# IsWord\n";
+       if ($class =~ /^[LMN]/ || $char eq "_") {
+           print "not " unless $char =~ /\p{IsWord}/;
+           print "ok $test\n"; $test++;
+           print "not " if     $char =~ /\P{IsWord}/;
+           print "ok $test\n"; $test++;
+       } else {
+           print "not " if     $char =~ /\p{IsWord}/;
+           print "ok $test\n"; $test++;
+           print "not " unless $char =~ /\P{IsWord}/;
+           print "ok $test\n"; $test++;
+       }
+    }
+}
+
+{
+    $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg";
+
+    if (/(.\x{300})./) {
+       print "ok 576\n";
+
+       print "not " unless $` eq "abc\x{100}" && length($`) == 4;
+       print "ok 577\n";
+
+       print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3;
+       print "ok 578\n";
+
+       print "not " unless $' eq "\x{400}defg" && length($') == 5;
+       print "ok 579\n";
+
+       print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2;
+       print "ok 580\n";
+    } else {
+       for (576..580) { print "not ok $_\n" }
+    }
+}
+
+{
+    # bug id 20010306.008
+
+    $a = "a\x{1234}";
+    # The original bug report had 'no utf8' here but that was irrelevant.
+    $a =~ m/\w/; # used to core dump
+
+    print "ok 581\n";
+}
+
+{
+    $test = 582;
+
+    # bugid 20010410.006
+    for my $rx (
+               '/(.*?)\{(.*?)\}/csg',
+               '/(.*?)\{(.*?)\}/cg',
+               '/(.*?)\{(.*?)\}/sg',
+               '/(.*?)\{(.*?)\}/g',
+               '/(.+?)\{(.+?)\}/csg',
+              )
+    {
+       my($input, $i);
+
+       $i = 0;
+       $input = "a{b}c{d}";
+        eval <<EOT;
+       while (eval \$input =~ $rx) {
+           print "# \\\$1 = '\$1' \\\$2 = '\$2'\n";
+           ++\$i;
+       }
+EOT
+       print "not " unless $i == 2;
+       print "ok " . $test++ . "\n";
+    }
+}
+
+{
+    # from Robin Houston
+
+    my $x = "\x{10FFFD}";
+    $x =~ s/(.)/$1/g;
+    print "not " unless ord($x) == 0x10FFFD && length($x) == 1;
+    print "ok 587\n";
  }
  
+{
+    my $x = "\x7f";
+
+    print "not " if     $x =~ /[\x80-\xff]/;
+    print "ok 588\n";
+
+    print "not " if     $x =~ /[\x80-\x{100}]/;
+    print "ok 589\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 590\n";
+
+    print "not " if     $x =~ /\p{InLatin1Supplement}/;
+    print "ok 591\n";
+
+    print "not " unless $x =~ /\P{InLatin1Supplement}/;
+    print "ok 592\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 593\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 594\n";
+}
+
+{
+    my $x = "\x80";
+
+    print "not " unless $x =~ /[\x80-\xff]/;
+    print "ok 595\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 596\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 597\n";
+
+    print "not " unless $x =~ /\p{InLatin1Supplement}/;
+    print "ok 598\n";
+
+    print "not " if    $x =~ /\P{InLatin1Supplement}/;
+    print "ok 599\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 600\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 601\n";
+}
+
+{
+    my $x = "\xff";
+
+    print "not " unless $x =~ /[\x80-\xff]/;
+    print "ok 602\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 603\n";
+
+    print "not " if     $x =~ /[\x{100}]/;
+    print "ok 604\n";
+
+    print "not " unless $x =~ /\p{InLatin1Supplement}/;
+    print "ok 605\n";
+
+    print "not " if     $x =~ /\P{InLatin1Supplement}/;
+    print "ok 606\n";
+
+    print "not " if     $x =~ /\p{InLatinExtendedA}/;
+    print "ok 607\n";
+
+    print "not " unless $x =~ /\P{InLatinExtendedA}/;
+    print "ok 608\n";
+}
+
+{
+    my $x = "\x{100}";
+
+    print "not " if     $x =~ /[\x80-\xff]/;
+    print "ok 609\n";
+
+    print "not " unless $x =~ /[\x80-\x{100}]/;
+    print "ok 610\n";
+
+    print "not " unless $x =~ /[\x{100}]/;
+    print "ok 611\n";
+
+    print "not " if     $x =~ /\p{InLatin1Supplement}/;
+    print "ok 612\n";
+
+    print "not " unless $x =~ /\P{InLatin1Supplement}/;
+    print "ok 613\n";
+
+    print "not " unless $x =~ /\p{InLatinExtendedA}/;
+    print "ok 614\n";
+
+    print "not " if     $x =~ /\P{InLatinExtendedA}/;
+    print "ok 615\n";
+}
+
+{
+    # from japhy
+    my $w;
+    use warnings;    
+    local $SIG{__WARN__} = sub { $w .= shift };
+
+    $w = "";
+    eval 'qr/(?c)/';
+    print "not " if $w !~ /^Useless \(\?c\)/;
+    print "ok 616\n";
+
+    $w = "";
+    eval 'qr/(?-c)/';
+    print "not " if $w !~ /^Useless \(\?-c\)/;
+    print "ok 617\n";
+
+    $w = "";
+    eval 'qr/(?g)/';
+    print "not " if $w !~ /^Useless \(\?g\)/;
+    print "ok 618\n";
+
+    $w = "";
+    eval 'qr/(?-g)/';
+    print "not " if $w !~ /^Useless \(\?-g\)/;
+    print "ok 619\n";
+
+    $w = "";
+    eval 'qr/(?o)/';
+    print "not " if $w !~ /^Useless \(\?o\)/;
+    print "ok 620\n";
+
+    $w = "";
+    eval 'qr/(?-o)/';
+    print "not " if $w !~ /^Useless \(\?-o\)/;
+    print "ok 621\n";
+
+    # now test multi-error regexes
+
+    $w = "";
+    eval 'qr/(?g-o)/';
+    print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-o\)/;
+    print "ok 622\n";
+
+    $w = "";
+    eval 'qr/(?g-c)/';
+    print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-c\)/;
+    print "ok 623\n";
+
+    $w = "";
+    eval 'qr/(?o-cg)/';  # (?c) means (?g) error won't be thrown
+    print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?-c\)/;
+    print "ok 624\n";
+
+    $w = "";
+    eval 'qr/(?ogc)/';
+    print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?g\).*\nUseless \(\?c\)/;
+    print "ok 625\n";
+}
+
+# More Unicode "class" tests
+
+{
+    use charnames ':full';
+
+    print "not " unless "\N{LATIN CAPITAL LETTER A}" =~ /\p{InBasicLatin}/;
+    print "ok 626\n";
+
+    print "not " unless "\N{LATIN CAPITAL LETTER A WITH GRAVE}" =~ /\p{InLatin1Supplement}/;
+    print "ok 627\n";
+
+    print "not " unless "\N{LATIN CAPITAL LETTER A WITH MACRON}" =~ /\p{InLatinExtendedA}/;
+    print "ok 628\n";
+
+    print "not " unless "\N{LATIN SMALL LETTER B WITH STROKE}" =~ /\p{InLatinExtendedB}/;
+    print "ok 629\n";
+
+    print "not " unless "\N{KATAKANA LETTER SMALL A}" =~ /\p{InKatakana}/;
+    print "ok 630\n";
+}
+
+$_ = "foo";
+
+eval <<"EOT"; die if $@;
+  /f
+   o\r
+   o
+   \$
+  /x && print "ok 631\n";
+EOT
+
+eval <<"EOT"; die if $@;
+  /f
+   o
+   o
+   \$\r
+  /x && print "ok 632\n";
+EOT
+
+#test /o feature
+sub test_o { $_[0] =~/$_[1]/o; return $1}
+if(test_o('abc','(.)..') eq 'a') {
+    print "ok 633\n";
+} else {
+    print "not ok 633\n";
+}
+if(test_o('abc','..(.)') eq 'a') {
+    print "ok 634\n";
  } else {
+    print "not ok 634\n";
+}
+
+# 635..639: ID 20010619.003 (only the space character is
+# supposed to be [:print:], not the whole isprint()).
+
+print "not " if "\n"     =~ /[[:print:]]/;
+print "ok 635\n";
+
+print "not " if "\t"     =~ /[[:print:]]/;
+print "ok 636\n";
+
+# Amazingly form feed (\014) is the same in ASCII and EBCDIC.
+print "not " if "\014"  =~ /[[:print:]]/;
+print "ok 637\n";
+
+print "not " if "\r"    =~ /[[:print:]]/;
+print "ok 638\n";
+
+print "not " unless " " =~ /[[:print:]]/;
+print "ok 639\n";
+
+##
+## Test basic $^N usage outside of a regex
+##
+$x = "abcdef";
+$T="ok 640\n";if ($x =~ /cde/ and not defined $^N)         {print $T} else {print "not $T"};
+$T="ok 641\n";if ($x =~ /(cde)/          and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 642\n";if ($x =~ /(c)(d)(e)/      and $^N eq   "e") {print $T} else {print "not $T"};
+$T="ok 643\n";if ($x =~ /(c(d)e)/        and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/  and $^N eq "abc") {print $T} else {print "not $T"};
+$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/  and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 649\n";if ($x =~ /(?:c(d)e)/      and $^N eq  "d" ) {print $T} else {print "not $T"};
+$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq  "d" ) {print $T} else {print "not $T"};
+$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq  "f" ){print $T} else {print "not $T"};
+$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq  "f" ){print $T} else {print "not $T"};
+$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/    and $^N eq  "e" ){print $T} else {print "not $T"};
+{
+ $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/   and $^N eq  "f" ){print $T} else {print "not $T"};
+}
+## test to see if $^N is automatically localized -- it should now
+## have the value set in test 653
+$T="ok 655\n";if ($^N eq  "e" ){print $T} else {print "not $T"};
+
+##
+## Now test inside (?{...})
+##
+$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/      and $y eq "b" ){print $T} else {print "not $T"};
+$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/     and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd")
+              {print $T} else {print "not $T"};
+$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde")
+              {print $T} else {print "not $T"};
+
+# Test the Unicode script classes
+
+print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1
+print "ok 661\n";
+
+print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside
+print "ok 662\n";
+
+print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock
+print "ok 663\n";
+
+print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock
+print "ok 664\n";
+
+print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range)
+print "ok 665\n";
+
+print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton
+print "ok 666\n";
+
+print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton
+print "ok 667\n";
+
+print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there
+print "ok 668\n";
+
+print "not " unless chr(0x388) =~ /\p{InGreek}/; # range
+print "ok 669\n";
+
+print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range
+print "ok 670\n";
+
+print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there
+print "ok 671\n";
+
+print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton
+print "ok 672\n";
+
+##
+## [:cntrl:] checked against every byte value.
+##
+## The other POSIX classes probably deserve tests too, but it is not clear
+## how to guarantee a specific locale.
+##
+$AllBytes = join '', map { chr } 0 .. 255;
+
+# Deleting the control characters must leave 0x20-0x7E and 0x80-0xFF.
+($x = $AllBytes) =~ s/[[:cntrl:]]//g;
+print "not " unless $x eq join '', map { chr } 0x20 .. 0x7E, 0x80 .. 0xFF;
+print "ok 673\n";
+
+# Deleting everything else must leave 0x00-0x1F and 0x7F.
+($x = $AllBytes) =~ s/[^[:cntrl:]]//g;
+print "not " unless $x eq join '', map { chr } 0 .. 0x1F, 0x7F;
+print "ok 674\n";
+
+# Under /s, "." used to walk UTF-8 strings byte by byte instead of
+# character by character.
+{
+    my $text  = "Hello \x{263A} World";
+    my @chars = $text =~ /./gs;
+
+    # Thirteen characters, so the last index has to be 12.
+    print "not " if $#chars != 12;
+    print "ok 675\n";
+}
+
+# "." versus "\C", with and without /s, across a newline and a wide
+# character.  Every check prints an explicit "not ok N" on failure, so a
+# failing test is reported as a failure rather than as a missing TAP line.
+
+# Without /s "." skips the newline: six characters.
+@a = ("foo\nbar" =~ /./g);
+print((@a == 6 && "@a" eq "f o o b a r") ? "ok 676\n" : "not ok 676\n");
+
+# With /s the newline is matched too: seven characters.
+@a = ("foo\nbar" =~ /./gs);
+print((@a == 7 && "@a" eq "f o o \n b a r") ? "ok 677\n" : "not ok 677\n");
+
+# \C matches the newline regardless of /s.
+@a = ("foo\nbar" =~ /\C/g);
+print((@a == 7 && "@a" eq "f o o \n b a r") ? "ok 678\n" : "not ok 678\n");
+
+@a = ("foo\nbar" =~ /\C/gs);
+print((@a == 7 && "@a" eq "f o o \n b a r") ? "ok 679\n" : "not ok 679\n");
+
+# "." returns \x{100} as one element.
+@a = ("foo\n\x{100}bar" =~ /./g);
+print((@a == 7 && "@a" eq "f o o \x{100} b a r") ? "ok 680\n" : "not ok 680\n");
+
+@a = ("foo\n\x{100}bar" =~ /./gs);
+print((@a == 8 && "@a" eq "f o o \n \x{100} b a r") ? "ok 681\n" : "not ok 681\n");
+
+# The two elements \C is expected to return for \x{100}; the values
+# differ between ASCII (ord('A') == 65) and other platforms.
+($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41);
+
+@a = ("foo\n\x{100}bar" =~ /\C/g);
+print((@a == 9 && "@a" eq "f o o \n $a $b b a r") ? "ok 682\n" : "not ok 682\n");
+
+@a = ("foo\n\x{100}bar" =~ /\C/gs);
+print((@a == 9 && "@a" eq "f o o \n $a $b b a r") ? "ok 683\n" : "not ok 683\n");
+
+{
+    # [ID 20010814.004] pos() doesn't work when using =~m// in list context
+    $_ = "ababacadaea";
+    $a = join ":", /b./gc;    # list-context //gc: expected to collect "ba", "ba"
+    $b = join ":", /a./gc;    # continues from where the previous match stopped: "ad", "ae"
+    $c = pos;                 # offset left behind by the last successful match (expected 10)
+    print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n";
+}
+
+{
+    # [ID 20010407.006] matching utf8 return values from functions does not work
+
+    package ID_20010407_006;
+
+    sub x {
+       "a\x{1234}";
+    }
+
+    my $x = x;
+    my $y;
+
+    $x =~ /(..)/; $y = $1;    # match against the value stored in a variable
+    print "not " unless length($y) == 2 && $y eq $x;
+    print "ok 685\n";
  
-for (243..244) {
-  print "ok $_ # Skip: not EBCDIC\n";
+    x  =~ /(..)/; $y = $1;    # match directly against the sub's return value
+    print "not " unless length($y) == 2 && $y eq $x;
+    print "ok 686\n";
  }
  
+
+my $test = 687;
+
+# ok(CONDITION, NAME)
+#
+# Prints one TAP line numbered by $test, follows it with a "# Failed test"
+# diagnostic naming the caller's line when CONDITION is false, advances
+# $test and returns CONDITION.  The ($$) prototype forces scalar context
+# on the pattern match passed as CONDITION.
+sub ok ($$) {
+    my ($passed, $label) = @_;
+    my $prefix = $passed ? "" : "not ";
+
+    printf "%sok %d - %s\n", $prefix, $test, $label;
+
+    unless ($passed) {
+        my $line = (caller)[2];
+        printf "# Failed test at line %d\n", $line;
+    }
+
+    $test++;
+    return $passed;
+}
+
+{
+    # Check that \x## works (at most two hex digits are consumed). 5.6.1 and 5.005_03 fail some of these.
+    $x = "\x4e" . "E";    # "N" followed by a literal "E"
+    ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";    # "i" is not a hex digit
+    ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";     # one hex digit, then a non-hex character
+    ok ($x =~ /^\x4j$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";     # the pattern below has no hex digit at all after \x
+    ok ($x =~ /^\xk$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";    # an underscore must not be skipped inside \x##
+    ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x4e" . "E";    # same subjects; {N} is the number of characters the class must match
+    ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched.");
+
+    $x = "\x4e" . "i";
+    ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)");
+
+    $x = "\x4" . "j";
+    ok ($x =~ /^[\x4j]{2}$/,  "Check that invalid hex digit stops it (1)");
+
+    $x = "\x0" . "k";
+    ok ($x =~ /^[\xk]{2}$/,   "Check that invalid hex digit stops it (0)");
+
+    $x = "\x0" . "x";
+    ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0");
+
+    $x = "\x0" . "xa";
+    ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa");
+
+    $x = "\x9" . "_b";
+    ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b");
+
+}
+
+{
+    # Check that \x{##} works. 5.6.1 fails quite a few of these.
+    #
+    # Inside the braces an underscore between hex digits is ignored, and
+    # the first character that is not a hex digit ends the number.  Each
+    # test name quotes the escape that the pattern actually uses.
+
+    $x = "\x9b";
+    ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b");
+
+    # "q" is not a hex digit, so only "9_b" contributes to the value.
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bq} is to be treated as \\x9b");
+
+    # A leading "x" means there are no hex digits at all.
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0");
+
+    # "0x" is not a prefix here: the number ends at the "x".
+    $x = "\x0" . "y";
+    ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b");
+
+    print "# and now again in [] ranges\n";
+
+    $x = "\x9b";
+    ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bq} is to be treated as \\x9b");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0");
+
+    $x = "\x0" . "y";
+    ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0");
+
+    $x = "\x9b" . "y";
+    ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b");
+}
+
+{
+    # high bit bug -- japhy: a literal \200 after a minimal .*? has to match.
+    my $str = "ab\200d";
+    print "not " unless $str =~ /.*?\200/;
+    print "ok 715\n";
+}
+
+print "# some Unicode properties\n";
+
+{
+    # Property names may be written with dashes, underbars and any case.
+    print "\x80" =~ /\p{in-latin1_SUPPLEMENT}/
+        ? "ok 716\n" : "not ok 716\n";
+
+    # Complement via "^", with leading and trailing whitespace.
+    print "\x80" =~ /\P{  ^  In Latin 1 Supplement  }/
+        ? "ok 717\n" : "not ok 717\n";
+
+    # No "In" prefix; a dash and a run of intervening whitespace
+    # (newlines, however, do not work).
+    print "\x80" =~ /\p{latin-1   supplement}/
+        ? "ok 718\n" : "not ok 718\n";
+}
+
+{
+    # \p{...}: "a" is a lower-case letter, "A" an upper-case one.
+    print "a" =~ /\pL/       ? "ok 719\n"     : "not ok 719\n";
+    print "a" =~ /\p{IsLl}/  ? "ok 720\n"     : "not ok 720\n";
+    print "a" =~ /\p{IsLu}/  ? "not ok 721\n" : "ok 721\n";
+    print "a" =~ /\p{Ll}/    ? "ok 722\n"     : "not ok 722\n";
+    print "a" =~ /\p{Lu}/    ? "not ok 723\n" : "ok 723\n";
+
+    print "A" =~ /\pL/       ? "ok 724\n"     : "not ok 724\n";
+    print "A" =~ /\p{IsLu}/  ? "ok 725\n"     : "not ok 725\n";
+    print "A" =~ /\p{IsLl}/  ? "not ok 726\n" : "ok 726\n";
+    print "A" =~ /\p{Lu}/    ? "ok 727\n"     : "not ok 727\n";
+    print "A" =~ /\p{Ll}/    ? "not ok 728\n" : "ok 728\n";
+
+    # \P{...}: the complements, so every expectation above is inverted.
+    print "a" =~ /\PL/       ? "not ok 729\n" : "ok 729\n";
+    print "a" =~ /\P{IsLl}/  ? "not ok 730\n" : "ok 730\n";
+    print "a" =~ /\P{IsLu}/  ? "ok 731\n"     : "not ok 731\n";
+    print "a" =~ /\P{Ll}/    ? "not ok 732\n" : "ok 732\n";
+    print "a" =~ /\P{Lu}/    ? "ok 733\n"     : "not ok 733\n";
+
+    print "A" =~ /\PL/       ? "not ok 734\n" : "ok 734\n";
+    print "A" =~ /\P{IsLu}/  ? "not ok 735\n" : "ok 735\n";
+    print "A" =~ /\P{IsLl}/  ? "ok 736\n"     : "not ok 736\n";
+    print "A" =~ /\P{Lu}/    ? "not ok 737\n" : "ok 737\n";
+    print "A" =~ /\P{Ll}/    ? "ok 738\n"     : "not ok 738\n";
+}
+
+{
+    print "not " if     "a" =~ /\p{Common}/;    # a letter must not be reported as Common
+    print "ok 739\n";
+
+    print "not " unless "1" =~ /\p{Common}/;    # a digit must be
+    print "ok 740\n";
+}
+
+{
+    print "not " if     "a"       =~ /\p{Inherited}/;
+    print "ok 741\n";
+
+    print "not " unless "\x{300}" =~ /\p{Inherited}/;    # U+0300 is expected to be Inherited
+    print "ok 742\n";
+}
+
+{
+    print "not " unless "a" =~ /\p{L&}/;    # "L&" must accept a letter ...
+    print "ok 743\n";
+
+    print "not " if     "1" =~ /\p{L&}/;    # ... and reject a digit
+    print "ok 744\n";
+}
+
+{
+    print "not " unless "a" =~ /\p{LowercaseLetter}/;    # long form of the category name
+    print "ok 745\n";
+
+    print "not " if     "A" =~ /\p{
+                                       Lowercase
+                                       Letter
+                                 }/x;
+    print "ok 746\n";    # the same name spread over several lines under /x
+}
+
+{
+    print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/;
+    print "ok 747\n";
+}
+
+{
+    # Script=, Block=, Category= (explicit "Type=Value" spellings, all probed with U+0100)
+
+    print "not " unless "\x{0100}" =~ /\p{Script=Latin}/;
+    print "ok 748\n";
+
+    print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/;
+    print "ok 749\n";
+
+    print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/;
+    print "ok 750\n";
+}
+
+{
+    print "# the basic character classes and Unicode \n";
+
+    # \w -- 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101;
+    print "\x{0100}" =~ /\w/ ? "ok 751\n" : "not ok 751\n";
+
+    # \d -- 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;;
+    print "\x{0660}" =~ /\d/ ? "ok 752\n" : "not ok 752\n";
+
+    # \s -- 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;;
+    print "\x{1680}" =~ /\s/ ? "ok 753\n" : "not ok 753\n";
+}
+
+{
+    print "# folding matches and Unicode\n";
+
+    # Every group of four covers the full subject/pattern case grid:
+    #   lower vs UPPER, UPPER vs UPPER, lower vs lower, UPPER vs lower.
+    # The \x{100} in the subject or the pattern makes the match a
+    # Unicode one.
+
+    # ASCII pattern, subject carrying a wide character.
+    print "not " unless "a\x{100}" =~ /A/i;
+    print "ok 754\n";
+
+    print "not " unless "A\x{100}" =~ /A/i;
+    print "ok 755\n";
+
+    print "not " unless "a\x{100}" =~ /a/i;
+    print "ok 756\n";
+
+    print "not " unless "A\x{100}" =~ /a/i;
+    print "ok 757\n";
+
+    # Wide-character pattern: \x{100} and \x{101} fold to each other.
+    print "not " unless "\x{101}a" =~ /\x{100}/i;
+    print "ok 758\n";
+
+    print "not " unless "\x{100}a" =~ /\x{100}/i;
+    print "ok 759\n";
+
+    print "not " unless "\x{101}a" =~ /\x{101}/i;
+    print "ok 760\n";
+
+    print "not " unless "\x{100}a" =~ /\x{101}/i;
+    print "ok 761\n";
+
+    # ASCII letter followed by a wide character in the pattern.
+    print "not " unless "a\x{100}" =~ /A\x{100}/i;
+    print "ok 762\n";
+
+    print "not " unless "A\x{100}" =~ /A\x{100}/i;
+    print "ok 763\n";
+
+    print "not " unless "a\x{100}" =~ /a\x{100}/i;
+    print "ok 764\n";
+
+    print "not " unless "A\x{100}" =~ /a\x{100}/i;
+    print "ok 765\n";
+
+    # The same grids again with the pattern inside a character class.
+    print "not " unless "a\x{100}" =~ /[A]/i;
+    print "ok 766\n";
+
+    print "not " unless "A\x{100}" =~ /[A]/i;
+    print "ok 767\n";
+
+    print "not " unless "a\x{100}" =~ /[a]/i;
+    print "ok 768\n";
+
+    print "not " unless "A\x{100}" =~ /[a]/i;
+    print "ok 769\n";
+
+    print "not " unless "\x{101}a" =~ /[\x{100}]/i;
+    print "ok 770\n";
+
+    print "not " unless "\x{100}a" =~ /[\x{100}]/i;
+    print "ok 771\n";
+
+    print "not " unless "\x{101}a" =~ /[\x{101}]/i;
+    print "ok 772\n";
+
+    print "not " unless "\x{100}a" =~ /[\x{101}]/i;
+    print "ok 773\n";
+
+}
+
+{
+    use charnames ':full';
+
+    # For three letter pairs, check that /i matches lower against UPPER
+    # and UPPER against lower, both as a plain pattern and inside a
+    # character class.  $lower always holds the SMALL letter and $UPPER
+    # the CAPITAL one.
+
+    print "# LATIN LETTER A WITH GRAVE\n";
+    my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}";
+    my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}";
+
+    print $lower =~ m/$UPPER/i   ? "ok 774\n" : "not ok 774\n";
+    print $UPPER =~ m/$lower/i   ? "ok 775\n" : "not ok 775\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n";
+
+    print "# GREEK LETTER ALPHA WITH VRACHY\n";
+
+    $lower = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}";
+    $UPPER = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}";
+
+    print $lower =~ m/$UPPER/i   ? "ok 778\n" : "not ok 778\n";
+    print $UPPER =~ m/$lower/i   ? "ok 779\n" : "not ok 779\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n";
+
+    print "# LATIN LETTER Y WITH DIAERESIS\n";
+
+    $lower = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}";
+    $UPPER = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}";
+    print $lower =~ m/$UPPER/i   ? "ok 782\n" : "not ok 782\n";
+    print $UPPER =~ m/$lower/i   ? "ok 783\n" : "not ok 783\n";
+    print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n";
+    print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n";
+}
+
+{
+    use warnings;
+    use charnames ':full';
+
+    print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n";
+
+    my $capital_sigma = "\N{GREEK CAPITAL LETTER SIGMA}";
+    my $perispomeni   = "\N{COMBINING GREEK PERISPOMENI}";
+
+    # The two must NOT fold together; before #13843 this matched falsely.
+    if ("_:$perispomeni:_" =~ m/_:$capital_sigma:_/i) {
+        print "not ok 786\n";
+    }
+    else {
+        print "ok 786\n";
+    }
+}
+
+{
+    print "# \\X\n";
+
+    use charnames ':full';
+
+    print "a!"              =~ /^(\X)!/ && $1 eq "a" ?    # a single ASCII character
+       "ok 787\n" : "not ok 787 # $1\n";
+    print "\xDF!"           =~ /^(\X)!/ && $1 eq "\xDF" ?    # a single character above 0x7F
+       "ok 788\n" : "not ok 788 # $1\n";
+    print "\x{100}!"        =~ /^(\X)!/ && $1 eq "\x{100}" ?    # a single wide character
+       "ok 789\n" : "not ok 789 # $1\n";
+    print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ?    # two code points captured by one \X
+       "ok 790\n" : "not ok 790 # $1\n";
+    print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ &&
+       $1 eq "\N{LATIN CAPITAL LETTER E}" ?
+       "ok 791\n" : "not ok 791 # $1\n";
+    print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~    # letter plus combining accent captured by one \X
+       /^(\X)!/ &&
+       $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ?
+       "ok 792\n" : "not ok 792 # $1\n";
+}
+
+{
+    print "#\\C and \\X\n";
+
+    # Both escapes have to consume the single "b" between "a" and "c".
+    print "not " unless "!abc!" =~ /a\Cc/;
+    print "ok 793\n";
+
+    print "not " unless "!abc!" =~ /a\Xc/;
+    print "ok 794\n";
+}
+
+{
+    print "# FINAL SIGMA\n";
+
+    my @forms = (
+        "\x{03A3}",    # CAPITAL
+        "\x{03C2}",    # SMALL FINAL
+        "\x{03C3}",    # SMALL
+    );
+
+    my $num = 795;
+
+    # 795..803: every form as subject against every form as a plain
+    # /i pattern.
+    for my $subject (@forms) {
+        for my $pattern (@forms) {
+            print(($subject =~ /$pattern/i) ? "ok $num\n" : "not ok $num\n");
+            $num++;
+        }
+    }
+
+    # 804..812: the same grid with the pattern inside a character class.
+    for my $subject (@forms) {
+        for my $pattern (@forms) {
+            print(($subject =~ /[$pattern]/i) ? "ok $num\n" : "not ok $num\n");
+            $num++;
+        }
+    }
+}
+
+{
+    print "# parlez-vous?\n";
+
+    use charnames ':full';
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~    # "." against a plain "c"
+         /fran.ais/ &&
+       $& eq "francais" ?
+       "ok 813\n" : "not ok 813\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~    # "." against the precomposed letter
+         /fran.ais/ &&
+       $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 814\n" : "not ok 814\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~    # \C against a plain "c"
+          /fran\Cais/ &&
+        $& eq "francais" ?
+       "ok 815\n" : "not ok 815\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded
+       "ok 816\n" : "not ok 816\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~    # \X against a plain "c"
+         /fran\Xais/ &&
+       $& eq "francais" ?
+       "ok 817\n" : "not ok 817\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~    # \X against the precomposed letter
+         /fran\Xais/  &&
+        $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 818\n" : "not ok 818\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~    # \X must take "c" and the combining cedilla together
+         /fran\Xais/ &&
+         $& eq "franc\N{COMBINING CEDILLA}ais" ?
+        "ok 819\n" : "not ok 819\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~    # named character inside the pattern
+         /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/  &&
+        $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 820\n" : "not ok 820\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /franc\N{COMBINING CEDILLA}ais/  &&
+        $& eq "franc\N{COMBINING CEDILLA}ais" ?
+       "ok 821\n" : "not ok 821\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~    # alternation accepting all three spellings
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "francais" ?
+       "ok 822\n" : "not ok 822\n";
+
+    print "fran\N{LATIN SMALL LETTER C}ais" =~    # NOTE(review): identical to test 822 -- confirm whether another subject was intended
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "francais" ?
+       "ok 823\n" : "not ok 823\n";
+
+    print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ?
+       "ok 824\n" : "not ok 824\n";
+
+    print "franc\N{COMBINING CEDILLA}ais" =~
+         /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ &&
+       $& eq "franc\N{COMBINING CEDILLA}ais" ?
+       "ok 825\n" : "not ok 825\n";
+}
+
+{
+    print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n";
+
+    # In each case \x{100} is appended and then chopped off again, so the
+    # text is plain ASCII by the time of the match.
+
+    # Only the subject once held the wide character.
+    {
+        my $pattern = "ABcde";
+        my $subject = "abcDE\x{100}";
+        chop $subject;
+        print(($subject =~ m/$pattern/i) ? "ok 826\n" : "not ok 826\n");
+    }
+
+    # Only the pattern once held the wide character.
+    {
+        my $pattern = "ABcde\x{100}";
+        my $subject = "abcDE";
+        chop $pattern;
+        print(($subject =~ m/$pattern/i) ? "ok 827\n" : "not ok 827\n");
+    }
+
+    # Both of them did.
+    {
+        my $pattern = "ABcde\x{100}";
+        my $subject = "abcDE\x{100}";
+        chop $pattern;
+        chop $subject;
+        print(($subject =~ m/$pattern/i) ? "ok 828\n" : "not ok 828\n");
+    }
+}
+
+{
+    print "# more SIGMAs\n";
+
+    my $SIGMA = "\x{03A3}"; # CAPITAL
+    my $Sigma = "\x{03C2}"; # SMALL FINAL
+    my $sigma = "\x{03C3}"; # SMALL
+
+    my $S3 = "$SIGMA$Sigma$sigma";    # all three forms in a row
+
+    print ":$S3:" =~ /:(($SIGMA)+):/i   && $1 eq $S3 && $2 eq $sigma ?    # $1: whole run; $2: last repetition, i.e. the final character of $S3
+       "ok 829\n" : "not ok 829\n";
+    print ":$S3:" =~ /:(($Sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 830\n" : "not ok 830\n";
+    print ":$S3:" =~ /:(($sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 831\n" : "not ok 831\n";
+
+    print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ?    # same three checks with a character class
+       "ok 832\n" : "not ok 832\n";
+    print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 833\n" : "not ok 833\n";
+    print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 834\n" : "not ok 834\n";
+}
+
+{
+    print "# LATIN SMALL LETTER SHARP S\n";
+
+    use charnames ':full';
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~    # the character against itself, without /i
+       /\N{LATIN SMALL LETTER SHARP S}/    ? "ok 835\n" : "not ok 835\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~    # ... and with /i
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 836\n" : "not ok 836\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~    # the same pair inside a character class
+       /[\N{LATIN SMALL LETTER SHARP S}]/  ? "ok 837\n" : "not ok 837\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~
+       /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n";
+
+    print "ss" =~    # under /i the two-character "ss" must match the single sharp s
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 839\n" : "not ok 839\n";
+
+    print "SS" =~    # and so must "SS"
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 840\n" : "not ok 840\n";
+
+# These are a bit tricky.  Since the LATIN SMALL LETTER SHARP S is U+00DF,
+# the ANYOF reduces to a byte.  The Unicodeness needs to be caught earlier.
+# (Left disabled; the numbers 841 and 842 are used by the whitespace tests that follow.)
+#    print "ss" =~
+#      /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
+#
+#    print "SS" =~
+#      /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+}
+
+{
+    print "# more whitespace: U+0085, U+2028, U+2029\n";
+
+    # The \x{100} in front forces the string holding U+0085 to be Unicode.
+    print "not " unless "<\x{100}\x{0085}>" =~ /<\x{100}\s>/;
+    print "ok 841\n";
+
+    print "not " unless "<\x{2028}>" =~ /<\s>/;
+    print "ok 842\n";
+
+    print "not " unless "<\x{2029}>" =~ /<\s>/;
+    print "ok 843\n";
+}
+
+{
+    print "# . with /s should work on characters, as opposed to bytes\n";
+
+    my $s = "\x{e4}\x{100}";
+
+    # This is not expected to match: the point is that
+    # neither should we get "Malformed UTF-8" warnings.
+    print $s =~ /\G(.+?)\n/gcs ?
+       "not ok 844\n" : "ok 844\n";
+
+    my @c;
+
+    while ($s =~ /\G(.)/gs) {    # walk the string one "." match at a time
+       push @c, $1;
+    }
+
+    print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n";    # the pieces must reassemble to the original string
+
+    my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256
+    my $r1 = "";
+    while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) {
+       $r1 .= $1 . $2;
+    }
+
+    my $t2 = $t1 . "\x{100}"; # repeat with a larger char
+    my $r2 = "";
+    while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) {
+       $r2 .= $1 . $2;
+    }
+    $r2 =~ s/\x{100}//;    # drop the extra wide character before comparing the two runs
+    print $r1 eq $r2 ? "ok 846\n" : "not ok 846\n";
+}
+
+{
+    print "# Unicode lookbehind\n";
+
+    # The "." inside (?<=...) stands for one and two wide characters
+    # respectively.
+    print "not " unless "A\x{100}B" =~ /(?<=A.)B/;
+    print "ok 847\n";
+
+    print "not " unless "A\x{200}\x{300}B" =~ /(?<=A..)B/;
+    print "ok 848\n";
  }