[perl #23769] Unicode regex broken on simple example

[perl5.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index ccf57a1..54f67fc 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
  
  $| = 1;
  
-print "1..1004\n";
+print "1..1015\n";
  
  BEGIN {
      chdir 't' if -d 't';
@@ -1365,10 +1365,10 @@ print "ok 247\n";
      print "ok 263\n";
  }
  
-{
+SKIP: {
      my $test = 264; # till 575
  
-    use charnames ':full';
+    use charnames ":full";
  
      # This is far from complete testing, there are dozens of character
      # classes in Unicode.  The mixing of literals and \N{...} is
@@ -3140,7 +3140,15 @@ ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]");
  {
      my $i;
      ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'),
-       "[perl #21411] (??{ .. }) corrupts split's stack")
+       "[perl #21411] (??{ .. }) corrupts split's stack");
+    split /(?{'WOW'})/, 'abc';
+    ok('a|b|c' eq join ('|', @_),
+       "[perl #21411] (?{ .. }) version of the above");
+}
+
+{
+    split /(?{ split "" })/, "abc";
+    ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0');
  }
  
  {
@@ -3181,4 +3189,39 @@ $_ = "x"; s/x/func "in multiline subst"/em;
  #$_ = "x"; /x(?{func "in regexp"})/;
  #$_ = "x"; /x(?{func "in multiline regexp"})/m;
  
-# last test 1004
+# bug #19049
+$_="abcdef\n";
+@x = m/./g;
+ok("abcde" eq "$`", '# TODO #19049 - global match not setting $`');
+
+ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr');
+
+# LATIN SMALL/CAPITAL LETTER A WITH MACRON
+ok("  \x{101}" =~ qr/\x{100}/i,
+   "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW
+ok("  \x{1E01}" =~ qr/\x{1E00}/i,
+   "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# DESERET SMALL/CAPITAL LETTER LONG I
+ok("  \x{10428}" =~ qr/\x{10400}/i,
+   "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'
+ok("  \x{1E01}x" =~ qr/\x{1E00}X/i,
+   "<20030808193656.5109.1@llama.ni-s.u-net.com>");
+
+{
+    # [perl #23769] Unicode regex broken on simple example
+    # regrepeat() didn't handle UTF-8 EXACT case right.
+
+    my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
+
+    ok($s =~ /\x{a0}/,       "[perl #23769]");
+    ok($s =~ /\x{a0}+/,      "[perl #23769]");
+    ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]");
+}
+
+# last test 1015
+