Updating the test count is good; also rephrasing

[perl5.git] / t / op / pat.t
diff --git a/t/op/pat.t b/t/op/pat.t

index b797bdf..9318070 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
  
  $| = 1;
  
-print "1..828\n";
+print "1..849\n";
  
  BEGIN {
      chdir 't' if -d 't';
@@ -2380,11 +2380,7 @@ print "# some Unicode properties\n";
      print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n";
  
      my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}";
-
-    my $hSIGMA = sprintf "%04x", ord $SIGMA;
-    
-    my $char = "\N{COMBINING GREEK PERISPOMENI}";
-    my $code = sprintf "%04x", ord($char);
+    my $char  = "\N{COMBINING GREEK PERISPOMENI}";
  
      # Before #13843 this was failing by matching falsely.
      print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n";
@@ -2558,3 +2554,108 @@ print "# some Unicode properties\n";
         }
      }
  }
+
+{
+    print "# more SIGMAs\n";
+
+    my $SIGMA = "\x{03A3}"; # CAPITAL
+    my $Sigma = "\x{03C2}"; # SMALL FINAL
+    my $sigma = "\x{03C3}"; # SMALL
+
+    my $S3 = "$SIGMA$Sigma$sigma";
+
+    print ":$S3:" =~ /:(($SIGMA)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 829\n" : "not ok 829\n";
+    print ":$S3:" =~ /:(($Sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 830\n" : "not ok 830\n";
+    print ":$S3:" =~ /:(($sigma)+):/i   && $1 eq $S3 && $2 eq $sigma ?
+       "ok 831\n" : "not ok 831\n";
+
+    print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 832\n" : "not ok 832\n";
+    print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 833\n" : "not ok 833\n";
+    print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ?
+       "ok 834\n" : "not ok 834\n";
+}
+
+{
+    print "# LATIN SMALL LETTER SHARP S\n";
+
+    use charnames ':full';
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~
+       /\N{LATIN SMALL LETTER SHARP S}/    ? "ok 835\n" : "not ok 835\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 836\n" : "not ok 836\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~
+       /[\N{LATIN SMALL LETTER SHARP S}]/  ? "ok 837\n" : "not ok 837\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~
+       /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n";
+
+    print "ss" =~
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 839\n" : "not ok 839\n";
+
+    print "SS" =~
+       /\N{LATIN SMALL LETTER SHARP S}/i   ? "ok 840\n" : "not ok 840\n";
+
+# These are a bit tricky.  Since the LATIN SMALL LETTER SHARP S is U+00DF,
+# the ANYOF reduces to a byte.  The Unicodeness needs to be caught earlier.
+#    print "ss" =~
+#      /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n";
+#
+#    print "SS" =~
+#      /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+}
+
+{
+    print "# more whitespace: U+0085, U+2028, U+2029\n";
+
+    # U+0085 needs to be forced to be Unicode; the leading \x{100} does that.
+    print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 841\n" : "not ok 841\n";
+    print "<\x{2028}>" =~ /<\s>/ ? "ok 842\n" : "not ok 842\n";
+    print "<\x{2029}>" =~ /<\s>/ ? "ok 843\n" : "not ok 843\n";
+}
+
+{
+    print "# . with /s should work on characters, as opposed to bytes\n";
+
+    my $s = "\x{e4}\x{100}";
+
+    # This is not expected to match: the point is that
+    # we should not get "Malformed UTF-8" warnings either.
+    print $s =~ /\G(.+?)\n/gcs ?
+       "not ok 844\n" : "ok 844\n";
+
+    my @c;
+
+    while ($s =~ /\G(.)/gs) {
+       push @c, $1;
+    }
+
+    print join("", @c) eq $s ? "ok 845\n" : "not ok 845\n";
+
+    my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256
+    my $r1 = "";
+    while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) {
+       $r1 .= $1 . $2;
+    }
+
+    my $t2 = $t1 . "\x{100}"; # repeat with a larger char
+    my $r2 = "";
+    while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) {
+       $r2 .= $1 . $2;
+    }
+    $r2 =~ s/\x{100}//;
+    print $r1 eq $r2 ? "ok 846\n" : "not ok 846\n";
+}
+
+{
+    print "# Unicode lookbehind\n";
+
+    print "A\x{100}B"        =~ /(?<=A.)B/  ? "ok 847\n" : "not ok 847\n";
+    print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 848\n" : "not ok 848\n";
+}