X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/75685a94f35c086cc598b03baf224ef3dc31936b..a79135933e1df731ba243e532123f9956085f1b3:/t/op/pat.t diff --git a/t/op/pat.t b/t/op/pat.t index 2e89225..54f67fc 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..686\n"; +print "1..1015\n"; BEGIN { chdir 't' if -d 't'; @@ -20,9 +20,8 @@ $x = "abc\ndef\n"; if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";} if ($x !~ /^def/) {print "ok 2\n";} else {print "not ok 2\n";} -$* = 1; -if ($x =~ /^def/) {print "ok 3\n";} else {print "not ok 3\n";} -$* = 0; +# used to be a test for $* +if ($x =~ /^def/m) {print "ok 3\n";} else {print "not ok 3\n";} $_ = '123'; if (/^([0-9][0-9]*)/) {print "ok 4\n";} else {print "not ok 4\n";} @@ -69,9 +68,8 @@ if (m|bc/*d|) {print "ok 22\n";} else {print "not ok 22\n";} if (/^$_$/) {print "ok 23\n";} else {print "not ok 23\n";} -$* = 1; # test 3 only tested the optimized version--this one is for real -if ("ab\ncd\n" =~ /^cd/) {print "ok 24\n";} else {print "not ok 24\n";} -$* = 0; +# used to be a test for $* +if ("ab\ncd\n" =~ /^cd/m) {print "ok 24\n";} else {print "not ok 24\n";} $XXX{123} = 123; $XXX{234} = 234; @@ -1367,10 +1365,10 @@ print "ok 247\n"; print "ok 263\n"; } -{ +SKIP: { my $test = 264; # till 575 - use charnames ':full'; + use charnames ":full"; # This is far from complete testing, there are dozens of character # classes in Unicode. The mixing of literals and \N{...} is @@ -1422,16 +1420,21 @@ print "ok 247\n"; print "ok $test\n"; $test++; } print "# IsASCII\n"; - if ($code le '00007f') { - print "not " unless $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " if $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if (ord("A") == 193) { + print "ok $test # Skip: in EBCDIC\n"; $test++; + print "ok $test # Skip: in EBCDIC\n"; $test++; } else { - print "not " if $char =~ /\p{IsASCII}/; - print "ok $test\n"; $test++; - print "not " unless $char =~ /\P{IsASCII}/; - print "ok $test\n"; $test++; + if ($code le '00007f') { + print "not " unless $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } } print "# IsCntrl\n"; if ($class =~ /^C/) { @@ -1618,9 +1621,9 @@ EOT { # from Robin Houston - my $x = "\x{12345678}"; + my $x = "\x{10FFFD}"; $x =~ s/(.)/$1/g; - print "not " unless ord($x) == 0x12345678 && length($x) == 1; + print "not " unless ord($x) == 0x10FFFD && length($x) == 1; print "ok 587\n"; } @@ -1892,56 +1895,62 @@ $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" # Test the Unicode script classes -print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1 +print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1 print "ok 661\n"; -print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside +print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside print "ok 662\n"; -print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock +print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew print "ok 663\n"; -print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock +print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew print "ok 664\n"; -print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range) +print "not " unless chr(0xb5) =~ /\p{IsGreek}/; # singleton (not in a range) print "ok 665\n"; -print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton print "ok 666\n"; -print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton print "ok 667\n"; -print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there +print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there print "ok 668\n"; -print "not " unless chr(0x388) =~ /\p{InGreek}/; # range +print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range print "ok 669\n"; -print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range +print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range print "ok 670\n"; -print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there +print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there print "ok 671\n"; -print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton +print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton print "ok 672\n"; +if (ord("A") == 65) { ## ## Test [:cntrl:]... ## ## Should probably put in tests for all the POSIX stuff, but not sure how to ## guarantee a specific locale...... ## -$AllBytes = join('', map { chr($_) } 0..255); -($x = $AllBytes) =~ s/[[:cntrl:]]//g; -if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " }; -print "ok 673\n"; + $AllBytes = join('', map { chr($_) } 0..255); + ($x = $AllBytes) =~ s/[[:cntrl:]]//g; + if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { + print "not "; + } + print "ok 673\n"; -($x = $AllBytes) =~ s/[^[:cntrl:]]//g; -if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }; -print "ok 674\n"; + ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; + if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " } + print "ok 674\n"; +} else { + print "ok $_ # Skip: EBCDIC\n" for 673..674; +} # With /s modifier UTF8 chars were interpreted as bytes { @@ -2008,3 +2017,1211 @@ print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; print "not " unless length($y) == 2 && $y eq $x; print "ok 686\n"; } + + +my $test = 687; + +# Force scalar context on the patern match +sub ok ($$) { + my($ok, $name) = @_; + + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + + printf "# Failed test at line %d\n", (caller)[2] unless $ok; + + $test++; + return $ok; +} + +{ + # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. + $x = "\x4e" . "E"; + ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x4e" . "E"; + ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); + +} + +{ + # Check that \x{##} works. 5.6.1 fails quite a few of these. + + $x = "\x9b"; + ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); + + print "# and now again in [] ranges\n"; + + $x = "\x9b"; + ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); +} + +{ + # high bit bug -- japhy + my $x = "ab\200d"; + $x =~ /.*?\200/ or print "not "; + print "ok 715\n"; +} + +print "# some Unicode properties\n"; + +{ + # Dashes, underbars, case. + print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; + print "ok 716\n"; + + # Complement, leading and trailing whitespace. + print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; + print "ok 717\n"; + + # No ^In, dashes, case, dash, any intervening (word-break) whitespace. + # (well, newlines don't work...) + print "not " unless "\x80" =~ /\p{latin-1 supplement}/; + print "ok 718\n"; +} + +{ + print "not " unless "a" =~ /\pL/; + print "ok 719\n"; + + print "not " unless "a" =~ /\p{IsLl}/; + print "ok 720\n"; + + print "not " if "a" =~ /\p{IsLu}/; + print "ok 721\n"; + + print "not " unless "a" =~ /\p{Ll}/; + print "ok 722\n"; + + print "not " if "a" =~ /\p{Lu}/; + print "ok 723\n"; + + print "not " unless "A" =~ /\pL/; + print "ok 724\n"; + + print "not " unless "A" =~ /\p{IsLu}/; + print "ok 725\n"; + + print "not " if "A" =~ /\p{IsLl}/; + print "ok 726\n"; + + print "not " unless "A" =~ /\p{Lu}/; + print "ok 727\n"; + + print "not " if "A" =~ /\p{Ll}/; + print "ok 728\n"; + + print "not " if "a" =~ /\PL/; + print "ok 729\n"; + + print "not " if "a" =~ /\P{IsLl}/; + print "ok 730\n"; + + print "not " unless "a" =~ /\P{IsLu}/; + print "ok 731\n"; + + print "not " if "a" =~ /\P{Ll}/; + print "ok 732\n"; + + print "not " unless "a" =~ /\P{Lu}/; + print "ok 733\n"; + + print "not " if "A" =~ /\PL/; + print "ok 734\n"; + + print "not " if "A" =~ /\P{IsLu}/; + print "ok 735\n"; + + print "not " unless "A" =~ /\P{IsLl}/; + print "ok 736\n"; + + print "not " if "A" =~ /\P{Lu}/; + print "ok 737\n"; + + print "not " unless "A" =~ /\P{Ll}/; + print "ok 738\n"; + +} + +{ + print "not " if "a" =~ /\p{Common}/; + print "ok 739\n"; + + print "not " unless "1" =~ /\p{Common}/; + print "ok 740\n"; +} + +{ + print "not " if "a" =~ /\p{Inherited}/; + print "ok 741\n"; + + print "not " unless "\x{300}" =~ /\p{Inherited}/; + print "ok 742\n"; +} + +{ + print "not " unless "a" =~ /\p{L&}/; + print "ok 743\n"; + + print "not " if "1" =~ /\p{L&}/; + print "ok 744\n"; +} + +{ + print "not " unless "a" =~ /\p{Lowercase Letter}/; + print "ok 745\n"; + + print "not " if "A" =~ /\p{lowercaseletter}/; + print "ok 746\n"; +} + +{ + print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/; + print "ok 747\n"; +} + +{ + # Script=, Block=, Category= + + print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; + print "ok 748\n"; + + print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/; + print "ok 749\n"; + + print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/; + print "ok 750\n"; +} + +{ + print "# the basic character classes and Unicode \n"; + + # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; + print "not " unless "\x{0100}" =~ /\w/; + print "ok 751\n"; + + # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; + print "not " unless "\x{0660}" =~ /\d/; + print "ok 752\n"; + + # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; + print "not " unless "\x{1680}" =~ /\s/; + print "ok 753\n"; +} + +{ + print "# folding matches and Unicode\n"; + + print "not " unless "a\x{100}" =~ /A/i; + print "ok 754\n"; + + print "not " unless "A\x{100}" =~ /a/i; + print "ok 755\n"; + + print "not " unless "a\x{100}" =~ /a/i; + print "ok 756\n"; + + print "not " unless "A\x{100}" =~ /A/i; + print "ok 757\n"; + + print "not " unless "\x{101}a" =~ /\x{100}/i; + print "ok 758\n"; + + print "not " unless "\x{100}a" =~ /\x{100}/i; + print "ok 759\n"; + + print "not " unless "\x{101}a" =~ /\x{101}/i; + print "ok 760\n"; + + print "not " unless "\x{100}a" =~ /\x{101}/i; + print "ok 761\n"; + + print "not " unless "a\x{100}" =~ /A\x{100}/i; + print "ok 762\n"; + + print "not " unless "A\x{100}" =~ /a\x{100}/i; + print "ok 763\n"; + + print "not " unless "a\x{100}" =~ /a\x{100}/i; + print "ok 764\n"; + + print "not " unless "A\x{100}" =~ /A\x{100}/i; + print "ok 765\n"; + + print "not " unless "a\x{100}" =~ /[A]/i; + print "ok 766\n"; + + print "not " unless "A\x{100}" =~ /[a]/i; + print "ok 767\n"; + + print "not " unless "a\x{100}" =~ /[a]/i; + print "ok 768\n"; + + print "not " unless "A\x{100}" =~ /[A]/i; + print "ok 769\n"; + + print "not " unless "\x{101}a" =~ /[\x{100}]/i; + print "ok 770\n"; + + print "not " unless "\x{100}a" =~ /[\x{100}]/i; + print "ok 771\n"; + + print "not " unless "\x{101}a" =~ /[\x{101}]/i; + print "ok 772\n"; + + print "not " unless "\x{100}a" =~ /[\x{101}]/i; + print "ok 773\n"; + +} + +{ + use charnames ':full'; + + print "# LATIN LETTER A WITH GRAVE\n"; + my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; + my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; + + print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n"; + print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n"; + print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n"; + print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n"; + + print "# GREEK LETTER ALPHA WITH VRACHY\n"; + + $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; + $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; + + print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n"; + print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n"; + print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n"; + print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n"; + + print "# LATIN LETTER Y WITH DIAERESIS\n"; + + $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; + $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; + print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n"; + print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n"; + print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n"; + print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n"; +} + +{ + use warnings; + use charnames ':full'; + + print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; + + my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; + my $char = "\N{COMBINING GREEK PERISPOMENI}"; + + # Before #13843 this was failing by matching falsely. + print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n"; +} + +{ + print "# \\X\n"; + + use charnames ':full'; + + print "a!" =~ /^(\X)!/ && $1 eq "a" ? + "ok 787\n" : "not ok 787 # $1\n"; + print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? + "ok 788\n" : "not ok 788 # $1\n"; + print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? + "ok 789\n" : "not ok 789 # $1\n"; + print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? + "ok 790\n" : "not ok 790 # $1\n"; + print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}" ? + "ok 791\n" : "not ok 791 # $1\n"; + print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ + /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? + "ok 792\n" : "not ok 792 # $1\n"; +} + +{ + print "#\\C and \\X\n"; + + print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; + print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; +} + +{ + print "# FINAL SIGMA\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; + print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; + print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; + + print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; + print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; + print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; + + print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; + print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; + print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; + + print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; + print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; + print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; + + print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; + print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; + print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; + + print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; + print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; + print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; +} + +{ + print "# parlez-vous?\n"; + + use charnames ':full'; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran.ais/ && + $& eq "francais" ? + "ok 813\n" : "not ok 813\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran.ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 814\n" : "not ok 814\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran\Cais/ && + $& eq "francais" ? + "ok 815\n" : "not ok 815\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded + "ok 816\n" : "not ok 816\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran\Xais/ && + $& eq "francais" ? + "ok 817\n" : "not ok 817\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran\Xais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 818\n" : "not ok 818\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /fran\Xais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 819\n" : "not ok 819\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 820\n" : "not ok 820\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /franc\N{COMBINING CEDILLA}ais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 821\n" : "not ok 821\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "francais" ? + "ok 822\n" : "not ok 822\n"; + + print "fran\N{LATIN SMALL LETTER C}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "francais" ? + "ok 823\n" : "not ok 823\n"; + + print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? + "ok 824\n" : "not ok 824\n"; + + print "franc\N{COMBINING CEDILLA}ais" =~ + /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && + $& eq "franc\N{COMBINING CEDILLA}ais" ? + "ok 825\n" : "not ok 825\n"; +} + +{ + print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n"; + + { + my $regex = "ABcde"; + my $string = "abcDE\x{100}"; + chop($string); + if ($string =~ m/$regex/i) { + print "ok 826\n"; + } else { + print "not ok 826\n"; + } + } + + { + my $regex = "ABcde\x{100}"; + my $string = "abcDE"; + chop($regex); + if ($string =~ m/$regex/i) { + print "ok 827\n"; + } else { + print "not ok 827\n"; + } + } + + { + my $regex = "ABcde\x{100}"; + my $string = "abcDE\x{100}"; + chop($regex); + chop($string); + if ($string =~ m/$regex/i) { + print "ok 828\n"; + } else { + print "not ok 828\n"; + } + } +} + +{ + print "# more SIGMAs\n"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + my $S3 = "$SIGMA$Sigma$sigma"; + + print ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 829\n" : "not ok 829\n"; + print ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 830\n" : "not ok 830\n"; + print ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 831\n" : "not ok 831\n"; + + print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 832\n" : "not ok 832\n"; + print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 833\n" : "not ok 833\n"; + print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? + "ok 834\n" : "not ok 834\n"; +} + +{ + print "# LATIN SMALL LETTER SHARP S\n"; + + use charnames ':full'; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ + /\N{LATIN SMALL LETTER SHARP S}/ ? "ok 835\n" : "not ok 835\n"; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ + /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 836\n" : "not ok 836\n"; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/ ? "ok 837\n" : "not ok 837\n"; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 838\n" : "not ok 838\n"; + + print "ss" =~ + /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 839\n" : "not ok 839\n"; + + print "SS" =~ + /\N{LATIN SMALL LETTER SHARP S}/i ? "ok 840\n" : "not ok 840\n"; + + print "ss" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 841\n" : "not ok 841\n"; + + print "SS" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n"; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ? + "ok 843\n" : "not ok 843\n"; + + print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ? + "ok 844\n" : "not ok 844\n"; +} + +{ + print "# more whitespace: U+0085, U+2028, U+2029\n"; + + # U+0085 needs to be forced to be Unicode, the \x{100} does that. + print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; + print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; + print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; +} + +{ + print "# . with /s should work on characters, as opposed to bytes\n"; + + my $s = "\x{e4}\x{100}"; + + # This is not expected to match: the point is that + # neither should we get "Malformed UTF-8" warnings. + print $s =~ /\G(.+?)\n/gcs ? + "not ok 848\n" : "ok 848\n"; + + my @c; + + while ($s =~ /\G(.)/gs) { + push @c, $1; + } + + print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n"; + + my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 + my $r1 = ""; + while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r1 .= $1 . $2; + } + + my $t2 = $t1 . "\x{100}"; # repeat with a larger char + my $r2 = ""; + while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r2 .= $1 . $2; + } + $r2 =~ s/\x{100}//; + print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n"; +} + +{ + print "# Unicode lookbehind\n"; + + print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n"; + print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n"; + print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; + print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; +} + +{ + print "# UTF-8 hash keys and /\$/\n"; + # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html + + my $u = "a\x{100}"; + my $v = substr($u,0,1); + my $w = substr($u,1,1); + my %u = ( $u => $u, $v => $v, $w => $w ); + my $i = 855; + for (keys %u) { + my $m1 = /^\w*$/ ? 1 : 0; + my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; + print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n"; + $i++; + } +} + +{ + print "# [ID 20020124.005]\n"; + # Fixed by #14795. + my $i = 858; + for my $char ("a", "\x{df}", "\x{100}"){ + $x = "$char b $char"; + $x =~ s{($char)}{ + "c" =~ /c/; + "x"; + }ge; + print substr($x,0,1) eq substr($x,-1,1) ? + "ok $i\n" : "not ok $i # debug: $x\n"; + $i++; + } +} + +{ + print "# SEGV in s/// and UTF-8\n"; + $s = "s#\x{100}" x 4; + $s =~ s/[^\w]/ /g; + print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; +} + +{ + print "# UTF-8 bug (maybe alreayd known?)\n"; + my $u; + + $u = "foo"; + $u =~ s/./\x{100}/g; + print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n"; + + $u = "foobar"; + $u =~ s/[ao]/\x{100}/g; + print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n"; + + $u =~ s/\x{100}/e/g; + print $u eq "feeber" ? "ok 864\n" : "not ok 864\n"; +} + +{ + print "# UTF-8 bug with s///\n"; + # check utf8/non-utf8 mixtures + # try to force all float/anchored check combinations + my $c = "\x{100}"; + my $test = 865; + my $subst; + for my $re ( + "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", + ) { + print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n"; + ++$test; + } + for my $re ("xx.*$c*", "$c*.*xx") { + print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xxx") =~ s/$re//; + print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n"; + ++$test; + } + for my $re ("xxy*", "y*xx") { + print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xx$c") =~ s/$re//; + print $subst eq $c ? "ok $test\n" : "not ok $test\n"; + ++$test; + print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + print +($subst = "xy$c") =~ /$re/ ? "not ok $test\n" : "ok $test\n"; + ++$test; + } + for my $re ("xy$c*z", "x$c*yz") { + print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; + ++$test; + ($subst = "xyz") =~ s/$re//; + print $subst eq '' ? "ok $test\n" : "not ok $test\n"; + ++$test; + } +} + +{ + print "# qr/.../x\n"; + my $test = 893; + + my $R = qr/ A B C # D E/x; + + print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n"; + $test++; + + print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n"; + $test++; + + print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; + $test++; +} + +{ + print "# illegal Unicode properties\n"; + my $test = 896; + + print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; + $test++; + + print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n"; + $test++; +} + +{ + print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; + # requires reuse of last successful pattern + my $test = 898; + $test =~ /\d/; + for (0 .. 1) { + my $match = ?? + 0; + if ($match != $_) { + print "ok $test\n"; + } else { + printf "not ok %s\t# 'match once' %s on %s iteration\n", $test, + $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first'; + } + ++$test; + } + $test =~ /(\d)/; + my $result = join '', $test =~ //g; + if ($result eq $test) { + print "ok $test\n"; + } else { + printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result; + } + ++$test; +} + +print "# user-defined character properties\n"; + +sub InKana1 { + return <<'END'; +3040 309F +30A0 30FF +END +} + +sub InKana2 { + return <<'END'; ++utf8::InHiragana ++utf8::InKatakana +END +} + +sub InKana3 { + return <<'END'; ++utf8::InHiragana ++utf8::InKatakana +-utf8::IsCn +END +} + +sub InNotKana { + return <<'END'; +!utf8::InHiragana +-utf8::InKatakana ++utf8::IsCn +END +} + +$test = 901; + +print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +sub InConsonant { # Not EBCDIC-aware. + return < fail\n"; + ++$test; + print +(!$r or pos($s) == $len + 1) ? "ok $test\n" + : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; + ++$test; + } + } +} + +$test = 923; + +$a = bless qr/foo/, 'Foo'; +print(('goodfood' =~ $a ? '' : 'not '), + "ok $test\t# reblessed qr// matches\n"); +++$test; + +print(($a eq '(?-xism:foo)' ? '' : 'not '), + "ok $test\t# reblessed qr// stringizes\n"); +++$test; + +$x = "\x{3fe}"; +$z=$y = "\317\276"; # $y is byte representation of $x + +$a = qr/$x/; +print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); +++$test; + +print(("a$a" =~ $x ? '' : 'not '), + "ok $test - stringifed qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a$a\z/ ? '' : 'not '), + "ok $test - interpolated qr// preserves utf8\n"); +++$test; + +print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), + "ok $test - postponed interpolation of qr// preserves utf8\n"); +++$test; + +print((length(qr/##/x) == 12 ? '' : 'not '), + "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); +++$test; + +{ use re 'eval'; + +print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re matches utf8\n"); +++$test; + +print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); +++$test; + +print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); +++$test; + +print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); +++$test; + +print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); +++$test; + +print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), + "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); +++$test; +$y = $z; # reset $y after upgrade + +print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), + "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); +++$test; + +} # no re 'eval' + +print "# more user-defined character properties\n"; + +sub IsSyriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{IsSyriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +sub Syriac1 { + return <<'END'; +0712 072C +0730 074A +END +} + +print "\x{0712}" =~ /\p{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; +print "\x{072F}" =~ /\P{Syriac1}/ ? "ok $test\n" : "not ok $test\n"; $test++; + +{ + print "# Change #18179\n"; + # previously failed with "panic: end_shift + my $s = "\x{100}" x 5; + my $ok = $s =~ /(\x{100}{4})/; + my($ord, $len) = (ord $1, length $1); + print +($ok && $ord == 0x100 && $len == 4) + ? "ok $test\n" : "not ok $test\t# $ok/$ord/$len\n"; + ++$test; +} + +{ + print "# [perl #15763]\n"; + + $a = "x\x{100}"; + chop $a; # but leaves the UTF-8 flag + $a .= "y"; # 1 byte before "y" + + ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + + ok($a =~ /^\Cy/, 'match \Cy'); + ok($a =~ /^\C{1}y/, 'match \C{1}y'); + + $a = "\x{100}y"; # 2 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + + ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\Cy/, 'match two \C'); + ok($a =~ /^\C{2}y/, 'match \C{2}'); + + ok($a !~ /^\C\C\Cy/, q{don't match three \Cy}); + ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y}); + + $a = "\x{1000}y"; # 3 bytes before "y" + + ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); + ok($a =~ /^\C{1}/, 'match \C{1}'); + ok($a =~ /^\C\C/, 'match two \C'); + ok($a =~ /^\C{2}/, 'match \C{2}'); + ok($a =~ /^\C\C\C/, 'match three \C'); + ok($a =~ /^\C{3}/, 'match \C{3}'); + + ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); + ok($a =~ /^\C{4}/, 'match \C{4}'); + + ok($a =~ /^\C\C\Cy/, 'match three \Cy'); + ok($a =~ /^\C{3}y/, 'match \C{3}y'); + + ok($a !~ /^\C\C\C\C\y/, q{don't match four \Cy}); + ok($a !~ /^\C{4}y/, q{don't match \C{4}y}); +} + +$_ = 'aaaaaaaaaa'; +utf8::upgrade($_); chop $_; $\="\n"; +ok(/[^\s]+/, "m/[^\s]/ utf8"); +ok(/[^\d]+/, "m/[^\d]/ utf8"); +ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); +ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); + +ok("\x{100}" =~ /\x{100}/, "[perl #15397]"); +ok("\x{100}" =~ /(\x{100})/, "[perl #15397]"); +ok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]"); +ok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]"); +ok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]"); + +$x = "CD"; +$x =~ /(AB)*?CD/; +ok(!defined $1, "[perl #7471]"); + +$x = "CD"; +$x =~ /(AB)*CD/; +ok(!defined $1, "[perl #7471]"); + +$pattern = "^(b+?|a){1,2}c"; +ok("bac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); +ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); +ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); +ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); + +{ + # [perl #18232] + "\x{100}" =~ /(.)/; + ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); + { 'a' =~ /./; } + ok( $1 eq "\x{100}", '$1 is still utf-8' ); + ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); +} + +{ + use utf8; + my $attr = 'Name-1' ; + + my $NormalChar = qr/[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; + my $NormalWord = qr/${NormalChar}+?/; + my $PredNameHyphen = qr/^${NormalWord}(\-${NormalWord})*?$/; + + $attr =~ /^$/; + ok( $attr =~ $PredNameHyphen, "[perl #19767] original test" ); +} + +{ + use utf8; + "a" =~ m/[b]/; + ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" ); +} + +{ + + $p = 1; + foreach (1,2,3,4) { + $p++ if /(??{ $p })/ + } + ok ($p == 5, "[perl #20683] (??{ }) returns stale values"); + { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } } + tie $p, P; + foreach (1,2,3,4) { + /(??{ $p })/ + } + ok ( $p == 5, "(??{ }) returns stale values"); +} + +{ + # Subject: Odd regexp behavior + # From: Markus Kuhn + # Date: Wed, 26 Feb 2003 16:53:12 +0000 + # Message-Id: + # To: perl-unicode@perl.org + + $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26"); + + $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok($x eq "b k", "Markus Kuhn 2003-02-26"); + + ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); +} + +{ + my $i; + ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'), + "[perl #21411] (??{ .. }) corrupts split's stack"); + split /(?{'WOW'})/, 'abc'; + ok('a|b|c' eq join ('|', @_), + "[perl #21411] (?{ .. }) version of the above"); +} + +{ + split /(?{ split "" })/, "abc"; + ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'); +} + +{ + ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile"); +} + +{ + package Str; + use overload q/""/ => sub { ${$_[0]}; }; + sub new { my ($c, $v) = @_; bless \$v, $c; } + + package main; + $_ = Str->new("a\x{100}/\x{100}b"); + ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"); +} + +{ + $_ = "code: 'x' { '...' }\n"; study; + my @x; push @x, $& while m/'[^\']*'/gx; + ok(join(":", @x) eq "'x':'...'", + "[perl #17757] Parse::RecDescent triggers infinite loop"); +} + +{ + my $re = qq/^([^X]*)X/; + utf8::upgrade($re); + ok("\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"); +} + +# bug #22354 +sub func ($) { + ok( "a\nb" !~ /^b/, $_[0] ); + ok( "a\nb" =~ /^b/m, "$_[0] - with /m" ); +} +func "standalone"; +$_ = "x"; s/x/func "in subst"/e; +$_ = "x"; s/x/func "in multiline subst"/em; +#$_ = "x"; /x(?{func "in regexp"})/; +#$_ = "x"; /x(?{func "in multiline regexp"})/m; + +# bug #19049 +$_="abcdef\n"; +@x = m/./g; +ok("abcde" eq "$`", '# TODO #19049 - global match not setting $`'); + +ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr'); + +# LATIN SMALL/CAPITAL LETTER A WITH MACRON +ok(" \x{101}" =~ qr/\x{100}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW +ok(" \x{1E01}" =~ qr/\x{1E00}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# DESERET SMALL/CAPITAL LETTER LONG I +ok(" \x{10428}" =~ qr/\x{10400}/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +# LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' +ok(" \x{1E01}x" =~ qr/\x{1E00}X/i, + "<20030808193656.5109.1@llama.ni-s.u-net.com>"); + +{ + # [perl #23769] Unicode regex broken on simple example + # regrepeat() didn't handle UTF-8 EXACT case right. + + my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; + + ok($s =~ /\x{a0}/, "[perl #23769]"); + ok($s =~ /\x{a0}+/, "[perl #23769]"); + ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]"); +} + +# last test 1015 +