| 1 | #!./perl |
| 2 | # |
| 3 | # This is a home for regular expression tests that don't fit into |
| 4 | # the format supported by re/regexp.t. If you want to add a test |
| 5 | # that does fit that format, add it to re/re_tests, not here. |
| 6 | |
| 7 | use strict; |
| 8 | use warnings; |
| 9 | use 5.010; |
| 10 | use Config; |
| 11 | |
| 12 | sub run_tests; |
| 13 | |
| 14 | $| = 1; |
| 15 | |
| 16 | |
| 17 | BEGIN { |
| 18 | chdir 't' if -d 't'; |
| 19 | @INC = ('../lib','.'); |
| 20 | require './test.pl'; |
| 21 | skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-"); |
| 22 | } |
| 23 | |
| 24 | |
| 25 | plan tests => 2531; # Update this when adding/deleting tests. |
| 26 | |
| 27 | run_tests() unless caller; |
| 28 | |
| 29 | # |
| 30 | # Tests start here. |
| 31 | # |
| 32 | sub run_tests { |
| 33 | |
| 34 | like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/, |
| 35 | "Match UTF-8 char in presence of (??{ }); Bug 20000731.001"); |
| 36 | |
| 37 | { |
| 38 | no warnings 'uninitialized'; |
| 39 | ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005"); |
| 40 | } |
| 41 | |
| 42 | { |
| 43 | my $message = 'bug id 20001008.001'; |
| 44 | |
| 45 | my @x = ("stra\337e 138", "stra\337e 138"); |
| 46 | for (@x) { |
| 47 | ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message); |
| 48 | ok(my ($latin) = /^(.+)(?:\s+\d)/, $message); |
| 49 | is($latin, "stra\337e", $message); |
| 50 | ok($latin =~ s/stra\337e/straße/, $message); |
| 51 | # |
| 52 | # Previous code follows, but outcommented - there were no tests. |
| 53 | # |
| 54 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a |
| 55 | # use utf8; # needed for the raw UTF-8 |
| 56 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | { |
| 61 | # Fist half of the bug. |
| 62 | my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003'; |
| 63 | my $X = chr (1448); |
| 64 | ok(my ($Y) = $X =~ /(.*)/, $message); |
| 65 | is($Y, v1448, $message); |
| 66 | is(length $Y, 1, $message); |
| 67 | |
| 68 | # Second half of the bug. |
| 69 | $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003'; |
| 70 | $X = ''; |
| 71 | $X =~ s/^/chr(1488)/e; |
| 72 | is(length $X, 1, $message); |
| 73 | is(ord $X, 1488, $message); |
| 74 | } |
| 75 | |
| 76 | { |
| 77 | my $message = 'Repeated s///; Bug 20001108.001'; |
| 78 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
| 79 | my $Y = $X; |
| 80 | $Y =~ s/(B)/$1/ for 0 .. 3; |
| 81 | is($Y, $X, $message); |
| 82 | is($X, "Szab\x{f3},Bal\x{e1}zs", $message); |
| 83 | } |
| 84 | |
| 85 | { |
| 86 | my $message = 's/// on UTF-8 string; Bug 20000517.001'; |
| 87 | my $x = "\x{100}A"; |
| 88 | $x =~ s/A/B/; |
| 89 | is($x, "\x{100}B", $message); |
| 90 | is(length $x, 2, $message); |
| 91 | } |
| 92 | |
| 93 | { |
| 94 | my $message = '\C and É; Bug 20001230.002'; |
| 95 | ok("École" =~ /^\C\C(.)/ && $1 eq 'c', $message); |
| 96 | like("École", qr/^\C\C(c)/, $message); |
| 97 | } |
| 98 | |
| 99 | { |
| 100 | # The original bug report had 'no utf8' here but that was irrelevant. |
| 101 | |
| 102 | my $message = "Don't dump core; Bug 20010306.008"; |
| 103 | my $a = "a\x{1234}"; |
| 104 | like($a, qr/\w/, $message); # used to core dump. |
| 105 | } |
| 106 | |
| 107 | { |
| 108 | my $message = '/g in scalar context; Bug 20010410.006'; |
| 109 | for my $rx ('/(.*?)\{(.*?)\}/csg', |
| 110 | '/(.*?)\{(.*?)\}/cg', |
| 111 | '/(.*?)\{(.*?)\}/sg', |
| 112 | '/(.*?)\{(.*?)\}/g', |
| 113 | '/(.+?)\{(.+?)\}/csg',) { |
| 114 | my $i = 0; |
| 115 | my $input = "a{b}c{d}"; |
| 116 | eval <<" --"; |
| 117 | while (eval \$input =~ $rx) { |
| 118 | \$i ++; |
| 119 | } |
| 120 | -- |
| 121 | is($i, 2, $message); |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | { |
| 126 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
| 127 | for ("\n", "\t", "\014", "\r") { |
| 128 | unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003", ord $_); |
| 129 | } |
| 130 | for (" ") { |
| 131 | like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003"); |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | { |
| 136 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context |
| 137 | |
| 138 | $_ = "ababacadaea"; |
| 139 | my $a = join ":", /b./gc; |
| 140 | my $b = join ":", /a./gc; |
| 141 | my $c = pos; |
| 142 | is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004"); |
| 143 | } |
| 144 | |
| 145 | { |
| 146 | # [ID 20010407.006] matching utf8 return values from |
| 147 | # functions does not work |
| 148 | |
| 149 | my $message = 'UTF-8 return values from functions; Bug 20010407.006'; |
| 150 | package ID_20010407_006; |
| 151 | sub x {"a\x{1234}"} |
| 152 | my $x = x; |
| 153 | my $y; |
| 154 | ::ok($x =~ /(..)/, $message); |
| 155 | $y = $1; |
| 156 | ::ok(length ($y) == 2 && $y eq $x, $message); |
| 157 | ::ok(x =~ /(..)/, $message); |
| 158 | $y = $1; |
| 159 | ::ok(length ($y) == 2 && $y eq $x, $message); |
| 160 | } |
| 161 | |
| 162 | { |
| 163 | # High bit bug -- japhy |
| 164 | my $x = "ab\200d"; |
| 165 | ok $x =~ /.*?\200/, "High bit fine"; |
| 166 | } |
| 167 | |
| 168 | { |
| 169 | my $message = 'UTF-8 hash keys and /$/'; |
| 170 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters |
| 171 | # /2002-01/msg01327.html |
| 172 | |
| 173 | my $u = "a\x{100}"; |
| 174 | my $v = substr ($u, 0, 1); |
| 175 | my $w = substr ($u, 1, 1); |
| 176 | my %u = ($u => $u, $v => $v, $w => $w); |
| 177 | for (keys %u) { |
| 178 | my $m1 = /^\w*$/ ? 1 : 0; |
| 179 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; |
| 180 | is($m1, $m2, $message); |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | { |
| 185 | my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005"; |
| 186 | |
| 187 | for my $char ("a", "\x{df}", "\x{100}") { |
| 188 | my $x = "$char b $char"; |
| 189 | $x =~ s{($char)}{ |
| 190 | "c" =~ /c/; |
| 191 | "x"; |
| 192 | }ge; |
| 193 | is(substr ($x, 0, 1), substr ($x, -1, 1), $message); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | { |
| 198 | my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005"; |
| 199 | |
| 200 | # Requires reuse of last successful pattern. |
| 201 | my $num = 123; |
| 202 | $num =~ /\d/; |
| 203 | for (0 .. 1) { |
| 204 | my $match = m?? + 0; |
| 205 | ok($match != $_, $message) |
| 206 | or diag(sprintf "'match one' %s on %s iteration" => |
| 207 | $match ? 'succeeded' : 'failed', |
| 208 | $_ ? 'second' : 'first'); |
| 209 | } |
| 210 | $num =~ /(\d)/; |
| 211 | my $result = join "" => $num =~ //g; |
| 212 | is($result, $num, $message); |
| 213 | } |
| 214 | |
| 215 | { |
| 216 | my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002'; |
| 217 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { |
| 218 | my ($type, $char) = @$_; |
| 219 | for my $len (32000, 32768, 33000) { |
| 220 | my $s = $char . "f" x $len; |
| 221 | my $r = $s =~ /$char([f]*)/gc; |
| 222 | ok($r, $message) or diag("<$type x $len>"); |
| 223 | ok(!$r || pos ($s) == $len + 1, $message) |
| 224 | or diag("<$type x $len>; pos = @{[pos $s]}"); |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | { |
| 230 | my $s = "\x{100}" x 5; |
| 231 | my $ok = $s =~ /(\x{100}{4})/; |
| 232 | my ($ord, $len) = (ord $1, length $1); |
| 233 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]"; |
| 234 | } |
| 235 | |
| 236 | { |
| 237 | our $a = "x\x{100}"; |
| 238 | chop $a; # Leaves the UTF-8 flag |
| 239 | $a .= "y"; # 1 byte before 'y'. |
| 240 | |
| 241 | like($a, qr/^\C/, 'match one \C on 1-byte UTF-8; Bug 15763'); |
| 242 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); |
| 243 | |
| 244 | like($a, qr/^\Cy/, 'match \Cy; Bug 15763'); |
| 245 | like($a, qr/^\C{1}y/, 'match \C{1}y; Bug 15763'); |
| 246 | |
| 247 | unlike($a, qr/^\C\Cy/, q {don't match two \Cy; Bug 15763}); |
| 248 | unlike($a, qr/^\C{2}y/, q {don't match \C{2}y; Bug 15763}); |
| 249 | |
| 250 | $a = "\x{100}y"; # 2 bytes before "y" |
| 251 | |
| 252 | like($a, qr/^\C/, 'match one \C on 2-byte UTF-8; Bug 15763'); |
| 253 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); |
| 254 | like($a, qr/^\C\C/, 'match two \C; Bug 15763'); |
| 255 | like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); |
| 256 | |
| 257 | like($a, qr/^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte; Bug 15763'); |
| 258 | like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); |
| 259 | |
| 260 | like($a, qr/^\C\Cy/, 'match two \C; Bug 15763'); |
| 261 | like($a, qr/^\C{2}y/, 'match \C{2}; Bug 15763'); |
| 262 | |
| 263 | unlike($a, qr/^\C\C\Cy/, q {don't match three \Cy; Bug 15763}); |
| 264 | unlike($a, qr/^\C{2}\Cy/, q {don't match \C{2}\Cy; Bug 15763}); |
| 265 | unlike($a, qr/^\C{3}y/, q {don't match \C{3}y; Bug 15763}); |
| 266 | |
| 267 | $a = "\x{1000}y"; # 3 bytes before "y" |
| 268 | |
| 269 | like($a, qr/^\C/, 'match one \C on three-byte UTF-8; Bug 15763'); |
| 270 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); |
| 271 | like($a, qr/^\C\C/, 'match two \C; Bug 15763'); |
| 272 | like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); |
| 273 | like($a, qr/^\C\C\C/, 'match three \C; Bug 15763'); |
| 274 | like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); |
| 275 | |
| 276 | like($a, qr/^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte; Bug 15763'); |
| 277 | like($a, qr/^\C{4}/, 'match \C{4}; Bug 15763'); |
| 278 | |
| 279 | like($a, qr/^\C\C\Cy/, 'match three \Cy; Bug 15763'); |
| 280 | like($a, qr/^\C{3}y/, 'match \C{3}y; Bug 15763'); |
| 281 | |
| 282 | unlike($a, qr/^\C\C\C\Cy/, q {don't match four \Cy; Bug 15763}); |
| 283 | unlike($a, qr/^\C{4}y/, q {don't match \C{4}y; Bug 15763}); |
| 284 | } |
| 285 | |
| 286 | |
| 287 | { |
| 288 | my $message = 'UTF-8 matching; Bug 15397'; |
| 289 | like("\x{100}", qr/\x{100}/, $message); |
| 290 | like("\x{100}", qr/(\x{100})/, $message); |
| 291 | like("\x{100}", qr/(\x{100}){1}/, $message); |
| 292 | like("\x{100}\x{100}", qr/(\x{100}){2}/, $message); |
| 293 | like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message); |
| 294 | } |
| 295 | |
| 296 | { |
| 297 | my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471'; |
| 298 | local $_ = 'CD'; |
| 299 | ok(/(AB)*?CD/ && !defined $1, $message); |
| 300 | ok(/(AB)*CD/ && !defined $1, $message); |
| 301 | } |
| 302 | |
| 303 | { |
| 304 | my $message = "Caching shouldn't prevent match; Bug 3547"; |
| 305 | my $pattern = "^(b+?|a){1,2}c"; |
| 306 | ok("bac" =~ /$pattern/ && $1 eq 'a', $message); |
| 307 | ok("bbac" =~ /$pattern/ && $1 eq 'a', $message); |
| 308 | ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message); |
| 309 | ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message); |
| 310 | } |
| 311 | |
| 312 | { |
| 313 | ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232'); |
| 314 | is($1, "\x{100}", '$1 is UTF-8; Bug 18232'); |
| 315 | { 'a' =~ /./; } |
| 316 | is($1, "\x{100}", '$1 is still UTF-8; Bug 18232'); |
| 317 | isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232'); |
| 318 | } |
| 319 | |
| 320 | { |
| 321 | my $message = "Optimizer doesn't prematurely reject match; Bug 19767"; |
| 322 | use utf8; |
| 323 | |
| 324 | my $attr = 'Name-1'; |
| 325 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; |
| 326 | my $NormalWord = qr /${NormalChar}+?/; |
| 327 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; |
| 328 | |
| 329 | $attr =~ /^$/; |
| 330 | like($attr, $PredNameHyphen, $message); # Original test. |
| 331 | |
| 332 | "a" =~ m/[b]/; |
| 333 | like("0", qr/\p{N}+\z/, $message); # Variant. |
| 334 | } |
| 335 | |
| 336 | { |
| 337 | my $message = "(??{ }) doesn't return stale values; Bug 20683"; |
| 338 | our $p = 1; |
| 339 | foreach (1, 2, 3, 4) { |
| 340 | $p ++ if /(??{ $p })/ |
| 341 | } |
| 342 | is($p, 5, $message); |
| 343 | |
| 344 | { |
| 345 | package P; |
| 346 | $a = 1; |
| 347 | sub TIESCALAR {bless []} |
| 348 | sub FETCH {$a ++} |
| 349 | } |
| 350 | tie $p, "P"; |
| 351 | foreach (1, 2, 3, 4) { |
| 352 | /(??{ $p })/ |
| 353 | } |
| 354 | is($p, 5, $message); |
| 355 | } |
| 356 | |
| 357 | { |
| 358 | # Subject: Odd regexp behavior |
| 359 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> |
| 360 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 |
| 361 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> |
| 362 | # To: perl-unicode@perl.org |
| 363 | |
| 364 | my $message = 'Markus Kuhn 2003-02-26'; |
| 365 | |
| 366 | my $x = "\x{2019}\nk"; |
| 367 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
| 368 | is($x, "\x{2019} k", $message); |
| 369 | |
| 370 | $x = "b\nk"; |
| 371 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
| 372 | is($x, "b k", $message); |
| 373 | |
| 374 | like("\x{2019}", qr/\S/, $message); |
| 375 | } |
| 376 | |
| 377 | { |
| 378 | my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411"; |
| 379 | our $i; |
| 380 | is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message); |
| 381 | no warnings 'syntax'; |
| 382 | @_ = split /(?{'WOW'})/, 'abc'; |
| 383 | local $" = "|"; |
| 384 | is("@_", "a|b|c", $message); |
| 385 | } |
| 386 | |
| 387 | is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split'); |
| 388 | |
| 389 | { |
| 390 | $_ = "code: 'x' { '...' }\n"; study; |
| 391 | my @x; push @x, $& while m/'[^\']*'/gx; |
| 392 | local $" = ":"; |
| 393 | is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757"); |
| 394 | } |
| 395 | |
| 396 | { |
| 397 | sub func ($) { |
| 398 | ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354"); |
| 399 | ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354"); |
| 400 | } |
| 401 | func "standalone"; |
| 402 | $_ = "x"; s/x/func "in subst"/e; |
| 403 | $_ = "x"; s/x/func "in multiline subst"/em; |
| 404 | $_ = "x"; /x(?{func "in regexp"})/; |
| 405 | $_ = "x"; /x(?{func "in multiline regexp"})/m; |
| 406 | } |
| 407 | |
| 408 | { |
| 409 | $_ = "abcdef\n"; |
| 410 | my @x = m/./g; |
| 411 | is("abcde", $`, 'Global match sets $`; Bug 19049'); |
| 412 | } |
| 413 | |
| 414 | { |
| 415 | # [perl #23769] Unicode regex broken on simple example |
| 416 | # regrepeat() didn't handle UTF-8 EXACT case right. |
| 417 | |
| 418 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; |
| 419 | my $message = "$Mess; Bug 23769"; |
| 420 | |
| 421 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; |
| 422 | |
| 423 | like($s, qr/\x{a0}/, $message); |
| 424 | like($s, qr/\x{a0}+/, $message); |
| 425 | like($s, qr/\x{a0}\x{a0}/, $message); |
| 426 | |
| 427 | $message = "$Mess (easy variant); Bug 23769"; |
| 428 | ok("aaa\x{100}" =~ /(a+)/, $message); |
| 429 | is($1, "aaa", $message); |
| 430 | |
| 431 | $message = "$Mess (easy invariant); Bug 23769"; |
| 432 | ok("aaa\x{100} " =~ /(a+?)/, $message); |
| 433 | is($1, "a", $message); |
| 434 | |
| 435 | $message = "$Mess (regrepeat variant); Bug 23769"; |
| 436 | ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message); |
| 437 | is($1, "\xa0", $message); |
| 438 | |
| 439 | $message = "$Mess (regrepeat invariant); Bug 23769"; |
| 440 | ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message); |
| 441 | is($1, "\xa0\xa0\xa0", $message); |
| 442 | |
| 443 | $message = "$Mess (hard variant); Bug 23769"; |
| 444 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message); |
| 445 | is($1, "\xa0\xa1", $message); |
| 446 | |
| 447 | $message = "$Mess (hard invariant); Bug 23769"; |
| 448 | ok("ababab\x{100} " =~ /((?:ab)+)/, $message); |
| 449 | is($1, 'ababab', $message); |
| 450 | |
| 451 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message); |
| 452 | is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message); |
| 453 | |
| 454 | ok("ababab\x{100} " =~ /((?:ab)+?)/, $message); |
| 455 | is($1, "ab", $message); |
| 456 | |
| 457 | $message = "Don't match first byte of UTF-8 representation; Bug 23769"; |
| 458 | unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message); |
| 459 | unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message); |
| 460 | unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message); |
| 461 | } |
| 462 | |
| 463 | { |
| 464 | # perl panic: pp_match start/end pointers |
| 465 | |
| 466 | is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc", |
| 467 | 'Captures can move backwards in string; Bug 25269'); |
| 468 | } |
| 469 | |
| 470 | { |
| 471 | # \cA not recognized in character classes |
| 472 | like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940'); |
| 473 | like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940'); |
| 474 | like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940'); |
| 475 | like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940'); |
| 476 | like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940'); |
| 477 | like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940'); |
| 478 | like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940'); |
| 479 | unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940'); |
| 480 | } |
| 481 | |
| 482 | { |
| 483 | # perl #28532: optional zero-width match at end of string is ignored |
| 484 | |
| 485 | ok("abc" =~ /^abc(\z)?/ && defined($1), |
| 486 | 'Optional zero-width match at end of string; Bug 28532'); |
| 487 | ok("abc" =~ /^abc(\z)??/ && !defined($1), |
| 488 | 'Optional zero-width match at end of string; Bug 28532'); |
| 489 | } |
| 490 | |
| 491 | { |
| 492 | my $utf8 = "\xe9\x{100}"; chop $utf8; |
| 493 | my $latin1 = "\xe9"; |
| 494 | |
| 495 | like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207"); |
| 496 | like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207"); |
| 497 | like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207"); |
| 498 | like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207"); |
| 499 | |
| 500 | like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207"); |
| 501 | like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207"); |
| 502 | like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207"); |
| 503 | like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207"); |
| 504 | } |
| 505 | |
| 506 | { |
| 507 | my $s = "abcd"; |
| 508 | $s =~ /(..)(..)/g; |
| 509 | $s = $1; |
| 510 | $s = $2; |
| 511 | is($2, 'cd', |
| 512 | "Assigning to original string does not corrupt match vars; Bug 37038"); |
| 513 | } |
| 514 | |
| 515 | { |
| 516 | { |
| 517 | package wooosh; |
| 518 | sub gloople {"!"} |
| 519 | } |
| 520 | my $aeek = bless {} => 'wooosh'; |
| 521 | is(do {$aeek -> gloople () =~ /(.)/g}, 1, |
| 522 | "//g match against return value of sub [change e26a497577f3ce7b]"); |
| 523 | |
| 524 | sub gloople {"!"} |
| 525 | is(do{gloople () =~ /(.)/g}, 1, |
| 526 | "change e26a497577f3ce7b didn't affect sub calls for some reason"); |
| 527 | } |
| 528 | |
| 529 | { |
| 530 | # [perl #78680] |
| 531 | # See changes 26925-26928, which reverted change 26410 |
| 532 | { |
| 533 | package lv; |
| 534 | our $var = "abc"; |
| 535 | sub variable : lvalue {$var} |
| 536 | } |
| 537 | my $o = bless [] => 'lv'; |
| 538 | my $f = ""; |
| 539 | my $r = eval { |
| 540 | for (1 .. 2) { |
| 541 | $f .= $1 if $o -> variable =~ /(.)/g; |
| 542 | } |
| 543 | 1; |
| 544 | }; |
| 545 | if ($r) { |
| 546 | is($f, "ab", "pos() retained between calls"); |
| 547 | } |
| 548 | else { |
| 549 | ok 0, "Code failed: $@"; |
| 550 | } |
| 551 | |
| 552 | our $var = "abc"; |
| 553 | sub variable : lvalue {$var} |
| 554 | my $g = ""; |
| 555 | my $s = eval { |
| 556 | for (1 .. 2) { |
| 557 | $g .= $1 if variable =~ /(.)/g; |
| 558 | } |
| 559 | 1; |
| 560 | }; |
| 561 | if ($s) { |
| 562 | is($g, "ab", "pos() retained between calls"); |
| 563 | } |
| 564 | else { |
| 565 | ok 0, "Code failed: $@"; |
| 566 | } |
| 567 | } |
| 568 | |
| 569 | SKIP: |
| 570 | { |
| 571 | skip "In EBCDIC" if $::IS_EBCDIC; |
| 572 | no warnings 'utf8'; |
| 573 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 |
| 574 | my $ret = 0; |
| 575 | is(do {!($ret = s/[\0]+//g)}, 1, |
| 576 | "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836"); |
| 577 | } |
| 578 | |
| 579 | { |
| 580 | # chr(65535) should be allowed in regexes |
| 581 | |
| 582 | no warnings 'utf8'; # To allow non-characters |
| 583 | my ($c, $r, $s); |
| 584 | |
| 585 | $c = chr 0xffff; |
| 586 | $c =~ s/$c//g; |
| 587 | is($c, "", "U+FFFF, parsed as atom; Bug 38293"); |
| 588 | |
| 589 | $c = chr 0xffff; |
| 590 | $r = "\\$c"; |
| 591 | $c =~ s/$r//g; |
| 592 | is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293"); |
| 593 | |
| 594 | $c = chr 0xffff; |
| 595 | $c =~ s/[$c]//g; |
| 596 | is($c, "", "U+FFFF, parsed in class; Bug 38293"); |
| 597 | |
| 598 | $c = chr 0xffff; |
| 599 | $r = "[\\$c]"; |
| 600 | $c =~ s/$r//g; |
| 601 | is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293"); |
| 602 | |
| 603 | $s = "A\x{ffff}B"; |
| 604 | $s =~ s/\x{ffff}//i; |
| 605 | is($s, "AB", "U+FFFF, EXACTF; Bug 38293"); |
| 606 | |
| 607 | $s = "\x{ffff}A"; |
| 608 | $s =~ s/\bA//; |
| 609 | is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293"); |
| 610 | |
| 611 | $s = "\x{ffff}!"; |
| 612 | $s =~ s/\B!//; |
| 613 | is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293"); |
| 614 | } |
| 615 | |
| 616 | { |
| 617 | |
| 618 | # The printing characters |
| 619 | my @chars = ("A" .. "Z"); |
| 620 | my $delim = ","; |
| 621 | my $size = 32771 - 4; |
| 622 | my $str = ''; |
| 623 | |
| 624 | # Create some random junk. Inefficient, but it works. |
| 625 | for (my $i = 0; $i < $size; $ i++) { |
| 626 | $str .= $chars [rand @chars]; |
| 627 | } |
| 628 | |
| 629 | $str .= ($delim x 4); |
| 630 | my $res; |
| 631 | my $matched; |
| 632 | ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583"); |
| 633 | is($str, "", "Empty string; Bug 39583"); |
| 634 | ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583'); |
| 635 | } |
| 636 | |
| 637 | { |
| 638 | like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940'); |
| 639 | like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); |
| 640 | like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940'); |
| 641 | like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); |
| 642 | |
| 643 | like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940'); |
| 644 | like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940'); |
| 645 | like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940'); |
| 646 | like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940'); |
| 647 | like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940'); |
| 648 | |
| 649 | like("X\@A", qr/X@?A/, '@?; Bug 27940'); |
| 650 | like("X\@A", qr/X@*A/, '@*; Bug 27940'); |
| 651 | like("X\@A", qr/X@(A)/, '@(; Bug 27940'); |
| 652 | like("X\@A", qr/X(@)A/, '@); Bug 27940'); |
| 653 | like("X\@A", qr/X@|ZA/, '@|; Bug 27940'); |
| 654 | |
| 655 | local $" = ','; # non-whitespace and non-RE-specific |
| 656 | like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940'); |
| 657 | like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940'); |
| 658 | like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940'); |
| 659 | like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940'); |
| 660 | like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940'); |
| 661 | } |
| 662 | |
| 663 | { |
| 664 | my $s = 'foo bar baz'; |
| 665 | my (@k, @v, @fetch, $res); |
| 666 | my $count = 0; |
| 667 | my @names = qw ($+{A} $+{B} $+{C}); |
| 668 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { |
| 669 | while (my ($k, $v) = each (%+)) { |
| 670 | $count++; |
| 671 | } |
| 672 | @k = sort keys (%+); |
| 673 | @v = sort values (%+); |
| 674 | $res = 1; |
| 675 | push @fetch, |
| 676 | ["$+{A}", "$1"], |
| 677 | ["$+{B}", "$2"], |
| 678 | ["$+{C}", "$3"], |
| 679 | ; |
| 680 | } |
| 681 | foreach (0 .. 2) { |
| 682 | if ($fetch [$_]) { |
| 683 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
| 684 | } else { |
| 685 | ok 0, $names[$_]; |
| 686 | } |
| 687 | } |
| 688 | is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496"); |
| 689 | is($count, 3, "Got 3 keys in %+ via each; Bug 50496"); |
| 690 | is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496"); |
| 691 | is("@k", "A B C", "Got expected keys; Bug 50496"); |
| 692 | is("@v", "bar baz foo", "Got expected values; Bug 50496"); |
| 693 | eval ' |
| 694 | no warnings "uninitialized"; |
| 695 | print for $+ {this_key_doesnt_exist}; |
| 696 | '; |
| 697 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
| 698 | } |
| 699 | |
| 700 | { |
| 701 | # |
| 702 | # Almost the same as the block above, except that the capture is nested. |
| 703 | # |
| 704 | |
| 705 | my $s = 'foo bar baz'; |
| 706 | my (@k, @v, @fetch, $res); |
| 707 | my $count = 0; |
| 708 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); |
| 709 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { |
| 710 | while (my ($k,$v) = each(%+)) { |
| 711 | $count++; |
| 712 | } |
| 713 | @k = sort keys (%+); |
| 714 | @v = sort values (%+); |
| 715 | $res = 1; |
| 716 | push @fetch, |
| 717 | ["$+{A}", "$2"], |
| 718 | ["$+{B}", "$3"], |
| 719 | ["$+{C}", "$4"], |
| 720 | ["$+{D}", "$1"], |
| 721 | ; |
| 722 | } |
| 723 | foreach (0 .. 3) { |
| 724 | if ($fetch [$_]) { |
| 725 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
| 726 | } else { |
| 727 | ok 0, $names [$_]; |
| 728 | } |
| 729 | } |
| 730 | is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496"); |
| 731 | is($count, 4, "Got 4 keys in %+ via each; Bug 50496"); |
| 732 | is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496"); |
| 733 | is("@k", "A B C D", "Got expected keys; Bug 50496"); |
| 734 | is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496"); |
| 735 | eval ' |
| 736 | no warnings "uninitialized"; |
| 737 | print for $+ {this_key_doesnt_exist}; |
| 738 | '; |
| 739 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
| 740 | } |
| 741 | |
| 742 | { |
| 743 | my $str = 'abc'; |
| 744 | my $count = 0; |
| 745 | my $mval = 0; |
| 746 | my $pval = 0; |
| 747 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} |
| 748 | is($mval, 0, '@- should be empty; Bug 36046'); |
| 749 | is($pval, 0, '@+ should be empty; Bug 36046'); |
| 750 | is($count, 1, 'Should have matched once only; Bug 36046'); |
| 751 | } |
| 752 | |
| 753 | { |
| 754 | my $message = '/m in precompiled regexp; Bug 40684'; |
| 755 | my $s = "abc\ndef"; |
| 756 | my $rex = qr'^abc$'m; |
| 757 | ok($s =~ m/$rex/, $message); |
| 758 | ok($s =~ m/^abc$/m, $message); |
| 759 | } |
| 760 | |
| 761 | { |
| 762 | my $message = '(?: ... )? should not lose $^R; Bug 36909'; |
| 763 | $^R = 'Nothing'; |
| 764 | { |
| 765 | local $^R = "Bad"; |
| 766 | ok('x foofoo y' =~ m { |
| 767 | (foo) # $^R correctly set |
| 768 | (?{ "last regexp code result" }) |
| 769 | }x, $message); |
| 770 | is($^R, 'last regexp code result', $message); |
| 771 | } |
| 772 | is($^R, 'Nothing', $message); |
| 773 | |
| 774 | { |
| 775 | local $^R = "Bad"; |
| 776 | |
| 777 | ok('x foofoo y' =~ m { |
| 778 | (?:foo|bar)+ # $^R correctly set |
| 779 | (?{ "last regexp code result" }) |
| 780 | }x, $message); |
| 781 | is($^R, 'last regexp code result', $message); |
| 782 | } |
| 783 | is($^R, 'Nothing', $message); |
| 784 | |
| 785 | { |
| 786 | local $^R = "Bad"; |
| 787 | ok('x foofoo y' =~ m { |
| 788 | (foo|bar)\1+ # $^R undefined |
| 789 | (?{ "last regexp code result" }) |
| 790 | }x, $message); |
| 791 | is($^R, 'last regexp code result', $message); |
| 792 | } |
| 793 | is($^R, 'Nothing', $message); |
| 794 | |
| 795 | { |
| 796 | local $^R = "Bad"; |
| 797 | ok('x foofoo y' =~ m { |
| 798 | (foo|bar)\1 # This time without the + |
| 799 | (?{"last regexp code result"}) |
| 800 | }x, $message); |
| 801 | is($^R, 'last regexp code result', $message); |
| 802 | } |
| 803 | is($^R, 'Nothing', $message); |
| 804 | } |
| 805 | |
| 806 | { |
| 807 | my $message = 'Match is linear, not quadratic; Bug 22395'; |
| 808 | our $count; |
| 809 | for my $l (10, 100, 1000) { |
| 810 | $count = 0; |
| 811 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; |
| 812 | local $::TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; |
| 813 | is($count, $l + 1, $message); |
| 814 | } |
| 815 | } |
| 816 | |
| 817 | { |
| 818 | my $message = '@-/@+ should not have undefined values; Bug 22614'; |
| 819 | local $_ = 'ab'; |
| 820 | our @len = (); |
| 821 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; |
| 822 | is("@len", "2 2 2", $message); |
| 823 | } |
| 824 | |
| 825 | { |
| 826 | my $message = '$& set on s///; Bug 18209'; |
| 827 | my $text = ' word1 word2 word3 word4 word5 word6 '; |
| 828 | |
| 829 | my @words = ('word1', 'word3', 'word5'); |
| 830 | my $count; |
| 831 | foreach my $word (@words) { |
| 832 | $text =~ s/$word\s//gi; # Leave a space to separate words |
| 833 | # in the resultant str. |
| 834 | # The following block is not working. |
| 835 | if ($&) { |
| 836 | $count ++; |
| 837 | } |
| 838 | # End bad block |
| 839 | } |
| 840 | is($count, 3, $message); |
| 841 | is($text, ' word2 word4 word6 ', $message); |
| 842 | } |
| 843 | |
| 844 | { |
| 845 | # RT#6893 |
| 846 | |
| 847 | local $_ = qq (A\nB\nC\n); |
| 848 | my @res; |
| 849 | while (m#(\G|\n)([^\n]*)\n#gsx) { |
| 850 | push @res, "$2"; |
| 851 | last if @res > 3; |
| 852 | } |
| 853 | is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893"); |
| 854 | } |
| 855 | |
| 856 | { |
| 857 | # No optimizer bug |
| 858 | my @tails = ('', '(?(1))', '(|)', '()?'); |
| 859 | my @quants = ('*','+'); |
| 860 | my $doit = sub { |
| 861 | my $pats = shift; |
| 862 | for (@_) { |
| 863 | for my $pat (@$pats) { |
| 864 | for my $quant (@quants) { |
| 865 | for my $tail (@tails) { |
| 866 | my $re = "($pat$quant\$)$tail"; |
| 867 | ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010"); |
| 868 | ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010"); |
| 869 | } |
| 870 | } |
| 871 | } |
| 872 | } |
| 873 | }; |
| 874 | |
| 875 | my @dpats = ('\d', |
| 876 | '[1234567890]', |
| 877 | '(1|[23]|4|[56]|[78]|[90])', |
| 878 | '(?:1|[23]|4|[56]|[78]|[90])', |
| 879 | '(1|2|3|4|5|6|7|8|9|0)', |
| 880 | '(?:1|2|3|4|5|6|7|8|9|0)'); |
| 881 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); |
| 882 | my @sstrs = (' '); |
| 883 | my @dstrs = ('12345'); |
| 884 | $doit -> (\@spats, @sstrs); |
| 885 | $doit -> (\@dpats, @dstrs); |
| 886 | } |
| 887 | |
| 888 | { |
| 889 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string |
| 890 | |
| 891 | my $utf_8 = "\xd6schel"; |
| 892 | utf8::upgrade ($utf_8); |
| 893 | $utf_8 =~ m {(\xd6|Ö)schel}; |
| 894 | is($1, "\xd6", "Upgrade error; Bug 45605"); |
| 895 | } |
| 896 | |
| 897 | { |
| 898 | # Regardless of utf8ness any character matches itself when |
| 899 | # doing a case insensitive match. See also [perl #36207] |
| 900 | |
| 901 | for my $o (0 .. 255) { |
| 902 | my @ch = (chr ($o), chr ($o)); |
| 903 | utf8::upgrade ($ch [1]); |
| 904 | for my $u_str (0, 1) { |
| 905 | for my $u_pat (0, 1) { |
| 906 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i, |
| 907 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); |
| 908 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i, |
| 909 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); |
| 910 | } |
| 911 | } |
| 912 | } |
| 913 | } |
| 914 | |
| 915 | { |
| 916 | my $message = '$REGMARK in replacement; Bug 49190'; |
| 917 | our $REGMARK; |
| 918 | no warnings 'experimental::lexical_topic'; |
| 919 | my $_ = "A"; |
| 920 | ok(s/(*:B)A/$REGMARK/, $message); |
| 921 | is($_, "B", $message); |
| 922 | $_ = "CCCCBAA"; |
| 923 | ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message); |
| 924 | is($_, "ZYX", $message); |
| 925 | # Use a longer name to force reallocation of $REGMARK. |
| 926 | $_ = "CCCCBAA"; |
| 927 | ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message); |
| 928 | is($_, "ZYYYYYYYYYYYYYYYYX", $message); |
| 929 | } |
| 930 | |
| 931 | { |
| 932 | my $message = 'Substitution evaluation in list context; Bug 52658'; |
| 933 | my $reg = '../xxx/'; |
| 934 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, |
| 935 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); |
| 936 | is($reg, '../bbb/', $message); |
| 937 | is($te [0], '../', $message); |
| 938 | } |
| 939 | |
| 940 | { |
| 941 | my $a = "xyzt" x 8192; |
| 942 | like($a, qr/\A(?>[a-z])*\z/, |
| 943 | '(?>) does not cause wrongness on long string; Bug 60034'); |
| 944 | my $b = $a . chr 256; |
| 945 | chop $b; |
| 946 | is($a, $b, 'Bug 60034'); |
| 947 | like($b, qr/\A(?>[a-z])*\z/, |
| 948 | '(?>) does not cause wrongness on long string with UTF-8; Bug 60034'); |
| 949 | } |
| 950 | |
| 951 | # |
| 952 | # Keep the following tests last -- they may crash perl |
| 953 | # |
| 954 | print "# Tests that follow may crash perl\n"; |
| 955 | { |
| 956 | |
| 957 | my $message = 'Pattern in a loop, failure should not ' . |
| 958 | 'affect previous success; Bug 19049/38869'; |
| 959 | my @list = ( |
| 960 | 'ab cdef', # Matches regex |
| 961 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it |
| 962 | ); |
| 963 | my $y; |
| 964 | my $x; |
| 965 | foreach (@list) { |
| 966 | m/ab(.+)cd/i; # The ignore-case seems to be important |
| 967 | $y = $1; # Use $1, which might not be from the last match! |
| 968 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); |
| 969 | } |
| 970 | is($y, ' ', $message); |
| 971 | is($x, 'ab cd', $message); |
| 972 | } |
| 973 | |
| 974 | { |
| 975 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274"); |
| 976 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, |
| 977 | "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274"); |
| 978 | } |
| 979 | |
| 980 | { |
| 981 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache |
| 982 | |
| 983 | local ${^UTF8CACHE} = -1; |
| 984 | my $message = "Shouldn't panic; Bug 45337"; |
| 985 | my $s = "[a]a{2}"; |
| 986 | utf8::upgrade $s; |
| 987 | like("aaa", qr/$s/, $message); |
| 988 | } |
| 989 | { |
| 990 | my $message = "Check if tree logic breaks \$^R; Bug 57042"; |
| 991 | my $cond_re = qr/\s* |
| 992 | \s* (?: |
| 993 | \( \s* A (?{1}) |
| 994 | | \( \s* B (?{2}) |
| 995 | ) |
| 996 | /x; |
| 997 | my @res; |
| 998 | for my $line ("(A)","(B)") { |
| 999 | if ($line =~ m/$cond_re/) { |
| 1000 | push @res, $^R ? "#$^R" : "UNDEF"; |
| 1001 | } |
| 1002 | } |
| 1003 | is("@res","#1 #2", $message); |
| 1004 | } |
| 1005 | { |
| 1006 | no warnings 'closure'; |
| 1007 | my $re = qr/A(??{"1"})/; |
| 1008 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; |
| 1009 | ok $1 eq "A1"; |
| 1010 | ok $2 eq "B"; |
| 1011 | } |
| 1012 | |
| 1013 | # This only works under -DEBUGGING because it relies on an assert(). |
| 1014 | { |
| 1015 | # Check capture offset re-entrancy of utf8 code. |
| 1016 | |
| 1017 | sub fswash { $_[0] =~ s/([>X])//g; } |
| 1018 | |
| 1019 | my $k1 = "." x 4 . ">>"; |
| 1020 | fswash($k1); |
| 1021 | |
| 1022 | my $k2 = "\x{f1}\x{2022}"; |
| 1023 | $k2 =~ s/([\360-\362])/>/g; |
| 1024 | fswash($k2); |
| 1025 | |
| 1026 | is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508"); |
| 1027 | } |
| 1028 | |
| 1029 | { |
| 1030 | # minimal CURLYM limited to 32767 matches |
| 1031 | my @pat = ( |
| 1032 | qr{a(x|y)*b}, # CURLYM |
| 1033 | qr{a(x|y)*?b}, # .. with minmod |
| 1034 | qr{a([wx]|[yz])*b}, # .. and without tries |
| 1035 | qr{a([wx]|[yz])*?b}, |
| 1036 | ); |
| 1037 | my $len = 32768; |
| 1038 | my $s = join '', 'a', 'x' x $len, 'b'; |
| 1039 | for my $pat (@pat) { |
| 1040 | like($s, $pat, "$pat; Bug 65372"); |
| 1041 | } |
| 1042 | } |
| 1043 | |
| 1044 | { |
| 1045 | local $::TODO = "[perl #38133]"; |
| 1046 | |
| 1047 | "A" =~ /(((?:A))?)+/; |
| 1048 | my $first = $2; |
| 1049 | |
| 1050 | "A" =~ /(((A))?)+/; |
| 1051 | my $second = $2; |
| 1052 | |
| 1053 | is($first, $second); |
| 1054 | } |
| 1055 | |
| 1056 | { |
| 1057 | my $message |
| 1058 | = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998'; |
| 1059 | |
| 1060 | # Catch warnings: |
| 1061 | my $w; |
| 1062 | local $SIG{__WARN__} = sub { $w .= shift }; |
| 1063 | |
| 1064 | # This bug can be reduced to |
| 1065 | qq{\x{30ab}} =~ /\xab|\xa9/; |
| 1066 | # but it's nice to have a more 'real-world' test. The original test |
| 1067 | # case from the RT ticket follows: |
| 1068 | |
| 1069 | my %conv = ( |
| 1070 | "\xab" => "<", |
| 1071 | "\xa9" => "(c)", |
| 1072 | ); |
| 1073 | my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')'; |
| 1074 | $conv_rx = qr{$conv_rx}; |
| 1075 | |
| 1076 | my $x |
| 1077 | = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}} |
| 1078 | . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}} |
| 1079 | . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}} |
| 1080 | . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}} |
| 1081 | . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}} |
| 1082 | . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}} |
| 1083 | . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}} |
| 1084 | . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}}; |
| 1085 | |
| 1086 | $x =~ s{$conv_rx}{$conv{$1}}eg; |
| 1087 | |
| 1088 | is($w, undef, $message); |
| 1089 | } |
| 1090 | |
| 1091 | { |
| 1092 | # minimal CURLYM limited to 32767 matches |
| 1093 | |
| 1094 | is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f", |
| 1095 | 'stclass optimisation does not break + inside (?=); Bug 68564'); |
| 1096 | } |
| 1097 | |
| 1098 | { |
| 1099 | use charnames ":full"; |
| 1100 | # Delayed interpolation of \N' |
| 1101 | my $r1 = qr/\N{THAI CHARACTER SARA I}/; |
| 1102 | my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}"; |
| 1103 | |
| 1104 | # Bug #56444 |
| 1105 | ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/'; |
| 1106 | |
| 1107 | # Bug #62056 |
| 1108 | ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/'; |
| 1109 | |
| 1110 | ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"'; |
| 1111 | ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"'; |
| 1112 | } |
| 1113 | |
| 1114 | { |
| 1115 | use charnames ":full"; |
| 1116 | my $message = '[perl #74982] Period coming after \N{}'; |
| 1117 | ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message); |
| 1118 | ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message); |
| 1119 | } |
| 1120 | |
| 1121 | SKIP: { |
| 1122 | ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org |
| 1123 | |
| 1124 | skip('Perl configured without Encode module', 1) |
| 1125 | unless $Config{extensions} =~ / Encode /; |
| 1126 | |
| 1127 | # Test case cut down by jhi |
| 1128 | fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, 'Segfault using HTML::Entities'); |
| 1129 | use Encode; |
| 1130 | my $t = ord('A') == 193 ? "\xEA" : "\xE9"; |
| 1131 | Encode::_utf8_on($t); |
| 1132 | $t =~ s/([^a])//ge; |
| 1133 | EOP |
| 1134 | } |
| 1135 | |
| 1136 | { |
| 1137 | # pattern must be compiled late or we can break the test file |
| 1138 | my $message = '[perl #115050] repeated nothings in a trie can cause panic'; |
| 1139 | my $pattern; |
| 1140 | $pattern = '[xyz]|||'; |
| 1141 | ok("blah blah" =~ /$pattern/, $message); |
| 1142 | ok("blah blah" =~ /(?:$pattern)h/, $message); |
| 1143 | $pattern = '|||[xyz]'; |
| 1144 | ok("blah blah" =~ /$pattern/, $message); |
| 1145 | ok("blah blah" =~ /(?:$pattern)h/, $message); |
| 1146 | } |
| 1147 | |
| 1148 | { |
| 1149 | # [perl #4289] First mention $& after a match |
| 1150 | fresh_perl_is( |
| 1151 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"', |
| 1152 | "b\n", {}, '$& first mentioned after match'); |
| 1153 | fresh_perl_is( |
| 1154 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"', |
| 1155 | "a\n", {}, '$` first mentioned after match'); |
| 1156 | fresh_perl_is( |
| 1157 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"', |
| 1158 | "c\n", {}, '$\' first mentioned after match'); |
| 1159 | } |
| 1160 | |
| 1161 | { |
| 1162 | # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t |
| 1163 | # this tests some related failures |
| 1164 | # |
| 1165 | # The tests in the block *only* fail when run on 32-bit systems |
| 1166 | # with a malloc that allocates above the 2GB line. On the system |
| 1167 | # in the report above that only happened in a thread. |
| 1168 | my $s = "\x{1ff}" . "f" x 32; |
| 1169 | ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap"); |
| 1170 | } |
| 1171 | } # End of sub run_tests |
| 1172 | |
| 1173 | 1; |