This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Eliminate use of $::Message in t/re/pat_advanced.t
[perl5.git] / t / re / pat_rt_report.t
CommitLineData
e425a60b
YO
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
9use 5.010;
10
11
12sub run_tests;
13
14$| = 1;
15
e425a60b
YO
16
17BEGIN {
c859a0d9
NC
18 if (!defined &DynaLoader::boot_DynaLoader) {
19 print "1..0 # Skip miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-\n";
20 exit 0;
21 }
e425a60b 22 chdir 't' if -d 't';
9d45b377
YO
23 @INC = ('../lib','.');
24 do "re/ReTest.pl" or die $@;
e425a60b 25}
e425a60b 26
e425a60b 27
1d0dc949 28plan tests => 2514; # Update this when adding/deleting tests.
e425a60b 29
9d45b377 30run_tests() unless caller;
e425a60b
YO
31
32#
33# Tests start here.
34#
35sub run_tests {
36
e425a60b
YO
37
38 {
39 local $BugId = '20000731.001';
40 ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/,
93f09d7b 41 "Match UTF-8 char in presence of (??{ })";
e425a60b
YO
42 }
43
44
45 {
46 local $BugId = '20001021.005';
47 no warnings 'uninitialized';
48 ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV";
49 }
50
e425a60b
YO
51 {
52 local $Message = 'bug id 20001008.001';
53
54 my @x = ("stra\337e 138", "stra\337e 138");
55 for (@x) {
56 ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
57 ok my ($latin) = /^(.+)(?:\s+\d)/;
58 iseq $latin, "stra\337e";
59 ok $latin =~ s/stra\337e/straße/;
60 #
61 # Previous code follows, but outcommented - there were no tests.
62 #
63 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
64 # use utf8; # needed for the raw UTF-8
65 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
66 }
67 }
68
69
70 {
e425a60b
YO
71 local $BugId = '20001028.003';
72
73 # Fist half of the bug.
74 local $Message = 'HEBREW ACCENT QADMA matched by .*';
75 my $X = chr (1448);
76 ok my ($Y) = $X =~ /(.*)/;
77 iseq $Y, v1448;
78 iseq length ($Y), 1;
79
80 # Second half of the bug.
81 $Message = 'HEBREW ACCENT QADMA in replacement';
82 $X = '';
83 $X =~ s/^/chr(1488)/e;
84 iseq length $X, 1;
85 iseq ord ($X), 1488;
86 }
87
88
89 {
90 local $BugId = '20001108.001';
91 local $Message = 'Repeated s///';
92 my $X = "Szab\x{f3},Bal\x{e1}zs";
93 my $Y = $X;
94 $Y =~ s/(B)/$1/ for 0 .. 3;
95 iseq $Y, $X;
96 iseq $X, "Szab\x{f3},Bal\x{e1}zs";
97 }
98
99
100 {
101 local $BugId = '20000517.001';
102 local $Message = 's/// on UTF-8 string';
103 my $x = "\x{100}A";
104 $x =~ s/A/B/;
105 iseq $x, "\x{100}B";
106 iseq length $x, 2;
107 }
108
109
110 {
111 local $BugId = '20001230.002';
112 local $Message = '\C and É';
113 ok "École" =~ /^\C\C(.)/ && $1 eq 'c';
114 ok "École" =~ /^\C\C(c)/;
115 }
116
117
e425a60b
YO
118 {
119 # The original bug report had 'no utf8' here but that was irrelevant.
120 local $BugId = '20010306.008';
121 local $Message = "Don't dump core";
122 my $a = "a\x{1234}";
123 ok $a =~ m/\w/; # used to core dump.
124 }
125
126
127 {
128 local $BugId = '20010410.006';
129 local $Message = '/g in scalar context';
130 for my $rx ('/(.*?)\{(.*?)\}/csg',
131 '/(.*?)\{(.*?)\}/cg',
132 '/(.*?)\{(.*?)\}/sg',
133 '/(.*?)\{(.*?)\}/g',
134 '/(.+?)\{(.+?)\}/csg',) {
135 my $i = 0;
136 my $input = "a{b}c{d}";
137 eval <<" --";
138 while (eval \$input =~ $rx) {
139 \$i ++;
140 }
141 --
142 iseq $i, 2;
143 }
144 }
145
e425a60b
YO
146 {
147 local $BugId = "20010619.003";
148 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
149 for ("\n", "\t", "\014", "\r") {
150 ok !/[[:print:]]/, "'$_' not in [[:print:]]";
151 }
152 for (" ") {
153 ok /[[:print:]]/, "'$_' in [[:print:]]";
154 }
155 }
156
157
e425a60b
YO
158
159 {
160 # [ID 20010814.004] pos() doesn't work when using =~m// in list context
161 local $BugId = '20010814.004';
162 $_ = "ababacadaea";
163 my $a = join ":", /b./gc;
164 my $b = join ":", /a./gc;
165 my $c = pos;
166 iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//";
167 }
168
169
170 {
171 # [ID 20010407.006] matching utf8 return values from
172 # functions does not work
173 local $BugId = '20010407.006';
174 local $Message = 'UTF-8 return values from functions';
175 package ID_20010407_006;
176 sub x {"a\x{1234}"}
177 my $x = x;
178 my $y;
179 ::ok $x =~ /(..)/;
180 $y = $1;
181 ::ok length ($y) == 2 && $y eq $x;
182 ::ok x =~ /(..)/;
183 $y = $1;
184 ::ok length ($y) == 2 && $y eq $x;
185 }
186
e425a60b
YO
187 {
188 # High bit bug -- japhy
189 my $x = "ab\200d";
190 ok $x =~ /.*?\200/, "High bit fine";
191 }
192
193
194 {
e425a60b
YO
195 local $Message = 'UTF-8 hash keys and /$/';
196 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
197 # /2002-01/msg01327.html
198
199 my $u = "a\x{100}";
200 my $v = substr ($u, 0, 1);
201 my $w = substr ($u, 1, 1);
202 my %u = ($u => $u, $v => $v, $w => $w);
203 for (keys %u) {
204 my $m1 = /^\w*$/ ? 1 : 0;
205 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
206 iseq $m1, $m2;
207 }
208 }
209
210
211 {
212 local $BugId = "20020124.005";
1c6f1211 213 local $Message = "s///eg [change 13f46d054db22cf4]";
e425a60b
YO
214
215 for my $char ("a", "\x{df}", "\x{100}") {
216 my $x = "$char b $char";
217 $x =~ s{($char)}{
218 "c" =~ /c/;
219 "x";
220 }ge;
221 iseq substr ($x, 0, 1), substr ($x, -1, 1);
222 }
223 }
224
225
226 {
e425a60b
YO
227 local $BugId = "20020412.005";
228 local $Message = "Correct pmop flags checked when empty pattern";
229
230 # Requires reuse of last successful pattern.
231 my $num = 123;
232 $num =~ /\d/;
233 for (0 .. 1) {
725a61d7 234 my $match = m?? + 0;
e425a60b
YO
235 ok $match != $_, $Message,
236 sprintf "'match one' %s on %s iteration" =>
237 $match ? 'succeeded' : 'failed',
238 $_ ? 'second' : 'first';
239 }
240 $num =~ /(\d)/;
241 my $result = join "" => $num =~ //g;
242 iseq $result, $num;
243 }
244
245
246 {
247 local $BugId = '20020630.002';
248 local $Message = 'UTF-8 regex matches above 32k';
249 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
250 my ($type, $char) = @$_;
251 for my $len (32000, 32768, 33000) {
252 my $s = $char . "f" x $len;
253 my $r = $s =~ /$char([f]*)/gc;
254 ok $r, $Message, "<$type x $len>";
255 ok !$r || pos ($s) == $len + 1, $Message,
256 "<$type x $len>; pos = @{[pos $s]}";
257 }
258 }
259 }
260
e425a60b 261 {
e425a60b
YO
262 my $s = "\x{100}" x 5;
263 my $ok = $s =~ /(\x{100}{4})/;
264 my ($ord, $len) = (ord $1, length $1);
1c6f1211 265 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
e425a60b
YO
266 }
267
268
269 {
270 local $BugId = '15763';
271 our $a = "x\x{100}";
272 chop $a; # Leaves the UTF-8 flag
273 $a .= "y"; # 1 byte before 'y'.
274
275 ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8';
276 ok $a =~ /^\C{1}/, 'match \C{1}';
277
278 ok $a =~ /^\Cy/, 'match \Cy';
279 ok $a =~ /^\C{1}y/, 'match \C{1}y';
280
281 ok $a !~ /^\C\Cy/, q {don't match two \Cy};
282 ok $a !~ /^\C{2}y/, q {don't match \C{2}y};
283
284 $a = "\x{100}y"; # 2 bytes before "y"
285
286 ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8';
287 ok $a =~ /^\C{1}/, 'match \C{1}';
288 ok $a =~ /^\C\C/, 'match two \C';
289 ok $a =~ /^\C{2}/, 'match \C{2}';
290
291 ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte';
292 ok $a =~ /^\C{3}/, 'match \C{3}';
293
294 ok $a =~ /^\C\Cy/, 'match two \C';
295 ok $a =~ /^\C{2}y/, 'match \C{2}';
296
297 ok $a !~ /^\C\C\Cy/, q {don't match three \Cy};
298 ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy};
299 ok $a !~ /^\C{3}y/, q {don't match \C{3}y};
300
301 $a = "\x{1000}y"; # 3 bytes before "y"
302
303 ok $a =~ /^\C/, 'match one \C on three-byte UTF-8';
304 ok $a =~ /^\C{1}/, 'match \C{1}';
305 ok $a =~ /^\C\C/, 'match two \C';
306 ok $a =~ /^\C{2}/, 'match \C{2}';
307 ok $a =~ /^\C\C\C/, 'match three \C';
308 ok $a =~ /^\C{3}/, 'match \C{3}';
309
310 ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte';
311 ok $a =~ /^\C{4}/, 'match \C{4}';
312
313 ok $a =~ /^\C\C\Cy/, 'match three \Cy';
314 ok $a =~ /^\C{3}y/, 'match \C{3}y';
315
316 ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy};
317 ok $a !~ /^\C{4}y/, q {don't match \C{4}y};
318 }
319
320
321 {
e425a60b
YO
322 local $BugId = '15397';
323 local $Message = 'UTF-8 matching';
324 ok "\x{100}" =~ /\x{100}/;
325 ok "\x{100}" =~ /(\x{100})/;
326 ok "\x{100}" =~ /(\x{100}){1}/;
327 ok "\x{100}\x{100}" =~ /(\x{100}){2}/;
328 ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/;
329 }
330
331
332 {
333 local $BugId = '7471';
334 local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times';
335 local $_ = 'CD';
336 ok /(AB)*?CD/ && !defined $1;
337 ok /(AB)*CD/ && !defined $1;
338 }
339
340
341 {
342 local $BugId = '3547';
343 local $Message = "Caching shouldn't prevent match";
344 my $pattern = "^(b+?|a){1,2}c";
345 ok "bac" =~ /$pattern/ && $1 eq 'a';
346 ok "bbac" =~ /$pattern/ && $1 eq 'a';
347 ok "bbbac" =~ /$pattern/ && $1 eq 'a';
348 ok "bbbbac" =~ /$pattern/ && $1 eq 'a';
349 }
350
351
352
353 {
354 local $BugId = '18232';
355 local $Message = '$1 should keep UTF-8 ness';
356 ok "\x{100}" =~ /(.)/;
357 iseq $1, "\x{100}", '$1 is UTF-8';
358 { 'a' =~ /./; }
359 iseq $1, "\x{100}", '$1 is still UTF-8';
360 isneq $1, "\xC4\x80", '$1 is not non-UTF-8';
361 }
362
363
364 {
365 local $BugId = '19767';
366 local $Message = "Optimizer doesn't prematurely reject match";
367 use utf8;
368
369 my $attr = 'Name-1';
370 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
371 my $NormalWord = qr /${NormalChar}+?/;
372 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
373
374 $attr =~ /^$/;
375 ok $attr =~ $PredNameHyphen; # Original test.
376
377 "a" =~ m/[b]/;
378 ok "0" =~ /\p{N}+\z/; # Variant.
379 }
380
381
382 {
383 local $BugId = '20683';
384 local $Message = "(??{ }) doesn't return stale values";
385 our $p = 1;
386 foreach (1, 2, 3, 4) {
387 $p ++ if /(??{ $p })/
388 }
389 iseq $p, 5;
390
391 {
392 package P;
393 $a = 1;
394 sub TIESCALAR {bless []}
395 sub FETCH {$a ++}
396 }
397 tie $p, "P";
398 foreach (1, 2, 3, 4) {
399 /(??{ $p })/
400 }
401 iseq $p, 5;
402 }
403
404
405 {
406 # Subject: Odd regexp behavior
407 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
408 # Date: Wed, 26 Feb 2003 16:53:12 +0000
409 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
410 # To: perl-unicode@perl.org
411
412 local $Message = 'Markus Kuhn 2003-02-26';
413
414 my $x = "\x{2019}\nk";
415 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
416 ok $x eq "\x{2019} k";
417
418 $x = "b\nk";
419 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
420 ok $x eq "b k";
421
422 ok "\x{2019}" =~ /\S/;
423 }
424
425
426 {
427 local $BugId = '21411';
428 local $Message = "(??{ .. }) in split doesn't corrupt its stack";
429 our $i;
430 ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-';
431 no warnings 'syntax';
432 @_ = split /(?{'WOW'})/, 'abc';
433 local $" = "|";
434 iseq "@_", "a|b|c";
435 }
436
437
438 {
439 # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
440 # hasn't been crashing. Disable this test until it is fixed properly.
441 # XXX also check what it returns rather than just doing ok(1,...)
442 # split /(?{ split "" })/, "abc";
443 local $TODO = "Recursive split is still broken";
444 ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0';
445 }
446
447
448 {
e425a60b
YO
449 local $BugId = '17757';
450 $_ = "code: 'x' { '...' }\n"; study;
451 my @x; push @x, $& while m/'[^\']*'/gx;
452 local $" = ":";
453 iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop";
454 }
455
456
457 {
e425a60b
YO
458 local $BugId = '22354';
459 sub func ($) {
460 ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]";
461 ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m";
462 }
463 func "standalone";
464 $_ = "x"; s/x/func "in subst"/e;
465 $_ = "x"; s/x/func "in multiline subst"/em;
1d0dc949
FC
466 $_ = "x"; /x(?{func "in regexp"})/;
467 $_ = "x"; /x(?{func "in multiline regexp"})/m;
e425a60b
YO
468 }
469
470
471 {
472 local $BugId = '19049';
473 $_ = "abcdef\n";
474 my @x = m/./g;
475 iseq "abcde", $`, 'Global match sets $`';
476 }
477
478
479 {
e425a60b
YO
480 # [perl #23769] Unicode regex broken on simple example
481 # regrepeat() didn't handle UTF-8 EXACT case right.
482 local $BugId = '23769';
483 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
484 local $Message = $Mess;
485
486 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
487
488 ok $s =~ /\x{a0}/;
489 ok $s =~ /\x{a0}+/;
490 ok $s =~ /\x{a0}\x{a0}/;
491
492 $Message = "$Mess (easy variant)";
493 ok "aaa\x{100}" =~ /(a+)/;
494 iseq $1, "aaa";
495
496 $Message = "$Mess (easy invariant)";
497 ok "aaa\x{100} " =~ /(a+?)/;
498 iseq $1, "a";
499
500 $Message = "$Mess (regrepeat variant)";
501 ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/;
502 iseq $1, "\xa0";
503
504 $Message = "$Mess (regrepeat invariant)";
505 ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/;
506 iseq $1, "\xa0\xa0\xa0";
507
508 $Message = "$Mess (hard variant)";
509 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/;
510 iseq $1, "\xa0\xa1";
511
512 $Message = "$Mess (hard invariant)";
513 ok "ababab\x{100} " =~ /((?:ab)+)/;
514 iseq $1, 'ababab';
515
516 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/;
517 iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1";
518
519 ok "ababab\x{100} " =~ /((?:ab)+?)/;
520 iseq $1, "ab";
521
522 $Message = "Don't match first byte of UTF-8 representation";
523 ok "\xc4\xc4\xc4" !~ /(\x{100}+)/;
524 ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/;
525 ok "\xc4\xc4\xc4" !~ /(\x{100}++)/;
526 }
527
528
529 {
e425a60b
YO
530 # perl panic: pp_match start/end pointers
531 local $BugId = '25269';
532 iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"},
533 'Captures can move backwards in string';
534 }
535
536
537 {
538 local $BugId = '27940'; # \cA not recognized in character classes
539 ok "a\cAb" =~ /\cA/, '\cA in pattern';
540 ok "a\cAb" =~ /[\cA]/, '\cA in character class';
541 ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range';
542 ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range';
543 ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range';
544 ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range';
545 ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern';
546 ok "ab" !~ /a\cIb/x, '\cI in pattern';
547 }
548
549
550 {
9d45b377
YO
551 # perl #28532: optional zero-width match at end of string is ignored
552 local $BugId = '28532';
553 ok "abc" =~ /^abc(\z)?/ && defined($1),
554 'Optional zero-width match at end of string';
555 ok "abc" =~ /^abc(\z)??/ && !defined($1),
556 'Optional zero-width match at end of string';
e425a60b
YO
557 }
558
559
e425a60b
YO
560
561 {
562 local $BugId = '36207';
563 my $utf8 = "\xe9\x{100}"; chop $utf8;
564 my $latin1 = "\xe9";
565
566 ok $utf8 =~ /\xe9/i, "utf8/latin";
567 ok $utf8 =~ /$latin1/i, "utf8/latin runtime";
568 ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie";
569 ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime";
570
571 ok "\xe9" =~ /$utf8/i, "latin/utf8";
572 ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie";
573 ok $latin1 =~ /$utf8/i, "latin/utf8 runtime";
574 ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime";
575 }
576
577
578 {
579 local $BugId = '37038';
580 my $s = "abcd";
581 $s =~ /(..)(..)/g;
582 $s = $1;
583 $s = $2;
584 iseq $2, 'cd',
585 "Assigning to original string does not corrupt match vars";
586 }
587
588
589 {
590 {
591 package wooosh;
592 sub gloople {"!"}
593 }
594 my $aeek = bless {} => 'wooosh';
595 eval_ok sub {$aeek -> gloople () =~ /(.)/g},
1c6f1211 596 "//g match against return value of sub [change e26a497577f3ce7b]";
e425a60b
YO
597
598 sub gloople {"!"}
599 eval_ok sub {gloople () =~ /(.)/g},
1c6f1211 600 "change e26a497577f3ce7b didn't affect sub calls for some reason";
e425a60b
YO
601 }
602
603
604 {
605 local $TODO = "See changes 26925-26928, which reverted change 26410";
606 {
607 package lv;
608 our $var = "abc";
609 sub variable : lvalue {$var}
610 }
611 my $o = bless [] => 'lv';
612 my $f = "";
613 my $r = eval {
614 for (1 .. 2) {
615 $f .= $1 if $o -> variable =~ /(.)/g;
616 }
617 1;
618 };
619 if ($r) {
620 iseq $f, "ab", "pos() retained between calls";
621 }
622 else {
623 local $TODO;
624 ok 0, "Code failed: $@";
625 }
626
627 our $var = "abc";
628 sub variable : lvalue {$var}
629 my $g = "";
630 my $s = eval {
631 for (1 .. 2) {
632 $g .= $1 if variable =~ /(.)/g;
633 }
634 1;
635 };
636 if ($s) {
637 iseq $g, "ab", "pos() retained between calls";
638 }
639 else {
640 local $TODO;
641 ok 0, "Code failed: $@";
642 }
643 }
644
645
646 SKIP:
647 {
648 local $BugId = '37836';
649 skip "In EBCDIC" if $IS_EBCDIC;
650 no warnings 'utf8';
651 $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8
652 my $ret = 0;
653 eval_ok sub {!($ret = s/[\0]+//g)},
654 "Ill-formed UTF-8 doesn't match NUL in class";
655 }
656
657
658 {
659 # chr(65535) should be allowed in regexes
660 local $BugId = '38293';
661 no warnings 'utf8'; # To allow non-characters
662 my ($c, $r, $s);
663
664 $c = chr 0xffff;
665 $c =~ s/$c//g;
666 ok $c eq "", "U+FFFF, parsed as atom";
667
668 $c = chr 0xffff;
669 $r = "\\$c";
670 $c =~ s/$r//g;
671 ok $c eq "", "U+FFFF backslashed, parsed as atom";
672
673 $c = chr 0xffff;
674 $c =~ s/[$c]//g;
675 ok $c eq "", "U+FFFF, parsed in class";
676
677 $c = chr 0xffff;
678 $r = "[\\$c]";
679 $c =~ s/$r//g;
680 ok $c eq "", "U+FFFF backslashed, parsed in class";
681
682 $s = "A\x{ffff}B";
683 $s =~ s/\x{ffff}//i;
684 ok $s eq "AB", "U+FFFF, EXACTF";
685
686 $s = "\x{ffff}A";
687 $s =~ s/\bA//;
688 ok $s eq "\x{ffff}", "U+FFFF, BOUND";
689
690 $s = "\x{ffff}!";
691 $s =~ s/\B!//;
692 ok $s eq "\x{ffff}", "U+FFFF, NBOUND";
693 }
694
695
696 {
697 local $BugId = '39583';
698
699 # The printing characters
700 my @chars = ("A" .. "Z");
701 my $delim = ",";
702 my $size = 32771 - 4;
703 my $str = '';
704
705 # Create some random junk. Inefficient, but it works.
706 for (my $i = 0; $i < $size; $ i++) {
707 $str .= $chars [rand @chars];
708 }
709
710 $str .= ($delim x 4);
711 my $res;
712 my $matched;
713 ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches";
714 iseq $str, "", "Empty string";
715 ok defined $1 && length ($1) == $size, '$1 is correct size';
716 }
717
718
719 {
720 local $BugId = '27940';
721 ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern';
722 ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern';
723 ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern';
724 ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern';
725
726 ok "X\0A" =~ /X\c@?A/, '\c@?';
727 ok "X\0A" =~ /X\c@*A/, '\c@*';
728 ok "X\0A" =~ /X\c@(A)/, '\c@(';
729 ok "X\0A" =~ /X(\c@)A/, '\c@)';
730 ok "X\0A" =~ /X\c@|ZA/, '\c@|';
731
732 ok "X\@A" =~ /X@?A/, '@?';
733 ok "X\@A" =~ /X@*A/, '@*';
734 ok "X\@A" =~ /X@(A)/, '@(';
735 ok "X\@A" =~ /X(@)A/, '@)';
736 ok "X\@A" =~ /X@|ZA/, '@|';
737
738 local $" = ','; # non-whitespace and non-RE-specific
739 ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus';
740 ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/';
741 ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/';
742 ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x';
743 ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x';
744 }
745
746
747 {
9d45b377 748 local $BugId = '50496';
e425a60b
YO
749 my $s = 'foo bar baz';
750 my (@k, @v, @fetch, $res);
751 my $count = 0;
752 my @names = qw ($+{A} $+{B} $+{C});
753 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
754 while (my ($k, $v) = each (%+)) {
755 $count++;
756 }
757 @k = sort keys (%+);
758 @v = sort values (%+);
759 $res = 1;
760 push @fetch,
761 ["$+{A}", "$1"],
762 ["$+{B}", "$2"],
763 ["$+{C}", "$3"],
764 ;
765 }
766 foreach (0 .. 2) {
767 if ($fetch [$_]) {
768 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
769 } else {
770 ok 0, $names[$_];
771 }
772 }
773 iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/";
774 iseq $count, 3, "Got 3 keys in %+ via each";
775 iseq 0 + @k, 3, 'Got 3 keys in %+ via keys';
776 iseq "@k", "A B C", "Got expected keys";
777 iseq "@v", "bar baz foo", "Got expected values";
778 eval '
779 no warnings "uninitialized";
780 print for $+ {this_key_doesnt_exist};
781 ';
782 ok !$@, 'lvalue $+ {...} should not throw an exception';
783 }
784
785
786 {
787 #
788 # Almost the same as the block above, except that the capture is nested.
789 #
790 local $BugId = '50496';
791 my $s = 'foo bar baz';
792 my (@k, @v, @fetch, $res);
793 my $count = 0;
794 my @names = qw ($+{A} $+{B} $+{C} $+{D});
795 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
796 while (my ($k,$v) = each(%+)) {
797 $count++;
798 }
799 @k = sort keys (%+);
800 @v = sort values (%+);
801 $res = 1;
802 push @fetch,
803 ["$+{A}", "$2"],
804 ["$+{B}", "$3"],
805 ["$+{C}", "$4"],
806 ["$+{D}", "$1"],
807 ;
808 }
809 foreach (0 .. 3) {
810 if ($fetch [$_]) {
811 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
812 } else {
813 ok 0, $names [$_];
814 }
815 }
816 iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/";
817 iseq $count, 4, "Got 4 keys in %+ via each";
818 iseq @k, 4, 'Got 4 keys in %+ via keys';
819 iseq "@k", "A B C D", "Got expected keys";
820 iseq "@v", "bar baz foo foo bar baz", "Got expected values";
821 eval '
822 no warnings "uninitialized";
823 print for $+ {this_key_doesnt_exist};
824 ';
825 ok !$@,'lvalue $+ {...} should not throw an exception';
826 }
827
828
829 {
e425a60b
YO
830 local $BugId = '36046';
831 my $str = 'abc';
832 my $count = 0;
833 my $mval = 0;
834 my $pval = 0;
835 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
836 iseq $mval, 0, '@- should be empty';
837 iseq $pval, 0, '@+ should be empty';
838 iseq $count, 1, 'Should have matched once only';
839 }
840
841
e425a60b
YO
842
843
844 {
845 local $BugId = '40684';
846 local $Message = '/m in precompiled regexp';
847 my $s = "abc\ndef";
848 my $rex = qr'^abc$'m;
849 ok $s =~ m/$rex/;
850 ok $s =~ m/^abc$/m;
851 }
852
853
854 {
e425a60b
YO
855 local $BugId = '36909';
856 local $Message = '(?: ... )? should not lose $^R';
857 $^R = 'Nothing';
858 {
859 local $^R = "Bad";
860 ok 'x foofoo y' =~ m {
861 (foo) # $^R correctly set
862 (?{ "last regexp code result" })
863 }x;
864 iseq $^R, 'last regexp code result';
865 }
866 iseq $^R, 'Nothing';
867
868 {
869 local $^R = "Bad";
870
871 ok 'x foofoo y' =~ m {
872 (?:foo|bar)+ # $^R correctly set
873 (?{ "last regexp code result" })
874 }x;
875 iseq $^R, 'last regexp code result';
876 }
877 iseq $^R, 'Nothing';
878
879 {
880 local $^R = "Bad";
881 ok 'x foofoo y' =~ m {
882 (foo|bar)\1+ # $^R undefined
883 (?{ "last regexp code result" })
884 }x;
885 iseq $^R, 'last regexp code result';
886 }
887 iseq $^R, 'Nothing';
888
889 {
890 local $^R = "Bad";
891 ok 'x foofoo y' =~ m {
892 (foo|bar)\1 # This time without the +
893 (?{"last regexp code result"})
894 }x;
895 iseq $^R, 'last regexp code result';
896 }
897 iseq $^R, 'Nothing';
898 }
899
900
901 {
902 local $BugId = '22395';
903 local $Message = 'Match is linear, not quadratic';
904 our $count;
905 for my $l (10, 100, 1000) {
906 $count = 0;
907 ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
908 local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
909 iseq $count, $l + 1;
910 }
911 }
912
913
914 {
915 local $BugId = '22614';
916 local $Message = '@-/@+ should not have undefined values';
917 local $_ = 'ab';
918 our @len = ();
919 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
920 iseq "@len", "2 2 2";
921 }
922
923
924 {
925 local $BugId = '18209';
926 local $Message = '$& set on s///';
927 my $text = ' word1 word2 word3 word4 word5 word6 ';
928
929 my @words = ('word1', 'word3', 'word5');
930 my $count;
931 foreach my $word (@words) {
93f09d7b 932 $text =~ s/$word\s//gi; # Leave a space to separate words
e425a60b
YO
933 # in the resultant str.
934 # The following block is not working.
935 if ($&) {
936 $count ++;
937 }
938 # End bad block
939 }
940 iseq $count, 3;
941 iseq $text, ' word2 word4 word6 ';
942 }
943
944
945 {
946 # RT#6893
947 local $BugId = '6893';
948 local $_ = qq (A\nB\nC\n);
949 my @res;
950 while (m#(\G|\n)([^\n]*)\n#gsx) {
951 push @res, "$2";
952 last if @res > 3;
953 }
954 iseq "@res", "A B C", "/g pattern shouldn't infinite loop";
955 }
956
957
e425a60b
YO
958
959 {
960 local $BugId = '41010';
961 local $Message = 'No optimizer bug';
962 my @tails = ('', '(?(1))', '(|)', '()?');
963 my @quants = ('*','+');
964 my $doit = sub {
965 my $pats = shift;
966 for (@_) {
967 for my $pat (@$pats) {
968 for my $quant (@quants) {
969 for my $tail (@tails) {
970 my $re = "($pat$quant\$)$tail";
971 ok /$re/ && $1 eq $_, "'$_' =~ /$re/";
972 ok /$re/m && $1 eq $_, "'$_' =~ /$re/m";
973 }
974 }
975 }
976 }
977 };
978
979 my @dpats = ('\d',
980 '[1234567890]',
981 '(1|[23]|4|[56]|[78]|[90])',
982 '(?:1|[23]|4|[56]|[78]|[90])',
983 '(1|2|3|4|5|6|7|8|9|0)',
984 '(?:1|2|3|4|5|6|7|8|9|0)');
985 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
986 my @sstrs = (' ');
987 my @dstrs = ('12345');
988 $doit -> (\@spats, @sstrs);
989 $doit -> (\@dpats, @dstrs);
990 }
991
992
e425a60b
YO
993
994 {
995 local $BugId = '45605';
996 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
997
998 my $utf_8 = "\xd6schel";
999 utf8::upgrade ($utf_8);
1000 $utf_8 =~ m {(\xd6|&Ouml;)schel};
1001 iseq $1, "\xd6", "Upgrade error";
1002 }
1003
1004 {
e425a60b
YO
1005 # Regardless of utf8ness any character matches itself when
1006 # doing a case insensitive match. See also [perl #36207]
1007 local $BugId = '36207';
1008 for my $o (0 .. 255) {
1009 my @ch = (chr ($o), chr ($o));
1010 utf8::upgrade ($ch [1]);
1011 for my $u_str (0, 1) {
1012 for my $u_pat (0, 1) {
1013 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i,
1014 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat";
1015 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i,
1016 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat";
1017 }
1018 }
1019 }
1020 }
1021
1022
1023 {
e425a60b
YO
1024 local $BugId = '49190';
1025 local $Message = '$REGMARK in replacement';
1026 our $REGMARK;
1027 my $_ = "A";
1028 ok s/(*:B)A/$REGMARK/;
1029 iseq $_, "B";
1030 $_ = "CCCCBAA";
1031 ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g;
1032 iseq $_, "ZYX";
1033 }
1034
1035
1036 {
e425a60b
YO
1037 local $BugId = '52658';
1038 local $Message = 'Substitution evaluation in list context';
1039 my $reg = '../xxx/';
1040 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
1041 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
1042 iseq $reg, '../bbb/';
1043 iseq $te [0], '../';
1044 }
1045
e425a60b
YO
1046 {
1047 local $BugId = '60034';
1048 my $a = "xyzt" x 8192;
1049 ok $a =~ /\A(?>[a-z])*\z/,
1050 '(?>) does not cause wrongness on long string';
1051 my $b = $a . chr 256;
1052 chop $b;
1053 {
1054 iseq $a, $b;
1055 }
1056 ok $b =~ /\A(?>[a-z])*\z/,
1057 '(?>) does not cause wrongness on long string with UTF-8';
1058 }
1059
1060
1061 #
1062 # Keep the following tests last -- they may crash perl
1063 #
1064 print "# Tests that follow may crash perl\n";
1065 {
1066 local $BugId = '19049/38869';
1067 local $Message = 'Pattern in a loop, failure should not ' .
1068 'affect previous success';
1069 my @list = (
1070 'ab cdef', # Matches regex
1071 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
1072 );
1073 my $y;
1074 my $x;
1075 foreach (@list) {
1076 m/ab(.+)cd/i; # The ignore-case seems to be important
1077 $y = $1; # Use $1, which might not be from the last match!
1078 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
1079 }
1080 iseq $y, ' ';
1081 iseq $x, 'ab cd';
1082 }
1083
1084
1085 {
1086 local $BugId = '24274';
1087
1088 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker");
1089 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
1090 "Regexp /^(??{'(.)'x 100})/ crashes older perls");
1091 }
1092
1093
1094 {
e425a60b
YO
1095 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
1096 local $BugId = '45337';
1097 local ${^UTF8CACHE} = -1;
1098 local $Message = "Shouldn't panic";
1099 my $s = "[a]a{2}";
1100 utf8::upgrade $s;
1101 ok "aaa" =~ /$s/;
1102 }
1103 {
1104 local $BugId = '57042';
1105 local $Message = "Check if tree logic breaks \$^R";
1106 my $cond_re = qr/\s*
1107 \s* (?:
1108 \( \s* A (?{1})
1109 | \( \s* B (?{2})
1110 )
1111 /x;
1112 my @res;
1113 for my $line ("(A)","(B)") {
1114 if ($line =~ m/$cond_re/) {
1115 push @res, $^R ? "#$^R" : "UNDEF";
1116 }
1117 }
1118 iseq "@res","#1 #2";
1119 }
1120 {
1121 no warnings 'closure';
1122 my $re = qr/A(??{"1"})/;
1123 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
1124 ok $1 eq "A1";
1125 ok $2 eq "B";
1126 }
1127
1128
e425a60b
YO
1129
1130 # This only works under -DEBUGGING because it relies on an assert().
1131 {
1132 local $BugId = '60508';
1133 local $Message = "Check capture offset re-entrancy of utf8 code.";
1134
1135 sub fswash { $_[0] =~ s/([>X])//g; }
1136
1137 my $k1 = "." x 4 . ">>";
1138 fswash($k1);
1139
1140 my $k2 = "\x{f1}\x{2022}";
1141 $k2 =~ s/([\360-\362])/>/g;
1142 fswash($k2);
1143
1144 iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks");
1145 }
1146
1147
1148 {
1149 local $BugId = 65372; # minimal CURLYM limited to 32767 matches
1150 my @pat = (
1151 qr{a(x|y)*b}, # CURLYM
1152 qr{a(x|y)*?b}, # .. with minmod
1153 qr{a([wx]|[yz])*b}, # .. and without tries
1154 qr{a([wx]|[yz])*?b},
1155 );
1156 my $len = 32768;
1157 my $s = join '', 'a', 'x' x $len, 'b';
1158 for my $pat (@pat) {
1159 ok($s =~ $pat, $pat);
1160 }
1161 }
b4da7bae
BR
1162
1163 {
1164 local $TODO = "[perl #38133]";
1165
1166 "A" =~ /(((?:A))?)+/;
1167 my $first = $2;
1168
1169 "A" =~ /(((A))?)+/;
1170 my $second = $2;
1171
1172 iseq($first, $second);
1173 }
aca53033
FC
1174
1175 {
1176 local $BugId = 70998;
1177 local $Message
1178 = 'utf8 =~ /trie/ where trie matches a continuation octet';
1179
1180 # Catch warnings:
1181 my $w;
1182 local $SIG{__WARN__} = sub { $w .= shift };
1183
1184 # This bug can be reduced to
1185 qq{\x{30ab}} =~ /\xab|\xa9/;
1186 # but it's nice to have a more 'real-world' test. The original test
1187 # case from the RT ticket follows:
1188
1189 my %conv = (
1190 "\xab" => "&lt;",
1191 "\xa9" => "(c)",
1192 );
1193 my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1194 $conv_rx = qr{$conv_rx};
1195
1196 my $x
1197 = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1198 . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1199 . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1200 . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1201 . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1202 . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1203 . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1204 . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1205
1206 $x =~ s{$conv_rx}{$conv{$1}}eg;
1207
1208 iseq($w,undef);
1209 }
1210
e7f38d0f
YO
1211 {
1212 local $BugId = 68564; # minimal CURLYM limited to 32767 matches
1213 local $Message = "stclass optimisation does not break + inside (?=)";
1214 iseq join("-", " abc def " =~ /(?=(\S+))/g),
1215 "abc-bc-c-def-ef-f",
1216 }
1217
e425a60b
YO
1218} # End of sub run_tests
1219
12201;