This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Sanity check on Errno values.
[perl5.git] / t / re / pat_rt_report.t
CommitLineData
e425a60b
YO
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
9use 5.010;
10
11
12sub run_tests;
13
14$| = 1;
15
e425a60b
YO
16
17BEGIN {
18 chdir 't' if -d 't';
9d45b377
YO
19 @INC = ('../lib','.');
20 do "re/ReTest.pl" or die $@;
e425a60b 21}
e425a60b 22
e425a60b 23
fa1639c5 24plan tests => 2510; # Update this when adding/deleting tests.
e425a60b 25
9d45b377 26run_tests() unless caller;
e425a60b
YO
27
28#
29# Tests start here.
30#
31sub run_tests {
32
e425a60b
YO
33
34 {
35 local $BugId = '20000731.001';
36 ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/,
37 "Match UTF-8 char in presense of (??{ })";
38 }
39
40
41 {
42 local $BugId = '20001021.005';
43 no warnings 'uninitialized';
44 ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV";
45 }
46
e425a60b
YO
47 {
48 local $Message = 'bug id 20001008.001';
49
50 my @x = ("stra\337e 138", "stra\337e 138");
51 for (@x) {
52 ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
53 ok my ($latin) = /^(.+)(?:\s+\d)/;
54 iseq $latin, "stra\337e";
55 ok $latin =~ s/stra\337e/straße/;
56 #
57 # Previous code follows, but outcommented - there were no tests.
58 #
59 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
60 # use utf8; # needed for the raw UTF-8
61 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
62 }
63 }
64
65
66 {
e425a60b
YO
67 local $BugId = '20001028.003';
68
69 # Fist half of the bug.
70 local $Message = 'HEBREW ACCENT QADMA matched by .*';
71 my $X = chr (1448);
72 ok my ($Y) = $X =~ /(.*)/;
73 iseq $Y, v1448;
74 iseq length ($Y), 1;
75
76 # Second half of the bug.
77 $Message = 'HEBREW ACCENT QADMA in replacement';
78 $X = '';
79 $X =~ s/^/chr(1488)/e;
80 iseq length $X, 1;
81 iseq ord ($X), 1488;
82 }
83
84
85 {
86 local $BugId = '20001108.001';
87 local $Message = 'Repeated s///';
88 my $X = "Szab\x{f3},Bal\x{e1}zs";
89 my $Y = $X;
90 $Y =~ s/(B)/$1/ for 0 .. 3;
91 iseq $Y, $X;
92 iseq $X, "Szab\x{f3},Bal\x{e1}zs";
93 }
94
95
96 {
97 local $BugId = '20000517.001';
98 local $Message = 's/// on UTF-8 string';
99 my $x = "\x{100}A";
100 $x =~ s/A/B/;
101 iseq $x, "\x{100}B";
102 iseq length $x, 2;
103 }
104
105
106 {
107 local $BugId = '20001230.002';
108 local $Message = '\C and É';
109 ok "École" =~ /^\C\C(.)/ && $1 eq 'c';
110 ok "École" =~ /^\C\C(c)/;
111 }
112
113
e425a60b
YO
114 {
115 # The original bug report had 'no utf8' here but that was irrelevant.
116 local $BugId = '20010306.008';
117 local $Message = "Don't dump core";
118 my $a = "a\x{1234}";
119 ok $a =~ m/\w/; # used to core dump.
120 }
121
122
123 {
124 local $BugId = '20010410.006';
125 local $Message = '/g in scalar context';
126 for my $rx ('/(.*?)\{(.*?)\}/csg',
127 '/(.*?)\{(.*?)\}/cg',
128 '/(.*?)\{(.*?)\}/sg',
129 '/(.*?)\{(.*?)\}/g',
130 '/(.+?)\{(.+?)\}/csg',) {
131 my $i = 0;
132 my $input = "a{b}c{d}";
133 eval <<" --";
134 while (eval \$input =~ $rx) {
135 \$i ++;
136 }
137 --
138 iseq $i, 2;
139 }
140 }
141
e425a60b
YO
142 {
143 local $BugId = "20010619.003";
144 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
145 for ("\n", "\t", "\014", "\r") {
146 ok !/[[:print:]]/, "'$_' not in [[:print:]]";
147 }
148 for (" ") {
149 ok /[[:print:]]/, "'$_' in [[:print:]]";
150 }
151 }
152
153
e425a60b
YO
154
155 {
156 # [ID 20010814.004] pos() doesn't work when using =~m// in list context
157 local $BugId = '20010814.004';
158 $_ = "ababacadaea";
159 my $a = join ":", /b./gc;
160 my $b = join ":", /a./gc;
161 my $c = pos;
162 iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//";
163 }
164
165
166 {
167 # [ID 20010407.006] matching utf8 return values from
168 # functions does not work
169 local $BugId = '20010407.006';
170 local $Message = 'UTF-8 return values from functions';
171 package ID_20010407_006;
172 sub x {"a\x{1234}"}
173 my $x = x;
174 my $y;
175 ::ok $x =~ /(..)/;
176 $y = $1;
177 ::ok length ($y) == 2 && $y eq $x;
178 ::ok x =~ /(..)/;
179 $y = $1;
180 ::ok length ($y) == 2 && $y eq $x;
181 }
182
e425a60b
YO
183 {
184 # High bit bug -- japhy
185 my $x = "ab\200d";
186 ok $x =~ /.*?\200/, "High bit fine";
187 }
188
189
190 {
e425a60b
YO
191 local $Message = 'UTF-8 hash keys and /$/';
192 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
193 # /2002-01/msg01327.html
194
195 my $u = "a\x{100}";
196 my $v = substr ($u, 0, 1);
197 my $w = substr ($u, 1, 1);
198 my %u = ($u => $u, $v => $v, $w => $w);
199 for (keys %u) {
200 my $m1 = /^\w*$/ ? 1 : 0;
201 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
202 iseq $m1, $m2;
203 }
204 }
205
206
207 {
208 local $BugId = "20020124.005";
209 local $PatchId = "14795";
210 local $Message = "s///eg";
211
212 for my $char ("a", "\x{df}", "\x{100}") {
213 my $x = "$char b $char";
214 $x =~ s{($char)}{
215 "c" =~ /c/;
216 "x";
217 }ge;
218 iseq substr ($x, 0, 1), substr ($x, -1, 1);
219 }
220 }
221
222
223 {
e425a60b
YO
224 local $BugId = "20020412.005";
225 local $Message = "Correct pmop flags checked when empty pattern";
226
227 # Requires reuse of last successful pattern.
228 my $num = 123;
229 $num =~ /\d/;
230 for (0 .. 1) {
231 my $match = ?? + 0;
232 ok $match != $_, $Message,
233 sprintf "'match one' %s on %s iteration" =>
234 $match ? 'succeeded' : 'failed',
235 $_ ? 'second' : 'first';
236 }
237 $num =~ /(\d)/;
238 my $result = join "" => $num =~ //g;
239 iseq $result, $num;
240 }
241
242
243 {
244 local $BugId = '20020630.002';
245 local $Message = 'UTF-8 regex matches above 32k';
246 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
247 my ($type, $char) = @$_;
248 for my $len (32000, 32768, 33000) {
249 my $s = $char . "f" x $len;
250 my $r = $s =~ /$char([f]*)/gc;
251 ok $r, $Message, "<$type x $len>";
252 ok !$r || pos ($s) == $len + 1, $Message,
253 "<$type x $len>; pos = @{[pos $s]}";
254 }
255 }
256 }
257
e425a60b
YO
258 {
259 local $PatchId = '18179';
260 my $s = "\x{100}" x 5;
261 my $ok = $s =~ /(\x{100}{4})/;
262 my ($ord, $len) = (ord $1, length $1);
263 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift";
264 }
265
266
267 {
268 local $BugId = '15763';
269 our $a = "x\x{100}";
270 chop $a; # Leaves the UTF-8 flag
271 $a .= "y"; # 1 byte before 'y'.
272
273 ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8';
274 ok $a =~ /^\C{1}/, 'match \C{1}';
275
276 ok $a =~ /^\Cy/, 'match \Cy';
277 ok $a =~ /^\C{1}y/, 'match \C{1}y';
278
279 ok $a !~ /^\C\Cy/, q {don't match two \Cy};
280 ok $a !~ /^\C{2}y/, q {don't match \C{2}y};
281
282 $a = "\x{100}y"; # 2 bytes before "y"
283
284 ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8';
285 ok $a =~ /^\C{1}/, 'match \C{1}';
286 ok $a =~ /^\C\C/, 'match two \C';
287 ok $a =~ /^\C{2}/, 'match \C{2}';
288
289 ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte';
290 ok $a =~ /^\C{3}/, 'match \C{3}';
291
292 ok $a =~ /^\C\Cy/, 'match two \C';
293 ok $a =~ /^\C{2}y/, 'match \C{2}';
294
295 ok $a !~ /^\C\C\Cy/, q {don't match three \Cy};
296 ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy};
297 ok $a !~ /^\C{3}y/, q {don't match \C{3}y};
298
299 $a = "\x{1000}y"; # 3 bytes before "y"
300
301 ok $a =~ /^\C/, 'match one \C on three-byte UTF-8';
302 ok $a =~ /^\C{1}/, 'match \C{1}';
303 ok $a =~ /^\C\C/, 'match two \C';
304 ok $a =~ /^\C{2}/, 'match \C{2}';
305 ok $a =~ /^\C\C\C/, 'match three \C';
306 ok $a =~ /^\C{3}/, 'match \C{3}';
307
308 ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte';
309 ok $a =~ /^\C{4}/, 'match \C{4}';
310
311 ok $a =~ /^\C\C\Cy/, 'match three \Cy';
312 ok $a =~ /^\C{3}y/, 'match \C{3}y';
313
314 ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy};
315 ok $a !~ /^\C{4}y/, q {don't match \C{4}y};
316 }
317
318
319 {
e425a60b
YO
320 local $BugId = '15397';
321 local $Message = 'UTF-8 matching';
322 ok "\x{100}" =~ /\x{100}/;
323 ok "\x{100}" =~ /(\x{100})/;
324 ok "\x{100}" =~ /(\x{100}){1}/;
325 ok "\x{100}\x{100}" =~ /(\x{100}){2}/;
326 ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/;
327 }
328
329
330 {
331 local $BugId = '7471';
332 local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times';
333 local $_ = 'CD';
334 ok /(AB)*?CD/ && !defined $1;
335 ok /(AB)*CD/ && !defined $1;
336 }
337
338
339 {
340 local $BugId = '3547';
341 local $Message = "Caching shouldn't prevent match";
342 my $pattern = "^(b+?|a){1,2}c";
343 ok "bac" =~ /$pattern/ && $1 eq 'a';
344 ok "bbac" =~ /$pattern/ && $1 eq 'a';
345 ok "bbbac" =~ /$pattern/ && $1 eq 'a';
346 ok "bbbbac" =~ /$pattern/ && $1 eq 'a';
347 }
348
349
350
351 {
352 local $BugId = '18232';
353 local $Message = '$1 should keep UTF-8 ness';
354 ok "\x{100}" =~ /(.)/;
355 iseq $1, "\x{100}", '$1 is UTF-8';
356 { 'a' =~ /./; }
357 iseq $1, "\x{100}", '$1 is still UTF-8';
358 isneq $1, "\xC4\x80", '$1 is not non-UTF-8';
359 }
360
361
362 {
363 local $BugId = '19767';
364 local $Message = "Optimizer doesn't prematurely reject match";
365 use utf8;
366
367 my $attr = 'Name-1';
368 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
369 my $NormalWord = qr /${NormalChar}+?/;
370 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
371
372 $attr =~ /^$/;
373 ok $attr =~ $PredNameHyphen; # Original test.
374
375 "a" =~ m/[b]/;
376 ok "0" =~ /\p{N}+\z/; # Variant.
377 }
378
379
380 {
381 local $BugId = '20683';
382 local $Message = "(??{ }) doesn't return stale values";
383 our $p = 1;
384 foreach (1, 2, 3, 4) {
385 $p ++ if /(??{ $p })/
386 }
387 iseq $p, 5;
388
389 {
390 package P;
391 $a = 1;
392 sub TIESCALAR {bless []}
393 sub FETCH {$a ++}
394 }
395 tie $p, "P";
396 foreach (1, 2, 3, 4) {
397 /(??{ $p })/
398 }
399 iseq $p, 5;
400 }
401
402
403 {
404 # Subject: Odd regexp behavior
405 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
406 # Date: Wed, 26 Feb 2003 16:53:12 +0000
407 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
408 # To: perl-unicode@perl.org
409
410 local $Message = 'Markus Kuhn 2003-02-26';
411
412 my $x = "\x{2019}\nk";
413 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
414 ok $x eq "\x{2019} k";
415
416 $x = "b\nk";
417 ok $x =~ s/(\S)\n(\S)/$1 $2/sg;
418 ok $x eq "b k";
419
420 ok "\x{2019}" =~ /\S/;
421 }
422
423
424 {
425 local $BugId = '21411';
426 local $Message = "(??{ .. }) in split doesn't corrupt its stack";
427 our $i;
428 ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-';
429 no warnings 'syntax';
430 @_ = split /(?{'WOW'})/, 'abc';
431 local $" = "|";
432 iseq "@_", "a|b|c";
433 }
434
435
436 {
437 # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
438 # hasn't been crashing. Disable this test until it is fixed properly.
439 # XXX also check what it returns rather than just doing ok(1,...)
440 # split /(?{ split "" })/, "abc";
441 local $TODO = "Recursive split is still broken";
442 ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0';
443 }
444
445
446 {
e425a60b
YO
447 local $BugId = '17757';
448 $_ = "code: 'x' { '...' }\n"; study;
449 my @x; push @x, $& while m/'[^\']*'/gx;
450 local $" = ":";
451 iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop";
452 }
453
454
455 {
e425a60b
YO
456 local $BugId = '22354';
457 sub func ($) {
458 ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]";
459 ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m";
460 }
461 func "standalone";
462 $_ = "x"; s/x/func "in subst"/e;
463 $_ = "x"; s/x/func "in multiline subst"/em;
464
465 #
466 # Next two give 'panic: malloc'.
467 # Outcommented, using two TODOs.
468 #
469 local $TODO = 'panic: malloc';
470 local $Message = 'Postponed regexp and propaged modifier';
471 # ok 0 for 1 .. 2;
472 SKIP: {
473 skip "panic: malloc", 2;
474 $_ = "x"; /x(?{func "in regexp"})/;
475 $_ = "x"; /x(?{func "in multiline regexp"})/m;
476 }
477 }
478
479
480 {
481 local $BugId = '19049';
482 $_ = "abcdef\n";
483 my @x = m/./g;
484 iseq "abcde", $`, 'Global match sets $`';
485 }
486
487
488 {
e425a60b
YO
489 # [perl #23769] Unicode regex broken on simple example
490 # regrepeat() didn't handle UTF-8 EXACT case right.
491 local $BugId = '23769';
492 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
493 local $Message = $Mess;
494
495 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
496
497 ok $s =~ /\x{a0}/;
498 ok $s =~ /\x{a0}+/;
499 ok $s =~ /\x{a0}\x{a0}/;
500
501 $Message = "$Mess (easy variant)";
502 ok "aaa\x{100}" =~ /(a+)/;
503 iseq $1, "aaa";
504
505 $Message = "$Mess (easy invariant)";
506 ok "aaa\x{100} " =~ /(a+?)/;
507 iseq $1, "a";
508
509 $Message = "$Mess (regrepeat variant)";
510 ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/;
511 iseq $1, "\xa0";
512
513 $Message = "$Mess (regrepeat invariant)";
514 ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/;
515 iseq $1, "\xa0\xa0\xa0";
516
517 $Message = "$Mess (hard variant)";
518 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/;
519 iseq $1, "\xa0\xa1";
520
521 $Message = "$Mess (hard invariant)";
522 ok "ababab\x{100} " =~ /((?:ab)+)/;
523 iseq $1, 'ababab';
524
525 ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/;
526 iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1";
527
528 ok "ababab\x{100} " =~ /((?:ab)+?)/;
529 iseq $1, "ab";
530
531 $Message = "Don't match first byte of UTF-8 representation";
532 ok "\xc4\xc4\xc4" !~ /(\x{100}+)/;
533 ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/;
534 ok "\xc4\xc4\xc4" !~ /(\x{100}++)/;
535 }
536
537
538 {
e425a60b
YO
539 # perl panic: pp_match start/end pointers
540 local $BugId = '25269';
541 iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"},
542 'Captures can move backwards in string';
543 }
544
545
546 {
547 local $BugId = '27940'; # \cA not recognized in character classes
548 ok "a\cAb" =~ /\cA/, '\cA in pattern';
549 ok "a\cAb" =~ /[\cA]/, '\cA in character class';
550 ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range';
551 ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range';
552 ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range';
553 ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range';
554 ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern';
555 ok "ab" !~ /a\cIb/x, '\cI in pattern';
556 }
557
558
559 {
9d45b377
YO
560 # perl #28532: optional zero-width match at end of string is ignored
561 local $BugId = '28532';
562 ok "abc" =~ /^abc(\z)?/ && defined($1),
563 'Optional zero-width match at end of string';
564 ok "abc" =~ /^abc(\z)??/ && !defined($1),
565 'Optional zero-width match at end of string';
e425a60b
YO
566 }
567
568
e425a60b
YO
569
570 {
571 local $BugId = '36207';
572 my $utf8 = "\xe9\x{100}"; chop $utf8;
573 my $latin1 = "\xe9";
574
575 ok $utf8 =~ /\xe9/i, "utf8/latin";
576 ok $utf8 =~ /$latin1/i, "utf8/latin runtime";
577 ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie";
578 ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime";
579
580 ok "\xe9" =~ /$utf8/i, "latin/utf8";
581 ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie";
582 ok $latin1 =~ /$utf8/i, "latin/utf8 runtime";
583 ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime";
584 }
585
586
587 {
588 local $BugId = '37038';
589 my $s = "abcd";
590 $s =~ /(..)(..)/g;
591 $s = $1;
592 $s = $2;
593 iseq $2, 'cd',
594 "Assigning to original string does not corrupt match vars";
595 }
596
597
598 {
9d45b377 599 local $PatchId = '26410';
e425a60b
YO
600 {
601 package wooosh;
602 sub gloople {"!"}
603 }
604 my $aeek = bless {} => 'wooosh';
605 eval_ok sub {$aeek -> gloople () =~ /(.)/g},
606 "//g match against return value of sub";
607
608 sub gloople {"!"}
609 eval_ok sub {gloople () =~ /(.)/g},
610 "26410 didn't affect sub calls for some reason";
611 }
612
613
614 {
615 local $TODO = "See changes 26925-26928, which reverted change 26410";
616 {
617 package lv;
618 our $var = "abc";
619 sub variable : lvalue {$var}
620 }
621 my $o = bless [] => 'lv';
622 my $f = "";
623 my $r = eval {
624 for (1 .. 2) {
625 $f .= $1 if $o -> variable =~ /(.)/g;
626 }
627 1;
628 };
629 if ($r) {
630 iseq $f, "ab", "pos() retained between calls";
631 }
632 else {
633 local $TODO;
634 ok 0, "Code failed: $@";
635 }
636
637 our $var = "abc";
638 sub variable : lvalue {$var}
639 my $g = "";
640 my $s = eval {
641 for (1 .. 2) {
642 $g .= $1 if variable =~ /(.)/g;
643 }
644 1;
645 };
646 if ($s) {
647 iseq $g, "ab", "pos() retained between calls";
648 }
649 else {
650 local $TODO;
651 ok 0, "Code failed: $@";
652 }
653 }
654
655
656 SKIP:
657 {
658 local $BugId = '37836';
659 skip "In EBCDIC" if $IS_EBCDIC;
660 no warnings 'utf8';
661 $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8
662 my $ret = 0;
663 eval_ok sub {!($ret = s/[\0]+//g)},
664 "Ill-formed UTF-8 doesn't match NUL in class";
665 }
666
667
668 {
669 # chr(65535) should be allowed in regexes
670 local $BugId = '38293';
671 no warnings 'utf8'; # To allow non-characters
672 my ($c, $r, $s);
673
674 $c = chr 0xffff;
675 $c =~ s/$c//g;
676 ok $c eq "", "U+FFFF, parsed as atom";
677
678 $c = chr 0xffff;
679 $r = "\\$c";
680 $c =~ s/$r//g;
681 ok $c eq "", "U+FFFF backslashed, parsed as atom";
682
683 $c = chr 0xffff;
684 $c =~ s/[$c]//g;
685 ok $c eq "", "U+FFFF, parsed in class";
686
687 $c = chr 0xffff;
688 $r = "[\\$c]";
689 $c =~ s/$r//g;
690 ok $c eq "", "U+FFFF backslashed, parsed in class";
691
692 $s = "A\x{ffff}B";
693 $s =~ s/\x{ffff}//i;
694 ok $s eq "AB", "U+FFFF, EXACTF";
695
696 $s = "\x{ffff}A";
697 $s =~ s/\bA//;
698 ok $s eq "\x{ffff}", "U+FFFF, BOUND";
699
700 $s = "\x{ffff}!";
701 $s =~ s/\B!//;
702 ok $s eq "\x{ffff}", "U+FFFF, NBOUND";
703 }
704
705
706 {
707 local $BugId = '39583';
708
709 # The printing characters
710 my @chars = ("A" .. "Z");
711 my $delim = ",";
712 my $size = 32771 - 4;
713 my $str = '';
714
715 # Create some random junk. Inefficient, but it works.
716 for (my $i = 0; $i < $size; $ i++) {
717 $str .= $chars [rand @chars];
718 }
719
720 $str .= ($delim x 4);
721 my $res;
722 my $matched;
723 ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches";
724 iseq $str, "", "Empty string";
725 ok defined $1 && length ($1) == $size, '$1 is correct size';
726 }
727
728
729 {
730 local $BugId = '27940';
731 ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern';
732 ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern';
733 ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern';
734 ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern';
735
736 ok "X\0A" =~ /X\c@?A/, '\c@?';
737 ok "X\0A" =~ /X\c@*A/, '\c@*';
738 ok "X\0A" =~ /X\c@(A)/, '\c@(';
739 ok "X\0A" =~ /X(\c@)A/, '\c@)';
740 ok "X\0A" =~ /X\c@|ZA/, '\c@|';
741
742 ok "X\@A" =~ /X@?A/, '@?';
743 ok "X\@A" =~ /X@*A/, '@*';
744 ok "X\@A" =~ /X@(A)/, '@(';
745 ok "X\@A" =~ /X(@)A/, '@)';
746 ok "X\@A" =~ /X@|ZA/, '@|';
747
748 local $" = ','; # non-whitespace and non-RE-specific
749 ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus';
750 ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/';
751 ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/';
752 ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x';
753 ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x';
754 }
755
756
757 {
9d45b377 758 local $BugId = '50496';
e425a60b
YO
759 my $s = 'foo bar baz';
760 my (@k, @v, @fetch, $res);
761 my $count = 0;
762 my @names = qw ($+{A} $+{B} $+{C});
763 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
764 while (my ($k, $v) = each (%+)) {
765 $count++;
766 }
767 @k = sort keys (%+);
768 @v = sort values (%+);
769 $res = 1;
770 push @fetch,
771 ["$+{A}", "$1"],
772 ["$+{B}", "$2"],
773 ["$+{C}", "$3"],
774 ;
775 }
776 foreach (0 .. 2) {
777 if ($fetch [$_]) {
778 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
779 } else {
780 ok 0, $names[$_];
781 }
782 }
783 iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/";
784 iseq $count, 3, "Got 3 keys in %+ via each";
785 iseq 0 + @k, 3, 'Got 3 keys in %+ via keys';
786 iseq "@k", "A B C", "Got expected keys";
787 iseq "@v", "bar baz foo", "Got expected values";
788 eval '
789 no warnings "uninitialized";
790 print for $+ {this_key_doesnt_exist};
791 ';
792 ok !$@, 'lvalue $+ {...} should not throw an exception';
793 }
794
795
796 {
797 #
798 # Almost the same as the block above, except that the capture is nested.
799 #
800 local $BugId = '50496';
801 my $s = 'foo bar baz';
802 my (@k, @v, @fetch, $res);
803 my $count = 0;
804 my @names = qw ($+{A} $+{B} $+{C} $+{D});
805 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
806 while (my ($k,$v) = each(%+)) {
807 $count++;
808 }
809 @k = sort keys (%+);
810 @v = sort values (%+);
811 $res = 1;
812 push @fetch,
813 ["$+{A}", "$2"],
814 ["$+{B}", "$3"],
815 ["$+{C}", "$4"],
816 ["$+{D}", "$1"],
817 ;
818 }
819 foreach (0 .. 3) {
820 if ($fetch [$_]) {
821 iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_];
822 } else {
823 ok 0, $names [$_];
824 }
825 }
826 iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/";
827 iseq $count, 4, "Got 4 keys in %+ via each";
828 iseq @k, 4, 'Got 4 keys in %+ via keys';
829 iseq "@k", "A B C D", "Got expected keys";
830 iseq "@v", "bar baz foo foo bar baz", "Got expected values";
831 eval '
832 no warnings "uninitialized";
833 print for $+ {this_key_doesnt_exist};
834 ';
835 ok !$@,'lvalue $+ {...} should not throw an exception';
836 }
837
838
839 {
e425a60b
YO
840 local $BugId = '36046';
841 my $str = 'abc';
842 my $count = 0;
843 my $mval = 0;
844 my $pval = 0;
845 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
846 iseq $mval, 0, '@- should be empty';
847 iseq $pval, 0, '@+ should be empty';
848 iseq $count, 1, 'Should have matched once only';
849 }
850
851
e425a60b
YO
852
853
854 {
855 local $BugId = '40684';
856 local $Message = '/m in precompiled regexp';
857 my $s = "abc\ndef";
858 my $rex = qr'^abc$'m;
859 ok $s =~ m/$rex/;
860 ok $s =~ m/^abc$/m;
861 }
862
863
864 {
e425a60b
YO
865 local $BugId = '36909';
866 local $Message = '(?: ... )? should not lose $^R';
867 $^R = 'Nothing';
868 {
869 local $^R = "Bad";
870 ok 'x foofoo y' =~ m {
871 (foo) # $^R correctly set
872 (?{ "last regexp code result" })
873 }x;
874 iseq $^R, 'last regexp code result';
875 }
876 iseq $^R, 'Nothing';
877
878 {
879 local $^R = "Bad";
880
881 ok 'x foofoo y' =~ m {
882 (?:foo|bar)+ # $^R correctly set
883 (?{ "last regexp code result" })
884 }x;
885 iseq $^R, 'last regexp code result';
886 }
887 iseq $^R, 'Nothing';
888
889 {
890 local $^R = "Bad";
891 ok 'x foofoo y' =~ m {
892 (foo|bar)\1+ # $^R undefined
893 (?{ "last regexp code result" })
894 }x;
895 iseq $^R, 'last regexp code result';
896 }
897 iseq $^R, 'Nothing';
898
899 {
900 local $^R = "Bad";
901 ok 'x foofoo y' =~ m {
902 (foo|bar)\1 # This time without the +
903 (?{"last regexp code result"})
904 }x;
905 iseq $^R, 'last regexp code result';
906 }
907 iseq $^R, 'Nothing';
908 }
909
910
911 {
912 local $BugId = '22395';
913 local $Message = 'Match is linear, not quadratic';
914 our $count;
915 for my $l (10, 100, 1000) {
916 $count = 0;
917 ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
918 local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
919 iseq $count, $l + 1;
920 }
921 }
922
923
924 {
925 local $BugId = '22614';
926 local $Message = '@-/@+ should not have undefined values';
927 local $_ = 'ab';
928 our @len = ();
929 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
930 iseq "@len", "2 2 2";
931 }
932
933
934 {
935 local $BugId = '18209';
936 local $Message = '$& set on s///';
937 my $text = ' word1 word2 word3 word4 word5 word6 ';
938
939 my @words = ('word1', 'word3', 'word5');
940 my $count;
941 foreach my $word (@words) {
942 $text =~ s/$word\s//gi; # Leave a space to seperate words
943 # in the resultant str.
944 # The following block is not working.
945 if ($&) {
946 $count ++;
947 }
948 # End bad block
949 }
950 iseq $count, 3;
951 iseq $text, ' word2 word4 word6 ';
952 }
953
954
955 {
956 # RT#6893
957 local $BugId = '6893';
958 local $_ = qq (A\nB\nC\n);
959 my @res;
960 while (m#(\G|\n)([^\n]*)\n#gsx) {
961 push @res, "$2";
962 last if @res > 3;
963 }
964 iseq "@res", "A B C", "/g pattern shouldn't infinite loop";
965 }
966
967
e425a60b
YO
968
969 {
970 local $BugId = '41010';
971 local $Message = 'No optimizer bug';
972 my @tails = ('', '(?(1))', '(|)', '()?');
973 my @quants = ('*','+');
974 my $doit = sub {
975 my $pats = shift;
976 for (@_) {
977 for my $pat (@$pats) {
978 for my $quant (@quants) {
979 for my $tail (@tails) {
980 my $re = "($pat$quant\$)$tail";
981 ok /$re/ && $1 eq $_, "'$_' =~ /$re/";
982 ok /$re/m && $1 eq $_, "'$_' =~ /$re/m";
983 }
984 }
985 }
986 }
987 };
988
989 my @dpats = ('\d',
990 '[1234567890]',
991 '(1|[23]|4|[56]|[78]|[90])',
992 '(?:1|[23]|4|[56]|[78]|[90])',
993 '(1|2|3|4|5|6|7|8|9|0)',
994 '(?:1|2|3|4|5|6|7|8|9|0)');
995 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
996 my @sstrs = (' ');
997 my @dstrs = ('12345');
998 $doit -> (\@spats, @sstrs);
999 $doit -> (\@dpats, @dstrs);
1000 }
1001
1002
e425a60b
YO
1003
1004 {
1005 local $BugId = '45605';
1006 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
1007
1008 my $utf_8 = "\xd6schel";
1009 utf8::upgrade ($utf_8);
1010 $utf_8 =~ m {(\xd6|&Ouml;)schel};
1011 iseq $1, "\xd6", "Upgrade error";
1012 }
1013
1014 {
e425a60b
YO
1015 # Regardless of utf8ness any character matches itself when
1016 # doing a case insensitive match. See also [perl #36207]
1017 local $BugId = '36207';
1018 for my $o (0 .. 255) {
1019 my @ch = (chr ($o), chr ($o));
1020 utf8::upgrade ($ch [1]);
1021 for my $u_str (0, 1) {
1022 for my $u_pat (0, 1) {
1023 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i,
1024 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat";
1025 ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i,
1026 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat";
1027 }
1028 }
1029 }
1030 }
1031
1032
1033 {
e425a60b
YO
1034 local $BugId = '49190';
1035 local $Message = '$REGMARK in replacement';
1036 our $REGMARK;
1037 my $_ = "A";
1038 ok s/(*:B)A/$REGMARK/;
1039 iseq $_, "B";
1040 $_ = "CCCCBAA";
1041 ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g;
1042 iseq $_, "ZYX";
1043 }
1044
1045
1046 {
e425a60b
YO
1047 local $BugId = '52658';
1048 local $Message = 'Substitution evaluation in list context';
1049 my $reg = '../xxx/';
1050 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
1051 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
1052 iseq $reg, '../bbb/';
1053 iseq $te [0], '../';
1054 }
1055
e425a60b
YO
1056 {
1057 local $BugId = '60034';
1058 my $a = "xyzt" x 8192;
1059 ok $a =~ /\A(?>[a-z])*\z/,
1060 '(?>) does not cause wrongness on long string';
1061 my $b = $a . chr 256;
1062 chop $b;
1063 {
1064 iseq $a, $b;
1065 }
1066 ok $b =~ /\A(?>[a-z])*\z/,
1067 '(?>) does not cause wrongness on long string with UTF-8';
1068 }
1069
1070
1071 #
1072 # Keep the following tests last -- they may crash perl
1073 #
1074 print "# Tests that follow may crash perl\n";
1075 {
1076 local $BugId = '19049/38869';
1077 local $Message = 'Pattern in a loop, failure should not ' .
1078 'affect previous success';
1079 my @list = (
1080 'ab cdef', # Matches regex
1081 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
1082 );
1083 my $y;
1084 my $x;
1085 foreach (@list) {
1086 m/ab(.+)cd/i; # The ignore-case seems to be important
1087 $y = $1; # Use $1, which might not be from the last match!
1088 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
1089 }
1090 iseq $y, ' ';
1091 iseq $x, 'ab cd';
1092 }
1093
1094
1095 {
1096 local $BugId = '24274';
1097
1098 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker");
1099 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
1100 "Regexp /^(??{'(.)'x 100})/ crashes older perls");
1101 }
1102
1103
1104 {
e425a60b
YO
1105 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
1106 local $BugId = '45337';
1107 local ${^UTF8CACHE} = -1;
1108 local $Message = "Shouldn't panic";
1109 my $s = "[a]a{2}";
1110 utf8::upgrade $s;
1111 ok "aaa" =~ /$s/;
1112 }
1113 {
1114 local $BugId = '57042';
1115 local $Message = "Check if tree logic breaks \$^R";
1116 my $cond_re = qr/\s*
1117 \s* (?:
1118 \( \s* A (?{1})
1119 | \( \s* B (?{2})
1120 )
1121 /x;
1122 my @res;
1123 for my $line ("(A)","(B)") {
1124 if ($line =~ m/$cond_re/) {
1125 push @res, $^R ? "#$^R" : "UNDEF";
1126 }
1127 }
1128 iseq "@res","#1 #2";
1129 }
1130 {
1131 no warnings 'closure';
1132 my $re = qr/A(??{"1"})/;
1133 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
1134 ok $1 eq "A1";
1135 ok $2 eq "B";
1136 }
1137
1138
e425a60b
YO
1139
1140 # This only works under -DEBUGGING because it relies on an assert().
1141 {
1142 local $BugId = '60508';
1143 local $Message = "Check capture offset re-entrancy of utf8 code.";
1144
1145 sub fswash { $_[0] =~ s/([>X])//g; }
1146
1147 my $k1 = "." x 4 . ">>";
1148 fswash($k1);
1149
1150 my $k2 = "\x{f1}\x{2022}";
1151 $k2 =~ s/([\360-\362])/>/g;
1152 fswash($k2);
1153
1154 iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks");
1155 }
1156
1157
1158 {
1159 local $BugId = 65372; # minimal CURLYM limited to 32767 matches
1160 my @pat = (
1161 qr{a(x|y)*b}, # CURLYM
1162 qr{a(x|y)*?b}, # .. with minmod
1163 qr{a([wx]|[yz])*b}, # .. and without tries
1164 qr{a([wx]|[yz])*?b},
1165 );
1166 my $len = 32768;
1167 my $s = join '', 'a', 'x' x $len, 'b';
1168 for my $pat (@pat) {
1169 ok($s =~ $pat, $pat);
1170 }
1171 }
b4da7bae
B
1172
1173 {
1174 local $TODO = "[perl #38133]";
1175
1176 "A" =~ /(((?:A))?)+/;
1177 my $first = $2;
1178
1179 "A" =~ /(((A))?)+/;
1180 my $second = $2;
1181
1182 iseq($first, $second);
1183 }
e425a60b
YO
1184} # End of sub run_tests
1185
11861;