Commit | Line | Data |
---|---|---|
e425a60b YO |
1 | #!./perl |
2 | # | |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by re/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to re/re_tests, not here. | |
6 | ||
7 | use strict; | |
8 | use warnings; | |
9 | use 5.010; | |
10 | ||
11 | ||
12 | sub run_tests; | |
13 | ||
14 | $| = 1; | |
15 | ||
e425a60b YO |
16 | |
17 | BEGIN { | |
18 | chdir 't' if -d 't'; | |
9d45b377 YO |
19 | @INC = ('../lib','.'); |
20 | do "re/ReTest.pl" or die $@; | |
e425a60b | 21 | } |
e425a60b | 22 | |
e425a60b | 23 | |
fa1639c5 | 24 | plan tests => 2510; # Update this when adding/deleting tests. |
e425a60b | 25 | |
9d45b377 | 26 | run_tests() unless caller; |
e425a60b YO |
27 | |
28 | # | |
29 | # Tests start here. | |
30 | # | |
31 | sub run_tests { | |
32 | ||
e425a60b YO |
33 | |
34 | { | |
35 | local $BugId = '20000731.001'; | |
36 | ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/, | |
37 | "Match UTF-8 char in presense of (??{ })"; | |
38 | } | |
39 | ||
40 | ||
41 | { | |
42 | local $BugId = '20001021.005'; | |
43 | no warnings 'uninitialized'; | |
44 | ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV"; | |
45 | } | |
46 | ||
e425a60b YO |
47 | { |
48 | local $Message = 'bug id 20001008.001'; | |
49 | ||
50 | my @x = ("stra\337e 138", "stra\337e 138"); | |
51 | for (@x) { | |
52 | ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; | |
53 | ok my ($latin) = /^(.+)(?:\s+\d)/; | |
54 | iseq $latin, "stra\337e"; | |
55 | ok $latin =~ s/stra\337e/straße/; | |
56 | # | |
57 | # Previous code follows, but outcommented - there were no tests. | |
58 | # | |
59 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
60 | # use utf8; # needed for the raw UTF-8 | |
61 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
62 | } | |
63 | } | |
64 | ||
65 | ||
66 | { | |
e425a60b YO |
67 | local $BugId = '20001028.003'; |
68 | ||
69 | # Fist half of the bug. | |
70 | local $Message = 'HEBREW ACCENT QADMA matched by .*'; | |
71 | my $X = chr (1448); | |
72 | ok my ($Y) = $X =~ /(.*)/; | |
73 | iseq $Y, v1448; | |
74 | iseq length ($Y), 1; | |
75 | ||
76 | # Second half of the bug. | |
77 | $Message = 'HEBREW ACCENT QADMA in replacement'; | |
78 | $X = ''; | |
79 | $X =~ s/^/chr(1488)/e; | |
80 | iseq length $X, 1; | |
81 | iseq ord ($X), 1488; | |
82 | } | |
83 | ||
84 | ||
85 | { | |
86 | local $BugId = '20001108.001'; | |
87 | local $Message = 'Repeated s///'; | |
88 | my $X = "Szab\x{f3},Bal\x{e1}zs"; | |
89 | my $Y = $X; | |
90 | $Y =~ s/(B)/$1/ for 0 .. 3; | |
91 | iseq $Y, $X; | |
92 | iseq $X, "Szab\x{f3},Bal\x{e1}zs"; | |
93 | } | |
94 | ||
95 | ||
96 | { | |
97 | local $BugId = '20000517.001'; | |
98 | local $Message = 's/// on UTF-8 string'; | |
99 | my $x = "\x{100}A"; | |
100 | $x =~ s/A/B/; | |
101 | iseq $x, "\x{100}B"; | |
102 | iseq length $x, 2; | |
103 | } | |
104 | ||
105 | ||
106 | { | |
107 | local $BugId = '20001230.002'; | |
108 | local $Message = '\C and É'; | |
109 | ok "École" =~ /^\C\C(.)/ && $1 eq 'c'; | |
110 | ok "École" =~ /^\C\C(c)/; | |
111 | } | |
112 | ||
113 | ||
e425a60b YO |
114 | { |
115 | # The original bug report had 'no utf8' here but that was irrelevant. | |
116 | local $BugId = '20010306.008'; | |
117 | local $Message = "Don't dump core"; | |
118 | my $a = "a\x{1234}"; | |
119 | ok $a =~ m/\w/; # used to core dump. | |
120 | } | |
121 | ||
122 | ||
123 | { | |
124 | local $BugId = '20010410.006'; | |
125 | local $Message = '/g in scalar context'; | |
126 | for my $rx ('/(.*?)\{(.*?)\}/csg', | |
127 | '/(.*?)\{(.*?)\}/cg', | |
128 | '/(.*?)\{(.*?)\}/sg', | |
129 | '/(.*?)\{(.*?)\}/g', | |
130 | '/(.+?)\{(.+?)\}/csg',) { | |
131 | my $i = 0; | |
132 | my $input = "a{b}c{d}"; | |
133 | eval <<" --"; | |
134 | while (eval \$input =~ $rx) { | |
135 | \$i ++; | |
136 | } | |
137 | -- | |
138 | iseq $i, 2; | |
139 | } | |
140 | } | |
141 | ||
e425a60b YO |
142 | { |
143 | local $BugId = "20010619.003"; | |
144 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. | |
145 | for ("\n", "\t", "\014", "\r") { | |
146 | ok !/[[:print:]]/, "'$_' not in [[:print:]]"; | |
147 | } | |
148 | for (" ") { | |
149 | ok /[[:print:]]/, "'$_' in [[:print:]]"; | |
150 | } | |
151 | } | |
152 | ||
153 | ||
e425a60b YO |
154 | |
155 | { | |
156 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
157 | local $BugId = '20010814.004'; | |
158 | $_ = "ababacadaea"; | |
159 | my $a = join ":", /b./gc; | |
160 | my $b = join ":", /a./gc; | |
161 | my $c = pos; | |
162 | iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//"; | |
163 | } | |
164 | ||
165 | ||
166 | { | |
167 | # [ID 20010407.006] matching utf8 return values from | |
168 | # functions does not work | |
169 | local $BugId = '20010407.006'; | |
170 | local $Message = 'UTF-8 return values from functions'; | |
171 | package ID_20010407_006; | |
172 | sub x {"a\x{1234}"} | |
173 | my $x = x; | |
174 | my $y; | |
175 | ::ok $x =~ /(..)/; | |
176 | $y = $1; | |
177 | ::ok length ($y) == 2 && $y eq $x; | |
178 | ::ok x =~ /(..)/; | |
179 | $y = $1; | |
180 | ::ok length ($y) == 2 && $y eq $x; | |
181 | } | |
182 | ||
e425a60b YO |
183 | { |
184 | # High bit bug -- japhy | |
185 | my $x = "ab\200d"; | |
186 | ok $x =~ /.*?\200/, "High bit fine"; | |
187 | } | |
188 | ||
189 | ||
190 | { | |
e425a60b YO |
191 | local $Message = 'UTF-8 hash keys and /$/'; |
192 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters | |
193 | # /2002-01/msg01327.html | |
194 | ||
195 | my $u = "a\x{100}"; | |
196 | my $v = substr ($u, 0, 1); | |
197 | my $w = substr ($u, 1, 1); | |
198 | my %u = ($u => $u, $v => $v, $w => $w); | |
199 | for (keys %u) { | |
200 | my $m1 = /^\w*$/ ? 1 : 0; | |
201 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
202 | iseq $m1, $m2; | |
203 | } | |
204 | } | |
205 | ||
206 | ||
207 | { | |
208 | local $BugId = "20020124.005"; | |
209 | local $PatchId = "14795"; | |
210 | local $Message = "s///eg"; | |
211 | ||
212 | for my $char ("a", "\x{df}", "\x{100}") { | |
213 | my $x = "$char b $char"; | |
214 | $x =~ s{($char)}{ | |
215 | "c" =~ /c/; | |
216 | "x"; | |
217 | }ge; | |
218 | iseq substr ($x, 0, 1), substr ($x, -1, 1); | |
219 | } | |
220 | } | |
221 | ||
222 | ||
223 | { | |
e425a60b YO |
224 | local $BugId = "20020412.005"; |
225 | local $Message = "Correct pmop flags checked when empty pattern"; | |
226 | ||
227 | # Requires reuse of last successful pattern. | |
228 | my $num = 123; | |
229 | $num =~ /\d/; | |
230 | for (0 .. 1) { | |
231 | my $match = ?? + 0; | |
232 | ok $match != $_, $Message, | |
233 | sprintf "'match one' %s on %s iteration" => | |
234 | $match ? 'succeeded' : 'failed', | |
235 | $_ ? 'second' : 'first'; | |
236 | } | |
237 | $num =~ /(\d)/; | |
238 | my $result = join "" => $num =~ //g; | |
239 | iseq $result, $num; | |
240 | } | |
241 | ||
242 | ||
243 | { | |
244 | local $BugId = '20020630.002'; | |
245 | local $Message = 'UTF-8 regex matches above 32k'; | |
246 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { | |
247 | my ($type, $char) = @$_; | |
248 | for my $len (32000, 32768, 33000) { | |
249 | my $s = $char . "f" x $len; | |
250 | my $r = $s =~ /$char([f]*)/gc; | |
251 | ok $r, $Message, "<$type x $len>"; | |
252 | ok !$r || pos ($s) == $len + 1, $Message, | |
253 | "<$type x $len>; pos = @{[pos $s]}"; | |
254 | } | |
255 | } | |
256 | } | |
257 | ||
e425a60b YO |
258 | { |
259 | local $PatchId = '18179'; | |
260 | my $s = "\x{100}" x 5; | |
261 | my $ok = $s =~ /(\x{100}{4})/; | |
262 | my ($ord, $len) = (ord $1, length $1); | |
263 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift"; | |
264 | } | |
265 | ||
266 | ||
267 | { | |
268 | local $BugId = '15763'; | |
269 | our $a = "x\x{100}"; | |
270 | chop $a; # Leaves the UTF-8 flag | |
271 | $a .= "y"; # 1 byte before 'y'. | |
272 | ||
273 | ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8'; | |
274 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
275 | ||
276 | ok $a =~ /^\Cy/, 'match \Cy'; | |
277 | ok $a =~ /^\C{1}y/, 'match \C{1}y'; | |
278 | ||
279 | ok $a !~ /^\C\Cy/, q {don't match two \Cy}; | |
280 | ok $a !~ /^\C{2}y/, q {don't match \C{2}y}; | |
281 | ||
282 | $a = "\x{100}y"; # 2 bytes before "y" | |
283 | ||
284 | ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8'; | |
285 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
286 | ok $a =~ /^\C\C/, 'match two \C'; | |
287 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
288 | ||
289 | ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'; | |
290 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
291 | ||
292 | ok $a =~ /^\C\Cy/, 'match two \C'; | |
293 | ok $a =~ /^\C{2}y/, 'match \C{2}'; | |
294 | ||
295 | ok $a !~ /^\C\C\Cy/, q {don't match three \Cy}; | |
296 | ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy}; | |
297 | ok $a !~ /^\C{3}y/, q {don't match \C{3}y}; | |
298 | ||
299 | $a = "\x{1000}y"; # 3 bytes before "y" | |
300 | ||
301 | ok $a =~ /^\C/, 'match one \C on three-byte UTF-8'; | |
302 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
303 | ok $a =~ /^\C\C/, 'match two \C'; | |
304 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
305 | ok $a =~ /^\C\C\C/, 'match three \C'; | |
306 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
307 | ||
308 | ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'; | |
309 | ok $a =~ /^\C{4}/, 'match \C{4}'; | |
310 | ||
311 | ok $a =~ /^\C\C\Cy/, 'match three \Cy'; | |
312 | ok $a =~ /^\C{3}y/, 'match \C{3}y'; | |
313 | ||
314 | ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy}; | |
315 | ok $a !~ /^\C{4}y/, q {don't match \C{4}y}; | |
316 | } | |
317 | ||
318 | ||
319 | { | |
e425a60b YO |
320 | local $BugId = '15397'; |
321 | local $Message = 'UTF-8 matching'; | |
322 | ok "\x{100}" =~ /\x{100}/; | |
323 | ok "\x{100}" =~ /(\x{100})/; | |
324 | ok "\x{100}" =~ /(\x{100}){1}/; | |
325 | ok "\x{100}\x{100}" =~ /(\x{100}){2}/; | |
326 | ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/; | |
327 | } | |
328 | ||
329 | ||
330 | { | |
331 | local $BugId = '7471'; | |
332 | local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times'; | |
333 | local $_ = 'CD'; | |
334 | ok /(AB)*?CD/ && !defined $1; | |
335 | ok /(AB)*CD/ && !defined $1; | |
336 | } | |
337 | ||
338 | ||
339 | { | |
340 | local $BugId = '3547'; | |
341 | local $Message = "Caching shouldn't prevent match"; | |
342 | my $pattern = "^(b+?|a){1,2}c"; | |
343 | ok "bac" =~ /$pattern/ && $1 eq 'a'; | |
344 | ok "bbac" =~ /$pattern/ && $1 eq 'a'; | |
345 | ok "bbbac" =~ /$pattern/ && $1 eq 'a'; | |
346 | ok "bbbbac" =~ /$pattern/ && $1 eq 'a'; | |
347 | } | |
348 | ||
349 | ||
350 | ||
351 | { | |
352 | local $BugId = '18232'; | |
353 | local $Message = '$1 should keep UTF-8 ness'; | |
354 | ok "\x{100}" =~ /(.)/; | |
355 | iseq $1, "\x{100}", '$1 is UTF-8'; | |
356 | { 'a' =~ /./; } | |
357 | iseq $1, "\x{100}", '$1 is still UTF-8'; | |
358 | isneq $1, "\xC4\x80", '$1 is not non-UTF-8'; | |
359 | } | |
360 | ||
361 | ||
362 | { | |
363 | local $BugId = '19767'; | |
364 | local $Message = "Optimizer doesn't prematurely reject match"; | |
365 | use utf8; | |
366 | ||
367 | my $attr = 'Name-1'; | |
368 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
369 | my $NormalWord = qr /${NormalChar}+?/; | |
370 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; | |
371 | ||
372 | $attr =~ /^$/; | |
373 | ok $attr =~ $PredNameHyphen; # Original test. | |
374 | ||
375 | "a" =~ m/[b]/; | |
376 | ok "0" =~ /\p{N}+\z/; # Variant. | |
377 | } | |
378 | ||
379 | ||
380 | { | |
381 | local $BugId = '20683'; | |
382 | local $Message = "(??{ }) doesn't return stale values"; | |
383 | our $p = 1; | |
384 | foreach (1, 2, 3, 4) { | |
385 | $p ++ if /(??{ $p })/ | |
386 | } | |
387 | iseq $p, 5; | |
388 | ||
389 | { | |
390 | package P; | |
391 | $a = 1; | |
392 | sub TIESCALAR {bless []} | |
393 | sub FETCH {$a ++} | |
394 | } | |
395 | tie $p, "P"; | |
396 | foreach (1, 2, 3, 4) { | |
397 | /(??{ $p })/ | |
398 | } | |
399 | iseq $p, 5; | |
400 | } | |
401 | ||
402 | ||
403 | { | |
404 | # Subject: Odd regexp behavior | |
405 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
406 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
407 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
408 | # To: perl-unicode@perl.org | |
409 | ||
410 | local $Message = 'Markus Kuhn 2003-02-26'; | |
411 | ||
412 | my $x = "\x{2019}\nk"; | |
413 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
414 | ok $x eq "\x{2019} k"; | |
415 | ||
416 | $x = "b\nk"; | |
417 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
418 | ok $x eq "b k"; | |
419 | ||
420 | ok "\x{2019}" =~ /\S/; | |
421 | } | |
422 | ||
423 | ||
424 | { | |
425 | local $BugId = '21411'; | |
426 | local $Message = "(??{ .. }) in split doesn't corrupt its stack"; | |
427 | our $i; | |
428 | ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-'; | |
429 | no warnings 'syntax'; | |
430 | @_ = split /(?{'WOW'})/, 'abc'; | |
431 | local $" = "|"; | |
432 | iseq "@_", "a|b|c"; | |
433 | } | |
434 | ||
435 | ||
436 | { | |
437 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it | |
438 | # hasn't been crashing. Disable this test until it is fixed properly. | |
439 | # XXX also check what it returns rather than just doing ok(1,...) | |
440 | # split /(?{ split "" })/, "abc"; | |
441 | local $TODO = "Recursive split is still broken"; | |
442 | ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; | |
443 | } | |
444 | ||
445 | ||
446 | { | |
e425a60b YO |
447 | local $BugId = '17757'; |
448 | $_ = "code: 'x' { '...' }\n"; study; | |
449 | my @x; push @x, $& while m/'[^\']*'/gx; | |
450 | local $" = ":"; | |
451 | iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop"; | |
452 | } | |
453 | ||
454 | ||
455 | { | |
e425a60b YO |
456 | local $BugId = '22354'; |
457 | sub func ($) { | |
458 | ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]"; | |
459 | ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m"; | |
460 | } | |
461 | func "standalone"; | |
462 | $_ = "x"; s/x/func "in subst"/e; | |
463 | $_ = "x"; s/x/func "in multiline subst"/em; | |
464 | ||
465 | # | |
466 | # Next two give 'panic: malloc'. | |
467 | # Outcommented, using two TODOs. | |
468 | # | |
469 | local $TODO = 'panic: malloc'; | |
470 | local $Message = 'Postponed regexp and propaged modifier'; | |
471 | # ok 0 for 1 .. 2; | |
472 | SKIP: { | |
473 | skip "panic: malloc", 2; | |
474 | $_ = "x"; /x(?{func "in regexp"})/; | |
475 | $_ = "x"; /x(?{func "in multiline regexp"})/m; | |
476 | } | |
477 | } | |
478 | ||
479 | ||
480 | { | |
481 | local $BugId = '19049'; | |
482 | $_ = "abcdef\n"; | |
483 | my @x = m/./g; | |
484 | iseq "abcde", $`, 'Global match sets $`'; | |
485 | } | |
486 | ||
487 | ||
488 | { | |
e425a60b YO |
489 | # [perl #23769] Unicode regex broken on simple example |
490 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
491 | local $BugId = '23769'; | |
492 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; | |
493 | local $Message = $Mess; | |
494 | ||
495 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; | |
496 | ||
497 | ok $s =~ /\x{a0}/; | |
498 | ok $s =~ /\x{a0}+/; | |
499 | ok $s =~ /\x{a0}\x{a0}/; | |
500 | ||
501 | $Message = "$Mess (easy variant)"; | |
502 | ok "aaa\x{100}" =~ /(a+)/; | |
503 | iseq $1, "aaa"; | |
504 | ||
505 | $Message = "$Mess (easy invariant)"; | |
506 | ok "aaa\x{100} " =~ /(a+?)/; | |
507 | iseq $1, "a"; | |
508 | ||
509 | $Message = "$Mess (regrepeat variant)"; | |
510 | ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/; | |
511 | iseq $1, "\xa0"; | |
512 | ||
513 | $Message = "$Mess (regrepeat invariant)"; | |
514 | ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/; | |
515 | iseq $1, "\xa0\xa0\xa0"; | |
516 | ||
517 | $Message = "$Mess (hard variant)"; | |
518 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/; | |
519 | iseq $1, "\xa0\xa1"; | |
520 | ||
521 | $Message = "$Mess (hard invariant)"; | |
522 | ok "ababab\x{100} " =~ /((?:ab)+)/; | |
523 | iseq $1, 'ababab'; | |
524 | ||
525 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/; | |
526 | iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1"; | |
527 | ||
528 | ok "ababab\x{100} " =~ /((?:ab)+?)/; | |
529 | iseq $1, "ab"; | |
530 | ||
531 | $Message = "Don't match first byte of UTF-8 representation"; | |
532 | ok "\xc4\xc4\xc4" !~ /(\x{100}+)/; | |
533 | ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/; | |
534 | ok "\xc4\xc4\xc4" !~ /(\x{100}++)/; | |
535 | } | |
536 | ||
537 | ||
538 | { | |
e425a60b YO |
539 | # perl panic: pp_match start/end pointers |
540 | local $BugId = '25269'; | |
541 | iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, | |
542 | 'Captures can move backwards in string'; | |
543 | } | |
544 | ||
545 | ||
546 | { | |
547 | local $BugId = '27940'; # \cA not recognized in character classes | |
548 | ok "a\cAb" =~ /\cA/, '\cA in pattern'; | |
549 | ok "a\cAb" =~ /[\cA]/, '\cA in character class'; | |
550 | ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'; | |
551 | ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'; | |
552 | ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'; | |
553 | ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'; | |
554 | ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'; | |
555 | ok "ab" !~ /a\cIb/x, '\cI in pattern'; | |
556 | } | |
557 | ||
558 | ||
559 | { | |
9d45b377 YO |
560 | # perl #28532: optional zero-width match at end of string is ignored |
561 | local $BugId = '28532'; | |
562 | ok "abc" =~ /^abc(\z)?/ && defined($1), | |
563 | 'Optional zero-width match at end of string'; | |
564 | ok "abc" =~ /^abc(\z)??/ && !defined($1), | |
565 | 'Optional zero-width match at end of string'; | |
e425a60b YO |
566 | } |
567 | ||
568 | ||
e425a60b YO |
569 | |
570 | { | |
571 | local $BugId = '36207'; | |
572 | my $utf8 = "\xe9\x{100}"; chop $utf8; | |
573 | my $latin1 = "\xe9"; | |
574 | ||
575 | ok $utf8 =~ /\xe9/i, "utf8/latin"; | |
576 | ok $utf8 =~ /$latin1/i, "utf8/latin runtime"; | |
577 | ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"; | |
578 | ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"; | |
579 | ||
580 | ok "\xe9" =~ /$utf8/i, "latin/utf8"; | |
581 | ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie"; | |
582 | ok $latin1 =~ /$utf8/i, "latin/utf8 runtime"; | |
583 | ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime"; | |
584 | } | |
585 | ||
586 | ||
587 | { | |
588 | local $BugId = '37038'; | |
589 | my $s = "abcd"; | |
590 | $s =~ /(..)(..)/g; | |
591 | $s = $1; | |
592 | $s = $2; | |
593 | iseq $2, 'cd', | |
594 | "Assigning to original string does not corrupt match vars"; | |
595 | } | |
596 | ||
597 | ||
598 | { | |
9d45b377 | 599 | local $PatchId = '26410'; |
e425a60b YO |
600 | { |
601 | package wooosh; | |
602 | sub gloople {"!"} | |
603 | } | |
604 | my $aeek = bless {} => 'wooosh'; | |
605 | eval_ok sub {$aeek -> gloople () =~ /(.)/g}, | |
606 | "//g match against return value of sub"; | |
607 | ||
608 | sub gloople {"!"} | |
609 | eval_ok sub {gloople () =~ /(.)/g}, | |
610 | "26410 didn't affect sub calls for some reason"; | |
611 | } | |
612 | ||
613 | ||
614 | { | |
615 | local $TODO = "See changes 26925-26928, which reverted change 26410"; | |
616 | { | |
617 | package lv; | |
618 | our $var = "abc"; | |
619 | sub variable : lvalue {$var} | |
620 | } | |
621 | my $o = bless [] => 'lv'; | |
622 | my $f = ""; | |
623 | my $r = eval { | |
624 | for (1 .. 2) { | |
625 | $f .= $1 if $o -> variable =~ /(.)/g; | |
626 | } | |
627 | 1; | |
628 | }; | |
629 | if ($r) { | |
630 | iseq $f, "ab", "pos() retained between calls"; | |
631 | } | |
632 | else { | |
633 | local $TODO; | |
634 | ok 0, "Code failed: $@"; | |
635 | } | |
636 | ||
637 | our $var = "abc"; | |
638 | sub variable : lvalue {$var} | |
639 | my $g = ""; | |
640 | my $s = eval { | |
641 | for (1 .. 2) { | |
642 | $g .= $1 if variable =~ /(.)/g; | |
643 | } | |
644 | 1; | |
645 | }; | |
646 | if ($s) { | |
647 | iseq $g, "ab", "pos() retained between calls"; | |
648 | } | |
649 | else { | |
650 | local $TODO; | |
651 | ok 0, "Code failed: $@"; | |
652 | } | |
653 | } | |
654 | ||
655 | ||
656 | SKIP: | |
657 | { | |
658 | local $BugId = '37836'; | |
659 | skip "In EBCDIC" if $IS_EBCDIC; | |
660 | no warnings 'utf8'; | |
661 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 | |
662 | my $ret = 0; | |
663 | eval_ok sub {!($ret = s/[\0]+//g)}, | |
664 | "Ill-formed UTF-8 doesn't match NUL in class"; | |
665 | } | |
666 | ||
667 | ||
668 | { | |
669 | # chr(65535) should be allowed in regexes | |
670 | local $BugId = '38293'; | |
671 | no warnings 'utf8'; # To allow non-characters | |
672 | my ($c, $r, $s); | |
673 | ||
674 | $c = chr 0xffff; | |
675 | $c =~ s/$c//g; | |
676 | ok $c eq "", "U+FFFF, parsed as atom"; | |
677 | ||
678 | $c = chr 0xffff; | |
679 | $r = "\\$c"; | |
680 | $c =~ s/$r//g; | |
681 | ok $c eq "", "U+FFFF backslashed, parsed as atom"; | |
682 | ||
683 | $c = chr 0xffff; | |
684 | $c =~ s/[$c]//g; | |
685 | ok $c eq "", "U+FFFF, parsed in class"; | |
686 | ||
687 | $c = chr 0xffff; | |
688 | $r = "[\\$c]"; | |
689 | $c =~ s/$r//g; | |
690 | ok $c eq "", "U+FFFF backslashed, parsed in class"; | |
691 | ||
692 | $s = "A\x{ffff}B"; | |
693 | $s =~ s/\x{ffff}//i; | |
694 | ok $s eq "AB", "U+FFFF, EXACTF"; | |
695 | ||
696 | $s = "\x{ffff}A"; | |
697 | $s =~ s/\bA//; | |
698 | ok $s eq "\x{ffff}", "U+FFFF, BOUND"; | |
699 | ||
700 | $s = "\x{ffff}!"; | |
701 | $s =~ s/\B!//; | |
702 | ok $s eq "\x{ffff}", "U+FFFF, NBOUND"; | |
703 | } | |
704 | ||
705 | ||
706 | { | |
707 | local $BugId = '39583'; | |
708 | ||
709 | # The printing characters | |
710 | my @chars = ("A" .. "Z"); | |
711 | my $delim = ","; | |
712 | my $size = 32771 - 4; | |
713 | my $str = ''; | |
714 | ||
715 | # Create some random junk. Inefficient, but it works. | |
716 | for (my $i = 0; $i < $size; $ i++) { | |
717 | $str .= $chars [rand @chars]; | |
718 | } | |
719 | ||
720 | $str .= ($delim x 4); | |
721 | my $res; | |
722 | my $matched; | |
723 | ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches"; | |
724 | iseq $str, "", "Empty string"; | |
725 | ok defined $1 && length ($1) == $size, '$1 is correct size'; | |
726 | } | |
727 | ||
728 | ||
729 | { | |
730 | local $BugId = '27940'; | |
731 | ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'; | |
732 | ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'; | |
733 | ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'; | |
734 | ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'; | |
735 | ||
736 | ok "X\0A" =~ /X\c@?A/, '\c@?'; | |
737 | ok "X\0A" =~ /X\c@*A/, '\c@*'; | |
738 | ok "X\0A" =~ /X\c@(A)/, '\c@('; | |
739 | ok "X\0A" =~ /X(\c@)A/, '\c@)'; | |
740 | ok "X\0A" =~ /X\c@|ZA/, '\c@|'; | |
741 | ||
742 | ok "X\@A" =~ /X@?A/, '@?'; | |
743 | ok "X\@A" =~ /X@*A/, '@*'; | |
744 | ok "X\@A" =~ /X@(A)/, '@('; | |
745 | ok "X\@A" =~ /X(@)A/, '@)'; | |
746 | ok "X\@A" =~ /X@|ZA/, '@|'; | |
747 | ||
748 | local $" = ','; # non-whitespace and non-RE-specific | |
749 | ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus'; | |
750 | ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/'; | |
751 | ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/'; | |
752 | ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x'; | |
753 | ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x'; | |
754 | } | |
755 | ||
756 | ||
757 | { | |
9d45b377 | 758 | local $BugId = '50496'; |
e425a60b YO |
759 | my $s = 'foo bar baz'; |
760 | my (@k, @v, @fetch, $res); | |
761 | my $count = 0; | |
762 | my @names = qw ($+{A} $+{B} $+{C}); | |
763 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { | |
764 | while (my ($k, $v) = each (%+)) { | |
765 | $count++; | |
766 | } | |
767 | @k = sort keys (%+); | |
768 | @v = sort values (%+); | |
769 | $res = 1; | |
770 | push @fetch, | |
771 | ["$+{A}", "$1"], | |
772 | ["$+{B}", "$2"], | |
773 | ["$+{C}", "$3"], | |
774 | ; | |
775 | } | |
776 | foreach (0 .. 2) { | |
777 | if ($fetch [$_]) { | |
778 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
779 | } else { | |
780 | ok 0, $names[$_]; | |
781 | } | |
782 | } | |
783 | iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/"; | |
784 | iseq $count, 3, "Got 3 keys in %+ via each"; | |
785 | iseq 0 + @k, 3, 'Got 3 keys in %+ via keys'; | |
786 | iseq "@k", "A B C", "Got expected keys"; | |
787 | iseq "@v", "bar baz foo", "Got expected values"; | |
788 | eval ' | |
789 | no warnings "uninitialized"; | |
790 | print for $+ {this_key_doesnt_exist}; | |
791 | '; | |
792 | ok !$@, 'lvalue $+ {...} should not throw an exception'; | |
793 | } | |
794 | ||
795 | ||
796 | { | |
797 | # | |
798 | # Almost the same as the block above, except that the capture is nested. | |
799 | # | |
800 | local $BugId = '50496'; | |
801 | my $s = 'foo bar baz'; | |
802 | my (@k, @v, @fetch, $res); | |
803 | my $count = 0; | |
804 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); | |
805 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { | |
806 | while (my ($k,$v) = each(%+)) { | |
807 | $count++; | |
808 | } | |
809 | @k = sort keys (%+); | |
810 | @v = sort values (%+); | |
811 | $res = 1; | |
812 | push @fetch, | |
813 | ["$+{A}", "$2"], | |
814 | ["$+{B}", "$3"], | |
815 | ["$+{C}", "$4"], | |
816 | ["$+{D}", "$1"], | |
817 | ; | |
818 | } | |
819 | foreach (0 .. 3) { | |
820 | if ($fetch [$_]) { | |
821 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
822 | } else { | |
823 | ok 0, $names [$_]; | |
824 | } | |
825 | } | |
826 | iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/"; | |
827 | iseq $count, 4, "Got 4 keys in %+ via each"; | |
828 | iseq @k, 4, 'Got 4 keys in %+ via keys'; | |
829 | iseq "@k", "A B C D", "Got expected keys"; | |
830 | iseq "@v", "bar baz foo foo bar baz", "Got expected values"; | |
831 | eval ' | |
832 | no warnings "uninitialized"; | |
833 | print for $+ {this_key_doesnt_exist}; | |
834 | '; | |
835 | ok !$@,'lvalue $+ {...} should not throw an exception'; | |
836 | } | |
837 | ||
838 | ||
839 | { | |
e425a60b YO |
840 | local $BugId = '36046'; |
841 | my $str = 'abc'; | |
842 | my $count = 0; | |
843 | my $mval = 0; | |
844 | my $pval = 0; | |
845 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} | |
846 | iseq $mval, 0, '@- should be empty'; | |
847 | iseq $pval, 0, '@+ should be empty'; | |
848 | iseq $count, 1, 'Should have matched once only'; | |
849 | } | |
850 | ||
851 | ||
e425a60b YO |
852 | |
853 | ||
854 | { | |
855 | local $BugId = '40684'; | |
856 | local $Message = '/m in precompiled regexp'; | |
857 | my $s = "abc\ndef"; | |
858 | my $rex = qr'^abc$'m; | |
859 | ok $s =~ m/$rex/; | |
860 | ok $s =~ m/^abc$/m; | |
861 | } | |
862 | ||
863 | ||
864 | { | |
e425a60b YO |
865 | local $BugId = '36909'; |
866 | local $Message = '(?: ... )? should not lose $^R'; | |
867 | $^R = 'Nothing'; | |
868 | { | |
869 | local $^R = "Bad"; | |
870 | ok 'x foofoo y' =~ m { | |
871 | (foo) # $^R correctly set | |
872 | (?{ "last regexp code result" }) | |
873 | }x; | |
874 | iseq $^R, 'last regexp code result'; | |
875 | } | |
876 | iseq $^R, 'Nothing'; | |
877 | ||
878 | { | |
879 | local $^R = "Bad"; | |
880 | ||
881 | ok 'x foofoo y' =~ m { | |
882 | (?:foo|bar)+ # $^R correctly set | |
883 | (?{ "last regexp code result" }) | |
884 | }x; | |
885 | iseq $^R, 'last regexp code result'; | |
886 | } | |
887 | iseq $^R, 'Nothing'; | |
888 | ||
889 | { | |
890 | local $^R = "Bad"; | |
891 | ok 'x foofoo y' =~ m { | |
892 | (foo|bar)\1+ # $^R undefined | |
893 | (?{ "last regexp code result" }) | |
894 | }x; | |
895 | iseq $^R, 'last regexp code result'; | |
896 | } | |
897 | iseq $^R, 'Nothing'; | |
898 | ||
899 | { | |
900 | local $^R = "Bad"; | |
901 | ok 'x foofoo y' =~ m { | |
902 | (foo|bar)\1 # This time without the + | |
903 | (?{"last regexp code result"}) | |
904 | }x; | |
905 | iseq $^R, 'last regexp code result'; | |
906 | } | |
907 | iseq $^R, 'Nothing'; | |
908 | } | |
909 | ||
910 | ||
911 | { | |
912 | local $BugId = '22395'; | |
913 | local $Message = 'Match is linear, not quadratic'; | |
914 | our $count; | |
915 | for my $l (10, 100, 1000) { | |
916 | $count = 0; | |
917 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; | |
918 | local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; | |
919 | iseq $count, $l + 1; | |
920 | } | |
921 | } | |
922 | ||
923 | ||
924 | { | |
925 | local $BugId = '22614'; | |
926 | local $Message = '@-/@+ should not have undefined values'; | |
927 | local $_ = 'ab'; | |
928 | our @len = (); | |
929 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
930 | iseq "@len", "2 2 2"; | |
931 | } | |
932 | ||
933 | ||
934 | { | |
935 | local $BugId = '18209'; | |
936 | local $Message = '$& set on s///'; | |
937 | my $text = ' word1 word2 word3 word4 word5 word6 '; | |
938 | ||
939 | my @words = ('word1', 'word3', 'word5'); | |
940 | my $count; | |
941 | foreach my $word (@words) { | |
942 | $text =~ s/$word\s//gi; # Leave a space to seperate words | |
943 | # in the resultant str. | |
944 | # The following block is not working. | |
945 | if ($&) { | |
946 | $count ++; | |
947 | } | |
948 | # End bad block | |
949 | } | |
950 | iseq $count, 3; | |
951 | iseq $text, ' word2 word4 word6 '; | |
952 | } | |
953 | ||
954 | ||
955 | { | |
956 | # RT#6893 | |
957 | local $BugId = '6893'; | |
958 | local $_ = qq (A\nB\nC\n); | |
959 | my @res; | |
960 | while (m#(\G|\n)([^\n]*)\n#gsx) { | |
961 | push @res, "$2"; | |
962 | last if @res > 3; | |
963 | } | |
964 | iseq "@res", "A B C", "/g pattern shouldn't infinite loop"; | |
965 | } | |
966 | ||
967 | ||
e425a60b YO |
968 | |
969 | { | |
970 | local $BugId = '41010'; | |
971 | local $Message = 'No optimizer bug'; | |
972 | my @tails = ('', '(?(1))', '(|)', '()?'); | |
973 | my @quants = ('*','+'); | |
974 | my $doit = sub { | |
975 | my $pats = shift; | |
976 | for (@_) { | |
977 | for my $pat (@$pats) { | |
978 | for my $quant (@quants) { | |
979 | for my $tail (@tails) { | |
980 | my $re = "($pat$quant\$)$tail"; | |
981 | ok /$re/ && $1 eq $_, "'$_' =~ /$re/"; | |
982 | ok /$re/m && $1 eq $_, "'$_' =~ /$re/m"; | |
983 | } | |
984 | } | |
985 | } | |
986 | } | |
987 | }; | |
988 | ||
989 | my @dpats = ('\d', | |
990 | '[1234567890]', | |
991 | '(1|[23]|4|[56]|[78]|[90])', | |
992 | '(?:1|[23]|4|[56]|[78]|[90])', | |
993 | '(1|2|3|4|5|6|7|8|9|0)', | |
994 | '(?:1|2|3|4|5|6|7|8|9|0)'); | |
995 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); | |
996 | my @sstrs = (' '); | |
997 | my @dstrs = ('12345'); | |
998 | $doit -> (\@spats, @sstrs); | |
999 | $doit -> (\@dpats, @dstrs); | |
1000 | } | |
1001 | ||
1002 | ||
e425a60b YO |
1003 | |
1004 | { | |
1005 | local $BugId = '45605'; | |
1006 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string | |
1007 | ||
1008 | my $utf_8 = "\xd6schel"; | |
1009 | utf8::upgrade ($utf_8); | |
1010 | $utf_8 =~ m {(\xd6|Ö)schel}; | |
1011 | iseq $1, "\xd6", "Upgrade error"; | |
1012 | } | |
1013 | ||
1014 | { | |
e425a60b YO |
1015 | # Regardless of utf8ness any character matches itself when |
1016 | # doing a case insensitive match. See also [perl #36207] | |
1017 | local $BugId = '36207'; | |
1018 | for my $o (0 .. 255) { | |
1019 | my @ch = (chr ($o), chr ($o)); | |
1020 | utf8::upgrade ($ch [1]); | |
1021 | for my $u_str (0, 1) { | |
1022 | for my $u_pat (0, 1) { | |
1023 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i, | |
1024 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat"; | |
1025 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i, | |
1026 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat"; | |
1027 | } | |
1028 | } | |
1029 | } | |
1030 | } | |
1031 | ||
1032 | ||
1033 | { | |
e425a60b YO |
1034 | local $BugId = '49190'; |
1035 | local $Message = '$REGMARK in replacement'; | |
1036 | our $REGMARK; | |
1037 | my $_ = "A"; | |
1038 | ok s/(*:B)A/$REGMARK/; | |
1039 | iseq $_, "B"; | |
1040 | $_ = "CCCCBAA"; | |
1041 | ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; | |
1042 | iseq $_, "ZYX"; | |
1043 | } | |
1044 | ||
1045 | ||
1046 | { | |
e425a60b YO |
1047 | local $BugId = '52658'; |
1048 | local $Message = 'Substitution evaluation in list context'; | |
1049 | my $reg = '../xxx/'; | |
1050 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, | |
1051 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); | |
1052 | iseq $reg, '../bbb/'; | |
1053 | iseq $te [0], '../'; | |
1054 | } | |
1055 | ||
e425a60b YO |
1056 | { |
1057 | local $BugId = '60034'; | |
1058 | my $a = "xyzt" x 8192; | |
1059 | ok $a =~ /\A(?>[a-z])*\z/, | |
1060 | '(?>) does not cause wrongness on long string'; | |
1061 | my $b = $a . chr 256; | |
1062 | chop $b; | |
1063 | { | |
1064 | iseq $a, $b; | |
1065 | } | |
1066 | ok $b =~ /\A(?>[a-z])*\z/, | |
1067 | '(?>) does not cause wrongness on long string with UTF-8'; | |
1068 | } | |
1069 | ||
1070 | ||
1071 | # | |
1072 | # Keep the following tests last -- they may crash perl | |
1073 | # | |
1074 | print "# Tests that follow may crash perl\n"; | |
1075 | { | |
1076 | local $BugId = '19049/38869'; | |
1077 | local $Message = 'Pattern in a loop, failure should not ' . | |
1078 | 'affect previous success'; | |
1079 | my @list = ( | |
1080 | 'ab cdef', # Matches regex | |
1081 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it | |
1082 | ); | |
1083 | my $y; | |
1084 | my $x; | |
1085 | foreach (@list) { | |
1086 | m/ab(.+)cd/i; # The ignore-case seems to be important | |
1087 | $y = $1; # Use $1, which might not be from the last match! | |
1088 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); | |
1089 | } | |
1090 | iseq $y, ' '; | |
1091 | iseq $x, 'ab cd'; | |
1092 | } | |
1093 | ||
1094 | ||
1095 | { | |
1096 | local $BugId = '24274'; | |
1097 | ||
1098 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker"); | |
1099 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, | |
1100 | "Regexp /^(??{'(.)'x 100})/ crashes older perls"); | |
1101 | } | |
1102 | ||
1103 | ||
1104 | { | |
e425a60b YO |
1105 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache |
1106 | local $BugId = '45337'; | |
1107 | local ${^UTF8CACHE} = -1; | |
1108 | local $Message = "Shouldn't panic"; | |
1109 | my $s = "[a]a{2}"; | |
1110 | utf8::upgrade $s; | |
1111 | ok "aaa" =~ /$s/; | |
1112 | } | |
1113 | { | |
1114 | local $BugId = '57042'; | |
1115 | local $Message = "Check if tree logic breaks \$^R"; | |
1116 | my $cond_re = qr/\s* | |
1117 | \s* (?: | |
1118 | \( \s* A (?{1}) | |
1119 | | \( \s* B (?{2}) | |
1120 | ) | |
1121 | /x; | |
1122 | my @res; | |
1123 | for my $line ("(A)","(B)") { | |
1124 | if ($line =~ m/$cond_re/) { | |
1125 | push @res, $^R ? "#$^R" : "UNDEF"; | |
1126 | } | |
1127 | } | |
1128 | iseq "@res","#1 #2"; | |
1129 | } | |
1130 | { | |
1131 | no warnings 'closure'; | |
1132 | my $re = qr/A(??{"1"})/; | |
1133 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; | |
1134 | ok $1 eq "A1"; | |
1135 | ok $2 eq "B"; | |
1136 | } | |
1137 | ||
1138 | ||
e425a60b YO |
1139 | |
1140 | # This only works under -DEBUGGING because it relies on an assert(). | |
1141 | { | |
1142 | local $BugId = '60508'; | |
1143 | local $Message = "Check capture offset re-entrancy of utf8 code."; | |
1144 | ||
1145 | sub fswash { $_[0] =~ s/([>X])//g; } | |
1146 | ||
1147 | my $k1 = "." x 4 . ">>"; | |
1148 | fswash($k1); | |
1149 | ||
1150 | my $k2 = "\x{f1}\x{2022}"; | |
1151 | $k2 =~ s/([\360-\362])/>/g; | |
1152 | fswash($k2); | |
1153 | ||
1154 | iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks"); | |
1155 | } | |
1156 | ||
1157 | ||
1158 | { | |
1159 | local $BugId = 65372; # minimal CURLYM limited to 32767 matches | |
1160 | my @pat = ( | |
1161 | qr{a(x|y)*b}, # CURLYM | |
1162 | qr{a(x|y)*?b}, # .. with minmod | |
1163 | qr{a([wx]|[yz])*b}, # .. and without tries | |
1164 | qr{a([wx]|[yz])*?b}, | |
1165 | ); | |
1166 | my $len = 32768; | |
1167 | my $s = join '', 'a', 'x' x $len, 'b'; | |
1168 | for my $pat (@pat) { | |
1169 | ok($s =~ $pat, $pat); | |
1170 | } | |
1171 | } | |
b4da7bae B |
1172 | |
1173 | { | |
1174 | local $TODO = "[perl #38133]"; | |
1175 | ||
1176 | "A" =~ /(((?:A))?)+/; | |
1177 | my $first = $2; | |
1178 | ||
1179 | "A" =~ /(((A))?)+/; | |
1180 | my $second = $2; | |
1181 | ||
1182 | iseq($first, $second); | |
1183 | } | |
e425a60b YO |
1184 | } # End of sub run_tests |
1185 | ||
1186 | 1; |