Commit | Line | Data |
---|---|---|
e425a60b YO |
1 | #!./perl |
2 | # | |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by re/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to re/re_tests, not here. | |
6 | ||
7 | use strict; | |
8 | use warnings; | |
9 | use 5.010; | |
10 | ||
11 | ||
12 | sub run_tests; | |
13 | ||
14 | $| = 1; | |
15 | ||
16 | my $EXPECTED_TESTS = 4066; # Update this when adding/deleting tests. | |
17 | ||
18 | BEGIN { | |
19 | chdir 't' if -d 't'; | |
20 | @INC = '../lib'; | |
21 | } | |
22 | our $TODO; | |
23 | our $Message = "Noname test"; | |
24 | our $Error; | |
25 | our $DiePattern; | |
26 | our $WarnPattern; | |
27 | our $BugId; | |
28 | our $PatchId; | |
29 | our $running_as_thread; | |
30 | ||
31 | my $ordA = ord ('A'); # This defines ASCII/UTF-8 vs EBCDIC/UTF-EBCDIC | |
32 | # This defined the platform. | |
33 | my $IS_ASCII = $ordA == 65; | |
34 | my $IS_EBCDIC = $ordA == 193; | |
35 | ||
36 | use vars '%Config'; | |
37 | eval 'use Config'; # Defaults assumed if this fails | |
38 | ||
39 | my $test = 0; | |
40 | ||
41 | print "1..$EXPECTED_TESTS\n"; | |
42 | ||
43 | run_tests unless caller (); | |
44 | ||
45 | END { | |
46 | } | |
47 | ||
48 | sub pretty { | |
49 | my ($mess) = @_; | |
50 | $mess =~ s/\n/\\n/g; | |
51 | $mess =~ s/\r/\\r/g; | |
52 | $mess =~ s/\t/\\t/g; | |
53 | $mess =~ s/([\00-\37\177])/sprintf '\%03o', ord $1/eg; | |
54 | $mess =~ s/#/\\#/g; | |
55 | $mess; | |
56 | } | |
57 | ||
58 | sub safe_globals { | |
59 | defined($_) and s/#/\\#/g for $BugId, $PatchId, $TODO; | |
60 | } | |
61 | ||
62 | sub _ok { | |
63 | my ($ok, $mess, $error) = @_; | |
64 | safe_globals(); | |
65 | $mess = pretty ($mess // $Message); | |
66 | $mess .= "; Bug $BugId" if defined $BugId; | |
67 | $mess .= "; Patch $PatchId" if defined $PatchId; | |
68 | $mess .= " # TODO $TODO" if defined $TODO; | |
69 | ||
70 | my $line_nr = (caller(1)) [2]; | |
71 | ||
72 | printf "%sok %d - %s\n", | |
73 | ($ok ? "" : "not "), | |
74 | ++ $test, | |
75 | "$mess\tLine $line_nr"; | |
76 | ||
77 | unless ($ok) { | |
78 | print "# Failed test at line $line_nr\n" unless defined $TODO; | |
79 | if ($error //= $Error) { | |
80 | no warnings 'utf8'; | |
81 | chomp $error; | |
82 | $error = join "\n#", map {pretty $_} split /\n\h*#/ => $error; | |
83 | $error = "# $error" unless $error =~ /^\h*#/; | |
84 | print $error, "\n"; | |
85 | } | |
86 | } | |
87 | ||
88 | return $ok; | |
89 | } | |
90 | ||
91 | # Force scalar context on the pattern match | |
92 | sub ok ($;$$) {_ok $_ [0], $_ [1], $_ [2]} | |
93 | sub nok ($;$$) {_ok !$_ [0], "Failed: " . ($_ [1] // $Message), $_ [2]} | |
94 | ||
95 | ||
96 | sub skip { | |
97 | my $why = shift; | |
98 | safe_globals(); | |
99 | $why =~ s/\n.*//s; | |
100 | $why .= "; Bug $BugId" if defined $BugId; | |
101 | # seems like the new harness code doesnt like todo and skip to be mixed. | |
102 | # which seems like a bug in the harness to me. -- dmq | |
103 | #$why .= " # TODO $TODO" if defined $TODO; | |
104 | ||
105 | my $n = shift // 1; | |
106 | my $line_nr = (caller(0)) [2]; | |
107 | for (1 .. $n) { | |
108 | ++ $test; | |
109 | #print "not " if defined $TODO; | |
110 | print "ok $test # skip $why\tLine $line_nr\n"; | |
111 | } | |
112 | no warnings "exiting"; | |
113 | last SKIP; | |
114 | } | |
115 | ||
116 | sub iseq ($$;$) { | |
117 | my ($got, $expect, $name) = @_; | |
118 | ||
119 | $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; | |
120 | ||
121 | my $ok = $got eq $expect; | |
122 | my $error = "# expected: $expect\n" . | |
123 | "# result: $got"; | |
124 | ||
125 | _ok $ok, $name, $error; | |
126 | } | |
127 | ||
128 | sub isneq ($$;$) { | |
129 | my ($got, $expect, $name) = @_; | |
130 | my $todo = $TODO ? " # TODO $TODO" : ''; | |
131 | ||
132 | $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; | |
133 | ||
134 | my $ok = $got ne $expect; | |
135 | my $error = "# results are equal ($got)"; | |
136 | ||
137 | _ok $ok, $name, $error; | |
138 | } | |
139 | ||
140 | ||
141 | sub eval_ok ($;$) { | |
142 | my ($code, $name) = @_; | |
143 | local $@; | |
144 | if (ref $code) { | |
145 | _ok eval {&$code} && !$@, $name; | |
146 | } | |
147 | else { | |
148 | _ok eval ($code) && !$@, $name; | |
149 | } | |
150 | } | |
151 | ||
152 | sub must_die { | |
153 | my ($code, $pattern, $name) = @_; | |
154 | $pattern //= $DiePattern; | |
155 | undef $@; | |
156 | ref $code ? &$code : eval $code; | |
157 | my $r = $@ && $@ =~ /$pattern/; | |
158 | _ok $r, $name // $Message // "\$\@ =~ /$pattern/"; | |
159 | } | |
160 | ||
161 | sub must_warn { | |
162 | my ($code, $pattern, $name) = @_; | |
163 | $pattern //= $WarnPattern; | |
164 | my $w; | |
165 | local $SIG {__WARN__} = sub {$w .= join "" => @_}; | |
166 | use warnings 'all'; | |
167 | ref $code ? &$code : eval $code; | |
168 | my $r = $w && $w =~ /$pattern/; | |
169 | $w //= "UNDEF"; | |
170 | _ok $r, $name // $Message // "Got warning /$pattern/", | |
171 | "# expected: /$pattern/\n" . | |
172 | "# result: $w"; | |
173 | } | |
174 | ||
175 | sub may_not_warn { | |
176 | my ($code, $name) = @_; | |
177 | my $w; | |
178 | local $SIG {__WARN__} = sub {$w .= join "" => @_}; | |
179 | use warnings 'all'; | |
180 | ref $code ? &$code : eval $code; | |
181 | _ok !$w, $name // ($Message ? "$Message (did not warn)" | |
182 | : "Did not warn"), | |
183 | "Got warning '$w'"; | |
184 | } | |
185 | ||
186 | ||
187 | # | |
188 | # Tests start here. | |
189 | # | |
190 | sub run_tests { | |
191 | ||
192 | { | |
193 | ||
194 | my $x = "abc\ndef\n"; | |
195 | ||
196 | ok $x =~ /^abc/, qq ["$x" =~ /^abc/]; | |
197 | ok $x !~ /^def/, qq ["$x" !~ /^def/]; | |
198 | ||
199 | # used to be a test for $* | |
200 | ok $x =~ /^def/m, qq ["$x" =~ /^def/m]; | |
201 | ||
202 | nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/]; | |
203 | nok $x !~ /^abc/, qq ["$x" !~ /^abc/]; | |
204 | ||
205 | ok $x =~ /def/, qq ["$x" =~ /def/]; | |
206 | nok $x !~ /def/, qq ["$x" !~ /def/]; | |
207 | ||
208 | ok $x !~ /.def/, qq ["$x" !~ /.def/]; | |
209 | nok $x =~ /.def/, qq ["$x" =~ /.def/]; | |
210 | ||
211 | ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/]; | |
212 | nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/]; | |
213 | } | |
214 | ||
215 | { | |
216 | $_ = '123'; | |
217 | ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; | |
218 | } | |
219 | ||
220 | { | |
221 | $_ = 'aaabbbccc'; | |
222 | ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', | |
223 | qq [\$_ = '$_'; /(a*b*)(c*)/]; | |
224 | ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; | |
225 | nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; | |
226 | ||
227 | $_ = 'aaabccc'; | |
228 | ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; | |
229 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
230 | ||
231 | $_ = 'aaaccc'; | |
232 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
233 | nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]; | |
234 | ||
235 | $_ = 'abcdef'; | |
236 | ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; | |
237 | ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; | |
238 | ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; | |
239 | ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; | |
240 | } | |
241 | ||
242 | { | |
243 | # used to be a test for $* | |
244 | ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m]; | |
245 | } | |
246 | ||
247 | { | |
248 | our %XXX = map {($_ => $_)} 123, 234, 345; | |
249 | ||
250 | our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); | |
251 | while ($_ = shift(@XXX)) { | |
252 | my $f = index ($_, 'not') >= 0 ? \&nok : \&ok; | |
253 | my $r = ?(.*)?; | |
254 | &$f ($r, "?(.*)?"); | |
255 | /not/ && reset; | |
256 | if (/not ok 2/) { | |
257 | if ($^O eq 'VMS') { | |
258 | $_ = shift(@XXX); | |
259 | } | |
260 | else { | |
261 | reset 'X'; | |
262 | } | |
263 | } | |
264 | } | |
265 | ||
266 | SKIP: { | |
267 | if ($^O eq 'VMS') { | |
268 | skip "Reset 'X'", 1; | |
269 | } | |
270 | ok !keys %XXX, "%XXX is empty"; | |
271 | } | |
272 | ||
273 | } | |
274 | ||
275 | { | |
276 | local $Message = "Test empty pattern"; | |
277 | my $xyz = 'xyz'; | |
278 | my $cde = 'cde'; | |
279 | ||
280 | $cde =~ /[^ab]*/; | |
281 | $xyz =~ //; | |
282 | iseq $&, $xyz; | |
283 | ||
284 | my $foo = '[^ab]*'; | |
285 | $cde =~ /$foo/; | |
286 | $xyz =~ //; | |
287 | iseq $&, $xyz; | |
288 | ||
289 | $cde =~ /$foo/; | |
290 | my $null; | |
291 | no warnings 'uninitialized'; | |
292 | $xyz =~ /$null/; | |
293 | iseq $&, $xyz; | |
294 | ||
295 | $null = ""; | |
296 | $xyz =~ /$null/; | |
297 | iseq $&, $xyz; | |
298 | } | |
299 | ||
300 | { | |
301 | local $Message = q !Check $`, $&, $'!; | |
302 | $_ = 'abcdefghi'; | |
303 | /def/; # optimized up to cmd | |
304 | iseq "$`:$&:$'", 'abc:def:ghi'; | |
305 | ||
306 | no warnings 'void'; | |
307 | /cde/ + 0; # optimized only to spat | |
308 | iseq "$`:$&:$'", 'ab:cde:fghi'; | |
309 | ||
310 | /[d][e][f]/; # not optimized | |
311 | iseq "$`:$&:$'", 'abc:def:ghi'; | |
312 | } | |
313 | ||
314 | { | |
315 | $_ = 'now is the {time for all} good men to come to.'; | |
316 | / {([^}]*)}/; | |
317 | iseq $1, 'time for all', "Match braces"; | |
318 | } | |
319 | ||
320 | { | |
321 | local $Message = "{N,M} quantifier"; | |
322 | $_ = 'xxx {3,4} yyy zzz'; | |
323 | ok /( {3,4})/; | |
324 | iseq $1, ' '; | |
325 | ok !/( {4,})/; | |
326 | ok /( {2,3}.)/; | |
327 | iseq $1, ' y'; | |
328 | ok /(y{2,3}.)/; | |
329 | iseq $1, 'yyy '; | |
330 | ok !/x {3,4}/; | |
331 | ok !/^xxx {3,4}/; | |
332 | } | |
333 | ||
334 | { | |
335 | local $Message = "Test /g"; | |
336 | local $" = ":"; | |
337 | $_ = "now is the time for all good men to come to."; | |
338 | my @words = /(\w+)/g; | |
339 | my $exp = "now:is:the:time:for:all:good:men:to:come:to"; | |
340 | ||
341 | iseq "@words", $exp; | |
342 | ||
343 | @words = (); | |
344 | while (/\w+/g) { | |
345 | push (@words, $&); | |
346 | } | |
347 | iseq "@words", $exp; | |
348 | ||
349 | @words = (); | |
350 | pos = 0; | |
351 | while (/to/g) { | |
352 | push(@words, $&); | |
353 | } | |
354 | iseq "@words", "to:to"; | |
355 | ||
356 | pos $_ = 0; | |
357 | @words = /to/g; | |
358 | iseq "@words", "to:to"; | |
359 | } | |
360 | ||
361 | { | |
362 | $_ = "abcdefghi"; | |
363 | ||
364 | my $pat1 = 'def'; | |
365 | my $pat2 = '^def'; | |
366 | my $pat3 = '.def.'; | |
367 | my $pat4 = 'abc'; | |
368 | my $pat5 = '^abc'; | |
369 | my $pat6 = 'abc$'; | |
370 | my $pat7 = 'ghi'; | |
371 | my $pat8 = '\w*ghi'; | |
372 | my $pat9 = 'ghi$'; | |
373 | ||
374 | my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = | |
375 | my $t6 = my $t7 = my $t8 = my $t9 = 0; | |
376 | ||
377 | for my $iter (1 .. 5) { | |
378 | $t1++ if /$pat1/o; | |
379 | $t2++ if /$pat2/o; | |
380 | $t3++ if /$pat3/o; | |
381 | $t4++ if /$pat4/o; | |
382 | $t5++ if /$pat5/o; | |
383 | $t6++ if /$pat6/o; | |
384 | $t7++ if /$pat7/o; | |
385 | $t8++ if /$pat8/o; | |
386 | $t9++ if /$pat9/o; | |
387 | } | |
388 | my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; | |
389 | iseq $x, '505550555', "Test /o"; | |
390 | } | |
391 | ||
392 | ||
393 | SKIP: { | |
394 | my $xyz = 'xyz'; | |
395 | ok "abc" =~ /^abc$|$xyz/, "| after \$"; | |
396 | ||
397 | # perl 4.009 says "unmatched ()" | |
398 | local $Message = '$ inside ()'; | |
399 | ||
400 | my $result; | |
401 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; | |
402 | iseq $@, "" or skip "eval failed", 1; | |
403 | iseq $result, "abc:bc"; | |
404 | } | |
405 | ||
406 | ||
407 | { | |
408 | local $Message = "Scalar /g"; | |
409 | $_ = "abcfooabcbar"; | |
410 | ||
411 | ok /abc/g && $` eq ""; | |
412 | ok /abc/g && $` eq "abcfoo"; | |
413 | ok !/abc/g; | |
414 | ||
415 | local $Message = "Scalar /gi"; | |
416 | pos = 0; | |
417 | ok /ABC/gi && $` eq ""; | |
418 | ok /ABC/gi && $` eq "abcfoo"; | |
419 | ok !/ABC/gi; | |
420 | ||
421 | local $Message = "Scalar /g"; | |
422 | pos = 0; | |
423 | ok /abc/g && $' eq "fooabcbar"; | |
424 | ok /abc/g && $' eq "bar"; | |
425 | ||
426 | $_ .= ''; | |
427 | my @x = /abc/g; | |
428 | iseq @x, 2, "/g reset after assignment"; | |
429 | } | |
430 | ||
431 | { | |
432 | local $Message = '/g, \G and pos'; | |
433 | $_ = "abdc"; | |
434 | pos $_ = 2; | |
435 | /\Gc/gc; | |
436 | iseq pos $_, 2; | |
437 | /\Gc/g; | |
438 | ok !defined pos $_; | |
439 | } | |
440 | ||
441 | { | |
442 | local $Message = '(?{ })'; | |
443 | our $out = 1; | |
444 | 'abc' =~ m'a(?{ $out = 2 })b'; | |
445 | iseq $out, 2; | |
446 | ||
447 | $out = 1; | |
448 | 'abc' =~ m'a(?{ $out = 3 })c'; | |
449 | iseq $out, 1; | |
450 | } | |
451 | ||
452 | ||
453 | { | |
454 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; | |
455 | my @out = /(?<!foo)bar./g; | |
456 | iseq "@out", 'bar2 barf', "Negative lookbehind"; | |
457 | } | |
458 | ||
459 | { | |
460 | local $Message = "REG_INFTY tests"; | |
461 | # Tests which depend on REG_INFTY | |
462 | $::reg_infty = $Config {reg_infty} // 32767; | |
463 | $::reg_infty_m = $::reg_infty - 1; | |
464 | $::reg_infty_p = $::reg_infty + 1; | |
465 | $::reg_infty_m = $::reg_infty_m; # Surpress warning. | |
466 | ||
467 | # As well as failing if the pattern matches do unexpected things, the | |
468 | # next three tests will fail if you should have picked up a lower-than- | |
469 | # default value for $reg_infty from Config.pm, but have not. | |
470 | ||
471 | eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa'); | |
472 | eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/); | |
473 | eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/); | |
474 | eval "'aaa' =~ /a{1,$::reg_infty}/"; | |
475 | ok $@ =~ /^\QQuantifier in {,} bigger than/; | |
476 | eval "'aaa' =~ /a{1,$::reg_infty_p}/"; | |
477 | ok $@ =~ /^\QQuantifier in {,} bigger than/; | |
478 | } | |
479 | ||
480 | { | |
481 | # Poke a couple more parse failures | |
482 | my $context = 'x' x 256; | |
483 | eval qq("${context}y" =~ /(?<=$context)y/); | |
484 | ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; | |
485 | } | |
486 | ||
487 | { | |
488 | # Long Monsters | |
489 | local $Message = "Long monster"; | |
490 | for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory | |
491 | my $a = 'a' x $l; | |
492 | local $Error = "length = $l"; | |
493 | ok "ba$a=" =~ /a$a=/; | |
494 | nok "b$a=" =~ /a$a=/; | |
495 | ok "b$a=" =~ /ba+=/; | |
496 | ||
497 | ok "ba$a=" =~ /b(?:a|b)+=/; | |
498 | } | |
499 | } | |
500 | ||
501 | ||
502 | { | |
503 | # 20000 nodes, each taking 3 words per string, and 1 per branch | |
504 | my $long_constant_len = join '|', 12120 .. 32645; | |
505 | my $long_var_len = join '|', 8120 .. 28645; | |
506 | my %ans = ( 'ax13876y25677lbc' => 1, | |
507 | 'ax13876y25677mcb' => 0, # not b. | |
508 | 'ax13876y35677nbc' => 0, # Num too big | |
509 | 'ax13876y25677y21378obc' => 1, | |
510 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] | |
511 | 'ax13876y25677y21378y21378kbc' => 1, | |
512 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. | |
513 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs | |
514 | ); | |
515 | ||
516 | local $Message = "20000 nodes"; | |
517 | for (keys %ans) { | |
518 | local $Error = "const-len '$_'"; | |
519 | ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o); | |
520 | ||
521 | local $Error = "var-len '$_'"; | |
522 | ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o); | |
523 | } | |
524 | } | |
525 | ||
526 | { | |
527 | local $Message = "Complicated backtracking"; | |
528 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; | |
529 | my $expect = "(bla()) ((l)u((e))) (l(e)e)"; | |
530 | ||
531 | use vars '$c'; | |
532 | sub matchit { | |
533 | m/ | |
534 | ( | |
535 | \( | |
536 | (?{ $c = 1 }) # Initialize | |
537 | (?: | |
538 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop | |
539 | (?! | |
540 | ) # Fail: will unwind one iteration back | |
541 | ) | |
542 | (?: | |
543 | [^()]+ # Match a big chunk | |
544 | (?= | |
545 | [()] | |
546 | ) # Do not try to match subchunks | |
547 | | | |
548 | \( | |
549 | (?{ ++$c }) | |
550 | | | |
551 | \) | |
552 | (?{ --$c }) | |
553 | ) | |
554 | )+ # This may not match with different subblocks | |
555 | ) | |
556 | (?(?{ $c != 0 }) | |
557 | (?! | |
558 | ) # Fail | |
559 | ) # Otherwise the chunk 1 may succeed with $c>0 | |
560 | /xg; | |
561 | } | |
562 | ||
563 | my @ans = (); | |
564 | my $res; | |
565 | push @ans, $res while $res = matchit; | |
566 | iseq "@ans", "1 1 1"; | |
567 | ||
568 | @ans = matchit; | |
569 | iseq "@ans", $expect; | |
570 | ||
571 | local $Message = "Recursion with (??{ })"; | |
572 | our $matched; | |
573 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; | |
574 | ||
575 | @ans = my @ans1 = (); | |
576 | push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; | |
577 | ||
578 | iseq "@ans", "1 1 1"; | |
579 | iseq "@ans1", $expect; | |
580 | ||
581 | @ans = m/$matched/g; | |
582 | iseq "@ans", $expect; | |
583 | ||
584 | } | |
585 | ||
586 | { | |
587 | ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; | |
588 | } | |
589 | ||
590 | { | |
591 | my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad | |
592 | iseq "@ans", 'a/ b', "Stack may be bad"; | |
593 | } | |
594 | ||
595 | { | |
596 | local $Message = "Eval-group not allowed at runtime"; | |
597 | my $code = '{$blah = 45}'; | |
598 | our $blah = 12; | |
599 | eval { /(?$code)/ }; | |
600 | ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; | |
601 | ||
602 | for $code ('{$blah = 45}','=xx') { | |
603 | $blah = 12; | |
604 | my $res = eval { "xx" =~ /(?$code)/o }; | |
605 | no warnings 'uninitialized'; | |
606 | local $Error = "'$@', '$res', '$blah'"; | |
607 | if ($code eq '=xx') { | |
608 | ok !$@ && $res; | |
609 | } | |
610 | else { | |
611 | ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; | |
612 | } | |
613 | } | |
614 | ||
615 | $code = '{$blah = 45}'; | |
616 | $blah = 12; | |
617 | eval "/(?$code)/"; | |
618 | iseq $blah, 45; | |
619 | ||
620 | $blah = 12; | |
621 | /(?{$blah = 45})/; | |
622 | iseq $blah, 45; | |
623 | } | |
624 | ||
625 | { | |
626 | local $Message = "Pos checks"; | |
627 | my $x = 'banana'; | |
628 | $x =~ /.a/g; | |
629 | iseq pos ($x), 2; | |
630 | ||
631 | $x =~ /.z/gc; | |
632 | iseq pos ($x), 2; | |
633 | ||
634 | sub f { | |
635 | my $p = $_[0]; | |
636 | return $p; | |
637 | } | |
638 | ||
639 | $x =~ /.a/g; | |
640 | iseq f (pos ($x)), 4; | |
641 | } | |
642 | ||
643 | { | |
644 | local $Message = 'Checking $^R'; | |
645 | our $x = $^R = 67; | |
646 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; | |
647 | iseq $^R, 75; | |
648 | ||
649 | $x = $^R = 67; | |
650 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; | |
651 | ok $^R eq '67' && $x eq '12'; | |
652 | ||
653 | $x = $^R = 67; | |
654 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; | |
655 | ok $^R eq '79' && $x eq '12'; | |
656 | } | |
657 | ||
658 | { | |
659 | iseq qr/\b\v$/i, '(?i-xsm:\b\v$)', 'qr/\b\v$/i'; | |
660 | iseq qr/\b\v$/s, '(?s-xim:\b\v$)', 'qr/\b\v$/s'; | |
661 | iseq qr/\b\v$/m, '(?m-xis:\b\v$)', 'qr/\b\v$/m'; | |
662 | iseq qr/\b\v$/x, '(?x-ism:\b\v$)', 'qr/\b\v$/x'; | |
663 | iseq qr/\b\v$/xism, '(?msix:\b\v$)', 'qr/\b\v$/xism'; | |
664 | iseq qr/\b\v$/, '(?-xism:\b\v$)', 'qr/\b\v$/'; | |
665 | } | |
666 | ||
667 | ||
668 | { | |
669 | local $Message = "Look around"; | |
670 | $_ = 'xabcx'; | |
671 | SKIP: | |
672 | foreach my $ans ('', 'c') { | |
673 | ok /(?<=(?=a)..)((?=c)|.)/g or skip "Match failed", 1; | |
674 | iseq $1, $ans; | |
675 | } | |
676 | } | |
677 | ||
678 | { | |
679 | local $Message = "Empty clause"; | |
680 | $_ = 'a'; | |
681 | foreach my $ans ('', 'a', '') { | |
682 | ok /^|a|$/g or skip "Match failed", 1; | |
683 | iseq $&, $ans; | |
684 | } | |
685 | } | |
686 | ||
687 | { | |
688 | local $Message = "Prefixify"; | |
689 | sub prefixify { | |
690 | SKIP: { | |
691 | my ($v, $a, $b, $res) = @_; | |
692 | ok $v =~ s/\Q$a\E/$b/ or skip "Match failed", 1; | |
693 | iseq $v, $res; | |
694 | } | |
695 | } | |
696 | ||
697 | prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); | |
698 | prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); | |
699 | } | |
700 | ||
701 | { | |
702 | $_ = 'var="foo"'; | |
703 | /(\")/; | |
704 | ok $1 && /$1/, "Capture a quote"; | |
705 | } | |
706 | ||
707 | { | |
708 | local $Message = "Call code from qr //"; | |
709 | $a = qr/(?{++$b})/; | |
710 | $b = 7; | |
711 | ok /$a$a/ && $b eq '9'; | |
712 | ||
713 | $c="$a"; | |
714 | ok /$a$a/ && $b eq '11'; | |
715 | ||
716 | undef $@; | |
717 | eval {/$c/}; | |
718 | ok $@ && $@ =~ /not allowed at runtime/; | |
719 | ||
720 | use re "eval"; | |
721 | /$a$c$a/; | |
722 | iseq $b, '14'; | |
723 | ||
724 | our $lex_a = 43; | |
725 | our $lex_b = 17; | |
726 | our $lex_c = 27; | |
727 | my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); | |
728 | ||
729 | iseq $lex_res, 1; | |
730 | iseq $lex_a, 44; | |
731 | iseq $lex_c, 43; | |
732 | ||
733 | no re "eval"; | |
734 | undef $@; | |
735 | my $match = eval { /$a$c$a/ }; | |
736 | ok $@ && $@ =~ /Eval-group not allowed/ && !$match; | |
737 | iseq $b, '14'; | |
738 | ||
739 | $lex_a = 2; | |
740 | $lex_a = 43; | |
741 | $lex_b = 17; | |
742 | $lex_c = 27; | |
743 | $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); | |
744 | ||
745 | iseq $lex_res, 1; | |
746 | iseq $lex_a, 44; | |
747 | iseq $lex_c, 43; | |
748 | ||
749 | } | |
750 | ||
751 | ||
752 | { | |
753 | no warnings 'closure'; | |
754 | local $Message = '(?{ $var } refers to package vars'; | |
755 | package aa; | |
756 | our $c = 2; | |
757 | $::c = 3; | |
758 | '' =~ /(?{ $c = 4 })/; | |
759 | main::iseq $c, 4; | |
760 | main::iseq $::c, 3; | |
761 | } | |
762 | ||
763 | ||
764 | { | |
765 | must_die 'q(a:[b]:) =~ /[x[:foo:]]/', | |
766 | 'POSIX class \[:[^:]+:\] unknown in regex', | |
767 | 'POSIX class [: :] must have valid name'; | |
768 | ||
769 | for my $d (qw [= .]) { | |
770 | must_die "/[[${d}foo${d}]]/", | |
771 | "\QPOSIX syntax [$d $d] is reserved for future extensions", | |
772 | "POSIX syntax [[$d $d]] is an error"; | |
773 | } | |
774 | } | |
775 | ||
776 | ||
777 | { | |
778 | # test if failure of patterns returns empty list | |
779 | local $Message = "Failed pattern returns empty list"; | |
780 | $_ = 'aaa'; | |
781 | @_ = /bbb/; | |
782 | iseq "@_", ""; | |
783 | ||
784 | @_ = /bbb/g; | |
785 | iseq "@_", ""; | |
786 | ||
787 | @_ = /(bbb)/; | |
788 | iseq "@_", ""; | |
789 | ||
790 | @_ = /(bbb)/g; | |
791 | iseq "@_", ""; | |
792 | } | |
793 | ||
794 | ||
795 | { | |
796 | local $Message = '@- and @+ tests'; | |
797 | ||
798 | /a(?=.$)/; | |
799 | iseq $#+, 0; | |
800 | iseq $#-, 0; | |
801 | iseq $+ [0], 2; | |
802 | iseq $- [0], 1; | |
803 | ok !defined $+ [1] && !defined $- [1] && | |
804 | !defined $+ [2] && !defined $- [2]; | |
805 | ||
806 | /a(a)(a)/; | |
807 | iseq $#+, 2; | |
808 | iseq $#-, 2; | |
809 | iseq $+ [0], 3; | |
810 | iseq $- [0], 0; | |
811 | iseq $+ [1], 2; | |
812 | iseq $- [1], 1; | |
813 | iseq $+ [2], 3; | |
814 | iseq $- [2], 2; | |
815 | ok !defined $+ [3] && !defined $- [3] && | |
816 | !defined $+ [4] && !defined $- [4]; | |
817 | ||
818 | ||
819 | /.(a)(b)?(a)/; | |
820 | iseq $#+, 3; | |
821 | iseq $#-, 3; | |
822 | iseq $+ [1], 2; | |
823 | iseq $- [1], 1; | |
824 | iseq $+ [3], 3; | |
825 | iseq $- [3], 2; | |
826 | ok !defined $+ [2] && !defined $- [2] && | |
827 | !defined $+ [4] && !defined $- [4]; | |
828 | ||
829 | ||
830 | /.(a)/; | |
831 | iseq $#+, 1; | |
832 | iseq $#-, 1; | |
833 | iseq $+ [0], 2; | |
834 | iseq $- [0], 0; | |
835 | iseq $+ [1], 2; | |
836 | iseq $- [1], 1; | |
837 | ok !defined $+ [2] && !defined $- [2] && | |
838 | !defined $+ [3] && !defined $- [3]; | |
839 | ||
840 | /.(a)(ba*)?/; | |
841 | iseq $#+, 2; | |
842 | iseq $#-, 1; | |
843 | } | |
844 | ||
845 | ||
846 | { | |
847 | local $DiePattern = '^Modification of a read-only value attempted'; | |
848 | local $Message = 'Elements of @- and @+ are read-only'; | |
849 | must_die '$+[0] = 13'; | |
850 | must_die '$-[0] = 13'; | |
851 | must_die '@+ = (7, 6, 5)'; | |
852 | must_die '@- = qw (foo bar)'; | |
853 | } | |
854 | ||
855 | ||
856 | { | |
857 | local $Message = '\G testing'; | |
858 | $_ = 'aaa'; | |
859 | pos = 1; | |
860 | my @a = /\Ga/g; | |
861 | iseq "@a", "a a"; | |
862 | ||
863 | my $str = 'abcde'; | |
864 | pos $str = 2; | |
865 | ok $str !~ /^\G/; | |
866 | ok $str !~ /^.\G/; | |
867 | ok $str =~ /^..\G/; | |
868 | ok $str !~ /^...\G/; | |
869 | ok $str =~ /\G../ && $& eq 'cd'; | |
870 | ||
871 | local $TODO = $running_as_thread; | |
872 | ok $str =~ /.\G./ && $& eq 'bc'; | |
873 | } | |
874 | ||
875 | ||
876 | { | |
877 | local $Message = 'pos inside (?{ })'; | |
878 | my $str = 'abcde'; | |
879 | our ($foo, $bar); | |
880 | ok $str =~ /b(?{$foo = $_; $bar = pos})c/; | |
881 | iseq $foo, $str; | |
882 | iseq $bar, 2; | |
883 | ok !defined pos ($str); | |
884 | ||
885 | undef $foo; | |
886 | undef $bar; | |
887 | pos $str = undef; | |
888 | ok $str =~ /b(?{$foo = $_; $bar = pos})c/g; | |
889 | iseq $foo, $str; | |
890 | iseq $bar, 2; | |
891 | iseq pos ($str), 3; | |
892 | ||
893 | $_ = $str; | |
894 | undef $foo; | |
895 | undef $bar; | |
896 | ok /b(?{$foo = $_; $bar = pos})c/; | |
897 | iseq $foo, $str; | |
898 | iseq $bar, 2; | |
899 | ||
900 | undef $foo; | |
901 | undef $bar; | |
902 | ok /b(?{$foo = $_; $bar = pos})c/g; | |
903 | iseq $foo, $str; | |
904 | iseq $bar, 2; | |
905 | iseq pos, 3; | |
906 | ||
907 | undef $foo; | |
908 | undef $bar; | |
909 | pos = undef; | |
910 | 1 while /b(?{$foo = $_; $bar = pos})c/g; | |
911 | iseq $foo, $str; | |
912 | iseq $bar, 2; | |
913 | ok !defined pos; | |
914 | ||
915 | undef $foo; | |
916 | undef $bar; | |
917 | $_ = 'abcde|abcde'; | |
918 | ok s/b(?{$foo = $_; $bar = pos})c/x/g; | |
919 | iseq $foo, 'abcde|abcde'; | |
920 | iseq $bar, 8; | |
921 | iseq $_, 'axde|axde'; | |
922 | ||
923 | # List context: | |
924 | $_ = 'abcde|abcde'; | |
925 | our @res; | |
926 | () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; | |
927 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
928 | iseq "@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; | |
929 | ||
930 | @res = (); | |
931 | () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; | |
932 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
933 | iseq "@res", "'' 'ab' 'cde|abcde' " . | |
934 | "'' 'abc' 'de|abcde' " . | |
935 | "'abcd' 'e|' 'abcde' " . | |
936 | "'abcde|' 'ab' 'cde' " . | |
937 | "'abcde|' 'abc' 'de'" ; | |
938 | } | |
939 | ||
940 | ||
941 | { | |
942 | local $Message = '\G anchor checks'; | |
943 | my $foo = 'aabbccddeeffgg'; | |
944 | pos ($foo) = 1; | |
945 | { | |
946 | local $TODO = $running_as_thread; | |
947 | no warnings 'uninitialized'; | |
948 | ok $foo =~ /.\G(..)/g; | |
949 | iseq $1, 'ab'; | |
950 | ||
951 | pos ($foo) += 1; | |
952 | ok $foo =~ /.\G(..)/g; | |
953 | iseq $1, 'cc'; | |
954 | ||
955 | pos ($foo) += 1; | |
956 | ok $foo =~ /.\G(..)/g; | |
957 | iseq $1, 'de'; | |
958 | ||
959 | ok $foo =~ /\Gef/g; | |
960 | } | |
961 | ||
962 | undef pos $foo; | |
963 | ok $foo =~ /\G(..)/g; | |
964 | iseq $1, 'aa'; | |
965 | ||
966 | ok $foo =~ /\G(..)/g; | |
967 | iseq $1, 'bb'; | |
968 | ||
969 | pos ($foo) = 5; | |
970 | ok $foo =~ /\G(..)/g; | |
971 | iseq $1, 'cd'; | |
972 | } | |
973 | ||
974 | ||
975 | { | |
976 | $_ = '123x123'; | |
977 | my @res = /(\d*|x)/g; | |
978 | local $" = '|'; | |
979 | iseq "@res", "123||x|123|", "0 match in alternation"; | |
980 | } | |
981 | ||
982 | ||
983 | { | |
984 | local $Message = "Match against temporaries (created via pp_helem())" . | |
985 | " is safe"; | |
986 | ok {foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g; | |
987 | iseq $1, "bar"; | |
988 | } | |
989 | ||
990 | ||
991 | { | |
992 | local $Message = 'package $i inside (?{ }), ' . | |
993 | 'saved substrings and changing $_'; | |
994 | our @a = qw [foo bar]; | |
995 | our @b = (); | |
996 | s/(\w)(?{push @b, $1})/,$1,/g for @a; | |
997 | iseq "@b", "f o o b a r"; | |
998 | iseq "@a", ",f,,o,,o, ,b,,a,,r,"; | |
999 | ||
1000 | local $Message = 'lexical $i inside (?{ }), ' . | |
1001 | 'saved substrings and changing $_'; | |
1002 | no warnings 'closure'; | |
1003 | my @c = qw [foo bar]; | |
1004 | my @d = (); | |
1005 | s/(\w)(?{push @d, $1})/,$1,/g for @c; | |
1006 | iseq "@d", "f o o b a r"; | |
1007 | iseq "@c", ",f,,o,,o, ,b,,a,,r,"; | |
1008 | } | |
1009 | ||
1010 | ||
1011 | { | |
1012 | local $Message = 'Brackets'; | |
1013 | our $brackets; | |
1014 | $brackets = qr { | |
1015 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
1016 | }x; | |
1017 | ||
1018 | ok "{{}" =~ $brackets; | |
1019 | iseq $&, "{}"; | |
1020 | ok "something { long { and } hairy" =~ $brackets; | |
1021 | iseq $&, "{ and }"; | |
1022 | ok "something { long { and } hairy" =~ m/((??{ $brackets }))/; | |
1023 | iseq $&, "{ and }"; | |
1024 | } | |
1025 | ||
1026 | ||
1027 | { | |
1028 | $_ = "a-a\nxbb"; | |
1029 | pos = 1; | |
1030 | nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'; | |
1031 | } | |
1032 | ||
1033 | ||
1034 | { | |
1035 | local $Message = '\G anchor checks'; | |
1036 | my $text = "aaXbXcc"; | |
1037 | pos ($text) = 0; | |
1038 | ok $text !~ /\GXb*X/g; | |
1039 | } | |
1040 | ||
1041 | ||
1042 | { | |
1043 | $_ = "xA\n" x 500; | |
1044 | nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'; | |
1045 | ||
1046 | my $text = "abc dbf"; | |
1047 | my @res = ($text =~ /.*?(b).*?\b/g); | |
1048 | iseq "@res", "b b", '\b is not special'; | |
1049 | } | |
1050 | ||
1051 | ||
1052 | { | |
1053 | local $Message = '\S, [\S], \s, [\s]'; | |
1054 | my @a = map chr, 0 .. 255; | |
1055 | my @b = grep /\S/, @a; | |
1056 | my @c = grep /[^\s]/, @a; | |
1057 | iseq "@b", "@c"; | |
1058 | ||
1059 | @b = grep /\S/, @a; | |
1060 | @c = grep /[\S]/, @a; | |
1061 | iseq "@b", "@c"; | |
1062 | ||
1063 | @b = grep /\s/, @a; | |
1064 | @c = grep /[^\S]/, @a; | |
1065 | iseq "@b", "@c"; | |
1066 | ||
1067 | @b = grep /\s/, @a; | |
1068 | @c = grep /[\s]/, @a; | |
1069 | iseq "@b", "@c"; | |
1070 | } | |
1071 | { | |
1072 | local $Message = '\D, [\D], \d, [\d]'; | |
1073 | my @a = map chr, 0 .. 255; | |
1074 | my @b = grep /\D/, @a; | |
1075 | my @c = grep /[^\d]/, @a; | |
1076 | iseq "@b", "@c"; | |
1077 | ||
1078 | @b = grep /\D/, @a; | |
1079 | @c = grep /[\D]/, @a; | |
1080 | iseq "@b", "@c"; | |
1081 | ||
1082 | @b = grep /\d/, @a; | |
1083 | @c = grep /[^\D]/, @a; | |
1084 | iseq "@b", "@c"; | |
1085 | ||
1086 | @b = grep /\d/, @a; | |
1087 | @c = grep /[\d]/, @a; | |
1088 | iseq "@b", "@c"; | |
1089 | } | |
1090 | { | |
1091 | local $Message = '\W, [\W], \w, [\w]'; | |
1092 | my @a = map chr, 0 .. 255; | |
1093 | my @b = grep /\W/, @a; | |
1094 | my @c = grep /[^\w]/, @a; | |
1095 | iseq "@b", "@c"; | |
1096 | ||
1097 | @b = grep /\W/, @a; | |
1098 | @c = grep /[\W]/, @a; | |
1099 | iseq "@b", "@c"; | |
1100 | ||
1101 | @b = grep /\w/, @a; | |
1102 | @c = grep /[^\W]/, @a; | |
1103 | iseq "@b", "@c"; | |
1104 | ||
1105 | @b = grep /\w/, @a; | |
1106 | @c = grep /[\w]/, @a; | |
1107 | iseq "@b", "@c"; | |
1108 | } | |
1109 | ||
1110 | ||
1111 | { | |
1112 | # see if backtracking optimization works correctly | |
1113 | local $Message = 'Backtrack optimization'; | |
1114 | ok "\n\n" =~ /\n $ \n/x; | |
1115 | ok "\n\n" =~ /\n* $ \n/x; | |
1116 | ok "\n\n" =~ /\n+ $ \n/x; | |
1117 | ok "\n\n" =~ /\n? $ \n/x; | |
1118 | ok "\n\n" =~ /\n*? $ \n/x; | |
1119 | ok "\n\n" =~ /\n+? $ \n/x; | |
1120 | ok "\n\n" =~ /\n?? $ \n/x; | |
1121 | ok "\n\n" !~ /\n*+ $ \n/x; | |
1122 | ok "\n\n" !~ /\n++ $ \n/x; | |
1123 | ok "\n\n" =~ /\n?+ $ \n/x; | |
1124 | } | |
1125 | ||
1126 | ||
1127 | { | |
1128 | package S; | |
1129 | use overload '""' => sub {'Object S'}; | |
1130 | sub new {bless []} | |
1131 | ||
1132 | local $Message = "Ref stringification"; | |
1133 | ::ok do { \my $v} =~ /^SCALAR/, "Scalar ref stringification"; | |
1134 | ::ok do {\\my $v} =~ /^REF/, "Ref ref stringification"; | |
1135 | ::ok [] =~ /^ARRAY/, "Array ref stringification"; | |
1136 | ::ok {} =~ /^HASH/, "Hash ref stringification"; | |
1137 | ::ok 'S' -> new =~ /^Object S/, "Object stringification"; | |
1138 | } | |
1139 | ||
1140 | ||
1141 | { | |
1142 | local $Message = "Test result of match used as match"; | |
1143 | ok 'a1b' =~ ('xyz' =~ /y/); | |
1144 | iseq $`, 'a'; | |
1145 | ok 'a1b' =~ ('xyz' =~ /t/); | |
1146 | iseq $`, 'a'; | |
1147 | } | |
1148 | ||
1149 | ||
1150 | { | |
1151 | local $Message = '"1" is not \s'; | |
1152 | may_not_warn sub {ok ("1\n" x 102) !~ /^\s*\n/m}; | |
1153 | } | |
1154 | ||
1155 | ||
1156 | { | |
1157 | local $Message = '\s, [[:space:]] and [[:blank:]]'; | |
1158 | my %space = (spc => " ", | |
1159 | tab => "\t", | |
1160 | cr => "\r", | |
1161 | lf => "\n", | |
1162 | ff => "\f", | |
1163 | # There's no \v but the vertical tabulator seems miraculously | |
1164 | # be 11 both in ASCII and EBCDIC. | |
1165 | vt => chr(11), | |
1166 | false => "space"); | |
1167 | ||
1168 | my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; | |
1169 | my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; | |
1170 | my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; | |
1171 | ||
1172 | iseq "@space0", "cr ff lf spc tab"; | |
1173 | iseq "@space1", "cr ff lf spc tab vt"; | |
1174 | iseq "@space2", "spc tab"; | |
1175 | } | |
1176 | ||
1177 | ||
1178 | { | |
1179 | local $BugId = '20000731.001'; | |
1180 | ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/, | |
1181 | "Match UTF-8 char in presense of (??{ })"; | |
1182 | } | |
1183 | ||
1184 | ||
1185 | { | |
1186 | local $BugId = '20001021.005'; | |
1187 | no warnings 'uninitialized'; | |
1188 | ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV"; | |
1189 | } | |
1190 | ||
1191 | ||
1192 | SKIP: | |
1193 | { | |
1194 | local $Message = '\C matches octet'; | |
1195 | $_ = "a\x{100}b"; | |
1196 | ok /(.)(\C)(\C)(.)/ or skip q [\C doesn't match], 4; | |
1197 | iseq $1, "a"; | |
1198 | if ($IS_ASCII) { # ASCII (or equivalent), should be UTF-8 | |
1199 | iseq $2, "\xC4"; | |
1200 | iseq $3, "\x80"; | |
1201 | } | |
1202 | elsif ($IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC | |
1203 | iseq $2, "\x8C"; | |
1204 | iseq $3, "\x41"; | |
1205 | } | |
1206 | else { | |
1207 | SKIP: { | |
1208 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1209 | skip "Unexpected platform"; | |
1210 | } | |
1211 | } | |
1212 | iseq $4, "b"; | |
1213 | } | |
1214 | ||
1215 | ||
1216 | SKIP: | |
1217 | { | |
1218 | local $Message = '\C matches octet'; | |
1219 | $_ = "\x{100}"; | |
1220 | ok /(\C)/g or skip q [\C doesn't match], 2; | |
1221 | if ($IS_ASCII) { | |
1222 | iseq $1, "\xC4"; | |
1223 | } | |
1224 | elsif ($IS_EBCDIC) { | |
1225 | iseq $1, "\x8C"; | |
1226 | } | |
1227 | else { | |
1228 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1229 | } | |
1230 | ok /(\C)/g or skip q [\C doesn't match]; | |
1231 | if ($IS_ASCII) { | |
1232 | iseq $1, "\x80"; | |
1233 | } | |
1234 | elsif ($IS_EBCDIC) { | |
1235 | iseq $1, "\x41"; | |
1236 | } | |
1237 | else { | |
1238 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1239 | } | |
1240 | } | |
1241 | ||
1242 | ||
1243 | { | |
1244 | # Japhy -- added 03/03/2001 | |
1245 | () = (my $str = "abc") =~ /(...)/; | |
1246 | $str = "def"; | |
1247 | iseq $1, "abc", 'Changing subject does not modify $1'; | |
1248 | } | |
1249 | ||
1250 | ||
1251 | SKIP: | |
1252 | { | |
1253 | # The trick is that in EBCDIC the explicit numeric range should | |
1254 | # match (as also in non-EBCDIC) but the explicit alphabetic range | |
1255 | # should not match. | |
1256 | ok "\x8e" =~ /[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; | |
1257 | ok "\xce" =~ /[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; | |
1258 | ||
1259 | skip "Not an EBCDIC platform", 2 unless ord ('i') == 0x89 && | |
1260 | ord ('J') == 0xd1; | |
1261 | ||
1262 | # In most places these tests would succeed since \x8e does not | |
1263 | # in most character sets match 'i' or 'j' nor would \xce match | |
1264 | # 'I' or 'J', but strictly speaking these tests are here for | |
1265 | # the good of EBCDIC, so let's test these only there. | |
1266 | nok "\x8e" !~ /[i-j]/, '"\x8e" !~ /[i-j]/'; | |
1267 | nok "\xce" !~ /[I-J]/, '"\xce" !~ /[I-J]/'; | |
1268 | } | |
1269 | ||
1270 | ||
1271 | { | |
1272 | ok "\x{ab}" =~ /\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; | |
1273 | ok "\x{abcd}" =~ /\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; | |
1274 | } | |
1275 | ||
1276 | ||
1277 | { | |
1278 | local $Message = 'bug id 20001008.001'; | |
1279 | ||
1280 | my @x = ("stra\337e 138", "stra\337e 138"); | |
1281 | for (@x) { | |
1282 | ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; | |
1283 | ok my ($latin) = /^(.+)(?:\s+\d)/; | |
1284 | iseq $latin, "stra\337e"; | |
1285 | ok $latin =~ s/stra\337e/straße/; | |
1286 | # | |
1287 | # Previous code follows, but outcommented - there were no tests. | |
1288 | # | |
1289 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
1290 | # use utf8; # needed for the raw UTF-8 | |
1291 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
1292 | } | |
1293 | } | |
1294 | ||
1295 | ||
1296 | { | |
1297 | local $Message = 'Test \x escapes'; | |
1298 | ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1299 | ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1300 | ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1301 | ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1302 | ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1303 | ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1304 | ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1305 | ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1306 | } | |
1307 | ||
1308 | ||
1309 | { | |
1310 | local $BugId = '20001028.003'; | |
1311 | ||
1312 | # Fist half of the bug. | |
1313 | local $Message = 'HEBREW ACCENT QADMA matched by .*'; | |
1314 | my $X = chr (1448); | |
1315 | ok my ($Y) = $X =~ /(.*)/; | |
1316 | iseq $Y, v1448; | |
1317 | iseq length ($Y), 1; | |
1318 | ||
1319 | # Second half of the bug. | |
1320 | $Message = 'HEBREW ACCENT QADMA in replacement'; | |
1321 | $X = ''; | |
1322 | $X =~ s/^/chr(1488)/e; | |
1323 | iseq length $X, 1; | |
1324 | iseq ord ($X), 1488; | |
1325 | } | |
1326 | ||
1327 | ||
1328 | { | |
1329 | local $BugId = '20001108.001'; | |
1330 | local $Message = 'Repeated s///'; | |
1331 | my $X = "Szab\x{f3},Bal\x{e1}zs"; | |
1332 | my $Y = $X; | |
1333 | $Y =~ s/(B)/$1/ for 0 .. 3; | |
1334 | iseq $Y, $X; | |
1335 | iseq $X, "Szab\x{f3},Bal\x{e1}zs"; | |
1336 | } | |
1337 | ||
1338 | ||
1339 | { | |
1340 | local $BugId = '20000517.001'; | |
1341 | local $Message = 's/// on UTF-8 string'; | |
1342 | my $x = "\x{100}A"; | |
1343 | $x =~ s/A/B/; | |
1344 | iseq $x, "\x{100}B"; | |
1345 | iseq length $x, 2; | |
1346 | } | |
1347 | ||
1348 | ||
1349 | { | |
1350 | local $BugId = '20001230.002'; | |
1351 | local $Message = '\C and É'; | |
1352 | ok "École" =~ /^\C\C(.)/ && $1 eq 'c'; | |
1353 | ok "École" =~ /^\C\C(c)/; | |
1354 | } | |
1355 | ||
1356 | ||
1357 | SKIP: | |
1358 | { | |
1359 | local $Message = 'Match code points > 255'; | |
1360 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; | |
1361 | ok /(.\x{300})./ or skip "No match", 4; | |
1362 | ok $` eq "abc\x{100}" && length ($`) == 4; | |
1363 | ok $& eq "\x{200}\x{300}\x{380}" && length ($&) == 3; | |
1364 | ok $' eq "\x{400}defg" && length ($') == 5; | |
1365 | ok $1 eq "\x{200}\x{300}" && length ($1) == 2; | |
1366 | } | |
1367 | ||
1368 | ||
1369 | { | |
1370 | # The original bug report had 'no utf8' here but that was irrelevant. | |
1371 | local $BugId = '20010306.008'; | |
1372 | local $Message = "Don't dump core"; | |
1373 | my $a = "a\x{1234}"; | |
1374 | ok $a =~ m/\w/; # used to core dump. | |
1375 | } | |
1376 | ||
1377 | ||
1378 | { | |
1379 | local $BugId = '20010410.006'; | |
1380 | local $Message = '/g in scalar context'; | |
1381 | for my $rx ('/(.*?)\{(.*?)\}/csg', | |
1382 | '/(.*?)\{(.*?)\}/cg', | |
1383 | '/(.*?)\{(.*?)\}/sg', | |
1384 | '/(.*?)\{(.*?)\}/g', | |
1385 | '/(.+?)\{(.+?)\}/csg',) { | |
1386 | my $i = 0; | |
1387 | my $input = "a{b}c{d}"; | |
1388 | eval <<" --"; | |
1389 | while (eval \$input =~ $rx) { | |
1390 | \$i ++; | |
1391 | } | |
1392 | -- | |
1393 | iseq $i, 2; | |
1394 | } | |
1395 | } | |
1396 | ||
1397 | ||
1398 | { | |
1399 | my $x = "\x{10FFFD}"; | |
1400 | $x =~ s/(.)/$1/g; | |
1401 | ok ord($x) == 0x10FFFD && length($x) == 1, "From Robin Houston"; | |
1402 | } | |
1403 | ||
1404 | ||
1405 | { | |
1406 | my %d = ( | |
1407 | "7f" => [0, 0, 0], | |
1408 | "80" => [1, 1, 0], | |
1409 | "ff" => [1, 1, 0], | |
1410 | "100" => [0, 1, 1], | |
1411 | ); | |
1412 | SKIP: | |
1413 | while (my ($code, $match) = each %d) { | |
1414 | local $Message = "Properties of \\x$code"; | |
1415 | my $char = eval qq ["\\x{$code}"]; | |
1416 | my $i = 0; | |
1417 | ok (($char =~ /[\x80-\xff]/) xor !$$match [$i ++]); | |
1418 | ok (($char =~ /[\x80-\x{100}]/) xor !$$match [$i ++]); | |
1419 | ok (($char =~ /[\x{100}]/) xor !$$match [$i ++]); | |
1420 | } | |
1421 | } | |
1422 | ||
1423 | ||
1424 | { | |
1425 | # From Japhy | |
1426 | local $Message; | |
1427 | must_warn 'qr/(?c)/', '^Useless \(\?c\)'; | |
1428 | must_warn 'qr/(?-c)/', '^Useless \(\?-c\)'; | |
1429 | must_warn 'qr/(?g)/', '^Useless \(\?g\)'; | |
1430 | must_warn 'qr/(?-g)/', '^Useless \(\?-g\)'; | |
1431 | must_warn 'qr/(?o)/', '^Useless \(\?o\)'; | |
1432 | must_warn 'qr/(?-o)/', '^Useless \(\?-o\)'; | |
1433 | ||
1434 | # Now test multi-error regexes | |
1435 | must_warn 'qr/(?g-o)/', '^Useless \(\?g\).*\nUseless \(\?-o\)'; | |
1436 | must_warn 'qr/(?g-c)/', '^Useless \(\?g\).*\nUseless \(\?-c\)'; | |
1437 | # (?c) means (?g) error won't be thrown | |
1438 | must_warn 'qr/(?o-cg)/', '^Useless \(\?o\).*\nUseless \(\?-c\)'; | |
1439 | must_warn 'qr/(?ogc)/', '^Useless \(\?o\).*\nUseless \(\?g\).*\n' . | |
1440 | 'Useless \(\?c\)'; | |
1441 | } | |
1442 | ||
1443 | ||
1444 | { | |
1445 | local $Message = "/x tests"; | |
1446 | $_ = "foo"; | |
1447 | eval_ok <<" --"; | |
1448 | /f | |
1449 | o\r | |
1450 | o | |
1451 | \$ | |
1452 | /x | |
1453 | -- | |
1454 | eval_ok <<" --"; | |
1455 | /f | |
1456 | o | |
1457 | o | |
1458 | \$\r | |
1459 | /x | |
1460 | -- | |
1461 | } | |
1462 | ||
1463 | ||
1464 | { | |
1465 | local $Message = "/o feature"; | |
1466 | sub test_o {$_ [0] =~ /$_[1]/o; return $1} | |
1467 | iseq test_o ('abc', '(.)..'), 'a'; | |
1468 | iseq test_o ('abc', '..(.)'), 'a'; | |
1469 | } | |
1470 | ||
1471 | ||
1472 | { | |
1473 | local $BugId = "20010619.003"; | |
1474 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. | |
1475 | for ("\n", "\t", "\014", "\r") { | |
1476 | ok !/[[:print:]]/, "'$_' not in [[:print:]]"; | |
1477 | } | |
1478 | for (" ") { | |
1479 | ok /[[:print:]]/, "'$_' in [[:print:]]"; | |
1480 | } | |
1481 | } | |
1482 | ||
1483 | ||
1484 | { | |
1485 | # Test basic $^N usage outside of a regex | |
1486 | local $Message = '$^N usage outside of a regex'; | |
1487 | my $x = "abcdef"; | |
1488 | ok ($x =~ /cde/ and !defined $^N); | |
1489 | ok ($x =~ /(cde)/ and $^N eq "cde"); | |
1490 | ok ($x =~ /(c)(d)(e)/ and $^N eq "e"); | |
1491 | ok ($x =~ /(c(d)e)/ and $^N eq "cde"); | |
1492 | ok ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde"); | |
1493 | ok ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde"); | |
1494 | ok ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc"); | |
1495 | ok ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde"); | |
1496 | ok ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde"); | |
1497 | ok ($x =~ /(?:c(d)e)/ and $^N eq "d"); | |
1498 | ok ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d"); | |
1499 | ok ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f"); | |
1500 | ok ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f"); | |
1501 | ok ($x =~ /(([ace])|([bd]))*/ and $^N eq "e"); | |
1502 | {ok ($x =~ /(([ace])|([bdf]))*/ and $^N eq "f");} | |
1503 | ## Test to see if $^N is automatically localized -- it should now | |
1504 | ## have the value set in the previous test. | |
1505 | iseq $^N, "e", '$^N is automatically localized'; | |
1506 | ||
1507 | # Now test inside (?{ ... }) | |
1508 | local $Message = '$^N usage inside (?{ ... })'; | |
1509 | our ($y, $z); | |
1510 | ok ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b"); | |
1511 | ok ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"); | |
1512 | ok ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"); | |
1513 | ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" | |
1514 | and $z eq "abcd"); | |
1515 | ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" | |
1516 | and $z eq "abcde"); | |
1517 | ||
1518 | } | |
1519 | ||
1520 | ||
1521 | SKIP: | |
1522 | { | |
1523 | ## Should probably put in tests for all the POSIX stuff, | |
1524 | ## but not sure how to guarantee a specific locale...... | |
1525 | ||
1526 | skip "Not an ASCII platform", 2 unless $IS_ASCII; | |
1527 | local $Message = 'Test [[:cntrl:]]'; | |
1528 | my $AllBytes = join "" => map {chr} 0 .. 255; | |
1529 | (my $x = $AllBytes) =~ s/[[:cntrl:]]//g; | |
1530 | iseq $x, join "", map {chr} 0x20 .. 0x7E, 0x80 .. 0xFF; | |
1531 | ||
1532 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; | |
1533 | iseq $x, join "", map {chr} 0x00 .. 0x1F, 0x7F; | |
1534 | } | |
1535 | ||
1536 | ||
1537 | { | |
1538 | # With /s modifier UTF8 chars were interpreted as bytes | |
1539 | local $Message = "UTF-8 chars aren't bytes"; | |
1540 | my $a = "Hello \x{263A} World"; | |
1541 | my @a = ($a =~ /./gs); | |
1542 | iseq $#a, 12; | |
1543 | } | |
1544 | ||
1545 | ||
1546 | { | |
1547 | local $Message = '. matches \n with /s'; | |
1548 | my $str1 = "foo\nbar"; | |
1549 | my $str2 = "foo\n\x{100}bar"; | |
1550 | my ($a, $b) = map {chr} $IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41); | |
1551 | my @a; | |
1552 | @a = $str1 =~ /./g; iseq @a, 6; iseq "@a", "f o o b a r"; | |
1553 | @a = $str1 =~ /./gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1554 | @a = $str1 =~ /\C/g; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1555 | @a = $str1 =~ /\C/gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1556 | @a = $str2 =~ /./g; iseq @a, 7; iseq "@a", "f o o \x{100} b a r"; | |
1557 | @a = $str2 =~ /./gs; iseq @a, 8; iseq "@a", "f o o \n \x{100} b a r"; | |
1558 | @a = $str2 =~ /\C/g; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; | |
1559 | @a = $str2 =~ /\C/gs; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; | |
1560 | } | |
1561 | ||
1562 | ||
1563 | { | |
1564 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
1565 | local $BugId = '20010814.004'; | |
1566 | $_ = "ababacadaea"; | |
1567 | my $a = join ":", /b./gc; | |
1568 | my $b = join ":", /a./gc; | |
1569 | my $c = pos; | |
1570 | iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//"; | |
1571 | } | |
1572 | ||
1573 | ||
1574 | { | |
1575 | # [ID 20010407.006] matching utf8 return values from | |
1576 | # functions does not work | |
1577 | local $BugId = '20010407.006'; | |
1578 | local $Message = 'UTF-8 return values from functions'; | |
1579 | package ID_20010407_006; | |
1580 | sub x {"a\x{1234}"} | |
1581 | my $x = x; | |
1582 | my $y; | |
1583 | ::ok $x =~ /(..)/; | |
1584 | $y = $1; | |
1585 | ::ok length ($y) == 2 && $y eq $x; | |
1586 | ::ok x =~ /(..)/; | |
1587 | $y = $1; | |
1588 | ::ok length ($y) == 2 && $y eq $x; | |
1589 | } | |
1590 | ||
1591 | ||
1592 | { | |
1593 | no warnings 'digit'; | |
1594 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. | |
1595 | my $x; | |
1596 | $x = "\x4e" . "E"; | |
1597 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); | |
1598 | ||
1599 | $x = "\x4e" . "i"; | |
1600 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); | |
1601 | ||
1602 | $x = "\x4" . "j"; | |
1603 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); | |
1604 | ||
1605 | $x = "\x0" . "k"; | |
1606 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); | |
1607 | ||
1608 | $x = "\x0" . "x"; | |
1609 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); | |
1610 | ||
1611 | $x = "\x0" . "xa"; | |
1612 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); | |
1613 | ||
1614 | $x = "\x9" . "_b"; | |
1615 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); | |
1616 | ||
1617 | # and now again in [] ranges | |
1618 | ||
1619 | $x = "\x4e" . "E"; | |
1620 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); | |
1621 | ||
1622 | $x = "\x4e" . "i"; | |
1623 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); | |
1624 | ||
1625 | $x = "\x4" . "j"; | |
1626 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); | |
1627 | ||
1628 | $x = "\x0" . "k"; | |
1629 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); | |
1630 | ||
1631 | $x = "\x0" . "x"; | |
1632 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); | |
1633 | ||
1634 | $x = "\x0" . "xa"; | |
1635 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); | |
1636 | ||
1637 | $x = "\x9" . "_b"; | |
1638 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); | |
1639 | ||
1640 | # Check that \x{##} works. 5.6.1 fails quite a few of these. | |
1641 | ||
1642 | $x = "\x9b"; | |
1643 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); | |
1644 | ||
1645 | $x = "\x9b" . "y"; | |
1646 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
1647 | ||
1648 | $x = "\x9b" . "y"; | |
1649 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); | |
1650 | ||
1651 | $x = "\x9b" . "y"; | |
1652 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); | |
1653 | ||
1654 | $x = "\x0" . "y"; | |
1655 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); | |
1656 | ||
1657 | $x = "\x0" . "y"; | |
1658 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); | |
1659 | ||
1660 | $x = "\x9b" . "y"; | |
1661 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); | |
1662 | ||
1663 | $x = "\x9b"; | |
1664 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); | |
1665 | ||
1666 | $x = "\x9b" . "y"; | |
1667 | ok ($x =~ /^[\x{9_b}y]{2}$/, | |
1668 | "\\x{9_b} is to be treated as \\x9b (again)"); | |
1669 | ||
1670 | $x = "\x9b" . "y"; | |
1671 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); | |
1672 | ||
1673 | $x = "\x9b" . "y"; | |
1674 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); | |
1675 | ||
1676 | $x = "\x0" . "y"; | |
1677 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); | |
1678 | ||
1679 | $x = "\x0" . "y"; | |
1680 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); | |
1681 | ||
1682 | $x = "\x9b" . "y"; | |
1683 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); | |
1684 | ||
1685 | } | |
1686 | ||
1687 | ||
1688 | { | |
1689 | # High bit bug -- japhy | |
1690 | my $x = "ab\200d"; | |
1691 | ok $x =~ /.*?\200/, "High bit fine"; | |
1692 | } | |
1693 | ||
1694 | ||
1695 | { | |
1696 | # The basic character classes and Unicode | |
1697 | ok "\x{0100}" =~ /\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; | |
1698 | ok "\x{0660}" =~ /\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; | |
1699 | ok "\x{1680}" =~ /\s/, 'OGHAM SPACE MARK in /\s/'; | |
1700 | } | |
1701 | ||
1702 | ||
1703 | { | |
1704 | local $Message = "Folding matches and Unicode"; | |
1705 | ok "a\x{100}" =~ /A/i; | |
1706 | ok "A\x{100}" =~ /a/i; | |
1707 | ok "a\x{100}" =~ /a/i; | |
1708 | ok "A\x{100}" =~ /A/i; | |
1709 | ok "\x{101}a" =~ /\x{100}/i; | |
1710 | ok "\x{100}a" =~ /\x{100}/i; | |
1711 | ok "\x{101}a" =~ /\x{101}/i; | |
1712 | ok "\x{100}a" =~ /\x{101}/i; | |
1713 | ok "a\x{100}" =~ /A\x{100}/i; | |
1714 | ok "A\x{100}" =~ /a\x{100}/i; | |
1715 | ok "a\x{100}" =~ /a\x{100}/i; | |
1716 | ok "A\x{100}" =~ /A\x{100}/i; | |
1717 | ok "a\x{100}" =~ /[A]/i; | |
1718 | ok "A\x{100}" =~ /[a]/i; | |
1719 | ok "a\x{100}" =~ /[a]/i; | |
1720 | ok "A\x{100}" =~ /[A]/i; | |
1721 | ok "\x{101}a" =~ /[\x{100}]/i; | |
1722 | ok "\x{100}a" =~ /[\x{100}]/i; | |
1723 | ok "\x{101}a" =~ /[\x{101}]/i; | |
1724 | ok "\x{100}a" =~ /[\x{101}]/i; | |
1725 | } | |
1726 | ||
1727 | ||
1728 | { | |
1729 | use charnames ':full'; | |
1730 | local $Message = "Folding 'LATIN LETTER A WITH GRAVE'"; | |
1731 | ||
1732 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; | |
1733 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; | |
1734 | ||
1735 | ok $lower =~ m/$UPPER/i; | |
1736 | ok $UPPER =~ m/$lower/i; | |
1737 | ok $lower =~ m/[$UPPER]/i; | |
1738 | ok $UPPER =~ m/[$lower]/i; | |
1739 | ||
1740 | local $Message = "Folding 'GREEK LETTER ALPHA WITH VRACHY'"; | |
1741 | ||
1742 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; | |
1743 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; | |
1744 | ||
1745 | ok $lower =~ m/$UPPER/i; | |
1746 | ok $UPPER =~ m/$lower/i; | |
1747 | ok $lower =~ m/[$UPPER]/i; | |
1748 | ok $UPPER =~ m/[$lower]/i; | |
1749 | ||
1750 | local $Message = "Folding 'LATIN LETTER Y WITH DIAERESIS'"; | |
1751 | ||
1752 | $lower = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; | |
1753 | $UPPER = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; | |
1754 | ||
1755 | ok $lower =~ m/$UPPER/i; | |
1756 | ok $UPPER =~ m/$lower/i; | |
1757 | ok $lower =~ m/[$UPPER]/i; | |
1758 | ok $UPPER =~ m/[$lower]/i; | |
1759 | } | |
1760 | ||
1761 | ||
1762 | { | |
1763 | use charnames ':full'; | |
1764 | local $PatchId = "13843"; | |
1765 | local $Message = "GREEK CAPITAL LETTER SIGMA vs " . | |
1766 | "COMBINING GREEK PERISPOMENI"; | |
1767 | ||
1768 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; | |
1769 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; | |
1770 | ||
1771 | may_not_warn sub {ok "_:$char:_" !~ m/_:$SIGMA:_/i}; | |
1772 | } | |
1773 | ||
1774 | ||
1775 | { | |
1776 | local $Message = '\X'; | |
1777 | use charnames ':full'; | |
1778 | ||
1779 | ok "a!" =~ /^(\X)!/ && $1 eq "a"; | |
1780 | ok "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF"; | |
1781 | ok "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}"; | |
1782 | ok "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}"; | |
1783 | ok "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && | |
1784 | $1 eq "\N{LATIN CAPITAL LETTER E}"; | |
1785 | ok "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" | |
1786 | =~ /^(\X)!/ && | |
1787 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}"; | |
1788 | ||
1789 | local $Message = '\C and \X'; | |
1790 | ok "!abc!" =~ /a\Cc/; | |
1791 | ok "!abc!" =~ /a\Xc/; | |
1792 | } | |
1793 | ||
1794 | ||
1795 | { | |
1796 | local $Message = "Final Sigma"; | |
1797 | ||
1798 | my $SIGMA = "\x{03A3}"; # CAPITAL | |
1799 | my $Sigma = "\x{03C2}"; # SMALL FINAL | |
1800 | my $sigma = "\x{03C3}"; # SMALL | |
1801 | ||
1802 | ok $SIGMA =~ /$SIGMA/i; | |
1803 | ok $SIGMA =~ /$Sigma/i; | |
1804 | ok $SIGMA =~ /$sigma/i; | |
1805 | ||
1806 | ok $Sigma =~ /$SIGMA/i; | |
1807 | ok $Sigma =~ /$Sigma/i; | |
1808 | ok $Sigma =~ /$sigma/i; | |
1809 | ||
1810 | ok $sigma =~ /$SIGMA/i; | |
1811 | ok $sigma =~ /$Sigma/i; | |
1812 | ok $sigma =~ /$sigma/i; | |
1813 | ||
1814 | ok $SIGMA =~ /[$SIGMA]/i; | |
1815 | ok $SIGMA =~ /[$Sigma]/i; | |
1816 | ok $SIGMA =~ /[$sigma]/i; | |
1817 | ||
1818 | ok $Sigma =~ /[$SIGMA]/i; | |
1819 | ok $Sigma =~ /[$Sigma]/i; | |
1820 | ok $Sigma =~ /[$sigma]/i; | |
1821 | ||
1822 | ok $sigma =~ /[$SIGMA]/i; | |
1823 | ok $sigma =~ /[$Sigma]/i; | |
1824 | ok $sigma =~ /[$sigma]/i; | |
1825 | ||
1826 | local $Message = "More final Sigma"; | |
1827 | ||
1828 | my $S3 = "$SIGMA$Sigma$sigma"; | |
1829 | ||
1830 | ok ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma; | |
1831 | ok ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma; | |
1832 | ok ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma; | |
1833 | ||
1834 | ok ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma; | |
1835 | ok ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma; | |
1836 | ok ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma; | |
1837 | } | |
1838 | ||
1839 | ||
1840 | { | |
1841 | use charnames ':full'; | |
1842 | local $Message = "Parlez-Vous " . | |
1843 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais?"; | |
1844 | ||
1845 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran.ais/ && | |
1846 | $& eq "Francais"; | |
1847 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ && | |
1848 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1849 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ && | |
1850 | $& eq "Francais"; | |
1851 | # COMBINING CEDILLA is two bytes when encoded | |
1852 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\C\Cais/; | |
1853 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ && | |
1854 | $& eq "Francais"; | |
1855 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ && | |
1856 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1857 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Fran\Xais/ && | |
1858 | $& eq "Franc\N{COMBINING CEDILLA}ais"; | |
1859 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
1860 | /Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && | |
1861 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1862 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\N{COMBINING CEDILLA}ais/ && | |
1863 | $& eq "Franc\N{COMBINING CEDILLA}ais"; | |
1864 | ||
1865 | my @f = ( | |
1866 | ["Fran\N{LATIN SMALL LETTER C}ais", "Francais"], | |
1867 | ["Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", | |
1868 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"], | |
1869 | ["Franc\N{COMBINING CEDILLA}ais", "Franc\N{COMBINING CEDILLA}ais"], | |
1870 | ); | |
1871 | foreach my $entry (@f) { | |
1872 | my ($subject, $match) = @$entry; | |
1873 | ok $subject =~ /Fran(?:c\N{COMBINING CEDILLA}?| | |
1874 | \N{LATIN SMALL LETTER C WITH CEDILLA})ais/x && | |
1875 | $& eq $match; | |
1876 | } | |
1877 | } | |
1878 | ||
1879 | ||
1880 | { | |
1881 | local $Message = "Lingering (and useless) UTF8 flag doesn't mess up /i"; | |
1882 | my $pat = "ABcde"; | |
1883 | my $str = "abcDE\x{100}"; | |
1884 | chop $str; | |
1885 | ok $str =~ /$pat/i; | |
1886 | ||
1887 | $pat = "ABcde\x{100}"; | |
1888 | $str = "abcDE"; | |
1889 | chop $pat; | |
1890 | ok $str =~ /$pat/i; | |
1891 | ||
1892 | $pat = "ABcde\x{100}"; | |
1893 | $str = "abcDE\x{100}"; | |
1894 | chop $pat; | |
1895 | chop $str; | |
1896 | ok $str =~ /$pat/i; | |
1897 | } | |
1898 | ||
1899 | ||
1900 | { | |
1901 | use charnames ':full'; | |
1902 | local $Message = "LATIN SMALL LETTER SHARP S " . | |
1903 | "(\N{LATIN SMALL LETTER SHARP S})"; | |
1904 | ||
1905 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1906 | /\N{LATIN SMALL LETTER SHARP S}/; | |
1907 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1908 | /\N{LATIN SMALL LETTER SHARP S}/i; | |
1909 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1910 | /[\N{LATIN SMALL LETTER SHARP S}]/; | |
1911 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1912 | /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1913 | ||
1914 | ok "ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i; | |
1915 | ok "SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i; | |
1916 | ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1917 | ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1918 | ||
1919 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i; | |
1920 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i; | |
1921 | ||
1922 | local $Message = "Unoptimized named sequence in class"; | |
1923 | ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
1924 | ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
1925 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1926 | /[\N{LATIN SMALL LETTER SHARP S}x]/; | |
1927 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1928 | /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
1929 | } | |
1930 | ||
1931 | ||
1932 | { | |
1933 | # More whitespace: U+0085, U+2028, U+2029\n"; | |
1934 | ||
1935 | # U+0085, U+00A0 need to be forced to be Unicode, the \x{100} does that. | |
1936 | SKIP: { | |
1937 | skip "EBCDIC platform", 4 if $IS_EBCDIC; | |
1938 | # Do \x{0015} and \x{0041} match \s in EBCDIC? | |
1939 | ok "<\x{100}\x{0085}>" =~ /<\x{100}\s>/, '\x{0085} in \s'; | |
1940 | ok "<\x{0085}>" =~ /<\v>/, '\x{0085} in \v'; | |
1941 | ok "<\x{100}\x{00A0}>" =~ /<\x{100}\s>/, '\x{00A0} in \s'; | |
1942 | ok "<\x{00A0}>" =~ /<\h>/, '\x{00A0} in \h'; | |
1943 | } | |
1944 | my @h = map {sprintf "%05x" => $_} 0x01680, 0x0180E, 0x02000 .. 0x0200A, | |
1945 | 0x0202F, 0x0205F, 0x03000; | |
1946 | my @v = map {sprintf "%05x" => $_} 0x02028, 0x02029; | |
1947 | ||
1948 | my @H = map {sprintf "%05x" => $_} 0x01361, 0x0200B, 0x02408, 0x02420, | |
1949 | 0x0303F, 0xE0020; | |
1950 | my @V = map {sprintf "%05x" => $_} 0x0008A .. 0x0008D, 0x00348, 0x10100, | |
1951 | 0xE005F, 0xE007C; | |
1952 | ||
1953 | for my $hex (@h) { | |
1954 | my $str = eval qq ["<\\x{$hex}>"]; | |
1955 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
1956 | ok $str =~ /<\h>/, "\\x{$hex} in \\h"; | |
1957 | ok $str !~ /<\v>/, "\\x{$hex} not in \\v"; | |
1958 | } | |
1959 | ||
1960 | for my $hex (@v) { | |
1961 | my $str = eval qq ["<\\x{$hex}>"]; | |
1962 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
1963 | ok $str =~ /<\v>/, "\\x{$hex} in \\v"; | |
1964 | ok $str !~ /<\h>/, "\\x{$hex} not in \\h"; | |
1965 | } | |
1966 | ||
1967 | for my $hex (@H) { | |
1968 | my $str = eval qq ["<\\x{$hex}>"]; | |
1969 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
1970 | ok $str =~ /<\H>/, "\\x{$hex} in \\H"; | |
1971 | } | |
1972 | ||
1973 | for my $hex (@V) { | |
1974 | my $str = eval qq ["<\\x{$hex}>"]; | |
1975 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
1976 | ok $str =~ /<\V>/, "\\x{$hex} in \\V"; | |
1977 | } | |
1978 | } | |
1979 | ||
1980 | ||
1981 | { | |
1982 | # . with /s should work on characters, as opposed to bytes | |
1983 | local $Message = ". with /s works on characters, not bytes"; | |
1984 | ||
1985 | my $s = "\x{e4}\x{100}"; | |
1986 | # This is not expected to match: the point is that | |
1987 | # neither should we get "Malformed UTF-8" warnings. | |
1988 | may_not_warn sub {$s =~ /\G(.+?)\n/gcs}, "No 'Malformed UTF-8' warning"; | |
1989 | ||
1990 | my @c; | |
1991 | push @c => $1 while $s =~ /\G(.)/gs; | |
1992 | ||
1993 | local $" = ""; | |
1994 | iseq "@c", $s; | |
1995 | ||
1996 | # Test only chars < 256 | |
1997 | my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; | |
1998 | my $r1 = ""; | |
1999 | while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2000 | $r1 .= $1 . $2; | |
2001 | } | |
2002 | ||
2003 | my $t2 = $t1 . "\x{100}"; # Repeat with a larger char | |
2004 | my $r2 = ""; | |
2005 | while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2006 | $r2 .= $1 . $2; | |
2007 | } | |
2008 | $r2 =~ s/\x{100}//; | |
2009 | ||
2010 | iseq $r1, $r2; | |
2011 | } | |
2012 | ||
2013 | ||
2014 | { | |
2015 | local $Message = "Unicode lookbehind"; | |
2016 | ok "A\x{100}B" =~ /(?<=A.)B/; | |
2017 | ok "A\x{200}\x{300}B" =~ /(?<=A..)B/; | |
2018 | ok "\x{400}AB" =~ /(?<=\x{400}.)B/; | |
2019 | ok "\x{500}\x{600}B" =~ /(?<=\x{500}.)B/; | |
2020 | ||
2021 | # Original code also contained: | |
2022 | # ok "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/; | |
2023 | # but that looks like a typo. | |
2024 | } | |
2025 | ||
2026 | ||
2027 | { | |
2028 | local $Message = 'UTF-8 hash keys and /$/'; | |
2029 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters | |
2030 | # /2002-01/msg01327.html | |
2031 | ||
2032 | my $u = "a\x{100}"; | |
2033 | my $v = substr ($u, 0, 1); | |
2034 | my $w = substr ($u, 1, 1); | |
2035 | my %u = ($u => $u, $v => $v, $w => $w); | |
2036 | for (keys %u) { | |
2037 | my $m1 = /^\w*$/ ? 1 : 0; | |
2038 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
2039 | iseq $m1, $m2; | |
2040 | } | |
2041 | } | |
2042 | ||
2043 | ||
2044 | { | |
2045 | local $BugId = "20020124.005"; | |
2046 | local $PatchId = "14795"; | |
2047 | local $Message = "s///eg"; | |
2048 | ||
2049 | for my $char ("a", "\x{df}", "\x{100}") { | |
2050 | my $x = "$char b $char"; | |
2051 | $x =~ s{($char)}{ | |
2052 | "c" =~ /c/; | |
2053 | "x"; | |
2054 | }ge; | |
2055 | iseq substr ($x, 0, 1), substr ($x, -1, 1); | |
2056 | } | |
2057 | } | |
2058 | ||
2059 | ||
2060 | { | |
2061 | local $Message = "No SEGV in s/// and UTF-8"; | |
2062 | my $s = "s#\x{100}" x 4; | |
2063 | ok $s =~ s/[^\w]/ /g; | |
2064 | if ( $ENV{PERL_TEST_LEGACY_POSIX_CC} ) { | |
2065 | iseq $s, "s \x{100}" x 4; | |
2066 | } | |
2067 | else { | |
2068 | iseq $s, "s " x 4; | |
2069 | } | |
2070 | } | |
2071 | ||
2072 | ||
2073 | { | |
2074 | local $Message = "UTF-8 bug (maybe already known?)"; | |
2075 | my $u = "foo"; | |
2076 | $u =~ s/./\x{100}/g; | |
2077 | iseq $u, "\x{100}\x{100}\x{100}"; | |
2078 | ||
2079 | $u = "foobar"; | |
2080 | $u =~ s/[ao]/\x{100}/g; | |
2081 | iseq $u, "f\x{100}\x{100}b\x{100}r"; | |
2082 | ||
2083 | $u =~ s/\x{100}/e/g; | |
2084 | iseq $u, "feeber"; | |
2085 | } | |
2086 | ||
2087 | ||
2088 | { | |
2089 | local $Message = "UTF-8 bug with s///"; | |
2090 | # check utf8/non-utf8 mixtures | |
2091 | # try to force all float/anchored check combinations | |
2092 | ||
2093 | my $c = "\x{100}"; | |
2094 | my $subst; | |
2095 | for my $re ("xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", | |
2096 | "xx.*(?=$c)", "(?=$c).*xx",) { | |
2097 | ok "xxx" !~ /$re/; | |
2098 | ok +($subst = "xxx") !~ s/$re//; | |
2099 | } | |
2100 | for my $re ("xx.*$c*", "$c*.*xx") { | |
2101 | ok "xxx" =~ /$re/; | |
2102 | ok +($subst = "xxx") =~ s/$re//; | |
2103 | iseq $subst, ""; | |
2104 | } | |
2105 | for my $re ("xxy*", "y*xx") { | |
2106 | ok "xx$c" =~ /$re/; | |
2107 | ok +($subst = "xx$c") =~ s/$re//; | |
2108 | iseq $subst, $c; | |
2109 | ok "xy$c" !~ /$re/; | |
2110 | ok +($subst = "xy$c") !~ s/$re//; | |
2111 | } | |
2112 | for my $re ("xy$c*z", "x$c*yz") { | |
2113 | ok "xyz" =~ /$re/; | |
2114 | ok +($subst = "xyz") =~ s/$re//; | |
2115 | iseq $subst, ""; | |
2116 | } | |
2117 | } | |
2118 | ||
2119 | ||
2120 | { | |
2121 | local $Message = "qr /.../x"; | |
2122 | my $R = qr / A B C # D E/x; | |
2123 | ok "ABCDE" =~ $R && $& eq "ABC"; | |
2124 | ok "ABCDE" =~ /$R/ && $& eq "ABC"; | |
2125 | ok "ABCDE" =~ m/$R/ && $& eq "ABC"; | |
2126 | ok "ABCDE" =~ /($R)/ && $1 eq "ABC"; | |
2127 | ok "ABCDE" =~ m/($R)/ && $1 eq "ABC"; | |
2128 | } | |
2129 | ||
2130 | ||
2131 | { | |
2132 | local $BugId = "20020412.005"; | |
2133 | local $Message = "Correct pmop flags checked when empty pattern"; | |
2134 | ||
2135 | # Requires reuse of last successful pattern. | |
2136 | my $num = 123; | |
2137 | $num =~ /\d/; | |
2138 | for (0 .. 1) { | |
2139 | my $match = ?? + 0; | |
2140 | ok $match != $_, $Message, | |
2141 | sprintf "'match one' %s on %s iteration" => | |
2142 | $match ? 'succeeded' : 'failed', | |
2143 | $_ ? 'second' : 'first'; | |
2144 | } | |
2145 | $num =~ /(\d)/; | |
2146 | my $result = join "" => $num =~ //g; | |
2147 | iseq $result, $num; | |
2148 | } | |
2149 | ||
2150 | ||
2151 | { | |
2152 | local $BugId = '20020630.002'; | |
2153 | local $Message = 'UTF-8 regex matches above 32k'; | |
2154 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { | |
2155 | my ($type, $char) = @$_; | |
2156 | for my $len (32000, 32768, 33000) { | |
2157 | my $s = $char . "f" x $len; | |
2158 | my $r = $s =~ /$char([f]*)/gc; | |
2159 | ok $r, $Message, "<$type x $len>"; | |
2160 | ok !$r || pos ($s) == $len + 1, $Message, | |
2161 | "<$type x $len>; pos = @{[pos $s]}"; | |
2162 | } | |
2163 | } | |
2164 | } | |
2165 | ||
2166 | ||
2167 | { | |
2168 | our $a = bless qr /foo/ => 'Foo'; | |
2169 | ok 'goodfood' =~ $a, "Reblessed qr // matches"; | |
2170 | iseq $a, '(?-xism:foo)', "Reblessed qr // stringifies"; | |
2171 | my $x = "\x{3fe}"; | |
2172 | my $z = my $y = "\317\276"; # Byte representation of $x | |
2173 | $a = qr /$x/; | |
2174 | ok $x =~ $a, "UTF-8 interpolation in qr //"; | |
2175 | ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; | |
2176 | ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; | |
2177 | ok "a$x" =~ /^a(??{$a})\z/, | |
2178 | "Postponed interpolation of qr // preserves UTF-8"; | |
2179 | { | |
2180 | local $BugId = '17776'; | |
2181 | iseq length qr /##/x, 12, "## in qr // doesn't corrupt memory"; | |
2182 | } | |
2183 | { | |
2184 | use re 'eval'; | |
2185 | ok "$x$x" =~ /^$x(??{$x})\z/, | |
2186 | "Postponed UTF-8 string in UTF-8 re matches UTF-8"; | |
2187 | ok "$y$x" =~ /^$y(??{$x})\z/, | |
2188 | "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; | |
2189 | ok "$y$x" !~ /^$y(??{$y})\z/, | |
2190 | "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; | |
2191 | ok "$x$x" !~ /^$x(??{$y})\z/, | |
2192 | "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; | |
2193 | ok "$y$y" =~ /^$y(??{$y})\z/, | |
2194 | "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; | |
2195 | ok "$x$y" =~ /^$x(??{$y})\z/, | |
2196 | "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; | |
2197 | ||
2198 | $y = $z; # Reset $y after upgrade. | |
2199 | ok "$x$y" !~ /^$x(??{$x})\z/, | |
2200 | "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; | |
2201 | ok "$y$y" !~ /^$y(??{$x})\z/, | |
2202 | "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; | |
2203 | } | |
2204 | } | |
2205 | ||
2206 | ||
2207 | { | |
2208 | local $PatchId = '18179'; | |
2209 | my $s = "\x{100}" x 5; | |
2210 | my $ok = $s =~ /(\x{100}{4})/; | |
2211 | my ($ord, $len) = (ord $1, length $1); | |
2212 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift"; | |
2213 | } | |
2214 | ||
2215 | ||
2216 | { | |
2217 | local $BugId = '15763'; | |
2218 | our $a = "x\x{100}"; | |
2219 | chop $a; # Leaves the UTF-8 flag | |
2220 | $a .= "y"; # 1 byte before 'y'. | |
2221 | ||
2222 | ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8'; | |
2223 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
2224 | ||
2225 | ok $a =~ /^\Cy/, 'match \Cy'; | |
2226 | ok $a =~ /^\C{1}y/, 'match \C{1}y'; | |
2227 | ||
2228 | ok $a !~ /^\C\Cy/, q {don't match two \Cy}; | |
2229 | ok $a !~ /^\C{2}y/, q {don't match \C{2}y}; | |
2230 | ||
2231 | $a = "\x{100}y"; # 2 bytes before "y" | |
2232 | ||
2233 | ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8'; | |
2234 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
2235 | ok $a =~ /^\C\C/, 'match two \C'; | |
2236 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
2237 | ||
2238 | ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'; | |
2239 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
2240 | ||
2241 | ok $a =~ /^\C\Cy/, 'match two \C'; | |
2242 | ok $a =~ /^\C{2}y/, 'match \C{2}'; | |
2243 | ||
2244 | ok $a !~ /^\C\C\Cy/, q {don't match three \Cy}; | |
2245 | ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy}; | |
2246 | ok $a !~ /^\C{3}y/, q {don't match \C{3}y}; | |
2247 | ||
2248 | $a = "\x{1000}y"; # 3 bytes before "y" | |
2249 | ||
2250 | ok $a =~ /^\C/, 'match one \C on three-byte UTF-8'; | |
2251 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
2252 | ok $a =~ /^\C\C/, 'match two \C'; | |
2253 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
2254 | ok $a =~ /^\C\C\C/, 'match three \C'; | |
2255 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
2256 | ||
2257 | ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'; | |
2258 | ok $a =~ /^\C{4}/, 'match \C{4}'; | |
2259 | ||
2260 | ok $a =~ /^\C\C\Cy/, 'match three \Cy'; | |
2261 | ok $a =~ /^\C{3}y/, 'match \C{3}y'; | |
2262 | ||
2263 | ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy}; | |
2264 | ok $a !~ /^\C{4}y/, q {don't match \C{4}y}; | |
2265 | } | |
2266 | ||
2267 | ||
2268 | { | |
2269 | local $\; | |
2270 | $_ = 'aaaaaaaaaa'; | |
2271 | utf8::upgrade($_); chop $_; $\="\n"; | |
2272 | ok /[^\s]+/, 'm/[^\s]/ utf8'; | |
2273 | ok /[^\d]+/, 'm/[^\d]/ utf8'; | |
2274 | ok +($a = $_, $_ =~ s/[^\s]+/./g), 's/[^\s]/ utf8'; | |
2275 | ok +($a = $_, $a =~ s/[^\d]+/./g), 's/[^\s]/ utf8'; | |
2276 | } | |
2277 | ||
2278 | ||
2279 | { | |
2280 | local $BugId = '15397'; | |
2281 | local $Message = 'UTF-8 matching'; | |
2282 | ok "\x{100}" =~ /\x{100}/; | |
2283 | ok "\x{100}" =~ /(\x{100})/; | |
2284 | ok "\x{100}" =~ /(\x{100}){1}/; | |
2285 | ok "\x{100}\x{100}" =~ /(\x{100}){2}/; | |
2286 | ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/; | |
2287 | } | |
2288 | ||
2289 | ||
2290 | { | |
2291 | local $BugId = '7471'; | |
2292 | local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times'; | |
2293 | local $_ = 'CD'; | |
2294 | ok /(AB)*?CD/ && !defined $1; | |
2295 | ok /(AB)*CD/ && !defined $1; | |
2296 | } | |
2297 | ||
2298 | ||
2299 | { | |
2300 | local $BugId = '3547'; | |
2301 | local $Message = "Caching shouldn't prevent match"; | |
2302 | my $pattern = "^(b+?|a){1,2}c"; | |
2303 | ok "bac" =~ /$pattern/ && $1 eq 'a'; | |
2304 | ok "bbac" =~ /$pattern/ && $1 eq 'a'; | |
2305 | ok "bbbac" =~ /$pattern/ && $1 eq 'a'; | |
2306 | ok "bbbbac" =~ /$pattern/ && $1 eq 'a'; | |
2307 | } | |
2308 | ||
2309 | ||
2310 | ||
2311 | { | |
2312 | local $BugId = '18232'; | |
2313 | local $Message = '$1 should keep UTF-8 ness'; | |
2314 | ok "\x{100}" =~ /(.)/; | |
2315 | iseq $1, "\x{100}", '$1 is UTF-8'; | |
2316 | { 'a' =~ /./; } | |
2317 | iseq $1, "\x{100}", '$1 is still UTF-8'; | |
2318 | isneq $1, "\xC4\x80", '$1 is not non-UTF-8'; | |
2319 | } | |
2320 | ||
2321 | ||
2322 | { | |
2323 | local $BugId = '19767'; | |
2324 | local $Message = "Optimizer doesn't prematurely reject match"; | |
2325 | use utf8; | |
2326 | ||
2327 | my $attr = 'Name-1'; | |
2328 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
2329 | my $NormalWord = qr /${NormalChar}+?/; | |
2330 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; | |
2331 | ||
2332 | $attr =~ /^$/; | |
2333 | ok $attr =~ $PredNameHyphen; # Original test. | |
2334 | ||
2335 | "a" =~ m/[b]/; | |
2336 | ok "0" =~ /\p{N}+\z/; # Variant. | |
2337 | } | |
2338 | ||
2339 | ||
2340 | { | |
2341 | local $BugId = '20683'; | |
2342 | local $Message = "(??{ }) doesn't return stale values"; | |
2343 | our $p = 1; | |
2344 | foreach (1, 2, 3, 4) { | |
2345 | $p ++ if /(??{ $p })/ | |
2346 | } | |
2347 | iseq $p, 5; | |
2348 | ||
2349 | { | |
2350 | package P; | |
2351 | $a = 1; | |
2352 | sub TIESCALAR {bless []} | |
2353 | sub FETCH {$a ++} | |
2354 | } | |
2355 | tie $p, "P"; | |
2356 | foreach (1, 2, 3, 4) { | |
2357 | /(??{ $p })/ | |
2358 | } | |
2359 | iseq $p, 5; | |
2360 | } | |
2361 | ||
2362 | ||
2363 | { | |
2364 | # Subject: Odd regexp behavior | |
2365 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
2366 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
2367 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
2368 | # To: perl-unicode@perl.org | |
2369 | ||
2370 | local $Message = 'Markus Kuhn 2003-02-26'; | |
2371 | ||
2372 | my $x = "\x{2019}\nk"; | |
2373 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
2374 | ok $x eq "\x{2019} k"; | |
2375 | ||
2376 | $x = "b\nk"; | |
2377 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
2378 | ok $x eq "b k"; | |
2379 | ||
2380 | ok "\x{2019}" =~ /\S/; | |
2381 | } | |
2382 | ||
2383 | ||
2384 | { | |
2385 | local $BugId = '21411'; | |
2386 | local $Message = "(??{ .. }) in split doesn't corrupt its stack"; | |
2387 | our $i; | |
2388 | ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-'; | |
2389 | no warnings 'syntax'; | |
2390 | @_ = split /(?{'WOW'})/, 'abc'; | |
2391 | local $" = "|"; | |
2392 | iseq "@_", "a|b|c"; | |
2393 | } | |
2394 | ||
2395 | ||
2396 | { | |
2397 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it | |
2398 | # hasn't been crashing. Disable this test until it is fixed properly. | |
2399 | # XXX also check what it returns rather than just doing ok(1,...) | |
2400 | # split /(?{ split "" })/, "abc"; | |
2401 | local $TODO = "Recursive split is still broken"; | |
2402 | ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; | |
2403 | } | |
2404 | ||
2405 | ||
2406 | { | |
2407 | ok "\x{100}\n" =~ /\x{100}\n$/, "UTF-8 length cache and fbm_compile"; | |
2408 | } | |
2409 | ||
2410 | ||
2411 | { | |
2412 | package Str; | |
2413 | use overload q /""/ => sub {${$_ [0]};}; | |
2414 | sub new {my ($c, $v) = @_; bless \$v, $c;} | |
2415 | ||
2416 | package main; | |
2417 | $_ = Str -> new ("a\x{100}/\x{100}b"); | |
2418 | ok join (":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"; | |
2419 | } | |
2420 | ||
2421 | ||
2422 | { | |
2423 | local $BugId = '17757'; | |
2424 | $_ = "code: 'x' { '...' }\n"; study; | |
2425 | my @x; push @x, $& while m/'[^\']*'/gx; | |
2426 | local $" = ":"; | |
2427 | iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop"; | |
2428 | } | |
2429 | ||
2430 | ||
2431 | { | |
2432 | my $re = qq /^([^X]*)X/; | |
2433 | utf8::upgrade ($re); | |
2434 | ok "\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; | |
2435 | } | |
2436 | ||
2437 | ||
2438 | { | |
2439 | local $BugId = '22354'; | |
2440 | sub func ($) { | |
2441 | ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]"; | |
2442 | ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m"; | |
2443 | } | |
2444 | func "standalone"; | |
2445 | $_ = "x"; s/x/func "in subst"/e; | |
2446 | $_ = "x"; s/x/func "in multiline subst"/em; | |
2447 | ||
2448 | # | |
2449 | # Next two give 'panic: malloc'. | |
2450 | # Outcommented, using two TODOs. | |
2451 | # | |
2452 | local $TODO = 'panic: malloc'; | |
2453 | local $Message = 'Postponed regexp and propaged modifier'; | |
2454 | # ok 0 for 1 .. 2; | |
2455 | SKIP: { | |
2456 | skip "panic: malloc", 2; | |
2457 | $_ = "x"; /x(?{func "in regexp"})/; | |
2458 | $_ = "x"; /x(?{func "in multiline regexp"})/m; | |
2459 | } | |
2460 | } | |
2461 | ||
2462 | ||
2463 | { | |
2464 | local $BugId = '19049'; | |
2465 | $_ = "abcdef\n"; | |
2466 | my @x = m/./g; | |
2467 | iseq "abcde", $`, 'Global match sets $`'; | |
2468 | } | |
2469 | ||
2470 | ||
2471 | { | |
2472 | ok "123\x{100}" =~ /^.*1.*23\x{100}$/, | |
2473 | 'UTF-8 + multiple floating substr'; | |
2474 | } | |
2475 | ||
2476 | ||
2477 | { | |
2478 | local $Message = '<20030808193656.5109.1@llama.ni-s.u-net.com>'; | |
2479 | ||
2480 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
2481 | ok " \x{101}" =~ qr/\x{100}/i; | |
2482 | ||
2483 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
2484 | ok " \x{1E01}" =~ qr/\x{1E00}/i; | |
2485 | ||
2486 | # DESERET SMALL/CAPITAL LETTER LONG I | |
2487 | ok " \x{10428}" =~ qr/\x{10400}/i; | |
2488 | ||
2489 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
2490 | ok " \x{1E01}x" =~ qr/\x{1E00}X/i; | |
2491 | } | |
2492 | ||
2493 | ||
2494 | { | |
2495 | # [perl #23769] Unicode regex broken on simple example | |
2496 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
2497 | local $BugId = '23769'; | |
2498 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; | |
2499 | local $Message = $Mess; | |
2500 | ||
2501 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; | |
2502 | ||
2503 | ok $s =~ /\x{a0}/; | |
2504 | ok $s =~ /\x{a0}+/; | |
2505 | ok $s =~ /\x{a0}\x{a0}/; | |
2506 | ||
2507 | $Message = "$Mess (easy variant)"; | |
2508 | ok "aaa\x{100}" =~ /(a+)/; | |
2509 | iseq $1, "aaa"; | |
2510 | ||
2511 | $Message = "$Mess (easy invariant)"; | |
2512 | ok "aaa\x{100} " =~ /(a+?)/; | |
2513 | iseq $1, "a"; | |
2514 | ||
2515 | $Message = "$Mess (regrepeat variant)"; | |
2516 | ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/; | |
2517 | iseq $1, "\xa0"; | |
2518 | ||
2519 | $Message = "$Mess (regrepeat invariant)"; | |
2520 | ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/; | |
2521 | iseq $1, "\xa0\xa0\xa0"; | |
2522 | ||
2523 | $Message = "$Mess (hard variant)"; | |
2524 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/; | |
2525 | iseq $1, "\xa0\xa1"; | |
2526 | ||
2527 | $Message = "$Mess (hard invariant)"; | |
2528 | ok "ababab\x{100} " =~ /((?:ab)+)/; | |
2529 | iseq $1, 'ababab'; | |
2530 | ||
2531 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/; | |
2532 | iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1"; | |
2533 | ||
2534 | ok "ababab\x{100} " =~ /((?:ab)+?)/; | |
2535 | iseq $1, "ab"; | |
2536 | ||
2537 | $Message = "Don't match first byte of UTF-8 representation"; | |
2538 | ok "\xc4\xc4\xc4" !~ /(\x{100}+)/; | |
2539 | ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/; | |
2540 | ok "\xc4\xc4\xc4" !~ /(\x{100}++)/; | |
2541 | } | |
2542 | ||
2543 | ||
2544 | { | |
2545 | for (120 .. 130) { | |
2546 | my $head = 'x' x $_; | |
2547 | local $Message = q [Don't misparse \x{...} in regexp ] . | |
2548 | q [near 127 char EXACT limit]; | |
2549 | for my $tail ('\x{0061}', '\x{1234}', '\x61') { | |
2550 | eval_ok qq ["$head$tail" =~ /$head$tail/]; | |
2551 | } | |
2552 | local $Message = q [Don't misparse \N{...} in regexp ] . | |
2553 | q [near 127 char EXACT limit]; | |
2554 | for my $tail ('\N{SNOWFLAKE}') { | |
2555 | eval_ok qq [use charnames ':full'; | |
2556 | "$head$tail" =~ /$head$tail/]; | |
2557 | } | |
2558 | } | |
2559 | } | |
2560 | ||
2561 | ||
2562 | { | |
2563 | # perl panic: pp_match start/end pointers | |
2564 | local $BugId = '25269'; | |
2565 | iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, | |
2566 | 'Captures can move backwards in string'; | |
2567 | } | |
2568 | ||
2569 | ||
2570 | { | |
2571 | local $BugId = '27940'; # \cA not recognized in character classes | |
2572 | ok "a\cAb" =~ /\cA/, '\cA in pattern'; | |
2573 | ok "a\cAb" =~ /[\cA]/, '\cA in character class'; | |
2574 | ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'; | |
2575 | ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'; | |
2576 | ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'; | |
2577 | ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'; | |
2578 | ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'; | |
2579 | ok "ab" !~ /a\cIb/x, '\cI in pattern'; | |
2580 | } | |
2581 | ||
2582 | ||
2583 | { | |
2584 | # perl #28532: optional zero-width match at end of string is ignored | |
2585 | local $BugId = '28532'; | |
2586 | ok "abc" =~ /^abc(\z)?/ && defined($1), | |
2587 | 'Optional zero-width match at end of string'; | |
2588 | ok "abc" =~ /^abc(\z)??/ && !defined($1), | |
2589 | 'Optional zero-width match at end of string'; | |
2590 | } | |
2591 | ||
2592 | ||
2593 | ||
2594 | { # TRIE related | |
2595 | our @got = (); | |
2596 | "words" =~ /(word|word|word)(?{push @got, $1})s$/; | |
2597 | iseq @got, 1, "TRIE optimation"; | |
2598 | ||
2599 | @got = (); | |
2600 | "words" =~ /(word|word|word)(?{push @got,$1})s$/i; | |
2601 | iseq @got, 1,"TRIEF optimisation"; | |
2602 | ||
2603 | my @nums = map {int rand 1000} 1 .. 100; | |
2604 | my $re = "(" . (join "|", @nums) . ")"; | |
2605 | $re = qr/\b$re\b/; | |
2606 | ||
2607 | foreach (@nums) { | |
2608 | ok $_ =~ /$re/, "Trie nums"; | |
2609 | } | |
2610 | ||
2611 | $_ = join " ", @nums; | |
2612 | @got = (); | |
2613 | push @got, $1 while /$re/g; | |
2614 | ||
2615 | my %count; | |
2616 | $count {$_} ++ for @got; | |
2617 | my $ok = 1; | |
2618 | for (@nums) { | |
2619 | $ok = 0 if --$count {$_} < 0; | |
2620 | } | |
2621 | ok $ok, "Trie min count matches"; | |
2622 | } | |
2623 | ||
2624 | ||
2625 | { | |
2626 | # TRIE related | |
2627 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
2628 | ok "foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i && | |
2629 | $1 eq "\x{101}foo", | |
2630 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"; | |
2631 | ||
2632 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
2633 | ok "foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i && | |
2634 | $1 eq "\x{1E01}foo", | |
2635 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"; | |
2636 | ||
2637 | # DESERET SMALL/CAPITAL LETTER LONG I | |
2638 | ok "foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i && | |
2639 | $1 eq "\x{10428}foo", | |
2640 | "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"; | |
2641 | ||
2642 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
2643 | ok "foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i && | |
2644 | $1 eq "\x{1E01}xfoo", | |
2645 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"; | |
2646 | ||
2647 | use charnames ':full'; | |
2648 | ||
2649 | my $s = "\N{LATIN SMALL LETTER SHARP S}"; | |
2650 | ok "foba ba$s" =~ qr/(foo|Ba$s|bar)/i && $1 eq "ba$s", | |
2651 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2652 | ok "foba ba$s" =~ qr/(Ba$s|foo|bar)/i && $1 eq "ba$s", | |
2653 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2654 | ok "foba ba$s" =~ qr/(foo|bar|Ba$s)/i && $1 eq "ba$s", | |
2655 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2656 | ||
2657 | ok "foba ba$s" =~ qr/(foo|Bass|bar)/i && $1 eq "ba$s", | |
2658 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2659 | ||
2660 | ok "foba ba$s" =~ qr/(foo|BaSS|bar)/i && $1 eq "ba$s", | |
2661 | "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"; | |
2662 | ||
2663 | ok "foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i | |
2664 | && $1 eq "ba${s}pxySS$s$s", | |
2665 | "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"; | |
2666 | } | |
2667 | ||
2668 | ||
2669 | SKIP: | |
2670 | { | |
2671 | print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; | |
2672 | my @normal = qw [the are some normal words]; | |
2673 | ||
2674 | skip "Skipped Psycho", 2 * @normal if $ENV {PERL_SKIP_PSYCHO_TEST}; | |
2675 | ||
2676 | local $" = "|"; | |
2677 | ||
2678 | my @psycho = (@normal, map chr $_, 255 .. 20000); | |
2679 | my $psycho1 = "@psycho"; | |
2680 | for (my $i = @psycho; -- $i;) { | |
2681 | my $j = int rand (1 + $i); | |
2682 | @psycho [$i, $j] = @psycho [$j, $i]; | |
2683 | } | |
2684 | my $psycho2 = "@psycho"; | |
2685 | ||
2686 | foreach my $word (@normal) { | |
2687 | ok $word =~ /($psycho1)/ && $1 eq $word, 'Psycho'; | |
2688 | ok $word =~ /($psycho2)/ && $1 eq $word, 'Psycho'; | |
2689 | } | |
2690 | } | |
2691 | ||
2692 | ||
2693 | { | |
2694 | local $BugId = '36207'; | |
2695 | my $utf8 = "\xe9\x{100}"; chop $utf8; | |
2696 | my $latin1 = "\xe9"; | |
2697 | ||
2698 | ok $utf8 =~ /\xe9/i, "utf8/latin"; | |
2699 | ok $utf8 =~ /$latin1/i, "utf8/latin runtime"; | |
2700 | ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"; | |
2701 | ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"; | |
2702 | ||
2703 | ok "\xe9" =~ /$utf8/i, "latin/utf8"; | |
2704 | ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie"; | |
2705 | ok $latin1 =~ /$utf8/i, "latin/utf8 runtime"; | |
2706 | ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime"; | |
2707 | } | |
2708 | ||
2709 | ||
2710 | { | |
2711 | local $BugId = '37038'; | |
2712 | my $s = "abcd"; | |
2713 | $s =~ /(..)(..)/g; | |
2714 | $s = $1; | |
2715 | $s = $2; | |
2716 | iseq $2, 'cd', | |
2717 | "Assigning to original string does not corrupt match vars"; | |
2718 | } | |
2719 | ||
2720 | ||
2721 | { | |
2722 | { | |
2723 | package wooosh; | |
2724 | sub gloople {"!"} | |
2725 | } | |
2726 | my $aeek = bless {} => 'wooosh'; | |
2727 | eval_ok sub {$aeek -> gloople () =~ /(.)/g}, | |
2728 | "//g match against return value of sub"; | |
2729 | ||
2730 | sub gloople {"!"} | |
2731 | eval_ok sub {gloople () =~ /(.)/g}, | |
2732 | "26410 didn't affect sub calls for some reason"; | |
2733 | } | |
2734 | ||
2735 | ||
2736 | { | |
2737 | local $TODO = "See changes 26925-26928, which reverted change 26410"; | |
2738 | { | |
2739 | package lv; | |
2740 | our $var = "abc"; | |
2741 | sub variable : lvalue {$var} | |
2742 | } | |
2743 | my $o = bless [] => 'lv'; | |
2744 | my $f = ""; | |
2745 | my $r = eval { | |
2746 | for (1 .. 2) { | |
2747 | $f .= $1 if $o -> variable =~ /(.)/g; | |
2748 | } | |
2749 | 1; | |
2750 | }; | |
2751 | if ($r) { | |
2752 | iseq $f, "ab", "pos() retained between calls"; | |
2753 | } | |
2754 | else { | |
2755 | local $TODO; | |
2756 | ok 0, "Code failed: $@"; | |
2757 | } | |
2758 | ||
2759 | our $var = "abc"; | |
2760 | sub variable : lvalue {$var} | |
2761 | my $g = ""; | |
2762 | my $s = eval { | |
2763 | for (1 .. 2) { | |
2764 | $g .= $1 if variable =~ /(.)/g; | |
2765 | } | |
2766 | 1; | |
2767 | }; | |
2768 | if ($s) { | |
2769 | iseq $g, "ab", "pos() retained between calls"; | |
2770 | } | |
2771 | else { | |
2772 | local $TODO; | |
2773 | ok 0, "Code failed: $@"; | |
2774 | } | |
2775 | } | |
2776 | ||
2777 | ||
2778 | SKIP: | |
2779 | { | |
2780 | local $BugId = '37836'; | |
2781 | skip "In EBCDIC" if $IS_EBCDIC; | |
2782 | no warnings 'utf8'; | |
2783 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 | |
2784 | my $ret = 0; | |
2785 | eval_ok sub {!($ret = s/[\0]+//g)}, | |
2786 | "Ill-formed UTF-8 doesn't match NUL in class"; | |
2787 | } | |
2788 | ||
2789 | ||
2790 | { | |
2791 | # chr(65535) should be allowed in regexes | |
2792 | local $BugId = '38293'; | |
2793 | no warnings 'utf8'; # To allow non-characters | |
2794 | my ($c, $r, $s); | |
2795 | ||
2796 | $c = chr 0xffff; | |
2797 | $c =~ s/$c//g; | |
2798 | ok $c eq "", "U+FFFF, parsed as atom"; | |
2799 | ||
2800 | $c = chr 0xffff; | |
2801 | $r = "\\$c"; | |
2802 | $c =~ s/$r//g; | |
2803 | ok $c eq "", "U+FFFF backslashed, parsed as atom"; | |
2804 | ||
2805 | $c = chr 0xffff; | |
2806 | $c =~ s/[$c]//g; | |
2807 | ok $c eq "", "U+FFFF, parsed in class"; | |
2808 | ||
2809 | $c = chr 0xffff; | |
2810 | $r = "[\\$c]"; | |
2811 | $c =~ s/$r//g; | |
2812 | ok $c eq "", "U+FFFF backslashed, parsed in class"; | |
2813 | ||
2814 | $s = "A\x{ffff}B"; | |
2815 | $s =~ s/\x{ffff}//i; | |
2816 | ok $s eq "AB", "U+FFFF, EXACTF"; | |
2817 | ||
2818 | $s = "\x{ffff}A"; | |
2819 | $s =~ s/\bA//; | |
2820 | ok $s eq "\x{ffff}", "U+FFFF, BOUND"; | |
2821 | ||
2822 | $s = "\x{ffff}!"; | |
2823 | $s =~ s/\B!//; | |
2824 | ok $s eq "\x{ffff}", "U+FFFF, NBOUND"; | |
2825 | } | |
2826 | ||
2827 | ||
2828 | { | |
2829 | local $BugId = '39583'; | |
2830 | ||
2831 | # The printing characters | |
2832 | my @chars = ("A" .. "Z"); | |
2833 | my $delim = ","; | |
2834 | my $size = 32771 - 4; | |
2835 | my $str = ''; | |
2836 | ||
2837 | # Create some random junk. Inefficient, but it works. | |
2838 | for (my $i = 0; $i < $size; $ i++) { | |
2839 | $str .= $chars [rand @chars]; | |
2840 | } | |
2841 | ||
2842 | $str .= ($delim x 4); | |
2843 | my $res; | |
2844 | my $matched; | |
2845 | ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches"; | |
2846 | iseq $str, "", "Empty string"; | |
2847 | ok defined $1 && length ($1) == $size, '$1 is correct size'; | |
2848 | } | |
2849 | ||
2850 | ||
2851 | { | |
2852 | local $BugId = '27940'; | |
2853 | ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'; | |
2854 | ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'; | |
2855 | ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'; | |
2856 | ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'; | |
2857 | ||
2858 | ok "X\0A" =~ /X\c@?A/, '\c@?'; | |
2859 | ok "X\0A" =~ /X\c@*A/, '\c@*'; | |
2860 | ok "X\0A" =~ /X\c@(A)/, '\c@('; | |
2861 | ok "X\0A" =~ /X(\c@)A/, '\c@)'; | |
2862 | ok "X\0A" =~ /X\c@|ZA/, '\c@|'; | |
2863 | ||
2864 | ok "X\@A" =~ /X@?A/, '@?'; | |
2865 | ok "X\@A" =~ /X@*A/, '@*'; | |
2866 | ok "X\@A" =~ /X@(A)/, '@('; | |
2867 | ok "X\@A" =~ /X(@)A/, '@)'; | |
2868 | ok "X\@A" =~ /X@|ZA/, '@|'; | |
2869 | ||
2870 | local $" = ','; # non-whitespace and non-RE-specific | |
2871 | ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus'; | |
2872 | ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/'; | |
2873 | ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/'; | |
2874 | ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x'; | |
2875 | ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x'; | |
2876 | } | |
2877 | ||
2878 | ||
2879 | { | |
2880 | BEGIN { | |
2881 | unshift @INC, 'lib'; | |
2882 | } | |
2883 | use Cname; | |
2884 | ||
2885 | ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; | |
2886 | my $test = 1233; | |
2887 | # | |
2888 | # Why doesn't must_warn work here? | |
2889 | # | |
2890 | my $w; | |
2891 | local $SIG {__WARN__} = sub {$w .= "@_"}; | |
2892 | eval 'q(xxWxx) =~ /[\N{WARN}]/'; | |
2893 | ok $w && $w =~ /^Ignoring excess chars from/, | |
2894 | "Ignoring excess chars warning"; | |
2895 | ||
2896 | undef $w; | |
2897 | eval q [ok "\0" !~ /[\N{EMPTY-STR}XY]/, | |
2898 | "Zerolength charname in charclass doesn't match \\0"]; | |
2899 | ok $w && $w =~ /^Ignoring zero length/, | |
2900 | 'Ignoring zero length \N{%} in character class warning'; | |
2901 | ||
2902 | ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; | |
2903 | ok 'ABC' =~ /(\N{EVIL})/, 'Charname caching $1'; | |
2904 | ok 'xy' =~ /x\N{EMPTY-STR}y/, | |
2905 | 'Empty string charname produces NOTHING node'; | |
2906 | ok '' =~ /\N{EMPTY-STR}/, | |
2907 | 'Empty string charname produces NOTHING node'; | |
2908 | ||
2909 | } | |
2910 | ||
2911 | ||
2912 | { | |
2913 | use charnames ':full'; | |
2914 | ||
2915 | ok 'aabc' !~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; | |
2916 | ok 'a+bc' =~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; | |
2917 | ||
2918 | ok ' A B' =~ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
2919 | 'Intermixed named and unicode escapes'; | |
2920 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
2921 | /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
2922 | 'Intermixed named and unicode escapes'; | |
2923 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
2924 | /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, | |
2925 | 'Intermixed named and unicode escapes'; | |
2926 | } | |
2927 | ||
2928 | ||
2929 | { | |
2930 | our $brackets; | |
2931 | $brackets = qr{ | |
2932 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
2933 | }x; | |
2934 | ||
2935 | ok "{b{c}d" !~ m/^((??{ $brackets }))/, "Bracket mismatch"; | |
2936 | ||
2937 | SKIP: { | |
2938 | our @stack = (); | |
2939 | my @expect = qw( | |
2940 | stuff1 | |
2941 | stuff2 | |
2942 | <stuff1>and<stuff2> | |
2943 | right | |
2944 | <right> | |
2945 | <<right>> | |
2946 | <<<right>>> | |
2947 | <<stuff1>and<stuff2>><<<<right>>>> | |
2948 | ); | |
2949 | ||
2950 | local $_ = '<<<stuff1>and<stuff2>><<<<right>>>>>'; | |
2951 | ok /^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/, | |
2952 | "Recursion matches"; | |
2953 | iseq @stack, @expect, "Right amount of matches" | |
2954 | or skip "Won't test individual results as count isn't equal", | |
2955 | 0 + @expect; | |
2956 | my $idx = 0; | |
2957 | foreach my $expect (@expect) { | |
2958 | iseq $stack [$idx], $expect, | |
2959 | "Expecting '$expect' at stack pos #$idx"; | |
2960 | $idx ++; | |
2961 | } | |
2962 | } | |
2963 | } | |
2964 | ||
2965 | ||
2966 | { | |
2967 | my $s = '123453456'; | |
2968 | $s =~ s/(?<digits>\d+)\k<digits>/$+{digits}/; | |
2969 | ok $s eq '123456', 'Named capture (angle brackets) s///'; | |
2970 | $s = '123453456'; | |
2971 | $s =~ s/(?'digits'\d+)\k'digits'/$+{digits}/; | |
2972 | ok $s eq '123456', 'Named capture (single quotes) s///'; | |
2973 | } | |
2974 | ||
2975 | ||
2976 | { | |
2977 | my @ary = ( | |
2978 | pack('U', 0x00F1), # n-tilde | |
2979 | '_'.pack('U', 0x00F1), # _ + n-tilde | |
2980 | 'c'.pack('U', 0x0327), # c + cedilla | |
2981 | pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla | |
2982 | 'a'.pack('U', 0x00B2), # a + superscript two | |
2983 | pack('U', 0x0391), # ALPHA | |
2984 | pack('U', 0x0391).'2', # ALPHA + 2 | |
2985 | pack('U', 0x0391).'_', # ALPHA + _ | |
2986 | ); | |
2987 | ||
2988 | for my $uni (@ary) { | |
2989 | my ($r1, $c1, $r2, $c2) = eval qq { | |
2990 | use utf8; | |
2991 | scalar ("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), | |
2992 | \$+{${uni}}, | |
2993 | scalar ("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), | |
2994 | \$+{${uni}}; | |
2995 | }; | |
2996 | ok $r1, "Named capture UTF (?'')"; | |
2997 | ok defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"; | |
2998 | ok $r2, "Named capture UTF (?<>)"; | |
2999 | ok defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"; | |
3000 | } | |
3001 | } | |
3002 | ||
3003 | ||
3004 | { | |
3005 | my $s = 'foo bar baz'; | |
3006 | my (@k, @v, @fetch, $res); | |
3007 | my $count = 0; | |
3008 | my @names = qw ($+{A} $+{B} $+{C}); | |
3009 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { | |
3010 | while (my ($k, $v) = each (%+)) { | |
3011 | $count++; | |
3012 | } | |
3013 | @k = sort keys (%+); | |
3014 | @v = sort values (%+); | |
3015 | $res = 1; | |
3016 | push @fetch, | |
3017 | ["$+{A}", "$1"], | |
3018 | ["$+{B}", "$2"], | |
3019 | ["$+{C}", "$3"], | |
3020 | ; | |
3021 | } | |
3022 | foreach (0 .. 2) { | |
3023 | if ($fetch [$_]) { | |
3024 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
3025 | } else { | |
3026 | ok 0, $names[$_]; | |
3027 | } | |
3028 | } | |
3029 | iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/"; | |
3030 | iseq $count, 3, "Got 3 keys in %+ via each"; | |
3031 | iseq 0 + @k, 3, 'Got 3 keys in %+ via keys'; | |
3032 | iseq "@k", "A B C", "Got expected keys"; | |
3033 | iseq "@v", "bar baz foo", "Got expected values"; | |
3034 | eval ' | |
3035 | no warnings "uninitialized"; | |
3036 | print for $+ {this_key_doesnt_exist}; | |
3037 | '; | |
3038 | ok !$@, 'lvalue $+ {...} should not throw an exception'; | |
3039 | } | |
3040 | ||
3041 | ||
3042 | { | |
3043 | # | |
3044 | # Almost the same as the block above, except that the capture is nested. | |
3045 | # | |
3046 | local $BugId = '50496'; | |
3047 | my $s = 'foo bar baz'; | |
3048 | my (@k, @v, @fetch, $res); | |
3049 | my $count = 0; | |
3050 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); | |
3051 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { | |
3052 | while (my ($k,$v) = each(%+)) { | |
3053 | $count++; | |
3054 | } | |
3055 | @k = sort keys (%+); | |
3056 | @v = sort values (%+); | |
3057 | $res = 1; | |
3058 | push @fetch, | |
3059 | ["$+{A}", "$2"], | |
3060 | ["$+{B}", "$3"], | |
3061 | ["$+{C}", "$4"], | |
3062 | ["$+{D}", "$1"], | |
3063 | ; | |
3064 | } | |
3065 | foreach (0 .. 3) { | |
3066 | if ($fetch [$_]) { | |
3067 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
3068 | } else { | |
3069 | ok 0, $names [$_]; | |
3070 | } | |
3071 | } | |
3072 | iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/"; | |
3073 | iseq $count, 4, "Got 4 keys in %+ via each"; | |
3074 | iseq @k, 4, 'Got 4 keys in %+ via keys'; | |
3075 | iseq "@k", "A B C D", "Got expected keys"; | |
3076 | iseq "@v", "bar baz foo foo bar baz", "Got expected values"; | |
3077 | eval ' | |
3078 | no warnings "uninitialized"; | |
3079 | print for $+ {this_key_doesnt_exist}; | |
3080 | '; | |
3081 | ok !$@,'lvalue $+ {...} should not throw an exception'; | |
3082 | } | |
3083 | ||
3084 | ||
3085 | { | |
3086 | my $s = 'foo bar baz'; | |
3087 | my @res; | |
3088 | if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { | |
3089 | foreach my $name (sort keys(%-)) { | |
3090 | my $ary = $- {$name}; | |
3091 | foreach my $idx (0 .. $#$ary) { | |
3092 | push @res, "$name:$idx:$ary->[$idx]"; | |
3093 | } | |
3094 | } | |
3095 | } | |
3096 | my @expect = qw (A:0:1 A:1:3 B:0:2 B:1:4); | |
3097 | iseq "@res", "@expect", "Check %-"; | |
3098 | eval' | |
3099 | no warnings "uninitialized"; | |
3100 | print for $- {this_key_doesnt_exist}; | |
3101 | '; | |
3102 | ok !$@,'lvalue $- {...} should not throw an exception'; | |
3103 | } | |
3104 | ||
3105 | ||
3106 | SKIP: | |
3107 | { | |
3108 | # stress test CURLYX/WHILEM. | |
3109 | # | |
3110 | # This test includes varying levels of nesting, and according to | |
3111 | # profiling done against build 28905, exercises every code line in the | |
3112 | # CURLYX and WHILEM blocks, except those related to LONGJMP, the | |
3113 | # super-linear cache and warnings. It executes about 0.5M regexes | |
3114 | ||
3115 | skip "No psycho tests" if $ENV {PERL_SKIP_PSYCHO_TEST}; | |
3116 | print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; | |
3117 | my $r = qr/^ | |
3118 | (?: | |
3119 | ( (?:a|z+)+ ) | |
3120 | (?: | |
3121 | ( (?:b|z+){3,}? ) | |
3122 | ( | |
3123 | (?: | |
3124 | (?: | |
3125 | (?:c|z+){1,1}?z | |
3126 | )? | |
3127 | (?:c|z+){1,1} | |
3128 | )* | |
3129 | ) | |
3130 | (?:z*){2,} | |
3131 | ( (?:z+|d)+ ) | |
3132 | (?: | |
3133 | ( (?:e|z+)+ ) | |
3134 | )* | |
3135 | ( (?:f|z+)+ ) | |
3136 | )* | |
3137 | ( (?:z+|g)+ ) | |
3138 | (?: | |
3139 | ( (?:h|z+)+ ) | |
3140 | )* | |
3141 | ( (?:i|z+)+ ) | |
3142 | )+ | |
3143 | ( (?:j|z+)+ ) | |
3144 | (?: | |
3145 | ( (?:k|z+)+ ) | |
3146 | )* | |
3147 | ( (?:l|z+)+ ) | |
3148 | $/x; | |
3149 | ||
3150 | my $ok = 1; | |
3151 | my $msg = "CURLYX stress test"; | |
3152 | OUTER: | |
3153 | for my $a ("x","a","aa") { | |
3154 | for my $b ("x","bbb","bbbb") { | |
3155 | my $bs = $a.$b; | |
3156 | for my $c ("x","c","cc") { | |
3157 | my $cs = $bs.$c; | |
3158 | for my $d ("x","d","dd") { | |
3159 | my $ds = $cs.$d; | |
3160 | for my $e ("x","e","ee") { | |
3161 | my $es = $ds.$e; | |
3162 | for my $f ("x","f","ff") { | |
3163 | my $fs = $es.$f; | |
3164 | for my $g ("x","g","gg") { | |
3165 | my $gs = $fs.$g; | |
3166 | for my $h ("x","h","hh") { | |
3167 | my $hs = $gs.$h; | |
3168 | for my $i ("x","i","ii") { | |
3169 | my $is = $hs.$i; | |
3170 | for my $j ("x","j","jj") { | |
3171 | my $js = $is.$j; | |
3172 | for my $k ("x","k","kk") { | |
3173 | my $ks = $js.$k; | |
3174 | for my $l ("x","l","ll") { | |
3175 | my $ls = $ks.$l; | |
3176 | if ($ls =~ $r) { | |
3177 | if ($ls =~ /x/) { | |
3178 | $msg .= ": unexpected match for [$ls]"; | |
3179 | $ok = 0; | |
3180 | last OUTER; | |
3181 | } | |
3182 | my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12"; | |
3183 | unless ($ls eq $cap) { | |
3184 | $msg .= ": capture: [$ls], got [$cap]"; | |
3185 | $ok = 0; | |
3186 | last OUTER; | |
3187 | } | |
3188 | } | |
3189 | else { | |
3190 | unless ($ls =~ /x/) { | |
3191 | $msg = ": failed for [$ls]"; | |
3192 | $ok = 0; | |
3193 | last OUTER; | |
3194 | } | |
3195 | } | |
3196 | } | |
3197 | } | |
3198 | } | |
3199 | } | |
3200 | } | |
3201 | } | |
3202 | } | |
3203 | } | |
3204 | } | |
3205 | } | |
3206 | } | |
3207 | } | |
3208 | ok($ok, $msg); | |
3209 | } | |
3210 | ||
3211 | ||
3212 | { | |
3213 | # \, breaks {3,4} | |
3214 | ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern'; | |
3215 | ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern'; | |
3216 | ||
3217 | # \c\ followed by _ | |
3218 | ok "x\c_y" !~ /x\c\_y/, '\_ in a pattern'; | |
3219 | ok "x\c\_y" =~ /x\c\_y/, '\_ in a pattern'; | |
3220 | ||
3221 | # \c\ followed by other characters | |
3222 | for my $c ("z", "\0", "!", chr(254), chr(256)) { | |
3223 | my $targ = "a\034$c"; | |
3224 | my $reg = "a\\c\\$c"; | |
3225 | ok eval ("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"; | |
3226 | } | |
3227 | } | |
3228 | ||
3229 | ||
3230 | { | |
3231 | local $BugId = '36046'; | |
3232 | my $str = 'abc'; | |
3233 | my $count = 0; | |
3234 | my $mval = 0; | |
3235 | my $pval = 0; | |
3236 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} | |
3237 | iseq $mval, 0, '@- should be empty'; | |
3238 | iseq $pval, 0, '@+ should be empty'; | |
3239 | iseq $count, 1, 'Should have matched once only'; | |
3240 | } | |
3241 | ||
3242 | ||
3243 | { # Test the (*PRUNE) pattern | |
3244 | our $count = 0; | |
3245 | 'aaab' =~ /a+b?(?{$count++})(*FAIL)/; | |
3246 | iseq $count, 9, "Expect 9 for no (*PRUNE)"; | |
3247 | $count = 0; | |
3248 | 'aaab' =~ /a+b?(*PRUNE)(?{$count++})(*FAIL)/; | |
3249 | iseq $count, 3, "Expect 3 with (*PRUNE)"; | |
3250 | local $_ = 'aaab'; | |
3251 | $count = 0; | |
3252 | 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g; | |
3253 | iseq $count, 4, "/.(*PRUNE)/"; | |
3254 | $count = 0; | |
3255 | 'aaab' =~ /a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; | |
3256 | iseq $count, 3, "Expect 3 with (*PRUNE)"; | |
3257 | local $_ = 'aaab'; | |
3258 | $count = 0; | |
3259 | 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; | |
3260 | iseq $count, 4, "/.(*PRUNE)/"; | |
3261 | } | |
3262 | ||
3263 | ||
3264 | { # Test the (*SKIP) pattern | |
3265 | our $count = 0; | |
3266 | 'aaab' =~ /a+b?(*SKIP)(?{$count++})(*FAIL)/; | |
3267 | iseq $count, 1, "Expect 1 with (*SKIP)"; | |
3268 | local $_ = 'aaab'; | |
3269 | $count = 0; | |
3270 | 1 while /.(*SKIP)(?{$count++})(*FAIL)/g; | |
3271 | iseq $count, 4, "/.(*SKIP)/"; | |
3272 | $_ = 'aaabaaab'; | |
3273 | $count = 0; | |
3274 | our @res = (); | |
3275 | 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
3276 | iseq $count, 2, "Expect 2 with (*SKIP)"; | |
3277 | iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; | |
3278 | } | |
3279 | ||
3280 | ||
3281 | { # Test the (*SKIP) pattern | |
3282 | our $count = 0; | |
3283 | 'aaab' =~ /a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; | |
3284 | iseq $count, 1, "Expect 1 with (*SKIP)"; | |
3285 | local $_ = 'aaab'; | |
3286 | $count = 0; | |
3287 | 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g; | |
3288 | iseq $count, 4, "/.(*SKIP)/"; | |
3289 | $_ = 'aaabaaab'; | |
3290 | $count = 0; | |
3291 | our @res = (); | |
3292 | 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
3293 | iseq $count, 2, "Expect 2 with (*SKIP)"; | |
3294 | iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; | |
3295 | } | |
3296 | ||
3297 | ||
3298 | { # Test the (*SKIP) pattern | |
3299 | our $count = 0; | |
3300 | 'aaab' =~ /a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/; | |
3301 | iseq $count, 3, "Expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)"; | |
3302 | local $_ = 'aaabaaab'; | |
3303 | $count = 0; | |
3304 | our @res = (); | |
3305 | 1 while | |
3306 | /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g; | |
3307 | iseq $count, 5, "Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)"; | |
3308 | iseq "@res", "aaab b aaab b ", | |
3309 | "Adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected"; | |
3310 | } | |
3311 | ||
3312 | ||
3313 | { # Test the (*COMMIT) pattern | |
3314 | our $count = 0; | |
3315 | 'aaabaaab' =~ /a+b?(*COMMIT)(?{$count++})(*FAIL)/; | |
3316 | iseq $count, 1, "Expect 1 with (*COMMIT)"; | |
3317 | local $_ = 'aaab'; | |
3318 | $count = 0; | |
3319 | 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g; | |
3320 | iseq $count, 1, "/.(*COMMIT)/"; | |
3321 | $_ = 'aaabaaab'; | |
3322 | $count = 0; | |
3323 | our @res = (); | |
3324 | 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g; | |
3325 | iseq $count, 1, "Expect 1 with (*COMMIT)"; | |
3326 | iseq "@res", "aaab", "Adjacent (*COMMIT) works as expected"; | |
3327 | } | |
3328 | ||
3329 | ||
3330 | { | |
3331 | # Test named commits and the $REGERROR var | |
3332 | our $REGERROR; | |
3333 | for my $name ('', ':foo') { | |
3334 | for my $pat ("(*PRUNE$name)", | |
3335 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
3336 | "(*COMMIT$name)") { | |
3337 | for my $suffix ('(*FAIL)', '') { | |
3338 | 'aaaab' =~ /a+b$pat$suffix/; | |
3339 | iseq $REGERROR, | |
3340 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
3341 | "Test $pat and \$REGERROR $suffix"; | |
3342 | } | |
3343 | } | |
3344 | } | |
3345 | } | |
3346 | ||
3347 | ||
3348 | { | |
3349 | # Test named commits and the $REGERROR var | |
3350 | package Fnorble; | |
3351 | our $REGERROR; | |
3352 | for my $name ('', ':foo') { | |
3353 | for my $pat ("(*PRUNE$name)", | |
3354 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
3355 | "(*COMMIT$name)") { | |
3356 | for my $suffix ('(*FAIL)','') { | |
3357 | 'aaaab' =~ /a+b$pat$suffix/; | |
3358 | ::iseq $REGERROR, | |
3359 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
3360 | "Test $pat and \$REGERROR $suffix"; | |
3361 | } | |
3362 | } | |
3363 | } | |
3364 | } | |
3365 | ||
3366 | ||
3367 | { | |
3368 | # Test named commits and the $REGERROR var | |
3369 | local $Message = '$REGERROR'; | |
3370 | our $REGERROR; | |
3371 | for my $word (qw (bar baz bop)) { | |
3372 | $REGERROR = ""; | |
3373 | "aaaaa$word" =~ | |
3374 | /a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/; | |
3375 | iseq $REGERROR, $word; | |
3376 | } | |
3377 | } | |
3378 | ||
3379 | ||
3380 | { | |
3381 | local $BugId = '40684'; | |
3382 | local $Message = '/m in precompiled regexp'; | |
3383 | my $s = "abc\ndef"; | |
3384 | my $rex = qr'^abc$'m; | |
3385 | ok $s =~ m/$rex/; | |
3386 | ok $s =~ m/^abc$/m; | |
3387 | } | |
3388 | ||
3389 | ||
3390 | { | |
3391 | #Mindnumbingly simple test of (*THEN) | |
3392 | for ("ABC","BAX") { | |
3393 | ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test"; | |
3394 | } | |
3395 | } | |
3396 | ||
3397 | ||
3398 | { | |
3399 | local $Message = "Relative Recursion"; | |
3400 | my $parens = qr/(\((?:[^()]++|(?-1))*+\))/; | |
3401 | local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
3402 | my ($all, $one, $two) = ('', '', ''); | |
3403 | ok /foo $parens \s* \+ \s* bar $parens/x; | |
3404 | iseq $1, '((2*3)+4-3)'; | |
3405 | iseq $2, '(2*(3+4)-1*(2-3))'; | |
3406 | iseq $&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
3407 | iseq $&, $_; | |
3408 | } | |
3409 | ||
3410 | { | |
3411 | my $spaces=" "; | |
3412 | local $_ = join 'bar', $spaces, $spaces; | |
3413 | our $count = 0; | |
3414 | s/(?>\s+bar)(?{$count++})//g; | |
3415 | iseq $_, $spaces, "SUSPEND final string"; | |
3416 | iseq $count, 1, "Optimiser should have prevented more than one match"; | |
3417 | } | |
3418 | ||
3419 | { | |
3420 | local $BugId = '36909'; | |
3421 | local $Message = '(?: ... )? should not lose $^R'; | |
3422 | $^R = 'Nothing'; | |
3423 | { | |
3424 | local $^R = "Bad"; | |
3425 | ok 'x foofoo y' =~ m { | |
3426 | (foo) # $^R correctly set | |
3427 | (?{ "last regexp code result" }) | |
3428 | }x; | |
3429 | iseq $^R, 'last regexp code result'; | |
3430 | } | |
3431 | iseq $^R, 'Nothing'; | |
3432 | ||
3433 | { | |
3434 | local $^R = "Bad"; | |
3435 | ||
3436 | ok 'x foofoo y' =~ m { | |
3437 | (?:foo|bar)+ # $^R correctly set | |
3438 | (?{ "last regexp code result" }) | |
3439 | }x; | |
3440 | iseq $^R, 'last regexp code result'; | |
3441 | } | |
3442 | iseq $^R, 'Nothing'; | |
3443 | ||
3444 | { | |
3445 | local $^R = "Bad"; | |
3446 | ok 'x foofoo y' =~ m { | |
3447 | (foo|bar)\1+ # $^R undefined | |
3448 | (?{ "last regexp code result" }) | |
3449 | }x; | |
3450 | iseq $^R, 'last regexp code result'; | |
3451 | } | |
3452 | iseq $^R, 'Nothing'; | |
3453 | ||
3454 | { | |
3455 | local $^R = "Bad"; | |
3456 | ok 'x foofoo y' =~ m { | |
3457 | (foo|bar)\1 # This time without the + | |
3458 | (?{"last regexp code result"}) | |
3459 | }x; | |
3460 | iseq $^R, 'last regexp code result'; | |
3461 | } | |
3462 | iseq $^R, 'Nothing'; | |
3463 | } | |
3464 | ||
3465 | ||
3466 | { | |
3467 | local $BugId = '22395'; | |
3468 | local $Message = 'Match is linear, not quadratic'; | |
3469 | our $count; | |
3470 | for my $l (10, 100, 1000) { | |
3471 | $count = 0; | |
3472 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; | |
3473 | local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; | |
3474 | iseq $count, $l + 1; | |
3475 | } | |
3476 | } | |
3477 | ||
3478 | ||
3479 | { | |
3480 | local $BugId = '22614'; | |
3481 | local $Message = '@-/@+ should not have undefined values'; | |
3482 | local $_ = 'ab'; | |
3483 | our @len = (); | |
3484 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
3485 | iseq "@len", "2 2 2"; | |
3486 | } | |
3487 | ||
3488 | ||
3489 | { | |
3490 | local $BugId = '18209'; | |
3491 | local $Message = '$& set on s///'; | |
3492 | my $text = ' word1 word2 word3 word4 word5 word6 '; | |
3493 | ||
3494 | my @words = ('word1', 'word3', 'word5'); | |
3495 | my $count; | |
3496 | foreach my $word (@words) { | |
3497 | $text =~ s/$word\s//gi; # Leave a space to seperate words | |
3498 | # in the resultant str. | |
3499 | # The following block is not working. | |
3500 | if ($&) { | |
3501 | $count ++; | |
3502 | } | |
3503 | # End bad block | |
3504 | } | |
3505 | iseq $count, 3; | |
3506 | iseq $text, ' word2 word4 word6 '; | |
3507 | } | |
3508 | ||
3509 | ||
3510 | { | |
3511 | # RT#6893 | |
3512 | local $BugId = '6893'; | |
3513 | local $_ = qq (A\nB\nC\n); | |
3514 | my @res; | |
3515 | while (m#(\G|\n)([^\n]*)\n#gsx) { | |
3516 | push @res, "$2"; | |
3517 | last if @res > 3; | |
3518 | } | |
3519 | iseq "@res", "A B C", "/g pattern shouldn't infinite loop"; | |
3520 | } | |
3521 | ||
3522 | ||
3523 | { | |
3524 | # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus> | |
3525 | my $dow_name = "nada"; | |
3526 | my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " . | |
3527 | "C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/"; | |
3528 | my $time_string = "D\x{e9} C\x{e9}adaoin"; | |
3529 | eval $parser; | |
3530 | ok !$@, "Test Eval worked"; | |
3531 | iseq $dow_name, $time_string, "UTF-8 trie common prefix extraction"; | |
3532 | } | |
3533 | ||
3534 | ||
3535 | { | |
3536 | my $v; | |
3537 | ($v = 'bar') =~ /(\w+)/g; | |
3538 | $v = 'foo'; | |
3539 | iseq "$1", 'bar', '$1 is safe after /g - may fail due ' . | |
3540 | 'to specialized config in pp_hot.c' | |
3541 | } | |
3542 | ||
3543 | ||
3544 | { | |
3545 | local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663"; | |
3546 | my $qr_barR1 = qr/(bar)\g-1/; | |
3547 | ok "foobarbarxyz" =~ $qr_barR1; | |
3548 | ok "foobarbarxyz" =~ qr/foo${qr_barR1}xyz/; | |
3549 | ok "foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/; | |
3550 | ok "foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/; | |
3551 | ok "foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/; | |
3552 | ok "foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/; | |
3553 | } | |
3554 | ||
3555 | ||
3556 | { | |
3557 | local $BugId = '41010'; | |
3558 | local $Message = 'No optimizer bug'; | |
3559 | my @tails = ('', '(?(1))', '(|)', '()?'); | |
3560 | my @quants = ('*','+'); | |
3561 | my $doit = sub { | |
3562 | my $pats = shift; | |
3563 | for (@_) { | |
3564 | for my $pat (@$pats) { | |
3565 | for my $quant (@quants) { | |
3566 | for my $tail (@tails) { | |
3567 | my $re = "($pat$quant\$)$tail"; | |
3568 | ok /$re/ && $1 eq $_, "'$_' =~ /$re/"; | |
3569 | ok /$re/m && $1 eq $_, "'$_' =~ /$re/m"; | |
3570 | } | |
3571 | } | |
3572 | } | |
3573 | } | |
3574 | }; | |
3575 | ||
3576 | my @dpats = ('\d', | |
3577 | '[1234567890]', | |
3578 | '(1|[23]|4|[56]|[78]|[90])', | |
3579 | '(?:1|[23]|4|[56]|[78]|[90])', | |
3580 | '(1|2|3|4|5|6|7|8|9|0)', | |
3581 | '(?:1|2|3|4|5|6|7|8|9|0)'); | |
3582 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); | |
3583 | my @sstrs = (' '); | |
3584 | my @dstrs = ('12345'); | |
3585 | $doit -> (\@spats, @sstrs); | |
3586 | $doit -> (\@dpats, @dstrs); | |
3587 | } | |
3588 | ||
3589 | ||
3590 | { | |
3591 | local $Message = '$REGMARK'; | |
3592 | our @r = (); | |
3593 | our ($REGMARK, $REGERROR); | |
3594 | ok 'foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x; | |
3595 | iseq "@r","foo"; | |
3596 | iseq $REGMARK, "foo"; | |
3597 | ok 'foofoo' !~ /foo (*MARK:foo) (*FAIL) /x; | |
3598 | ok !$REGMARK; | |
3599 | iseq $REGERROR, 'foo'; | |
3600 | } | |
3601 | ||
3602 | ||
3603 | { | |
3604 | local $Message = '\K test'; | |
3605 | my $x; | |
3606 | $x = "abc.def.ghi.jkl"; | |
3607 | $x =~ s/.*\K\..*//; | |
3608 | iseq $x, "abc.def.ghi"; | |
3609 | ||
3610 | $x = "one two three four"; | |
3611 | $x =~ s/o+ \Kthree//g; | |
3612 | iseq $x, "one two four"; | |
3613 | ||
3614 | $x = "abcde"; | |
3615 | $x =~ s/(.)\K/$1/g; | |
3616 | iseq $x, "aabbccddee"; | |
3617 | } | |
3618 | ||
3619 | ||
3620 | { | |
3621 | sub kt { | |
3622 | return '4' if $_[0] eq '09028623'; | |
3623 | } | |
3624 | # Nested EVAL using PL_curpm (via $1 or friends) | |
3625 | my $re; | |
3626 | our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; | |
3627 | $re = qr/^ ( (??{ $grabit }) ) $ /x; | |
3628 | my @res = '0902862349' =~ $re; | |
3629 | iseq join ("-", @res), "0902862349", | |
3630 | 'PL_curpm is set properly on nested eval'; | |
3631 | ||
3632 | our $qr = qr/ (o) (??{ $1 }) /x; | |
3633 | ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval"; | |
3634 | } | |
3635 | ||
3636 | ||
3637 | { | |
3638 | use charnames ":full"; | |
3639 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"; | |
3640 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"; | |
3641 | ok "\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"; | |
3642 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"; | |
3643 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"; | |
3644 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"; | |
3645 | ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"; | |
3646 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"; | |
3647 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"; | |
3648 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue" | |
3649 | } | |
3650 | ||
3651 | ||
3652 | { | |
3653 | # requirement of Unicode Technical Standard #18, 1.7 Code Points | |
3654 | # cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters | |
3655 | for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { | |
3656 | no warnings 'utf8'; # oops | |
3657 | my $c = chr $u; | |
3658 | my $x = sprintf '%04X', $u; | |
3659 | ok "A${c}B" =~ /A[\0-\x{10000}]B/, "Unicode range - $x"; | |
3660 | } | |
3661 | } | |
3662 | ||
3663 | ||
3664 | { | |
3665 | my $res=""; | |
3666 | ||
3667 | if ('1' =~ /(?|(?<digit>1)|(?<digit>2))/) { | |
3668 | $res = "@{$- {digit}}"; | |
3669 | } | |
3670 | iseq $res, "1", | |
3671 | "Check that (?|...) doesnt cause dupe entries in the names array"; | |
3672 | ||
3673 | $res = ""; | |
3674 | if ('11' =~ /(?|(?<digit>1)|(?<digit>2))(?&digit)/) { | |
3675 | $res = "@{$- {digit}}"; | |
3676 | } | |
3677 | iseq $res, "1", "Check that (?&..) to a buffer inside " . | |
3678 | "a (?|...) goes to the leftmost"; | |
3679 | } | |
3680 | ||
3681 | ||
3682 | { | |
3683 | use warnings; | |
3684 | local $Message = "ASCII pattern that really is UTF-8"; | |
3685 | my @w; | |
3686 | local $SIG {__WARN__} = sub {push @w, "@_"}; | |
3687 | my $c = qq (\x{DF}); | |
3688 | ok $c =~ /${c}|\x{100}/; | |
3689 | ok @w == 0; | |
3690 | } | |
3691 | ||
3692 | ||
3693 | { | |
3694 | local $Message = "Corruption of match results of qr// across scopes"; | |
3695 | my $qr = qr/(fo+)(ba+r)/; | |
3696 | 'foobar' =~ /$qr/; | |
3697 | iseq "$1$2", "foobar"; | |
3698 | { | |
3699 | 'foooooobaaaaar' =~ /$qr/; | |
3700 | iseq "$1$2", 'foooooobaaaaar'; | |
3701 | } | |
3702 | iseq "$1$2", "foobar"; | |
3703 | } | |
3704 | ||
3705 | ||
3706 | { | |
3707 | local $Message = "HORIZWS"; | |
3708 | local $_ = "\t \r\n \n \t".chr(11)."\n"; | |
3709 | s/\H/H/g; | |
3710 | s/\h/h/g; | |
3711 | iseq $_, "hhHHhHhhHH"; | |
3712 | $_ = "\t \r\n \n \t" . chr (11) . "\n"; | |
3713 | utf8::upgrade ($_); | |
3714 | s/\H/H/g; | |
3715 | s/\h/h/g; | |
3716 | iseq $_, "hhHHhHhhHH"; | |
3717 | } | |
3718 | ||
3719 | ||
3720 | { | |
3721 | local $Message = "Various whitespace special patterns"; | |
3722 | my @h = map {chr $_} 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, | |
3723 | 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, | |
3724 | 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, | |
3725 | 0x3000; | |
3726 | my @v = map {chr $_} 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, | |
3727 | 0x2029; | |
3728 | my @lb = ("\x0D\x0A", map {chr $_} 0x0A .. 0x0D, 0x85, 0x2028, 0x2029); | |
3729 | foreach my $t ([\@h, qr/\h/, qr/\h+/], | |
3730 | [\@v, qr/\v/, qr/\v+/], | |
3731 | [\@lb, qr/\R/, qr/\R+/],) { | |
3732 | my $ary = shift @$t; | |
3733 | foreach my $pat (@$t) { | |
3734 | foreach my $str (@$ary) { | |
3735 | ok $str =~ /($pat)/, $pat; | |
3736 | iseq $1, $str, $pat; | |
3737 | utf8::upgrade ($str); | |
3738 | ok $str =~ /($pat)/, "Upgraded string - $pat"; | |
3739 | iseq $1, $str, "Upgraded string - $pat"; | |
3740 | } | |
3741 | } | |
3742 | } | |
3743 | } | |
3744 | ||
3745 | ||
3746 | { | |
3747 | local $Message = "Check that \\xDF match properly in its various forms"; | |
3748 | # Test that \xDF matches properly. this is pretty hacky stuff, | |
3749 | # but its actually needed. The malarky with '-' is to prevent | |
3750 | # compilation caching from playing any role in the test. | |
3751 | my @df = (chr (0xDF), '-', chr (0xDF)); | |
3752 | utf8::upgrade ($df [2]); | |
3753 | my @strs = ('ss', 'sS', 'Ss', 'SS', chr (0xDF)); | |
3754 | my @ss = map {("$_", "$_")} @strs; | |
3755 | utf8::upgrade ($ss [$_ * 2 + 1]) for 0 .. $#strs; | |
3756 | ||
3757 | for my $ssi (0 .. $#ss) { | |
3758 | for my $dfi (0 .. $#df) { | |
3759 | my $pat = $df [$dfi]; | |
3760 | my $str = $ss [$ssi]; | |
3761 | my $utf_df = ($dfi > 1) ? 'utf8' : ''; | |
3762 | my $utf_ss = ($ssi % 2) ? 'utf8' : ''; | |
3763 | (my $sstr = $str) =~ s/\xDF/\\xDF/; | |
3764 | ||
3765 | if ($utf_df || $utf_ss || length ($ss [$ssi]) == 1) { | |
3766 | my $ret = $str =~ /$pat/i; | |
3767 | next if $pat eq '-'; | |
3768 | ok $ret, "\"$sstr\" =~ /\\xDF/i " . | |
3769 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
3770 | "@{[$utf_df||'latin']})"; | |
3771 | } | |
3772 | else { | |
3773 | my $ret = $str !~ /$pat/i; | |
3774 | next if $pat eq '-'; | |
3775 | ok $ret, "\"$sstr\" !~ /\\xDF/i " . | |
3776 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
3777 | "@{[$utf_df||'latin']})"; | |
3778 | } | |
3779 | } | |
3780 | } | |
3781 | } | |
3782 | ||
3783 | ||
3784 | { | |
3785 | local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte"; | |
3786 | my $re = qr/(?:[\x00-\xFF]{4})/; | |
3787 | my $hyp = "\0\0\0-"; | |
3788 | my $esc = "\0\0\0\\"; | |
3789 | ||
3790 | my $str = "$esc$hyp$hyp$esc$esc"; | |
3791 | my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); | |
3792 | ||
3793 | iseq @a,3; | |
3794 | local $" = "="; | |
3795 | iseq "@a","$esc$hyp=$hyp=$esc$esc"; | |
3796 | } | |
3797 | ||
3798 | ||
3799 | { | |
3800 | # Test for keys in %+ and %- | |
3801 | local $Message = 'Test keys in %+ and %-'; | |
3802 | no warnings 'uninitialized'; | |
3803 | my $_ = "abcdef"; | |
3804 | /(?<foo>a)|(?<foo>b)/; | |
3805 | iseq ((join ",", sort keys %+), "foo"); | |
3806 | iseq ((join ",", sort keys %-), "foo"); | |
3807 | iseq ((join ",", sort values %+), "a"); | |
3808 | iseq ((join ",", sort map "@$_", values %-), "a "); | |
3809 | /(?<bar>a)(?<bar>b)(?<quux>.)/; | |
3810 | iseq ((join ",", sort keys %+), "bar,quux"); | |
3811 | iseq ((join ",", sort keys %-), "bar,quux"); | |
3812 | iseq ((join ",", sort values %+), "a,c"); # leftmost | |
3813 | iseq ((join ",", sort map "@$_", values %-), "a b,c"); | |
3814 | /(?<un>a)(?<deux>c)?/; # second buffer won't capture | |
3815 | iseq ((join ",", sort keys %+), "un"); | |
3816 | iseq ((join ",", sort keys %-), "deux,un"); | |
3817 | iseq ((join ",", sort values %+), "a"); | |
3818 | iseq ((join ",", sort map "@$_", values %-), ",a"); | |
3819 | } | |
3820 | ||
3821 | ||
3822 | { | |
3823 | # length() on captures, the numbered ones end up in Perl_magic_len | |
3824 | my $_ = "aoeu \xe6var ook"; | |
3825 | /^ \w+ \s (?<eek>\S+)/x; | |
3826 | ||
3827 | iseq length ($`), 0, q[length $`]; | |
3828 | iseq length ($'), 4, q[length $']; | |
3829 | iseq length ($&), 9, q[length $&]; | |
3830 | iseq length ($1), 4, q[length $1]; | |
3831 | iseq length ($+{eek}), 4, q[length $+{eek} == length $1]; | |
3832 | } | |
3833 | ||
3834 | ||
3835 | { | |
3836 | my $ok = -1; | |
3837 | ||
3838 | $ok = exists ($-{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
3839 | iseq $ok, 1, '$-{x} exists after "bar"=~/(?<x>foo)|bar/'; | |
3840 | iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; | |
3841 | iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; | |
3842 | ||
3843 | $ok = -1; | |
3844 | $ok = exists ($+{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
3845 | iseq $ok, 0, '$+{x} not exists after "bar"=~/(?<x>foo)|bar/'; | |
3846 | iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; | |
3847 | iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; | |
3848 | ||
3849 | $ok = -1; | |
3850 | $ok = exists ($-{x}) ? 1 : 0 if 'foo' =~ /(?<x>foo)|bar/; | |
3851 | iseq $ok, 1, '$-{x} exists after "foo"=~/(?<x>foo)|bar/'; | |
3852 | iseq scalar (%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/'; | |
3853 | iseq scalar (%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/'; | |
3854 | ||
3855 | $ok = -1; | |
3856 | $ok = exists ($+{x}) ? 1 : 0 if 'foo'=~/(?<x>foo)|bar/; | |
3857 | iseq $ok, 1, '$+{x} exists after "foo"=~/(?<x>foo)|bar/'; | |
3858 | } | |
3859 | ||
3860 | ||
3861 | { | |
3862 | local $_; | |
3863 | ($_ = 'abc') =~ /(abc)/g; | |
3864 | $_ = '123'; | |
3865 | iseq "$1", 'abc', "/g leads to unsafe match vars: $1"; | |
3866 | } | |
3867 | ||
3868 | ||
3869 | { | |
3870 | local $Message = 'Message-ID: <20070818091501.7eff4831@r2d2>'; | |
3871 | my $str = ""; | |
3872 | for (0 .. 5) { | |
3873 | my @x; | |
3874 | $str .= "@x"; # this should ALWAYS be the empty string | |
3875 | 'a' =~ /(a|)/; | |
3876 | push @x, 1; | |
3877 | } | |
3878 | iseq length ($str), 0, "Trie scope error, string should be empty"; | |
3879 | $str = ""; | |
3880 | my @foo = ('a') x 5; | |
3881 | for (@foo) { | |
3882 | my @bar; | |
3883 | $str .= "@bar"; | |
3884 | s/a|/push @bar, 1/e; | |
3885 | } | |
3886 | iseq length ($str), 0, "Trie scope error, string should be empty"; | |
3887 | } | |
3888 | ||
3889 | ||
3890 | { | |
3891 | local $BugId = '45605'; | |
3892 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string | |
3893 | ||
3894 | my $utf_8 = "\xd6schel"; | |
3895 | utf8::upgrade ($utf_8); | |
3896 | $utf_8 =~ m {(\xd6|Ö)schel}; | |
3897 | iseq $1, "\xd6", "Upgrade error"; | |
3898 | } | |
3899 | ||
3900 | { | |
3901 | # more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding | |
3902 | for my $chr (160 .. 255) { | |
3903 | my $chr_byte = chr($chr); | |
3904 | my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); | |
3905 | my $rx = qr{$chr_byte|X}i; | |
3906 | ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); | |
3907 | } | |
3908 | } | |
3909 | ||
3910 | { | |
3911 | # Regardless of utf8ness any character matches itself when | |
3912 | # doing a case insensitive match. See also [perl #36207] | |
3913 | local $BugId = '36207'; | |
3914 | for my $o (0 .. 255) { | |
3915 | my @ch = (chr ($o), chr ($o)); | |
3916 | utf8::upgrade ($ch [1]); | |
3917 | for my $u_str (0, 1) { | |
3918 | for my $u_pat (0, 1) { | |
3919 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i, | |
3920 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat"; | |
3921 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i, | |
3922 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat"; | |
3923 | } | |
3924 | } | |
3925 | } | |
3926 | } | |
3927 | ||
3928 | ||
3929 | { | |
3930 | our $a = 3; "" =~ /(??{ $a })/; | |
3931 | our $b = $a; | |
3932 | iseq $b, $a, "Copy of scalar used for postponed subexpression"; | |
3933 | } | |
3934 | ||
3935 | ||
3936 | { | |
3937 | local $BugId = '49190'; | |
3938 | local $Message = '$REGMARK in replacement'; | |
3939 | our $REGMARK; | |
3940 | my $_ = "A"; | |
3941 | ok s/(*:B)A/$REGMARK/; | |
3942 | iseq $_, "B"; | |
3943 | $_ = "CCCCBAA"; | |
3944 | ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; | |
3945 | iseq $_, "ZYX"; | |
3946 | } | |
3947 | ||
3948 | ||
3949 | { | |
3950 | our @ctl_n = (); | |
3951 | our @plus = (); | |
3952 | our $nested_tags; | |
3953 | $nested_tags = qr{ | |
3954 | < | |
3955 | (\w+) | |
3956 | (?{ | |
3957 | push @ctl_n,$^N; | |
3958 | push @plus,$+; | |
3959 | }) | |
3960 | > | |
3961 | (??{$nested_tags})* | |
3962 | </\s* \w+ \s*> | |
3963 | }x; | |
3964 | ||
3965 | my $match = '<bla><blubb></blubb></bla>' =~ m/^$nested_tags$/; | |
3966 | ok $match, 'nested construct matches'; | |
3967 | iseq "@ctl_n", "bla blubb", '$^N inside of (?{}) works as expected'; | |
3968 | iseq "@plus", "bla blubb", '$+ inside of (?{}) works as expected'; | |
3969 | } | |
3970 | ||
3971 | ||
3972 | { | |
3973 | local $BugId = '52658'; | |
3974 | local $Message = 'Substitution evaluation in list context'; | |
3975 | my $reg = '../xxx/'; | |
3976 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, | |
3977 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); | |
3978 | iseq $reg, '../bbb/'; | |
3979 | iseq $te [0], '../'; | |
3980 | } | |
3981 | ||
3982 | # This currently has to come before any "use encoding" in this file. | |
3983 | { | |
3984 | local $Message; | |
3985 | local $BugId = '59342'; | |
3986 | must_warn 'qr/\400/', '^Use of octal value above 377'; | |
3987 | } | |
3988 | ||
3989 | ||
3990 | SKIP: { | |
3991 | # XXX: This set of tests is essentially broken, POSIX character classes | |
3992 | # should not have differing definitions under Unicode. | |
3993 | # There are property names for that. | |
3994 | skip "Tests assume ASCII", 4 unless $IS_ASCII; | |
3995 | ||
3996 | my @notIsPunct = grep {/[[:punct:]]/ and not /\p{IsPunct}/} | |
3997 | map {chr} 0x20 .. 0x7f; | |
3998 | iseq join ('', @notIsPunct), '$+<=>^`|~', | |
3999 | '[:punct:] disagress with IsPunct on Symbols'; | |
4000 | ||
4001 | my @isPrint = grep {not /[[:print:]]/ and /\p{IsPrint}/} | |
4002 | map {chr} 0 .. 0x1f, 0x7f .. 0x9f; | |
4003 | iseq join ('', @isPrint), "\x09\x0a\x0b\x0c\x0d\x85", | |
4004 | 'IsPrint disagrees with [:print:] on control characters'; | |
4005 | ||
4006 | my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} | |
4007 | map {chr} 0x80 .. 0xff; | |
4008 | iseq join ('', @isPunct), "\xa1\xab\xb7\xbb\xbf", # ¡ « · » ¿ | |
4009 | 'IsPunct disagrees with [:punct:] outside ASCII'; | |
4010 | ||
4011 | my @isPunctLatin1 = eval q { | |
4012 | use encoding 'latin1'; | |
4013 | grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; | |
4014 | }; | |
4015 | skip "Eval failed ($@)", 1 if $@; | |
4016 | skip "PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS set to 0", 1 | |
4017 | if !$ENV{PERL_TEST_LEGACY_POSIX_CC}; | |
4018 | iseq join ('', @isPunctLatin1), '', | |
4019 | 'IsPunct agrees with [:punct:] with explicit Latin1'; | |
4020 | } | |
4021 | ||
4022 | ||
4023 | { | |
4024 | local $BugId = '60034'; | |
4025 | my $a = "xyzt" x 8192; | |
4026 | ok $a =~ /\A(?>[a-z])*\z/, | |
4027 | '(?>) does not cause wrongness on long string'; | |
4028 | my $b = $a . chr 256; | |
4029 | chop $b; | |
4030 | { | |
4031 | iseq $a, $b; | |
4032 | } | |
4033 | ok $b =~ /\A(?>[a-z])*\z/, | |
4034 | '(?>) does not cause wrongness on long string with UTF-8'; | |
4035 | } | |
4036 | ||
4037 | ||
4038 | # | |
4039 | # Keep the following tests last -- they may crash perl | |
4040 | # | |
4041 | print "# Tests that follow may crash perl\n"; | |
4042 | { | |
4043 | local $BugId = '19049/38869'; | |
4044 | local $Message = 'Pattern in a loop, failure should not ' . | |
4045 | 'affect previous success'; | |
4046 | my @list = ( | |
4047 | 'ab cdef', # Matches regex | |
4048 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it | |
4049 | ); | |
4050 | my $y; | |
4051 | my $x; | |
4052 | foreach (@list) { | |
4053 | m/ab(.+)cd/i; # The ignore-case seems to be important | |
4054 | $y = $1; # Use $1, which might not be from the last match! | |
4055 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); | |
4056 | } | |
4057 | iseq $y, ' '; | |
4058 | iseq $x, 'ab cd'; | |
4059 | } | |
4060 | ||
4061 | ||
4062 | { | |
4063 | local $BugId = '24274'; | |
4064 | ||
4065 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker"); | |
4066 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, | |
4067 | "Regexp /^(??{'(.)'x 100})/ crashes older perls"); | |
4068 | } | |
4069 | ||
4070 | ||
4071 | { | |
4072 | eval '/\k/'; | |
4073 | ok $@ =~ /\QSequence \k... not terminated in regex;\E/, | |
4074 | 'Lone \k not allowed'; | |
4075 | } | |
4076 | ||
4077 | ||
4078 | { | |
4079 | local $Message = "Substitution with lookahead (possible segv)"; | |
4080 | $_ = "ns1ns1ns1"; | |
4081 | s/ns(?=\d)/ns_/g; | |
4082 | iseq $_, "ns_1ns_1ns_1"; | |
4083 | $_ = "ns1"; | |
4084 | s/ns(?=\d)/ns_/; | |
4085 | iseq $_, "ns_1"; | |
4086 | $_ = "123"; | |
4087 | s/(?=\d+)|(?<=\d)/!Bang!/g; | |
4088 | iseq $_, "!Bang!1!Bang!2!Bang!3!Bang!"; | |
4089 | } | |
4090 | ||
4091 | ||
4092 | { | |
4093 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache | |
4094 | local $BugId = '45337'; | |
4095 | local ${^UTF8CACHE} = -1; | |
4096 | local $Message = "Shouldn't panic"; | |
4097 | my $s = "[a]a{2}"; | |
4098 | utf8::upgrade $s; | |
4099 | ok "aaa" =~ /$s/; | |
4100 | } | |
4101 | { | |
4102 | local $BugId = '57042'; | |
4103 | local $Message = "Check if tree logic breaks \$^R"; | |
4104 | my $cond_re = qr/\s* | |
4105 | \s* (?: | |
4106 | \( \s* A (?{1}) | |
4107 | | \( \s* B (?{2}) | |
4108 | ) | |
4109 | /x; | |
4110 | my @res; | |
4111 | for my $line ("(A)","(B)") { | |
4112 | if ($line =~ m/$cond_re/) { | |
4113 | push @res, $^R ? "#$^R" : "UNDEF"; | |
4114 | } | |
4115 | } | |
4116 | iseq "@res","#1 #2"; | |
4117 | } | |
4118 | { | |
4119 | no warnings 'closure'; | |
4120 | my $re = qr/A(??{"1"})/; | |
4121 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; | |
4122 | ok $1 eq "A1"; | |
4123 | ok $2 eq "B"; | |
4124 | } | |
4125 | ||
4126 | ||
4127 | { | |
4128 | use re 'eval'; | |
4129 | local $Message = 'Test if $^N and $+ work in (?{{})'; | |
4130 | our @ctl_n = (); | |
4131 | our @plus = (); | |
4132 | our $nested_tags; | |
4133 | $nested_tags = qr{ | |
4134 | < | |
4135 | ((\w)+) | |
4136 | (?{ | |
4137 | push @ctl_n, (defined $^N ? $^N : "undef"); | |
4138 | push @plus, (defined $+ ? $+ : "undef"); | |
4139 | }) | |
4140 | > | |
4141 | (??{$nested_tags})* | |
4142 | </\s* \w+ \s*> | |
4143 | }x; | |
4144 | ||
4145 | ||
4146 | my $c = 0; | |
4147 | for my $test ( | |
4148 | # Test structure: | |
4149 | # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ] | |
4150 | [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], | |
4151 | [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], | |
4152 | [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], | |
4153 | [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], | |
4154 | [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], | |
4155 | [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4156 | [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4157 | [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], | |
4158 | [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4159 | [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4160 | [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4161 | [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4162 | [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ], | |
4163 | ||
4164 | ) { #"#silence vim highlighting | |
4165 | $c++; | |
4166 | @ctl_n = (); | |
4167 | @plus = (); | |
4168 | my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); | |
4169 | push @ctl_n, (defined $^N ? $^N : "undef"); | |
4170 | push @plus, (defined $+ ? $+ : "undef"); | |
4171 | ok($test->[0] == $match, "match $c"); | |
4172 | if ($test->[0] != $match) { | |
4173 | # unset @ctl_n and @plus | |
4174 | @ctl_n = @plus = (); | |
4175 | } | |
4176 | iseq("@ctl_n", $test->[2], "ctl_n $c"); | |
4177 | iseq("@plus", $test->[3], "plus $c"); | |
4178 | } | |
4179 | } | |
4180 | ||
4181 | { | |
4182 | use re 'eval'; | |
4183 | local $BugId = '56194'; | |
4184 | ||
4185 | our $f; | |
4186 | local $f; | |
4187 | $f = sub { | |
4188 | defined $_[0] ? $_[0] : "undef"; | |
4189 | }; | |
4190 | ||
4191 | ok("123" =~ m/^(\d)(((??{1 + $^N})))+$/); | |
4192 | ||
4193 | our @ctl_n; | |
4194 | our @plus; | |
4195 | ||
4196 | my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; | |
4197 | my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; | |
4198 | my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; | |
4199 | our $re5; | |
4200 | local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; | |
4201 | my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; | |
4202 | my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; | |
4203 | my $re8 = qr/(\d+)/; | |
4204 | my $c = 0; | |
4205 | for my $test ( | |
4206 | # Test structure: | |
4207 | # [ | |
4208 | # String to match | |
4209 | # Regex too match | |
4210 | # Expected values of $^N | |
4211 | # Expected values of $+ | |
4212 | # Expected values of $1, $2, $3, $4 and $5 | |
4213 | # ] | |
4214 | [ | |
4215 | "1233", | |
4216 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, | |
4217 | "1 2 3 3", | |
4218 | "1 2 3 3", | |
4219 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4220 | ], | |
4221 | [ | |
4222 | "1233", | |
4223 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, | |
4224 | "1 2 3 3", | |
4225 | "1 2 3 3", | |
4226 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4227 | ], | |
4228 | [ | |
4229 | "1233", | |
4230 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, | |
4231 | "1 2 3 3", | |
4232 | "1 2 3 3", | |
4233 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4234 | ], | |
4235 | [ | |
4236 | "1233", | |
4237 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, | |
4238 | "1 2 3 3", | |
4239 | "1 2 3 3", | |
4240 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4241 | ], | |
4242 | [ | |
4243 | "1233", | |
4244 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, | |
4245 | "1 2 3 3", | |
4246 | "1 2 3 3", | |
4247 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4248 | ], | |
4249 | [ | |
4250 | "123abc3", | |
4251 | qr#^($re)(|a(b)c|def)(??{$^R})$#, | |
4252 | "1 2 3 abc", | |
4253 | "1 2 3 b", | |
4254 | "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4255 | ], | |
4256 | [ | |
4257 | "123abc3", | |
4258 | qr#^($re2)$#, | |
4259 | "1 2 3 123abc3", | |
4260 | "1 2 3 b", | |
4261 | "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4262 | ], | |
4263 | [ | |
4264 | "123abc3", | |
4265 | qr#^($re3)$#, | |
4266 | "1 2 123abc3", | |
4267 | "1 2 b", | |
4268 | "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4269 | ], | |
4270 | [ | |
4271 | "123abc3", | |
4272 | qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, | |
4273 | "1 2 abc", | |
4274 | "1 2 abc", | |
4275 | "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", | |
4276 | ], | |
4277 | [ | |
4278 | "123abc3", | |
4279 | qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, | |
4280 | "1 2 abc", | |
4281 | "1 2 b", | |
4282 | "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", | |
4283 | ], | |
4284 | [ | |
4285 | "1234", | |
4286 | qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, | |
4287 | "1234 123 12 1 2 3 1234", | |
4288 | "1234 123 12 1 2 3 4", | |
4289 | "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", | |
4290 | ], | |
4291 | [ | |
4292 | "1234556", |