Commit | Line | Data |
---|---|---|
8d063cd8 | 1 | #!./perl |
8d37f932 DD |
2 | # |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by op/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to op/re_tests, not here. | |
8d063cd8 | 6 | |
84281c31 A |
7 | use strict; |
8 | use warnings; | |
9 | use 5.010; | |
10 | ||
11 | ||
12 | sub run_tests; | |
13 | ||
9133bbab | 14 | $| = 1; |
3568d838 | 15 | |
34a81e2b | 16 | my $EXPECTED_TESTS = 4061; # Update this when adding/deleting tests. |
8d37f932 | 17 | |
e4d48cc9 GS |
18 | BEGIN { |
19 | chdir 't' if -d 't'; | |
20822f61 | 20 | @INC = '../lib'; |
e4d48cc9 | 21 | } |
84281c31 | 22 | our $TODO; |
de734bd5 | 23 | our $Message = "Noname test"; |
84281c31 A |
24 | our $Error; |
25 | our $DiePattern; | |
26 | our $WarnPattern; | |
27 | our $BugId; | |
28 | our $PatchId; | |
29 | our $running_as_thread; | |
30 | ||
31 | my $ordA = ord ('A'); # This defines ASCII/UTF-8 vs EBCDIC/UTF-EBCDIC | |
32 | # This defined the platform. | |
33 | my $IS_ASCII = $ordA == 65; | |
34 | my $IS_EBCDIC = $ordA == 193; | |
35 | ||
36 | use vars '%Config'; | |
8d37f932 | 37 | eval 'use Config'; # Defaults assumed if this fails |
8d063cd8 | 38 | |
84281c31 | 39 | my $test = 0; |
378cc40b | 40 | |
84281c31 | 41 | print "1..$EXPECTED_TESTS\n"; |
cb55de95 | 42 | |
84281c31 | 43 | run_tests unless caller (); |
cb55de95 | 44 | |
84281c31 | 45 | END { |
cb55de95 JH |
46 | } |
47 | ||
84281c31 A |
48 | sub pretty { |
49 | my ($mess) = @_; | |
50 | $mess =~ s/\n/\\n/g; | |
51 | $mess =~ s/\r/\\r/g; | |
52 | $mess =~ s/\t/\\t/g; | |
53 | $mess =~ s/([\00-\37\177])/sprintf '\%03o', ord $1/eg; | |
54 | $mess =~ s/#/\\#/g; | |
55 | $mess; | |
352d5a3a LW |
56 | } |
57 | ||
84281c31 A |
58 | sub safe_globals { |
59 | defined($_) and s/#/\\#/g for $BugId, $PatchId, $TODO; | |
c277df42 IZ |
60 | } |
61 | ||
84281c31 A |
62 | sub _ok { |
63 | my ($ok, $mess, $error) = @_; | |
64 | safe_globals(); | |
65 | $mess = pretty ($mess // $Message); | |
66 | $mess .= "; Bug $BugId" if defined $BugId; | |
67 | $mess .= "; Patch $PatchId" if defined $PatchId; | |
68 | $mess .= " # TODO $TODO" if defined $TODO; | |
c277df42 | 69 | |
84281c31 | 70 | my $line_nr = (caller(1)) [2]; |
c277df42 | 71 | |
84281c31 A |
72 | printf "%sok %d - %s\n", |
73 | ($ok ? "" : "not "), | |
74 | ++ $test, | |
75 | "$mess\tLine $line_nr"; | |
2cd61cdb | 76 | |
84281c31 A |
77 | unless ($ok) { |
78 | print "# Failed test at line $line_nr\n" unless defined $TODO; | |
79 | if ($error //= $Error) { | |
80 | no warnings 'utf8'; | |
81 | chomp $error; | |
82 | $error = join "\n#", map {pretty $_} split /\n\h*#/ => $error; | |
83 | $error = "# $error" unless $error =~ /^\h*#/; | |
84 | print $error, "\n"; | |
85 | } | |
86 | } | |
74d6a13a | 87 | |
84281c31 | 88 | return $ok; |
7e5428c5 IZ |
89 | } |
90 | ||
84281c31 A |
91 | # Force scalar context on the pattern match |
92 | sub ok ($;$$) {_ok $_ [0], $_ [1], $_ [2]} | |
93 | sub nok ($;$$) {_ok !$_ [0], "Failed: " . ($_ [1] // $Message), $_ [2]} | |
7e5428c5 | 94 | |
2cd61cdb | 95 | |
84281c31 A |
96 | sub skip { |
97 | my $why = shift; | |
98 | safe_globals(); | |
99 | $why =~ s/\n.*//s; | |
100 | $why .= "; Bug $BugId" if defined $BugId; | |
101 | # seems like the new harness code doesnt like todo and skip to be mixed. | |
102 | # which seems like a bug in the harness to me. -- dmq | |
103 | #$why .= " # TODO $TODO" if defined $TODO; | |
104 | ||
105 | my $n = shift // 1; | |
106 | my $line_nr = (caller(0)) [2]; | |
107 | for (1 .. $n) { | |
108 | ++ $test; | |
109 | #print "not " if defined $TODO; | |
110 | print "ok $test # skip $why\tLine $line_nr\n"; | |
111 | } | |
112 | no warnings "exiting"; | |
113 | last SKIP; | |
2cd61cdb | 114 | } |
cbce877f | 115 | |
84281c31 A |
116 | sub iseq ($$;$) { |
117 | my ($got, $expect, $name) = @_; | |
118 | ||
119 | $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; | |
120 | ||
121 | my $ok = $got eq $expect; | |
122 | my $error = "# expected: $expect\n" . | |
123 | "# result: $got"; | |
160cb429 | 124 | |
84281c31 A |
125 | _ok $ok, $name, $error; |
126 | } | |
4599a1de | 127 | |
84281c31 A |
128 | sub isneq ($$;$) { |
129 | my ($got, $expect, $name) = @_; | |
130 | my $todo = $TODO ? " # TODO $TODO" : ''; | |
131 | ||
132 | $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; | |
133 | ||
134 | my $ok = $got ne $expect; | |
135 | my $error = "# results are equal ($got)"; | |
4599a1de | 136 | |
84281c31 A |
137 | _ok $ok, $name, $error; |
138 | } | |
4599a1de | 139 | |
4599a1de | 140 | |
84281c31 A |
141 | sub eval_ok ($;$) { |
142 | my ($code, $name) = @_; | |
143 | local $@; | |
144 | if (ref $code) { | |
145 | _ok eval {&$code} && !$@, $name; | |
146 | } | |
147 | else { | |
148 | _ok eval ($code) && !$@, $name; | |
149 | } | |
e3faa678 | 150 | } |
22e551b9 | 151 | |
84281c31 A |
152 | sub must_die { |
153 | my ($code, $pattern, $name) = @_; | |
154 | $pattern //= $DiePattern; | |
155 | undef $@; | |
156 | ref $code ? &$code : eval $code; | |
157 | my $r = $@ && $@ =~ /$pattern/; | |
158 | _ok $r, $name // $Message // "\$\@ =~ /$pattern/"; | |
e3faa678 | 159 | } |
b7a35066 | 160 | |
84281c31 A |
161 | sub must_warn { |
162 | my ($code, $pattern, $name) = @_; | |
163 | $pattern //= $WarnPattern; | |
164 | my $w; | |
165 | local $SIG {__WARN__} = sub {$w .= join "" => @_}; | |
166 | use warnings 'all'; | |
167 | ref $code ? &$code : eval $code; | |
168 | my $r = $w && $w =~ /$pattern/; | |
169 | $w //= "UNDEF"; | |
170 | _ok $r, $name // $Message // "Got warning /$pattern/", | |
171 | "# expected: /$pattern/\n" . | |
172 | "# result: $w"; | |
173 | } | |
174 | ||
175 | sub may_not_warn { | |
176 | my ($code, $name) = @_; | |
177 | my $w; | |
178 | local $SIG {__WARN__} = sub {$w .= join "" => @_}; | |
179 | use warnings 'all'; | |
180 | ref $code ? &$code : eval $code; | |
181 | _ok !$w, $name // ($Message ? "$Message (did not warn)" | |
182 | : "Did not warn"), | |
183 | "Got warning '$w'"; | |
e3faa678 | 184 | } |
b7a35066 | 185 | |
b7a35066 | 186 | |
84281c31 A |
187 | # |
188 | # Tests start here. | |
189 | # | |
190 | sub run_tests { | |
0ef3e39e | 191 | |
84281c31 | 192 | { |
b485d051 | 193 | |
84281c31 | 194 | my $x = "abc\ndef\n"; |
fd291da9 | 195 | |
84281c31 A |
196 | ok $x =~ /^abc/, qq ["$x" =~ /^abc/]; |
197 | ok $x !~ /^def/, qq ["$x" !~ /^def/]; | |
fd291da9 | 198 | |
84281c31 A |
199 | # used to be a test for $* |
200 | ok $x =~ /^def/m, qq ["$x" =~ /^def/m]; | |
fd291da9 | 201 | |
84281c31 A |
202 | nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/]; |
203 | nok $x !~ /^abc/, qq ["$x" !~ /^abc/]; | |
fd291da9 | 204 | |
84281c31 A |
205 | ok $x =~ /def/, qq ["$x" =~ /def/]; |
206 | nok $x !~ /def/, qq ["$x" !~ /def/]; | |
4765795a | 207 | |
84281c31 A |
208 | ok $x !~ /.def/, qq ["$x" !~ /.def/]; |
209 | nok $x =~ /.def/, qq ["$x" =~ /.def/]; | |
4765795a | 210 | |
84281c31 A |
211 | ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/]; |
212 | nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/]; | |
213 | } | |
4765795a | 214 | |
84281c31 A |
215 | { |
216 | $_ = '123'; | |
217 | ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; | |
218 | } | |
f9969324 | 219 | |
84281c31 A |
220 | { |
221 | $_ = 'aaabbbccc'; | |
222 | ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', | |
223 | qq [\$_ = '$_'; /(a*b*)(c*)/]; | |
224 | ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; | |
225 | nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; | |
226 | ||
227 | $_ = 'aaabccc'; | |
228 | ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; | |
229 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
230 | ||
231 | $_ = 'aaaccc'; | |
232 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
233 | nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]; | |
234 | ||
235 | $_ = 'abcdef'; | |
236 | ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; | |
237 | ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; | |
238 | ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; | |
239 | ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; | |
4765795a | 240 | } |
4765795a | 241 | |
84281c31 A |
242 | { |
243 | # used to be a test for $* | |
244 | ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m]; | |
245 | } | |
4765795a | 246 | |
84281c31 A |
247 | { |
248 | our %XXX = map {($_ => $_)} 123, 234, 345; | |
249 | ||
250 | our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); | |
251 | while ($_ = shift(@XXX)) { | |
252 | my $f = index ($_, 'not') >= 0 ? \&nok : \&ok; | |
253 | my $r = ?(.*)?; | |
254 | &$f ($r, "?(.*)?"); | |
255 | /not/ && reset; | |
256 | if (/not ok 2/) { | |
257 | if ($^O eq 'VMS') { | |
258 | $_ = shift(@XXX); | |
259 | } | |
260 | else { | |
261 | reset 'X'; | |
262 | } | |
263 | } | |
264 | } | |
4765795a | 265 | |
84281c31 A |
266 | SKIP: { |
267 | if ($^O eq 'VMS') { | |
268 | skip "Reset 'X'", 1; | |
269 | } | |
270 | ok !keys %XXX, "%XXX is empty"; | |
271 | } | |
4765795a | 272 | |
84281c31 | 273 | } |
4765795a | 274 | |
84281c31 A |
275 | { |
276 | local $Message = "Test empty pattern"; | |
277 | my $xyz = 'xyz'; | |
278 | my $cde = 'cde'; | |
279 | ||
280 | $cde =~ /[^ab]*/; | |
281 | $xyz =~ //; | |
282 | iseq $&, $xyz; | |
283 | ||
284 | my $foo = '[^ab]*'; | |
285 | $cde =~ /$foo/; | |
286 | $xyz =~ //; | |
287 | iseq $&, $xyz; | |
288 | ||
289 | $cde =~ /$foo/; | |
290 | my $null; | |
291 | no warnings 'uninitialized'; | |
292 | $xyz =~ /$null/; | |
293 | iseq $&, $xyz; | |
294 | ||
295 | $null = ""; | |
296 | $xyz =~ /$null/; | |
297 | iseq $&, $xyz; | |
298 | } | |
4765795a | 299 | |
84281c31 A |
300 | { |
301 | local $Message = q !Check $`, $&, $'!; | |
302 | $_ = 'abcdefghi'; | |
303 | /def/; # optimized up to cmd | |
304 | iseq "$`:$&:$'", 'abc:def:ghi'; | |
4765795a | 305 | |
84281c31 A |
306 | no warnings 'void'; |
307 | /cde/ + 0; # optimized only to spat | |
308 | iseq "$`:$&:$'", 'ab:cde:fghi'; | |
4765795a | 309 | |
84281c31 A |
310 | /[d][e][f]/; # not optimized |
311 | iseq "$`:$&:$'", 'abc:def:ghi'; | |
312 | } | |
4765795a | 313 | |
84281c31 A |
314 | { |
315 | $_ = 'now is the {time for all} good men to come to.'; | |
316 | / {([^}]*)}/; | |
317 | iseq $1, 'time for all', "Match braces"; | |
318 | } | |
4765795a | 319 | |
84281c31 A |
320 | { |
321 | local $Message = "{N,M} quantifier"; | |
322 | $_ = 'xxx {3,4} yyy zzz'; | |
323 | ok /( {3,4})/; | |
324 | iseq $1, ' '; | |
325 | ok !/( {4,})/; | |
326 | ok /( {2,3}.)/; | |
327 | iseq $1, ' y'; | |
328 | ok /(y{2,3}.)/; | |
329 | iseq $1, 'yyy '; | |
330 | ok !/x {3,4}/; | |
331 | ok !/^xxx {3,4}/; | |
332 | } | |
4765795a | 333 | |
84281c31 A |
334 | { |
335 | local $Message = "Test /g"; | |
336 | local $" = ":"; | |
337 | $_ = "now is the time for all good men to come to."; | |
338 | my @words = /(\w+)/g; | |
339 | my $exp = "now:is:the:time:for:all:good:men:to:come:to"; | |
4765795a | 340 | |
84281c31 | 341 | iseq "@words", $exp; |
4765795a | 342 | |
84281c31 A |
343 | @words = (); |
344 | while (/\w+/g) { | |
345 | push (@words, $&); | |
346 | } | |
347 | iseq "@words", $exp; | |
4765795a | 348 | |
84281c31 A |
349 | @words = (); |
350 | pos = 0; | |
351 | while (/to/g) { | |
352 | push(@words, $&); | |
353 | } | |
354 | iseq "@words", "to:to"; | |
4765795a | 355 | |
84281c31 A |
356 | pos $_ = 0; |
357 | @words = /to/g; | |
358 | iseq "@words", "to:to"; | |
359 | } | |
4765795a | 360 | |
84281c31 A |
361 | { |
362 | $_ = "abcdefghi"; | |
363 | ||
364 | my $pat1 = 'def'; | |
365 | my $pat2 = '^def'; | |
366 | my $pat3 = '.def.'; | |
367 | my $pat4 = 'abc'; | |
368 | my $pat5 = '^abc'; | |
369 | my $pat6 = 'abc$'; | |
370 | my $pat7 = 'ghi'; | |
371 | my $pat8 = '\w*ghi'; | |
372 | my $pat9 = 'ghi$'; | |
373 | ||
374 | my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = | |
375 | my $t6 = my $t7 = my $t8 = my $t9 = 0; | |
376 | ||
377 | for my $iter (1 .. 5) { | |
378 | $t1++ if /$pat1/o; | |
379 | $t2++ if /$pat2/o; | |
380 | $t3++ if /$pat3/o; | |
381 | $t4++ if /$pat4/o; | |
382 | $t5++ if /$pat5/o; | |
383 | $t6++ if /$pat6/o; | |
384 | $t7++ if /$pat7/o; | |
385 | $t8++ if /$pat8/o; | |
386 | $t9++ if /$pat9/o; | |
387 | } | |
388 | my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; | |
389 | iseq $x, '505550555', "Test /o"; | |
390 | } | |
4765795a | 391 | |
4765795a | 392 | |
84281c31 A |
393 | SKIP: { |
394 | my $xyz = 'xyz'; | |
395 | ok "abc" =~ /^abc$|$xyz/, "| after \$"; | |
4765795a | 396 | |
84281c31 A |
397 | # perl 4.009 says "unmatched ()" |
398 | local $Message = '$ inside ()'; | |
4765795a | 399 | |
84281c31 A |
400 | my $result; |
401 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; | |
402 | iseq $@, "" or skip "eval failed", 1; | |
403 | iseq $result, "abc:bc"; | |
404 | } | |
4765795a | 405 | |
4765795a | 406 | |
84281c31 A |
407 | { |
408 | local $Message = "Scalar /g"; | |
409 | $_ = "abcfooabcbar"; | |
410 | ||
411 | ok /abc/g && $` eq ""; | |
412 | ok /abc/g && $` eq "abcfoo"; | |
413 | ok !/abc/g; | |
414 | ||
415 | local $Message = "Scalar /gi"; | |
416 | pos = 0; | |
417 | ok /ABC/gi && $` eq ""; | |
418 | ok /ABC/gi && $` eq "abcfoo"; | |
419 | ok !/ABC/gi; | |
420 | ||
421 | local $Message = "Scalar /g"; | |
422 | pos = 0; | |
423 | ok /abc/g && $' eq "fooabcbar"; | |
424 | ok /abc/g && $' eq "bar"; | |
425 | ||
426 | $_ .= ''; | |
427 | my @x = /abc/g; | |
428 | iseq @x, 2, "/g reset after assignment"; | |
4765795a | 429 | } |
4765795a | 430 | |
84281c31 A |
431 | { |
432 | local $Message = '/g, \G and pos'; | |
433 | $_ = "abdc"; | |
434 | pos $_ = 2; | |
435 | /\Gc/gc; | |
436 | iseq pos $_, 2; | |
437 | /\Gc/g; | |
438 | ok !defined pos $_; | |
439 | } | |
4765795a | 440 | |
84281c31 A |
441 | { |
442 | local $Message = '(?{ })'; | |
443 | our $out = 1; | |
444 | 'abc' =~ m'a(?{ $out = 2 })b'; | |
445 | iseq $out, 2; | |
446 | ||
447 | $out = 1; | |
448 | 'abc' =~ m'a(?{ $out = 3 })c'; | |
449 | iseq $out, 1; | |
450 | } | |
4765795a | 451 | |
4765795a | 452 | |
84281c31 A |
453 | { |
454 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; | |
455 | my @out = /(?<!foo)bar./g; | |
456 | iseq "@out", 'bar2 barf', "Negative lookbehind"; | |
457 | } | |
4765795a | 458 | |
84281c31 A |
459 | { |
460 | local $Message = "REG_INFTY tests"; | |
461 | # Tests which depend on REG_INFTY | |
462 | $::reg_infty = $Config {reg_infty} // 32767; | |
463 | $::reg_infty_m = $::reg_infty - 1; | |
464 | $::reg_infty_p = $::reg_infty + 1; | |
465 | $::reg_infty_m = $::reg_infty_m; # Surpress warning. | |
466 | ||
467 | # As well as failing if the pattern matches do unexpected things, the | |
468 | # next three tests will fail if you should have picked up a lower-than- | |
469 | # default value for $reg_infty from Config.pm, but have not. | |
470 | ||
471 | eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa'); | |
472 | eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/); | |
473 | eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/); | |
474 | eval "'aaa' =~ /a{1,$::reg_infty}/"; | |
475 | ok $@ =~ /^\QQuantifier in {,} bigger than/; | |
476 | eval "'aaa' =~ /a{1,$::reg_infty_p}/"; | |
477 | ok $@ =~ /^\QQuantifier in {,} bigger than/; | |
4765795a | 478 | } |
8269fa76 | 479 | |
84281c31 A |
480 | { |
481 | # Poke a couple more parse failures | |
482 | my $context = 'x' x 256; | |
483 | eval qq("${context}y" =~ /(?<=$context)y/); | |
484 | ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; | |
485 | } | |
8269fa76 | 486 | |
84281c31 A |
487 | { |
488 | # Long Monsters | |
489 | local $Message = "Long monster"; | |
490 | for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory | |
491 | my $a = 'a' x $l; | |
492 | local $Error = "length = $l"; | |
493 | ok "ba$a=" =~ /a$a=/; | |
494 | nok "b$a=" =~ /a$a=/; | |
495 | ok "b$a=" =~ /ba+=/; | |
496 | ||
84281c31 A |
497 | ok "ba$a=" =~ /b(?:a|b)+=/; |
498 | } | |
499 | } | |
8269fa76 | 500 | |
b8ef571c | 501 | |
84281c31 A |
502 | { |
503 | # 20000 nodes, each taking 3 words per string, and 1 per branch | |
504 | my $long_constant_len = join '|', 12120 .. 32645; | |
505 | my $long_var_len = join '|', 8120 .. 28645; | |
506 | my %ans = ( 'ax13876y25677lbc' => 1, | |
507 | 'ax13876y25677mcb' => 0, # not b. | |
508 | 'ax13876y35677nbc' => 0, # Num too big | |
509 | 'ax13876y25677y21378obc' => 1, | |
510 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] | |
511 | 'ax13876y25677y21378y21378kbc' => 1, | |
512 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. | |
513 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs | |
514 | ); | |
515 | ||
516 | local $Message = "20000 nodes"; | |
517 | for (keys %ans) { | |
518 | local $Error = "const-len '$_'"; | |
519 | ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o); | |
520 | ||
521 | local $Error = "var-len '$_'"; | |
522 | ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o); | |
523 | } | |
b8ef571c | 524 | } |
209a9bc1 | 525 | |
84281c31 A |
526 | { |
527 | local $Message = "Complicated backtracking"; | |
528 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; | |
529 | my $expect = "(bla()) ((l)u((e))) (l(e)e)"; | |
530 | ||
531 | use vars '$c'; | |
532 | sub matchit { | |
533 | m/ | |
534 | ( | |
535 | \( | |
536 | (?{ $c = 1 }) # Initialize | |
537 | (?: | |
538 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop | |
539 | (?! | |
540 | ) # Fail: will unwind one iteration back | |
541 | ) | |
542 | (?: | |
543 | [^()]+ # Match a big chunk | |
544 | (?= | |
545 | [()] | |
546 | ) # Do not try to match subchunks | |
547 | | | |
548 | \( | |
549 | (?{ ++$c }) | |
550 | | | |
551 | \) | |
552 | (?{ --$c }) | |
553 | ) | |
554 | )+ # This may not match with different subblocks | |
555 | ) | |
556 | (?(?{ $c != 0 }) | |
557 | (?! | |
558 | ) # Fail | |
559 | ) # Otherwise the chunk 1 may succeed with $c>0 | |
560 | /xg; | |
561 | } | |
3568d838 | 562 | |
84281c31 A |
563 | my @ans = (); |
564 | my $res; | |
565 | push @ans, $res while $res = matchit; | |
566 | iseq "@ans", "1 1 1"; | |
3568d838 | 567 | |
84281c31 A |
568 | @ans = matchit; |
569 | iseq "@ans", $expect; | |
3568d838 | 570 | |
84281c31 A |
571 | local $Message = "Recursion with (??{ })"; |
572 | our $matched; | |
573 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; | |
3568d838 | 574 | |
84281c31 A |
575 | @ans = my @ans1 = (); |
576 | push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; | |
3568d838 | 577 | |
84281c31 A |
578 | iseq "@ans", "1 1 1"; |
579 | iseq "@ans1", $expect; | |
3568d838 | 580 | |
84281c31 A |
581 | @ans = m/$matched/g; |
582 | iseq "@ans", $expect; | |
3568d838 | 583 | |
84281c31 | 584 | } |
3568d838 | 585 | |
84281c31 A |
586 | { |
587 | ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; | |
588 | } | |
3568d838 | 589 | |
84281c31 A |
590 | { |
591 | my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad | |
592 | iseq "@ans", 'a/ b', "Stack may be bad"; | |
593 | } | |
3568d838 | 594 | |
84281c31 A |
595 | { |
596 | local $Message = "Eval-group not allowed at runtime"; | |
597 | my $code = '{$blah = 45}'; | |
598 | our $blah = 12; | |
599 | eval { /(?$code)/ }; | |
600 | ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; | |
601 | ||
602 | for $code ('{$blah = 45}','=xx') { | |
603 | $blah = 12; | |
604 | my $res = eval { "xx" =~ /(?$code)/o }; | |
605 | no warnings 'uninitialized'; | |
606 | local $Error = "'$@', '$res', '$blah'"; | |
607 | if ($code eq '=xx') { | |
608 | ok !$@ && $res; | |
609 | } | |
610 | else { | |
611 | ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; | |
612 | } | |
613 | } | |
3568d838 | 614 | |
84281c31 A |
615 | $code = '{$blah = 45}'; |
616 | $blah = 12; | |
617 | eval "/(?$code)/"; | |
618 | iseq $blah, 45; | |
3568d838 | 619 | |
84281c31 A |
620 | $blah = 12; |
621 | /(?{$blah = 45})/; | |
622 | iseq $blah, 45; | |
623 | } | |
3568d838 | 624 | |
84281c31 A |
625 | { |
626 | local $Message = "Pos checks"; | |
627 | my $x = 'banana'; | |
628 | $x =~ /.a/g; | |
629 | iseq pos ($x), 2; | |
3568d838 | 630 | |
84281c31 A |
631 | $x =~ /.z/gc; |
632 | iseq pos ($x), 2; | |
3568d838 | 633 | |
84281c31 A |
634 | sub f { |
635 | my $p = $_[0]; | |
636 | return $p; | |
637 | } | |
3568d838 | 638 | |
84281c31 A |
639 | $x =~ /.a/g; |
640 | iseq f (pos ($x)), 4; | |
641 | } | |
3568d838 | 642 | |
84281c31 A |
643 | { |
644 | local $Message = 'Checking $^R'; | |
645 | our $x = $^R = 67; | |
646 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; | |
647 | iseq $^R, 75; | |
648 | ||
649 | $x = $^R = 67; | |
650 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; | |
651 | ok $^R eq '67' && $x eq '12'; | |
652 | ||
653 | $x = $^R = 67; | |
654 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; | |
655 | ok $^R eq '79' && $x eq '12'; | |
656 | } | |
3568d838 | 657 | |
84281c31 A |
658 | { |
659 | iseq qr/\b\v$/i, '(?i-xsm:\b\v$)', 'qr/\b\v$/i'; | |
660 | iseq qr/\b\v$/s, '(?s-xim:\b\v$)', 'qr/\b\v$/s'; | |
661 | iseq qr/\b\v$/m, '(?m-xis:\b\v$)', 'qr/\b\v$/m'; | |
662 | iseq qr/\b\v$/x, '(?x-ism:\b\v$)', 'qr/\b\v$/x'; | |
663 | iseq qr/\b\v$/xism, '(?msix:\b\v$)', 'qr/\b\v$/xism'; | |
664 | iseq qr/\b\v$/, '(?-xism:\b\v$)', 'qr/\b\v$/'; | |
665 | } | |
3568d838 | 666 | |
3568d838 | 667 | |
84281c31 A |
668 | { |
669 | local $Message = "Look around"; | |
670 | $_ = 'xabcx'; | |
671 | SKIP: | |
672 | foreach my $ans ('', 'c') { | |
673 | ok /(?<=(?=a)..)((?=c)|.)/g or skip "Match failed", 1; | |
674 | iseq $1, $ans; | |
675 | } | |
676 | } | |
3568d838 | 677 | |
84281c31 A |
678 | { |
679 | local $Message = "Empty clause"; | |
680 | $_ = 'a'; | |
681 | foreach my $ans ('', 'a', '') { | |
682 | ok /^|a|$/g or skip "Match failed", 1; | |
683 | iseq $&, $ans; | |
684 | } | |
685 | } | |
3568d838 | 686 | |
84281c31 A |
687 | { |
688 | local $Message = "Prefixify"; | |
689 | sub prefixify { | |
690 | SKIP: { | |
691 | my ($v, $a, $b, $res) = @_; | |
692 | ok $v =~ s/\Q$a\E/$b/ or skip "Match failed", 1; | |
693 | iseq $v, $res; | |
694 | } | |
695 | } | |
3568d838 | 696 | |
84281c31 A |
697 | prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); |
698 | prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); | |
699 | } | |
3568d838 | 700 | |
84281c31 A |
701 | { |
702 | $_ = 'var="foo"'; | |
703 | /(\")/; | |
704 | ok $1 && /$1/, "Capture a quote"; | |
705 | } | |
3568d838 | 706 | |
84281c31 A |
707 | { |
708 | local $Message = "Call code from qr //"; | |
709 | $a = qr/(?{++$b})/; | |
710 | $b = 7; | |
711 | ok /$a$a/ && $b eq '9'; | |
712 | ||
713 | $c="$a"; | |
714 | ok /$a$a/ && $b eq '11'; | |
715 | ||
716 | undef $@; | |
717 | eval {/$c/}; | |
718 | ok $@ && $@ =~ /not allowed at runtime/; | |
719 | ||
720 | use re "eval"; | |
721 | /$a$c$a/; | |
722 | iseq $b, '14'; | |
723 | ||
724 | our $lex_a = 43; | |
725 | our $lex_b = 17; | |
726 | our $lex_c = 27; | |
727 | my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); | |
728 | ||
729 | iseq $lex_res, 1; | |
730 | iseq $lex_a, 44; | |
731 | iseq $lex_c, 43; | |
732 | ||
733 | no re "eval"; | |
734 | undef $@; | |
735 | my $match = eval { /$a$c$a/ }; | |
736 | ok $@ && $@ =~ /Eval-group not allowed/ && !$match; | |
737 | iseq $b, '14'; | |
738 | ||
739 | $lex_a = 2; | |
740 | $lex_a = 43; | |
741 | $lex_b = 17; | |
742 | $lex_c = 27; | |
743 | $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); | |
744 | ||
745 | iseq $lex_res, 1; | |
746 | iseq $lex_a, 44; | |
747 | iseq $lex_c, 43; | |
3568d838 | 748 | |
84281c31 | 749 | } |
3568d838 | 750 | |
3568d838 | 751 | |
84281c31 A |
752 | { |
753 | no warnings 'closure'; | |
754 | local $Message = '(?{ $var } refers to package vars'; | |
755 | package aa; | |
756 | our $c = 2; | |
757 | $::c = 3; | |
758 | '' =~ /(?{ $c = 4 })/; | |
759 | main::iseq $c, 4; | |
760 | main::iseq $::c, 3; | |
761 | } | |
3568d838 | 762 | |
3568d838 | 763 | |
84281c31 A |
764 | { |
765 | must_die 'q(a:[b]:) =~ /[x[:foo:]]/', | |
766 | 'POSIX class \[:[^:]+:\] unknown in regex', | |
767 | 'POSIX class [: :] must have valid name'; | |
768 | ||
769 | for my $d (qw [= .]) { | |
770 | must_die "/[[${d}foo${d}]]/", | |
771 | "\QPOSIX syntax [$d $d] is reserved for future extensions", | |
772 | "POSIX syntax [[$d $d]] is an error"; | |
773 | } | |
774 | } | |
3568d838 | 775 | |
3568d838 | 776 | |
84281c31 A |
777 | { |
778 | # test if failure of patterns returns empty list | |
779 | local $Message = "Failed pattern returns empty list"; | |
780 | $_ = 'aaa'; | |
781 | @_ = /bbb/; | |
782 | iseq "@_", ""; | |
3568d838 | 783 | |
84281c31 A |
784 | @_ = /bbb/g; |
785 | iseq "@_", ""; | |
a72deede | 786 | |
84281c31 A |
787 | @_ = /(bbb)/; |
788 | iseq "@_", ""; | |
a72deede | 789 | |
84281c31 A |
790 | @_ = /(bbb)/g; |
791 | iseq "@_", ""; | |
792 | } | |
a72deede | 793 | |
84281c31 A |
794 | |
795 | { | |
796 | local $Message = '@- and @+ tests'; | |
797 | ||
798 | /a(?=.$)/; | |
799 | iseq $#+, 0; | |
800 | iseq $#-, 0; | |
801 | iseq $+ [0], 2; | |
802 | iseq $- [0], 1; | |
803 | ok !defined $+ [1] && !defined $- [1] && | |
804 | !defined $+ [2] && !defined $- [2]; | |
805 | ||
806 | /a(a)(a)/; | |
807 | iseq $#+, 2; | |
808 | iseq $#-, 2; | |
809 | iseq $+ [0], 3; | |
810 | iseq $- [0], 0; | |
811 | iseq $+ [1], 2; | |
812 | iseq $- [1], 1; | |
813 | iseq $+ [2], 3; | |
814 | iseq $- [2], 2; | |
815 | ok !defined $+ [3] && !defined $- [3] && | |
816 | !defined $+ [4] && !defined $- [4]; | |
817 | ||
818 | ||
819 | /.(a)(b)?(a)/; | |
820 | iseq $#+, 3; | |
821 | iseq $#-, 3; | |
822 | iseq $+ [1], 2; | |
823 | iseq $- [1], 1; | |
824 | iseq $+ [3], 3; | |
825 | iseq $- [3], 2; | |
826 | ok !defined $+ [2] && !defined $- [2] && | |
827 | !defined $+ [4] && !defined $- [4]; | |
828 | ||
829 | ||
830 | /.(a)/; | |
831 | iseq $#+, 1; | |
832 | iseq $#-, 1; | |
833 | iseq $+ [0], 2; | |
834 | iseq $- [0], 0; | |
835 | iseq $+ [1], 2; | |
836 | iseq $- [1], 1; | |
837 | ok !defined $+ [2] && !defined $- [2] && | |
838 | !defined $+ [3] && !defined $- [3]; | |
839 | ||
840 | /.(a)(ba*)?/; | |
841 | iseq $#+, 2; | |
842 | iseq $#-, 1; | |
843 | } | |
a72deede | 844 | |
a72deede | 845 | |
84281c31 A |
846 | { |
847 | local $DiePattern = '^Modification of a read-only value attempted'; | |
848 | local $Message = 'Elements of @- and @+ are read-only'; | |
849 | must_die '$+[0] = 13'; | |
850 | must_die '$-[0] = 13'; | |
851 | must_die '@+ = (7, 6, 5)'; | |
852 | must_die '@- = qw (foo bar)'; | |
853 | } | |
a72deede | 854 | |
a72deede | 855 | |
84281c31 A |
856 | { |
857 | local $Message = '\G testing'; | |
858 | $_ = 'aaa'; | |
859 | pos = 1; | |
860 | my @a = /\Ga/g; | |
861 | iseq "@a", "a a"; | |
862 | ||
863 | my $str = 'abcde'; | |
864 | pos $str = 2; | |
865 | ok $str !~ /^\G/; | |
866 | ok $str !~ /^.\G/; | |
867 | ok $str =~ /^..\G/; | |
868 | ok $str !~ /^...\G/; | |
869 | ok $str =~ /\G../ && $& eq 'cd'; | |
870 | ||
871 | local $TODO = $running_as_thread; | |
872 | ok $str =~ /.\G./ && $& eq 'bc'; | |
873 | } | |
a72deede | 874 | |
569b5e07 | 875 | |
84281c31 A |
876 | { |
877 | local $Message = 'pos inside (?{ })'; | |
878 | my $str = 'abcde'; | |
879 | our ($foo, $bar); | |
880 | ok $str =~ /b(?{$foo = $_; $bar = pos})c/; | |
881 | iseq $foo, $str; | |
882 | iseq $bar, 2; | |
883 | ok !defined pos ($str); | |
884 | ||
885 | undef $foo; | |
886 | undef $bar; | |
887 | pos $str = undef; | |
888 | ok $str =~ /b(?{$foo = $_; $bar = pos})c/g; | |
889 | iseq $foo, $str; | |
890 | iseq $bar, 2; | |
891 | iseq pos ($str), 3; | |
892 | ||
893 | $_ = $str; | |
894 | undef $foo; | |
895 | undef $bar; | |
896 | ok /b(?{$foo = $_; $bar = pos})c/; | |
897 | iseq $foo, $str; | |
898 | iseq $bar, 2; | |
899 | ||
900 | undef $foo; | |
901 | undef $bar; | |
902 | ok /b(?{$foo = $_; $bar = pos})c/g; | |
903 | iseq $foo, $str; | |
904 | iseq $bar, 2; | |
905 | iseq pos, 3; | |
906 | ||
907 | undef $foo; | |
908 | undef $bar; | |
909 | pos = undef; | |
910 | 1 while /b(?{$foo = $_; $bar = pos})c/g; | |
911 | iseq $foo, $str; | |
912 | iseq $bar, 2; | |
913 | ok !defined pos; | |
914 | ||
915 | undef $foo; | |
916 | undef $bar; | |
917 | $_ = 'abcde|abcde'; | |
918 | ok s/b(?{$foo = $_; $bar = pos})c/x/g; | |
919 | iseq $foo, 'abcde|abcde'; | |
920 | iseq $bar, 8; | |
921 | iseq $_, 'axde|axde'; | |
922 | ||
923 | # List context: | |
924 | $_ = 'abcde|abcde'; | |
925 | our @res; | |
926 | () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; | |
927 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
928 | iseq "@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; | |
929 | ||
930 | @res = (); | |
931 | () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; | |
932 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
933 | iseq "@res", "'' 'ab' 'cde|abcde' " . | |
934 | "'' 'abc' 'de|abcde' " . | |
935 | "'abcd' 'e|' 'abcde' " . | |
936 | "'abcde|' 'ab' 'cde' " . | |
937 | "'abcde|' 'abc' 'de'" ; | |
938 | } | |
f33976b4 | 939 | |
cce850e4 | 940 | |
84281c31 A |
941 | { |
942 | local $Message = '\G anchor checks'; | |
943 | my $foo = 'aabbccddeeffgg'; | |
944 | pos ($foo) = 1; | |
945 | { | |
946 | local $TODO = $running_as_thread; | |
947 | no warnings 'uninitialized'; | |
948 | ok $foo =~ /.\G(..)/g; | |
949 | iseq $1, 'ab'; | |
cce850e4 | 950 | |
84281c31 A |
951 | pos ($foo) += 1; |
952 | ok $foo =~ /.\G(..)/g; | |
953 | iseq $1, 'cc'; | |
cce850e4 | 954 | |
84281c31 A |
955 | pos ($foo) += 1; |
956 | ok $foo =~ /.\G(..)/g; | |
957 | iseq $1, 'de'; | |
cce850e4 | 958 | |
84281c31 A |
959 | ok $foo =~ /\Gef/g; |
960 | } | |
cce850e4 | 961 | |
84281c31 A |
962 | undef pos $foo; |
963 | ok $foo =~ /\G(..)/g; | |
964 | iseq $1, 'aa'; | |
cce850e4 | 965 | |
84281c31 A |
966 | ok $foo =~ /\G(..)/g; |
967 | iseq $1, 'bb'; | |
cce850e4 | 968 | |
84281c31 A |
969 | pos ($foo) = 5; |
970 | ok $foo =~ /\G(..)/g; | |
971 | iseq $1, 'cd'; | |
972 | } | |
cce850e4 | 973 | |
cce850e4 | 974 | |
84281c31 A |
975 | { |
976 | $_ = '123x123'; | |
977 | my @res = /(\d*|x)/g; | |
978 | local $" = '|'; | |
979 | iseq "@res", "123||x|123|", "0 match in alternation"; | |
980 | } | |
cce850e4 | 981 | |
d9f424b2 | 982 | |
84281c31 A |
983 | { |
984 | local $Message = "Match against temporaries (created via pp_helem())" . | |
985 | " is safe"; | |
986 | ok {foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g; | |
987 | iseq $1, "bar"; | |
988 | } | |
75685a94 | 989 | |
d9f424b2 | 990 | |
84281c31 A |
991 | { |
992 | local $Message = 'package $i inside (?{ }), ' . | |
993 | 'saved substrings and changing $_'; | |
994 | our @a = qw [foo bar]; | |
995 | our @b = (); | |
996 | s/(\w)(?{push @b, $1})/,$1,/g for @a; | |
997 | iseq "@b", "f o o b a r"; | |
998 | iseq "@a", ",f,,o,,o, ,b,,a,,r,"; | |
999 | ||
1000 | local $Message = 'lexical $i inside (?{ }), ' . | |
1001 | 'saved substrings and changing $_'; | |
1002 | no warnings 'closure'; | |
1003 | my @c = qw [foo bar]; | |
1004 | my @d = (); | |
1005 | s/(\w)(?{push @d, $1})/,$1,/g for @c; | |
1006 | iseq "@d", "f o o b a r"; | |
1007 | iseq "@c", ",f,,o,,o, ,b,,a,,r,"; | |
d9f424b2 JH |
1008 | } |
1009 | ||
d9f424b2 | 1010 | |
84281c31 A |
1011 | { |
1012 | local $Message = 'Brackets'; | |
1013 | our $brackets; | |
1014 | $brackets = qr { | |
1015 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
1016 | }x; | |
1017 | ||
1018 | ok "{{}" =~ $brackets; | |
1019 | iseq $&, "{}"; | |
1020 | ok "something { long { and } hairy" =~ $brackets; | |
1021 | iseq $&, "{ and }"; | |
1022 | ok "something { long { and } hairy" =~ m/((??{ $brackets }))/; | |
1023 | iseq $&, "{ and }"; | |
1024 | } | |
a4c04bdc | 1025 | |
e2d8ce26 | 1026 | |
84281c31 A |
1027 | { |
1028 | $_ = "a-a\nxbb"; | |
1029 | pos = 1; | |
1030 | nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'; | |
1031 | } | |
a4c04bdc | 1032 | |
a4c04bdc | 1033 | |
84281c31 A |
1034 | { |
1035 | local $Message = '\G anchor checks'; | |
1036 | my $text = "aaXbXcc"; | |
1037 | pos ($text) = 0; | |
1038 | ok $text !~ /\GXb*X/g; | |
1039 | } | |
a4c04bdc | 1040 | |
a4c04bdc | 1041 | |
84281c31 A |
1042 | { |
1043 | $_ = "xA\n" x 500; | |
1044 | nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'; | |
a4c04bdc | 1045 | |
84281c31 A |
1046 | my $text = "abc dbf"; |
1047 | my @res = ($text =~ /.*?(b).*?\b/g); | |
1048 | iseq "@res", "b b", '\b is not special'; | |
987aaf07 | 1049 | } |
a4c04bdc | 1050 | |
a4c04bdc | 1051 | |
84281c31 A |
1052 | { |
1053 | local $Message = '\S, [\S], \s, [\s]'; | |
1054 | my @a = map chr, 0 .. 255; | |
1055 | my @b = grep /\S/, @a; | |
1056 | my @c = grep /[^\s]/, @a; | |
1057 | iseq "@b", "@c"; | |
1058 | ||
1059 | @b = grep /\S/, @a; | |
1060 | @c = grep /[\S]/, @a; | |
1061 | iseq "@b", "@c"; | |
1062 | ||
1063 | @b = grep /\s/, @a; | |
1064 | @c = grep /[^\S]/, @a; | |
1065 | iseq "@b", "@c"; | |
1066 | ||
1067 | @b = grep /\s/, @a; | |
1068 | @c = grep /[\s]/, @a; | |
1069 | iseq "@b", "@c"; | |
1070 | } | |
1071 | { | |
1072 | local $Message = '\D, [\D], \d, [\d]'; | |
1073 | my @a = map chr, 0 .. 255; | |
1074 | my @b = grep /\D/, @a; | |
1075 | my @c = grep /[^\d]/, @a; | |
1076 | iseq "@b", "@c"; | |
1077 | ||
1078 | @b = grep /\D/, @a; | |
1079 | @c = grep /[\D]/, @a; | |
1080 | iseq "@b", "@c"; | |
1081 | ||
1082 | @b = grep /\d/, @a; | |
1083 | @c = grep /[^\D]/, @a; | |
1084 | iseq "@b", "@c"; | |
1085 | ||
1086 | @b = grep /\d/, @a; | |
1087 | @c = grep /[\d]/, @a; | |
1088 | iseq "@b", "@c"; | |
1089 | } | |
1090 | { | |
1091 | local $Message = '\W, [\W], \w, [\w]'; | |
1092 | my @a = map chr, 0 .. 255; | |
1093 | my @b = grep /\W/, @a; | |
1094 | my @c = grep /[^\w]/, @a; | |
1095 | iseq "@b", "@c"; | |
1096 | ||
1097 | @b = grep /\W/, @a; | |
1098 | @c = grep /[\W]/, @a; | |
1099 | iseq "@b", "@c"; | |
1100 | ||
1101 | @b = grep /\w/, @a; | |
1102 | @c = grep /[^\W]/, @a; | |
1103 | iseq "@b", "@c"; | |
1104 | ||
1105 | @b = grep /\w/, @a; | |
1106 | @c = grep /[\w]/, @a; | |
1107 | iseq "@b", "@c"; | |
1108 | } | |
a4c04bdc | 1109 | |
a4c04bdc | 1110 | |
84281c31 A |
1111 | { |
1112 | # see if backtracking optimization works correctly | |
1113 | local $Message = 'Backtrack optimization'; | |
1114 | ok "\n\n" =~ /\n $ \n/x; | |
1115 | ok "\n\n" =~ /\n* $ \n/x; | |
1116 | ok "\n\n" =~ /\n+ $ \n/x; | |
1117 | ok "\n\n" =~ /\n? $ \n/x; | |
1118 | ok "\n\n" =~ /\n*? $ \n/x; | |
1119 | ok "\n\n" =~ /\n+? $ \n/x; | |
1120 | ok "\n\n" =~ /\n?? $ \n/x; | |
1121 | ok "\n\n" !~ /\n*+ $ \n/x; | |
1122 | ok "\n\n" !~ /\n++ $ \n/x; | |
1123 | ok "\n\n" =~ /\n?+ $ \n/x; | |
1124 | } | |
a4c04bdc | 1125 | |
a4c04bdc | 1126 | |
84281c31 A |
1127 | { |
1128 | package S; | |
1129 | use overload '""' => sub {'Object S'}; | |
1130 | sub new {bless []} | |
1131 | ||
1132 | local $Message = "Ref stringification"; | |
1133 | ::ok do { \my $v} =~ /^SCALAR/, "Scalar ref stringification"; | |
1134 | ::ok do {\\my $v} =~ /^REF/, "Ref ref stringification"; | |
1135 | ::ok [] =~ /^ARRAY/, "Array ref stringification"; | |
1136 | ::ok {} =~ /^HASH/, "Hash ref stringification"; | |
1137 | ::ok 'S' -> new =~ /^Object S/, "Object stringification"; | |
1138 | } | |
a4c04bdc | 1139 | |
a4c04bdc | 1140 | |
84281c31 A |
1141 | { |
1142 | local $Message = "Test result of match used as match"; | |
1143 | ok 'a1b' =~ ('xyz' =~ /y/); | |
1144 | iseq $`, 'a'; | |
1145 | ok 'a1b' =~ ('xyz' =~ /t/); | |
1146 | iseq $`, 'a'; | |
1147 | } | |
a4c04bdc | 1148 | |
a4c04bdc | 1149 | |
84281c31 A |
1150 | { |
1151 | local $Message = '"1" is not \s'; | |
1152 | may_not_warn sub {ok ("1\n" x 102) !~ /^\s*\n/m}; | |
1153 | } | |
a4c04bdc | 1154 | |
a4c04bdc | 1155 | |
84281c31 A |
1156 | { |
1157 | local $Message = '\s, [[:space:]] and [[:blank:]]'; | |
1158 | my %space = (spc => " ", | |
1159 | tab => "\t", | |
1160 | cr => "\r", | |
1161 | lf => "\n", | |
1162 | ff => "\f", | |
1163 | # There's no \v but the vertical tabulator seems miraculously | |
1164 | # be 11 both in ASCII and EBCDIC. | |
1165 | vt => chr(11), | |
1166 | false => "space"); | |
1167 | ||
1168 | my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; | |
1169 | my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; | |
1170 | my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; | |
1171 | ||
1172 | iseq "@space0", "cr ff lf spc tab"; | |
1173 | iseq "@space1", "cr ff lf spc tab vt"; | |
1174 | iseq "@space2", "spc tab"; | |
1175 | } | |
a4c04bdc | 1176 | |
a4c04bdc | 1177 | |
84281c31 A |
1178 | { |
1179 | local $BugId = '20000731.001'; | |
1180 | ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/, | |
1181 | "Match UTF-8 char in presense of (??{ })"; | |
1182 | } | |
a4c04bdc | 1183 | |
a4c04bdc | 1184 | |
84281c31 A |
1185 | { |
1186 | local $BugId = '20001021.005'; | |
1187 | no warnings 'uninitialized'; | |
1188 | ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV"; | |
1189 | } | |
a4c04bdc | 1190 | |
a4c04bdc | 1191 | |
84281c31 A |
1192 | SKIP: |
1193 | { | |
1194 | local $Message = '\C matches octet'; | |
1195 | $_ = "a\x{100}b"; | |
1196 | ok /(.)(\C)(\C)(.)/ or skip q [\C doesn't match], 4; | |
1197 | iseq $1, "a"; | |
1198 | if ($IS_ASCII) { # ASCII (or equivalent), should be UTF-8 | |
1199 | iseq $2, "\xC4"; | |
1200 | iseq $3, "\x80"; | |
1201 | } | |
1202 | elsif ($IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC | |
1203 | iseq $2, "\x8C"; | |
1204 | iseq $3, "\x41"; | |
1205 | } | |
1206 | else { | |
1207 | SKIP: { | |
1208 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1209 | skip "Unexpected platform"; | |
1210 | } | |
1211 | } | |
1212 | iseq $4, "b"; | |
1213 | } | |
a4c04bdc | 1214 | |
a4c04bdc | 1215 | |
84281c31 A |
1216 | SKIP: |
1217 | { | |
1218 | local $Message = '\C matches octet'; | |
1219 | $_ = "\x{100}"; | |
1220 | ok /(\C)/g or skip q [\C doesn't match], 2; | |
1221 | if ($IS_ASCII) { | |
1222 | iseq $1, "\xC4"; | |
1223 | } | |
1224 | elsif ($IS_EBCDIC) { | |
1225 | iseq $1, "\x8C"; | |
1226 | } | |
1227 | else { | |
1228 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1229 | } | |
1230 | ok /(\C)/g or skip q [\C doesn't match]; | |
1231 | if ($IS_ASCII) { | |
1232 | iseq $1, "\x80"; | |
1233 | } | |
1234 | elsif ($IS_EBCDIC) { | |
1235 | iseq $1, "\x41"; | |
1236 | } | |
1237 | else { | |
1238 | ok 0, "Unexpected platform", "ord ('A') = $ordA"; | |
1239 | } | |
1240 | } | |
a4c04bdc | 1241 | |
a4c04bdc | 1242 | |
84281c31 A |
1243 | { |
1244 | # Japhy -- added 03/03/2001 | |
1245 | () = (my $str = "abc") =~ /(...)/; | |
1246 | $str = "def"; | |
1247 | iseq $1, "abc", 'Changing subject does not modify $1'; | |
1248 | } | |
a4c04bdc | 1249 | |
a4c04bdc | 1250 | |
84281c31 A |
1251 | SKIP: |
1252 | { | |
1253 | # The trick is that in EBCDIC the explicit numeric range should | |
1254 | # match (as also in non-EBCDIC) but the explicit alphabetic range | |
1255 | # should not match. | |
1256 | ok "\x8e" =~ /[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; | |
1257 | ok "\xce" =~ /[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; | |
1258 | ||
1259 | skip "Not an EBCDIC platform", 2 unless ord ('i') == 0x89 && | |
1260 | ord ('J') == 0xd1; | |
1261 | ||
1262 | # In most places these tests would succeed since \x8e does not | |
1263 | # in most character sets match 'i' or 'j' nor would \xce match | |
1264 | # 'I' or 'J', but strictly speaking these tests are here for | |
1265 | # the good of EBCDIC, so let's test these only there. | |
1266 | nok "\x8e" !~ /[i-j]/, '"\x8e" !~ /[i-j]/'; | |
1267 | nok "\xce" !~ /[I-J]/, '"\xce" !~ /[I-J]/'; | |
1268 | } | |
a4c04bdc | 1269 | |
a4c04bdc | 1270 | |
84281c31 A |
1271 | { |
1272 | ok "\x{ab}" =~ /\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; | |
1273 | ok "\x{abcd}" =~ /\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; | |
1274 | } | |
a4c04bdc | 1275 | |
a4c04bdc | 1276 | |
84281c31 A |
1277 | { |
1278 | local $Message = 'bug id 20001008.001'; | |
1279 | ||
1280 | my @x = ("stra\337e 138", "stra\337e 138"); | |
1281 | for (@x) { | |
1282 | ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; | |
1283 | ok my ($latin) = /^(.+)(?:\s+\d)/; | |
1284 | iseq $latin, "stra\337e"; | |
1285 | ok $latin =~ s/stra\337e/straße/; | |
1286 | # | |
1287 | # Previous code follows, but outcommented - there were no tests. | |
1288 | # | |
1289 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
1290 | # use utf8; # needed for the raw UTF-8 | |
1291 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
1292 | } | |
1293 | } | |
a4c04bdc | 1294 | |
e2d8ce26 | 1295 | |
84281c31 A |
1296 | { |
1297 | local $Message = 'Test \x escapes'; | |
1298 | ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1299 | ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1300 | ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1301 | ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1302 | ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1303 | ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1304 | ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1305 | ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1306 | } | |
e2d8ce26 | 1307 | |
4193bef7 | 1308 | |
84281c31 A |
1309 | { |
1310 | local $BugId = '20001028.003'; | |
1311 | ||
1312 | # Fist half of the bug. | |
1313 | local $Message = 'HEBREW ACCENT QADMA matched by .*'; | |
1314 | my $X = chr (1448); | |
1315 | ok my ($Y) = $X =~ /(.*)/; | |
1316 | iseq $Y, v1448; | |
1317 | iseq length ($Y), 1; | |
1318 | ||
1319 | # Second half of the bug. | |
1320 | $Message = 'HEBREW ACCENT QADMA in replacement'; | |
1321 | $X = ''; | |
1322 | $X =~ s/^/chr(1488)/e; | |
1323 | iseq length $X, 1; | |
1324 | iseq ord ($X), 1488; | |
1325 | } | |
ab13f0c7 | 1326 | |
4193bef7 | 1327 | |
84281c31 A |
1328 | { |
1329 | local $BugId = '20001108.001'; | |
1330 | local $Message = 'Repeated s///'; | |
1331 | my $X = "Szab\x{f3},Bal\x{e1}zs"; | |
1332 | my $Y = $X; | |
1333 | $Y =~ s/(B)/$1/ for 0 .. 3; | |
1334 | iseq $Y, $X; | |
1335 | iseq $X, "Szab\x{f3},Bal\x{e1}zs"; | |
1336 | } | |
4193bef7 | 1337 | |
4193bef7 | 1338 | |
84281c31 A |
1339 | { |
1340 | local $BugId = '20000517.001'; | |
1341 | local $Message = 's/// on UTF-8 string'; | |
1342 | my $x = "\x{100}A"; | |
1343 | $x =~ s/A/B/; | |
1344 | iseq $x, "\x{100}B"; | |
1345 | iseq length $x, 2; | |
1346 | } | |
4193bef7 | 1347 | |
4193bef7 | 1348 | |
84281c31 A |
1349 | { |
1350 | local $BugId = '20001230.002'; | |
1351 | local $Message = '\C and É'; | |
1352 | ok "École" =~ /^\C\C(.)/ && $1 eq 'c'; | |
1353 | ok "École" =~ /^\C\C(c)/; | |
1354 | } | |
c87b7cc2 | 1355 | |
c87b7cc2 | 1356 | |
84281c31 A |
1357 | SKIP: |
1358 | { | |
1359 | local $Message = 'Match code points > 255'; | |
1360 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; | |
1361 | ok /(.\x{300})./ or skip "No match", 4; | |
1362 | ok $` eq "abc\x{100}" && length ($`) == 4; | |
1363 | ok $& eq "\x{200}\x{300}\x{380}" && length ($&) == 3; | |
1364 | ok $' eq "\x{400}defg" && length ($') == 5; | |
1365 | ok $1 eq "\x{200}\x{300}" && length ($1) == 2; | |
1366 | } | |
c87b7cc2 | 1367 | |
c87b7cc2 | 1368 | |
84281c31 A |
1369 | { |
1370 | # The original bug report had 'no utf8' here but that was irrelevant. | |
1371 | local $BugId = '20010306.008'; | |
1372 | local $Message = "Don't dump core"; | |
1373 | my $a = "a\x{1234}"; | |
1374 | ok $a =~ m/\w/; # used to core dump. | |
1375 | } | |
c87b7cc2 | 1376 | |
c87b7cc2 | 1377 | |
84281c31 A |
1378 | { |
1379 | local $BugId = '20010410.006'; | |
1380 | local $Message = '/g in scalar context'; | |
1381 | for my $rx ('/(.*?)\{(.*?)\}/csg', | |
1382 | '/(.*?)\{(.*?)\}/cg', | |
1383 | '/(.*?)\{(.*?)\}/sg', | |
1384 | '/(.*?)\{(.*?)\}/g', | |
1385 | '/(.+?)\{(.+?)\}/csg',) { | |
1386 | my $i = 0; | |
1387 | my $input = "a{b}c{d}"; | |
1388 | eval <<" --"; | |
1389 | while (eval \$input =~ $rx) { | |
1390 | \$i ++; | |
1391 | } | |
1392 | -- | |
1393 | iseq $i, 2; | |
1394 | } | |
1395 | } | |
c87b7cc2 | 1396 | |
c87b7cc2 | 1397 | |
84281c31 A |
1398 | { |
1399 | my $x = "\x{10FFFD}"; | |
1400 | $x =~ s/(.)/$1/g; | |
1401 | ok ord($x) == 0x10FFFD && length($x) == 1, "From Robin Houston"; | |
1402 | } | |
61247495 | 1403 | |
61247495 | 1404 | |
84281c31 A |
1405 | { |
1406 | my %d = ( | |
1407 | "7f" => [0, 0, 0], | |
1408 | "80" => [1, 1, 0], | |
1409 | "ff" => [1, 1, 0], | |
1410 | "100" => [0, 1, 1], | |
1411 | ); | |
1412 | SKIP: | |
1413 | while (my ($code, $match) = each %d) { | |
1414 | local $Message = "Properties of \\x$code"; | |
1415 | my $char = eval qq ["\\x{$code}"]; | |
1416 | my $i = 0; | |
1417 | ok (($char =~ /[\x80-\xff]/) xor !$$match [$i ++]); | |
1418 | ok (($char =~ /[\x80-\x{100}]/) xor !$$match [$i ++]); | |
1419 | ok (($char =~ /[\x{100}]/) xor !$$match [$i ++]); | |
1420 | } | |
1421 | } | |
61247495 | 1422 | |
61247495 | 1423 | |
84281c31 A |
1424 | { |
1425 | # From Japhy | |
1426 | local $Message; | |
1427 | must_warn 'qr/(?c)/', '^Useless \(\?c\)'; | |
1428 | must_warn 'qr/(?-c)/', '^Useless \(\?-c\)'; | |
1429 | must_warn 'qr/(?g)/', '^Useless \(\?g\)'; | |
1430 | must_warn 'qr/(?-g)/', '^Useless \(\?-g\)'; | |
1431 | must_warn 'qr/(?o)/', '^Useless \(\?o\)'; | |
1432 | must_warn 'qr/(?-o)/', '^Useless \(\?-o\)'; | |
1433 | ||
1434 | # Now test multi-error regexes | |
1435 | must_warn 'qr/(?g-o)/', '^Useless \(\?g\).*\nUseless \(\?-o\)'; | |
1436 | must_warn 'qr/(?g-c)/', '^Useless \(\?g\).*\nUseless \(\?-c\)'; | |
1437 | # (?c) means (?g) error won't be thrown | |
1438 | must_warn 'qr/(?o-cg)/', '^Useless \(\?o\).*\nUseless \(\?-c\)'; | |
1439 | must_warn 'qr/(?ogc)/', '^Useless \(\?o\).*\nUseless \(\?g\).*\n' . | |
1440 | 'Useless \(\?c\)'; | |
1441 | } | |
61247495 | 1442 | |
61247495 | 1443 | |
84281c31 A |
1444 | { |
1445 | local $Message = "/x tests"; | |
1446 | $_ = "foo"; | |
1447 | eval_ok <<" --"; | |
1448 | /f | |
1449 | o\r | |
1450 | o | |
1451 | \$ | |
1452 | /x | |
1453 | -- | |
1454 | eval_ok <<" --"; | |
1455 | /f | |
1456 | o | |
1457 | o | |
1458 | \$\r | |
1459 | /x | |
1460 | -- | |
1461 | } | |
61247495 | 1462 | |
61247495 | 1463 | |
84281c31 A |
1464 | { |
1465 | local $Message = "/o feature"; | |
1466 | sub test_o {$_ [0] =~ /$_[1]/o; return $1} | |
1467 | iseq test_o ('abc', '(.)..'), 'a'; | |
1468 | iseq test_o ('abc', '..(.)'), 'a'; | |
1469 | } | |
61247495 | 1470 | |
9b4e380a | 1471 | |
84281c31 A |
1472 | { |
1473 | local $BugId = "20010619.003"; | |
1474 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. | |
1475 | for ("\n", "\t", "\014", "\r") { | |
1476 | ok !/[[:print:]]/, "'$_' not in [[:print:]]"; | |
1477 | } | |
1478 | for (" ") { | |
1479 | ok /[[:print:]]/, "'$_' in [[:print:]]"; | |
1480 | } | |
1481 | } | |
9b4e380a | 1482 | |
9b4e380a | 1483 | |
84281c31 A |
1484 | { |
1485 | # Test basic $^N usage outside of a regex | |
1486 | local $Message = '$^N usage outside of a regex'; | |
1487 | my $x = "abcdef"; | |
1488 | ok ($x =~ /cde/ and !defined $^N); | |
1489 | ok ($x =~ /(cde)/ and $^N eq "cde"); | |
1490 | ok ($x =~ /(c)(d)(e)/ and $^N eq "e"); | |
1491 | ok ($x =~ /(c(d)e)/ and $^N eq "cde"); | |
1492 | ok ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde"); | |
1493 | ok ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde"); | |
1494 | ok ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc"); | |
1495 | ok ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde"); | |
1496 | ok ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde"); | |
1497 | ok ($x =~ /(?:c(d)e)/ and $^N eq "d"); | |
1498 | ok ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d"); | |
1499 | ok ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f"); | |
1500 | ok ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f"); | |
1501 | ok ($x =~ /(([ace])|([bd]))*/ and $^N eq "e"); | |
1502 | {ok ($x =~ /(([ace])|([bdf]))*/ and $^N eq "f");} | |
1503 | ## Test to see if $^N is automatically localized -- it should now | |
1504 | ## have the value set in the previous test. | |
1505 | iseq $^N, "e", '$^N is automatically localized'; | |
1506 | ||
1507 | # Now test inside (?{ ... }) | |
1508 | local $Message = '$^N usage inside (?{ ... })'; | |
1509 | our ($y, $z); | |
1510 | ok ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b"); | |
1511 | ok ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"); | |
1512 | ok ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"); | |
1513 | ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" | |
1514 | and $z eq "abcd"); | |
1515 | ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" | |
1516 | and $z eq "abcde"); | |
9b4e380a | 1517 | |
84281c31 | 1518 | } |
9b4e380a | 1519 | |
9b4e380a | 1520 | |
84281c31 A |
1521 | SKIP: |
1522 | { | |
1523 | ## Should probably put in tests for all the POSIX stuff, | |
1524 | ## but not sure how to guarantee a specific locale...... | |
d73e5302 | 1525 | |
84281c31 A |
1526 | skip "Not an ASCII platform", 2 unless $IS_ASCII; |
1527 | local $Message = 'Test [[:cntrl:]]'; | |
1528 | my $AllBytes = join "" => map {chr} 0 .. 255; | |
1529 | (my $x = $AllBytes) =~ s/[[:cntrl:]]//g; | |
1530 | iseq $x, join "", map {chr} 0x20 .. 0x7E, 0x80 .. 0xFF; | |
d73e5302 | 1531 | |
84281c31 A |
1532 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; |
1533 | iseq $x, join "", map {chr} 0x00 .. 0x1F, 0x7F; | |
1534 | } | |
d73e5302 | 1535 | |
71d929cb | 1536 | |
84281c31 A |
1537 | { |
1538 | # With /s modifier UTF8 chars were interpreted as bytes | |
1539 | local $Message = "UTF-8 chars aren't bytes"; | |
1540 | my $a = "Hello \x{263A} World"; | |
1541 | my @a = ($a =~ /./gs); | |
1542 | iseq $#a, 12; | |
1543 | } | |
701a277b | 1544 | |
71d929cb | 1545 | |
84281c31 A |
1546 | { |
1547 | local $Message = '. matches \n with /s'; | |
1548 | my $str1 = "foo\nbar"; | |
1549 | my $str2 = "foo\n\x{100}bar"; | |
1550 | my ($a, $b) = map {chr} $IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41); | |
1551 | my @a; | |
1552 | @a = $str1 =~ /./g; iseq @a, 6; iseq "@a", "f o o b a r"; | |
1553 | @a = $str1 =~ /./gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1554 | @a = $str1 =~ /\C/g; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1555 | @a = $str1 =~ /\C/gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; | |
1556 | @a = $str2 =~ /./g; iseq @a, 7; iseq "@a", "f o o \x{100} b a r"; | |
1557 | @a = $str2 =~ /./gs; iseq @a, 8; iseq "@a", "f o o \n \x{100} b a r"; | |
1558 | @a = $str2 =~ /\C/g; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; | |
1559 | @a = $str2 =~ /\C/gs; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; | |
1560 | } | |
71d929cb | 1561 | |
71d929cb | 1562 | |
84281c31 A |
1563 | { |
1564 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
1565 | local $BugId = '20010814.004'; | |
1566 | $_ = "ababacadaea"; | |
1567 | my $a = join ":", /b./gc; | |
1568 | my $b = join ":", /a./gc; | |
1569 | my $c = pos; | |
1570 | iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//"; | |
1571 | } | |
701a277b | 1572 | |
ef54fa25 | 1573 | |
84281c31 A |
1574 | { |
1575 | # [ID 20010407.006] matching utf8 return values from | |
1576 | # functions does not work | |
1577 | local $BugId = '20010407.006'; | |
1578 | local $Message = 'UTF-8 return values from functions'; | |
1579 | package ID_20010407_006; | |
1580 | sub x {"a\x{1234}"} | |
1581 | my $x = x; | |
1582 | my $y; | |
1583 | ::ok $x =~ /(..)/; | |
1584 | $y = $1; | |
1585 | ::ok length ($y) == 2 && $y eq $x; | |
1586 | ::ok x =~ /(..)/; | |
1587 | $y = $1; | |
1588 | ::ok length ($y) == 2 && $y eq $x; | |
1589 | } | |
ef54fa25 | 1590 | |
701a277b | 1591 | |
84281c31 A |
1592 | { |
1593 | no warnings 'digit'; | |
1594 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. | |
1595 | my $x; | |
1596 | $x = "\x4e" . "E"; | |
1597 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); | |
701a277b | 1598 | |
84281c31 A |
1599 | $x = "\x4e" . "i"; |
1600 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); | |
701a277b | 1601 | |
84281c31 A |
1602 | $x = "\x4" . "j"; |
1603 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); | |
701a277b | 1604 | |
84281c31 A |
1605 | $x = "\x0" . "k"; |
1606 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); | |
701a277b | 1607 | |
84281c31 A |
1608 | $x = "\x0" . "x"; |
1609 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); | |
bc517b45 | 1610 | |
84281c31 A |
1611 | $x = "\x0" . "xa"; |
1612 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); | |
bc517b45 | 1613 | |
84281c31 A |
1614 | $x = "\x9" . "_b"; |
1615 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); | |
bc517b45 | 1616 | |
84281c31 | 1617 | # and now again in [] ranges |
bc517b45 | 1618 | |
84281c31 A |
1619 | $x = "\x4e" . "E"; |
1620 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); | |
bc517b45 | 1621 | |
84281c31 A |
1622 | $x = "\x4e" . "i"; |
1623 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); | |
bc517b45 | 1624 | |
84281c31 A |
1625 | $x = "\x4" . "j"; |
1626 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); | |
bc517b45 | 1627 | |
84281c31 A |
1628 | $x = "\x0" . "k"; |
1629 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); | |
bc517b45 | 1630 | |
84281c31 A |
1631 | $x = "\x0" . "x"; |
1632 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); | |
bc517b45 | 1633 | |
84281c31 A |
1634 | $x = "\x0" . "xa"; |
1635 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); | |
bc517b45 | 1636 | |
84281c31 A |
1637 | $x = "\x9" . "_b"; |
1638 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); | |
bc517b45 | 1639 | |
84281c31 | 1640 | # Check that \x{##} works. 5.6.1 fails quite a few of these. |
bc517b45 | 1641 | |
84281c31 A |
1642 | $x = "\x9b"; |
1643 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); | |
bc517b45 | 1644 | |
84281c31 A |
1645 | $x = "\x9b" . "y"; |
1646 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
bc517b45 | 1647 | |
84281c31 A |
1648 | $x = "\x9b" . "y"; |
1649 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); | |
bc517b45 | 1650 | |
84281c31 A |
1651 | $x = "\x9b" . "y"; |
1652 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); | |
bc517b45 | 1653 | |
84281c31 A |
1654 | $x = "\x0" . "y"; |
1655 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); | |
bc517b45 | 1656 | |
84281c31 A |
1657 | $x = "\x0" . "y"; |
1658 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); | |
a5961de5 | 1659 | |
84281c31 A |
1660 | $x = "\x9b" . "y"; |
1661 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); | |
a5961de5 | 1662 | |
84281c31 A |
1663 | $x = "\x9b"; |
1664 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); | |
a5961de5 | 1665 | |
84281c31 A |
1666 | $x = "\x9b" . "y"; |
1667 | ok ($x =~ /^[\x{9_b}y]{2}$/, | |
1668 | "\\x{9_b} is to be treated as \\x9b (again)"); | |
a5961de5 | 1669 | |
84281c31 A |
1670 | $x = "\x9b" . "y"; |
1671 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); | |
a5961de5 | 1672 | |
84281c31 A |
1673 | $x = "\x9b" . "y"; |
1674 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); | |
a5961de5 | 1675 | |
84281c31 A |
1676 | $x = "\x0" . "y"; |
1677 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); | |
a5961de5 | 1678 | |
84281c31 A |
1679 | $x = "\x0" . "y"; |
1680 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); | |
a5961de5 | 1681 | |
84281c31 A |
1682 | $x = "\x9b" . "y"; |
1683 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); | |
55da9344 | 1684 | |
84281c31 | 1685 | } |
55da9344 | 1686 | |
55da9344 | 1687 | |
84281c31 A |
1688 | { |
1689 | # High bit bug -- japhy | |
1690 | my $x = "ab\200d"; | |
1691 | ok $x =~ /.*?\200/, "High bit fine"; | |
1692 | } | |
b7c83a7e | 1693 | |
112bedeb | 1694 | |
84281c31 A |
1695 | { |
1696 | # The basic character classes and Unicode | |
1697 | ok "\x{0100}" =~ /\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; | |
1698 | ok "\x{0660}" =~ /\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; | |
1699 | ok "\x{1680}" =~ /\s/, 'OGHAM SPACE MARK in /\s/'; | |
1700 | } | |
112bedeb | 1701 | |
09091399 | 1702 | |
84281c31 A |
1703 | { |
1704 | local $Message = "Folding matches and Unicode"; | |
1705 | ok "a\x{100}" =~ /A/i; | |
1706 | ok "A\x{100}" =~ /a/i; | |
1707 | ok "a\x{100}" =~ /a/i; | |
1708 | ok "A\x{100}" =~ /A/i; | |
1709 | ok "\x{101}a" =~ /\x{100}/i; | |
1710 | ok "\x{100}a" =~ /\x{100}/i; | |
1711 | ok "\x{101}a" =~ /\x{101}/i; | |
1712 | ok "\x{100}a" =~ /\x{101}/i; | |
1713 | ok "a\x{100}" =~ /A\x{100}/i; | |
1714 | ok "A\x{100}" =~ /a\x{100}/i; | |
1715 | ok "a\x{100}" =~ /a\x{100}/i; | |
1716 | ok "A\x{100}" =~ /A\x{100}/i; | |
1717 | ok "a\x{100}" =~ /[A]/i; | |
1718 | ok "A\x{100}" =~ /[a]/i; | |
1719 | ok "a\x{100}" =~ /[a]/i; | |
1720 | ok "A\x{100}" =~ /[A]/i; | |
1721 | ok "\x{101}a" =~ /[\x{100}]/i; | |
1722 | ok "\x{100}a" =~ /[\x{100}]/i; | |
1723 | ok "\x{101}a" =~ /[\x{101}]/i; | |
1724 | ok "\x{100}a" =~ /[\x{101}]/i; | |
1725 | } | |
09091399 | 1726 | |
09091399 | 1727 | |
84281c31 A |
1728 | { |
1729 | use charnames ':full'; | |
1730 | local $Message = "Folding 'LATIN LETTER A WITH GRAVE'"; | |
09091399 | 1731 | |
84281c31 A |
1732 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; |
1733 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; | |
1734 | ||
1735 | ok $lower =~ m/$UPPER/i; | |
1736 | ok $UPPER =~ m/$lower/i; | |
1737 | ok $lower =~ m/[$UPPER]/i; | |
1738 | ok $UPPER =~ m/[$lower]/i; | |
09091399 | 1739 | |
84281c31 | 1740 | local $Message = "Folding 'GREEK LETTER ALPHA WITH VRACHY'"; |
09091399 | 1741 | |
84281c31 A |
1742 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; |
1743 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; | |
09091399 | 1744 | |
84281c31 A |
1745 | ok $lower =~ m/$UPPER/i; |
1746 | ok $UPPER =~ m/$lower/i; | |
1747 | ok $lower =~ m/[$UPPER]/i; | |
1748 | ok $UPPER =~ m/[$lower]/i; | |
e036fef9 | 1749 | |
84281c31 | 1750 | local $Message = "Folding 'LATIN LETTER Y WITH DIAERESIS'"; |
ffce6cc2 | 1751 | |
84281c31 A |
1752 | $lower = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; |
1753 | $UPPER = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; | |
ffce6cc2 | 1754 | |
84281c31 A |
1755 | ok $lower =~ m/$UPPER/i; |
1756 | ok $UPPER =~ m/$lower/i; | |
1757 | ok $lower =~ m/[$UPPER]/i; | |
1758 | ok $UPPER =~ m/[$lower]/i; | |
ffce6cc2 JH |
1759 | } |
1760 | ||
ffce6cc2 JH |
1761 | |
1762 | { | |
84281c31 A |
1763 | use charnames ':full'; |
1764 | local $PatchId = "13843"; | |
1765 | local $Message = "GREEK CAPITAL LETTER SIGMA vs " . | |
1766 | "COMBINING GREEK PERISPOMENI"; | |
d07ddd77 | 1767 | |
84281c31 A |
1768 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; |
1769 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; | |
d07ddd77 | 1770 | |
84281c31 A |
1771 | may_not_warn sub {ok "_:$char:_" !~ m/_:$SIGMA:_/i}; |
1772 | } | |
925f9e00 | 1773 | |
925f9e00 | 1774 | |
84281c31 A |
1775 | { |
1776 | local $Message = '\X'; | |
1777 | use charnames ':full'; | |
1778 | ||
1779 | ok "a!" =~ /^(\X)!/ && $1 eq "a"; | |
1780 | ok "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF"; | |
1781 | ok "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}"; | |
1782 | ok "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}"; | |
1783 | ok "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && | |
1784 | $1 eq "\N{LATIN CAPITAL LETTER E}"; | |
1785 | ok "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" | |
1786 | =~ /^(\X)!/ && | |
1787 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}"; | |
1788 | ||
1789 | local $Message = '\C and \X'; | |
1790 | ok "!abc!" =~ /a\Cc/; | |
1791 | ok "!abc!" =~ /a\Xc/; | |
1792 | } | |
925f9e00 | 1793 | |
925f9e00 | 1794 | |
84281c31 A |
1795 | { |
1796 | local $Message = "Final Sigma"; | |
925f9e00 | 1797 | |
84281c31 A |
1798 | my $SIGMA = "\x{03A3}"; # CAPITAL |
1799 | my $Sigma = "\x{03C2}"; # SMALL FINAL | |
1800 | my $sigma = "\x{03C3}"; # SMALL | |
925f9e00 | 1801 | |
84281c31 A |
1802 | ok $SIGMA =~ /$SIGMA/i; |
1803 | ok $SIGMA =~ /$Sigma/i; | |
1804 | ok $SIGMA =~ /$sigma/i; | |
925f9e00 | 1805 | |
84281c31 A |
1806 | ok $Sigma =~ /$SIGMA/i; |
1807 | ok $Sigma =~ /$Sigma/i; | |
1808 | ok $Sigma =~ /$sigma/i; | |
d8f6a732 | 1809 | |
84281c31 A |
1810 | ok $sigma =~ /$SIGMA/i; |
1811 | ok $sigma =~ /$Sigma/i; | |
1812 | ok $sigma =~ /$sigma/i; | |
1813 | ||
1814 | ok $SIGMA =~ /[$SIGMA]/i; | |
1815 | ok $SIGMA =~ /[$Sigma]/i; | |
1816 | ok $SIGMA =~ /[$sigma]/i; | |
d8f6a732 | 1817 | |
84281c31 A |
1818 | ok $Sigma =~ /[$SIGMA]/i; |
1819 | ok $Sigma =~ /[$Sigma]/i; | |
1820 | ok $Sigma =~ /[$sigma]/i; | |
d8f6a732 | 1821 | |
84281c31 A |
1822 | ok $sigma =~ /[$SIGMA]/i; |
1823 | ok $sigma =~ /[$Sigma]/i; | |
1824 | ok $sigma =~ /[$sigma]/i; | |
def8e4ea | 1825 | |
84281c31 | 1826 | local $Message = "More final Sigma"; |
def8e4ea | 1827 | |
84281c31 | 1828 | my $S3 = "$SIGMA$Sigma$sigma"; |
def8e4ea | 1829 | |
84281c31 A |
1830 | ok ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma; |
1831 | ok ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma; | |
1832 | ok ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma; | |
def8e4ea | 1833 | |
84281c31 A |
1834 | ok ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma; |
1835 | ok ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma; | |
1836 | ok ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma; | |
def8e4ea JH |
1837 | } |
1838 | ||
a0804c9e | 1839 | |
84281c31 A |
1840 | { |
1841 | use charnames ':full'; | |
1842 | local $Message = "Parlez-Vous " . | |
1843 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais?"; | |
1844 | ||
1845 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran.ais/ && | |
1846 | $& eq "Francais"; | |
1847 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ && | |
1848 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1849 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ && | |
1850 | $& eq "Francais"; | |
1851 | # COMBINING CEDILLA is two bytes when encoded | |
1852 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\C\Cais/; | |
1853 | ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ && | |
1854 | $& eq "Francais"; | |
1855 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ && | |
1856 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1857 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Fran\Xais/ && | |
1858 | $& eq "Franc\N{COMBINING CEDILLA}ais"; | |
1859 | ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
1860 | /Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && | |
1861 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; | |
1862 | ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\N{COMBINING CEDILLA}ais/ && | |
1863 | $& eq "Franc\N{COMBINING CEDILLA}ais"; | |
1864 | ||
1865 | my @f = ( | |
1866 | ["Fran\N{LATIN SMALL LETTER C}ais", "Francais"], | |
1867 | ["Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", | |
1868 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"], | |
1869 | ["Franc\N{COMBINING CEDILLA}ais", "Franc\N{COMBINING CEDILLA}ais"], | |
1870 | ); | |
1871 | foreach my $entry (@f) { | |
1872 | my ($subject, $match) = @$entry; | |
1873 | ok $subject =~ /Fran(?:c\N{COMBINING CEDILLA}?| | |
1874 | \N{LATIN SMALL LETTER C WITH CEDILLA})ais/x && | |
1875 | $& eq $match; | |
1876 | } | |
a0804c9e | 1877 | } |
e54858b0 | 1878 | |
c46248c1 | 1879 | |
84281c31 A |
1880 | { |
1881 | local $Message = "Lingering (and useless) UTF8 flag doesn't mess up /i"; | |
1882 | my $pat = "ABcde"; | |
1883 | my $str = "abcDE\x{100}"; | |
1884 | chop $str; | |
1885 | ok $str =~ /$pat/i; | |
1886 | ||
1887 | $pat = "ABcde\x{100}"; | |
1888 | $str = "abcDE"; | |
1889 | chop $pat; | |
1890 | ok $str =~ /$pat/i; | |
1891 | ||
1892 | $pat = "ABcde\x{100}"; | |
1893 | $str = "abcDE\x{100}"; | |
1894 | chop $pat; | |
1895 | chop $str; | |
1896 | ok $str =~ /$pat/i; | |
85fd1718 | 1897 | } |
8d21bda2 | 1898 | |
6e602e29 | 1899 | |
84281c31 A |
1900 | { |
1901 | use charnames ':full'; | |
1902 | local $Message = "LATIN SMALL LETTER SHARP S " . | |
1903 | "(\N{LATIN SMALL LETTER SHARP S})"; | |
1904 | ||
1905 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1906 | /\N{LATIN SMALL LETTER SHARP S}/; | |
1907 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1908 | /\N{LATIN SMALL LETTER SHARP S}/i; | |
1909 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1910 | /[\N{LATIN SMALL LETTER SHARP S}]/; | |
1911 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1912 | /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1913 | ||
1914 | ok "ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i; | |
1915 | ok "SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i; | |
1916 | ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1917 | ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; | |
1918 | ||
1919 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i; | |
1920 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i; | |
1921 | ||
1922 | local $Message = "Unoptimized named sequence in class"; | |
1923 | ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
1924 | ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
1925 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1926 | /[\N{LATIN SMALL LETTER SHARP S}x]/; | |
1927 | ok "\N{LATIN SMALL LETTER SHARP S}" =~ | |
1928 | /[\N{LATIN SMALL LETTER SHARP S}x]/i; | |
dba1316b | 1929 | } |
f272994b | 1930 | |
f272994b | 1931 | |
84281c31 A |
1932 | { |
1933 | # More whitespace: U+0085, U+2028, U+2029\n"; | |
1934 | ||
1935 | # U+0085, U+00A0 need to be forced to be Unicode, the \x{100} does that. | |
1936 | SKIP: { | |
1937 | skip "EBCDIC platform", 4 if $IS_EBCDIC; | |
1938 | # Do \x{0015} and \x{0041} match \s in EBCDIC? | |
1939 | ok "<\x{100}\x{0085}>" =~ /<\x{100}\s>/, '\x{0085} in \s'; | |
1940 | ok "<\x{0085}>" =~ /<\v>/, '\x{0085} in \v'; | |
1941 | ok "<\x{100}\x{00A0}>" =~ /<\x{100}\s>/, '\x{00A0} in \s'; | |
1942 | ok "<\x{00A0}>" =~ /<\h>/, '\x{00A0} in \h'; | |
1943 | } | |
1944 | my @h = map {sprintf "%05x" => $_} 0x01680, 0x0180E, 0x02000 .. 0x0200A, | |
1945 | 0x0202F, 0x0205F, 0x03000; | |
1946 | my @v = map {sprintf "%05x" => $_} 0x02028, 0x02029; | |
1947 | ||
1948 | my @H = map {sprintf "%05x" => $_} 0x01361, 0x0200B, 0x02408, 0x02420, | |
1949 | 0x0303F, 0xE0020; | |
1950 | my @V = map {sprintf "%05x" => $_} 0x0008A .. 0x0008D, 0x00348, 0x10100, | |
1951 | 0xE005F, 0xE007C; | |
1952 | ||
1953 | for my $hex (@h) { | |
1954 | my $str = eval qq ["<\\x{$hex}>"]; | |
1955 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
1956 | ok $str =~ /<\h>/, "\\x{$hex} in \\h"; | |
1957 | ok $str !~ /<\v>/, "\\x{$hex} not in \\v"; | |
1958 | } | |
f272994b | 1959 | |
84281c31 A |
1960 | for my $hex (@v) { |
1961 | my $str = eval qq ["<\\x{$hex}>"]; | |
1962 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
1963 | ok $str =~ /<\v>/, "\\x{$hex} in \\v"; | |
1964 | ok $str !~ /<\h>/, "\\x{$hex} not in \\h"; | |
1965 | } | |
f272994b | 1966 | |
84281c31 A |
1967 | for my $hex (@H) { |
1968 | my $str = eval qq ["<\\x{$hex}>"]; | |
1969 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
1970 | ok $str =~ /<\H>/, "\\x{$hex} in \\H"; | |
1971 | } | |
f272994b | 1972 | |
84281c31 A |
1973 | for my $hex (@V) { |
1974 | my $str = eval qq ["<\\x{$hex}>"]; | |
1975 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
1976 | ok $str =~ /<\V>/, "\\x{$hex} in \\V"; | |
1977 | } | |
33b8afdf | 1978 | } |
ff385a1b | 1979 | |
ff385a1b | 1980 | |
84281c31 A |
1981 | { |
1982 | # . with /s should work on characters, as opposed to bytes | |
1983 | local $Message = ". with /s works on characters, not bytes"; | |
ff385a1b | 1984 | |
84281c31 A |
1985 | my $s = "\x{e4}\x{100}"; |
1986 | # This is not expected to match: the point is that | |
1987 | # neither should we get "Malformed UTF-8" warnings. | |
1988 | may_not_warn sub {$s =~ /\G(.+?)\n/gcs}, "No 'Malformed UTF-8' warning"; | |
ff385a1b | 1989 | |
84281c31 A |
1990 | my @c; |
1991 | push @c => $1 while $s =~ /\G(.)/gs; | |
bc45ce41 | 1992 | |
84281c31 A |
1993 | local $" = ""; |
1994 | iseq "@c", $s; | |
bc45ce41 | 1995 | |
84281c31 A |
1996 | # Test only chars < 256 |
1997 | my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; | |
1998 | my $r1 = ""; | |
1999 | while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2000 | $r1 .= $1 . $2; | |
2001 | } | |
bc45ce41 | 2002 | |
84281c31 A |
2003 | my $t2 = $t1 . "\x{100}"; # Repeat with a larger char |
2004 | my $r2 = ""; | |
2005 | while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2006 | $r2 .= $1 . $2; | |
2007 | } | |
2008 | $r2 =~ s/\x{100}//; | |
d65afb4b | 2009 | |
84281c31 A |
2010 | iseq $r1, $r2; |
2011 | } | |
491fd90a | 2012 | |
491fd90a | 2013 | |
84281c31 A |
2014 | { |
2015 | local $Message = "Unicode lookbehind"; | |
2016 | ok "A\x{100}B" =~ /(?<=A.)B/; | |
2017 | ok "A\x{200}\x{300}B" =~ /(?<=A..)B/; | |
2018 | ok "\x{400}AB" =~ /(?<=\x{400}.)B/; | |
2019 | ok "\x{500}\x{600}B" =~ /(?<=\x{500}.)B/; | |
2020 | ||
2021 | # Original code also contained: | |
2022 | # ok "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/; | |
2023 | # but that looks like a typo. | |
2024 | } | |
491fd90a | 2025 | |
491fd90a | 2026 | |
84281c31 A |
2027 | { |
2028 | local $Message = 'UTF-8 hash keys and /$/'; | |
2029 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters | |
2030 | # /2002-01/msg01327.html | |
2031 | ||
2032 | my $u = "a\x{100}"; | |
2033 | my $v = substr ($u, 0, 1); | |
2034 | my $w = substr ($u, 1, 1); | |
2035 | my %u = ($u => $u, $v => $v, $w => $w); | |
2036 | for (keys %u) { | |
2037 | my $m1 = /^\w*$/ ? 1 : 0; | |
2038 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
2039 | iseq $m1, $m2; | |
2040 | } | |
2041 | } | |
491fd90a | 2042 | |
491fd90a | 2043 | |
84281c31 A |
2044 | { |
2045 | local $BugId = "20020124.005"; | |
2046 | local $PatchId = "14795"; | |
2047 | local $Message = "s///eg"; | |
2048 | ||
2049 | for my $char ("a", "\x{df}", "\x{100}") { | |
2050 | my $x = "$char b $char"; | |
2051 | $x =~ s{($char)}{ | |
2052 | "c" =~ /c/; | |
2053 | "x"; | |
2054 | }ge; | |
2055 | iseq substr ($x, 0, 1), substr ($x, -1, 1); | |
2056 | } | |
2057 | } | |
491fd90a | 2058 | |
491fd90a | 2059 | |
84281c31 A |
2060 | { |
2061 | local $Message = "No SEGV in s/// and UTF-8"; | |
2062 | my $s = "s#\x{100}" x 4; | |
2063 | ok $s =~ s/[^\w]/ /g; | |
2064 | if ($ENV {REAL_POSIX_CC}) { | |
2065 | iseq $s, "s " x 4; | |
2066 | } | |
2067 | else { | |
2068 | iseq $s, "s \x{100}" x 4; | |
2069 | } | |
2070 | } | |
491fd90a | 2071 | |
491fd90a | 2072 | |
84281c31 A |
2073 | { |
2074 | local $Message = "UTF-8 bug (maybe already known?)"; | |
2075 | my $u = "foo"; | |
2076 | $u =~ s/./\x{100}/g; | |
2077 | iseq $u, "\x{100}\x{100}\x{100}"; | |
491fd90a | 2078 | |
84281c31 A |
2079 | $u = "foobar"; |
2080 | $u =~ s/[ao]/\x{100}/g; | |
2081 | iseq $u, "f\x{100}\x{100}b\x{100}r"; | |
11ef8fdd | 2082 | |
84281c31 A |
2083 | $u =~ s/\x{100}/e/g; |
2084 | iseq $u, "feeber"; | |
faf11cac | 2085 | } |
faf11cac | 2086 | |
f3b1e556 | 2087 | |
84281c31 A |
2088 | { |
2089 | local $Message = "UTF-8 bug with s///"; | |
2090 | # check utf8/non-utf8 mixtures | |
2091 | # try to force all float/anchored check combinations | |
2092 | ||
2093 | my $c = "\x{100}"; | |
2094 | my $subst; | |
2095 | for my $re ("xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", | |
2096 | "xx.*(?=$c)", "(?=$c).*xx",) { | |
2097 | ok "xxx" !~ /$re/; | |
2098 | ok +($subst = "xxx") !~ s/$re//; | |
2099 | } | |
2100 | for my $re ("xx.*$c*", "$c*.*xx") { | |
2101 | ok "xxx" =~ /$re/; | |
2102 | ok +($subst = "xxx") =~ s/$re//; | |
2103 | iseq $subst, ""; | |
2104 | } | |
2105 | for my $re ("xxy*", "y*xx") { | |
2106 | ok "xx$c" =~ /$re/; | |
2107 | ok +($subst = "xx$c") =~ s/$re//; | |
2108 | iseq $subst, $c; | |
2109 | ok "xy$c" !~ /$re/; | |
2110 | ok +($subst = "xy$c") !~ s/$re//; | |
2111 | } | |
2112 | for my $re ("xy$c*z", "x$c*yz") { | |
2113 | ok "xyz" =~ /$re/; | |
2114 | ok +($subst = "xyz") =~ s/$re//; | |
2115 | iseq $subst, ""; | |
2116 | } | |
2117 | } | |
f3b1e556 | 2118 | |
446eaa42 | 2119 | |
84281c31 A |
2120 | { |
2121 | local $Message = "qr /.../x"; | |
2122 | my $R = qr / A B C # D E/x; | |
2123 | ok "ABCDE" =~ $R && $& eq "ABC"; | |
2124 | ok "ABCDE" =~ /$R/ && $& eq "ABC"; | |
2125 | ok "ABCDE" =~ m/$R/ && $& eq "ABC"; | |
2126 | ok "ABCDE" =~ /($R)/ && $1 eq "ABC"; | |
2127 | ok "ABCDE" =~ m/($R)/ && $1 eq "ABC"; | |
2128 | } | |
cb50f42d | 2129 | |
446eaa42 | 2130 | |
84281c31 A |
2131 | { |
2132 | local $BugId = "20020412.005"; | |
2133 | local $Message = "Correct pmop flags checked when empty pattern"; | |
2134 | ||
2135 | # Requires reuse of last successful pattern. | |
2136 | my $num = 123; | |
2137 | $num =~ /\d/; | |
2138 | for (0 .. 1) { | |
2139 | my $match = ?? + 0; | |
2140 | ok $match != $_, $Message, | |
2141 | sprintf "'match one' %s on %s iteration" => | |
2142 | $match ? 'succeeded' : 'failed', | |
2143 | $_ ? 'second' : 'first'; | |
2144 | } | |
2145 | $num =~ /(\d)/; | |
2146 | my $result = join "" => $num =~ //g; | |
2147 | iseq $result, $num; | |
2148 | } | |
cb50f42d | 2149 | |
cb50f42d | 2150 | |
84281c31 A |
2151 | { |
2152 | local $BugId = '20020630.002'; | |
2153 | local $Message = 'UTF-8 regex matches above 32k'; | |
2154 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { | |
2155 | my ($type, $char) = @$_; | |
2156 | for my $len (32000, 32768, 33000) { | |
2157 | my $s = $char . "f" x $len; | |
2158 | my $r = $s =~ /$char([f]*)/gc; | |
2159 | ok $r, $Message, "<$type x $len>"; | |
2160 | ok !$r || pos ($s) == $len + 1, $Message, | |
2161 | "<$type x $len>; pos = @{[pos $s]}"; | |
2162 | } | |
2163 | } | |
2164 | } | |
cb50f42d | 2165 | |
ab01544f | 2166 | |
84281c31 A |
2167 | { |
2168 | our $a = bless qr /foo/ => 'Foo'; | |
2169 | ok 'goodfood' =~ $a, "Reblessed qr // matches"; | |
2170 | iseq $a, '(?-xism:foo)', "Reblessed qr // stringifies"; | |
2171 | my $x = "\x{3fe}"; | |
2172 | my $z = my $y = "\317\276"; # Byte representation of $x | |
2173 | $a = qr /$x/; | |
2174 | ok $x =~ $a, "UTF-8 interpolation in qr //"; | |
2175 | ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; | |
2176 | ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; | |
2177 | ok "a$x" =~ /^a(??{$a})\z/, | |
2178 | "Postponed interpolation of qr // preserves UTF-8"; | |
2179 | { | |
2180 | local $BugId = '17776'; | |
2181 | iseq length qr /##/x, 12, "## in qr // doesn't corrupt memory"; | |
2182 | } | |
2183 | { | |
2184 | use re 'eval'; | |
2185 | ok "$x$x" =~ /^$x(??{$x})\z/, | |
2186 | "Postponed UTF-8 string in UTF-8 re matches UTF-8"; | |
2187 | ok "$y$x" =~ /^$y(??{$x})\z/, | |
2188 | "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; | |
2189 | ok "$y$x" !~ /^$y(??{$y})\z/, | |
2190 | "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; | |
2191 | ok "$x$x" !~ /^$x(??{$y})\z/, | |
2192 | "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; | |
2193 | ok "$y$y" =~ /^$y(??{$y})\z/, | |
2194 | "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; | |
2195 | ok "$x$y" =~ /^$x(??{$y})\z/, | |
2196 | "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; | |
2197 | ||
2198 | $y = $z; # Reset $y after upgrade. | |
2199 | ok "$x$y" !~ /^$x(??{$x})\z/, | |
2200 | "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; | |
2201 | ok "$y$y" !~ /^$y(??{$x})\z/, | |
2202 | "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; | |
2203 | } | |
2204 | } | |
cb50f42d | 2205 | |
cb50f42d | 2206 | |
84281c31 A |
2207 | { |
2208 | local $PatchId = '18179'; | |
2209 | my $s = "\x{100}" x 5; | |
2210 | my $ok = $s =~ /(\x{100}{4})/; | |
2211 | my ($ord, $len) = (ord $1, length $1); | |
2212 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift"; | |
2213 | } | |
446eaa42 | 2214 | |
446eaa42 | 2215 | |
84281c31 A |
2216 | { |
2217 | local $BugId = '15763'; | |
2218 | our $a = "x\x{100}"; | |
2219 | chop $a; # Leaves the UTF-8 flag | |
2220 | $a .= "y"; # 1 byte before 'y'. | |
446eaa42 | 2221 | |
84281c31 A |
2222 | ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8'; |
2223 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
cb50f42d | 2224 | |
84281c31 A |
2225 | ok $a =~ /^\Cy/, 'match \Cy'; |
2226 | ok $a =~ /^\C{1}y/, 'match \C{1}y'; | |
cb50f42d | 2227 | |
84281c31 A |
2228 | ok $a !~ /^\C\Cy/, q {don't match two \Cy}; |
2229 | ok $a !~ /^\C{2}y/, q {don't match \C{2}y}; | |
cb50f42d | 2230 | |
84281c31 | 2231 | $a = "\x{100}y"; # 2 bytes before "y" |
cb50f42d | 2232 | |
84281c31 A |
2233 | ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8'; |
2234 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
2235 | ok $a =~ /^\C\C/, 'match two \C'; | |
2236 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
cb50f42d | 2237 | |
84281c31 A |
2238 | ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'; |
2239 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
2d92f8a0 | 2240 | |
84281c31 A |
2241 | ok $a =~ /^\C\Cy/, 'match two \C'; |
2242 | ok $a =~ /^\C{2}y/, 'match \C{2}'; | |
2d92f8a0 | 2243 | |
84281c31 A |
2244 | ok $a !~ /^\C\C\Cy/, q {don't match three \Cy}; |
2245 | ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy}; | |
2246 | ok $a !~ /^\C{3}y/, q {don't match \C{3}y}; | |
2d92f8a0 | 2247 | |
84281c31 | 2248 | $a = "\x{1000}y"; # 3 bytes before "y" |
2d92f8a0 | 2249 | |
84281c31 A |
2250 | ok $a =~ /^\C/, 'match one \C on three-byte UTF-8'; |
2251 | ok $a =~ /^\C{1}/, 'match \C{1}'; | |
2252 | ok $a =~ /^\C\C/, 'match two \C'; | |
2253 | ok $a =~ /^\C{2}/, 'match \C{2}'; | |
2254 | ok $a =~ /^\C\C\C/, 'match three \C'; | |
2255 | ok $a =~ /^\C{3}/, 'match \C{3}'; | |
2d92f8a0 | 2256 | |
84281c31 A |
2257 | ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'; |
2258 | ok $a =~ /^\C{4}/, 'match \C{4}'; | |
4a818d86 | 2259 | |
84281c31 A |
2260 | ok $a =~ /^\C\C\Cy/, 'match three \Cy'; |
2261 | ok $a =~ /^\C{3}y/, 'match \C{3}y'; | |
4a818d86 | 2262 | |
84281c31 A |
2263 | ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy}; |
2264 | ok $a !~ /^\C{4}y/, q {don't match \C{4}y}; | |
2265 | } | |
711a919c | 2266 | |
84281c31 A |
2267 | |
2268 | { | |
2269 | local $\; | |
2270 | $_ = 'aaaaaaaaaa'; | |
2271 | utf8::upgrade($_); chop $_; $\="\n"; | |
2272 | ok /[^\s]+/, 'm/[^\s]/ utf8'; | |
2273 | ok /[^\d]+/, 'm/[^\d]/ utf8'; | |
2274 | ok +($a = $_, $_ =~ s/[^\s]+/./g), 's/[^\s]/ utf8'; | |
2275 | ok +($a = $_, $a =~ s/[^\d]+/./g), 's/[^\s]/ utf8'; | |
2276 | } | |
0e933229 | 2277 | |
f14c76ed | 2278 | |
84281c31 A |
2279 | { |
2280 | local $BugId = '15397'; | |
2281 | local $Message = 'UTF-8 matching'; | |
2282 | ok "\x{100}" =~ /\x{100}/; | |
2283 | ok "\x{100}" =~ /(\x{100})/; | |
2284 | ok "\x{100}" =~ /(\x{100}){1}/; | |
2285 | ok "\x{100}\x{100}" =~ /(\x{100}){2}/; | |
2286 | ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/; | |
2287 | } | |
f14c76ed | 2288 | |
f14c76ed | 2289 | |
84281c31 A |
2290 | { |
2291 | local $BugId = '7471'; | |
2292 | local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times'; | |
2293 | local $_ = 'CD'; | |
2294 | ok /(AB)*?CD/ && !defined $1; | |
2295 | ok /(AB)*CD/ && !defined $1; | |
2296 | } | |
f14c76ed | 2297 | |
f14c76ed | 2298 | |
84281c31 A |
2299 | { |
2300 | local $BugId = '3547'; | |
2301 | local $Message = "Caching shouldn't prevent match"; | |
2302 | my $pattern = "^(b+?|a){1,2}c"; | |
2303 | ok "bac" =~ /$pattern/ && $1 eq 'a'; | |
2304 | ok "bbac" =~ /$pattern/ && $1 eq 'a'; | |
2305 | ok "bbbac" =~ /$pattern/ && $1 eq 'a'; | |
2306 | ok "bbbbac" =~ /$pattern/ && $1 eq 'a'; | |
2307 | } | |
f14c76ed | 2308 | |
f14c76ed | 2309 | |
f14c76ed | 2310 | |
84281c31 A |
2311 | { |
2312 | local $BugId = '18232'; | |
2313 | local $Message = '$1 should keep UTF-8 ness'; | |
2314 | ok "\x{100}" =~ /(.)/; | |
2315 | iseq $1, "\x{100}", '$1 is UTF-8'; | |
2316 | { 'a' =~ /./; } | |
2317 | iseq $1, "\x{100}", '$1 is still UTF-8'; | |
2318 | isneq $1, "\xC4\x80", '$1 is not non-UTF-8'; | |
2319 | } | |
f14c76ed | 2320 | |
f14c76ed | 2321 | |
84281c31 A |
2322 | { |
2323 | local $BugId = '19767'; | |
2324 | local $Message = "Optimizer doesn't prematurely reject match"; | |
2325 | use utf8; | |
f14c76ed | 2326 | |
84281c31 A |
2327 | my $attr = 'Name-1'; |
2328 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
2329 | my $NormalWord = qr /${NormalChar}+?/; | |
2330 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; | |
f14c76ed | 2331 | |
84281c31 A |
2332 | $attr =~ /^$/; |
2333 | ok $attr =~ $PredNameHyphen; # Original test. | |
f14c76ed | 2334 | |
84281c31 A |
2335 | "a" =~ m/[b]/; |
2336 | ok "0" =~ /\p{N}+\z/; # Variant. | |
2337 | } | |
f14c76ed | 2338 | |
f14c76ed | 2339 | |
84281c31 A |
2340 | { |
2341 | local $BugId = '20683'; | |
2342 | local $Message = "(??{ }) doesn't return stale values"; | |
2343 | our $p = 1; | |
2344 | foreach (1, 2, 3, 4) { | |
2345 | $p ++ if /(??{ $p })/ | |
2346 | } | |
2347 | iseq $p, 5; | |
a30b2f1f | 2348 | |
84281c31 A |
2349 | { |
2350 | package P; | |
2351 | $a = 1; | |
2352 | sub TIESCALAR {bless []} | |
2353 | sub FETCH {$a ++} | |
2354 | } | |
2355 | tie $p, "P"; | |
2356 | foreach (1, 2, 3, 4) { | |
2357 | /(??{ $p })/ | |
2358 | } | |
2359 | iseq $p, 5; | |
2360 | } | |
f119b0fb | 2361 | |
f119b0fb | 2362 | |
84281c31 A |
2363 | { |
2364 | # Subject: Odd regexp behavior | |
2365 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
2366 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
2367 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
2368 | # To: perl-unicode@perl.org | |
f119b0fb | 2369 | |
84281c31 A |
2370 | local $Message = 'Markus Kuhn 2003-02-26'; |
2371 | ||
2372 | my $x = "\x{2019}\nk"; | |
2373 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
2374 | ok $x eq "\x{2019} k"; | |
f119b0fb | 2375 | |
84281c31 A |
2376 | $x = "b\nk"; |
2377 | ok $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
2378 | ok $x eq "b k"; | |
faf82a0b | 2379 | |
84281c31 | 2380 | ok "\x{2019}" =~ /\S/; |
faf82a0b | 2381 | } |
351208f1 | 2382 | |
351208f1 | 2383 | |
84281c31 A |
2384 | { |
2385 | local $BugId = '21411'; | |
2386 | local $Message = "(??{ .. }) in split doesn't corrupt its stack"; | |
2387 | our $i; | |
2388 | ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-'; | |
2389 | no warnings 'deprecated', 'syntax'; | |
2390 | split /(?{'WOW'})/, 'abc'; | |
2391 | local $" = "|"; | |
2392 | iseq "@_", "a|b|c"; | |
2393 | } | |
351208f1 | 2394 | |
080c2dec | 2395 | |
84281c31 A |
2396 | { |
2397 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it | |
2398 | # hasn't been crashing. Disable this test until it is fixed properly. | |
2399 | # XXX also check what it returns rather than just doing ok(1,...) | |
2400 | # split /(?{ split "" })/, "abc"; | |
2401 | local $TODO = "Recursive split is still broken"; | |
2402 | ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; | |
2403 | } | |
91e09a61 | 2404 | |
ec391688 | 2405 | |
84281c31 A |
2406 | { |
2407 | ok "\x{100}\n" =~ /\x{100}\n$/, "UTF-8 length cache and fbm_compile"; | |
2408 | } | |
16e1b944 | 2409 | |
16e1b944 | 2410 | |
84281c31 A |
2411 | { |
2412 | package Str; | |
2413 | use overload q /""/ => sub {${$_ [0]};}; | |
2414 | sub new {my ($c, $v) = @_; bless \$v, $c;} | |
7ef91622 | 2415 | |
84281c31 A |
2416 | package main; |
2417 | $_ = Str -> new ("a\x{100}/\x{100}b"); | |
2418 | ok join (":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"; | |
2419 | } | |
14ebb1a2 | 2420 | |
f02c194e | 2421 | |
84281c31 A |
2422 | { |
2423 | local $BugId = '17757'; | |
2424 | $_ = "code: 'x' { '...' }\n"; study; | |
2425 | my @x; push @x, $& while m/'[^\']*'/gx; | |
2426 | local $" = ":"; | |
2427 | iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop"; | |
2428 | } | |
5aca4364 | 2429 | |
6b43b216 | 2430 | |
84281c31 A |
2431 | { |
2432 | my $re = qq /^([^X]*)X/; | |
2433 | utf8::upgrade ($re); | |
2434 | ok "\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; | |
2435 | } | |
c1e0e3d2 | 2436 | |
5dab1207 | 2437 | |
84281c31 A |
2438 | { |
2439 | local $BugId = '22354'; | |
2440 | sub func ($) { | |
2441 | ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]"; | |
2442 | ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m"; | |
2443 | } | |
2444 | func "standalone"; | |
2445 | $_ = "x"; s/x/func "in subst"/e; | |
2446 | $_ = "x"; s/x/func "in multiline subst"/em; | |
2447 | ||
2448 | # | |
2449 | # Next two give 'panic: malloc'. | |
2450 | # Outcommented, using two TODOs. | |
2451 | # | |
2452 | local $TODO = 'panic: malloc'; | |
2453 | local $Message = 'Postponed regexp and propaged modifier'; | |
2454 | # ok 0 for 1 .. 2; | |
2455 | SKIP: { | |
2456 | skip "panic: malloc", 2; | |
2457 | $_ = "x"; /x(?{func "in regexp"})/; | |
2458 | $_ = "x"; /x(?{func "in multiline regexp"})/m; | |
2459 | } | |
2460 | } | |
89d0f8e1 | 2461 | |
89d0f8e1 | 2462 | |
84281c31 A |
2463 | { |
2464 | local $BugId = '19049'; | |
2465 | $_ = "abcdef\n"; | |
2466 | my @x = m/./g; | |
2467 | iseq "abcde", $`, 'Global match sets $`'; | |
2468 | } | |
a7913593 | 2469 | |
a7913593 | 2470 | |
84281c31 A |
2471 | { |
2472 | ok "123\x{100}" =~ /^.*1.*23\x{100}$/, | |
2473 | 'UTF-8 + multiple floating substr'; | |
2474 | } | |
090f7165 | 2475 | |
090f7165 | 2476 | |
84281c31 A |
2477 | { |
2478 | local $Message = '<20030808193656.5109.1@llama.ni-s.u-net.com>'; | |
090f7165 | 2479 | |
84281c31 A |
2480 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON |
2481 | ok " \x{101}" =~ qr/\x{100}/i; | |
090f7165 | 2482 | |
84281c31 A |
2483 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW |
2484 | ok " \x{1E01}" =~ qr/\x{1E00}/i; | |
090f7165 | 2485 | |
84281c31 A |
2486 | # DESERET SMALL/CAPITAL LETTER LONG I |
2487 | ok " \x{10428}" =~ qr/\x{10400}/i; | |
090f7165 | 2488 | |
84281c31 A |
2489 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' |
2490 | ok " \x{1E01}x" =~ qr/\x{1E00}X/i; | |
2491 | } | |
090f7165 | 2492 | |
090f7165 | 2493 | |
84281c31 A |
2494 | { |
2495 | # [perl #23769] Unicode regex broken on simple example | |
2496 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
2497 | local $BugId = '23769'; | |
2498 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; | |
2499 | local $Message = $Mess; | |
090f7165 | 2500 | |
84281c31 | 2501 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; |
a7913593 | 2502 | |
84281c31 A |
2503 | ok $s =~ /\x{a0}/; |
2504 | ok $s =~ /\x{a0}+/; | |
2505 | ok $s =~ /\x{a0}\x{a0}/; | |
4be7a33f | 2506 | |
84281c31 A |
2507 | $Message = "$Mess (easy variant)"; |
2508 | ok "aaa\x{100}" =~ /(a+)/; | |
2509 | iseq $1, "aaa"; | |
a3621e74 | 2510 | |
84281c31 A |
2511 | $Message = "$Mess (easy invariant)"; |
2512 | ok "aaa\x{100} " =~ /(a+?)/; | |
2513 | iseq $1, "a"; | |
a3621e74 | 2514 | |
84281c31 A |
2515 | $Message = "$Mess (regrepeat variant)"; |
2516 | ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/; | |
2517 | iseq $1, "\xa0"; | |
a3621e74 | 2518 | |
84281c31 A |
2519 | $Message = "$Mess (regrepeat invariant)"; |
2520 | ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/; | |
2521 | iseq $1, "\xa0\xa0\xa0"; | |
a3621e74 | 2522 | |
84281c31 A |
2523 | $Message = "$Mess (hard variant)"; |
2524 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/; | |
2525 | iseq $1, "\xa0\xa1"; | |
a3621e74 | 2526 | |
84281c31 A |
2527 | $Message = "$Mess (hard invariant)"; |
2528 | ok "ababab\x{100} " =~ /((?:ab)+)/; | |
2529 | iseq $1, 'ababab'; | |
a3621e74 | 2530 | |
84281c31 A |
2531 | ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/; |
2532 | iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1"; | |
a3621e74 | 2533 | |
84281c31 A |
2534 | ok "ababab\x{100} " =~ /((?:ab)+?)/; |
2535 | iseq $1, "ab"; | |
a3621e74 | 2536 | |
84281c31 A |
2537 | $Message = "Don't match first byte of UTF-8 representation"; |
2538 | ok "\xc4\xc4\xc4" !~ /(\x{100}+)/; | |
2539 | ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/; | |
2540 | ok "\xc4\xc4\xc4" !~ /(\x{100}++)/; | |
2541 | } | |
a3621e74 | 2542 | |
a3621e74 | 2543 | |
84281c31 A |
2544 | { |
2545 | for (120 .. 130) { | |
2546 | my $head = 'x' x $_; | |
2547 | local $Message = q [Don't misparse \x{...} in regexp ] . | |
2548 | q [near 127 char EXACT limit]; | |
2549 | for my $tail ('\x{0061}', '\x{1234}', '\x61') { | |
2550 | eval_ok qq ["$head$tail" =~ /$head$tail/]; | |
2551 | } | |
2552 | local $Message = q [Don't misparse \N{...} in regexp ] . | |
2553 | q [near 127 char EXACT limit]; | |
2554 | for my $tail ('\N{SNOWFLAKE}') { | |
2555 | eval_ok qq [use charnames ':full'; | |
2556 | "$head$tail" =~ /$head$tail/]; | |
2557 | } | |
2558 | } | |
2559 | } | |
786e8c11 | 2560 | |
786e8c11 | 2561 | |
84281c31 A |
2562 | { |
2563 | # perl panic: pp_match start/end pointers | |
2564 | local $BugId = '25269'; | |
2565 | iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, | |
2566 | 'Captures can move backwards in string'; | |
2567 | } | |
a3621e74 YO |
2568 | |
2569 | ||
84281c31 A |
2570 | { |
2571 | local $BugId = '27940'; # \cA not recognized in character classes | |
2572 | ok "a\cAb" =~ /\cA/, '\cA in pattern'; | |
2573 | ok "a\cAb" =~ /[\cA]/, '\cA in character class'; | |
2574 | ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'; | |
2575 | ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'; | |
2576 | ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'; | |
2577 | ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'; | |
2578 | ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'; | |
2579 | ok "ab" !~ /a\cIb/x, '\cI in pattern'; | |
2580 | } | |
809e8e66 | 2581 | |
809e8e66 | 2582 | |
84281c31 A |
2583 | { |
2584 | # perl #28532: optional zero-width match at end of string is ignored | |
2585 | local $BugId = '28532'; | |
2586 | ok "abc" =~ /^abc(\z)?/ && defined($1), | |
2587 | 'Optional zero-width match at end of string'; | |
2588 | ok "abc" =~ /^abc(\z)??/ && !defined($1), | |
2589 | 'Optional zero-width match at end of string'; | |
2590 | } | |
108bb1ad | 2591 | |
108bb1ad | 2592 | |
809e8e66 | 2593 | |
84281c31 A |
2594 | { # TRIE related |
2595 | our @got = (); | |
2596 | "words" =~ /(word|word|word)(?{push @got, $1})s$/; | |
2597 | iseq @got, 1, "TRIE optimation"; | |
740266bf | 2598 | |
84281c31 A |
2599 | @got = (); |
2600 | "words" =~ /(word|word|word)(?{push @got,$1})s$/i; | |
2601 | iseq @got, 1,"TRIEF optimisation"; | |
2602 | ||
2603 | my @nums = map {int rand 1000} 1 .. 100; | |
2604 | my $re = "(" . (join "|", @nums) . ")"; | |
2605 | $re = qr/\b$re\b/; | |
2606 | ||
2607 | foreach (@nums) { | |
2608 | ok $_ =~ /$re/, "Trie nums"; | |
2609 | } | |
740266bf | 2610 | |
84281c31 A |
2611 | $_ = join " ", @nums; |
2612 | @got = (); | |
2613 | push @got, $1 while /$re/g; | |
2614 | ||
2615 | my %count; | |
2616 | $count {$_} ++ for @got; | |
2617 | my $ok = 1; | |
2618 | for (@nums) { | |
2619 | $ok = 0 if --$count {$_} < 0; | |
2620 | } | |
2621 | ok $ok, "Trie min count matches"; | |
0523e772 | 2622 | } |
740266bf | 2623 | |
84281c31 A |
2624 | |
2625 | { | |
2626 | # TRIE related | |
2627 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
2628 | ok "foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i && | |
2629 | $1 eq "\x{101}foo", | |
2630 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"; | |
2631 | ||
2632 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
2633 | ok "foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i && | |
2634 | $1 eq "\x{1E01}foo", | |
2635 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"; | |
2636 | ||
2637 | # DESERET SMALL/CAPITAL LETTER LONG I | |
2638 | ok "foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i && | |
2639 | $1 eq "\x{10428}foo", | |
2640 | "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"; | |
2641 | ||
2642 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
2643 | ok "foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i && | |
2644 | $1 eq "\x{1E01}xfoo", | |
2645 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"; | |
2646 | ||
2647 | use charnames ':full'; | |
2648 | ||
2649 | my $s = "\N{LATIN SMALL LETTER SHARP S}"; | |
2650 | ok "foba ba$s" =~ qr/(foo|Ba$s|bar)/i && $1 eq "ba$s", | |
2651 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2652 | ok "foba ba$s" =~ qr/(Ba$s|foo|bar)/i && $1 eq "ba$s", | |
2653 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2654 | ok "foba ba$s" =~ qr/(foo|bar|Ba$s)/i && $1 eq "ba$s", | |
2655 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2656 | ||
2657 | ok "foba ba$s" =~ qr/(foo|Bass|bar)/i && $1 eq "ba$s", | |
2658 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
2659 | ||
2660 | ok "foba ba$s" =~ qr/(foo|BaSS|bar)/i && $1 eq "ba$s", | |
2661 | "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"; | |
2662 | ||
2663 | ok "foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i | |
2664 | && $1 eq "ba${s}pxySS$s$s", | |
2665 | "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"; | |
86f12da2 | 2666 | } |
86f12da2 | 2667 | |
86f12da2 | 2668 | |
84281c31 A |
2669 | SKIP: |
2670 | { | |
2671 | print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; | |
2672 | my @normal = qw [the are some normal words]; | |
86f12da2 | 2673 | |
84281c31 | 2674 | skip "Skipped Psycho", 2 * @normal if $ENV {PERL_SKIP_PSYCHO_TEST}; |
86f12da2 | 2675 | |
84281c31 | 2676 | local $" = "|"; |
9f7f3913 | 2677 | |
84281c31 A |
2678 | my @psycho = (@normal, map chr $_, 255 .. 20000); |
2679 | my $psycho1 = "@psycho"; | |
2680 | for (my $i = @psycho; -- $i;) { | |
2681 | my $j = int rand (1 + $i); | |
2682 | @psycho [$i, $j] = @psycho [$j, $i]; | |
2683 | } | |
2684 | my $psycho2 = "@psycho"; | |
9f7f3913 | 2685 | |
84281c31 A |
2686 | foreach my $word (@normal) { |
2687 | ok $word =~ /($psycho1)/ && $1 eq $word, 'Psycho'; | |
2688 | ok $word =~ /($psycho2)/ && $1 eq $word, 'Psycho'; | |
2689 | } | |
2690 | } | |
9f7f3913 | 2691 | |
9f7f3913 | 2692 | |
84281c31 A |
2693 | { |
2694 | local $BugId = '36207'; | |
2695 | my $utf8 = "\xe9\x{100}"; chop $utf8; | |
2696 | my $latin1 = "\xe9"; | |
2697 | ||
2698 | ok $utf8 =~ /\xe9/i, "utf8/latin"; | |
2699 | ok $utf8 =~ /$latin1/i, "utf8/latin runtime"; | |
2700 | ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"; | |
2701 | ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"; | |
2702 | ||
2703 | ok "\xe9" =~ /$utf8/i, "latin/utf8"; | |
2704 | ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie"; | |
2705 | ok $latin1 =~ /$utf8/i, "latin/utf8 runtime"; | |
2706 | ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime"; | |
2707 | } | |
9f7f3913 | 2708 | |
9f7f3913 | 2709 | |
84281c31 A |
2710 | { |
2711 | local $BugId = '37038'; | |
2712 | my $s = "abcd"; | |
2713 | $s =~ /(..)(..)/g; | |
2714 | $s = $1; | |
2715 | $s = $2; | |
2716 | iseq $2, 'cd', | |
2717 | "Assigning to original string does not corrupt match vars"; | |
2718 | } | |
9f7f3913 | 2719 | |
9f7f3913 | 2720 | |
84281c31 A |
2721 | { |
2722 | { | |
2723 | package wooosh; | |
2724 | sub gloople {"!"} | |
2725 | } | |
2726 | my $aeek = bless {} => 'wooosh'; | |
2727 | eval_ok sub {$aeek -> gloople () =~ /(.)/g}, | |
2728 | "//g match against return value of sub"; | |
9f7f3913 | 2729 | |
84281c31 A |
2730 | sub gloople {"!"} |
2731 | eval_ok sub {gloople () =~ /(.)/g}, | |
2732 | "26410 didn't affect sub calls for some reason"; | |
2733 | } | |
9f7f3913 | 2734 | |
1749ea0d | 2735 | |
84281c31 A |
2736 | { |
2737 | local $TODO = "See changes 26925-26928, which reverted change 26410"; | |
fc8cd66c | 2738 | { |
84281c31 A |
2739 | package lv; |
2740 | our $var = "abc"; | |
2741 | sub variable : lvalue {$var} | |
2742 | } | |
2743 | my $o = bless [] => 'lv'; | |
2744 | my $f = ""; | |
2745 | my $r = eval { | |
2746 | for (1 .. 2) { | |
2747 | $f .= $1 if $o -> variable =~ /(.)/g; | |
2748 | } | |
fc8cd66c | 2749 | 1; |
84281c31 A |
2750 | }; |
2751 | if ($r) { | |
2752 | iseq $f, "ab", "pos() retained between calls"; | |
2753 | } | |
2754 | else { | |
2755 | local $TODO; | |
2756 | ok 0, "Code failed: $@"; | |
2757 | } | |
2758 | ||
2759 | our $var = "abc"; | |
2760 | sub variable : lvalue {$var} | |
2761 | my $g = ""; | |
2762 | my $s = eval { | |
2763 | for (1 .. 2) { | |
2764 | $g .= $1 if variable =~ /(.)/g; | |
2765 | } | |
2766 | 1; | |
2767 | }; | |
2768 | if ($s) { | |
2769 | iseq $g, "ab", "pos() retained between calls"; | |
2770 | } | |
2771 | else { | |
2772 | local $TODO; | |
2773 | ok 0, "Code failed: $@"; | |
fc8cd66c | 2774 | } |
fc8cd66c | 2775 | } |
84281c31 A |
2776 | |
2777 | ||
2778 | SKIP: | |
2779 | { | |
2780 | local $BugId = '37836'; | |
2781 | skip "In EBCDIC" if $IS_EBCDIC; | |
2782 | no warnings 'utf8'; | |
2783 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 | |
2784 | my $ret = 0; | |
2785 | eval_ok sub {!($ret = s/[\0]+//g)}, | |
2786 | "Ill-formed UTF-8 doesn't match NUL in class"; | |
6bda09f9 | 2787 | } |
1f1031fe | 2788 | |
84281c31 A |
2789 | |
2790 | { | |
2791 | # chr(65535) should be allowed in regexes | |
2792 | local $BugId = '38293'; | |
2793 | no warnings 'utf8'; # To allow non-characters | |
2794 | my ($c, $r, $s); | |
2795 | ||
2796 | $c = chr 0xffff; | |
2797 | $c =~ s/$c//g; | |
2798 | ok $c eq "", "U+FFFF, parsed as atom"; | |
2799 | ||
2800 | $c = chr 0xffff; | |
2801 | $r = "\\$c"; | |
2802 | $c =~ s/$r//g; | |
2803 | ok $c eq "", "U+FFFF backslashed, parsed as atom"; | |
2804 | ||
2805 | $c = chr 0xffff; | |
2806 | $c =~ s/[$c]//g; | |
2807 | ok $c eq "", "U+FFFF, parsed in class"; | |
2808 | ||
2809 | $c = chr 0xffff; | |
2810 | $r = "[\\$c]"; | |
2811 | $c =~ s/$r//g; | |
2812 | ok $c eq "", "U+FFFF backslashed, parsed in class"; | |
2813 | ||
2814 | $s = "A\x{ffff}B"; | |
2815 | $s =~ s/\x{ffff}//i; | |
2816 | ok $s eq "AB", "U+FFFF, EXACTF"; | |
2817 | ||
2818 | $s = "\x{ffff}A"; | |
2819 | $s =~ s/\bA//; | |
2820 | ok $s eq "\x{ffff}", "U+FFFF, BOUND"; | |
2821 | ||
2822 | $s = "\x{ffff}!"; | |
2823 | $s =~ s/\B!//; | |
2824 | ok $s eq "\x{ffff}", "U+FFFF, NBOUND"; | |
1f1031fe | 2825 | } |
1f1031fe | 2826 | |
84281c31 A |
2827 | |
2828 | { | |
2829 | local $BugId = '39583'; | |
e62cc96a | 2830 | |
84281c31 A |
2831 | # The printing characters |
2832 | my @chars = ("A" .. "Z"); | |
2833 | my $delim = ","; | |
2834 | my $size = 32771 - 4; | |
2835 | my $str = ''; | |
2836 | ||
2837 | # Create some random junk. Inefficient, but it works. | |
2838 | for (my $i = 0; $i < $size; $ i++) { | |
2839 | $str .= $chars [rand @chars]; | |
2840 | } | |
2841 | ||
2842 | $str .= ($delim x 4); | |
2843 | my $res; | |
2844 | my $matched; | |
2845 | ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches"; | |
2846 | iseq $str, "", "Empty string"; | |
2847 | ok defined $1 && length ($1) == $size, '$1 is correct size'; | |
2848 | } | |
2849 | ||
2850 | ||
2851 | { | |
2852 | local $BugId = '27940'; | |
2853 | ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'; | |
2854 | ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'; | |
2855 | ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'; | |
2856 | ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'; | |
2857 | ||
2858 | ok "X\0A" =~ /X\c@?A/, '\c@?'; | |
2859 | ok "X\0A" =~ /X\c@*A/, '\c@*'; | |
2860 | ok "X\0A" =~ /X\c@(A)/, '\c@('; | |
2861 | ok "X\0A" =~ /X(\c@)A/, '\c@)'; | |
2862 | ok "X\0A" =~ /X\c@|ZA/, '\c@|'; | |
2863 | ||
2864 | ok "X\@A" =~ /X@?A/, '@?'; | |
2865 | ok "X\@A" =~ /X@*A/, '@*'; | |
2866 | ok "X\@A" =~ /X@(A)/, '@('; | |
2867 | ok "X\@A" =~ /X(@)A/, '@)'; | |
2868 | ok "X\@A" =~ /X@|ZA/, '@|'; | |
2869 | ||
2870 | local $" = ','; # non-whitespace and non-RE-specific | |
2871 | ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus'; | |
2872 | ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/'; | |
2873 | ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/'; | |
2874 | ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x'; | |
2875 | ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x'; | |
2876 | } | |
2877 | ||
2878 | ||
2879 | { | |
2880 | use lib 'lib'; | |
2881 | use Cname; | |
e62cc96a | 2882 | |
84281c31 A |
2883 | ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; |
2884 | my $test = 1233; | |
2885 | # | |
2886 | # Why doesn't must_warn work here? | |
2887 | # | |
2888 | my $w; | |
2889 | local $SIG {__WARN__} = sub {$w .= "@_"}; | |
2890 | eval 'q(xxWxx) =~ /[\N{WARN}]/'; | |
2891 | ok $w && $w =~ /^Ignoring excess chars from/, | |
2892 | "Ignoring excess chars warning"; | |
2893 | ||
2894 | undef $w; | |
2895 | eval q [ok "\0" !~ /[\N{EMPTY-STR}XY]/, | |
2896 | "Zerolength charname in charclass doesn't match \\0"]; | |
2897 | ok $w && $w =~ /^Ignoring zero length/, | |
2898 | 'Ignoring zero length \N{%} in character class warning'; | |
2899 | ||
2900 | ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; | |
2901 | ok 'ABC' =~ /(\N{EVIL})/, 'Charname caching $1'; | |
2902 | ok 'xy' =~ /x\N{EMPTY-STR}y/, | |
2903 | 'Empty string charname produces NOTHING node'; | |
2904 | ok '' =~ /\N{EMPTY-STR}/, | |
2905 | 'Empty string charname produces NOTHING node'; | |
2906 | ||
2907 | } | |
e62cc96a | 2908 | |
e62cc96a | 2909 | |
84281c31 A |
2910 | { |
2911 | use charnames ':full'; | |
2912 | ||
2913 | ok 'aabc' !~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; | |
2914 | ok 'a+bc' =~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; | |
2915 | ||
2916 | ok ' A B' =~ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
2917 | 'Intermixed named and unicode escapes'; | |
2918 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
2919 | /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
2920 | 'Intermixed named and unicode escapes'; | |
2921 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
2922 | /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, | |
2923 | 'Intermixed named and unicode escapes'; | |
2924 | } | |
2925 | ||
2926 | ||
2927 | { | |
2928 | our $brackets; | |
2929 | $brackets = qr{ | |
2930 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
2931 | }x; | |
2932 | ||
2933 | ok "{b{c}d" !~ m/^((??{ $brackets }))/, "Bracket mismatch"; | |
2934 | ||
2935 | SKIP: { | |
2936 | our @stack = (); | |
6962fb1a | 2937 | my @expect = qw( |
84281c31 A |
2938 | stuff1 |
2939 | stuff2 | |
2940 | <stuff1>and<stuff2> | |
2941 | right | |
2942 | <right> | |
2943 | <<right>> | |
2944 | <<<right>>> | |
2945 | <<stuff1>and<stuff2>><<<<right>>>> | |
2946 | ); | |
2947 | ||
2948 | local $_ = '<<<stuff1>and<stuff2>><<<<right>>>>>'; | |
2949 | ok /^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/, | |
2950 | "Recursion matches"; | |
2951 | iseq @stack, @expect, "Right amount of matches" | |
2952 | or skip "Won't test individual results as count isn't equal", | |
2953 | 0 + @expect; | |
2954 | my $idx = 0; | |
2955 | foreach my $expect (@expect) { | |
2956 | iseq $stack [$idx], $expect, | |
2957 | "Expecting '$expect' at stack pos #$idx"; | |
2958 | $idx ++; | |
2959 | } | |
81714fb9 | 2960 | } |
84281c31 A |
2961 | } |
2962 | ||
2963 | ||
2964 | { | |
2965 | my $s = '123453456'; | |
2966 | $s =~ s/(?<digits>\d+)\k<digits>/$+{digits}/; | |
2967 | ok $s eq '123456', 'Named capture (angle brackets) s///'; | |
2968 | $s = '123453456'; | |
2969 | $s =~ s/(?'digits'\d+)\k'digits'/$+{digits}/; | |
2970 | ok $s eq '123456', 'Named capture (single quotes) s///'; | |
2971 | } | |
2972 | ||
2973 | ||
2974 | { | |
2975 | my @ary = ( | |
2976 | pack('U', 0x00F1), # n-tilde | |
2977 | '_'.pack('U', 0x00F1), # _ + n-tilde | |
2978 | 'c'.pack('U', 0x0327), # c + cedilla | |
2979 | pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla | |
2980 | 'a'.pack('U', 0x00B2), # a + superscript two | |
2981 | pack('U', 0x0391), # ALPHA | |
2982 | pack('U', 0x0391).'2', # ALPHA + 2 | |
2983 | pack('U', 0x0391).'_', # ALPHA + _ | |
2984 | ); | |
2985 | ||
2986 | for my $uni (@ary) { | |
2987 | my ($r1, $c1, $r2, $c2) = eval qq { | |
2988 | use utf8; | |
2989 | scalar ("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), | |
2990 | \$+{${uni}}, | |
2991 | scalar ("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), | |
2992 | \$+{${uni}}; | |
2993 | }; | |
2994 | ok $r1, "Named capture UTF (?'')"; | |
2995 | ok defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"; | |
2996 | ok $r2, "Named capture UTF (?<>)"; | |
2997 | ok defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"; | |
e62cc96a | 2998 | } |
81714fb9 | 2999 | } |
84281c31 A |
3000 | |
3001 | ||
3002 | { | |
3003 | my $s = 'foo bar baz'; | |
3004 | my (@k, @v, @fetch, $res); | |
3005 | my $count = 0; | |
3006 | my @names = qw ($+{A} $+{B} $+{C}); | |
3007 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { | |
3008 | while (my ($k, $v) = each (%+)) { | |
3009 | $count++; | |
3010 | } | |
3011 | @k = sort keys (%+); | |
3012 | @v = sort values (%+); | |
3013 | $res = 1; | |
3014 | push @fetch, | |
3015 | ["$+{A}", "$1"], | |
3016 | ["$+{B}", "$2"], | |
3017 | ["$+{C}", "$3"], | |
3018 | ; | |
3019 | } | |
3020 | foreach (0 .. 2) { | |
3021 | if ($fetch [$_]) { | |
3022 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
3023 | } else { | |
3024 | ok 0, $names[$_]; | |
44a2ac75 YO |
3025 | } |
3026 | } | |
84281c31 A |
3027 | iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/"; |
3028 | iseq $count, 3, "Got 3 keys in %+ via each"; | |
3029 | iseq 0 + @k, 3, 'Got 3 keys in %+ via keys'; | |
3030 | iseq "@k", "A B C", "Got expected keys"; | |
3031 | iseq "@v", "bar baz foo", "Got expected values"; | |
3032 | eval ' | |
3033 | no warnings "uninitialized"; | |
3034 | print for $+ {this_key_doesnt_exist}; | |
3035 | '; | |
3036 | ok !$@, 'lvalue $+ {...} should not throw an exception'; | |
44a2ac75 | 3037 | } |
84281c31 A |
3038 | |
3039 | ||
3040 | { | |
3041 | # | |
3042 | # Almost the same as the block above, except that the capture is nested. | |
3043 | # | |
3044 | local $BugId = '50496'; | |
3045 | my $s = 'foo bar baz'; | |
3046 | my (@k, @v, @fetch, $res); | |
3047 | my $count = 0; | |
3048 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); | |
3049 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { | |
3050 | while (my ($k,$v) = each(%+)) { | |
3051 | $count++; | |
3052 | } | |
3053 | @k = sort keys (%+); | |
3054 | @v = sort values (%+); | |
3055 | $res = 1; | |
3056 | push @fetch, | |
3057 | ["$+{A}", "$2"], | |
3058 | ["$+{B}", "$3"], | |
3059 | ["$+{C}", "$4"], | |
3060 | ["$+{D}", "$1"], | |
3061 | ; | |
3062 | } | |
3063 | foreach (0 .. 3) { | |
3064 | if ($fetch [$_]) { | |
3065 | iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; | |
3066 | } else { | |
3067 | ok 0, $names [$_]; | |
3068 | } | |
3069 | } | |
3070 | iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/"; | |
3071 | iseq $count, 4, "Got 4 keys in %+ via each"; | |
3072 | iseq @k, 4, 'Got 4 keys in %+ via keys'; | |
3073 | iseq "@k", "A B C D", "Got expected keys"; | |
3074 | iseq "@v", "bar baz foo foo bar baz", "Got expected values"; | |
3075 | eval ' | |
3076 | no warnings "uninitialized"; | |
3077 | print for $+ {this_key_doesnt_exist}; | |
3078 | '; | |
3079 | ok !$@,'lvalue $+ {...} should not throw an exception'; | |
3080 | } | |
3081 | ||
3082 | ||
3083 | { | |
3084 | my $s = 'foo bar baz'; | |
3085 | my @res; | |
3086 | if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { | |
3087 | foreach my $name (sort keys(%-)) { | |
3088 | my $ary = $- {$name}; | |
3089 | foreach my $idx (0 .. $#$ary) { | |
3090 | push @res, "$name:$idx:$ary->[$idx]"; | |
3091 | } | |
3092 | } | |
3093 | } | |
3094 | my @expect = qw (A:0:1 A:1:3 B:0:2 B:1:4); | |
3095 | iseq "@res", "@expect", "Check %-"; | |
3096 | eval' | |
3097 | no warnings "uninitialized"; | |
3098 | print for $- {this_key_doesnt_exist}; | |
3099 | '; | |
3100 | ok !$@,'lvalue $- {...} should not throw an exception'; | |
3101 | } | |
3102 | ||
3103 | ||
3104 | SKIP: | |
3105 | { | |
3106 | # stress test CURLYX/WHILEM. | |
3107 | # | |
3108 | # This test includes varying levels of nesting, and according to | |
3109 | # profiling done against build 28905, exercises every code line in the | |
3110 | # CURLYX and WHILEM blocks, except those related to LONGJMP, the | |
3111 | # super-linear cache and warnings. It executes about 0.5M regexes | |
3112 | ||
3113 | skip "No psycho tests" if $ENV {PERL_SKIP_PSYCHO_TEST}; | |
3114 | print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; | |
3115 | my $r = qr/^ | |
3116 | (?: | |
3117 | ( (?:a|z+)+ ) | |
3118 | (?: | |
3119 | ( (?:b|z+){3,}? ) | |
3120 | ( | |
3121 | (?: | |
3122 | (?: | |
3123 | (?:c|z+){1,1}?z | |
3124 | )? | |
3125 | (?:c|z+){1,1} | |
3126 | )* | |
3127 | ) | |
3128 | (?:z*){2,} | |
3129 | ( (?:z+|d)+ ) | |
3130 | (?: | |
3131 | ( (?:e|z+)+ ) | |
3132 | )* | |
3133 | ( (?:f|z+)+ ) | |
3134 | )* | |
3135 | ( (?:z+|g)+ ) | |
3136 | (?: | |
3137 | ( (?:h|z+)+ ) | |
3138 | )* | |
3139 | ( (?:i|z+)+ ) | |
3140 | )+ | |
3141 | ( (?:j|z+)+ ) | |
3142 | (?: | |
3143 | ( (?:k|z+)+ ) | |
3144 | )* | |
3145 | ( (?:l|z+)+ ) | |
3146 | $/x; | |
3147 | ||
3148 | my $ok = 1; | |
3149 | my $msg = "CURLYX stress test"; | |
3150 | OUTER: | |
3151 | for my $a ("x","a","aa") { | |
3152 | for my $b ("x","bbb","bbbb") { | |
3153 | my $bs = $a.$b; | |
3154 | for my $c ("x","c","cc") { | |
3155 | my $cs = $bs.$c; | |
3156 | for my $d ("x","d","dd") { | |
3157 | my $ds = $cs.$d; | |
3158 | for my $e ("x","e","ee") { | |
3159 | my $es = $ds.$e; | |
3160 | for my $f ("x","f","ff") { | |
3161 | my $fs = $es.$f; | |
3162 | for my $g ("x","g","gg") { | |
3163 | my $gs = $fs.$g; | |
3164 | for my $h ("x","h","hh") { | |
3165 | my $hs = $gs.$h; | |
3166 | for my $i ("x","i","ii") { | |
3167 | my $is = $hs.$i; | |
3168 | for my $j ("x","j","jj") { | |
3169 | my $js = $is.$j; | |
3170 | for my $k ("x","k","kk") { | |
3171 | my $ks = $js.$k; | |
3172 | for my $l ("x","l","ll") { | |
3173 | my $ls = $ks.$l; | |
3174 | if ($ls =~ $r) { | |
3175 | if ($ls =~ /x/) { | |
3176 | $msg .= ": unexpected match for [$ls]"; | |
3177 | $ok = 0; | |
3178 | last OUTER; | |
3179 | } | |
3180 | my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12"; | |
3181 | unless ($ls eq $cap) { | |
3182 | $msg .= ": capture: [$ls], got [$cap]"; | |
3183 | $ok = 0; | |
3184 | last OUTER; | |
3185 | } | |
3186 | } | |
3187 | else { | |
3188 | unless ($ls =~ /x/) { | |
3189 | $msg = ": failed for [$ls]"; | |
3190 | $ok = 0; | |
3191 | last OUTER; | |
3192 | } | |
3193 | } | |
3194 | } | |
3195 | } | |
bfac009d DM |
3196 | } |
3197 | } | |
3198 | } | |
3199 | } | |
3200 | } | |
3201 | } | |
3202 | } | |
3203 | } | |
3204 | } | |
bfac009d | 3205 | } |
84281c31 | 3206 | ok($ok, $msg); |
bfac009d | 3207 | } |
bfac009d | 3208 | |
cc74c5bd | 3209 | |
84281c31 A |
3210 | { |
3211 | # \, breaks {3,4} | |
3212 | ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern'; | |
3213 | ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern'; | |
3214 | ||
3215 | # \c\ followed by _ | |
3216 | ok "x\c_y" !~ /x\c\_y/, '\_ in a pattern'; | |
3217 | ok "x\c\_y" =~ /x\c\_y/, '\_ in a pattern'; | |
3218 | ||
3219 | # \c\ followed by other characters | |
3220 | for my $c ("z", "\0", "!", chr(254), chr(256)) { | |
3221 | my $targ = "a\034$c"; | |
3222 | my $reg = "a\\c\\$c"; | |
3223 | ok eval ("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"; | |
3224 | } | |
3225 | } | |
3226 | ||
cc74c5bd | 3227 | |
84281c31 A |
3228 | { |
3229 | local $BugId = '36046'; | |
3230 | my $str = 'abc'; | |
3231 | my $count = 0; | |
3232 | my $mval = 0; | |
3233 | my $pval = 0; | |
3234 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} | |
3235 | iseq $mval, 0, '@- should be empty'; | |
3236 | iseq $pval, 0, '@+ should be empty'; | |
3237 | iseq $count, 1, 'Should have matched once only'; | |
3238 | } | |
bfac009d | 3239 | |
581d1b5b | 3240 | |
84281c31 A |
3241 | { # Test the (*PRUNE) pattern |
3242 | our $count = 0; | |
3243 | 'aaab' =~ /a+b?(?{$count++})(*FAIL)/; | |
3244 | iseq $count, 9, "Expect 9 for no (*PRUNE)"; | |
3245 | $count = 0; | |
3246 | 'aaab' =~ /a+b?(*PRUNE)(?{$count++})(*FAIL)/; | |
3247 | iseq $count, 3, "Expect 3 with (*PRUNE)"; | |
3248 | local $_ = 'aaab'; | |
3249 | $count = 0; | |
3250 | 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g; | |
3251 | iseq $count, 4, "/.(*PRUNE)/"; | |
3252 | $count = 0; | |
3253 | 'aaab' =~ /a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; | |
3254 | iseq $count, 3, "Expect 3 with (*PRUNE)"; | |
3255 | local $_ = 'aaab'; | |
3256 | $count = 0; | |
3257 | 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; | |
3258 | iseq $count, 4, "/.(*PRUNE)/"; | |
3259 | } | |
3260 | ||
3261 | ||
3262 | { # Test the (*SKIP) pattern | |
3263 | our $count = 0; | |
3264 | 'aaab' =~ /a+b?(*SKIP)(?{$count++})(*FAIL)/; | |
3265 | iseq $count, 1, "Expect 1 with (*SKIP)"; | |
3266 | local $_ = 'aaab'; | |
3267 | $count = 0; | |
3268 | 1 while /.(*SKIP)(?{$count++})(*FAIL)/g; | |
3269 | iseq $count, 4, "/.(*SKIP)/"; | |
3270 | $_ = 'aaabaaab'; | |
3271 | $count = 0; | |
3272 | our @res = (); | |
3273 | 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
3274 | iseq $count, 2, "Expect 2 with (*SKIP)"; | |
3275 | iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; | |
3276 | } | |
3277 | ||
3278 | ||
3279 | { # Test the (*SKIP) pattern | |
3280 | our $count = 0; | |
3281 | 'aaab' =~ /a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; | |
3282 | iseq $count, 1, "Expect 1 with (*SKIP)"; | |
3283 | local $_ = 'aaab'; | |
3284 | $count = 0; | |
3285 | 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g; | |
3286 | iseq $count, 4, "/.(*SKIP)/"; | |
3287 | $_ = 'aaabaaab'; | |
3288 | $count = 0; | |
3289 | our @res = (); | |
3290 | 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
3291 | iseq $count, 2, "Expect 2 with (*SKIP)"; | |
3292 | iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; | |
3293 | } | |
3294 | ||
3295 | ||
3296 | { # Test the (*SKIP) pattern | |
3297 | our $count = 0; | |
3298 | 'aaab' =~ /a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/; | |
3299 | iseq $count, 3, "Expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)"; | |
3300 | local $_ = 'aaabaaab'; | |
3301 | $count = 0; | |
3302 | our @res = (); | |
3303 | 1 while | |
3304 | /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g; | |
3305 | iseq $count, 5, "Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)"; | |
3306 | iseq "@res", "aaab b aaab b ", | |
3307 | "Adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected"; | |
3308 | } | |
3309 | ||
3310 | ||
3311 | { # Test the (*COMMIT) pattern | |
3312 | our $count = 0; | |
3313 | 'aaabaaab' =~ /a+b?(*COMMIT)(?{$count++})(*FAIL)/; | |
3314 | iseq $count, 1, "Expect 1 with (*COMMIT)"; | |
3315 | local $_ = 'aaab'; | |
3316 | $count = 0; | |
3317 | 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g; | |
3318 | iseq $count, 1, "/.(*COMMIT)/"; | |
3319 | $_ = 'aaabaaab'; | |
3320 | $count = 0; | |
3321 | our @res = (); | |
3322 | 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g; | |
3323 | iseq $count, 1, "Expect 1 with (*COMMIT)"; | |
3324 | iseq "@res", "aaab", "Adjacent (*COMMIT) works as expected"; | |
3325 | } | |
3326 | ||
3327 | ||
3328 | { | |
3329 | # Test named commits and the $REGERROR var | |
3330 | our $REGERROR; | |
3331 | for my $name ('', ':foo') { | |
3332 | for my $pat ("(*PRUNE$name)", | |
3333 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
3334 | "(*COMMIT$name)") { | |
3335 | for my $suffix ('(*FAIL)', '') { | |
3336 | 'aaaab' =~ /a+b$pat$suffix/; | |
3337 | iseq $REGERROR, | |
3338 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
3339 | "Test $pat and \$REGERROR $suffix"; | |
3340 | } | |
e2e6a0f1 YO |
3341 | } |
3342 | } | |
84281c31 A |
3343 | } |
3344 | ||
3345 | ||
3346 | { | |
3347 | # Test named commits and the $REGERROR var | |
3348 | package Fnorble; | |
3349 | our $REGERROR; | |
3350 | for my $name ('', ':foo') { | |
3351 | for my $pat ("(*PRUNE$name)", | |
3352 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
3353 | "(*COMMIT$name)") { | |
3354 | for my $suffix ('(*FAIL)','') { | |
3355 | 'aaaab' =~ /a+b$pat$suffix/; | |
3356 | ::iseq $REGERROR, | |
3357 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
3358 | "Test $pat and \$REGERROR $suffix"; | |
3359 | } | |
e2e6a0f1 | 3360 | } |
84281c31 | 3361 | } |
e2e6a0f1 | 3362 | } |
84281c31 A |
3363 | |
3364 | ||
3365 | { | |
3366 | # Test named commits and the $REGERROR var | |
3367 | local $Message = '$REGERROR'; | |
3368 | our $REGERROR; | |
3369 | for my $word (qw (bar baz bop)) { | |
3370 | $REGERROR = ""; | |
3371 | "aaaaa$word" =~ | |
3372 | /a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/; | |
3373 | iseq $REGERROR, $word; | |
3374 | } | |
5d458dd8 | 3375 | } |
542fa716 | 3376 | |
84281c31 | 3377 | |
f0852a51 | 3378 | { |
84281c31 A |
3379 | local $BugId = '40684'; |
3380 | local $Message = '/m in precompiled regexp'; | |
3381 | my $s = "abc\ndef"; | |
3382 | my $rex = qr'^abc$'m; | |
3383 | ok $s =~ m/$rex/; | |
3384 | ok $s =~ m/^abc$/m; | |
f0852a51 | 3385 | } |
84281c31 A |
3386 | |
3387 | ||
f0852a51 | 3388 | { |
84281c31 A |
3389 | #Mindnumbingly simple test of (*THEN) |
3390 | for ("ABC","BAX") { | |
3391 | ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test"; | |
3392 | } | |
3393 | } | |
3394 | ||
f0852a51 | 3395 | |
84281c31 A |
3396 | { |
3397 | local $Message = "Relative Recursion"; | |
3398 | my $parens = qr/(\((?:[^()]++|(?-1))*+\))/; | |
3399 | local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
3400 | my ($all, $one, $two) = ('', '', ''); | |
3401 | ok /foo $parens \s* \+ \s* bar $parens/x; | |
3402 | iseq $1, '((2*3)+4-3)'; | |
3403 | iseq $2, '(2*(3+4)-1*(2-3))'; | |
3404 | iseq $&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
3405 | iseq $&, $_; | |
f0852a51 | 3406 | } |
f0852a51 YO |
3407 | |
3408 | { | |
84281c31 A |
3409 | my $spaces=" "; |
3410 | local $_ = join 'bar', $spaces, $spaces; | |
3411 | our $count = 0; | |
3412 | s/(?>\s+bar)(?{$count++})//g; | |
3413 | iseq $_, $spaces, "SUSPEND final string"; | |
3414 | iseq $count, 1, "Optimiser should have prevented more than one match"; | |
f0852a51 | 3415 | } |
19b95bf0 DM |
3416 | |
3417 | { | |
84281c31 A |
3418 | local $BugId = '36909'; |
3419 | local $Message = '(?: ... )? should not lose $^R'; | |
3420 | $^R = 'Nothing'; | |
3421 | { | |
3422 | local $^R = "Bad"; | |
3423 | ok 'x foofoo y' =~ m { | |
3424 | (foo) # $^R correctly set | |
3425 | (?{ "last regexp code result" }) | |
3426 | }x; | |
3427 | iseq $^R, 'last regexp code result'; | |
3428 | } | |
3429 | iseq $^R, 'Nothing'; | |
3430 | ||
3431 | { | |
3432 | local $^R = "Bad"; | |
3433 | ||
3434 | ok 'x foofoo y' =~ m { | |
3435 | (?:foo|bar)+ # $^R correctly set | |
3436 | (?{ "last regexp code result" }) | |
3437 | }x; | |
3438 | iseq $^R, 'last regexp code result'; | |
3439 | } | |
3440 | iseq $^R, 'Nothing'; | |
3441 | ||
3442 | { | |
3443 | local $^R = "Bad"; | |
3444 | ok 'x foofoo y' =~ m { | |
3445 | (foo|bar)\1+ # $^R undefined | |
3446 | (?{ "last regexp code result" }) | |
3447 | }x; | |
3448 | iseq $^R, 'last regexp code result'; | |
3449 | } | |
3450 | iseq $^R, 'Nothing'; | |
3451 | ||
3452 | { | |
3453 | local $^R = "Bad"; | |
3454 | ok 'x foofoo y' =~ m { | |
3455 | (foo|bar)\1 # This time without the + | |
3456 | (?{"last regexp code result"}) | |
3457 | }x; | |
3458 | iseq $^R, 'last regexp code result'; | |
3459 | } | |
3460 | iseq $^R, 'Nothing'; | |
19b95bf0 | 3461 | } |
84281c31 A |
3462 | |
3463 | ||
3464 | { | |
3465 | local $BugId = '22395'; | |
3466 | local $Message = 'Match is linear, not quadratic'; | |
3467 | our $count; | |
3468 | for my $l (10, 100, 1000) { | |
3469 | $count = 0; | |
3470 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; | |
3471 | local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; | |
3472 | iseq $count, $l + 1; | |
3473 | } | |
cf2a2b69 | 3474 | } |
84281c31 A |
3475 | |
3476 | ||
3477 | { | |
3478 | local $BugId = '22614'; | |
3479 | local $Message = '@-/@+ should not have undefined values'; | |
3480 | local $_ = 'ab'; | |
3481 | our @len = (); | |
3482 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
3483 | iseq "@len", "2 2 2"; | |
3484 | } | |
3485 | ||
3486 | ||
3487 | { | |
3488 | local $BugId = '18209'; | |
3489 | local $Message = '$& set on s///'; | |
3490 | my $text = ' word1 word2 word3 word4 word5 word6 '; | |
3491 | ||
3492 | my @words = ('word1', 'word3', 'word5'); | |
3493 | my $count; | |
3494 | foreach my $word (@words) { | |
3495 | $text =~ s/$word\s//gi; # Leave a space to seperate words | |
3496 | # in the resultant str. | |
3497 | # The following block is not working. | |
3498 | if ($&) { | |
3499 | $count ++; | |
3500 | } | |
3501 | # End bad block | |
04e1c60a | 3502 | } |
84281c31 A |
3503 | iseq $count, 3; |
3504 | iseq $text, ' word2 word4 word6 '; | |
04e1c60a | 3505 | } |
04e1c60a | 3506 | |
de734bd5 | 3507 | |
84281c31 A |
3508 | { |
3509 | # RT#6893 | |
3510 | local $BugId = '6893'; | |
3511 | local $_ = qq (A\nB\nC\n); | |
3512 | my @res; | |
3513 | while (m#(\G|\n)([^\n]*)\n#gsx) { | |
3514 | push @res, "$2"; | |
3515 | last if @res > 3; | |
3516 | } | |
3517 | iseq "@res", "A B C", "/g pattern shouldn't infinite loop"; | |
3518 | } | |
3519 | ||
3520 | ||
3521 | { | |
3522 | # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus> | |
3523 | my $dow_name = "nada"; | |
3524 | my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " . | |
3525 | "C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/"; | |
3526 | my $time_string = "D\x{e9} C\x{e9}adaoin"; | |
3527 | eval $parser; | |
3528 | ok !$@, "Test Eval worked"; | |
3529 | iseq $dow_name, $time_string, "UTF-8 trie common prefix extraction"; | |
3530 | } | |
3531 | ||
3532 | ||
3533 | { | |
3534 | my $v; | |
3535 | ($v = 'bar') =~ /(\w+)/g; | |
3536 | $v = 'foo'; | |
3537 | iseq "$1", 'bar', '$1 is safe after /g - may fail due ' . | |
3538 | 'to specialized config in pp_hot.c' | |
3539 | } | |
3540 | ||
3541 | ||
3542 | { | |
3543 | local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663"; | |
3544 | my $qr_barR1 = qr/(bar)\g-1/; | |
3545 | ok "foobarbarxyz" =~ $qr_barR1; | |
3546 | ok "foobarbarxyz" =~ qr/foo${qr_barR1}xyz/; | |
3547 | ok "foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/; | |
3548 | ok "foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/; | |
3549 | ok "foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/; | |
3550 | ok "foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/; | |
3551 | } | |
3552 | ||
3553 | ||
3554 | { | |
3555 | local $BugId = '41010'; | |
3556 | local $Message = 'No optimizer bug'; | |
3557 | my @tails = ('', '(?(1))', '(|)', '()?'); | |
3558 | my @quants = ('*','+'); | |
3559 | my $doit = sub { | |
3560 | my $pats = shift; | |
3561 | for (@_) { | |
3562 | for my $pat (@$pats) { | |
3563 | for my $quant (@quants) { | |
3564 | for my $tail (@tails) { | |
3565 | my $re = "($pat$quant\$)$tail"; | |
3566 | ok /$re/ && $1 eq $_, "'$_' =~ /$re/"; | |
3567 | ok /$re/m && $1 eq $_, "'$_' =~ /$re/m"; | |
3568 | } | |
304ee84b YO |
3569 | } |
3570 | } | |
3571 | } | |
84281c31 A |
3572 | }; |
3573 | ||
3574 | my @dpats = ('\d', | |
3575 | '[1234567890]', | |
3576 | '(1|[23]|4|[56]|[78]|[90])', | |
3577 | '(?:1|[23]|4|[56]|[78]|[90])', | |
3578 | '(1|2|3|4|5|6|7|8|9|0)', | |
3579 | '(?:1|2|3|4|5|6|7|8|9|0)'); | |
3580 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); | |
3581 | my @sstrs = (' '); | |
3582 | my @dstrs = ('12345'); | |
3583 | $doit -> (\@spats, @sstrs); | |
3584 | $doit -> (\@dpats, @dstrs); | |
3585 | } | |
ee9b8eae YO |
3586 | |
3587 | ||
84281c31 A |
3588 | { |
3589 | local $Message = '$REGMARK'; | |
3590 | our @r = (); | |
3591 | our ($REGMARK, $REGERROR); | |
3592 | ok 'foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x; | |
3593 | iseq "@r","foo"; | |
3594 | iseq $REGMARK, "foo"; | |
3595 | ok 'foofoo' !~ /foo (*MARK:foo) (*FAIL) /x; | |
3596 | ok !$REGMARK; | |
3597 | iseq $REGERROR, 'foo'; | |
3598 | } | |
3599 | ||
8158862b | 3600 | |
84281c31 A |
3601 | { |
3602 | local $Message = '\K test'; | |
3603 | my $x; | |
3604 | $x = "abc.def.ghi.jkl"; | |
3605 | $x =~ s/.*\K\..*//; | |
3606 | iseq $x, "abc.def.ghi"; | |
3607 | ||
3608 | $x = "one two three four"; | |
3609 | $x =~ s/o+ \Kthree//g; | |
3610 | iseq $x, "one two four"; | |
3611 | ||
3612 | $x = "abcde"; | |
3613 | $x =~ s/(.)\K/$1/g; | |
3614 | iseq $x, "aabbccddee"; | |
3615 | } | |
3616 | ||
3617 | ||
3618 | { | |
3619 | sub kt { | |
3620 | return '4' if $_[0] eq '09028623'; | |
3621 | } | |
3622 | # Nested EVAL using PL_curpm (via $1 or friends) | |
3623 | my $re; | |
3624 | our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; | |
3625 | $re = qr/^ ( (??{ $grabit }) ) $ /x; | |
3626 | my @res = '0902862349' =~ $re; | |
3627 | iseq join ("-", @res), "0902862349", | |
3628 | 'PL_curpm is set properly on nested eval'; | |
3629 | ||
3630 | our $qr = qr/ (o) (??{ $1 }) /x; | |
3631 | ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval"; | |
8158862b | 3632 | } |
8158862b | 3633 | |
76a476f9 | 3634 | |
84281c31 A |
3635 | { |
3636 | use charnames ":full"; | |
3637 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"; | |
3638 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"; | |
3639 | ok "\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"; | |
3640 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"; | |
3641 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"; | |
3642 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"; | |
3643 | ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"; | |
3644 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"; | |
3645 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"; | |
3646 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue" | |
76a476f9 | 3647 | } |
84281c31 A |
3648 | |
3649 | ||
3650 | { | |
3651 | # requirement of Unicode Technical Standard #18, 1.7 Code Points | |
3652 | # cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters | |
3653 | for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { | |
3654 | no warnings 'utf8'; # oops | |
3655 | my $c = chr $u; | |
3656 | my $x = sprintf '%04X', $u; | |
3657 | ok "A${c}B" =~ /A[\0-\x{10000}]B/, "Unicode range - $x"; | |
3658 | } | |
76a476f9 | 3659 | } |
8158862b | 3660 | |
84281c31 A |
3661 | |
3662 | { | |
3663 | my $res=""; | |
3664 | ||
3665 | if ('1' =~ /(?|(?<digit>1)|(?<digit>2))/) { | |
3666 | $res = "@{$- {digit}}"; | |
3667 | } | |
3668 | iseq $res, "1", | |
3669 | "Check that (?|...) doesnt cause dupe entries in the names array"; | |
3670 | ||
3671 | $res = ""; | |
3672 | if ('11' =~ /(?|(?<digit>1)|(?<digit>2))(?&digit)/) { | |
3673 | $res = "@{$- {digit}}"; | |
3674 | } | |
3675 | iseq $res, "1", "Check that (?&..) to a buffer inside " . | |
3676 | "a (?|...) goes to the leftmost"; | |
3677 | } | |
3678 | ||
3679 | ||
3680 | { | |
3681 | use warnings; | |
3682 | local $Message = "ASCII pattern that really is UTF-8"; | |
3683 | my @w; | |
3684 | local $SIG {__WARN__} = sub {push @w, "@_"}; | |
3685 | my $c = qq (\x{DF}); | |
3686 | ok $c =~ /${c}|\x{100}/; | |
3687 | ok @w == 0; | |
3688 | } | |
3689 | ||
3690 | ||
3691 | { | |
3692 | local $Message = "Corruption of match results of qr// across scopes"; | |
3693 | my $qr = qr/(fo+)(ba+r)/; | |
3694 | 'foobar' =~ /$qr/; | |
3695 | iseq "$1$2", "foobar"; | |
3696 | { | |
3697 | 'foooooobaaaaar' =~ /$qr/; | |
3698 | iseq "$1$2", 'foooooobaaaaar'; | |
3699 | } | |
3700 | iseq "$1$2", "foobar"; | |
3701 | } | |
3702 | ||
3703 | ||
3704 | { | |
3705 | local $Message = "HORIZWS"; | |
3706 | local $_ = "\t \r\n \n \t".chr(11)."\n"; | |
3707 | s/\H/H/g; | |
3708 | s/\h/h/g; | |
3709 | iseq $_, "hhHHhHhhHH"; | |
3710 | $_ = "\t \r\n \n \t" . chr (11) . "\n"; | |
3711 | utf8::upgrade ($_); | |
3712 | s/\H/H/g; | |
3713 | s/\h/h/g; | |
3714 | iseq $_, "hhHHhHhhHH"; | |
3715 | } | |
3716 | ||
3717 | ||
3718 | { | |
3719 | local $Message = "Various whitespace special patterns"; | |
3720 | my @h = map {chr $_} 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, | |
3721 | 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, | |
3722 | 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, | |
3723 | 0x3000; | |
3724 | my @v = map {chr $_} 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, | |
3725 | 0x2029; | |
3726 | my @lb = ("\x0D\x0A", map {chr $_} 0x0A .. 0x0D, 0x85, 0x2028, 0x2029); | |
3727 | foreach my $t ([\@h, qr/\h/, qr/\h+/], | |
3728 | [\@v, qr/\v/, qr/\v+/], | |
3729 | [\@lb, qr/\R/, qr/\R+/],) { | |
3730 | my $ary = shift @$t; | |
3731 | foreach my $pat (@$t) { | |
3732 | foreach my $str (@$ary) { | |
3733 | ok $str =~ /($pat)/, $pat; | |
3734 | iseq $1, $str, $pat; | |
3735 | utf8::upgrade ($str); | |
3736 | ok $str =~ /($pat)/, "Upgraded string - $pat"; | |
3737 | iseq $1, $str, "Upgraded string - $pat"; | |
3738 | } | |
e1d1eefb YO |
3739 | } |
3740 | } | |
3741 | } | |
84281c31 A |
3742 | |
3743 | ||
3744 | { | |
3745 | local $Message = "Check that \\xDF match properly in its various forms"; | |
3746 | # Test that \xDF matches properly. this is pretty hacky stuff, | |
3747 | # but its actually needed. The malarky with '-' is to prevent | |
3748 | # compilation caching from playing any role in the test. | |
3749 | my @df = (chr (0xDF), '-', chr (0xDF)); | |
3750 | utf8::upgrade ($df [2]); | |
3751 | my @strs = ('ss', 'sS', 'Ss', 'SS', chr (0xDF)); | |
3752 | my @ss = map {("$_", "$_")} @strs; | |
3753 | utf8::upgrade ($ss [$_ * 2 + 1]) for 0 .. $#strs; | |
3754 | ||
3755 | for my $ssi (0 .. $#ss) { | |
3756 | for my $dfi (0 .. $#df) { | |
3757 | my $pat = $df [$dfi]; | |
3758 | my $str = $ss [$ssi]; | |
3759 | my $utf_df = ($dfi > 1) ? 'utf8' : ''; | |
3760 | my $utf_ss = ($ssi % 2) ? 'utf8' : ''; | |
3761 | (my $sstr = $str) =~ s/\xDF/\\xDF/; | |
3762 | ||
3763 | if ($utf_df || $utf_ss || length ($ss [$ssi]) == 1) { | |
3764 | my $ret = $str =~ /$pat/i; | |
3765 | next if $pat eq '-'; | |
3766 | ok $ret, "\"$sstr\" =~ /\\xDF/i " . | |
3767 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
3768 | "@{[$utf_df||'latin']})"; | |
3769 | } | |
3770 | else { | |
3771 | my $ret = $str !~ /$pat/i; | |
3772 | next if $pat eq '-'; | |
3773 | ok $ret, "\"$sstr\" !~ /\\xDF/i " . | |
3774 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
3775 | "@{[$utf_df||'latin']})"; | |
3776 | } | |
32e6a07c YO |
3777 | } |
3778 | } | |
3779 | } | |
c490c714 | 3780 | |
c490c714 | 3781 | |
84281c31 A |
3782 | { |
3783 | local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte"; | |
3784 | my $re = qr/(?:[\x00-\xFF]{4})/; | |
3785 | my $hyp = "\0\0\0-"; | |
3786 | my $esc = "\0\0\0\\"; | |
192b9cd1 | 3787 | |
84281c31 A |
3788 | my $str = "$esc$hyp$hyp$esc$esc"; |
3789 | my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); | |
192b9cd1 | 3790 | |
84281c31 A |
3791 | iseq @a,3; |
3792 | local $" = "="; | |
3793 | iseq "@a","$esc$hyp=$hyp=$esc$esc"; | |
3794 | } | |
192b9cd1 | 3795 | |
0921ee73 | 3796 | |
84281c31 A |
3797 | { |
3798 | # Test for keys in %+ and %- | |
3799 | local $Message = 'Test keys in %+ and %-'; | |
3800 | no warnings 'uninitialized'; | |
3801 | my $_ = "abcdef"; | |
3802 | /(?<foo>a)|(?<foo>b)/; | |
3803 | iseq ((join ",", sort keys %+), "foo"); | |
3804 | iseq ((join ",", sort keys %-), "foo"); | |
3805 | iseq ((join ",", sort values %+), "a"); | |
3806 | iseq ((join ",", sort map "@$_", values %-), "a "); | |
3807 | /(?<bar>a)(?<bar>b)(?<quux>.)/; | |
3808 | iseq ((join ",", sort keys %+), "bar,quux"); | |
3809 | iseq ((join ",", sort keys %-), "bar,quux"); | |
3810 | iseq ((join ",", sort values %+), "a,c"); # leftmost | |
3811 | iseq ((join ",", sort map "@$_", values %-), "a b,c"); | |
3812 | /(?<un>a)(?<deux>c)?/; # second buffer won't capture | |
3813 | iseq ((join ",", sort keys %+), "un"); | |
3814 | iseq ((join ",", sort keys %-), "deux,un"); | |
3815 | iseq ((join ",", sort values %+), "a"); | |
3816 | iseq ((join ",", sort map "@$_", values %-), ",a"); | |
3817 | } | |
3818 | ||
3819 | ||
3820 | { | |
3821 | # length() on captures, the numbered ones end up in Perl_magic_len | |
3822 | my $_ = "aoeu \xe6var ook"; | |
3823 | /^ \w+ \s (?<eek>\S+)/x; | |
3824 | ||
3825 | iseq length ($`), 0, q[length $`]; | |
3826 | iseq length ($'), 4, q[length $']; | |
3827 | iseq length ($&), 9, q[length $&]; | |
3828 | iseq length ($1), 4, q[length $1]; | |
3829 | iseq length ($+{eek}), 4, q[length $+{eek} == length $1]; | |
3830 | } | |
3831 | ||
3832 | ||
3833 | { | |
3834 | my $ok = -1; | |
3835 | ||
3836 | $ok = exists ($-{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
3837 | iseq $ok, 1, '$-{x} exists after "bar"=~/(?<x>foo)|bar/'; | |
3838 | iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; | |
3839 | iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; | |
3840 | ||
3841 | $ok = -1; | |
3842 | $ok = exists ($+{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
3843 | iseq $ok, 0, '$+{x} not exists after "bar"=~/(?<x>foo)|bar/'; | |
3844 | iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; | |
3845 | iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; | |
3846 | ||
3847 | $ok = -1; | |
3848 | $ok = exists ($-{x}) ? 1 : 0 if 'foo' =~ /(?<x>foo)|bar/; | |
3849 | iseq $ok, 1, '$-{x} exists after "foo"=~/(?<x>foo)|bar/'; | |
3850 | iseq scalar (%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/'; | |
3851 | iseq scalar (%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/'; | |
3852 | ||
3853 | $ok = -1; | |
3854 | $ok = exists ($+{x}) ? 1 : 0 if 'foo'=~/(?<x>foo)|bar/; | |
3855 | iseq $ok, 1, '$+{x} exists after "foo"=~/(?<x>foo)|bar/'; | |
3856 | } | |
3857 | ||
3858 | ||
3859 | { | |
3860 | local $_; | |
3861 | ($_ = 'abc') =~ /(abc)/g; | |
3862 | $_ = '123'; | |
3863 | iseq "$1", 'abc', "/g leads to unsafe match vars: $1"; | |
3864 | } | |
3865 | ||
3866 | ||
3867 | { | |
3868 | local $Message = 'Message-ID: <20070818091501.7eff4831@r2d2>'; | |
3869 | my $str = ""; | |
3870 | for (0 .. 5) { | |
3871 | my @x; | |
3872 | $str .= "@x"; # this should ALWAYS be the empty string | |
3873 | 'a' =~ /(a|)/; | |
3874 | push @x, 1; | |
3875 | } | |
3876 | iseq length ($str), 0, "Trie scope error, string should be empty"; | |
3877 | $str = ""; | |
3878 | my @foo = ('a') x 5; | |
3879 | for (@foo) { | |
3880 | my @bar; | |
3881 | $str .= "@bar"; | |
3882 | s/a|/push @bar, 1/e; | |
3883 | } | |
3884 | iseq length ($str), 0, "Trie scope error, string should be empty"; | |
3885 | } | |
3886 | ||
192b9cd1 | 3887 | |
84281c31 A |
3888 | { |
3889 | local $BugId = '45605'; | |
3890 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string | |
3891 | ||
3892 | my $utf_8 = "\xd6schel"; | |
3893 | utf8::upgrade ($utf_8); | |
3894 | $utf_8 =~ m {(\xd6|Ö)schel}; | |
3895 | iseq $1, "\xd6", "Upgrade error"; | |
3896 | } | |
3897 | ||
c012444f SR |
3898 | { |
3899 | # more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding | |
3900 | for my $chr (160 .. 255) { | |
3901 | my $chr_byte = chr($chr); | |
3902 | my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); | |
3903 | my $rx = qr{$chr_byte|X}i; | |
3904 | ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); | |
3905 | } | |
3906 | } | |
84281c31 A |
3907 | |
3908 | { | |
3909 | # Regardless of utf8ness any character matches itself when | |
3910 | # doing a case insensitive match. See also [perl #36207] | |
3911 | local $BugId = '36207'; | |
3912 | for my $o (0 .. 255) { | |
3913 | my @ch = (chr ($o), chr ($o)); | |
3914 | utf8::upgrade ($ch [1]); | |
3915 | for my $u_str (0, 1) { | |
3916 | for my $u_pat (0, 1) { | |
3917 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i, | |
3918 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat"; | |
3919 | ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i, | |
3920 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat"; | |
3921 | } | |
a0a388a1 YO |
3922 | } |
3923 | } | |
3924 | } | |
0357f1fd ML |
3925 | |
3926 | ||
84281c31 A |
3927 | { |
3928 | our $a = 3; "" =~ /(??{ $a })/; | |
3929 | our $b = $a; | |
3930 | iseq $b, $a, "Copy of scalar used for postponed subexpression"; | |
3931 | } | |
3b4d0bf4 | 3932 | |
0357f1fd | 3933 | |
84281c31 A |
3934 | { |
3935 | local $BugId = '49190'; | |
3936 | local $Message = '$REGMARK in replacement'; | |
3937 | our $REGMARK; | |
3938 | my $_ = "A"; | |
3939 | ok s/(*:B)A/$REGMARK/; | |
3940 | iseq $_, "B"; | |
3941 | $_ = "CCCCBAA"; | |
3942 | ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; | |
3943 | iseq $_, "ZYX"; | |
3944 | } | |
ea29714e | 3945 | |
de8c5301 | 3946 | |
84281c31 A |
3947 | { |
3948 | our @ctl_n = (); | |
3949 | our @plus = (); | |
3950 | our $nested_tags; | |
3951 | $nested_tags = qr{ | |
3952 | < | |
3953 | (\w+) | |
3954 | (?{ | |
3955 | push @ctl_n,$^N; | |
3956 | push @plus,$+; | |
3957 | }) | |
3958 | > | |
3959 | (??{$nested_tags})* | |
3960 | </\s* \w+ \s*> | |
3961 | }x; | |
3962 | ||
3963 | my $match = '<bla><blubb></blubb></bla>' =~ m/^$nested_tags$/; | |
3964 | ok $match, 'nested construct matches'; | |
3965 | iseq "@ctl_n", "bla blubb", '$^N inside of (?{}) works as expected'; | |
3966 | iseq "@plus", "bla blubb", '$+ inside of (?{}) works as expected'; | |
3967 | } | |
3968 | ||
bd94e887 | 3969 | |
84281c31 A |
3970 | { |
3971 | local $BugId = '52658'; | |
3972 | local $Message = 'Substitution evaluation in list context'; | |
3973 | my $reg = '../xxx/'; | |
3974 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, | |
3975 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); | |
3976 | iseq $reg, '../bbb/'; | |
3977 | iseq $te [0], '../'; | |
3978 | } | |
bd94e887 | 3979 | |
609122bd KW |
3980 | # This currently has to come before any "use encoding" in this file. |
3981 | { | |
3982 | local $Message; | |
3983 | local $BugId = '59342'; | |
3984 | must_warn 'qr/\400/', '^Use of octal value above 377'; | |
3985 | } | |
3986 | ||
e3faa678 | 3987 | |
84281c31 A |
3988 | SKIP: { |
3989 | # XXX: This set of tests is essentially broken, POSIX character classes | |
3990 | # should not have differing definitions under Unicode. | |
3991 | # There are property names for that. | |
3992 | skip "Tests assume ASCII", 4 unless $IS_ASCII; | |
3993 | ||
3994 | my @notIsPunct = grep {/[[:punct:]]/ and not /\p{IsPunct}/} | |
3995 | map {chr} 0x20 .. 0x7f; | |
3996 | iseq join ('', @notIsPunct), '$+<=>^`|~', | |
3997 | '[:punct:] disagress with IsPunct on Symbols'; | |
3998 | ||
3999 | my @isPrint = grep {not /[[:print:]]/ and /\p{IsPrint}/} | |
4000 | map {chr} 0 .. 0x1f, 0x7f .. 0x9f; | |
4001 | iseq join ('', @isPrint), "\x09\x0a\x0b\x0c\x0d\x85", | |
4002 | 'IsPrint disagrees with [:print:] on control characters'; | |
4003 | ||
4004 | my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} | |
4005 | map {chr} 0x80 .. 0xff; | |
4006 | iseq join ('', @isPunct), "\xa1\xab\xb7\xbb\xbf", # ¡ « · » ¿ | |
4007 | 'IsPunct disagrees with [:punct:] outside ASCII'; | |
4008 | ||
4009 | my @isPunctLatin1 = eval q { | |
4010 | use encoding 'latin1'; | |
4011 | grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; | |
4012 | }; | |
4013 | skip "Eval failed ($@)", 1 if $@; | |
4014 | skip "PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS set to 0", 1 | |
4015 | if $ENV {REAL_POSIX_CC}; | |
4016 | iseq join ('', @isPunctLatin1), '', | |
4017 | 'IsPunct agrees with [:punct:] with explicit Latin1'; | |
4018 | } | |
e3faa678 | 4019 | |
e3faa678 | 4020 | |
84281c31 A |
4021 | { |
4022 | local $BugId = '60034'; | |
84281c31 A |
4023 | my $a = "xyzt" x 8192; |
4024 | ok $a =~ /\A(?>[a-z])*\z/, | |
4025 | '(?>) does not cause wrongness on long string'; | |
4026 | my $b = $a . chr 256; | |
4027 | chop $b; | |
4028 | { | |
84281c31 A |
4029 | iseq $a, $b; |
4030 | } | |
4031 | ok $b =~ /\A(?>[a-z])*\z/, | |
4032 | '(?>) does not cause wrongness on long string with UTF-8'; | |
4033 | } | |
4034 | ||
4035 | ||
4036 | # | |
4037 | # Keep the following tests last -- they may crash perl | |
4038 | # | |
4039 | print "# Tests that follow may crash perl\n"; | |
4040 | { | |
4041 | local $BugId = '19049/38869'; | |
4042 | local $Message = 'Pattern in a loop, failure should not ' . | |
4043 | 'affect previous success'; | |
4044 | my @list = ( | |
4045 | 'ab cdef', # Matches regex | |
4046 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it | |
4047 | ); | |
4048 | my $y; | |
4049 | my $x; | |
4050 | foreach (@list) { | |
4051 | m/ab(.+)cd/i; # The ignore-case seems to be important | |
4052 | $y = $1; # Use $1, which might not be from the last match! | |
4053 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); | |
4054 | } | |
4055 | iseq $y, ' '; | |
4056 | iseq $x, 'ab cd'; | |
4057 | } | |
4058 | ||
4059 | ||
4060 | { | |
4061 | local $BugId = '24274'; | |
4062 | ||
4063 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker"); | |
4064 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, | |
4065 | "Regexp /^(??{'(.)'x 100})/ crashes older perls"); | |
4066 | } | |
4067 | ||
4068 | ||
4069 | { | |
4070 | eval '/\k/'; | |
4071 | ok $@ =~ /\QSequence \k... not terminated in regex;\E/, | |
4072 | 'Lone \k not allowed'; | |
4073 | } | |
4074 | ||
4075 | ||
4076 | { | |
4077 | local $Message = "Substitution with lookahead (possible segv)"; | |
4078 | $_ = "ns1ns1ns1"; | |
4079 | s/ns(?=\d)/ns_/g; | |
4080 | iseq $_, "ns_1ns_1ns_1"; | |
4081 | $_ = "ns1"; | |
4082 | s/ns(?=\d)/ns_/; | |
4083 | iseq $_, "ns_1"; | |
4084 | $_ = "123"; | |
4085 | s/(?=\d+)|(?<=\d)/!Bang!/g; | |
4086 | iseq $_, "!Bang!1!Bang!2!Bang!3!Bang!"; | |
4087 | } | |
4088 | ||
4089 | ||
4090 | { | |
4091 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache | |
4092 | local $BugId = '45337'; | |
4093 | local ${^UTF8CACHE} = -1; | |
4094 | local $Message = "Shouldn't panic"; | |
4095 | my $s = "[a]a{2}"; | |
4096 | utf8::upgrade $s; | |
4097 | ok "aaa" =~ /$s/; | |
4098 | } | |
6962fb1a YO |
4099 | { |
4100 | local $BugId = '57042'; | |
4101 | local $Message = "Check if tree logic breaks \$^R"; | |
4102 | my $cond_re = qr/\s* | |
4103 | \s* (?: | |
4104 | \( \s* A (?{1}) | |
4105 | | \( \s* B (?{2}) | |
4106 | ) | |
4107 | /x; | |
4108 | my @res; | |
4109 | for my $line ("(A)","(B)") { | |
4110 | if ($line =~ m/$cond_re/) { | |
4111 | push @res, $^R ? "#$^R" : "UNDEF"; | |
4112 | } | |
4113 | } | |
4114 | iseq "@res","#1 #2"; | |
4115 | } | |
e120390b B |
4116 | { |
4117 | no warnings 'closure'; | |
4118 | my $re = qr/A(??{"1"})/; | |
4119 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; | |
4120 | ok $1 eq "A1"; | |
4121 | ok $2 eq "B"; | |
4122 | } | |
34a81e2b B |
4123 | |
4124 | ||
4125 | { | |
4126 | use re 'eval'; | |
4127 | local $Message = 'Test if $^N and $+ work in (?{{})'; | |
4128 | our @ctl_n = (); | |
4129 | our @plus = (); | |
4130 | our $nested_tags; | |
4131 | $nested_tags = qr{ | |
4132 | < | |
4133 | ((\w)+) | |
4134 | (?{ | |
4135 | push @ctl_n, (defined $^N ? $^N : "undef"); | |
4136 | push @plus, (defined $+ ? $+ : "undef"); | |
4137 | }) | |
4138 | > | |
4139 | (??{$nested_tags})* | |
4140 | </\s* \w+ \s*> | |
4141 | }x; | |
4142 | ||
4143 | ||
4144 | my $c = 0; | |
4145 | for my $test ( | |
4146 | [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], | |
4147 | [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], | |
4148 | [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], | |
4149 | [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], | |
4150 | [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], | |
4151 | [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4152 | [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4153 | [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], | |
4154 | [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], | |
4155 | [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4156 | [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4157 | [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], | |
4158 | [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ], | |
4159 | ||
4160 | ) { | |
4161 | $c++; | |
4162 | @ctl_n = (); | |
4163 | @plus = (); | |
4164 | my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); | |
4165 | push @ctl_n, (defined $^N ? $^N : "undef"); | |
4166 | push @plus, (defined $+ ? $+ : "undef"); | |
4167 | ok($test->[0] == $match, "match $c"); | |
4168 | if ($test->[0] != $match) { | |
4169 | # unset @ctl_n and @plus | |
4170 | @ctl_n = @plus = (); | |
4171 | } | |
4172 | iseq("@ctl_n", $test->[2], "ctl_n $c"); | |
4173 | iseq("@plus", $test->[3], "plus $c"); | |
4174 | } | |
4175 | } | |
4176 | ||
4177 | { | |
4178 | use re 'eval'; | |
4179 | local $BugId = '56194'; | |
4180 | ||
4181 | our $f; | |
4182 | local $f; | |
4183 | $f = sub { | |
4184 | defined $_[0] ? $_[0] : "undef"; | |
4185 | }; | |
4186 | ||
4187 | ok("123" =~ m/^(\d)(((??{1 + $^N})))+$/); | |
4188 | ||
4189 | our @ctl_n; | |
4190 | our @plus; | |
4191 | ||
4192 | my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; | |
4193 | my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; | |
4194 | my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; | |
4195 | our $re5; | |
4196 | local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; | |
4197 | my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; | |
4198 | my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; | |
4199 | my $re8 = qr/(\d+)/; | |
4200 | my $c = 0; | |
4201 | for my $test ( | |
4202 | [ | |
4203 | "1233", | |
4204 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, | |
4205 | "1 2 3 3", | |
4206 | "1 2 3 3", | |
4207 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4208 | ], | |
4209 | [ | |
4210 | "1233", | |
4211 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, | |
4212 | "1 2 3 3", | |
4213 | "1 2 3 3", | |
4214 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4215 | ], | |
4216 | [ | |
4217 | "1233", | |
4218 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, | |
4219 | "1 2 3 3", | |
4220 | "1 2 3 3", | |
4221 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4222 | ], | |
4223 | [ | |
4224 | "1233", | |
4225 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, | |
4226 | "1 2 3 3", | |
4227 | "1 2 3 3", | |
4228 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4229 | ], | |
4230 | [ | |
4231 | "1233", | |
4232 | qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, | |
4233 | "1 2 3 3", | |
4234 | "1 2 3 3", | |
4235 | "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", | |
4236 | ], | |
4237 | [ | |
4238 | "123abc3", | |
4239 | qr#^($re)(|a(b)c|def)(??{$^R})$#, | |
4240 | "1 2 3 abc", | |
4241 | "1 2 3 b", | |
4242 | "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4243 | ], | |
4244 | [ | |
4245 | "123abc3", | |
4246 | qr#^($re2)$#, | |
4247 | "1 2 3 123abc3", | |
4248 | "1 2 3 b", | |
4249 | "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4250 | ], | |
4251 | [ | |
4252 | "123abc3", | |
4253 | qr#^($re3)$#, | |
4254 | "1 2 123abc3", | |
4255 | "1 2 b", | |
4256 | "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", | |
4257 | ], | |
4258 | [ | |
4259 | "123abc3", | |
4260 | qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, | |
4261 | "1 2 abc", | |
4262 | "1 2 abc", | |
4263 | "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", | |
4264 | ], | |
4265 | [ | |
4266 | "123abc3", | |
4267 | qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, | |
4268 | "1 2 abc", | |
4269 | "1 2 b", | |
4270 | "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", | |
4271 | ], | |
4272 | [ | |
4273 | "1234", | |
4274 | qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, | |
4275 | "1234 123 12 1 2 3 1234", | |
4276 | "1234 123 12 1 2 3 4", | |
4277 | "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", | |
4278 | ], | |
4279 | [ | |
4280 | "1234556", | |
4281 | qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#, | |
4282 | "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56", | |
4283 | "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5", | |
4284 | "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56", | |
4285 | ], | |
4286 | [ | |
4287 | "12345562", | |
4288 | qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#, | |
4289 | "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62", | |
4290 | "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2", | |
4291 | "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5", | |
4292 | ], | |
4293 | ) { | |
4294 | $c++; | |
4295 | @ctl_n = (); | |
4296 | @plus = (); | |
4297 | undef $^R; | |
4298 | my $match = $test->[0] =~ $test->[1]; | |
4299 | my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5)); | |
4300 | push @ctl_n, $f->($^N); | |
4301 | push @plus, $f->($+); | |
4302 | ok($match, "match $c"); | |
4303 | if (not $match) { | |
4304 | # unset $str, @ctl_n and @plus | |
4305 | $str = ""; | |
4306 | @ctl_n = @plus = (); | |
4307 | } | |
4308 | iseq("@ctl_n", $test->[2], "ctl_n $c"); | |
4309 | iseq("@plus", $test->[3], "plus $c"); | |
4310 | iseq($str, $test->[4], "str $c"); | |
4311 | } | |
4312 | SKIP: { | |
4313 | if ($] le '5.010') { | |
4314 | skip "test segfaults on perl < 5.10", 4; | |
4315 | } | |
4316 | ||
4317 | @ctl_n = (); | |
4318 | @plus = (); | |
4319 | ||
4320 | our $re4; | |
4321 | local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#; | |
4322 | undef $^R; | |
4323 | my $match = "123abc3" =~ m/^(??{$re4})$/; | |
4324 | my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R)); | |
4325 | push @ctl_n, $f->($^N); | |
4326 | push @plus, $f->($+); | |
4327 | ok($match); | |
4328 | if (not $match) { | |
4329 | # unset $str | |
4330 | @ctl_n = (); | |
4331 | @plus = (); | |
4332 | $str = ""; | |
4333 | } | |
4334 | iseq("@ctl_n", "1 2 undef"); | |
4335 | iseq("@plus", "1 2 undef"); | |
4336 | iseq($str, "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = undef"); | |
4337 | } | |
4338 | } | |
84281c31 A |
4339 | # |
4340 | # This should be the last test. | |
4341 | # | |
4342 | iseq $test + 1, $EXPECTED_TESTS, "Got the right number of tests!"; | |
4343 | ||
4344 | } # End of sub run_tests | |
4345 | ||
4346 | 1; |