Commit | Line | Data |
---|---|---|
8d063cd8 | 1 | #!./perl |
8d37f932 DD |
2 | # |
3 | # This is a home for regular expression tests that don't fit into | |
67a2b8c6 | 4 | # the format supported by re/regexp.t. If you want to add a test |
ff3f963a KW |
5 | # that does fit that format, add it to re/re_tests, not here. Tests for \N |
6 | # should be added here because they are treated as single quoted strings | |
7 | # there, which means they avoid the lexer which otherwise would look at them. | |
8d063cd8 | 8 | |
84281c31 A |
9 | use strict; |
10 | use warnings; | |
11 | use 5.010; | |
12 | ||
84281c31 A |
13 | sub run_tests; |
14 | ||
9133bbab | 15 | $| = 1; |
3568d838 | 16 | |
8d37f932 | 17 | |
e4d48cc9 GS |
18 | BEGIN { |
19 | chdir 't' if -d 't'; | |
9d45b377 | 20 | @INC = ('../lib','.'); |
6f4e0180 | 21 | require './test.pl'; |
e4d48cc9 | 22 | } |
84281c31 | 23 | |
0bda3001 | 24 | plan tests => 455; # Update this when adding/deleting tests. |
b7a35066 | 25 | |
9d45b377 | 26 | run_tests() unless caller; |
b7a35066 | 27 | |
84281c31 A |
28 | # |
29 | # Tests start here. | |
30 | # | |
31 | sub run_tests { | |
0ef3e39e | 32 | |
84281c31 | 33 | { |
84281c31 | 34 | my $x = "abc\ndef\n"; |
5895685f | 35 | (my $x_pretty = $x) =~ s/\n/\\n/g; |
fd291da9 | 36 | |
5895685f NC |
37 | ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; |
38 | ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; | |
fd291da9 | 39 | |
84281c31 | 40 | # used to be a test for $* |
5895685f | 41 | ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; |
fd291da9 | 42 | |
b33825c4 NC |
43 | ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); |
44 | ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); | |
fd291da9 | 45 | |
5895685f | 46 | ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; |
b33825c4 | 47 | ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); |
4765795a | 48 | |
5895685f | 49 | ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; |
b33825c4 | 50 | ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); |
4765795a | 51 | |
5895685f | 52 | ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; |
b33825c4 | 53 | ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); |
84281c31 | 54 | } |
4765795a | 55 | |
84281c31 A |
56 | { |
57 | $_ = '123'; | |
58 | ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; | |
59 | } | |
f9969324 | 60 | |
84281c31 A |
61 | { |
62 | $_ = 'aaabbbccc'; | |
63 | ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', | |
64 | qq [\$_ = '$_'; /(a*b*)(c*)/]; | |
65 | ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; | |
b33825c4 | 66 | unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); |
84281c31 A |
67 | |
68 | $_ = 'aaabccc'; | |
69 | ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; | |
70 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
71 | ||
72 | $_ = 'aaaccc'; | |
73 | ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; | |
b33825c4 | 74 | unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); |
84281c31 A |
75 | |
76 | $_ = 'abcdef'; | |
77 | ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; | |
78 | ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; | |
79 | ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; | |
80 | ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; | |
4765795a | 81 | } |
4765795a | 82 | |
84281c31 A |
83 | { |
84 | # used to be a test for $* | |
5895685f | 85 | ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; |
84281c31 | 86 | } |
4765795a | 87 | |
84281c31 A |
88 | { |
89 | our %XXX = map {($_ => $_)} 123, 234, 345; | |
90 | ||
91 | our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); | |
92 | while ($_ = shift(@XXX)) { | |
b33825c4 | 93 | my $e = index ($_, 'not') >= 0 ? '' : 1; |
725a61d7 | 94 | my $r = m?(.*)?; |
b33825c4 | 95 | is($r, $e, "?(.*)?"); |
84281c31 A |
96 | /not/ && reset; |
97 | if (/not ok 2/) { | |
98 | if ($^O eq 'VMS') { | |
99 | $_ = shift(@XXX); | |
100 | } | |
101 | else { | |
102 | reset 'X'; | |
103 | } | |
104 | } | |
105 | } | |
4765795a | 106 | |
84281c31 A |
107 | SKIP: { |
108 | if ($^O eq 'VMS') { | |
109 | skip "Reset 'X'", 1; | |
110 | } | |
111 | ok !keys %XXX, "%XXX is empty"; | |
112 | } | |
4765795a | 113 | |
84281c31 | 114 | } |
4765795a | 115 | |
84281c31 | 116 | { |
4f890a30 | 117 | my $message = "Test empty pattern"; |
84281c31 A |
118 | my $xyz = 'xyz'; |
119 | my $cde = 'cde'; | |
120 | ||
121 | $cde =~ /[^ab]*/; | |
122 | $xyz =~ //; | |
4f890a30 | 123 | is($&, $xyz, $message); |
84281c31 A |
124 | |
125 | my $foo = '[^ab]*'; | |
126 | $cde =~ /$foo/; | |
127 | $xyz =~ //; | |
4f890a30 | 128 | is($&, $xyz, $message); |
84281c31 A |
129 | |
130 | $cde =~ /$foo/; | |
131 | my $null; | |
132 | no warnings 'uninitialized'; | |
133 | $xyz =~ /$null/; | |
4f890a30 | 134 | is($&, $xyz, $message); |
84281c31 A |
135 | |
136 | $null = ""; | |
137 | $xyz =~ /$null/; | |
4f890a30 | 138 | is($&, $xyz, $message); |
84281c31 | 139 | } |
4765795a | 140 | |
84281c31 | 141 | { |
4f890a30 | 142 | my $message = q !Check $`, $&, $'!; |
84281c31 | 143 | $_ = 'abcdefghi'; |
0f289c68 | 144 | /def/; # optimized up to cmd |
4f890a30 | 145 | is("$`:$&:$'", 'abc:def:ghi', $message); |
4765795a | 146 | |
84281c31 | 147 | no warnings 'void'; |
0f289c68 | 148 | /cde/ + 0; # optimized only to spat |
4f890a30 | 149 | is("$`:$&:$'", 'ab:cde:fghi', $message); |
4765795a | 150 | |
0f289c68 | 151 | /[d][e][f]/; # not optimized |
4f890a30 | 152 | is("$`:$&:$'", 'abc:def:ghi', $message); |
84281c31 | 153 | } |
4765795a | 154 | |
84281c31 A |
155 | { |
156 | $_ = 'now is the {time for all} good men to come to.'; | |
157 | / {([^}]*)}/; | |
de26e0cc | 158 | is($1, 'time for all', "Match braces"); |
84281c31 | 159 | } |
4765795a | 160 | |
84281c31 | 161 | { |
4f890a30 | 162 | my $message = "{N,M} quantifier"; |
84281c31 | 163 | $_ = 'xxx {3,4} yyy zzz'; |
4f890a30 NC |
164 | ok(/( {3,4})/, $message); |
165 | is($1, ' ', $message); | |
166 | unlike($_, qr/( {4,})/, $message); | |
167 | ok(/( {2,3}.)/, $message); | |
168 | is($1, ' y', $message); | |
169 | ok(/(y{2,3}.)/, $message); | |
170 | is($1, 'yyy ', $message); | |
171 | unlike($_, qr/x {3,4}/, $message); | |
172 | unlike($_, qr/^xxx {3,4}/, $message); | |
84281c31 | 173 | } |
4765795a | 174 | |
84281c31 | 175 | { |
4f890a30 | 176 | my $message = "Test /g"; |
84281c31 A |
177 | local $" = ":"; |
178 | $_ = "now is the time for all good men to come to."; | |
179 | my @words = /(\w+)/g; | |
180 | my $exp = "now:is:the:time:for:all:good:men:to:come:to"; | |
4765795a | 181 | |
4f890a30 | 182 | is("@words", $exp, $message); |
4765795a | 183 | |
84281c31 A |
184 | @words = (); |
185 | while (/\w+/g) { | |
186 | push (@words, $&); | |
187 | } | |
4f890a30 | 188 | is("@words", $exp, $message); |
4765795a | 189 | |
84281c31 A |
190 | @words = (); |
191 | pos = 0; | |
192 | while (/to/g) { | |
193 | push(@words, $&); | |
194 | } | |
4f890a30 | 195 | is("@words", "to:to", $message); |
4765795a | 196 | |
84281c31 A |
197 | pos $_ = 0; |
198 | @words = /to/g; | |
4f890a30 | 199 | is("@words", "to:to", $message); |
84281c31 | 200 | } |
4765795a | 201 | |
84281c31 A |
202 | { |
203 | $_ = "abcdefghi"; | |
204 | ||
205 | my $pat1 = 'def'; | |
206 | my $pat2 = '^def'; | |
207 | my $pat3 = '.def.'; | |
208 | my $pat4 = 'abc'; | |
209 | my $pat5 = '^abc'; | |
210 | my $pat6 = 'abc$'; | |
211 | my $pat7 = 'ghi'; | |
212 | my $pat8 = '\w*ghi'; | |
213 | my $pat9 = 'ghi$'; | |
214 | ||
215 | my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = | |
216 | my $t6 = my $t7 = my $t8 = my $t9 = 0; | |
217 | ||
218 | for my $iter (1 .. 5) { | |
219 | $t1++ if /$pat1/o; | |
220 | $t2++ if /$pat2/o; | |
221 | $t3++ if /$pat3/o; | |
222 | $t4++ if /$pat4/o; | |
223 | $t5++ if /$pat5/o; | |
224 | $t6++ if /$pat6/o; | |
225 | $t7++ if /$pat7/o; | |
226 | $t8++ if /$pat8/o; | |
227 | $t9++ if /$pat9/o; | |
228 | } | |
229 | my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; | |
de26e0cc | 230 | is($x, '505550555', "Test /o"); |
84281c31 | 231 | } |
4765795a | 232 | |
4f890a30 | 233 | { |
84281c31 A |
234 | my $xyz = 'xyz'; |
235 | ok "abc" =~ /^abc$|$xyz/, "| after \$"; | |
4765795a | 236 | |
84281c31 | 237 | # perl 4.009 says "unmatched ()" |
4f890a30 | 238 | my $message = '$ inside ()'; |
4765795a | 239 | |
84281c31 A |
240 | my $result; |
241 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; | |
4f890a30 NC |
242 | is($@, "", $message); |
243 | is($result, "abc:bc", $message); | |
84281c31 | 244 | } |
4765795a | 245 | |
84281c31 | 246 | { |
4f890a30 | 247 | my $message = "Scalar /g"; |
84281c31 A |
248 | $_ = "abcfooabcbar"; |
249 | ||
4f890a30 NC |
250 | ok( /abc/g && $` eq "", $message); |
251 | ok( /abc/g && $` eq "abcfoo", $message); | |
252 | ok(!/abc/g, $message); | |
84281c31 | 253 | |
4f890a30 | 254 | $message = "Scalar /gi"; |
84281c31 | 255 | pos = 0; |
4f890a30 NC |
256 | ok( /ABC/gi && $` eq "", $message); |
257 | ok( /ABC/gi && $` eq "abcfoo", $message); | |
258 | ok(!/ABC/gi, $message); | |
84281c31 | 259 | |
4f890a30 | 260 | $message = "Scalar /g"; |
84281c31 | 261 | pos = 0; |
4f890a30 NC |
262 | ok( /abc/g && $' eq "fooabcbar", $message); |
263 | ok( /abc/g && $' eq "bar", $message); | |
84281c31 A |
264 | |
265 | $_ .= ''; | |
266 | my @x = /abc/g; | |
de26e0cc | 267 | is(@x, 2, "/g reset after assignment"); |
4765795a | 268 | } |
4765795a | 269 | |
84281c31 | 270 | { |
4f890a30 | 271 | my $message = '/g, \G and pos'; |
84281c31 A |
272 | $_ = "abdc"; |
273 | pos $_ = 2; | |
274 | /\Gc/gc; | |
4f890a30 | 275 | is(pos $_, 2, $message); |
84281c31 | 276 | /\Gc/g; |
4f890a30 | 277 | is(pos $_, undef, $message); |
84281c31 | 278 | } |
4765795a | 279 | |
84281c31 | 280 | { |
4f890a30 | 281 | my $message = '(?{ })'; |
84281c31 A |
282 | our $out = 1; |
283 | 'abc' =~ m'a(?{ $out = 2 })b'; | |
4f890a30 | 284 | is($out, 2, $message); |
84281c31 A |
285 | |
286 | $out = 1; | |
287 | 'abc' =~ m'a(?{ $out = 3 })c'; | |
4f890a30 | 288 | is($out, 1, $message); |
84281c31 | 289 | } |
4765795a | 290 | |
84281c31 A |
291 | { |
292 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; | |
293 | my @out = /(?<!foo)bar./g; | |
de26e0cc | 294 | is("@out", 'bar2 barf', "Negative lookbehind"); |
84281c31 | 295 | } |
4765795a | 296 | |
84281c31 | 297 | { |
4f890a30 | 298 | my $message = "REG_INFTY tests"; |
84281c31 | 299 | # Tests which depend on REG_INFTY |
19d6612d NC |
300 | |
301 | # Defaults assumed if this fails | |
302 | eval { require Config; }; | |
303 | $::reg_infty = $Config::Config{reg_infty} // 32767; | |
84281c31 A |
304 | $::reg_infty_m = $::reg_infty - 1; |
305 | $::reg_infty_p = $::reg_infty + 1; | |
93f09d7b | 306 | $::reg_infty_m = $::reg_infty_m; # Suppress warning. |
84281c31 A |
307 | |
308 | # As well as failing if the pattern matches do unexpected things, the | |
309 | # next three tests will fail if you should have picked up a lower-than- | |
310 | # default value for $reg_infty from Config.pm, but have not. | |
311 | ||
14358a41 NC |
312 | is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); |
313 | is($@, '', $message); | |
314 | is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); | |
315 | is($@, '', $message); | |
316 | isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); | |
317 | is($@, '', $message); | |
318 | ||
84281c31 | 319 | eval "'aaa' =~ /a{1,$::reg_infty}/"; |
224b2e7e | 320 | like($@, qr/^\QQuantifier in {,} bigger than/, $message); |
84281c31 | 321 | eval "'aaa' =~ /a{1,$::reg_infty_p}/"; |
4f890a30 | 322 | like($@, qr/^\QQuantifier in {,} bigger than/, $message); |
4765795a | 323 | } |
8269fa76 | 324 | |
84281c31 A |
325 | { |
326 | # Poke a couple more parse failures | |
327 | my $context = 'x' x 256; | |
328 | eval qq("${context}y" =~ /(?<=$context)y/); | |
329 | ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; | |
330 | } | |
8269fa76 | 331 | |
84281c31 A |
332 | { |
333 | # Long Monsters | |
84281c31 A |
334 | for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory |
335 | my $a = 'a' x $l; | |
4f890a30 NC |
336 | my $message = "Long monster, length = $l"; |
337 | like("ba$a=", qr/a$a=/, $message); | |
338 | unlike("b$a=", qr/a$a=/, $message); | |
339 | like("b$a=", qr/ba+=/, $message); | |
84281c31 | 340 | |
224b2e7e | 341 | like("ba$a=", qr/b(?:a|b)+=/, $message); |
84281c31 A |
342 | } |
343 | } | |
8269fa76 | 344 | |
84281c31 A |
345 | { |
346 | # 20000 nodes, each taking 3 words per string, and 1 per branch | |
347 | my $long_constant_len = join '|', 12120 .. 32645; | |
348 | my $long_var_len = join '|', 8120 .. 28645; | |
349 | my %ans = ( 'ax13876y25677lbc' => 1, | |
350 | 'ax13876y25677mcb' => 0, # not b. | |
351 | 'ax13876y35677nbc' => 0, # Num too big | |
352 | 'ax13876y25677y21378obc' => 1, | |
0f289c68 | 353 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] |
84281c31 A |
354 | 'ax13876y25677y21378y21378kbc' => 1, |
355 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. | |
356 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs | |
357 | ); | |
358 | ||
84281c31 | 359 | for (keys %ans) { |
4f890a30 NC |
360 | my $message = "20000 nodes, const-len '$_'"; |
361 | ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; | |
84281c31 | 362 | |
4f890a30 NC |
363 | $message = "20000 nodes, var-len '$_'"; |
364 | ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; | |
84281c31 | 365 | } |
b8ef571c | 366 | } |
209a9bc1 | 367 | |
84281c31 | 368 | { |
4f890a30 | 369 | my $message = "Complicated backtracking"; |
84281c31 A |
370 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; |
371 | my $expect = "(bla()) ((l)u((e))) (l(e)e)"; | |
372 | ||
373 | use vars '$c'; | |
374 | sub matchit { | |
375 | m/ | |
376 | ( | |
377 | \( | |
0f289c68 | 378 | (?{ $c = 1 }) # Initialize |
84281c31 A |
379 | (?: |
380 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop | |
381 | (?! | |
0f289c68 YO |
382 | ) # Fail: will unwind one iteration back |
383 | ) | |
84281c31 | 384 | (?: |
0f289c68 | 385 | [^()]+ # Match a big chunk |
84281c31 A |
386 | (?= |
387 | [()] | |
0f289c68 | 388 | ) # Do not try to match subchunks |
84281c31 A |
389 | | |
390 | \( | |
391 | (?{ ++$c }) | |
392 | | | |
393 | \) | |
394 | (?{ --$c }) | |
395 | ) | |
0f289c68 | 396 | )+ # This may not match with different subblocks |
84281c31 A |
397 | ) |
398 | (?(?{ $c != 0 }) | |
399 | (?! | |
0f289c68 YO |
400 | ) # Fail |
401 | ) # Otherwise the chunk 1 may succeed with $c>0 | |
84281c31 A |
402 | /xg; |
403 | } | |
3568d838 | 404 | |
84281c31 A |
405 | my @ans = (); |
406 | my $res; | |
407 | push @ans, $res while $res = matchit; | |
4f890a30 | 408 | is("@ans", "1 1 1", $message); |
3568d838 | 409 | |
84281c31 | 410 | @ans = matchit; |
4f890a30 | 411 | is("@ans", $expect, $message); |
3568d838 | 412 | |
4f890a30 | 413 | $message = "Recursion with (??{ })"; |
84281c31 A |
414 | our $matched; |
415 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; | |
3568d838 | 416 | |
84281c31 A |
417 | @ans = my @ans1 = (); |
418 | push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; | |
3568d838 | 419 | |
4f890a30 NC |
420 | is("@ans", "1 1 1", $message); |
421 | is("@ans1", $expect, $message); | |
3568d838 | 422 | |
84281c31 | 423 | @ans = m/$matched/g; |
4f890a30 | 424 | is("@ans", $expect, $message); |
3568d838 | 425 | |
84281c31 | 426 | } |
3568d838 | 427 | |
84281c31 A |
428 | { |
429 | ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; | |
430 | } | |
3568d838 | 431 | |
84281c31 | 432 | { |
0f289c68 | 433 | my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad |
de26e0cc | 434 | is("@ans", 'a/ b', "Stack may be bad"); |
84281c31 | 435 | } |
3568d838 | 436 | |
84281c31 | 437 | { |
4f890a30 | 438 | my $message = "Eval-group not allowed at runtime"; |
84281c31 A |
439 | my $code = '{$blah = 45}'; |
440 | our $blah = 12; | |
441 | eval { /(?$code)/ }; | |
4f890a30 | 442 | ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); |
84281c31 | 443 | |
3044771b NC |
444 | $blah = 12; |
445 | my $res = eval { "xx" =~ /(?$code)/o }; | |
446 | { | |
447 | no warnings 'uninitialized'; | |
5895685f | 448 | chomp $@; my $message = "$message '$@', '$res', '$blah'"; |
4f890a30 | 449 | ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); |
3044771b NC |
450 | } |
451 | ||
452 | $code = '=xx'; | |
453 | $blah = 12; | |
454 | $res = eval { "xx" =~ /(?$code)/o }; | |
455 | { | |
456 | no warnings 'uninitialized'; | |
4f890a30 NC |
457 | my $message = "$message '$@', '$res', '$blah'"; |
458 | ok(!$@ && $res, $message); | |
3044771b | 459 | } |
3568d838 | 460 | |
84281c31 A |
461 | $code = '{$blah = 45}'; |
462 | $blah = 12; | |
463 | eval "/(?$code)/"; | |
4f890a30 | 464 | is($blah, 45, $message); |
3568d838 | 465 | |
84281c31 A |
466 | $blah = 12; |
467 | /(?{$blah = 45})/; | |
4f890a30 | 468 | is($blah, 45, $message); |
84281c31 | 469 | } |
3568d838 | 470 | |
84281c31 | 471 | { |
4f890a30 | 472 | my $message = "Pos checks"; |
84281c31 A |
473 | my $x = 'banana'; |
474 | $x =~ /.a/g; | |
4f890a30 | 475 | is(pos $x, 2, $message); |
3568d838 | 476 | |
84281c31 | 477 | $x =~ /.z/gc; |
4f890a30 | 478 | is(pos $x, 2, $message); |
3568d838 | 479 | |
84281c31 A |
480 | sub f { |
481 | my $p = $_[0]; | |
482 | return $p; | |
483 | } | |
3568d838 | 484 | |
84281c31 | 485 | $x =~ /.a/g; |
4f890a30 | 486 | is(f (pos $x), 4, $message); |
84281c31 | 487 | } |
3568d838 | 488 | |
84281c31 | 489 | { |
4f890a30 | 490 | my $message = 'Checking $^R'; |
84281c31 A |
491 | our $x = $^R = 67; |
492 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; | |
4f890a30 | 493 | is($^R, 75, $message); |
84281c31 A |
494 | |
495 | $x = $^R = 67; | |
496 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; | |
4f890a30 | 497 | ok($^R eq '67' && $x eq '12', $message); |
84281c31 A |
498 | |
499 | $x = $^R = 67; | |
500 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; | |
4f890a30 | 501 | ok($^R eq '79' && $x eq '12', $message); |
84281c31 | 502 | } |
3568d838 | 503 | |
84281c31 | 504 | { |
de26e0cc NC |
505 | is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); |
506 | is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); | |
507 | is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); | |
508 | is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); | |
509 | is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); | |
510 | is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); | |
84281c31 | 511 | } |
3568d838 | 512 | |
9de15fec | 513 | { # Test that charset modifier work, and are interpolated |
de26e0cc NC |
514 | is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); |
515 | is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); | |
516 | is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); | |
517 | is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); | |
518 | is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); | |
9de15fec KW |
519 | |
520 | my $dual = qr/\b\v$/; | |
521 | use locale; | |
522 | my $locale = qr/\b\v$/; | |
de26e0cc | 523 | is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); |
9de15fec KW |
524 | no locale; |
525 | ||
526 | use feature 'unicode_strings'; | |
527 | my $unicode = qr/\b\v$/; | |
de26e0cc NC |
528 | is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); |
529 | is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); | |
530 | is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); | |
9de15fec KW |
531 | |
532 | no feature 'unicode_strings'; | |
de26e0cc NC |
533 | is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); |
534 | is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); | |
9de15fec KW |
535 | |
536 | use locale; | |
de26e0cc NC |
537 | is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); |
538 | is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); | |
9de15fec KW |
539 | } |
540 | ||
84281c31 | 541 | { |
4f890a30 | 542 | my $message = "Look around"; |
84281c31 | 543 | $_ = 'xabcx'; |
84281c31 | 544 | foreach my $ans ('', 'c') { |
4f890a30 NC |
545 | ok(/(?<=(?=a)..)((?=c)|.)/g, $message); |
546 | is($1, $ans, $message); | |
84281c31 A |
547 | } |
548 | } | |
3568d838 | 549 | |
84281c31 | 550 | { |
4f890a30 | 551 | my $message = "Empty clause"; |
84281c31 A |
552 | $_ = 'a'; |
553 | foreach my $ans ('', 'a', '') { | |
4f890a30 NC |
554 | ok(/^|a|$/g, $message); |
555 | is($&, $ans, $message); | |
84281c31 A |
556 | } |
557 | } | |
3568d838 | 558 | |
84281c31 | 559 | { |
84281c31 | 560 | sub prefixify { |
4f890a30 NC |
561 | my $message = "Prefixify"; |
562 | { | |
84281c31 | 563 | my ($v, $a, $b, $res) = @_; |
4f890a30 NC |
564 | ok($v =~ s/\Q$a\E/$b/, $message); |
565 | is($v, $res, $message); | |
84281c31 A |
566 | } |
567 | } | |
3568d838 | 568 | |
84281c31 A |
569 | prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); |
570 | prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); | |
571 | } | |
3568d838 | 572 | |
84281c31 A |
573 | { |
574 | $_ = 'var="foo"'; | |
575 | /(\")/; | |
576 | ok $1 && /$1/, "Capture a quote"; | |
577 | } | |
3568d838 | 578 | |
84281c31 | 579 | { |
84281c31 | 580 | no warnings 'closure'; |
4f890a30 | 581 | my $message = '(?{ $var } refers to package vars'; |
84281c31 A |
582 | package aa; |
583 | our $c = 2; | |
584 | $::c = 3; | |
585 | '' =~ /(?{ $c = 4 })/; | |
4f890a30 NC |
586 | main::is($c, 4, $message); |
587 | main::is($::c, 3, $message); | |
84281c31 | 588 | } |
3568d838 | 589 | |
84281c31 | 590 | { |
cb124425 NC |
591 | is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); |
592 | like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, | |
593 | 'POSIX class [: :] must have valid name'); | |
84281c31 A |
594 | |
595 | for my $d (qw [= .]) { | |
cb124425 NC |
596 | is(eval "/[[${d}foo${d}]]/", undef); |
597 | like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, | |
598 | "POSIX syntax [[$d $d]] is an error"); | |
84281c31 A |
599 | } |
600 | } | |
3568d838 | 601 | |
84281c31 A |
602 | { |
603 | # test if failure of patterns returns empty list | |
4f890a30 | 604 | my $message = "Failed pattern returns empty list"; |
84281c31 A |
605 | $_ = 'aaa'; |
606 | @_ = /bbb/; | |
4f890a30 | 607 | is("@_", "", $message); |
3568d838 | 608 | |
84281c31 | 609 | @_ = /bbb/g; |
4f890a30 | 610 | is("@_", "", $message); |
a72deede | 611 | |
84281c31 | 612 | @_ = /(bbb)/; |
4f890a30 | 613 | is("@_", "", $message); |
a72deede | 614 | |
84281c31 | 615 | @_ = /(bbb)/g; |
4f890a30 | 616 | is("@_", "", $message); |
84281c31 | 617 | } |
a72deede | 618 | |
84281c31 | 619 | { |
4f890a30 | 620 | my $message = '@- and @+ tests'; |
84281c31 A |
621 | |
622 | /a(?=.$)/; | |
4f890a30 NC |
623 | is($#+, 0, $message); |
624 | is($#-, 0, $message); | |
625 | is($+ [0], 2, $message); | |
626 | is($- [0], 1, $message); | |
627 | ok(!defined $+ [1] && !defined $- [1] && | |
628 | !defined $+ [2] && !defined $- [2], $message); | |
84281c31 A |
629 | |
630 | /a(a)(a)/; | |
4f890a30 NC |
631 | is($#+, 2, $message); |
632 | is($#-, 2, $message); | |
633 | is($+ [0], 3, $message); | |
634 | is($- [0], 0, $message); | |
635 | is($+ [1], 2, $message); | |
636 | is($- [1], 1, $message); | |
637 | is($+ [2], 3, $message); | |
638 | is($- [2], 2, $message); | |
639 | ok(!defined $+ [3] && !defined $- [3] && | |
640 | !defined $+ [4] && !defined $- [4], $message); | |
84281c31 | 641 | |
54a4274e | 642 | # Exists has a special check for @-/@+ - bug 45147 |
4f890a30 NC |
643 | ok(exists $-[0], $message); |
644 | ok(exists $+[0], $message); | |
645 | ok(exists $-[2], $message); | |
646 | ok(exists $+[2], $message); | |
647 | ok(!exists $-[3], $message); | |
648 | ok(!exists $+[3], $message); | |
649 | ok(exists $-[-1], $message); | |
650 | ok(exists $+[-1], $message); | |
651 | ok(exists $-[-3], $message); | |
652 | ok(exists $+[-3], $message); | |
653 | ok(!exists $-[-4], $message); | |
654 | ok(!exists $+[-4], $message); | |
84281c31 A |
655 | |
656 | /.(a)(b)?(a)/; | |
4f890a30 NC |
657 | is($#+, 3, $message); |
658 | is($#-, 3, $message); | |
659 | is($+ [1], 2, $message); | |
660 | is($- [1], 1, $message); | |
661 | is($+ [3], 3, $message); | |
662 | is($- [3], 2, $message); | |
663 | ok(!defined $+ [2] && !defined $- [2] && | |
664 | !defined $+ [4] && !defined $- [4], $message); | |
84281c31 | 665 | |
84281c31 | 666 | /.(a)/; |
4f890a30 NC |
667 | is($#+, 1, $message); |
668 | is($#-, 1, $message); | |
669 | is($+ [0], 2, $message); | |
670 | is($- [0], 0, $message); | |
671 | is($+ [1], 2, $message); | |
672 | is($- [1], 1, $message); | |
673 | ok(!defined $+ [2] && !defined $- [2] && | |
674 | !defined $+ [3] && !defined $- [3], $message); | |
84281c31 A |
675 | |
676 | /.(a)(ba*)?/; | |
4f890a30 NC |
677 | is($#+, 2, $message); |
678 | is($#-, 1, $message); | |
84281c31 | 679 | } |
a72deede | 680 | |
88743d87 | 681 | foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', '@- = qw (foo bar)') { |
cb124425 NC |
682 | is(eval $_, undef); |
683 | like($@, qr/^Modification of a read-only value attempted/, | |
684 | 'Elements of @- and @+ are read-only'); | |
84281c31 | 685 | } |
a72deede | 686 | |
84281c31 | 687 | { |
4f890a30 | 688 | my $message = '\G testing'; |
84281c31 A |
689 | $_ = 'aaa'; |
690 | pos = 1; | |
691 | my @a = /\Ga/g; | |
4f890a30 | 692 | is("@a", "a a", $message); |
84281c31 A |
693 | |
694 | my $str = 'abcde'; | |
695 | pos $str = 2; | |
4f890a30 NC |
696 | unlike($str, qr/^\G/, $message); |
697 | unlike($str, qr/^.\G/, $message); | |
698 | like($str, qr/^..\G/, $message); | |
699 | unlike($str, qr/^...\G/, $message); | |
700 | ok($str =~ /\G../ && $& eq 'cd', $message); | |
84281c31 | 701 | |
04934b6d | 702 | local $::TODO = $::running_as_thread; |
4f890a30 | 703 | ok($str =~ /.\G./ && $& eq 'bc', $message); |
84281c31 | 704 | } |
a72deede | 705 | |
84281c31 | 706 | { |
4f890a30 | 707 | my $message = 'pos inside (?{ })'; |
84281c31 A |
708 | my $str = 'abcde'; |
709 | our ($foo, $bar); | |
4f890a30 NC |
710 | like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); |
711 | is($foo, $str, $message); | |
712 | is($bar, 2, $message); | |
713 | is(pos $str, undef, $message); | |
84281c31 A |
714 | |
715 | undef $foo; | |
716 | undef $bar; | |
717 | pos $str = undef; | |
4f890a30 NC |
718 | ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); |
719 | is($foo, $str, $message); | |
720 | is($bar, 2, $message); | |
721 | is(pos $str, 3, $message); | |
84281c31 A |
722 | |
723 | $_ = $str; | |
724 | undef $foo; | |
725 | undef $bar; | |
4f890a30 NC |
726 | like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); |
727 | is($foo, $str, $message); | |
728 | is($bar, 2, $message); | |
84281c31 A |
729 | |
730 | undef $foo; | |
731 | undef $bar; | |
4f890a30 NC |
732 | ok(/b(?{$foo = $_; $bar = pos})c/g, $message); |
733 | is($foo, $str, $message); | |
734 | is($bar, 2, $message); | |
735 | is(pos, 3, $message); | |
84281c31 A |
736 | |
737 | undef $foo; | |
738 | undef $bar; | |
739 | pos = undef; | |
740 | 1 while /b(?{$foo = $_; $bar = pos})c/g; | |
4f890a30 NC |
741 | is($foo, $str, $message); |
742 | is($bar, 2, $message); | |
743 | is(pos, undef, $message); | |
84281c31 A |
744 | |
745 | undef $foo; | |
746 | undef $bar; | |
747 | $_ = 'abcde|abcde'; | |
4f890a30 NC |
748 | ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); |
749 | is($foo, 'abcde|abcde', $message); | |
750 | is($bar, 8, $message); | |
751 | is($_, 'axde|axde', $message); | |
84281c31 A |
752 | |
753 | # List context: | |
754 | $_ = 'abcde|abcde'; | |
755 | our @res; | |
756 | () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; | |
757 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
4f890a30 | 758 | is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); |
84281c31 A |
759 | |
760 | @res = (); | |
761 | () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; | |
762 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
4f890a30 | 763 | is("@res", "'' 'ab' 'cde|abcde' " . |
84281c31 A |
764 | "'' 'abc' 'de|abcde' " . |
765 | "'abcd' 'e|' 'abcde' " . | |
766 | "'abcde|' 'ab' 'cde' " . | |
4f890a30 | 767 | "'abcde|' 'abc' 'de'", $message); |
84281c31 | 768 | } |
f33976b4 | 769 | |
84281c31 | 770 | { |
4f890a30 | 771 | my $message = '\G anchor checks'; |
84281c31 A |
772 | my $foo = 'aabbccddeeffgg'; |
773 | pos ($foo) = 1; | |
774 | { | |
04934b6d | 775 | local $::TODO = $::running_as_thread; |
84281c31 | 776 | no warnings 'uninitialized'; |
4f890a30 NC |
777 | ok($foo =~ /.\G(..)/g, $message); |
778 | is($1, 'ab', $message); | |
cce850e4 | 779 | |
84281c31 | 780 | pos ($foo) += 1; |
4f890a30 NC |
781 | ok($foo =~ /.\G(..)/g, $message); |
782 | is($1, 'cc', $message); | |
cce850e4 | 783 | |
84281c31 | 784 | pos ($foo) += 1; |
4f890a30 NC |
785 | ok($foo =~ /.\G(..)/g, $message); |
786 | is($1, 'de', $message); | |
cce850e4 | 787 | |
4f890a30 | 788 | ok($foo =~ /\Gef/g, $message); |
84281c31 | 789 | } |
cce850e4 | 790 | |
84281c31 | 791 | undef pos $foo; |
4f890a30 NC |
792 | ok($foo =~ /\G(..)/g, $message); |
793 | is($1, 'aa', $message); | |
cce850e4 | 794 | |
4f890a30 NC |
795 | ok($foo =~ /\G(..)/g, $message); |
796 | is($1, 'bb', $message); | |
cce850e4 | 797 | |
84281c31 | 798 | pos ($foo) = 5; |
4f890a30 NC |
799 | ok($foo =~ /\G(..)/g, $message); |
800 | is($1, 'cd', $message); | |
84281c31 | 801 | } |
cce850e4 | 802 | |
84281c31 A |
803 | { |
804 | $_ = '123x123'; | |
805 | my @res = /(\d*|x)/g; | |
806 | local $" = '|'; | |
de26e0cc | 807 | is("@res", "123||x|123|", "0 match in alternation"); |
84281c31 | 808 | } |
cce850e4 | 809 | |
84281c31 | 810 | { |
4f890a30 | 811 | my $message = "Match against temporaries (created via pp_helem())" . |
84281c31 | 812 | " is safe"; |
4f890a30 NC |
813 | ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); |
814 | is($1, "bar", $message); | |
84281c31 | 815 | } |
75685a94 | 816 | |
84281c31 | 817 | { |
4f890a30 | 818 | my $message = 'package $i inside (?{ }), ' . |
84281c31 A |
819 | 'saved substrings and changing $_'; |
820 | our @a = qw [foo bar]; | |
821 | our @b = (); | |
822 | s/(\w)(?{push @b, $1})/,$1,/g for @a; | |
4f890a30 NC |
823 | is("@b", "f o o b a r", $message); |
824 | is("@a", ",f,,o,,o, ,b,,a,,r,", $message); | |
84281c31 | 825 | |
4f890a30 | 826 | $message = 'lexical $i inside (?{ }), ' . |
84281c31 A |
827 | 'saved substrings and changing $_'; |
828 | no warnings 'closure'; | |
829 | my @c = qw [foo bar]; | |
830 | my @d = (); | |
831 | s/(\w)(?{push @d, $1})/,$1,/g for @c; | |
4f890a30 NC |
832 | is("@d", "f o o b a r", $message); |
833 | is("@c", ",f,,o,,o, ,b,,a,,r,", $message); | |
d9f424b2 JH |
834 | } |
835 | ||
84281c31 | 836 | { |
4f890a30 | 837 | my $message = 'Brackets'; |
84281c31 A |
838 | our $brackets; |
839 | $brackets = qr { | |
840 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
841 | }x; | |
842 | ||
4f890a30 NC |
843 | ok("{{}" =~ $brackets, $message); |
844 | is($&, "{}", $message); | |
845 | ok("something { long { and } hairy" =~ $brackets, $message); | |
846 | is($&, "{ and }", $message); | |
847 | ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); | |
848 | is($&, "{ and }", $message); | |
84281c31 | 849 | } |
a4c04bdc | 850 | |
84281c31 A |
851 | { |
852 | $_ = "a-a\nxbb"; | |
853 | pos = 1; | |
b33825c4 | 854 | ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); |
84281c31 | 855 | } |
a4c04bdc | 856 | |
84281c31 | 857 | { |
4f890a30 | 858 | my $message = '\G anchor checks'; |
84281c31 A |
859 | my $text = "aaXbXcc"; |
860 | pos ($text) = 0; | |
4f890a30 | 861 | ok($text !~ /\GXb*X/g, $message); |
84281c31 | 862 | } |
a4c04bdc | 863 | |
84281c31 A |
864 | { |
865 | $_ = "xA\n" x 500; | |
b33825c4 | 866 | unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); |
a4c04bdc | 867 | |
84281c31 A |
868 | my $text = "abc dbf"; |
869 | my @res = ($text =~ /.*?(b).*?\b/g); | |
de26e0cc | 870 | is("@res", "b b", '\b is not special'); |
987aaf07 | 871 | } |
a4c04bdc | 872 | |
84281c31 | 873 | { |
4f890a30 | 874 | my $message = '\S, [\S], \s, [\s]'; |
84281c31 | 875 | my @a = map chr, 0 .. 255; |
9d45b377 YO |
876 | my @b = grep m/\S/, @a; |
877 | my @c = grep m/[^\s]/, @a; | |
4f890a30 | 878 | is("@b", "@c", $message); |
84281c31 A |
879 | |
880 | @b = grep /\S/, @a; | |
881 | @c = grep /[\S]/, @a; | |
4f890a30 | 882 | is("@b", "@c", $message); |
84281c31 A |
883 | |
884 | @b = grep /\s/, @a; | |
885 | @c = grep /[^\S]/, @a; | |
4f890a30 | 886 | is("@b", "@c", $message); |
84281c31 A |
887 | |
888 | @b = grep /\s/, @a; | |
889 | @c = grep /[\s]/, @a; | |
4f890a30 | 890 | is("@b", "@c", $message); |
84281c31 A |
891 | } |
892 | { | |
4f890a30 | 893 | my $message = '\D, [\D], \d, [\d]'; |
84281c31 A |
894 | my @a = map chr, 0 .. 255; |
895 | my @b = grep /\D/, @a; | |
896 | my @c = grep /[^\d]/, @a; | |
4f890a30 | 897 | is("@b", "@c", $message); |
84281c31 A |
898 | |
899 | @b = grep /\D/, @a; | |
900 | @c = grep /[\D]/, @a; | |
4f890a30 | 901 | is("@b", "@c", $message); |
84281c31 A |
902 | |
903 | @b = grep /\d/, @a; | |
904 | @c = grep /[^\D]/, @a; | |
4f890a30 | 905 | is("@b", "@c", $message); |
84281c31 A |
906 | |
907 | @b = grep /\d/, @a; | |
908 | @c = grep /[\d]/, @a; | |
4f890a30 | 909 | is("@b", "@c", $message); |
84281c31 A |
910 | } |
911 | { | |
4f890a30 | 912 | my $message = '\W, [\W], \w, [\w]'; |
84281c31 A |
913 | my @a = map chr, 0 .. 255; |
914 | my @b = grep /\W/, @a; | |
915 | my @c = grep /[^\w]/, @a; | |
4f890a30 | 916 | is("@b", "@c", $message); |
84281c31 A |
917 | |
918 | @b = grep /\W/, @a; | |
919 | @c = grep /[\W]/, @a; | |
4f890a30 | 920 | is("@b", "@c", $message); |
84281c31 A |
921 | |
922 | @b = grep /\w/, @a; | |
923 | @c = grep /[^\W]/, @a; | |
4f890a30 | 924 | is("@b", "@c", $message); |
84281c31 A |
925 | |
926 | @b = grep /\w/, @a; | |
927 | @c = grep /[\w]/, @a; | |
4f890a30 | 928 | is("@b", "@c", $message); |
84281c31 | 929 | } |
a4c04bdc | 930 | |
84281c31 A |
931 | { |
932 | # see if backtracking optimization works correctly | |
4f890a30 NC |
933 | my $message = 'Backtrack optimization'; |
934 | like("\n\n", qr/\n $ \n/x, $message); | |
935 | like("\n\n", qr/\n* $ \n/x, $message); | |
936 | like("\n\n", qr/\n+ $ \n/x, $message); | |
937 | like("\n\n", qr/\n? $ \n/x, $message); | |
938 | like("\n\n", qr/\n*? $ \n/x, $message); | |
939 | like("\n\n", qr/\n+? $ \n/x, $message); | |
940 | like("\n\n", qr/\n?? $ \n/x, $message); | |
941 | unlike("\n\n", qr/\n*+ $ \n/x, $message); | |
942 | unlike("\n\n", qr/\n++ $ \n/x, $message); | |
943 | like("\n\n", qr/\n?+ $ \n/x, $message); | |
84281c31 | 944 | } |
a4c04bdc | 945 | |
84281c31 A |
946 | { |
947 | package S; | |
948 | use overload '""' => sub {'Object S'}; | |
949 | sub new {bless []} | |
0f289c68 | 950 | |
4f890a30 | 951 | my $message = "Ref stringification"; |
5895685f NC |
952 | ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); |
953 | ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); | |
954 | ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); | |
955 | ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); | |
956 | ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); | |
84281c31 | 957 | } |
a4c04bdc | 958 | |
84281c31 | 959 | { |
4f890a30 NC |
960 | my $message = "Test result of match used as match"; |
961 | ok('a1b' =~ ('xyz' =~ /y/), $message); | |
962 | is($`, 'a', $message); | |
963 | ok('a1b' =~ ('xyz' =~ /t/), $message); | |
964 | is($`, 'a', $message); | |
84281c31 | 965 | } |
a4c04bdc | 966 | |
84281c31 | 967 | { |
d728c370 | 968 | my $message = '"1" is not \s'; |
c11a8df3 NC |
969 | warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, |
970 | undef, "$message (did not warn)"); | |
84281c31 | 971 | } |
a4c04bdc | 972 | |
84281c31 | 973 | { |
4f890a30 | 974 | my $message = '\s, [[:space:]] and [[:blank:]]'; |
84281c31 A |
975 | my %space = (spc => " ", |
976 | tab => "\t", | |
977 | cr => "\r", | |
978 | lf => "\n", | |
979 | ff => "\f", | |
980 | # There's no \v but the vertical tabulator seems miraculously | |
981 | # be 11 both in ASCII and EBCDIC. | |
982 | vt => chr(11), | |
983 | false => "space"); | |
984 | ||
985 | my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; | |
986 | my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; | |
987 | my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; | |
988 | ||
4f890a30 NC |
989 | is("@space0", "cr ff lf spc tab", $message); |
990 | is("@space1", "cr ff lf spc tab vt", $message); | |
991 | is("@space2", "spc tab", $message); | |
84281c31 | 992 | } |
a4c04bdc | 993 | |
ff3f963a | 994 | { |
c9415951 | 995 | my $n= 50; |
93f09d7b | 996 | # this must be a high number and go from 0 to N, as the bug we are looking for doesn't |
c9415951 YO |
997 | # seem to be predictable. Slight changes to the test make it fail earlier or later. |
998 | foreach my $i (0 .. $n) | |
999 | { | |
1000 | my $str= "\n" x $i; | |
93f09d7b | 1001 | ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; |
c9415951 YO |
1002 | } |
1003 | } | |
92f3d482 YO |
1004 | { |
1005 | # we are actually testing that we dont die when executing these patterns | |
1006 | use utf8; | |
1007 | my $e = "Böck"; | |
1008 | ok(utf8::is_utf8($e),"got a unicode string - rt75680"); | |
1009 | ||
1010 | ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); | |
1011 | ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); | |
1012 | ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); | |
1013 | ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); | |
1014 | } | |
1015 | { | |
1016 | # we are actually testing that we dont die when executing these patterns | |
1017 | my $e = "B\x{f6}ck"; | |
1018 | ok(!utf8::is_utf8($e), "got a latin string - rt75680"); | |
1019 | ||
1020 | ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); | |
1021 | ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); | |
1022 | ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); | |
1023 | ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); | |
1024 | } | |
c920e018 A |
1025 | |
1026 | { | |
1027 | # | |
1028 | # Tests for bug 77414. | |
1029 | # | |
1030 | ||
4f890a30 | 1031 | my $message = '\p property after empty * match'; |
c920e018 | 1032 | { |
4f890a30 NC |
1033 | like("1", qr/\s*\pN/, $message); |
1034 | like("-", qr/\s*\p{Dash}/, $message); | |
1035 | like(" ", qr/\w*\p{Blank}/, $message); | |
c920e018 A |
1036 | } |
1037 | ||
4f890a30 NC |
1038 | like("1", qr/\s*\pN+/, $message); |
1039 | like("-", qr/\s*\p{Dash}{1}/, $message); | |
1040 | like(" ", qr/\w*\p{Blank}{1,4}/, $message); | |
c920e018 A |
1041 | |
1042 | } | |
1043 | ||
7c17ea2f KW |
1044 | SKIP: { # Some constructs with Latin1 characters cause a utf8 string not |
1045 | # to match itself in non-utf8 | |
ef237063 | 1046 | if ($::IS_EBCDIC) { |
7c17ea2f KW |
1047 | skip "Needs to be customized to run on EBCDIC", 6; |
1048 | } | |
634c83a2 KW |
1049 | my $c = "\xc0"; |
1050 | my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/; | |
1051 | utf8::upgrade($utf8_pattern); | |
1052 | ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; | |
d4e0b827 | 1053 | ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; |
634c83a2 | 1054 | ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; |
d4e0b827 | 1055 | ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; |
634c83a2 KW |
1056 | utf8::upgrade($c); |
1057 | ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; | |
d4e0b827 | 1058 | ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; |
634c83a2 | 1059 | ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; |
d4e0b827 | 1060 | ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; |
634c83a2 KW |
1061 | } |
1062 | ||
8cc86590 | 1063 | SKIP: { # Make sure can override the formatting |
ef237063 | 1064 | if ($::IS_EBCDIC) { |
8cc86590 KW |
1065 | skip "Needs to be customized to run on EBCDIC", 2; |
1066 | } | |
1067 | use feature 'unicode_strings'; | |
1068 | ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; | |
1069 | ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; | |
1070 | } | |
1071 | ||
704f71be B |
1072 | { |
1073 | # Test that a regex followed by an operator and/or a statement modifier work | |
1074 | # These tests use string-eval so that it reports a clean error when it fails | |
1075 | # (without the string eval the test script might be unparseable) | |
1076 | ||
1077 | # Note: these test check the behaviour that currently is valid syntax | |
93f09d7b | 1078 | # If a new regex modifier is added and a test fails then there is a backwards-compatibility issue |
704f71be B |
1079 | # Note-2: a new deprecate warning was added for this with commit e6897b1a5db0410e387ccbf677e89fc4a1d8c97a |
1080 | # which indicate that this syntax will be removed in 5.16. | |
1081 | # When this happens the tests can be removed | |
1082 | ||
e16f9c76 NC |
1083 | foreach (['my $r = "a" =~ m/a/lt 2', 'm', 'lt'], |
1084 | ['my $r = "a" =~ m/a/le 1', 'm', 'le'], | |
1085 | ['my $r = "a" =~ m/a/eq 1', 'm', 'eq'], | |
1086 | ['my $r = "a" =~ m/a/ne 0', 'm', 'ne'], | |
1087 | ['my $r = "a" =~ m/a/and 1', 'm', 'and'], | |
1088 | ['my $r = "a" =~ m/a/unless 0', 'm', 'unless'], | |
1089 | ['my $c = 1; my $r; $r = "a" =~ m/a/while $c--', 'm', 'while'], | |
1090 | ['my $c = 0; my $r; $r = "a" =~ m/a/until $c++', 'm', 'until'], | |
1091 | ['my $r; $r = "a" =~ m/a/for 1', 'm', 'for'], | |
1092 | ['my $r; $r = "a" =~ m/a/foreach 1', 'm', 'foreach'], | |
1093 | ||
1094 | ['my $t = "a"; my $r = $t =~ s/a//lt 2', 's', 'lt'], | |
1095 | ['my $t = "a"; my $r = $t =~ s/a//le 1', 's', 'le'], | |
1096 | ['my $t = "a"; my $r = $t =~ s/a//ne 0', 's', 'ne'], | |
1097 | ['my $t = "a"; my $r = $t =~ s/a//and 1', 's', 'and'], | |
1098 | ['my $t = "a"; my $r = $t =~ s/a//unless 0', 's', 'unless'], | |
1099 | ||
1100 | ['my $c = 1; my $r; my $t = "a"; $r = $t =~ s/a//while $c--', 's', 'while'], | |
1101 | ['my $c = 0; my $r; my $t = "a"; $r = $t =~ s/a//until $c++', 's', 'until'], | |
1102 | ['my $r; my $t = "a"; $r = $t =~ s/a//for 1', 's', 'for'], | |
1103 | ['my $r; my $t = "a"; $r = $t =~ s/a//for 1', 's', 'foreach'], | |
1104 | ) { | |
1105 | my $message = sprintf 'regex (%s) followed by $_->[2]', | |
1106 | $_->[1] eq 'm' ? 'm//' : 's///'; | |
1107 | my $code = "$_->[0]; 'eval_ok ' . \$r"; | |
1108 | my $result = do { | |
1109 | no warnings 'syntax'; | |
1110 | eval $code; | |
1111 | }; | |
1112 | is($@, '', $message); | |
1113 | is($result, 'eval_ok 1', $message); | |
1114 | } | |
704f71be B |
1115 | } |
1116 | ||
5b6010b3 YO |
1117 | { |
1118 | my $str= "\x{100}"; | |
1119 | chop $str; | |
1120 | my $qr= qr/$str/; | |
de26e0cc | 1121 | is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); |
5b6010b3 YO |
1122 | $str= ""; |
1123 | $qr= qr/$str/; | |
de26e0cc | 1124 | is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); |
5b6010b3 YO |
1125 | |
1126 | } | |
1127 | ||
72aa120d | 1128 | { |
04934b6d | 1129 | local $::TODO = "[perl #38133]"; |
72aa120d KW |
1130 | |
1131 | "A" =~ /(((?:A))?)+/; | |
1132 | my $first = $2; | |
1133 | ||
1134 | "A" =~ /(((A))?)+/; | |
1135 | my $second = $2; | |
1136 | ||
de26e0cc | 1137 | is($first, $second); |
72aa120d KW |
1138 | } |
1139 | ||
99ca48e1 DM |
1140 | { |
1141 | # RT #3516: \G in a m//g expression causes problems | |
1142 | my $count = 0; | |
1143 | while ("abc" =~ m/(\G[ac])?/g) { | |
1144 | last if $count++ > 10; | |
1145 | } | |
1146 | ok($count < 10, 'RT #3516 A'); | |
1147 | ||
1148 | $count = 0; | |
1149 | while ("abc" =~ m/(\G|.)[ac]/g) { | |
1150 | last if $count++ > 10; | |
1151 | } | |
1152 | ok($count < 10, 'RT #3516 B'); | |
1153 | ||
1154 | $count = 0; | |
1155 | while ("abc" =~ m/(\G?[ac])?/g) { | |
1156 | last if $count++ > 10; | |
1157 | } | |
1158 | ok($count < 10, 'RT #3516 C'); | |
1159 | } | |
d774cd11 YO |
1160 | { |
1161 | # RT #84294: Is this a bug in the simple Perl regex? | |
1162 | # : Nested buffers and (?{...}) dont play nicely on partial matches | |
1163 | our @got= (); | |
1164 | ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); | |
d774cd11 YO |
1165 | my $want= "'ab', 'a', 'b'"; |
1166 | my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); | |
1167 | is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); | |
1168 | } | |
1169 | ||
0bda3001 KW |
1170 | { |
1171 | # Suppress warnings, as the non-unicode one comes out even if turn off | |
1172 | # warnings here (because the execution is done in another scope). | |
1173 | local $SIG{__WARN__} = sub {}; | |
1174 | my $str = "\x{110000}"; | |
1175 | ||
1176 | # No non-unicode code points match any Unicode property, even inverse | |
1177 | # ones | |
1178 | unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{}"); | |
1179 | unlike($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode doesn't match \\p{}"); | |
1180 | like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{}"); | |
1181 | like($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{}"); | |
1182 | } | |
1183 | ||
84281c31 A |
1184 | } # End of sub run_tests |
1185 | ||
1186 | 1; |