Commit | Line | Data |
---|---|---|
e425a60b YO |
1 | #!./perl |
2 | # | |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by re/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to re/re_tests, not here. | |
6 | ||
e425a60b YO |
7 | sub run_tests; |
8 | ||
9 | $| = 1; | |
10 | ||
e425a60b YO |
11 | BEGIN { |
12 | chdir 't' if -d 't'; | |
6f4e0180 | 13 | require './test.pl'; |
624c42e2 | 14 | set_up_inc( '../lib', '.' ); |
2fcf74c4 | 15 | skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-"); |
e425a60b | 16 | } |
e425a60b | 17 | |
0b1b7115 JH |
18 | use strict; |
19 | use warnings; | |
20 | use 5.010; | |
21 | use Config; | |
e425a60b | 22 | |
c224bbd5 | 23 | plan tests => 2514; # Update this when adding/deleting tests. |
e425a60b | 24 | |
9d45b377 | 25 | run_tests() unless caller; |
e425a60b YO |
26 | |
27 | # | |
28 | # Tests start here. | |
29 | # | |
30 | sub run_tests { | |
31 | ||
a86da477 | 32 | like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/, |
ee95e30c | 33 | "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)"); |
a86da477 | 34 | |
e425a60b | 35 | { |
e425a60b | 36 | no warnings 'uninitialized'; |
ee95e30c | 37 | ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)"); |
e425a60b YO |
38 | } |
39 | ||
e425a60b | 40 | { |
ee95e30c | 41 | my $message = 'bug id 20001008.001 (#4407)'; |
e425a60b YO |
42 | |
43 | my @x = ("stra\337e 138", "stra\337e 138"); | |
44 | for (@x) { | |
51b6c6b3 NC |
45 | ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message); |
46 | ok(my ($latin) = /^(.+)(?:\s+\d)/, $message); | |
47 | is($latin, "stra\337e", $message); | |
48 | ok($latin =~ s/stra\337e/straße/, $message); | |
e425a60b YO |
49 | # |
50 | # Previous code follows, but outcommented - there were no tests. | |
51 | # | |
52 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
53 | # use utf8; # needed for the raw UTF-8 | |
54 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
55 | } | |
56 | } | |
57 | ||
e425a60b | 58 | { |
e425a60b | 59 | # Fist half of the bug. |
ee95e30c | 60 | my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)'; |
e425a60b | 61 | my $X = chr (1448); |
51b6c6b3 NC |
62 | ok(my ($Y) = $X =~ /(.*)/, $message); |
63 | is($Y, v1448, $message); | |
64 | is(length $Y, 1, $message); | |
e425a60b YO |
65 | |
66 | # Second half of the bug. | |
ee95e30c | 67 | $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)'; |
e425a60b YO |
68 | $X = ''; |
69 | $X =~ s/^/chr(1488)/e; | |
51b6c6b3 NC |
70 | is(length $X, 1, $message); |
71 | is(ord $X, 1488, $message); | |
e425a60b YO |
72 | } |
73 | ||
e425a60b | 74 | { |
ee95e30c | 75 | my $message = 'Repeated s///; Bug 20001108.001 (#4631)'; |
e425a60b YO |
76 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
77 | my $Y = $X; | |
78 | $Y =~ s/(B)/$1/ for 0 .. 3; | |
51b6c6b3 NC |
79 | is($Y, $X, $message); |
80 | is($X, "Szab\x{f3},Bal\x{e1}zs", $message); | |
e425a60b YO |
81 | } |
82 | ||
e425a60b | 83 | { |
ee95e30c | 84 | my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)'; |
e425a60b YO |
85 | my $x = "\x{100}A"; |
86 | $x =~ s/A/B/; | |
51b6c6b3 NC |
87 | is($x, "\x{100}B", $message); |
88 | is(length $x, 2, $message); | |
e425a60b YO |
89 | } |
90 | ||
e425a60b | 91 | { |
e425a60b | 92 | # The original bug report had 'no utf8' here but that was irrelevant. |
a86da477 | 93 | |
ee95e30c | 94 | my $message = "Don't dump core; Bug 20010306.008 (#5982)"; |
e425a60b | 95 | my $a = "a\x{1234}"; |
51b6c6b3 | 96 | like($a, qr/\w/, $message); # used to core dump. |
e425a60b YO |
97 | } |
98 | ||
e425a60b | 99 | { |
ee95e30c | 100 | my $message = '/g in scalar context; Bug 20010410.006 (#6796)'; |
e425a60b YO |
101 | for my $rx ('/(.*?)\{(.*?)\}/csg', |
102 | '/(.*?)\{(.*?)\}/cg', | |
103 | '/(.*?)\{(.*?)\}/sg', | |
104 | '/(.*?)\{(.*?)\}/g', | |
105 | '/(.+?)\{(.+?)\}/csg',) { | |
106 | my $i = 0; | |
107 | my $input = "a{b}c{d}"; | |
108 | eval <<" --"; | |
109 | while (eval \$input =~ $rx) { | |
110 | \$i ++; | |
111 | } | |
112 | -- | |
51b6c6b3 | 113 | is($i, 2, $message); |
e425a60b YO |
114 | } |
115 | } | |
116 | ||
e425a60b | 117 | { |
e425a60b YO |
118 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
119 | for ("\n", "\t", "\014", "\r") { | |
ee95e30c | 120 | unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_); |
e425a60b YO |
121 | } |
122 | for (" ") { | |
ee95e30c | 123 | like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)"); |
e425a60b YO |
124 | } |
125 | } | |
126 | ||
e425a60b | 127 | { |
ee95e30c | 128 | # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context |
a86da477 | 129 | |
e425a60b YO |
130 | $_ = "ababacadaea"; |
131 | my $a = join ":", /b./gc; | |
132 | my $b = join ":", /a./gc; | |
133 | my $c = pos; | |
ee95e30c | 134 | is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)"); |
e425a60b YO |
135 | } |
136 | ||
e425a60b | 137 | { |
ee95e30c | 138 | # [ID 20010407.006 (#6767)] matching utf8 return values from |
e425a60b | 139 | # functions does not work |
a86da477 | 140 | |
ee95e30c | 141 | my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)'; |
e425a60b YO |
142 | package ID_20010407_006; |
143 | sub x {"a\x{1234}"} | |
144 | my $x = x; | |
145 | my $y; | |
51b6c6b3 | 146 | ::ok($x =~ /(..)/, $message); |
e425a60b | 147 | $y = $1; |
51b6c6b3 NC |
148 | ::ok(length ($y) == 2 && $y eq $x, $message); |
149 | ::ok(x =~ /(..)/, $message); | |
e425a60b | 150 | $y = $1; |
51b6c6b3 | 151 | ::ok(length ($y) == 2 && $y eq $x, $message); |
e425a60b YO |
152 | } |
153 | ||
e425a60b YO |
154 | { |
155 | # High bit bug -- japhy | |
156 | my $x = "ab\200d"; | |
157 | ok $x =~ /.*?\200/, "High bit fine"; | |
158 | } | |
159 | ||
e425a60b | 160 | { |
51b6c6b3 | 161 | my $message = 'UTF-8 hash keys and /$/'; |
e425a60b YO |
162 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters |
163 | # /2002-01/msg01327.html | |
164 | ||
165 | my $u = "a\x{100}"; | |
166 | my $v = substr ($u, 0, 1); | |
167 | my $w = substr ($u, 1, 1); | |
168 | my %u = ($u => $u, $v => $v, $w => $w); | |
169 | for (keys %u) { | |
170 | my $m1 = /^\w*$/ ? 1 : 0; | |
171 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
51b6c6b3 | 172 | is($m1, $m2, $message); |
e425a60b YO |
173 | } |
174 | } | |
175 | ||
e425a60b | 176 | { |
ee95e30c | 177 | my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)"; |
e425a60b YO |
178 | |
179 | for my $char ("a", "\x{df}", "\x{100}") { | |
180 | my $x = "$char b $char"; | |
181 | $x =~ s{($char)}{ | |
182 | "c" =~ /c/; | |
183 | "x"; | |
184 | }ge; | |
51b6c6b3 | 185 | is(substr ($x, 0, 1), substr ($x, -1, 1), $message); |
e425a60b YO |
186 | } |
187 | } | |
188 | ||
e425a60b | 189 | { |
ee95e30c | 190 | my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)"; |
e425a60b YO |
191 | |
192 | # Requires reuse of last successful pattern. | |
193 | my $num = 123; | |
194 | $num =~ /\d/; | |
195 | for (0 .. 1) { | |
725a61d7 | 196 | my $match = m?? + 0; |
5895685f NC |
197 | ok($match != $_, $message) |
198 | or diag(sprintf "'match one' %s on %s iteration" => | |
199 | $match ? 'succeeded' : 'failed', | |
200 | $_ ? 'second' : 'first'); | |
e425a60b YO |
201 | } |
202 | $num =~ /(\d)/; | |
203 | my $result = join "" => $num =~ //g; | |
51b6c6b3 | 204 | is($result, $num, $message); |
e425a60b YO |
205 | } |
206 | ||
e425a60b | 207 | { |
ee95e30c | 208 | my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)'; |
e425a60b YO |
209 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { |
210 | my ($type, $char) = @$_; | |
211 | for my $len (32000, 32768, 33000) { | |
212 | my $s = $char . "f" x $len; | |
213 | my $r = $s =~ /$char([f]*)/gc; | |
5895685f NC |
214 | ok($r, $message) or diag("<$type x $len>"); |
215 | ok(!$r || pos ($s) == $len + 1, $message) | |
216 | or diag("<$type x $len>; pos = @{[pos $s]}"); | |
e425a60b YO |
217 | } |
218 | } | |
219 | } | |
220 | ||
e425a60b | 221 | { |
e425a60b YO |
222 | my $s = "\x{100}" x 5; |
223 | my $ok = $s =~ /(\x{100}{4})/; | |
224 | my ($ord, $len) = (ord $1, length $1); | |
1c6f1211 | 225 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]"; |
e425a60b YO |
226 | } |
227 | ||
e425a60b | 228 | { |
a86da477 | 229 | my $message = 'UTF-8 matching; Bug 15397'; |
51b6c6b3 NC |
230 | like("\x{100}", qr/\x{100}/, $message); |
231 | like("\x{100}", qr/(\x{100})/, $message); | |
232 | like("\x{100}", qr/(\x{100}){1}/, $message); | |
233 | like("\x{100}\x{100}", qr/(\x{100}){2}/, $message); | |
234 | like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message); | |
e425a60b YO |
235 | } |
236 | ||
e425a60b | 237 | { |
a86da477 | 238 | my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471'; |
e425a60b | 239 | local $_ = 'CD'; |
51b6c6b3 NC |
240 | ok(/(AB)*?CD/ && !defined $1, $message); |
241 | ok(/(AB)*CD/ && !defined $1, $message); | |
e425a60b YO |
242 | } |
243 | ||
e425a60b | 244 | { |
a86da477 | 245 | my $message = "Caching shouldn't prevent match; Bug 3547"; |
e425a60b | 246 | my $pattern = "^(b+?|a){1,2}c"; |
51b6c6b3 NC |
247 | ok("bac" =~ /$pattern/ && $1 eq 'a', $message); |
248 | ok("bbac" =~ /$pattern/ && $1 eq 'a', $message); | |
249 | ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message); | |
250 | ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message); | |
e425a60b YO |
251 | } |
252 | ||
e425a60b | 253 | { |
a86da477 NC |
254 | ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232'); |
255 | is($1, "\x{100}", '$1 is UTF-8; Bug 18232'); | |
e425a60b | 256 | { 'a' =~ /./; } |
a86da477 NC |
257 | is($1, "\x{100}", '$1 is still UTF-8; Bug 18232'); |
258 | isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232'); | |
e425a60b YO |
259 | } |
260 | ||
e425a60b | 261 | { |
a86da477 | 262 | my $message = "Optimizer doesn't prematurely reject match; Bug 19767"; |
e425a60b YO |
263 | use utf8; |
264 | ||
265 | my $attr = 'Name-1'; | |
266 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
267 | my $NormalWord = qr /${NormalChar}+?/; | |
268 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; | |
269 | ||
270 | $attr =~ /^$/; | |
51b6c6b3 | 271 | like($attr, $PredNameHyphen, $message); # Original test. |
e425a60b YO |
272 | |
273 | "a" =~ m/[b]/; | |
51b6c6b3 | 274 | like("0", qr/\p{N}+\z/, $message); # Variant. |
e425a60b YO |
275 | } |
276 | ||
e425a60b | 277 | { |
a86da477 | 278 | my $message = "(??{ }) doesn't return stale values; Bug 20683"; |
e425a60b YO |
279 | our $p = 1; |
280 | foreach (1, 2, 3, 4) { | |
281 | $p ++ if /(??{ $p })/ | |
282 | } | |
51b6c6b3 | 283 | is($p, 5, $message); |
e425a60b YO |
284 | |
285 | { | |
286 | package P; | |
287 | $a = 1; | |
288 | sub TIESCALAR {bless []} | |
289 | sub FETCH {$a ++} | |
290 | } | |
291 | tie $p, "P"; | |
292 | foreach (1, 2, 3, 4) { | |
293 | /(??{ $p })/ | |
294 | } | |
51b6c6b3 | 295 | is($p, 5, $message); |
e425a60b YO |
296 | } |
297 | ||
e425a60b YO |
298 | { |
299 | # Subject: Odd regexp behavior | |
300 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
301 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
302 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
303 | # To: perl-unicode@perl.org | |
304 | ||
51b6c6b3 | 305 | my $message = 'Markus Kuhn 2003-02-26'; |
e425a60b YO |
306 | |
307 | my $x = "\x{2019}\nk"; | |
51b6c6b3 NC |
308 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
309 | is($x, "\x{2019} k", $message); | |
e425a60b YO |
310 | |
311 | $x = "b\nk"; | |
51b6c6b3 NC |
312 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
313 | is($x, "b k", $message); | |
e425a60b | 314 | |
51b6c6b3 | 315 | like("\x{2019}", qr/\S/, $message); |
e425a60b YO |
316 | } |
317 | ||
e425a60b | 318 | { |
a86da477 | 319 | my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411"; |
e425a60b | 320 | our $i; |
51b6c6b3 | 321 | is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message); |
e425a60b YO |
322 | no warnings 'syntax'; |
323 | @_ = split /(?{'WOW'})/, 'abc'; | |
324 | local $" = "|"; | |
51b6c6b3 | 325 | is("@_", "a|b|c", $message); |
e425a60b YO |
326 | } |
327 | ||
f7a1f402 | 328 | is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split'); |
e425a60b | 329 | |
e425a60b | 330 | { |
e425a60b YO |
331 | $_ = "code: 'x' { '...' }\n"; study; |
332 | my @x; push @x, $& while m/'[^\']*'/gx; | |
333 | local $" = ":"; | |
a86da477 | 334 | is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757"); |
e425a60b YO |
335 | } |
336 | ||
e425a60b | 337 | { |
e425a60b | 338 | sub func ($) { |
a86da477 NC |
339 | ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354"); |
340 | ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354"); | |
e425a60b YO |
341 | } |
342 | func "standalone"; | |
343 | $_ = "x"; s/x/func "in subst"/e; | |
344 | $_ = "x"; s/x/func "in multiline subst"/em; | |
1d0dc949 FC |
345 | $_ = "x"; /x(?{func "in regexp"})/; |
346 | $_ = "x"; /x(?{func "in multiline regexp"})/m; | |
e425a60b YO |
347 | } |
348 | ||
e425a60b | 349 | { |
e425a60b YO |
350 | $_ = "abcdef\n"; |
351 | my @x = m/./g; | |
a86da477 | 352 | is("abcde", $`, 'Global match sets $`; Bug 19049'); |
e425a60b YO |
353 | } |
354 | ||
e425a60b | 355 | { |
e425a60b YO |
356 | # [perl #23769] Unicode regex broken on simple example |
357 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
a86da477 | 358 | |
e425a60b | 359 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; |
a86da477 | 360 | my $message = "$Mess; Bug 23769"; |
e425a60b YO |
361 | |
362 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; | |
363 | ||
51b6c6b3 NC |
364 | like($s, qr/\x{a0}/, $message); |
365 | like($s, qr/\x{a0}+/, $message); | |
366 | like($s, qr/\x{a0}\x{a0}/, $message); | |
e425a60b | 367 | |
a86da477 | 368 | $message = "$Mess (easy variant); Bug 23769"; |
51b6c6b3 NC |
369 | ok("aaa\x{100}" =~ /(a+)/, $message); |
370 | is($1, "aaa", $message); | |
e425a60b | 371 | |
a86da477 | 372 | $message = "$Mess (easy invariant); Bug 23769"; |
51b6c6b3 NC |
373 | ok("aaa\x{100} " =~ /(a+?)/, $message); |
374 | is($1, "a", $message); | |
e425a60b | 375 | |
a86da477 | 376 | $message = "$Mess (regrepeat variant); Bug 23769"; |
51b6c6b3 NC |
377 | ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message); |
378 | is($1, "\xa0", $message); | |
e425a60b | 379 | |
a86da477 | 380 | $message = "$Mess (regrepeat invariant); Bug 23769"; |
51b6c6b3 NC |
381 | ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message); |
382 | is($1, "\xa0\xa0\xa0", $message); | |
e425a60b | 383 | |
a86da477 | 384 | $message = "$Mess (hard variant); Bug 23769"; |
51b6c6b3 NC |
385 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message); |
386 | is($1, "\xa0\xa1", $message); | |
e425a60b | 387 | |
a86da477 | 388 | $message = "$Mess (hard invariant); Bug 23769"; |
51b6c6b3 NC |
389 | ok("ababab\x{100} " =~ /((?:ab)+)/, $message); |
390 | is($1, 'ababab', $message); | |
e425a60b | 391 | |
51b6c6b3 NC |
392 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message); |
393 | is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message); | |
e425a60b | 394 | |
51b6c6b3 NC |
395 | ok("ababab\x{100} " =~ /((?:ab)+?)/, $message); |
396 | is($1, "ab", $message); | |
e425a60b | 397 | |
a86da477 | 398 | $message = "Don't match first byte of UTF-8 representation; Bug 23769"; |
51b6c6b3 NC |
399 | unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message); |
400 | unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message); | |
401 | unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message); | |
e425a60b YO |
402 | } |
403 | ||
e425a60b | 404 | { |
e425a60b | 405 | # perl panic: pp_match start/end pointers |
a86da477 NC |
406 | |
407 | is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc", | |
408 | 'Captures can move backwards in string; Bug 25269'); | |
e425a60b YO |
409 | } |
410 | ||
e425a60b | 411 | { |
a86da477 NC |
412 | # \cA not recognized in character classes |
413 | like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940'); | |
414 | like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940'); | |
415 | like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940'); | |
416 | like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940'); | |
417 | like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940'); | |
418 | like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940'); | |
419 | like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940'); | |
420 | unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940'); | |
e425a60b YO |
421 | } |
422 | ||
e425a60b | 423 | { |
9d45b377 | 424 | # perl #28532: optional zero-width match at end of string is ignored |
a86da477 NC |
425 | |
426 | ok("abc" =~ /^abc(\z)?/ && defined($1), | |
427 | 'Optional zero-width match at end of string; Bug 28532'); | |
428 | ok("abc" =~ /^abc(\z)??/ && !defined($1), | |
429 | 'Optional zero-width match at end of string; Bug 28532'); | |
e425a60b YO |
430 | } |
431 | ||
e425a60b | 432 | { |
e425a60b YO |
433 | my $utf8 = "\xe9\x{100}"; chop $utf8; |
434 | my $latin1 = "\xe9"; | |
435 | ||
a86da477 NC |
436 | like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207"); |
437 | like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207"); | |
438 | like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207"); | |
439 | like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207"); | |
e425a60b | 440 | |
a86da477 NC |
441 | like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207"); |
442 | like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207"); | |
443 | like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207"); | |
444 | like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207"); | |
e425a60b YO |
445 | } |
446 | ||
e425a60b | 447 | { |
e425a60b YO |
448 | my $s = "abcd"; |
449 | $s =~ /(..)(..)/g; | |
450 | $s = $1; | |
451 | $s = $2; | |
a86da477 NC |
452 | is($2, 'cd', |
453 | "Assigning to original string does not corrupt match vars; Bug 37038"); | |
e425a60b YO |
454 | } |
455 | ||
e425a60b YO |
456 | { |
457 | { | |
458 | package wooosh; | |
459 | sub gloople {"!"} | |
460 | } | |
461 | my $aeek = bless {} => 'wooosh'; | |
2e321296 NC |
462 | is(do {$aeek -> gloople () =~ /(.)/g}, 1, |
463 | "//g match against return value of sub [change e26a497577f3ce7b]"); | |
e425a60b YO |
464 | |
465 | sub gloople {"!"} | |
2e321296 NC |
466 | is(do{gloople () =~ /(.)/g}, 1, |
467 | "change e26a497577f3ce7b didn't affect sub calls for some reason"); | |
e425a60b YO |
468 | } |
469 | ||
e425a60b | 470 | { |
bf8fb5eb FC |
471 | # [perl #78680] |
472 | # See changes 26925-26928, which reverted change 26410 | |
e425a60b YO |
473 | { |
474 | package lv; | |
475 | our $var = "abc"; | |
476 | sub variable : lvalue {$var} | |
477 | } | |
478 | my $o = bless [] => 'lv'; | |
479 | my $f = ""; | |
480 | my $r = eval { | |
481 | for (1 .. 2) { | |
482 | $f .= $1 if $o -> variable =~ /(.)/g; | |
483 | } | |
484 | 1; | |
485 | }; | |
486 | if ($r) { | |
de26e0cc | 487 | is($f, "ab", "pos() retained between calls"); |
e425a60b YO |
488 | } |
489 | else { | |
e425a60b YO |
490 | ok 0, "Code failed: $@"; |
491 | } | |
492 | ||
493 | our $var = "abc"; | |
494 | sub variable : lvalue {$var} | |
495 | my $g = ""; | |
496 | my $s = eval { | |
497 | for (1 .. 2) { | |
498 | $g .= $1 if variable =~ /(.)/g; | |
499 | } | |
500 | 1; | |
501 | }; | |
502 | if ($s) { | |
de26e0cc | 503 | is($g, "ab", "pos() retained between calls"); |
e425a60b YO |
504 | } |
505 | else { | |
e425a60b YO |
506 | ok 0, "Code failed: $@"; |
507 | } | |
508 | } | |
509 | ||
e425a60b YO |
510 | SKIP: |
511 | { | |
e1a1ba14 | 512 | skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC; |
76024109 KW |
513 | fresh_perl_like( |
514 | 'no warnings "utf8"; | |
515 | $_ = pack "U0C2", 0xa2, 0xf8; # Ill-formed UTF-8 | |
516 | my $ret = 0; | |
517 | do {!($ret = s/[a\0]+//g)}', | |
518 | qr/Malformed UTF-8/, | |
519 | {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836"); | |
e425a60b YO |
520 | } |
521 | ||
e425a60b YO |
522 | { |
523 | # chr(65535) should be allowed in regexes | |
a86da477 | 524 | |
e425a60b YO |
525 | no warnings 'utf8'; # To allow non-characters |
526 | my ($c, $r, $s); | |
527 | ||
528 | $c = chr 0xffff; | |
529 | $c =~ s/$c//g; | |
a86da477 | 530 | is($c, "", "U+FFFF, parsed as atom; Bug 38293"); |
e425a60b YO |
531 | |
532 | $c = chr 0xffff; | |
533 | $r = "\\$c"; | |
534 | $c =~ s/$r//g; | |
a86da477 | 535 | is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293"); |
e425a60b YO |
536 | |
537 | $c = chr 0xffff; | |
538 | $c =~ s/[$c]//g; | |
a86da477 | 539 | is($c, "", "U+FFFF, parsed in class; Bug 38293"); |
e425a60b YO |
540 | |
541 | $c = chr 0xffff; | |
542 | $r = "[\\$c]"; | |
543 | $c =~ s/$r//g; | |
a86da477 | 544 | is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293"); |
e425a60b YO |
545 | |
546 | $s = "A\x{ffff}B"; | |
547 | $s =~ s/\x{ffff}//i; | |
a86da477 | 548 | is($s, "AB", "U+FFFF, EXACTF; Bug 38293"); |
e425a60b YO |
549 | |
550 | $s = "\x{ffff}A"; | |
551 | $s =~ s/\bA//; | |
a86da477 | 552 | is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293"); |
e425a60b YO |
553 | |
554 | $s = "\x{ffff}!"; | |
555 | $s =~ s/\B!//; | |
a86da477 | 556 | is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293"); |
e425a60b YO |
557 | } |
558 | ||
e425a60b | 559 | { |
e425a60b YO |
560 | |
561 | # The printing characters | |
562 | my @chars = ("A" .. "Z"); | |
563 | my $delim = ","; | |
564 | my $size = 32771 - 4; | |
565 | my $str = ''; | |
566 | ||
567 | # Create some random junk. Inefficient, but it works. | |
568 | for (my $i = 0; $i < $size; $ i++) { | |
569 | $str .= $chars [rand @chars]; | |
570 | } | |
571 | ||
572 | $str .= ($delim x 4); | |
573 | my $res; | |
574 | my $matched; | |
a86da477 NC |
575 | ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583"); |
576 | is($str, "", "Empty string; Bug 39583"); | |
577 | ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583'); | |
e425a60b YO |
578 | } |
579 | ||
e425a60b | 580 | { |
a86da477 NC |
581 | like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940'); |
582 | like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); | |
583 | like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940'); | |
584 | like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); | |
e425a60b | 585 | |
a86da477 NC |
586 | like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940'); |
587 | like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940'); | |
588 | like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940'); | |
589 | like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940'); | |
590 | like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940'); | |
591 | ||
592 | like("X\@A", qr/X@?A/, '@?; Bug 27940'); | |
593 | like("X\@A", qr/X@*A/, '@*; Bug 27940'); | |
594 | like("X\@A", qr/X@(A)/, '@(; Bug 27940'); | |
595 | like("X\@A", qr/X(@)A/, '@); Bug 27940'); | |
596 | like("X\@A", qr/X@|ZA/, '@|; Bug 27940'); | |
e425a60b YO |
597 | |
598 | local $" = ','; # non-whitespace and non-RE-specific | |
a86da477 NC |
599 | like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940'); |
600 | like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940'); | |
601 | like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940'); | |
602 | like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940'); | |
603 | like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940'); | |
e425a60b YO |
604 | } |
605 | ||
e425a60b | 606 | { |
e425a60b YO |
607 | my $s = 'foo bar baz'; |
608 | my (@k, @v, @fetch, $res); | |
609 | my $count = 0; | |
610 | my @names = qw ($+{A} $+{B} $+{C}); | |
611 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { | |
612 | while (my ($k, $v) = each (%+)) { | |
613 | $count++; | |
614 | } | |
615 | @k = sort keys (%+); | |
616 | @v = sort values (%+); | |
617 | $res = 1; | |
618 | push @fetch, | |
619 | ["$+{A}", "$1"], | |
620 | ["$+{B}", "$2"], | |
621 | ["$+{C}", "$3"], | |
622 | ; | |
623 | } | |
624 | foreach (0 .. 2) { | |
625 | if ($fetch [$_]) { | |
a86da477 | 626 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
e425a60b YO |
627 | } else { |
628 | ok 0, $names[$_]; | |
629 | } | |
630 | } | |
a86da477 NC |
631 | is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496"); |
632 | is($count, 3, "Got 3 keys in %+ via each; Bug 50496"); | |
633 | is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496"); | |
634 | is("@k", "A B C", "Got expected keys; Bug 50496"); | |
635 | is("@v", "bar baz foo", "Got expected values; Bug 50496"); | |
e425a60b YO |
636 | eval ' |
637 | no warnings "uninitialized"; | |
638 | print for $+ {this_key_doesnt_exist}; | |
639 | '; | |
a86da477 | 640 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
e425a60b YO |
641 | } |
642 | ||
e425a60b YO |
643 | { |
644 | # | |
645 | # Almost the same as the block above, except that the capture is nested. | |
646 | # | |
a86da477 | 647 | |
e425a60b YO |
648 | my $s = 'foo bar baz'; |
649 | my (@k, @v, @fetch, $res); | |
650 | my $count = 0; | |
651 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); | |
652 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { | |
653 | while (my ($k,$v) = each(%+)) { | |
654 | $count++; | |
655 | } | |
656 | @k = sort keys (%+); | |
657 | @v = sort values (%+); | |
658 | $res = 1; | |
659 | push @fetch, | |
660 | ["$+{A}", "$2"], | |
661 | ["$+{B}", "$3"], | |
662 | ["$+{C}", "$4"], | |
663 | ["$+{D}", "$1"], | |
664 | ; | |
665 | } | |
666 | foreach (0 .. 3) { | |
667 | if ($fetch [$_]) { | |
a86da477 | 668 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
e425a60b YO |
669 | } else { |
670 | ok 0, $names [$_]; | |
671 | } | |
672 | } | |
a86da477 NC |
673 | is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496"); |
674 | is($count, 4, "Got 4 keys in %+ via each; Bug 50496"); | |
675 | is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496"); | |
676 | is("@k", "A B C D", "Got expected keys; Bug 50496"); | |
677 | is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496"); | |
e425a60b YO |
678 | eval ' |
679 | no warnings "uninitialized"; | |
680 | print for $+ {this_key_doesnt_exist}; | |
681 | '; | |
a86da477 | 682 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
e425a60b YO |
683 | } |
684 | ||
e425a60b | 685 | { |
e425a60b YO |
686 | my $str = 'abc'; |
687 | my $count = 0; | |
688 | my $mval = 0; | |
689 | my $pval = 0; | |
690 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} | |
a86da477 NC |
691 | is($mval, 0, '@- should be empty; Bug 36046'); |
692 | is($pval, 0, '@+ should be empty; Bug 36046'); | |
693 | is($count, 1, 'Should have matched once only; Bug 36046'); | |
e425a60b YO |
694 | } |
695 | ||
e425a60b | 696 | { |
a86da477 | 697 | my $message = '/m in precompiled regexp; Bug 40684'; |
e425a60b YO |
698 | my $s = "abc\ndef"; |
699 | my $rex = qr'^abc$'m; | |
51b6c6b3 NC |
700 | ok($s =~ m/$rex/, $message); |
701 | ok($s =~ m/^abc$/m, $message); | |
e425a60b YO |
702 | } |
703 | ||
e425a60b | 704 | { |
a86da477 | 705 | my $message = '(?: ... )? should not lose $^R; Bug 36909'; |
e425a60b YO |
706 | $^R = 'Nothing'; |
707 | { | |
708 | local $^R = "Bad"; | |
51b6c6b3 | 709 | ok('x foofoo y' =~ m { |
e425a60b YO |
710 | (foo) # $^R correctly set |
711 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
712 | }x, $message); |
713 | is($^R, 'last regexp code result', $message); | |
e425a60b | 714 | } |
51b6c6b3 | 715 | is($^R, 'Nothing', $message); |
e425a60b YO |
716 | |
717 | { | |
718 | local $^R = "Bad"; | |
719 | ||
51b6c6b3 | 720 | ok('x foofoo y' =~ m { |
e425a60b YO |
721 | (?:foo|bar)+ # $^R correctly set |
722 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
723 | }x, $message); |
724 | is($^R, 'last regexp code result', $message); | |
e425a60b | 725 | } |
51b6c6b3 | 726 | is($^R, 'Nothing', $message); |
e425a60b YO |
727 | |
728 | { | |
729 | local $^R = "Bad"; | |
51b6c6b3 | 730 | ok('x foofoo y' =~ m { |
e425a60b YO |
731 | (foo|bar)\1+ # $^R undefined |
732 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
733 | }x, $message); |
734 | is($^R, 'last regexp code result', $message); | |
e425a60b | 735 | } |
51b6c6b3 | 736 | is($^R, 'Nothing', $message); |
e425a60b YO |
737 | |
738 | { | |
739 | local $^R = "Bad"; | |
51b6c6b3 | 740 | ok('x foofoo y' =~ m { |
e425a60b YO |
741 | (foo|bar)\1 # This time without the + |
742 | (?{"last regexp code result"}) | |
51b6c6b3 NC |
743 | }x, $message); |
744 | is($^R, 'last regexp code result', $message); | |
e425a60b | 745 | } |
51b6c6b3 | 746 | is($^R, 'Nothing', $message); |
e425a60b YO |
747 | } |
748 | ||
e425a60b | 749 | { |
c224bbd5 | 750 | my $message = 'Match is quadratic due to eval; See Bug 22395'; |
e425a60b YO |
751 | our $count; |
752 | for my $l (10, 100, 1000) { | |
753 | $count = 0; | |
c224bbd5 YO |
754 | ('a' x $l) =~ /(.*)(?{ $count++ })[bc]/; |
755 | is($count, $l*($l+3)/2+1, $message); | |
e425a60b YO |
756 | } |
757 | } | |
c224bbd5 YO |
758 | { |
759 | my $message = 'Match is linear, not quadratic; Bug 22395.'; | |
760 | our $count; | |
761 | my $ok= 0; | |
762 | for my $l (10, 100, 1000) { | |
763 | $count = 0; | |
764 | ('a' x $l) =~ /(.*)(*{ $count++ })[bc]/; | |
765 | $ok += is($count, $l + 1, $message); | |
766 | } | |
767 | is($ok,3, "Optimistic eval does not disable optimisations"); | |
768 | } | |
e425a60b | 769 | |
e425a60b | 770 | { |
a86da477 | 771 | my $message = '@-/@+ should not have undefined values; Bug 22614'; |
e425a60b YO |
772 | local $_ = 'ab'; |
773 | our @len = (); | |
774 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
51b6c6b3 | 775 | is("@len", "2 2 2", $message); |
e425a60b YO |
776 | } |
777 | ||
e425a60b | 778 | { |
a86da477 | 779 | my $message = '$& set on s///; Bug 18209'; |
e425a60b YO |
780 | my $text = ' word1 word2 word3 word4 word5 word6 '; |
781 | ||
782 | my @words = ('word1', 'word3', 'word5'); | |
783 | my $count; | |
784 | foreach my $word (@words) { | |
93f09d7b | 785 | $text =~ s/$word\s//gi; # Leave a space to separate words |
e425a60b YO |
786 | # in the resultant str. |
787 | # The following block is not working. | |
788 | if ($&) { | |
789 | $count ++; | |
790 | } | |
791 | # End bad block | |
792 | } | |
51b6c6b3 NC |
793 | is($count, 3, $message); |
794 | is($text, ' word2 word4 word6 ', $message); | |
e425a60b YO |
795 | } |
796 | ||
e425a60b YO |
797 | { |
798 | # RT#6893 | |
a86da477 | 799 | |
e425a60b YO |
800 | local $_ = qq (A\nB\nC\n); |
801 | my @res; | |
802 | while (m#(\G|\n)([^\n]*)\n#gsx) { | |
803 | push @res, "$2"; | |
804 | last if @res > 3; | |
805 | } | |
de26e0cc | 806 | is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893"); |
e425a60b YO |
807 | } |
808 | ||
e425a60b | 809 | { |
51b6c6b3 | 810 | # No optimizer bug |
e425a60b YO |
811 | my @tails = ('', '(?(1))', '(|)', '()?'); |
812 | my @quants = ('*','+'); | |
813 | my $doit = sub { | |
814 | my $pats = shift; | |
815 | for (@_) { | |
816 | for my $pat (@$pats) { | |
817 | for my $quant (@quants) { | |
818 | for my $tail (@tails) { | |
819 | my $re = "($pat$quant\$)$tail"; | |
a86da477 NC |
820 | ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010"); |
821 | ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010"); | |
e425a60b YO |
822 | } |
823 | } | |
824 | } | |
825 | } | |
826 | }; | |
827 | ||
828 | my @dpats = ('\d', | |
829 | '[1234567890]', | |
830 | '(1|[23]|4|[56]|[78]|[90])', | |
831 | '(?:1|[23]|4|[56]|[78]|[90])', | |
832 | '(1|2|3|4|5|6|7|8|9|0)', | |
833 | '(?:1|2|3|4|5|6|7|8|9|0)'); | |
834 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); | |
835 | my @sstrs = (' '); | |
836 | my @dstrs = ('12345'); | |
837 | $doit -> (\@spats, @sstrs); | |
838 | $doit -> (\@dpats, @dstrs); | |
839 | } | |
840 | ||
e425a60b | 841 | { |
e425a60b YO |
842 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string |
843 | ||
844 | my $utf_8 = "\xd6schel"; | |
845 | utf8::upgrade ($utf_8); | |
846 | $utf_8 =~ m {(\xd6|Ö)schel}; | |
a86da477 | 847 | is($1, "\xd6", "Upgrade error; Bug 45605"); |
e425a60b YO |
848 | } |
849 | ||
850 | { | |
e425a60b YO |
851 | # Regardless of utf8ness any character matches itself when |
852 | # doing a case insensitive match. See also [perl #36207] | |
a86da477 | 853 | |
e425a60b YO |
854 | for my $o (0 .. 255) { |
855 | my @ch = (chr ($o), chr ($o)); | |
856 | utf8::upgrade ($ch [1]); | |
857 | for my $u_str (0, 1) { | |
858 | for my $u_pat (0, 1) { | |
a86da477 NC |
859 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i, |
860 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); | |
861 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i, | |
862 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); | |
e425a60b YO |
863 | } |
864 | } | |
865 | } | |
866 | } | |
867 | ||
e425a60b | 868 | { |
a86da477 | 869 | my $message = '$REGMARK in replacement; Bug 49190'; |
e425a60b | 870 | our $REGMARK; |
7fba2966 | 871 | local $_ = "A"; |
51b6c6b3 NC |
872 | ok(s/(*:B)A/$REGMARK/, $message); |
873 | is($_, "B", $message); | |
e425a60b | 874 | $_ = "CCCCBAA"; |
51b6c6b3 NC |
875 | ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message); |
876 | is($_, "ZYX", $message); | |
1754320d FC |
877 | # Use a longer name to force reallocation of $REGMARK. |
878 | $_ = "CCCCBAA"; | |
879 | ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message); | |
880 | is($_, "ZYYYYYYYYYYYYYYYYX", $message); | |
e425a60b YO |
881 | } |
882 | ||
e425a60b | 883 | { |
a86da477 | 884 | my $message = 'Substitution evaluation in list context; Bug 52658'; |
e425a60b YO |
885 | my $reg = '../xxx/'; |
886 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, | |
887 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); | |
51b6c6b3 NC |
888 | is($reg, '../bbb/', $message); |
889 | is($te [0], '../', $message); | |
e425a60b YO |
890 | } |
891 | ||
e425a60b | 892 | { |
e425a60b | 893 | my $a = "xyzt" x 8192; |
a86da477 NC |
894 | like($a, qr/\A(?>[a-z])*\z/, |
895 | '(?>) does not cause wrongness on long string; Bug 60034'); | |
e425a60b YO |
896 | my $b = $a . chr 256; |
897 | chop $b; | |
4b0d13b9 | 898 | is($a, $b, 'Bug 60034'); |
a86da477 NC |
899 | like($b, qr/\A(?>[a-z])*\z/, |
900 | '(?>) does not cause wrongness on long string with UTF-8; Bug 60034'); | |
e425a60b YO |
901 | } |
902 | ||
e425a60b YO |
903 | # |
904 | # Keep the following tests last -- they may crash perl | |
905 | # | |
906 | print "# Tests that follow may crash perl\n"; | |
907 | { | |
a86da477 | 908 | |
51b6c6b3 | 909 | my $message = 'Pattern in a loop, failure should not ' . |
a86da477 | 910 | 'affect previous success; Bug 19049/38869'; |
e425a60b YO |
911 | my @list = ( |
912 | 'ab cdef', # Matches regex | |
913 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it | |
914 | ); | |
915 | my $y; | |
916 | my $x; | |
917 | foreach (@list) { | |
918 | m/ab(.+)cd/i; # The ignore-case seems to be important | |
919 | $y = $1; # Use $1, which might not be from the last match! | |
920 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); | |
921 | } | |
51b6c6b3 NC |
922 | is($y, ' ', $message); |
923 | is($x, 'ab cd', $message); | |
e425a60b YO |
924 | } |
925 | ||
19d55f68 KW |
926 | SKIP: { |
927 | skip("Can run out of memory on os390", 1) if $^O eq 'os390'; | |
a86da477 | 928 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274"); |
19d55f68 KW |
929 | } |
930 | { | |
e425a60b | 931 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, |
a86da477 | 932 | "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274"); |
e425a60b YO |
933 | } |
934 | ||
e425a60b | 935 | { |
e425a60b | 936 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache |
a86da477 | 937 | |
e425a60b | 938 | local ${^UTF8CACHE} = -1; |
a86da477 | 939 | my $message = "Shouldn't panic; Bug 45337"; |
e425a60b YO |
940 | my $s = "[a]a{2}"; |
941 | utf8::upgrade $s; | |
51b6c6b3 | 942 | like("aaa", qr/$s/, $message); |
e425a60b YO |
943 | } |
944 | { | |
a86da477 | 945 | my $message = "Check if tree logic breaks \$^R; Bug 57042"; |
e425a60b YO |
946 | my $cond_re = qr/\s* |
947 | \s* (?: | |
948 | \( \s* A (?{1}) | |
949 | | \( \s* B (?{2}) | |
950 | ) | |
951 | /x; | |
952 | my @res; | |
953 | for my $line ("(A)","(B)") { | |
954 | if ($line =~ m/$cond_re/) { | |
955 | push @res, $^R ? "#$^R" : "UNDEF"; | |
956 | } | |
957 | } | |
51b6c6b3 | 958 | is("@res","#1 #2", $message); |
e425a60b YO |
959 | } |
960 | { | |
961 | no warnings 'closure'; | |
962 | my $re = qr/A(??{"1"})/; | |
963 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; | |
964 | ok $1 eq "A1"; | |
965 | ok $2 eq "B"; | |
966 | } | |
967 | ||
e425a60b YO |
968 | # This only works under -DEBUGGING because it relies on an assert(). |
969 | { | |
51b6c6b3 | 970 | # Check capture offset re-entrancy of utf8 code. |
e425a60b YO |
971 | |
972 | sub fswash { $_[0] =~ s/([>X])//g; } | |
973 | ||
974 | my $k1 = "." x 4 . ">>"; | |
975 | fswash($k1); | |
976 | ||
977 | my $k2 = "\x{f1}\x{2022}"; | |
978 | $k2 =~ s/([\360-\362])/>/g; | |
979 | fswash($k2); | |
980 | ||
a86da477 | 981 | is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508"); |
e425a60b YO |
982 | } |
983 | ||
e425a60b | 984 | { |
a86da477 | 985 | # minimal CURLYM limited to 32767 matches |
e425a60b YO |
986 | my @pat = ( |
987 | qr{a(x|y)*b}, # CURLYM | |
988 | qr{a(x|y)*?b}, # .. with minmod | |
989 | qr{a([wx]|[yz])*b}, # .. and without tries | |
990 | qr{a([wx]|[yz])*?b}, | |
991 | ); | |
992 | my $len = 32768; | |
993 | my $s = join '', 'a', 'x' x $len, 'b'; | |
994 | for my $pat (@pat) { | |
a86da477 | 995 | like($s, $pat, "$pat; Bug 65372"); |
e425a60b YO |
996 | } |
997 | } | |
b4da7bae B |
998 | |
999 | { | |
04934b6d | 1000 | local $::TODO = "[perl #38133]"; |
b4da7bae B |
1001 | |
1002 | "A" =~ /(((?:A))?)+/; | |
1003 | my $first = $2; | |
1004 | ||
1005 | "A" =~ /(((A))?)+/; | |
1006 | my $second = $2; | |
1007 | ||
de26e0cc | 1008 | is($first, $second); |
b4da7bae | 1009 | } |
aca53033 FC |
1010 | |
1011 | { | |
51b6c6b3 | 1012 | my $message |
a86da477 | 1013 | = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998'; |
aca53033 FC |
1014 | |
1015 | # Catch warnings: | |
1016 | my $w; | |
1017 | local $SIG{__WARN__} = sub { $w .= shift }; | |
1018 | ||
1019 | # This bug can be reduced to | |
1020 | qq{\x{30ab}} =~ /\xab|\xa9/; | |
1021 | # but it's nice to have a more 'real-world' test. The original test | |
1022 | # case from the RT ticket follows: | |
1023 | ||
1024 | my %conv = ( | |
1025 | "\xab" => "<", | |
1026 | "\xa9" => "(c)", | |
1027 | ); | |
1028 | my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')'; | |
1029 | $conv_rx = qr{$conv_rx}; | |
1030 | ||
1031 | my $x | |
1032 | = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}} | |
1033 | . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}} | |
1034 | . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}} | |
1035 | . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}} | |
1036 | . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}} | |
1037 | . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}} | |
1038 | . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}} | |
1039 | . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}}; | |
1040 | ||
1041 | $x =~ s{$conv_rx}{$conv{$1}}eg; | |
1042 | ||
51b6c6b3 | 1043 | is($w, undef, $message); |
aca53033 FC |
1044 | } |
1045 | ||
e7f38d0f | 1046 | { |
a86da477 | 1047 | # minimal CURLYM limited to 32767 matches |
51b6c6b3 NC |
1048 | |
1049 | is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f", | |
a86da477 | 1050 | 'stclass optimisation does not break + inside (?=); Bug 68564'); |
e7f38d0f YO |
1051 | } |
1052 | ||
5c0a530f NC |
1053 | { |
1054 | use charnames ":full"; | |
1055 | # Delayed interpolation of \N' | |
1056 | my $r1 = qr/\N{THAI CHARACTER SARA I}/; | |
945961fd | 1057 | my $r2 = qr'\N{THAI CHARACTER SARA I}'; |
5c0a530f NC |
1058 | my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}"; |
1059 | ||
1060 | # Bug #56444 | |
1061 | ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/'; | |
945961fd | 1062 | ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\''; |
5c0a530f NC |
1063 | |
1064 | # Bug #62056 | |
1065 | ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/'; | |
1066 | ||
1067 | ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"'; | |
945961fd | 1068 | ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"'; |
5c0a530f | 1069 | ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"'; |
945961fd | 1070 | ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"'; |
5c0a530f NC |
1071 | } |
1072 | ||
1073 | { | |
1074 | use charnames ":full"; | |
1075 | my $message = '[perl #74982] Period coming after \N{}'; | |
1076 | ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message); | |
945961fd | 1077 | ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message); |
5c0a530f | 1078 | ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message); |
945961fd | 1079 | ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message); |
5c0a530f NC |
1080 | } |
1081 | ||
453466ce NC |
1082 | SKIP: { |
1083 | ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org | |
1084 | ||
1085 | skip('Perl configured without Encode module', 1) | |
1086 | unless $Config{extensions} =~ / Encode /; | |
1087 | ||
1088 | # Test case cut down by jhi | |
be76ad45 | 1089 | fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities'); |
453466ce NC |
1090 | use Encode; |
1091 | my $t = ord('A') == 193 ? "\xEA" : "\xE9"; | |
1092 | Encode::_utf8_on($t); | |
be76ad45 | 1093 | substr($t,0); |
453466ce NC |
1094 | $t =~ s/([^a])//ge; |
1095 | EOP | |
1096 | } | |
1097 | ||
e6351b37 YO |
1098 | { |
1099 | # pattern must be compiled late or we can break the test file | |
1100 | my $message = '[perl #115050] repeated nothings in a trie can cause panic'; | |
1101 | my $pattern; | |
1102 | $pattern = '[xyz]|||'; | |
1103 | ok("blah blah" =~ /$pattern/, $message); | |
1104 | ok("blah blah" =~ /(?:$pattern)h/, $message); | |
1105 | $pattern = '|||[xyz]'; | |
1106 | ok("blah blah" =~ /$pattern/, $message); | |
1107 | ok("blah blah" =~ /(?:$pattern)h/, $message); | |
1108 | } | |
00f6437b FC |
1109 | |
1110 | { | |
1111 | # [perl #4289] First mention $& after a match | |
4bc5302a DM |
1112 | local $::TODO = "these tests fail without Copy-on-Write enabled" |
1113 | if $Config{ccflags} =~ /PERL_NO_COW/; | |
00f6437b FC |
1114 | fresh_perl_is( |
1115 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"', | |
1116 | "b\n", {}, '$& first mentioned after match'); | |
1117 | fresh_perl_is( | |
1118 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"', | |
1119 | "a\n", {}, '$` first mentioned after match'); | |
1120 | fresh_perl_is( | |
1121 | '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"', | |
1122 | "c\n", {}, '$\' first mentioned after match'); | |
1123 | } | |
0430522f TC |
1124 | |
1125 | { | |
1126 | # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t | |
1127 | # this tests some related failures | |
1128 | # | |
1129 | # The tests in the block *only* fail when run on 32-bit systems | |
1130 | # with a malloc that allocates above the 2GB line. On the system | |
1131 | # in the report above that only happened in a thread. | |
1132 | my $s = "\x{1ff}" . "f" x 32; | |
1133 | ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap"); | |
1134 | } | |
71a9d105 DM |
1135 | |
1136 | { | |
1137 | # RT #129012 heap-buffer-overflow Perl_fbm_instr. | |
1138 | # This test is unlikely to not pass, but it used to fail | |
1139 | # ASAN/valgrind | |
1140 | ||
1141 | my $s ="\x{100}0000000"; | |
1142 | ok($s !~ /00000?\x80\x80\x80/, "RT #129012"); | |
1143 | } | |
1144 | ||
26fb2318 TC |
1145 | { |
1146 | # RT #129085 heap-buffer-overflow Perl_re_intuit_start | |
1147 | # this did fail under ASAN, but didn't under valgrind | |
1148 | my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}"; | |
1149 | ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085"); | |
1150 | } | |
31fc9395 YO |
1151 | { |
1152 | # rt | |
1153 | fresh_perl_is( | |
923e23ba | 1154 | 'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"', |
31fc9395 YO |
1155 | "ok", {}, 'RT #130561 - allowing impossible quantifier should not cause SEGVs'); |
1156 | my $s= "foo"; | |
923e23ba | 1157 | no warnings 'regexp'; |
31fc9395 YO |
1158 | ok($s=~/(foo){1,0}|(?1)/, |
1159 | "RT #130561 - allowing impossible quantifier should not break recursion"); | |
1160 | } | |
fd8def15 HS |
1161 | { |
1162 | # RT #133892 Coredump in Perl_re_intuit_start | |
1163 | # Second match flips to checking floating substring before fixed | |
1164 | # substring, which triggers a pathway that failed to check there | |
1165 | # was a non-utf8 version of the string before trying to use it | |
1166 | # resulting in a SEGV. | |
1167 | my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 }; | |
1168 | ok($result == 0); | |
1169 | } | |
26fb2318 | 1170 | |
e425a60b YO |
1171 | } # End of sub run_tests |
1172 | ||
1173 | 1; |