This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
new perldelta
[perl5.git] / t / re / pat_rt_report.t
CommitLineData
e425a60b
YO
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
e425a60b
YO
7sub run_tests;
8
9$| = 1;
10
e425a60b
YO
11BEGIN {
12 chdir 't' if -d 't';
6f4e0180 13 require './test.pl';
624c42e2 14 set_up_inc( '../lib', '.' );
2fcf74c4 15 skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-");
e425a60b 16}
e425a60b 17
0b1b7115
JH
18use strict;
19use warnings;
20use 5.010;
21use Config;
e425a60b 22
c224bbd5 23plan tests => 2514; # Update this when adding/deleting tests.
e425a60b 24
9d45b377 25run_tests() unless caller;
e425a60b
YO
26
27#
28# Tests start here.
29#
30sub run_tests {
31
a86da477 32 like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/,
ee95e30c 33 "Match UTF-8 char in presence of (??{ }); Bug 20000731.001 (#3600)");
a86da477 34
e425a60b 35 {
e425a60b 36 no warnings 'uninitialized';
ee95e30c 37 ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005 (#4492)");
e425a60b
YO
38 }
39
e425a60b 40 {
ee95e30c 41 my $message = 'bug id 20001008.001 (#4407)';
e425a60b
YO
42
43 my @x = ("stra\337e 138", "stra\337e 138");
44 for (@x) {
51b6c6b3
NC
45 ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message);
46 ok(my ($latin) = /^(.+)(?:\s+\d)/, $message);
47 is($latin, "stra\337e", $message);
48 ok($latin =~ s/stra\337e/straße/, $message);
e425a60b
YO
49 #
50 # Previous code follows, but outcommented - there were no tests.
51 #
52 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
53 # use utf8; # needed for the raw UTF-8
54 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
55 }
56 }
57
e425a60b 58 {
e425a60b 59 # Fist half of the bug.
ee95e30c 60 my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003 (#4536)';
e425a60b 61 my $X = chr (1448);
51b6c6b3
NC
62 ok(my ($Y) = $X =~ /(.*)/, $message);
63 is($Y, v1448, $message);
64 is(length $Y, 1, $message);
e425a60b
YO
65
66 # Second half of the bug.
ee95e30c 67 $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003 (#4536)';
e425a60b
YO
68 $X = '';
69 $X =~ s/^/chr(1488)/e;
51b6c6b3
NC
70 is(length $X, 1, $message);
71 is(ord $X, 1488, $message);
e425a60b
YO
72 }
73
e425a60b 74 {
ee95e30c 75 my $message = 'Repeated s///; Bug 20001108.001 (#4631)';
e425a60b
YO
76 my $X = "Szab\x{f3},Bal\x{e1}zs";
77 my $Y = $X;
78 $Y =~ s/(B)/$1/ for 0 .. 3;
51b6c6b3
NC
79 is($Y, $X, $message);
80 is($X, "Szab\x{f3},Bal\x{e1}zs", $message);
e425a60b
YO
81 }
82
e425a60b 83 {
ee95e30c 84 my $message = 's/// on UTF-8 string; Bug 20000517.001 (#3253)';
e425a60b
YO
85 my $x = "\x{100}A";
86 $x =~ s/A/B/;
51b6c6b3
NC
87 is($x, "\x{100}B", $message);
88 is(length $x, 2, $message);
e425a60b
YO
89 }
90
e425a60b 91 {
e425a60b 92 # The original bug report had 'no utf8' here but that was irrelevant.
a86da477 93
ee95e30c 94 my $message = "Don't dump core; Bug 20010306.008 (#5982)";
e425a60b 95 my $a = "a\x{1234}";
51b6c6b3 96 like($a, qr/\w/, $message); # used to core dump.
e425a60b
YO
97 }
98
e425a60b 99 {
ee95e30c 100 my $message = '/g in scalar context; Bug 20010410.006 (#6796)';
e425a60b
YO
101 for my $rx ('/(.*?)\{(.*?)\}/csg',
102 '/(.*?)\{(.*?)\}/cg',
103 '/(.*?)\{(.*?)\}/sg',
104 '/(.*?)\{(.*?)\}/g',
105 '/(.+?)\{(.+?)\}/csg',) {
106 my $i = 0;
107 my $input = "a{b}c{d}";
108 eval <<" --";
109 while (eval \$input =~ $rx) {
110 \$i ++;
111 }
112 --
51b6c6b3 113 is($i, 2, $message);
e425a60b
YO
114 }
115 }
116
e425a60b 117 {
e425a60b
YO
118 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
119 for ("\n", "\t", "\014", "\r") {
ee95e30c 120 unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003 (#7131)", ord $_);
e425a60b
YO
121 }
122 for (" ") {
ee95e30c 123 like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003 (#7131)");
e425a60b
YO
124 }
125 }
126
e425a60b 127 {
ee95e30c 128 # [ID 20010814.004 (#7526)] pos() doesn't work when using =~m// in list context
a86da477 129
e425a60b
YO
130 $_ = "ababacadaea";
131 my $a = join ":", /b./gc;
132 my $b = join ":", /a./gc;
133 my $c = pos;
ee95e30c 134 is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004 (#7526)");
e425a60b
YO
135 }
136
e425a60b 137 {
ee95e30c 138 # [ID 20010407.006 (#6767)] matching utf8 return values from
e425a60b 139 # functions does not work
a86da477 140
ee95e30c 141 my $message = 'UTF-8 return values from functions; Bug 20010407.006 (#6767)';
e425a60b
YO
142 package ID_20010407_006;
143 sub x {"a\x{1234}"}
144 my $x = x;
145 my $y;
51b6c6b3 146 ::ok($x =~ /(..)/, $message);
e425a60b 147 $y = $1;
51b6c6b3
NC
148 ::ok(length ($y) == 2 && $y eq $x, $message);
149 ::ok(x =~ /(..)/, $message);
e425a60b 150 $y = $1;
51b6c6b3 151 ::ok(length ($y) == 2 && $y eq $x, $message);
e425a60b
YO
152 }
153
e425a60b
YO
154 {
155 # High bit bug -- japhy
156 my $x = "ab\200d";
157 ok $x =~ /.*?\200/, "High bit fine";
158 }
159
e425a60b 160 {
51b6c6b3 161 my $message = 'UTF-8 hash keys and /$/';
e425a60b
YO
162 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
163 # /2002-01/msg01327.html
164
165 my $u = "a\x{100}";
166 my $v = substr ($u, 0, 1);
167 my $w = substr ($u, 1, 1);
168 my %u = ($u => $u, $v => $v, $w => $w);
169 for (keys %u) {
170 my $m1 = /^\w*$/ ? 1 : 0;
171 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
51b6c6b3 172 is($m1, $m2, $message);
e425a60b
YO
173 }
174 }
175
e425a60b 176 {
ee95e30c 177 my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005 (#8335)";
e425a60b
YO
178
179 for my $char ("a", "\x{df}", "\x{100}") {
180 my $x = "$char b $char";
181 $x =~ s{($char)}{
182 "c" =~ /c/;
183 "x";
184 }ge;
51b6c6b3 185 is(substr ($x, 0, 1), substr ($x, -1, 1), $message);
e425a60b
YO
186 }
187 }
188
e425a60b 189 {
ee95e30c 190 my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005 (#8935)";
e425a60b
YO
191
192 # Requires reuse of last successful pattern.
193 my $num = 123;
194 $num =~ /\d/;
195 for (0 .. 1) {
725a61d7 196 my $match = m?? + 0;
5895685f
NC
197 ok($match != $_, $message)
198 or diag(sprintf "'match one' %s on %s iteration" =>
199 $match ? 'succeeded' : 'failed',
200 $_ ? 'second' : 'first');
e425a60b
YO
201 }
202 $num =~ /(\d)/;
203 my $result = join "" => $num =~ //g;
51b6c6b3 204 is($result, $num, $message);
e425a60b
YO
205 }
206
e425a60b 207 {
ee95e30c 208 my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002 (#10013)';
e425a60b
YO
209 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
210 my ($type, $char) = @$_;
211 for my $len (32000, 32768, 33000) {
212 my $s = $char . "f" x $len;
213 my $r = $s =~ /$char([f]*)/gc;
5895685f
NC
214 ok($r, $message) or diag("<$type x $len>");
215 ok(!$r || pos ($s) == $len + 1, $message)
216 or diag("<$type x $len>; pos = @{[pos $s]}");
e425a60b
YO
217 }
218 }
219 }
220
e425a60b 221 {
e425a60b
YO
222 my $s = "\x{100}" x 5;
223 my $ok = $s =~ /(\x{100}{4})/;
224 my ($ord, $len) = (ord $1, length $1);
1c6f1211 225 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
e425a60b
YO
226 }
227
e425a60b 228 {
a86da477 229 my $message = 'UTF-8 matching; Bug 15397';
51b6c6b3
NC
230 like("\x{100}", qr/\x{100}/, $message);
231 like("\x{100}", qr/(\x{100})/, $message);
232 like("\x{100}", qr/(\x{100}){1}/, $message);
233 like("\x{100}\x{100}", qr/(\x{100}){2}/, $message);
234 like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message);
e425a60b
YO
235 }
236
e425a60b 237 {
a86da477 238 my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471';
e425a60b 239 local $_ = 'CD';
51b6c6b3
NC
240 ok(/(AB)*?CD/ && !defined $1, $message);
241 ok(/(AB)*CD/ && !defined $1, $message);
e425a60b
YO
242 }
243
e425a60b 244 {
a86da477 245 my $message = "Caching shouldn't prevent match; Bug 3547";
e425a60b 246 my $pattern = "^(b+?|a){1,2}c";
51b6c6b3
NC
247 ok("bac" =~ /$pattern/ && $1 eq 'a', $message);
248 ok("bbac" =~ /$pattern/ && $1 eq 'a', $message);
249 ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message);
250 ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message);
e425a60b
YO
251 }
252
e425a60b 253 {
a86da477
NC
254 ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232');
255 is($1, "\x{100}", '$1 is UTF-8; Bug 18232');
e425a60b 256 { 'a' =~ /./; }
a86da477
NC
257 is($1, "\x{100}", '$1 is still UTF-8; Bug 18232');
258 isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232');
e425a60b
YO
259 }
260
e425a60b 261 {
a86da477 262 my $message = "Optimizer doesn't prematurely reject match; Bug 19767";
e425a60b
YO
263 use utf8;
264
265 my $attr = 'Name-1';
266 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
267 my $NormalWord = qr /${NormalChar}+?/;
268 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
269
270 $attr =~ /^$/;
51b6c6b3 271 like($attr, $PredNameHyphen, $message); # Original test.
e425a60b
YO
272
273 "a" =~ m/[b]/;
51b6c6b3 274 like("0", qr/\p{N}+\z/, $message); # Variant.
e425a60b
YO
275 }
276
e425a60b 277 {
a86da477 278 my $message = "(??{ }) doesn't return stale values; Bug 20683";
e425a60b
YO
279 our $p = 1;
280 foreach (1, 2, 3, 4) {
281 $p ++ if /(??{ $p })/
282 }
51b6c6b3 283 is($p, 5, $message);
e425a60b
YO
284
285 {
286 package P;
287 $a = 1;
288 sub TIESCALAR {bless []}
289 sub FETCH {$a ++}
290 }
291 tie $p, "P";
292 foreach (1, 2, 3, 4) {
293 /(??{ $p })/
294 }
51b6c6b3 295 is($p, 5, $message);
e425a60b
YO
296 }
297
e425a60b
YO
298 {
299 # Subject: Odd regexp behavior
300 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
301 # Date: Wed, 26 Feb 2003 16:53:12 +0000
302 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
303 # To: perl-unicode@perl.org
304
51b6c6b3 305 my $message = 'Markus Kuhn 2003-02-26';
e425a60b
YO
306
307 my $x = "\x{2019}\nk";
51b6c6b3
NC
308 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
309 is($x, "\x{2019} k", $message);
e425a60b
YO
310
311 $x = "b\nk";
51b6c6b3
NC
312 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
313 is($x, "b k", $message);
e425a60b 314
51b6c6b3 315 like("\x{2019}", qr/\S/, $message);
e425a60b
YO
316 }
317
e425a60b 318 {
a86da477 319 my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411";
e425a60b 320 our $i;
51b6c6b3 321 is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message);
e425a60b
YO
322 no warnings 'syntax';
323 @_ = split /(?{'WOW'})/, 'abc';
324 local $" = "|";
51b6c6b3 325 is("@_", "a|b|c", $message);
e425a60b
YO
326 }
327
f7a1f402 328 is(join('-', split /(?{ split "" })/, "abc"), 'a-b-c', 'nested split');
e425a60b 329
e425a60b 330 {
e425a60b
YO
331 $_ = "code: 'x' { '...' }\n"; study;
332 my @x; push @x, $& while m/'[^\']*'/gx;
333 local $" = ":";
a86da477 334 is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757");
e425a60b
YO
335 }
336
e425a60b 337 {
e425a60b 338 sub func ($) {
a86da477
NC
339 ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354");
340 ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354");
e425a60b
YO
341 }
342 func "standalone";
343 $_ = "x"; s/x/func "in subst"/e;
344 $_ = "x"; s/x/func "in multiline subst"/em;
1d0dc949
FC
345 $_ = "x"; /x(?{func "in regexp"})/;
346 $_ = "x"; /x(?{func "in multiline regexp"})/m;
e425a60b
YO
347 }
348
e425a60b 349 {
e425a60b
YO
350 $_ = "abcdef\n";
351 my @x = m/./g;
a86da477 352 is("abcde", $`, 'Global match sets $`; Bug 19049');
e425a60b
YO
353 }
354
e425a60b 355 {
e425a60b
YO
356 # [perl #23769] Unicode regex broken on simple example
357 # regrepeat() didn't handle UTF-8 EXACT case right.
a86da477 358
e425a60b 359 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
a86da477 360 my $message = "$Mess; Bug 23769";
e425a60b
YO
361
362 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
363
51b6c6b3
NC
364 like($s, qr/\x{a0}/, $message);
365 like($s, qr/\x{a0}+/, $message);
366 like($s, qr/\x{a0}\x{a0}/, $message);
e425a60b 367
a86da477 368 $message = "$Mess (easy variant); Bug 23769";
51b6c6b3
NC
369 ok("aaa\x{100}" =~ /(a+)/, $message);
370 is($1, "aaa", $message);
e425a60b 371
a86da477 372 $message = "$Mess (easy invariant); Bug 23769";
51b6c6b3
NC
373 ok("aaa\x{100} " =~ /(a+?)/, $message);
374 is($1, "a", $message);
e425a60b 375
a86da477 376 $message = "$Mess (regrepeat variant); Bug 23769";
51b6c6b3
NC
377 ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message);
378 is($1, "\xa0", $message);
e425a60b 379
a86da477 380 $message = "$Mess (regrepeat invariant); Bug 23769";
51b6c6b3
NC
381 ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message);
382 is($1, "\xa0\xa0\xa0", $message);
e425a60b 383
a86da477 384 $message = "$Mess (hard variant); Bug 23769";
51b6c6b3
NC
385 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message);
386 is($1, "\xa0\xa1", $message);
e425a60b 387
a86da477 388 $message = "$Mess (hard invariant); Bug 23769";
51b6c6b3
NC
389 ok("ababab\x{100} " =~ /((?:ab)+)/, $message);
390 is($1, 'ababab', $message);
e425a60b 391
51b6c6b3
NC
392 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message);
393 is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message);
e425a60b 394
51b6c6b3
NC
395 ok("ababab\x{100} " =~ /((?:ab)+?)/, $message);
396 is($1, "ab", $message);
e425a60b 397
a86da477 398 $message = "Don't match first byte of UTF-8 representation; Bug 23769";
51b6c6b3
NC
399 unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message);
400 unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message);
401 unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message);
e425a60b
YO
402 }
403
e425a60b 404 {
e425a60b 405 # perl panic: pp_match start/end pointers
a86da477
NC
406
407 is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc",
408 'Captures can move backwards in string; Bug 25269');
e425a60b
YO
409 }
410
e425a60b 411 {
a86da477
NC
412 # \cA not recognized in character classes
413 like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940');
414 like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940');
415 like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940');
416 like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940');
417 like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940');
418 like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940');
419 like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940');
420 unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940');
e425a60b
YO
421 }
422
e425a60b 423 {
9d45b377 424 # perl #28532: optional zero-width match at end of string is ignored
a86da477
NC
425
426 ok("abc" =~ /^abc(\z)?/ && defined($1),
427 'Optional zero-width match at end of string; Bug 28532');
428 ok("abc" =~ /^abc(\z)??/ && !defined($1),
429 'Optional zero-width match at end of string; Bug 28532');
e425a60b
YO
430 }
431
e425a60b 432 {
e425a60b
YO
433 my $utf8 = "\xe9\x{100}"; chop $utf8;
434 my $latin1 = "\xe9";
435
a86da477
NC
436 like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207");
437 like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207");
438 like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207");
439 like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207");
e425a60b 440
a86da477
NC
441 like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207");
442 like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207");
443 like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207");
444 like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207");
e425a60b
YO
445 }
446
e425a60b 447 {
e425a60b
YO
448 my $s = "abcd";
449 $s =~ /(..)(..)/g;
450 $s = $1;
451 $s = $2;
a86da477
NC
452 is($2, 'cd',
453 "Assigning to original string does not corrupt match vars; Bug 37038");
e425a60b
YO
454 }
455
e425a60b
YO
456 {
457 {
458 package wooosh;
459 sub gloople {"!"}
460 }
461 my $aeek = bless {} => 'wooosh';
2e321296
NC
462 is(do {$aeek -> gloople () =~ /(.)/g}, 1,
463 "//g match against return value of sub [change e26a497577f3ce7b]");
e425a60b
YO
464
465 sub gloople {"!"}
2e321296
NC
466 is(do{gloople () =~ /(.)/g}, 1,
467 "change e26a497577f3ce7b didn't affect sub calls for some reason");
e425a60b
YO
468 }
469
e425a60b 470 {
bf8fb5eb
FC
471 # [perl #78680]
472 # See changes 26925-26928, which reverted change 26410
e425a60b
YO
473 {
474 package lv;
475 our $var = "abc";
476 sub variable : lvalue {$var}
477 }
478 my $o = bless [] => 'lv';
479 my $f = "";
480 my $r = eval {
481 for (1 .. 2) {
482 $f .= $1 if $o -> variable =~ /(.)/g;
483 }
484 1;
485 };
486 if ($r) {
de26e0cc 487 is($f, "ab", "pos() retained between calls");
e425a60b
YO
488 }
489 else {
e425a60b
YO
490 ok 0, "Code failed: $@";
491 }
492
493 our $var = "abc";
494 sub variable : lvalue {$var}
495 my $g = "";
496 my $s = eval {
497 for (1 .. 2) {
498 $g .= $1 if variable =~ /(.)/g;
499 }
500 1;
501 };
502 if ($s) {
de26e0cc 503 is($g, "ab", "pos() retained between calls");
e425a60b
YO
504 }
505 else {
e425a60b
YO
506 ok 0, "Code failed: $@";
507 }
508 }
509
e425a60b
YO
510 SKIP:
511 {
e1a1ba14 512 skip "In EBCDIC and unclear what would trigger this bug there" if $::IS_EBCDIC;
76024109
KW
513 fresh_perl_like(
514 'no warnings "utf8";
515 $_ = pack "U0C2", 0xa2, 0xf8; # Ill-formed UTF-8
516 my $ret = 0;
517 do {!($ret = s/[a\0]+//g)}',
518 qr/Malformed UTF-8/,
519 {}, "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836");
e425a60b
YO
520 }
521
e425a60b
YO
522 {
523 # chr(65535) should be allowed in regexes
a86da477 524
e425a60b
YO
525 no warnings 'utf8'; # To allow non-characters
526 my ($c, $r, $s);
527
528 $c = chr 0xffff;
529 $c =~ s/$c//g;
a86da477 530 is($c, "", "U+FFFF, parsed as atom; Bug 38293");
e425a60b
YO
531
532 $c = chr 0xffff;
533 $r = "\\$c";
534 $c =~ s/$r//g;
a86da477 535 is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293");
e425a60b
YO
536
537 $c = chr 0xffff;
538 $c =~ s/[$c]//g;
a86da477 539 is($c, "", "U+FFFF, parsed in class; Bug 38293");
e425a60b
YO
540
541 $c = chr 0xffff;
542 $r = "[\\$c]";
543 $c =~ s/$r//g;
a86da477 544 is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293");
e425a60b
YO
545
546 $s = "A\x{ffff}B";
547 $s =~ s/\x{ffff}//i;
a86da477 548 is($s, "AB", "U+FFFF, EXACTF; Bug 38293");
e425a60b
YO
549
550 $s = "\x{ffff}A";
551 $s =~ s/\bA//;
a86da477 552 is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293");
e425a60b
YO
553
554 $s = "\x{ffff}!";
555 $s =~ s/\B!//;
a86da477 556 is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293");
e425a60b
YO
557 }
558
e425a60b 559 {
e425a60b
YO
560
561 # The printing characters
562 my @chars = ("A" .. "Z");
563 my $delim = ",";
564 my $size = 32771 - 4;
565 my $str = '';
566
567 # Create some random junk. Inefficient, but it works.
568 for (my $i = 0; $i < $size; $ i++) {
569 $str .= $chars [rand @chars];
570 }
571
572 $str .= ($delim x 4);
573 my $res;
574 my $matched;
a86da477
NC
575 ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583");
576 is($str, "", "Empty string; Bug 39583");
577 ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583');
e425a60b
YO
578 }
579
e425a60b 580 {
a86da477
NC
581 like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940');
582 like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
583 like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940');
584 like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
e425a60b 585
a86da477
NC
586 like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940');
587 like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940');
588 like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940');
589 like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940');
590 like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940');
591
592 like("X\@A", qr/X@?A/, '@?; Bug 27940');
593 like("X\@A", qr/X@*A/, '@*; Bug 27940');
594 like("X\@A", qr/X@(A)/, '@(; Bug 27940');
595 like("X\@A", qr/X(@)A/, '@); Bug 27940');
596 like("X\@A", qr/X@|ZA/, '@|; Bug 27940');
e425a60b
YO
597
598 local $" = ','; # non-whitespace and non-RE-specific
a86da477
NC
599 like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940');
600 like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940');
601 like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940');
602 like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940');
603 like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940');
e425a60b
YO
604 }
605
e425a60b 606 {
e425a60b
YO
607 my $s = 'foo bar baz';
608 my (@k, @v, @fetch, $res);
609 my $count = 0;
610 my @names = qw ($+{A} $+{B} $+{C});
611 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
612 while (my ($k, $v) = each (%+)) {
613 $count++;
614 }
615 @k = sort keys (%+);
616 @v = sort values (%+);
617 $res = 1;
618 push @fetch,
619 ["$+{A}", "$1"],
620 ["$+{B}", "$2"],
621 ["$+{C}", "$3"],
622 ;
623 }
624 foreach (0 .. 2) {
625 if ($fetch [$_]) {
a86da477 626 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
e425a60b
YO
627 } else {
628 ok 0, $names[$_];
629 }
630 }
a86da477
NC
631 is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496");
632 is($count, 3, "Got 3 keys in %+ via each; Bug 50496");
633 is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496");
634 is("@k", "A B C", "Got expected keys; Bug 50496");
635 is("@v", "bar baz foo", "Got expected values; Bug 50496");
e425a60b
YO
636 eval '
637 no warnings "uninitialized";
638 print for $+ {this_key_doesnt_exist};
639 ';
a86da477 640 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
e425a60b
YO
641 }
642
e425a60b
YO
643 {
644 #
645 # Almost the same as the block above, except that the capture is nested.
646 #
a86da477 647
e425a60b
YO
648 my $s = 'foo bar baz';
649 my (@k, @v, @fetch, $res);
650 my $count = 0;
651 my @names = qw ($+{A} $+{B} $+{C} $+{D});
652 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
653 while (my ($k,$v) = each(%+)) {
654 $count++;
655 }
656 @k = sort keys (%+);
657 @v = sort values (%+);
658 $res = 1;
659 push @fetch,
660 ["$+{A}", "$2"],
661 ["$+{B}", "$3"],
662 ["$+{C}", "$4"],
663 ["$+{D}", "$1"],
664 ;
665 }
666 foreach (0 .. 3) {
667 if ($fetch [$_]) {
a86da477 668 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
e425a60b
YO
669 } else {
670 ok 0, $names [$_];
671 }
672 }
a86da477
NC
673 is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496");
674 is($count, 4, "Got 4 keys in %+ via each; Bug 50496");
675 is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496");
676 is("@k", "A B C D", "Got expected keys; Bug 50496");
677 is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496");
e425a60b
YO
678 eval '
679 no warnings "uninitialized";
680 print for $+ {this_key_doesnt_exist};
681 ';
a86da477 682 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
e425a60b
YO
683 }
684
e425a60b 685 {
e425a60b
YO
686 my $str = 'abc';
687 my $count = 0;
688 my $mval = 0;
689 my $pval = 0;
690 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
a86da477
NC
691 is($mval, 0, '@- should be empty; Bug 36046');
692 is($pval, 0, '@+ should be empty; Bug 36046');
693 is($count, 1, 'Should have matched once only; Bug 36046');
e425a60b
YO
694 }
695
e425a60b 696 {
a86da477 697 my $message = '/m in precompiled regexp; Bug 40684';
e425a60b
YO
698 my $s = "abc\ndef";
699 my $rex = qr'^abc$'m;
51b6c6b3
NC
700 ok($s =~ m/$rex/, $message);
701 ok($s =~ m/^abc$/m, $message);
e425a60b
YO
702 }
703
e425a60b 704 {
a86da477 705 my $message = '(?: ... )? should not lose $^R; Bug 36909';
e425a60b
YO
706 $^R = 'Nothing';
707 {
708 local $^R = "Bad";
51b6c6b3 709 ok('x foofoo y' =~ m {
e425a60b
YO
710 (foo) # $^R correctly set
711 (?{ "last regexp code result" })
51b6c6b3
NC
712 }x, $message);
713 is($^R, 'last regexp code result', $message);
e425a60b 714 }
51b6c6b3 715 is($^R, 'Nothing', $message);
e425a60b
YO
716
717 {
718 local $^R = "Bad";
719
51b6c6b3 720 ok('x foofoo y' =~ m {
e425a60b
YO
721 (?:foo|bar)+ # $^R correctly set
722 (?{ "last regexp code result" })
51b6c6b3
NC
723 }x, $message);
724 is($^R, 'last regexp code result', $message);
e425a60b 725 }
51b6c6b3 726 is($^R, 'Nothing', $message);
e425a60b
YO
727
728 {
729 local $^R = "Bad";
51b6c6b3 730 ok('x foofoo y' =~ m {
e425a60b
YO
731 (foo|bar)\1+ # $^R undefined
732 (?{ "last regexp code result" })
51b6c6b3
NC
733 }x, $message);
734 is($^R, 'last regexp code result', $message);
e425a60b 735 }
51b6c6b3 736 is($^R, 'Nothing', $message);
e425a60b
YO
737
738 {
739 local $^R = "Bad";
51b6c6b3 740 ok('x foofoo y' =~ m {
e425a60b
YO
741 (foo|bar)\1 # This time without the +
742 (?{"last regexp code result"})
51b6c6b3
NC
743 }x, $message);
744 is($^R, 'last regexp code result', $message);
e425a60b 745 }
51b6c6b3 746 is($^R, 'Nothing', $message);
e425a60b
YO
747 }
748
e425a60b 749 {
c224bbd5 750 my $message = 'Match is quadratic due to eval; See Bug 22395';
e425a60b
YO
751 our $count;
752 for my $l (10, 100, 1000) {
753 $count = 0;
c224bbd5
YO
754 ('a' x $l) =~ /(.*)(?{ $count++ })[bc]/;
755 is($count, $l*($l+3)/2+1, $message);
e425a60b
YO
756 }
757 }
c224bbd5
YO
758 {
759 my $message = 'Match is linear, not quadratic; Bug 22395.';
760 our $count;
761 my $ok= 0;
762 for my $l (10, 100, 1000) {
763 $count = 0;
764 ('a' x $l) =~ /(.*)(*{ $count++ })[bc]/;
765 $ok += is($count, $l + 1, $message);
766 }
767 is($ok,3, "Optimistic eval does not disable optimisations");
768 }
e425a60b 769
e425a60b 770 {
a86da477 771 my $message = '@-/@+ should not have undefined values; Bug 22614';
e425a60b
YO
772 local $_ = 'ab';
773 our @len = ();
774 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
51b6c6b3 775 is("@len", "2 2 2", $message);
e425a60b
YO
776 }
777
e425a60b 778 {
a86da477 779 my $message = '$& set on s///; Bug 18209';
e425a60b
YO
780 my $text = ' word1 word2 word3 word4 word5 word6 ';
781
782 my @words = ('word1', 'word3', 'word5');
783 my $count;
784 foreach my $word (@words) {
93f09d7b 785 $text =~ s/$word\s//gi; # Leave a space to separate words
e425a60b
YO
786 # in the resultant str.
787 # The following block is not working.
788 if ($&) {
789 $count ++;
790 }
791 # End bad block
792 }
51b6c6b3
NC
793 is($count, 3, $message);
794 is($text, ' word2 word4 word6 ', $message);
e425a60b
YO
795 }
796
e425a60b
YO
797 {
798 # RT#6893
a86da477 799
e425a60b
YO
800 local $_ = qq (A\nB\nC\n);
801 my @res;
802 while (m#(\G|\n)([^\n]*)\n#gsx) {
803 push @res, "$2";
804 last if @res > 3;
805 }
de26e0cc 806 is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893");
e425a60b
YO
807 }
808
e425a60b 809 {
51b6c6b3 810 # No optimizer bug
e425a60b
YO
811 my @tails = ('', '(?(1))', '(|)', '()?');
812 my @quants = ('*','+');
813 my $doit = sub {
814 my $pats = shift;
815 for (@_) {
816 for my $pat (@$pats) {
817 for my $quant (@quants) {
818 for my $tail (@tails) {
819 my $re = "($pat$quant\$)$tail";
a86da477
NC
820 ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010");
821 ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010");
e425a60b
YO
822 }
823 }
824 }
825 }
826 };
827
828 my @dpats = ('\d',
829 '[1234567890]',
830 '(1|[23]|4|[56]|[78]|[90])',
831 '(?:1|[23]|4|[56]|[78]|[90])',
832 '(1|2|3|4|5|6|7|8|9|0)',
833 '(?:1|2|3|4|5|6|7|8|9|0)');
834 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
835 my @sstrs = (' ');
836 my @dstrs = ('12345');
837 $doit -> (\@spats, @sstrs);
838 $doit -> (\@dpats, @dstrs);
839 }
840
e425a60b 841 {
e425a60b
YO
842 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
843
844 my $utf_8 = "\xd6schel";
845 utf8::upgrade ($utf_8);
846 $utf_8 =~ m {(\xd6|&Ouml;)schel};
a86da477 847 is($1, "\xd6", "Upgrade error; Bug 45605");
e425a60b
YO
848 }
849
850 {
e425a60b
YO
851 # Regardless of utf8ness any character matches itself when
852 # doing a case insensitive match. See also [perl #36207]
a86da477 853
e425a60b
YO
854 for my $o (0 .. 255) {
855 my @ch = (chr ($o), chr ($o));
856 utf8::upgrade ($ch [1]);
857 for my $u_str (0, 1) {
858 for my $u_pat (0, 1) {
a86da477
NC
859 like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i,
860 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
861 like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i,
862 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
e425a60b
YO
863 }
864 }
865 }
866 }
867
e425a60b 868 {
a86da477 869 my $message = '$REGMARK in replacement; Bug 49190';
e425a60b 870 our $REGMARK;
7fba2966 871 local $_ = "A";
51b6c6b3
NC
872 ok(s/(*:B)A/$REGMARK/, $message);
873 is($_, "B", $message);
e425a60b 874 $_ = "CCCCBAA";
51b6c6b3
NC
875 ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message);
876 is($_, "ZYX", $message);
1754320d
FC
877 # Use a longer name to force reallocation of $REGMARK.
878 $_ = "CCCCBAA";
879 ok(s/(*:X)A+|(*:YYYYYYYYYYYYYYYY)B+|(*:Z)C+/$REGMARK/g, $message);
880 is($_, "ZYYYYYYYYYYYYYYYYX", $message);
e425a60b
YO
881 }
882
e425a60b 883 {
a86da477 884 my $message = 'Substitution evaluation in list context; Bug 52658';
e425a60b
YO
885 my $reg = '../xxx/';
886 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
887 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
51b6c6b3
NC
888 is($reg, '../bbb/', $message);
889 is($te [0], '../', $message);
e425a60b
YO
890 }
891
e425a60b 892 {
e425a60b 893 my $a = "xyzt" x 8192;
a86da477
NC
894 like($a, qr/\A(?>[a-z])*\z/,
895 '(?>) does not cause wrongness on long string; Bug 60034');
e425a60b
YO
896 my $b = $a . chr 256;
897 chop $b;
4b0d13b9 898 is($a, $b, 'Bug 60034');
a86da477
NC
899 like($b, qr/\A(?>[a-z])*\z/,
900 '(?>) does not cause wrongness on long string with UTF-8; Bug 60034');
e425a60b
YO
901 }
902
e425a60b
YO
903 #
904 # Keep the following tests last -- they may crash perl
905 #
906 print "# Tests that follow may crash perl\n";
907 {
a86da477 908
51b6c6b3 909 my $message = 'Pattern in a loop, failure should not ' .
a86da477 910 'affect previous success; Bug 19049/38869';
e425a60b
YO
911 my @list = (
912 'ab cdef', # Matches regex
913 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
914 );
915 my $y;
916 my $x;
917 foreach (@list) {
918 m/ab(.+)cd/i; # The ignore-case seems to be important
919 $y = $1; # Use $1, which might not be from the last match!
920 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
921 }
51b6c6b3
NC
922 is($y, ' ', $message);
923 is($x, 'ab cd', $message);
e425a60b
YO
924 }
925
19d55f68
KW
926 SKIP: {
927 skip("Can run out of memory on os390", 1) if $^O eq 'os390';
a86da477 928 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274");
19d55f68
KW
929 }
930 {
e425a60b 931 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
a86da477 932 "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274");
e425a60b
YO
933 }
934
e425a60b 935 {
e425a60b 936 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
a86da477 937
e425a60b 938 local ${^UTF8CACHE} = -1;
a86da477 939 my $message = "Shouldn't panic; Bug 45337";
e425a60b
YO
940 my $s = "[a]a{2}";
941 utf8::upgrade $s;
51b6c6b3 942 like("aaa", qr/$s/, $message);
e425a60b
YO
943 }
944 {
a86da477 945 my $message = "Check if tree logic breaks \$^R; Bug 57042";
e425a60b
YO
946 my $cond_re = qr/\s*
947 \s* (?:
948 \( \s* A (?{1})
949 | \( \s* B (?{2})
950 )
951 /x;
952 my @res;
953 for my $line ("(A)","(B)") {
954 if ($line =~ m/$cond_re/) {
955 push @res, $^R ? "#$^R" : "UNDEF";
956 }
957 }
51b6c6b3 958 is("@res","#1 #2", $message);
e425a60b
YO
959 }
960 {
961 no warnings 'closure';
962 my $re = qr/A(??{"1"})/;
963 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
964 ok $1 eq "A1";
965 ok $2 eq "B";
966 }
967
e425a60b
YO
968 # This only works under -DEBUGGING because it relies on an assert().
969 {
51b6c6b3 970 # Check capture offset re-entrancy of utf8 code.
e425a60b
YO
971
972 sub fswash { $_[0] =~ s/([>X])//g; }
973
974 my $k1 = "." x 4 . ">>";
975 fswash($k1);
976
977 my $k2 = "\x{f1}\x{2022}";
978 $k2 =~ s/([\360-\362])/>/g;
979 fswash($k2);
980
a86da477 981 is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508");
e425a60b
YO
982 }
983
e425a60b 984 {
a86da477 985 # minimal CURLYM limited to 32767 matches
e425a60b
YO
986 my @pat = (
987 qr{a(x|y)*b}, # CURLYM
988 qr{a(x|y)*?b}, # .. with minmod
989 qr{a([wx]|[yz])*b}, # .. and without tries
990 qr{a([wx]|[yz])*?b},
991 );
992 my $len = 32768;
993 my $s = join '', 'a', 'x' x $len, 'b';
994 for my $pat (@pat) {
a86da477 995 like($s, $pat, "$pat; Bug 65372");
e425a60b
YO
996 }
997 }
b4da7bae
B
998
999 {
04934b6d 1000 local $::TODO = "[perl #38133]";
b4da7bae
B
1001
1002 "A" =~ /(((?:A))?)+/;
1003 my $first = $2;
1004
1005 "A" =~ /(((A))?)+/;
1006 my $second = $2;
1007
de26e0cc 1008 is($first, $second);
b4da7bae 1009 }
aca53033
FC
1010
1011 {
51b6c6b3 1012 my $message
a86da477 1013 = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998';
aca53033
FC
1014
1015 # Catch warnings:
1016 my $w;
1017 local $SIG{__WARN__} = sub { $w .= shift };
1018
1019 # This bug can be reduced to
1020 qq{\x{30ab}} =~ /\xab|\xa9/;
1021 # but it's nice to have a more 'real-world' test. The original test
1022 # case from the RT ticket follows:
1023
1024 my %conv = (
1025 "\xab" => "&lt;",
1026 "\xa9" => "(c)",
1027 );
1028 my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1029 $conv_rx = qr{$conv_rx};
1030
1031 my $x
1032 = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1033 . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1034 . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1035 . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1036 . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1037 . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1038 . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1039 . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1040
1041 $x =~ s{$conv_rx}{$conv{$1}}eg;
1042
51b6c6b3 1043 is($w, undef, $message);
aca53033
FC
1044 }
1045
e7f38d0f 1046 {
a86da477 1047 # minimal CURLYM limited to 32767 matches
51b6c6b3
NC
1048
1049 is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f",
a86da477 1050 'stclass optimisation does not break + inside (?=); Bug 68564');
e7f38d0f
YO
1051 }
1052
5c0a530f
NC
1053 {
1054 use charnames ":full";
1055 # Delayed interpolation of \N'
1056 my $r1 = qr/\N{THAI CHARACTER SARA I}/;
945961fd 1057 my $r2 = qr'\N{THAI CHARACTER SARA I}';
5c0a530f
NC
1058 my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1059
1060 # Bug #56444
1061 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
945961fd 1062 ok $s1 =~ /$r2+/, 'my $r2 = qr\'\N{THAI CHARACTER SARA I}\'; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ \'$r2+\'';
5c0a530f
NC
1063
1064 # Bug #62056
1065 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1066
1067 ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
945961fd 1068 ok "abbbbc" =~ m'\N{1}' && $& eq "a", '"abbbbc" =~ m\'\N{1}\' && $& eq "a"';
5c0a530f 1069 ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
945961fd 1070 ok "abbbbc" =~ m'\N{3,4}' && $& eq "abbb", '"abbbbc" =~ m\'\N{3,4}\' && $& eq "abbb"';
5c0a530f
NC
1071 }
1072
1073 {
1074 use charnames ":full";
1075 my $message = '[perl #74982] Period coming after \N{}';
1076 ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
945961fd 1077 ok("\x{ff08}." =~ m'\N{FULLWIDTH LEFT PARENTHESIS}.' && $& eq "\x{ff08}.", $message);
5c0a530f 1078 ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
945961fd 1079 ok("\x{ff08}." =~ m'[\N{FULLWIDTH LEFT PARENTHESIS}].' && $& eq "\x{ff08}.", $message);
5c0a530f
NC
1080 }
1081
453466ce
NC
1082SKIP: {
1083 ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org
1084
1085 skip('Perl configured without Encode module', 1)
1086 unless $Config{extensions} =~ / Encode /;
1087
1088 # Test case cut down by jhi
be76ad45 1089 fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\)!, {}, 'Segfault using HTML::Entities');
453466ce
NC
1090use Encode;
1091my $t = ord('A') == 193 ? "\xEA" : "\xE9";
1092Encode::_utf8_on($t);
be76ad45 1093substr($t,0);
453466ce
NC
1094$t =~ s/([^a])//ge;
1095EOP
1096 }
1097
e6351b37
YO
1098 {
1099 # pattern must be compiled late or we can break the test file
1100 my $message = '[perl #115050] repeated nothings in a trie can cause panic';
1101 my $pattern;
1102 $pattern = '[xyz]|||';
1103 ok("blah blah" =~ /$pattern/, $message);
1104 ok("blah blah" =~ /(?:$pattern)h/, $message);
1105 $pattern = '|||[xyz]';
1106 ok("blah blah" =~ /$pattern/, $message);
1107 ok("blah blah" =~ /(?:$pattern)h/, $message);
1108 }
00f6437b
FC
1109
1110 {
1111 # [perl #4289] First mention $& after a match
4bc5302a
DM
1112 local $::TODO = "these tests fail without Copy-on-Write enabled"
1113 if $Config{ccflags} =~ /PERL_NO_COW/;
00f6437b
FC
1114 fresh_perl_is(
1115 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$&|, "\n"',
1116 "b\n", {}, '$& first mentioned after match');
1117 fresh_perl_is(
1118 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$`|, "\n"',
1119 "a\n", {}, '$` first mentioned after match');
1120 fresh_perl_is(
1121 '$_ = "abc"; /b/g; $_ = "hello"; print eval q|$\'|,"\n"',
1122 "c\n", {}, '$\' first mentioned after match');
1123 }
0430522f
TC
1124
1125 {
1126 # [perl #118175] threaded perl-5.18.0 fails pat_rt_report_thr.t
1127 # this tests some related failures
1128 #
1129 # The tests in the block *only* fail when run on 32-bit systems
1130 # with a malloc that allocates above the 2GB line. On the system
1131 # in the report above that only happened in a thread.
1132 my $s = "\x{1ff}" . "f" x 32;
1133 ok($s =~ /\x{1ff}[[:alpha:]]+/gca, "POSIXA pointer wrap");
1134 }
71a9d105
DM
1135
1136 {
1137 # RT #129012 heap-buffer-overflow Perl_fbm_instr.
1138 # This test is unlikely to not pass, but it used to fail
1139 # ASAN/valgrind
1140
1141 my $s ="\x{100}0000000";
1142 ok($s !~ /00000?\x80\x80\x80/, "RT #129012");
1143 }
1144
26fb2318
TC
1145 {
1146 # RT #129085 heap-buffer-overflow Perl_re_intuit_start
1147 # this did fail under ASAN, but didn't under valgrind
1148 my $s = "\x{f2}\x{140}\x{fe}\x{ff}\x{ff}\x{ff}";
1149 ok($s !~ /^0000.\34500\376\377\377\377/, "RT #129085");
1150 }
31fc9395
YO
1151 {
1152 # rt
1153 fresh_perl_is(
923e23ba 1154 'no warnings "regexp"; "foo"=~/((?1)){8,0}/; print "ok"',
31fc9395
YO
1155 "ok", {}, 'RT #130561 - allowing impossible quantifier should not cause SEGVs');
1156 my $s= "foo";
923e23ba 1157 no warnings 'regexp';
31fc9395
YO
1158 ok($s=~/(foo){1,0}|(?1)/,
1159 "RT #130561 - allowing impossible quantifier should not break recursion");
1160 }
fd8def15
HS
1161 {
1162 # RT #133892 Coredump in Perl_re_intuit_start
1163 # Second match flips to checking floating substring before fixed
1164 # substring, which triggers a pathway that failed to check there
1165 # was a non-utf8 version of the string before trying to use it
1166 # resulting in a SEGV.
1167 my $result = grep /b\x{1c0}ss0/i, qw{ xxxx xxxx0 };
1168 ok($result == 0);
1169 }
26fb2318 1170
e425a60b
YO
1171} # End of sub run_tests
1172
11731;