This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Move the test for 078c425b721ea35d from fresh_perl.t to pat_rt_report.t
[perl5.git] / t / re / pat_rt_report.t
CommitLineData
e425a60b
YO
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t. If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
9use 5.010;
453466ce 10use Config;
e425a60b
YO
11
12sub run_tests;
13
14$| = 1;
15
e425a60b
YO
16
17BEGIN {
18 chdir 't' if -d 't';
9d45b377 19 @INC = ('../lib','.');
6f4e0180 20 require './test.pl';
2fcf74c4 21 skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-");
e425a60b 22}
e425a60b 23
e425a60b 24
453466ce 25plan tests => 2521; # Update this when adding/deleting tests.
e425a60b 26
9d45b377 27run_tests() unless caller;
e425a60b
YO
28
29#
30# Tests start here.
31#
32sub run_tests {
33
a86da477
NC
34 like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/,
35 "Match UTF-8 char in presence of (??{ }); Bug 20000731.001");
36
e425a60b 37 {
e425a60b 38 no warnings 'uninitialized';
a86da477 39 ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005");
e425a60b
YO
40 }
41
e425a60b 42 {
51b6c6b3 43 my $message = 'bug id 20001008.001';
e425a60b
YO
44
45 my @x = ("stra\337e 138", "stra\337e 138");
46 for (@x) {
51b6c6b3
NC
47 ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message);
48 ok(my ($latin) = /^(.+)(?:\s+\d)/, $message);
49 is($latin, "stra\337e", $message);
50 ok($latin =~ s/stra\337e/straße/, $message);
e425a60b
YO
51 #
52 # Previous code follows, but outcommented - there were no tests.
53 #
54 # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
55 # use utf8; # needed for the raw UTF-8
56 # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
57 }
58 }
59
e425a60b 60 {
e425a60b 61 # Fist half of the bug.
a86da477 62 my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003';
e425a60b 63 my $X = chr (1448);
51b6c6b3
NC
64 ok(my ($Y) = $X =~ /(.*)/, $message);
65 is($Y, v1448, $message);
66 is(length $Y, 1, $message);
e425a60b
YO
67
68 # Second half of the bug.
a86da477 69 $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003';
e425a60b
YO
70 $X = '';
71 $X =~ s/^/chr(1488)/e;
51b6c6b3
NC
72 is(length $X, 1, $message);
73 is(ord $X, 1488, $message);
e425a60b
YO
74 }
75
e425a60b 76 {
a86da477 77 my $message = 'Repeated s///; Bug 20001108.001';
e425a60b
YO
78 my $X = "Szab\x{f3},Bal\x{e1}zs";
79 my $Y = $X;
80 $Y =~ s/(B)/$1/ for 0 .. 3;
51b6c6b3
NC
81 is($Y, $X, $message);
82 is($X, "Szab\x{f3},Bal\x{e1}zs", $message);
e425a60b
YO
83 }
84
e425a60b 85 {
a86da477 86 my $message = 's/// on UTF-8 string; Bug 20000517.001';
e425a60b
YO
87 my $x = "\x{100}A";
88 $x =~ s/A/B/;
51b6c6b3
NC
89 is($x, "\x{100}B", $message);
90 is(length $x, 2, $message);
e425a60b
YO
91 }
92
e425a60b 93 {
a86da477 94 my $message = '\C and É; Bug 20001230.002';
51b6c6b3
NC
95 ok("École" =~ /^\C\C(.)/ && $1 eq 'c', $message);
96 like("École", qr/^\C\C(c)/, $message);
e425a60b
YO
97 }
98
e425a60b
YO
99 {
100 # The original bug report had 'no utf8' here but that was irrelevant.
a86da477
NC
101
102 my $message = "Don't dump core; Bug 20010306.008";
e425a60b 103 my $a = "a\x{1234}";
51b6c6b3 104 like($a, qr/\w/, $message); # used to core dump.
e425a60b
YO
105 }
106
e425a60b 107 {
a86da477 108 my $message = '/g in scalar context; Bug 20010410.006';
e425a60b
YO
109 for my $rx ('/(.*?)\{(.*?)\}/csg',
110 '/(.*?)\{(.*?)\}/cg',
111 '/(.*?)\{(.*?)\}/sg',
112 '/(.*?)\{(.*?)\}/g',
113 '/(.+?)\{(.+?)\}/csg',) {
114 my $i = 0;
115 my $input = "a{b}c{d}";
116 eval <<" --";
117 while (eval \$input =~ $rx) {
118 \$i ++;
119 }
120 --
51b6c6b3 121 is($i, 2, $message);
e425a60b
YO
122 }
123 }
124
e425a60b 125 {
e425a60b
YO
126 # Amazingly vertical tabulator is the same in ASCII and EBCDIC.
127 for ("\n", "\t", "\014", "\r") {
5895685f 128 unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003", ord $_);
e425a60b
YO
129 }
130 for (" ") {
a86da477 131 like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003");
e425a60b
YO
132 }
133 }
134
e425a60b
YO
135 {
136 # [ID 20010814.004] pos() doesn't work when using =~m// in list context
a86da477 137
e425a60b
YO
138 $_ = "ababacadaea";
139 my $a = join ":", /b./gc;
140 my $b = join ":", /a./gc;
141 my $c = pos;
a86da477 142 is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004");
e425a60b
YO
143 }
144
e425a60b
YO
145 {
146 # [ID 20010407.006] matching utf8 return values from
147 # functions does not work
a86da477
NC
148
149 my $message = 'UTF-8 return values from functions; Bug 20010407.006';
e425a60b
YO
150 package ID_20010407_006;
151 sub x {"a\x{1234}"}
152 my $x = x;
153 my $y;
51b6c6b3 154 ::ok($x =~ /(..)/, $message);
e425a60b 155 $y = $1;
51b6c6b3
NC
156 ::ok(length ($y) == 2 && $y eq $x, $message);
157 ::ok(x =~ /(..)/, $message);
e425a60b 158 $y = $1;
51b6c6b3 159 ::ok(length ($y) == 2 && $y eq $x, $message);
e425a60b
YO
160 }
161
e425a60b
YO
162 {
163 # High bit bug -- japhy
164 my $x = "ab\200d";
165 ok $x =~ /.*?\200/, "High bit fine";
166 }
167
e425a60b 168 {
51b6c6b3 169 my $message = 'UTF-8 hash keys and /$/';
e425a60b
YO
170 # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters
171 # /2002-01/msg01327.html
172
173 my $u = "a\x{100}";
174 my $v = substr ($u, 0, 1);
175 my $w = substr ($u, 1, 1);
176 my %u = ($u => $u, $v => $v, $w => $w);
177 for (keys %u) {
178 my $m1 = /^\w*$/ ? 1 : 0;
179 my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0;
51b6c6b3 180 is($m1, $m2, $message);
e425a60b
YO
181 }
182 }
183
e425a60b 184 {
a86da477 185 my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005";
e425a60b
YO
186
187 for my $char ("a", "\x{df}", "\x{100}") {
188 my $x = "$char b $char";
189 $x =~ s{($char)}{
190 "c" =~ /c/;
191 "x";
192 }ge;
51b6c6b3 193 is(substr ($x, 0, 1), substr ($x, -1, 1), $message);
e425a60b
YO
194 }
195 }
196
e425a60b 197 {
a86da477 198 my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005";
e425a60b
YO
199
200 # Requires reuse of last successful pattern.
201 my $num = 123;
202 $num =~ /\d/;
203 for (0 .. 1) {
725a61d7 204 my $match = m?? + 0;
5895685f
NC
205 ok($match != $_, $message)
206 or diag(sprintf "'match one' %s on %s iteration" =>
207 $match ? 'succeeded' : 'failed',
208 $_ ? 'second' : 'first');
e425a60b
YO
209 }
210 $num =~ /(\d)/;
211 my $result = join "" => $num =~ //g;
51b6c6b3 212 is($result, $num, $message);
e425a60b
YO
213 }
214
e425a60b 215 {
a86da477 216 my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002';
e425a60b
YO
217 for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) {
218 my ($type, $char) = @$_;
219 for my $len (32000, 32768, 33000) {
220 my $s = $char . "f" x $len;
221 my $r = $s =~ /$char([f]*)/gc;
5895685f
NC
222 ok($r, $message) or diag("<$type x $len>");
223 ok(!$r || pos ($s) == $len + 1, $message)
224 or diag("<$type x $len>; pos = @{[pos $s]}");
e425a60b
YO
225 }
226 }
227 }
228
e425a60b 229 {
e425a60b
YO
230 my $s = "\x{100}" x 5;
231 my $ok = $s =~ /(\x{100}{4})/;
232 my ($ord, $len) = (ord $1, length $1);
1c6f1211 233 ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]";
e425a60b
YO
234 }
235
e425a60b 236 {
e425a60b
YO
237 our $a = "x\x{100}";
238 chop $a; # Leaves the UTF-8 flag
239 $a .= "y"; # 1 byte before 'y'.
240
a86da477
NC
241 like($a, qr/^\C/, 'match one \C on 1-byte UTF-8; Bug 15763');
242 like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
e425a60b 243
a86da477
NC
244 like($a, qr/^\Cy/, 'match \Cy; Bug 15763');
245 like($a, qr/^\C{1}y/, 'match \C{1}y; Bug 15763');
e425a60b 246
a86da477
NC
247 unlike($a, qr/^\C\Cy/, q {don't match two \Cy; Bug 15763});
248 unlike($a, qr/^\C{2}y/, q {don't match \C{2}y; Bug 15763});
e425a60b
YO
249
250 $a = "\x{100}y"; # 2 bytes before "y"
251
a86da477
NC
252 like($a, qr/^\C/, 'match one \C on 2-byte UTF-8; Bug 15763');
253 like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
254 like($a, qr/^\C\C/, 'match two \C; Bug 15763');
255 like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763');
e425a60b 256
a86da477
NC
257 like($a, qr/^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte; Bug 15763');
258 like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763');
e425a60b 259
a86da477
NC
260 like($a, qr/^\C\Cy/, 'match two \C; Bug 15763');
261 like($a, qr/^\C{2}y/, 'match \C{2}; Bug 15763');
e425a60b 262
a86da477
NC
263 unlike($a, qr/^\C\C\Cy/, q {don't match three \Cy; Bug 15763});
264 unlike($a, qr/^\C{2}\Cy/, q {don't match \C{2}\Cy; Bug 15763});
265 unlike($a, qr/^\C{3}y/, q {don't match \C{3}y; Bug 15763});
e425a60b
YO
266
267 $a = "\x{1000}y"; # 3 bytes before "y"
268
a86da477
NC
269 like($a, qr/^\C/, 'match one \C on three-byte UTF-8; Bug 15763');
270 like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763');
271 like($a, qr/^\C\C/, 'match two \C; Bug 15763');
272 like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763');
273 like($a, qr/^\C\C\C/, 'match three \C; Bug 15763');
274 like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763');
e425a60b 275
a86da477
NC
276 like($a, qr/^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte; Bug 15763');
277 like($a, qr/^\C{4}/, 'match \C{4}; Bug 15763');
e425a60b 278
a86da477
NC
279 like($a, qr/^\C\C\Cy/, 'match three \Cy; Bug 15763');
280 like($a, qr/^\C{3}y/, 'match \C{3}y; Bug 15763');
e425a60b 281
a86da477
NC
282 unlike($a, qr/^\C\C\C\Cy/, q {don't match four \Cy; Bug 15763});
283 unlike($a, qr/^\C{4}y/, q {don't match \C{4}y; Bug 15763});
e425a60b
YO
284 }
285
286
287 {
a86da477 288 my $message = 'UTF-8 matching; Bug 15397';
51b6c6b3
NC
289 like("\x{100}", qr/\x{100}/, $message);
290 like("\x{100}", qr/(\x{100})/, $message);
291 like("\x{100}", qr/(\x{100}){1}/, $message);
292 like("\x{100}\x{100}", qr/(\x{100}){2}/, $message);
293 like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message);
e425a60b
YO
294 }
295
e425a60b 296 {
a86da477 297 my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471';
e425a60b 298 local $_ = 'CD';
51b6c6b3
NC
299 ok(/(AB)*?CD/ && !defined $1, $message);
300 ok(/(AB)*CD/ && !defined $1, $message);
e425a60b
YO
301 }
302
e425a60b 303 {
a86da477 304 my $message = "Caching shouldn't prevent match; Bug 3547";
e425a60b 305 my $pattern = "^(b+?|a){1,2}c";
51b6c6b3
NC
306 ok("bac" =~ /$pattern/ && $1 eq 'a', $message);
307 ok("bbac" =~ /$pattern/ && $1 eq 'a', $message);
308 ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message);
309 ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message);
e425a60b
YO
310 }
311
e425a60b 312 {
a86da477
NC
313 ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232');
314 is($1, "\x{100}", '$1 is UTF-8; Bug 18232');
e425a60b 315 { 'a' =~ /./; }
a86da477
NC
316 is($1, "\x{100}", '$1 is still UTF-8; Bug 18232');
317 isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232');
e425a60b
YO
318 }
319
e425a60b 320 {
a86da477 321 my $message = "Optimizer doesn't prematurely reject match; Bug 19767";
e425a60b
YO
322 use utf8;
323
324 my $attr = 'Name-1';
325 my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/;
326 my $NormalWord = qr /${NormalChar}+?/;
327 my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/;
328
329 $attr =~ /^$/;
51b6c6b3 330 like($attr, $PredNameHyphen, $message); # Original test.
e425a60b
YO
331
332 "a" =~ m/[b]/;
51b6c6b3 333 like("0", qr/\p{N}+\z/, $message); # Variant.
e425a60b
YO
334 }
335
e425a60b 336 {
a86da477 337 my $message = "(??{ }) doesn't return stale values; Bug 20683";
e425a60b
YO
338 our $p = 1;
339 foreach (1, 2, 3, 4) {
340 $p ++ if /(??{ $p })/
341 }
51b6c6b3 342 is($p, 5, $message);
e425a60b
YO
343
344 {
345 package P;
346 $a = 1;
347 sub TIESCALAR {bless []}
348 sub FETCH {$a ++}
349 }
350 tie $p, "P";
351 foreach (1, 2, 3, 4) {
352 /(??{ $p })/
353 }
51b6c6b3 354 is($p, 5, $message);
e425a60b
YO
355 }
356
e425a60b
YO
357 {
358 # Subject: Odd regexp behavior
359 # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk>
360 # Date: Wed, 26 Feb 2003 16:53:12 +0000
361 # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk>
362 # To: perl-unicode@perl.org
363
51b6c6b3 364 my $message = 'Markus Kuhn 2003-02-26';
e425a60b
YO
365
366 my $x = "\x{2019}\nk";
51b6c6b3
NC
367 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
368 is($x, "\x{2019} k", $message);
e425a60b
YO
369
370 $x = "b\nk";
51b6c6b3
NC
371 ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message);
372 is($x, "b k", $message);
e425a60b 373
51b6c6b3 374 like("\x{2019}", qr/\S/, $message);
e425a60b
YO
375 }
376
e425a60b 377 {
a86da477 378 my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411";
e425a60b 379 our $i;
51b6c6b3 380 is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message);
e425a60b
YO
381 no warnings 'syntax';
382 @_ = split /(?{'WOW'})/, 'abc';
383 local $" = "|";
51b6c6b3 384 is("@_", "a|b|c", $message);
e425a60b
YO
385 }
386
e425a60b
YO
387 {
388 # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it
389 # hasn't been crashing. Disable this test until it is fixed properly.
390 # XXX also check what it returns rather than just doing ok(1,...)
391 # split /(?{ split "" })/, "abc";
04934b6d 392 local $::TODO = "Recursive split is still broken";
e425a60b
YO
393 ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0';
394 }
395
e425a60b 396 {
e425a60b
YO
397 $_ = "code: 'x' { '...' }\n"; study;
398 my @x; push @x, $& while m/'[^\']*'/gx;
399 local $" = ":";
a86da477 400 is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757");
e425a60b
YO
401 }
402
e425a60b 403 {
e425a60b 404 sub func ($) {
a86da477
NC
405 ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354");
406 ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354");
e425a60b
YO
407 }
408 func "standalone";
409 $_ = "x"; s/x/func "in subst"/e;
410 $_ = "x"; s/x/func "in multiline subst"/em;
1d0dc949
FC
411 $_ = "x"; /x(?{func "in regexp"})/;
412 $_ = "x"; /x(?{func "in multiline regexp"})/m;
e425a60b
YO
413 }
414
e425a60b 415 {
e425a60b
YO
416 $_ = "abcdef\n";
417 my @x = m/./g;
a86da477 418 is("abcde", $`, 'Global match sets $`; Bug 19049');
e425a60b
YO
419 }
420
e425a60b 421 {
e425a60b
YO
422 # [perl #23769] Unicode regex broken on simple example
423 # regrepeat() didn't handle UTF-8 EXACT case right.
a86da477 424
e425a60b 425 my $Mess = 'regrepeat() handles UTF-8 EXACT case right';
a86da477 426 my $message = "$Mess; Bug 23769";
e425a60b
YO
427
428 my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s;
429
51b6c6b3
NC
430 like($s, qr/\x{a0}/, $message);
431 like($s, qr/\x{a0}+/, $message);
432 like($s, qr/\x{a0}\x{a0}/, $message);
e425a60b 433
a86da477 434 $message = "$Mess (easy variant); Bug 23769";
51b6c6b3
NC
435 ok("aaa\x{100}" =~ /(a+)/, $message);
436 is($1, "aaa", $message);
e425a60b 437
a86da477 438 $message = "$Mess (easy invariant); Bug 23769";
51b6c6b3
NC
439 ok("aaa\x{100} " =~ /(a+?)/, $message);
440 is($1, "a", $message);
e425a60b 441
a86da477 442 $message = "$Mess (regrepeat variant); Bug 23769";
51b6c6b3
NC
443 ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message);
444 is($1, "\xa0", $message);
e425a60b 445
a86da477 446 $message = "$Mess (regrepeat invariant); Bug 23769";
51b6c6b3
NC
447 ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message);
448 is($1, "\xa0\xa0\xa0", $message);
e425a60b 449
a86da477 450 $message = "$Mess (hard variant); Bug 23769";
51b6c6b3
NC
451 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message);
452 is($1, "\xa0\xa1", $message);
e425a60b 453
a86da477 454 $message = "$Mess (hard invariant); Bug 23769";
51b6c6b3
NC
455 ok("ababab\x{100} " =~ /((?:ab)+)/, $message);
456 is($1, 'ababab', $message);
e425a60b 457
51b6c6b3
NC
458 ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message);
459 is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message);
e425a60b 460
51b6c6b3
NC
461 ok("ababab\x{100} " =~ /((?:ab)+?)/, $message);
462 is($1, "ab", $message);
e425a60b 463
a86da477 464 $message = "Don't match first byte of UTF-8 representation; Bug 23769";
51b6c6b3
NC
465 unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message);
466 unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message);
467 unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message);
e425a60b
YO
468 }
469
e425a60b 470 {
e425a60b 471 # perl panic: pp_match start/end pointers
a86da477
NC
472
473 is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc",
474 'Captures can move backwards in string; Bug 25269');
e425a60b
YO
475 }
476
e425a60b 477 {
a86da477
NC
478 # \cA not recognized in character classes
479 like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940');
480 like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940');
481 like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940');
482 like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940');
483 like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940');
484 like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940');
485 like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940');
486 unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940');
e425a60b
YO
487 }
488
e425a60b 489 {
9d45b377 490 # perl #28532: optional zero-width match at end of string is ignored
a86da477
NC
491
492 ok("abc" =~ /^abc(\z)?/ && defined($1),
493 'Optional zero-width match at end of string; Bug 28532');
494 ok("abc" =~ /^abc(\z)??/ && !defined($1),
495 'Optional zero-width match at end of string; Bug 28532');
e425a60b
YO
496 }
497
e425a60b 498 {
e425a60b
YO
499 my $utf8 = "\xe9\x{100}"; chop $utf8;
500 my $latin1 = "\xe9";
501
a86da477
NC
502 like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207");
503 like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207");
504 like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207");
505 like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207");
e425a60b 506
a86da477
NC
507 like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207");
508 like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207");
509 like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207");
510 like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207");
e425a60b
YO
511 }
512
e425a60b 513 {
e425a60b
YO
514 my $s = "abcd";
515 $s =~ /(..)(..)/g;
516 $s = $1;
517 $s = $2;
a86da477
NC
518 is($2, 'cd',
519 "Assigning to original string does not corrupt match vars; Bug 37038");
e425a60b
YO
520 }
521
e425a60b
YO
522 {
523 {
524 package wooosh;
525 sub gloople {"!"}
526 }
527 my $aeek = bless {} => 'wooosh';
2e321296
NC
528 is(do {$aeek -> gloople () =~ /(.)/g}, 1,
529 "//g match against return value of sub [change e26a497577f3ce7b]");
e425a60b
YO
530
531 sub gloople {"!"}
2e321296
NC
532 is(do{gloople () =~ /(.)/g}, 1,
533 "change e26a497577f3ce7b didn't affect sub calls for some reason");
e425a60b
YO
534 }
535
e425a60b 536 {
04934b6d 537 local $::TODO = "See changes 26925-26928, which reverted change 26410";
e425a60b
YO
538 {
539 package lv;
540 our $var = "abc";
541 sub variable : lvalue {$var}
542 }
543 my $o = bless [] => 'lv';
544 my $f = "";
545 my $r = eval {
546 for (1 .. 2) {
547 $f .= $1 if $o -> variable =~ /(.)/g;
548 }
549 1;
550 };
551 if ($r) {
de26e0cc 552 is($f, "ab", "pos() retained between calls");
e425a60b
YO
553 }
554 else {
04934b6d 555 local $::TODO;
e425a60b
YO
556 ok 0, "Code failed: $@";
557 }
558
559 our $var = "abc";
560 sub variable : lvalue {$var}
561 my $g = "";
562 my $s = eval {
563 for (1 .. 2) {
564 $g .= $1 if variable =~ /(.)/g;
565 }
566 1;
567 };
568 if ($s) {
de26e0cc 569 is($g, "ab", "pos() retained between calls");
e425a60b
YO
570 }
571 else {
04934b6d 572 local $::TODO;
e425a60b
YO
573 ok 0, "Code failed: $@";
574 }
575 }
576
e425a60b
YO
577 SKIP:
578 {
ef237063 579 skip "In EBCDIC" if $::IS_EBCDIC;
e425a60b
YO
580 no warnings 'utf8';
581 $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8
582 my $ret = 0;
2e321296
NC
583 is(do {!($ret = s/[\0]+//g)}, 1,
584 "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836");
e425a60b
YO
585 }
586
e425a60b
YO
587 {
588 # chr(65535) should be allowed in regexes
a86da477 589
e425a60b
YO
590 no warnings 'utf8'; # To allow non-characters
591 my ($c, $r, $s);
592
593 $c = chr 0xffff;
594 $c =~ s/$c//g;
a86da477 595 is($c, "", "U+FFFF, parsed as atom; Bug 38293");
e425a60b
YO
596
597 $c = chr 0xffff;
598 $r = "\\$c";
599 $c =~ s/$r//g;
a86da477 600 is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293");
e425a60b
YO
601
602 $c = chr 0xffff;
603 $c =~ s/[$c]//g;
a86da477 604 is($c, "", "U+FFFF, parsed in class; Bug 38293");
e425a60b
YO
605
606 $c = chr 0xffff;
607 $r = "[\\$c]";
608 $c =~ s/$r//g;
a86da477 609 is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293");
e425a60b
YO
610
611 $s = "A\x{ffff}B";
612 $s =~ s/\x{ffff}//i;
a86da477 613 is($s, "AB", "U+FFFF, EXACTF; Bug 38293");
e425a60b
YO
614
615 $s = "\x{ffff}A";
616 $s =~ s/\bA//;
a86da477 617 is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293");
e425a60b
YO
618
619 $s = "\x{ffff}!";
620 $s =~ s/\B!//;
a86da477 621 is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293");
e425a60b
YO
622 }
623
e425a60b 624 {
e425a60b
YO
625
626 # The printing characters
627 my @chars = ("A" .. "Z");
628 my $delim = ",";
629 my $size = 32771 - 4;
630 my $str = '';
631
632 # Create some random junk. Inefficient, but it works.
633 for (my $i = 0; $i < $size; $ i++) {
634 $str .= $chars [rand @chars];
635 }
636
637 $str .= ($delim x 4);
638 my $res;
639 my $matched;
a86da477
NC
640 ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583");
641 is($str, "", "Empty string; Bug 39583");
642 ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583');
e425a60b
YO
643 }
644
e425a60b 645 {
a86da477
NC
646 like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940');
647 like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
648 like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940');
649 like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940');
e425a60b 650
a86da477
NC
651 like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940');
652 like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940');
653 like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940');
654 like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940');
655 like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940');
656
657 like("X\@A", qr/X@?A/, '@?; Bug 27940');
658 like("X\@A", qr/X@*A/, '@*; Bug 27940');
659 like("X\@A", qr/X@(A)/, '@(; Bug 27940');
660 like("X\@A", qr/X(@)A/, '@); Bug 27940');
661 like("X\@A", qr/X@|ZA/, '@|; Bug 27940');
e425a60b
YO
662
663 local $" = ','; # non-whitespace and non-RE-specific
a86da477
NC
664 like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940');
665 like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940');
666 like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940');
667 like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940');
668 like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940');
e425a60b
YO
669 }
670
e425a60b 671 {
e425a60b
YO
672 my $s = 'foo bar baz';
673 my (@k, @v, @fetch, $res);
674 my $count = 0;
675 my @names = qw ($+{A} $+{B} $+{C});
676 if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) {
677 while (my ($k, $v) = each (%+)) {
678 $count++;
679 }
680 @k = sort keys (%+);
681 @v = sort values (%+);
682 $res = 1;
683 push @fetch,
684 ["$+{A}", "$1"],
685 ["$+{B}", "$2"],
686 ["$+{C}", "$3"],
687 ;
688 }
689 foreach (0 .. 2) {
690 if ($fetch [$_]) {
a86da477 691 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
e425a60b
YO
692 } else {
693 ok 0, $names[$_];
694 }
695 }
a86da477
NC
696 is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496");
697 is($count, 3, "Got 3 keys in %+ via each; Bug 50496");
698 is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496");
699 is("@k", "A B C", "Got expected keys; Bug 50496");
700 is("@v", "bar baz foo", "Got expected values; Bug 50496");
e425a60b
YO
701 eval '
702 no warnings "uninitialized";
703 print for $+ {this_key_doesnt_exist};
704 ';
a86da477 705 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
e425a60b
YO
706 }
707
e425a60b
YO
708 {
709 #
710 # Almost the same as the block above, except that the capture is nested.
711 #
a86da477 712
e425a60b
YO
713 my $s = 'foo bar baz';
714 my (@k, @v, @fetch, $res);
715 my $count = 0;
716 my @names = qw ($+{A} $+{B} $+{C} $+{D});
717 if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) {
718 while (my ($k,$v) = each(%+)) {
719 $count++;
720 }
721 @k = sort keys (%+);
722 @v = sort values (%+);
723 $res = 1;
724 push @fetch,
725 ["$+{A}", "$2"],
726 ["$+{B}", "$3"],
727 ["$+{C}", "$4"],
728 ["$+{D}", "$1"],
729 ;
730 }
731 foreach (0 .. 3) {
732 if ($fetch [$_]) {
a86da477 733 is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496");
e425a60b
YO
734 } else {
735 ok 0, $names [$_];
736 }
737 }
a86da477
NC
738 is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496");
739 is($count, 4, "Got 4 keys in %+ via each; Bug 50496");
740 is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496");
741 is("@k", "A B C D", "Got expected keys; Bug 50496");
742 is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496");
e425a60b
YO
743 eval '
744 no warnings "uninitialized";
745 print for $+ {this_key_doesnt_exist};
746 ';
a86da477 747 is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496');
e425a60b
YO
748 }
749
e425a60b 750 {
e425a60b
YO
751 my $str = 'abc';
752 my $count = 0;
753 my $mval = 0;
754 my $pval = 0;
755 while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++}
a86da477
NC
756 is($mval, 0, '@- should be empty; Bug 36046');
757 is($pval, 0, '@+ should be empty; Bug 36046');
758 is($count, 1, 'Should have matched once only; Bug 36046');
e425a60b
YO
759 }
760
e425a60b 761 {
a86da477 762 my $message = '/m in precompiled regexp; Bug 40684';
e425a60b
YO
763 my $s = "abc\ndef";
764 my $rex = qr'^abc$'m;
51b6c6b3
NC
765 ok($s =~ m/$rex/, $message);
766 ok($s =~ m/^abc$/m, $message);
e425a60b
YO
767 }
768
e425a60b 769 {
a86da477 770 my $message = '(?: ... )? should not lose $^R; Bug 36909';
e425a60b
YO
771 $^R = 'Nothing';
772 {
773 local $^R = "Bad";
51b6c6b3 774 ok('x foofoo y' =~ m {
e425a60b
YO
775 (foo) # $^R correctly set
776 (?{ "last regexp code result" })
51b6c6b3
NC
777 }x, $message);
778 is($^R, 'last regexp code result', $message);
e425a60b 779 }
51b6c6b3 780 is($^R, 'Nothing', $message);
e425a60b
YO
781
782 {
783 local $^R = "Bad";
784
51b6c6b3 785 ok('x foofoo y' =~ m {
e425a60b
YO
786 (?:foo|bar)+ # $^R correctly set
787 (?{ "last regexp code result" })
51b6c6b3
NC
788 }x, $message);
789 is($^R, 'last regexp code result', $message);
e425a60b 790 }
51b6c6b3 791 is($^R, 'Nothing', $message);
e425a60b
YO
792
793 {
794 local $^R = "Bad";
51b6c6b3 795 ok('x foofoo y' =~ m {
e425a60b
YO
796 (foo|bar)\1+ # $^R undefined
797 (?{ "last regexp code result" })
51b6c6b3
NC
798 }x, $message);
799 is($^R, 'last regexp code result', $message);
e425a60b 800 }
51b6c6b3 801 is($^R, 'Nothing', $message);
e425a60b
YO
802
803 {
804 local $^R = "Bad";
51b6c6b3 805 ok('x foofoo y' =~ m {
e425a60b
YO
806 (foo|bar)\1 # This time without the +
807 (?{"last regexp code result"})
51b6c6b3
NC
808 }x, $message);
809 is($^R, 'last regexp code result', $message);
e425a60b 810 }
51b6c6b3 811 is($^R, 'Nothing', $message);
e425a60b
YO
812 }
813
e425a60b 814 {
a86da477 815 my $message = 'Match is linear, not quadratic; Bug 22395';
e425a60b
YO
816 our $count;
817 for my $l (10, 100, 1000) {
818 $count = 0;
819 ('a' x $l) =~ /(.*)(?{$count++})[bc]/;
04934b6d 820 local $::TODO = "Should be L+1 not L*(L+3)/2 (L=$l)";
51b6c6b3 821 is($count, $l + 1, $message);
e425a60b
YO
822 }
823 }
824
e425a60b 825 {
a86da477 826 my $message = '@-/@+ should not have undefined values; Bug 22614';
e425a60b
YO
827 local $_ = 'ab';
828 our @len = ();
829 /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/;
51b6c6b3 830 is("@len", "2 2 2", $message);
e425a60b
YO
831 }
832
e425a60b 833 {
a86da477 834 my $message = '$& set on s///; Bug 18209';
e425a60b
YO
835 my $text = ' word1 word2 word3 word4 word5 word6 ';
836
837 my @words = ('word1', 'word3', 'word5');
838 my $count;
839 foreach my $word (@words) {
93f09d7b 840 $text =~ s/$word\s//gi; # Leave a space to separate words
e425a60b
YO
841 # in the resultant str.
842 # The following block is not working.
843 if ($&) {
844 $count ++;
845 }
846 # End bad block
847 }
51b6c6b3
NC
848 is($count, 3, $message);
849 is($text, ' word2 word4 word6 ', $message);
e425a60b
YO
850 }
851
e425a60b
YO
852 {
853 # RT#6893
a86da477 854
e425a60b
YO
855 local $_ = qq (A\nB\nC\n);
856 my @res;
857 while (m#(\G|\n)([^\n]*)\n#gsx) {
858 push @res, "$2";
859 last if @res > 3;
860 }
de26e0cc 861 is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893");
e425a60b
YO
862 }
863
e425a60b 864 {
51b6c6b3 865 # No optimizer bug
e425a60b
YO
866 my @tails = ('', '(?(1))', '(|)', '()?');
867 my @quants = ('*','+');
868 my $doit = sub {
869 my $pats = shift;
870 for (@_) {
871 for my $pat (@$pats) {
872 for my $quant (@quants) {
873 for my $tail (@tails) {
874 my $re = "($pat$quant\$)$tail";
a86da477
NC
875 ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010");
876 ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010");
e425a60b
YO
877 }
878 }
879 }
880 }
881 };
882
883 my @dpats = ('\d',
884 '[1234567890]',
885 '(1|[23]|4|[56]|[78]|[90])',
886 '(?:1|[23]|4|[56]|[78]|[90])',
887 '(1|2|3|4|5|6|7|8|9|0)',
888 '(?:1|2|3|4|5|6|7|8|9|0)');
889 my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s');
890 my @sstrs = (' ');
891 my @dstrs = ('12345');
892 $doit -> (\@spats, @sstrs);
893 $doit -> (\@dpats, @dstrs);
894 }
895
e425a60b 896 {
e425a60b
YO
897 # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string
898
899 my $utf_8 = "\xd6schel";
900 utf8::upgrade ($utf_8);
901 $utf_8 =~ m {(\xd6|&Ouml;)schel};
a86da477 902 is($1, "\xd6", "Upgrade error; Bug 45605");
e425a60b
YO
903 }
904
905 {
e425a60b
YO
906 # Regardless of utf8ness any character matches itself when
907 # doing a case insensitive match. See also [perl #36207]
a86da477 908
e425a60b
YO
909 for my $o (0 .. 255) {
910 my @ch = (chr ($o), chr ($o));
911 utf8::upgrade ($ch [1]);
912 for my $u_str (0, 1) {
913 for my $u_pat (0, 1) {
a86da477
NC
914 like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i,
915 "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
916 like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i,
917 "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207");
e425a60b
YO
918 }
919 }
920 }
921 }
922
e425a60b 923 {
a86da477 924 my $message = '$REGMARK in replacement; Bug 49190';
e425a60b
YO
925 our $REGMARK;
926 my $_ = "A";
51b6c6b3
NC
927 ok(s/(*:B)A/$REGMARK/, $message);
928 is($_, "B", $message);
e425a60b 929 $_ = "CCCCBAA";
51b6c6b3
NC
930 ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message);
931 is($_, "ZYX", $message);
e425a60b
YO
932 }
933
e425a60b 934 {
a86da477 935 my $message = 'Substitution evaluation in list context; Bug 52658';
e425a60b
YO
936 my $reg = '../xxx/';
937 my @te = ($reg =~ m{^(/?(?:\.\./)*)},
938 $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++');
51b6c6b3
NC
939 is($reg, '../bbb/', $message);
940 is($te [0], '../', $message);
e425a60b
YO
941 }
942
e425a60b 943 {
e425a60b 944 my $a = "xyzt" x 8192;
a86da477
NC
945 like($a, qr/\A(?>[a-z])*\z/,
946 '(?>) does not cause wrongness on long string; Bug 60034');
e425a60b
YO
947 my $b = $a . chr 256;
948 chop $b;
4b0d13b9 949 is($a, $b, 'Bug 60034');
a86da477
NC
950 like($b, qr/\A(?>[a-z])*\z/,
951 '(?>) does not cause wrongness on long string with UTF-8; Bug 60034');
e425a60b
YO
952 }
953
e425a60b
YO
954 #
955 # Keep the following tests last -- they may crash perl
956 #
957 print "# Tests that follow may crash perl\n";
958 {
a86da477 959
51b6c6b3 960 my $message = 'Pattern in a loop, failure should not ' .
a86da477 961 'affect previous success; Bug 19049/38869';
e425a60b
YO
962 my @list = (
963 'ab cdef', # Matches regex
964 ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it
965 );
966 my $y;
967 my $x;
968 foreach (@list) {
969 m/ab(.+)cd/i; # The ignore-case seems to be important
970 $y = $1; # Use $1, which might not be from the last match!
971 $x = substr ($list [0], $- [0], $+ [0] - $- [0]);
972 }
51b6c6b3
NC
973 is($y, ' ', $message);
974 is($x, 'ab cd', $message);
e425a60b
YO
975 }
976
e425a60b 977 {
a86da477 978 ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274");
e425a60b 979 ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/,
a86da477 980 "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274");
e425a60b
YO
981 }
982
e425a60b 983 {
e425a60b 984 # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache
a86da477 985
e425a60b 986 local ${^UTF8CACHE} = -1;
a86da477 987 my $message = "Shouldn't panic; Bug 45337";
e425a60b
YO
988 my $s = "[a]a{2}";
989 utf8::upgrade $s;
51b6c6b3 990 like("aaa", qr/$s/, $message);
e425a60b
YO
991 }
992 {
a86da477 993 my $message = "Check if tree logic breaks \$^R; Bug 57042";
e425a60b
YO
994 my $cond_re = qr/\s*
995 \s* (?:
996 \( \s* A (?{1})
997 | \( \s* B (?{2})
998 )
999 /x;
1000 my @res;
1001 for my $line ("(A)","(B)") {
1002 if ($line =~ m/$cond_re/) {
1003 push @res, $^R ? "#$^R" : "UNDEF";
1004 }
1005 }
51b6c6b3 1006 is("@res","#1 #2", $message);
e425a60b
YO
1007 }
1008 {
1009 no warnings 'closure';
1010 my $re = qr/A(??{"1"})/;
1011 ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/;
1012 ok $1 eq "A1";
1013 ok $2 eq "B";
1014 }
1015
e425a60b
YO
1016 # This only works under -DEBUGGING because it relies on an assert().
1017 {
51b6c6b3 1018 # Check capture offset re-entrancy of utf8 code.
e425a60b
YO
1019
1020 sub fswash { $_[0] =~ s/([>X])//g; }
1021
1022 my $k1 = "." x 4 . ">>";
1023 fswash($k1);
1024
1025 my $k2 = "\x{f1}\x{2022}";
1026 $k2 =~ s/([\360-\362])/>/g;
1027 fswash($k2);
1028
a86da477 1029 is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508");
e425a60b
YO
1030 }
1031
e425a60b 1032 {
a86da477 1033 # minimal CURLYM limited to 32767 matches
e425a60b
YO
1034 my @pat = (
1035 qr{a(x|y)*b}, # CURLYM
1036 qr{a(x|y)*?b}, # .. with minmod
1037 qr{a([wx]|[yz])*b}, # .. and without tries
1038 qr{a([wx]|[yz])*?b},
1039 );
1040 my $len = 32768;
1041 my $s = join '', 'a', 'x' x $len, 'b';
1042 for my $pat (@pat) {
a86da477 1043 like($s, $pat, "$pat; Bug 65372");
e425a60b
YO
1044 }
1045 }
b4da7bae
B
1046
1047 {
04934b6d 1048 local $::TODO = "[perl #38133]";
b4da7bae
B
1049
1050 "A" =~ /(((?:A))?)+/;
1051 my $first = $2;
1052
1053 "A" =~ /(((A))?)+/;
1054 my $second = $2;
1055
de26e0cc 1056 is($first, $second);
b4da7bae 1057 }
aca53033
FC
1058
1059 {
51b6c6b3 1060 my $message
a86da477 1061 = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998';
aca53033
FC
1062
1063 # Catch warnings:
1064 my $w;
1065 local $SIG{__WARN__} = sub { $w .= shift };
1066
1067 # This bug can be reduced to
1068 qq{\x{30ab}} =~ /\xab|\xa9/;
1069 # but it's nice to have a more 'real-world' test. The original test
1070 # case from the RT ticket follows:
1071
1072 my %conv = (
1073 "\xab" => "&lt;",
1074 "\xa9" => "(c)",
1075 );
1076 my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')';
1077 $conv_rx = qr{$conv_rx};
1078
1079 my $x
1080 = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}}
1081 . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}}
1082 . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}}
1083 . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}}
1084 . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}}
1085 . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}}
1086 . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}}
1087 . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}};
1088
1089 $x =~ s{$conv_rx}{$conv{$1}}eg;
1090
51b6c6b3 1091 is($w, undef, $message);
aca53033
FC
1092 }
1093
e7f38d0f 1094 {
a86da477 1095 # minimal CURLYM limited to 32767 matches
51b6c6b3
NC
1096
1097 is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f",
a86da477 1098 'stclass optimisation does not break + inside (?=); Bug 68564');
e7f38d0f
YO
1099 }
1100
5c0a530f
NC
1101 {
1102 use charnames ":full";
1103 # Delayed interpolation of \N'
1104 my $r1 = qr/\N{THAI CHARACTER SARA I}/;
1105 my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}";
1106
1107 # Bug #56444
1108 ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/';
1109
1110 # Bug #62056
1111 ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/';
1112
1113 ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"';
1114 ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"';
1115 }
1116
1117 {
1118 use charnames ":full";
1119 my $message = '[perl #74982] Period coming after \N{}';
1120 ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message);
1121 ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message);
1122 }
1123
453466ce
NC
1124SKIP: {
1125 ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org
1126
1127 skip('Perl configured without Encode module', 1)
1128 unless $Config{extensions} =~ / Encode /;
1129
1130 # Test case cut down by jhi
1131 fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, 'Segfault using HTML::Entities');
1132use Encode;
1133my $t = ord('A') == 193 ? "\xEA" : "\xE9";
1134Encode::_utf8_on($t);
1135$t =~ s/([^a])//ge;
1136EOP
1137 }
1138
e425a60b
YO
1139} # End of sub run_tests
1140
11411;