Commit | Line | Data |
---|---|---|
e425a60b YO |
1 | #!./perl |
2 | # | |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by re/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to re/re_tests, not here. | |
6 | ||
7 | use strict; | |
8 | use warnings; | |
9 | use 5.010; | |
453466ce | 10 | use Config; |
e425a60b YO |
11 | |
12 | sub run_tests; | |
13 | ||
14 | $| = 1; | |
15 | ||
e425a60b YO |
16 | |
17 | BEGIN { | |
18 | chdir 't' if -d 't'; | |
9d45b377 | 19 | @INC = ('../lib','.'); |
6f4e0180 | 20 | require './test.pl'; |
2fcf74c4 | 21 | skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-"); |
e425a60b | 22 | } |
e425a60b | 23 | |
e425a60b | 24 | |
453466ce | 25 | plan tests => 2521; # Update this when adding/deleting tests. |
e425a60b | 26 | |
9d45b377 | 27 | run_tests() unless caller; |
e425a60b YO |
28 | |
29 | # | |
30 | # Tests start here. | |
31 | # | |
32 | sub run_tests { | |
33 | ||
a86da477 NC |
34 | like("A \x{263a} B z C", qr/A . B (??{ "z" }) C/, |
35 | "Match UTF-8 char in presence of (??{ }); Bug 20000731.001"); | |
36 | ||
e425a60b | 37 | { |
e425a60b | 38 | no warnings 'uninitialized'; |
a86da477 | 39 | ok(undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV; Bug 20001021.005"); |
e425a60b YO |
40 | } |
41 | ||
e425a60b | 42 | { |
51b6c6b3 | 43 | my $message = 'bug id 20001008.001'; |
e425a60b YO |
44 | |
45 | my @x = ("stra\337e 138", "stra\337e 138"); | |
46 | for (@x) { | |
51b6c6b3 NC |
47 | ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message); |
48 | ok(my ($latin) = /^(.+)(?:\s+\d)/, $message); | |
49 | is($latin, "stra\337e", $message); | |
50 | ok($latin =~ s/stra\337e/straße/, $message); | |
e425a60b YO |
51 | # |
52 | # Previous code follows, but outcommented - there were no tests. | |
53 | # | |
54 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
55 | # use utf8; # needed for the raw UTF-8 | |
56 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
57 | } | |
58 | } | |
59 | ||
e425a60b | 60 | { |
e425a60b | 61 | # Fist half of the bug. |
a86da477 | 62 | my $message = 'HEBREW ACCENT QADMA matched by .*; Bug 20001028.003'; |
e425a60b | 63 | my $X = chr (1448); |
51b6c6b3 NC |
64 | ok(my ($Y) = $X =~ /(.*)/, $message); |
65 | is($Y, v1448, $message); | |
66 | is(length $Y, 1, $message); | |
e425a60b YO |
67 | |
68 | # Second half of the bug. | |
a86da477 | 69 | $message = 'HEBREW ACCENT QADMA in replacement; Bug 20001028.003'; |
e425a60b YO |
70 | $X = ''; |
71 | $X =~ s/^/chr(1488)/e; | |
51b6c6b3 NC |
72 | is(length $X, 1, $message); |
73 | is(ord $X, 1488, $message); | |
e425a60b YO |
74 | } |
75 | ||
e425a60b | 76 | { |
a86da477 | 77 | my $message = 'Repeated s///; Bug 20001108.001'; |
e425a60b YO |
78 | my $X = "Szab\x{f3},Bal\x{e1}zs"; |
79 | my $Y = $X; | |
80 | $Y =~ s/(B)/$1/ for 0 .. 3; | |
51b6c6b3 NC |
81 | is($Y, $X, $message); |
82 | is($X, "Szab\x{f3},Bal\x{e1}zs", $message); | |
e425a60b YO |
83 | } |
84 | ||
e425a60b | 85 | { |
a86da477 | 86 | my $message = 's/// on UTF-8 string; Bug 20000517.001'; |
e425a60b YO |
87 | my $x = "\x{100}A"; |
88 | $x =~ s/A/B/; | |
51b6c6b3 NC |
89 | is($x, "\x{100}B", $message); |
90 | is(length $x, 2, $message); | |
e425a60b YO |
91 | } |
92 | ||
e425a60b | 93 | { |
a86da477 | 94 | my $message = '\C and É; Bug 20001230.002'; |
51b6c6b3 NC |
95 | ok("École" =~ /^\C\C(.)/ && $1 eq 'c', $message); |
96 | like("École", qr/^\C\C(c)/, $message); | |
e425a60b YO |
97 | } |
98 | ||
e425a60b YO |
99 | { |
100 | # The original bug report had 'no utf8' here but that was irrelevant. | |
a86da477 NC |
101 | |
102 | my $message = "Don't dump core; Bug 20010306.008"; | |
e425a60b | 103 | my $a = "a\x{1234}"; |
51b6c6b3 | 104 | like($a, qr/\w/, $message); # used to core dump. |
e425a60b YO |
105 | } |
106 | ||
e425a60b | 107 | { |
a86da477 | 108 | my $message = '/g in scalar context; Bug 20010410.006'; |
e425a60b YO |
109 | for my $rx ('/(.*?)\{(.*?)\}/csg', |
110 | '/(.*?)\{(.*?)\}/cg', | |
111 | '/(.*?)\{(.*?)\}/sg', | |
112 | '/(.*?)\{(.*?)\}/g', | |
113 | '/(.+?)\{(.+?)\}/csg',) { | |
114 | my $i = 0; | |
115 | my $input = "a{b}c{d}"; | |
116 | eval <<" --"; | |
117 | while (eval \$input =~ $rx) { | |
118 | \$i ++; | |
119 | } | |
120 | -- | |
51b6c6b3 | 121 | is($i, 2, $message); |
e425a60b YO |
122 | } |
123 | } | |
124 | ||
e425a60b | 125 | { |
e425a60b YO |
126 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
127 | for ("\n", "\t", "\014", "\r") { | |
5895685f | 128 | unlike($_, qr/[[:print:]]/, sprintf "\\%03o not in [[:print:]]; Bug 20010619.003", ord $_); |
e425a60b YO |
129 | } |
130 | for (" ") { | |
a86da477 | 131 | like($_, qr/[[:print:]]/, "'$_' in [[:print:]]; Bug 20010619.003"); |
e425a60b YO |
132 | } |
133 | } | |
134 | ||
e425a60b YO |
135 | { |
136 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
a86da477 | 137 | |
e425a60b YO |
138 | $_ = "ababacadaea"; |
139 | my $a = join ":", /b./gc; | |
140 | my $b = join ":", /a./gc; | |
141 | my $c = pos; | |
a86da477 | 142 | is("$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//; Bug 20010814.004"); |
e425a60b YO |
143 | } |
144 | ||
e425a60b YO |
145 | { |
146 | # [ID 20010407.006] matching utf8 return values from | |
147 | # functions does not work | |
a86da477 NC |
148 | |
149 | my $message = 'UTF-8 return values from functions; Bug 20010407.006'; | |
e425a60b YO |
150 | package ID_20010407_006; |
151 | sub x {"a\x{1234}"} | |
152 | my $x = x; | |
153 | my $y; | |
51b6c6b3 | 154 | ::ok($x =~ /(..)/, $message); |
e425a60b | 155 | $y = $1; |
51b6c6b3 NC |
156 | ::ok(length ($y) == 2 && $y eq $x, $message); |
157 | ::ok(x =~ /(..)/, $message); | |
e425a60b | 158 | $y = $1; |
51b6c6b3 | 159 | ::ok(length ($y) == 2 && $y eq $x, $message); |
e425a60b YO |
160 | } |
161 | ||
e425a60b YO |
162 | { |
163 | # High bit bug -- japhy | |
164 | my $x = "ab\200d"; | |
165 | ok $x =~ /.*?\200/, "High bit fine"; | |
166 | } | |
167 | ||
e425a60b | 168 | { |
51b6c6b3 | 169 | my $message = 'UTF-8 hash keys and /$/'; |
e425a60b YO |
170 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters |
171 | # /2002-01/msg01327.html | |
172 | ||
173 | my $u = "a\x{100}"; | |
174 | my $v = substr ($u, 0, 1); | |
175 | my $w = substr ($u, 1, 1); | |
176 | my %u = ($u => $u, $v => $v, $w => $w); | |
177 | for (keys %u) { | |
178 | my $m1 = /^\w*$/ ? 1 : 0; | |
179 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
51b6c6b3 | 180 | is($m1, $m2, $message); |
e425a60b YO |
181 | } |
182 | } | |
183 | ||
e425a60b | 184 | { |
a86da477 | 185 | my $message = "s///eg [change 13f46d054db22cf4]; Bug 20020124.005"; |
e425a60b YO |
186 | |
187 | for my $char ("a", "\x{df}", "\x{100}") { | |
188 | my $x = "$char b $char"; | |
189 | $x =~ s{($char)}{ | |
190 | "c" =~ /c/; | |
191 | "x"; | |
192 | }ge; | |
51b6c6b3 | 193 | is(substr ($x, 0, 1), substr ($x, -1, 1), $message); |
e425a60b YO |
194 | } |
195 | } | |
196 | ||
e425a60b | 197 | { |
a86da477 | 198 | my $message = "Correct pmop flags checked when empty pattern; Bug 20020412.005"; |
e425a60b YO |
199 | |
200 | # Requires reuse of last successful pattern. | |
201 | my $num = 123; | |
202 | $num =~ /\d/; | |
203 | for (0 .. 1) { | |
725a61d7 | 204 | my $match = m?? + 0; |
5895685f NC |
205 | ok($match != $_, $message) |
206 | or diag(sprintf "'match one' %s on %s iteration" => | |
207 | $match ? 'succeeded' : 'failed', | |
208 | $_ ? 'second' : 'first'); | |
e425a60b YO |
209 | } |
210 | $num =~ /(\d)/; | |
211 | my $result = join "" => $num =~ //g; | |
51b6c6b3 | 212 | is($result, $num, $message); |
e425a60b YO |
213 | } |
214 | ||
e425a60b | 215 | { |
a86da477 | 216 | my $message = 'UTF-8 regex matches above 32k; Bug 20020630.002'; |
e425a60b YO |
217 | for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { |
218 | my ($type, $char) = @$_; | |
219 | for my $len (32000, 32768, 33000) { | |
220 | my $s = $char . "f" x $len; | |
221 | my $r = $s =~ /$char([f]*)/gc; | |
5895685f NC |
222 | ok($r, $message) or diag("<$type x $len>"); |
223 | ok(!$r || pos ($s) == $len + 1, $message) | |
224 | or diag("<$type x $len>; pos = @{[pos $s]}"); | |
e425a60b YO |
225 | } |
226 | } | |
227 | } | |
228 | ||
e425a60b | 229 | { |
e425a60b YO |
230 | my $s = "\x{100}" x 5; |
231 | my $ok = $s =~ /(\x{100}{4})/; | |
232 | my ($ord, $len) = (ord $1, length $1); | |
1c6f1211 | 233 | ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift [change 0e933229fa758625]"; |
e425a60b YO |
234 | } |
235 | ||
e425a60b | 236 | { |
e425a60b YO |
237 | our $a = "x\x{100}"; |
238 | chop $a; # Leaves the UTF-8 flag | |
239 | $a .= "y"; # 1 byte before 'y'. | |
240 | ||
a86da477 NC |
241 | like($a, qr/^\C/, 'match one \C on 1-byte UTF-8; Bug 15763'); |
242 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); | |
e425a60b | 243 | |
a86da477 NC |
244 | like($a, qr/^\Cy/, 'match \Cy; Bug 15763'); |
245 | like($a, qr/^\C{1}y/, 'match \C{1}y; Bug 15763'); | |
e425a60b | 246 | |
a86da477 NC |
247 | unlike($a, qr/^\C\Cy/, q {don't match two \Cy; Bug 15763}); |
248 | unlike($a, qr/^\C{2}y/, q {don't match \C{2}y; Bug 15763}); | |
e425a60b YO |
249 | |
250 | $a = "\x{100}y"; # 2 bytes before "y" | |
251 | ||
a86da477 NC |
252 | like($a, qr/^\C/, 'match one \C on 2-byte UTF-8; Bug 15763'); |
253 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); | |
254 | like($a, qr/^\C\C/, 'match two \C; Bug 15763'); | |
255 | like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); | |
e425a60b | 256 | |
a86da477 NC |
257 | like($a, qr/^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte; Bug 15763'); |
258 | like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); | |
e425a60b | 259 | |
a86da477 NC |
260 | like($a, qr/^\C\Cy/, 'match two \C; Bug 15763'); |
261 | like($a, qr/^\C{2}y/, 'match \C{2}; Bug 15763'); | |
e425a60b | 262 | |
a86da477 NC |
263 | unlike($a, qr/^\C\C\Cy/, q {don't match three \Cy; Bug 15763}); |
264 | unlike($a, qr/^\C{2}\Cy/, q {don't match \C{2}\Cy; Bug 15763}); | |
265 | unlike($a, qr/^\C{3}y/, q {don't match \C{3}y; Bug 15763}); | |
e425a60b YO |
266 | |
267 | $a = "\x{1000}y"; # 3 bytes before "y" | |
268 | ||
a86da477 NC |
269 | like($a, qr/^\C/, 'match one \C on three-byte UTF-8; Bug 15763'); |
270 | like($a, qr/^\C{1}/, 'match \C{1}; Bug 15763'); | |
271 | like($a, qr/^\C\C/, 'match two \C; Bug 15763'); | |
272 | like($a, qr/^\C{2}/, 'match \C{2}; Bug 15763'); | |
273 | like($a, qr/^\C\C\C/, 'match three \C; Bug 15763'); | |
274 | like($a, qr/^\C{3}/, 'match \C{3}; Bug 15763'); | |
e425a60b | 275 | |
a86da477 NC |
276 | like($a, qr/^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte; Bug 15763'); |
277 | like($a, qr/^\C{4}/, 'match \C{4}; Bug 15763'); | |
e425a60b | 278 | |
a86da477 NC |
279 | like($a, qr/^\C\C\Cy/, 'match three \Cy; Bug 15763'); |
280 | like($a, qr/^\C{3}y/, 'match \C{3}y; Bug 15763'); | |
e425a60b | 281 | |
a86da477 NC |
282 | unlike($a, qr/^\C\C\C\Cy/, q {don't match four \Cy; Bug 15763}); |
283 | unlike($a, qr/^\C{4}y/, q {don't match \C{4}y; Bug 15763}); | |
e425a60b YO |
284 | } |
285 | ||
286 | ||
287 | { | |
a86da477 | 288 | my $message = 'UTF-8 matching; Bug 15397'; |
51b6c6b3 NC |
289 | like("\x{100}", qr/\x{100}/, $message); |
290 | like("\x{100}", qr/(\x{100})/, $message); | |
291 | like("\x{100}", qr/(\x{100}){1}/, $message); | |
292 | like("\x{100}\x{100}", qr/(\x{100}){2}/, $message); | |
293 | like("\x{100}\x{100}", qr/(\x{100})(\x{100})/, $message); | |
e425a60b YO |
294 | } |
295 | ||
e425a60b | 296 | { |
a86da477 | 297 | my $message = 'Neither ()* nor ()*? sets $1 when matched 0 times; Bug 7471'; |
e425a60b | 298 | local $_ = 'CD'; |
51b6c6b3 NC |
299 | ok(/(AB)*?CD/ && !defined $1, $message); |
300 | ok(/(AB)*CD/ && !defined $1, $message); | |
e425a60b YO |
301 | } |
302 | ||
e425a60b | 303 | { |
a86da477 | 304 | my $message = "Caching shouldn't prevent match; Bug 3547"; |
e425a60b | 305 | my $pattern = "^(b+?|a){1,2}c"; |
51b6c6b3 NC |
306 | ok("bac" =~ /$pattern/ && $1 eq 'a', $message); |
307 | ok("bbac" =~ /$pattern/ && $1 eq 'a', $message); | |
308 | ok("bbbac" =~ /$pattern/ && $1 eq 'a', $message); | |
309 | ok("bbbbac" =~ /$pattern/ && $1 eq 'a', $message); | |
e425a60b YO |
310 | } |
311 | ||
e425a60b | 312 | { |
a86da477 NC |
313 | ok("\x{100}" =~ /(.)/, '$1 should keep UTF-8 ness; Bug 18232'); |
314 | is($1, "\x{100}", '$1 is UTF-8; Bug 18232'); | |
e425a60b | 315 | { 'a' =~ /./; } |
a86da477 NC |
316 | is($1, "\x{100}", '$1 is still UTF-8; Bug 18232'); |
317 | isnt($1, "\xC4\x80", '$1 is not non-UTF-8; Bug 18232'); | |
e425a60b YO |
318 | } |
319 | ||
e425a60b | 320 | { |
a86da477 | 321 | my $message = "Optimizer doesn't prematurely reject match; Bug 19767"; |
e425a60b YO |
322 | use utf8; |
323 | ||
324 | my $attr = 'Name-1'; | |
325 | my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
326 | my $NormalWord = qr /${NormalChar}+?/; | |
327 | my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; | |
328 | ||
329 | $attr =~ /^$/; | |
51b6c6b3 | 330 | like($attr, $PredNameHyphen, $message); # Original test. |
e425a60b YO |
331 | |
332 | "a" =~ m/[b]/; | |
51b6c6b3 | 333 | like("0", qr/\p{N}+\z/, $message); # Variant. |
e425a60b YO |
334 | } |
335 | ||
e425a60b | 336 | { |
a86da477 | 337 | my $message = "(??{ }) doesn't return stale values; Bug 20683"; |
e425a60b YO |
338 | our $p = 1; |
339 | foreach (1, 2, 3, 4) { | |
340 | $p ++ if /(??{ $p })/ | |
341 | } | |
51b6c6b3 | 342 | is($p, 5, $message); |
e425a60b YO |
343 | |
344 | { | |
345 | package P; | |
346 | $a = 1; | |
347 | sub TIESCALAR {bless []} | |
348 | sub FETCH {$a ++} | |
349 | } | |
350 | tie $p, "P"; | |
351 | foreach (1, 2, 3, 4) { | |
352 | /(??{ $p })/ | |
353 | } | |
51b6c6b3 | 354 | is($p, 5, $message); |
e425a60b YO |
355 | } |
356 | ||
e425a60b YO |
357 | { |
358 | # Subject: Odd regexp behavior | |
359 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
360 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
361 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
362 | # To: perl-unicode@perl.org | |
363 | ||
51b6c6b3 | 364 | my $message = 'Markus Kuhn 2003-02-26'; |
e425a60b YO |
365 | |
366 | my $x = "\x{2019}\nk"; | |
51b6c6b3 NC |
367 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
368 | is($x, "\x{2019} k", $message); | |
e425a60b YO |
369 | |
370 | $x = "b\nk"; | |
51b6c6b3 NC |
371 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
372 | is($x, "b k", $message); | |
e425a60b | 373 | |
51b6c6b3 | 374 | like("\x{2019}", qr/\S/, $message); |
e425a60b YO |
375 | } |
376 | ||
e425a60b | 377 | { |
a86da477 | 378 | my $message = "(??{ .. }) in split doesn't corrupt its stack; Bug 21411"; |
e425a60b | 379 | our $i; |
51b6c6b3 | 380 | is('-1-3-5-', join('', split /((??{$i++}))/, '-1-3-5-'), $message); |
e425a60b YO |
381 | no warnings 'syntax'; |
382 | @_ = split /(?{'WOW'})/, 'abc'; | |
383 | local $" = "|"; | |
51b6c6b3 | 384 | is("@_", "a|b|c", $message); |
e425a60b YO |
385 | } |
386 | ||
e425a60b YO |
387 | { |
388 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it | |
389 | # hasn't been crashing. Disable this test until it is fixed properly. | |
390 | # XXX also check what it returns rather than just doing ok(1,...) | |
391 | # split /(?{ split "" })/, "abc"; | |
04934b6d | 392 | local $::TODO = "Recursive split is still broken"; |
e425a60b YO |
393 | ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; |
394 | } | |
395 | ||
e425a60b | 396 | { |
e425a60b YO |
397 | $_ = "code: 'x' { '...' }\n"; study; |
398 | my @x; push @x, $& while m/'[^\']*'/gx; | |
399 | local $" = ":"; | |
a86da477 | 400 | is("@x", "'x':'...'", "Parse::RecDescent triggered infinite loop; Bug 17757"); |
e425a60b YO |
401 | } |
402 | ||
e425a60b | 403 | { |
e425a60b | 404 | sub func ($) { |
a86da477 NC |
405 | ok("a\nb" !~ /^b/, "Propagated modifier; $_[0]; Bug 22354"); |
406 | ok("a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m; Bug 22354"); | |
e425a60b YO |
407 | } |
408 | func "standalone"; | |
409 | $_ = "x"; s/x/func "in subst"/e; | |
410 | $_ = "x"; s/x/func "in multiline subst"/em; | |
1d0dc949 FC |
411 | $_ = "x"; /x(?{func "in regexp"})/; |
412 | $_ = "x"; /x(?{func "in multiline regexp"})/m; | |
e425a60b YO |
413 | } |
414 | ||
e425a60b | 415 | { |
e425a60b YO |
416 | $_ = "abcdef\n"; |
417 | my @x = m/./g; | |
a86da477 | 418 | is("abcde", $`, 'Global match sets $`; Bug 19049'); |
e425a60b YO |
419 | } |
420 | ||
e425a60b | 421 | { |
e425a60b YO |
422 | # [perl #23769] Unicode regex broken on simple example |
423 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
a86da477 | 424 | |
e425a60b | 425 | my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; |
a86da477 | 426 | my $message = "$Mess; Bug 23769"; |
e425a60b YO |
427 | |
428 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; | |
429 | ||
51b6c6b3 NC |
430 | like($s, qr/\x{a0}/, $message); |
431 | like($s, qr/\x{a0}+/, $message); | |
432 | like($s, qr/\x{a0}\x{a0}/, $message); | |
e425a60b | 433 | |
a86da477 | 434 | $message = "$Mess (easy variant); Bug 23769"; |
51b6c6b3 NC |
435 | ok("aaa\x{100}" =~ /(a+)/, $message); |
436 | is($1, "aaa", $message); | |
e425a60b | 437 | |
a86da477 | 438 | $message = "$Mess (easy invariant); Bug 23769"; |
51b6c6b3 NC |
439 | ok("aaa\x{100} " =~ /(a+?)/, $message); |
440 | is($1, "a", $message); | |
e425a60b | 441 | |
a86da477 | 442 | $message = "$Mess (regrepeat variant); Bug 23769"; |
51b6c6b3 NC |
443 | ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, $message); |
444 | is($1, "\xa0", $message); | |
e425a60b | 445 | |
a86da477 | 446 | $message = "$Mess (regrepeat invariant); Bug 23769"; |
51b6c6b3 NC |
447 | ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, $message); |
448 | is($1, "\xa0\xa0\xa0", $message); | |
e425a60b | 449 | |
a86da477 | 450 | $message = "$Mess (hard variant); Bug 23769"; |
51b6c6b3 NC |
451 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, $message); |
452 | is($1, "\xa0\xa1", $message); | |
e425a60b | 453 | |
a86da477 | 454 | $message = "$Mess (hard invariant); Bug 23769"; |
51b6c6b3 NC |
455 | ok("ababab\x{100} " =~ /((?:ab)+)/, $message); |
456 | is($1, 'ababab', $message); | |
e425a60b | 457 | |
51b6c6b3 NC |
458 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, $message); |
459 | is($1, "\xa0\xa1\xa0\xa1\xa0\xa1", $message); | |
e425a60b | 460 | |
51b6c6b3 NC |
461 | ok("ababab\x{100} " =~ /((?:ab)+?)/, $message); |
462 | is($1, "ab", $message); | |
e425a60b | 463 | |
a86da477 | 464 | $message = "Don't match first byte of UTF-8 representation; Bug 23769"; |
51b6c6b3 NC |
465 | unlike("\xc4\xc4\xc4", qr/(\x{100}+)/, $message); |
466 | unlike("\xc4\xc4\xc4", qr/(\x{100}+?)/, $message); | |
467 | unlike("\xc4\xc4\xc4", qr/(\x{100}++)/, $message); | |
e425a60b YO |
468 | } |
469 | ||
e425a60b | 470 | { |
e425a60b | 471 | # perl panic: pp_match start/end pointers |
a86da477 NC |
472 | |
473 | is(eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, "a-bc", | |
474 | 'Captures can move backwards in string; Bug 25269'); | |
e425a60b YO |
475 | } |
476 | ||
e425a60b | 477 | { |
a86da477 NC |
478 | # \cA not recognized in character classes |
479 | like("a\cAb", qr/\cA/, '\cA in pattern; Bug 27940'); | |
480 | like("a\cAb", qr/[\cA]/, '\cA in character class; Bug 27940'); | |
481 | like("a\cAb", qr/[\cA-\cB]/, '\cA in character class range; Bug 27940'); | |
482 | like("abc", qr/[^\cA-\cB]/, '\cA in negated character class range; Bug 27940'); | |
483 | like("a\cBb", qr/[\cA-\cC]/, '\cB in character class range; Bug 27940'); | |
484 | like("a\cCbc", qr/[^\cA-\cB]/, '\cC in negated character class range; Bug 27940'); | |
485 | like("a\cAb", qr/(??{"\cA"})/, '\cA in ??{} pattern; Bug 27940'); | |
486 | unlike("ab", qr/a\cIb/x, '\cI in pattern; Bug 27940'); | |
e425a60b YO |
487 | } |
488 | ||
e425a60b | 489 | { |
9d45b377 | 490 | # perl #28532: optional zero-width match at end of string is ignored |
a86da477 NC |
491 | |
492 | ok("abc" =~ /^abc(\z)?/ && defined($1), | |
493 | 'Optional zero-width match at end of string; Bug 28532'); | |
494 | ok("abc" =~ /^abc(\z)??/ && !defined($1), | |
495 | 'Optional zero-width match at end of string; Bug 28532'); | |
e425a60b YO |
496 | } |
497 | ||
e425a60b | 498 | { |
e425a60b YO |
499 | my $utf8 = "\xe9\x{100}"; chop $utf8; |
500 | my $latin1 = "\xe9"; | |
501 | ||
a86da477 NC |
502 | like($utf8, qr/\xe9/i, "utf8/latin; Bug 36207"); |
503 | like($utf8, qr/$latin1/i, "utf8/latin runtime; Bug 36207"); | |
504 | like($utf8, qr/(abc|\xe9)/i, "utf8/latin trie; Bug 36207"); | |
505 | like($utf8, qr/(abc|$latin1)/i, "utf8/latin trie runtime; Bug 36207"); | |
e425a60b | 506 | |
a86da477 NC |
507 | like("\xe9", qr/$utf8/i, "latin/utf8; Bug 36207"); |
508 | like("\xe9", qr/(abc|$utf8)/i, "latin/utf8 trie; Bug 36207"); | |
509 | like($latin1, qr/$utf8/i, "latin/utf8 runtime; Bug 36207"); | |
510 | like($latin1, qr/(abc|$utf8)/i, "latin/utf8 trie runtime; Bug 36207"); | |
e425a60b YO |
511 | } |
512 | ||
e425a60b | 513 | { |
e425a60b YO |
514 | my $s = "abcd"; |
515 | $s =~ /(..)(..)/g; | |
516 | $s = $1; | |
517 | $s = $2; | |
a86da477 NC |
518 | is($2, 'cd', |
519 | "Assigning to original string does not corrupt match vars; Bug 37038"); | |
e425a60b YO |
520 | } |
521 | ||
e425a60b YO |
522 | { |
523 | { | |
524 | package wooosh; | |
525 | sub gloople {"!"} | |
526 | } | |
527 | my $aeek = bless {} => 'wooosh'; | |
2e321296 NC |
528 | is(do {$aeek -> gloople () =~ /(.)/g}, 1, |
529 | "//g match against return value of sub [change e26a497577f3ce7b]"); | |
e425a60b YO |
530 | |
531 | sub gloople {"!"} | |
2e321296 NC |
532 | is(do{gloople () =~ /(.)/g}, 1, |
533 | "change e26a497577f3ce7b didn't affect sub calls for some reason"); | |
e425a60b YO |
534 | } |
535 | ||
e425a60b | 536 | { |
04934b6d | 537 | local $::TODO = "See changes 26925-26928, which reverted change 26410"; |
e425a60b YO |
538 | { |
539 | package lv; | |
540 | our $var = "abc"; | |
541 | sub variable : lvalue {$var} | |
542 | } | |
543 | my $o = bless [] => 'lv'; | |
544 | my $f = ""; | |
545 | my $r = eval { | |
546 | for (1 .. 2) { | |
547 | $f .= $1 if $o -> variable =~ /(.)/g; | |
548 | } | |
549 | 1; | |
550 | }; | |
551 | if ($r) { | |
de26e0cc | 552 | is($f, "ab", "pos() retained between calls"); |
e425a60b YO |
553 | } |
554 | else { | |
04934b6d | 555 | local $::TODO; |
e425a60b YO |
556 | ok 0, "Code failed: $@"; |
557 | } | |
558 | ||
559 | our $var = "abc"; | |
560 | sub variable : lvalue {$var} | |
561 | my $g = ""; | |
562 | my $s = eval { | |
563 | for (1 .. 2) { | |
564 | $g .= $1 if variable =~ /(.)/g; | |
565 | } | |
566 | 1; | |
567 | }; | |
568 | if ($s) { | |
de26e0cc | 569 | is($g, "ab", "pos() retained between calls"); |
e425a60b YO |
570 | } |
571 | else { | |
04934b6d | 572 | local $::TODO; |
e425a60b YO |
573 | ok 0, "Code failed: $@"; |
574 | } | |
575 | } | |
576 | ||
e425a60b YO |
577 | SKIP: |
578 | { | |
ef237063 | 579 | skip "In EBCDIC" if $::IS_EBCDIC; |
e425a60b YO |
580 | no warnings 'utf8'; |
581 | $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 | |
582 | my $ret = 0; | |
2e321296 NC |
583 | is(do {!($ret = s/[\0]+//g)}, 1, |
584 | "Ill-formed UTF-8 doesn't match NUL in class; Bug 37836"); | |
e425a60b YO |
585 | } |
586 | ||
e425a60b YO |
587 | { |
588 | # chr(65535) should be allowed in regexes | |
a86da477 | 589 | |
e425a60b YO |
590 | no warnings 'utf8'; # To allow non-characters |
591 | my ($c, $r, $s); | |
592 | ||
593 | $c = chr 0xffff; | |
594 | $c =~ s/$c//g; | |
a86da477 | 595 | is($c, "", "U+FFFF, parsed as atom; Bug 38293"); |
e425a60b YO |
596 | |
597 | $c = chr 0xffff; | |
598 | $r = "\\$c"; | |
599 | $c =~ s/$r//g; | |
a86da477 | 600 | is($c, "", "U+FFFF backslashed, parsed as atom; Bug 38293"); |
e425a60b YO |
601 | |
602 | $c = chr 0xffff; | |
603 | $c =~ s/[$c]//g; | |
a86da477 | 604 | is($c, "", "U+FFFF, parsed in class; Bug 38293"); |
e425a60b YO |
605 | |
606 | $c = chr 0xffff; | |
607 | $r = "[\\$c]"; | |
608 | $c =~ s/$r//g; | |
a86da477 | 609 | is($c, "", "U+FFFF backslashed, parsed in class; Bug 38293"); |
e425a60b YO |
610 | |
611 | $s = "A\x{ffff}B"; | |
612 | $s =~ s/\x{ffff}//i; | |
a86da477 | 613 | is($s, "AB", "U+FFFF, EXACTF; Bug 38293"); |
e425a60b YO |
614 | |
615 | $s = "\x{ffff}A"; | |
616 | $s =~ s/\bA//; | |
a86da477 | 617 | is($s, "\x{ffff}", "U+FFFF, BOUND; Bug 38293"); |
e425a60b YO |
618 | |
619 | $s = "\x{ffff}!"; | |
620 | $s =~ s/\B!//; | |
a86da477 | 621 | is($s, "\x{ffff}", "U+FFFF, NBOUND; Bug 38293"); |
e425a60b YO |
622 | } |
623 | ||
e425a60b | 624 | { |
e425a60b YO |
625 | |
626 | # The printing characters | |
627 | my @chars = ("A" .. "Z"); | |
628 | my $delim = ","; | |
629 | my $size = 32771 - 4; | |
630 | my $str = ''; | |
631 | ||
632 | # Create some random junk. Inefficient, but it works. | |
633 | for (my $i = 0; $i < $size; $ i++) { | |
634 | $str .= $chars [rand @chars]; | |
635 | } | |
636 | ||
637 | $str .= ($delim x 4); | |
638 | my $res; | |
639 | my $matched; | |
a86da477 NC |
640 | ok($str =~ s/^(.*?)${delim}{4}//s, "Pattern matches; Bug 39583"); |
641 | is($str, "", "Empty string; Bug 39583"); | |
642 | ok(defined $1 && length ($1) == $size, '$1 is correct size; Bug 39583'); | |
e425a60b YO |
643 | } |
644 | ||
e425a60b | 645 | { |
a86da477 NC |
646 | like("\0-A", qr/\c@-A/, '@- should not be interpolated in a pattern; Bug 27940'); |
647 | like("\0\0A", qr/\c@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); | |
648 | like("X\@-A", qr/X@-A/, '@- should not be interpolated in a pattern; Bug 27940'); | |
649 | like("X\@\@A", qr/X@+A/, '@+ should not be interpolated in a pattern; Bug 27940'); | |
e425a60b | 650 | |
a86da477 NC |
651 | like("X\0A", qr/X\c@?A/, '\c@?; Bug 27940'); |
652 | like("X\0A", qr/X\c@*A/, '\c@*; Bug 27940'); | |
653 | like("X\0A", qr/X\c@(A)/, '\c@(; Bug 27940'); | |
654 | like("X\0A", qr/X(\c@)A/, '\c@); Bug 27940'); | |
655 | like("X\0A", qr/X\c@|ZA/, '\c@|; Bug 27940'); | |
656 | ||
657 | like("X\@A", qr/X@?A/, '@?; Bug 27940'); | |
658 | like("X\@A", qr/X@*A/, '@*; Bug 27940'); | |
659 | like("X\@A", qr/X@(A)/, '@(; Bug 27940'); | |
660 | like("X\@A", qr/X(@)A/, '@); Bug 27940'); | |
661 | like("X\@A", qr/X@|ZA/, '@|; Bug 27940'); | |
e425a60b YO |
662 | |
663 | local $" = ','; # non-whitespace and non-RE-specific | |
a86da477 NC |
664 | like('abc', qr/(.)(.)(.)/, 'The last successful match is bogus; Bug 27940'); |
665 | like("A@+B", qr/A@{+}B/, 'Interpolation of @+ in /@{+}/; Bug 27940'); | |
666 | like("A@-B", qr/A@{-}B/, 'Interpolation of @- in /@{-}/; Bug 27940'); | |
667 | like("A@+B", qr/A@{+}B/x, 'Interpolation of @+ in /@{+}/x; Bug 27940'); | |
668 | like("A@-B", qr/A@{-}B/x, 'Interpolation of @- in /@{-}/x; Bug 27940'); | |
e425a60b YO |
669 | } |
670 | ||
e425a60b | 671 | { |
e425a60b YO |
672 | my $s = 'foo bar baz'; |
673 | my (@k, @v, @fetch, $res); | |
674 | my $count = 0; | |
675 | my @names = qw ($+{A} $+{B} $+{C}); | |
676 | if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { | |
677 | while (my ($k, $v) = each (%+)) { | |
678 | $count++; | |
679 | } | |
680 | @k = sort keys (%+); | |
681 | @v = sort values (%+); | |
682 | $res = 1; | |
683 | push @fetch, | |
684 | ["$+{A}", "$1"], | |
685 | ["$+{B}", "$2"], | |
686 | ["$+{C}", "$3"], | |
687 | ; | |
688 | } | |
689 | foreach (0 .. 2) { | |
690 | if ($fetch [$_]) { | |
a86da477 | 691 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
e425a60b YO |
692 | } else { |
693 | ok 0, $names[$_]; | |
694 | } | |
695 | } | |
a86da477 NC |
696 | is($res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/; Bug 50496"); |
697 | is($count, 3, "Got 3 keys in %+ via each; Bug 50496"); | |
698 | is(0 + @k, 3, "Got 3 keys in %+ via keys; Bug 50496"); | |
699 | is("@k", "A B C", "Got expected keys; Bug 50496"); | |
700 | is("@v", "bar baz foo", "Got expected values; Bug 50496"); | |
e425a60b YO |
701 | eval ' |
702 | no warnings "uninitialized"; | |
703 | print for $+ {this_key_doesnt_exist}; | |
704 | '; | |
a86da477 | 705 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
e425a60b YO |
706 | } |
707 | ||
e425a60b YO |
708 | { |
709 | # | |
710 | # Almost the same as the block above, except that the capture is nested. | |
711 | # | |
a86da477 | 712 | |
e425a60b YO |
713 | my $s = 'foo bar baz'; |
714 | my (@k, @v, @fetch, $res); | |
715 | my $count = 0; | |
716 | my @names = qw ($+{A} $+{B} $+{C} $+{D}); | |
717 | if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { | |
718 | while (my ($k,$v) = each(%+)) { | |
719 | $count++; | |
720 | } | |
721 | @k = sort keys (%+); | |
722 | @v = sort values (%+); | |
723 | $res = 1; | |
724 | push @fetch, | |
725 | ["$+{A}", "$2"], | |
726 | ["$+{B}", "$3"], | |
727 | ["$+{C}", "$4"], | |
728 | ["$+{D}", "$1"], | |
729 | ; | |
730 | } | |
731 | foreach (0 .. 3) { | |
732 | if ($fetch [$_]) { | |
a86da477 | 733 | is($fetch[$_][0], $fetch[$_][1], "$names[$_]; Bug 50496"); |
e425a60b YO |
734 | } else { |
735 | ok 0, $names [$_]; | |
736 | } | |
737 | } | |
a86da477 NC |
738 | is($res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/; Bug 50496"); |
739 | is($count, 4, "Got 4 keys in %+ via each; Bug 50496"); | |
740 | is(@k, 4, "Got 4 keys in %+ via keys; Bug 50496"); | |
741 | is("@k", "A B C D", "Got expected keys; Bug 50496"); | |
742 | is("@v", "bar baz foo foo bar baz", "Got expected values; Bug 50496"); | |
e425a60b YO |
743 | eval ' |
744 | no warnings "uninitialized"; | |
745 | print for $+ {this_key_doesnt_exist}; | |
746 | '; | |
a86da477 | 747 | is($@, '', 'lvalue $+ {...} should not throw an exception; Bug 50496'); |
e425a60b YO |
748 | } |
749 | ||
e425a60b | 750 | { |
e425a60b YO |
751 | my $str = 'abc'; |
752 | my $count = 0; | |
753 | my $mval = 0; | |
754 | my $pval = 0; | |
755 | while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} | |
a86da477 NC |
756 | is($mval, 0, '@- should be empty; Bug 36046'); |
757 | is($pval, 0, '@+ should be empty; Bug 36046'); | |
758 | is($count, 1, 'Should have matched once only; Bug 36046'); | |
e425a60b YO |
759 | } |
760 | ||
e425a60b | 761 | { |
a86da477 | 762 | my $message = '/m in precompiled regexp; Bug 40684'; |
e425a60b YO |
763 | my $s = "abc\ndef"; |
764 | my $rex = qr'^abc$'m; | |
51b6c6b3 NC |
765 | ok($s =~ m/$rex/, $message); |
766 | ok($s =~ m/^abc$/m, $message); | |
e425a60b YO |
767 | } |
768 | ||
e425a60b | 769 | { |
a86da477 | 770 | my $message = '(?: ... )? should not lose $^R; Bug 36909'; |
e425a60b YO |
771 | $^R = 'Nothing'; |
772 | { | |
773 | local $^R = "Bad"; | |
51b6c6b3 | 774 | ok('x foofoo y' =~ m { |
e425a60b YO |
775 | (foo) # $^R correctly set |
776 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
777 | }x, $message); |
778 | is($^R, 'last regexp code result', $message); | |
e425a60b | 779 | } |
51b6c6b3 | 780 | is($^R, 'Nothing', $message); |
e425a60b YO |
781 | |
782 | { | |
783 | local $^R = "Bad"; | |
784 | ||
51b6c6b3 | 785 | ok('x foofoo y' =~ m { |
e425a60b YO |
786 | (?:foo|bar)+ # $^R correctly set |
787 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
788 | }x, $message); |
789 | is($^R, 'last regexp code result', $message); | |
e425a60b | 790 | } |
51b6c6b3 | 791 | is($^R, 'Nothing', $message); |
e425a60b YO |
792 | |
793 | { | |
794 | local $^R = "Bad"; | |
51b6c6b3 | 795 | ok('x foofoo y' =~ m { |
e425a60b YO |
796 | (foo|bar)\1+ # $^R undefined |
797 | (?{ "last regexp code result" }) | |
51b6c6b3 NC |
798 | }x, $message); |
799 | is($^R, 'last regexp code result', $message); | |
e425a60b | 800 | } |
51b6c6b3 | 801 | is($^R, 'Nothing', $message); |
e425a60b YO |
802 | |
803 | { | |
804 | local $^R = "Bad"; | |
51b6c6b3 | 805 | ok('x foofoo y' =~ m { |
e425a60b YO |
806 | (foo|bar)\1 # This time without the + |
807 | (?{"last regexp code result"}) | |
51b6c6b3 NC |
808 | }x, $message); |
809 | is($^R, 'last regexp code result', $message); | |
e425a60b | 810 | } |
51b6c6b3 | 811 | is($^R, 'Nothing', $message); |
e425a60b YO |
812 | } |
813 | ||
e425a60b | 814 | { |
a86da477 | 815 | my $message = 'Match is linear, not quadratic; Bug 22395'; |
e425a60b YO |
816 | our $count; |
817 | for my $l (10, 100, 1000) { | |
818 | $count = 0; | |
819 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; | |
04934b6d | 820 | local $::TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; |
51b6c6b3 | 821 | is($count, $l + 1, $message); |
e425a60b YO |
822 | } |
823 | } | |
824 | ||
e425a60b | 825 | { |
a86da477 | 826 | my $message = '@-/@+ should not have undefined values; Bug 22614'; |
e425a60b YO |
827 | local $_ = 'ab'; |
828 | our @len = (); | |
829 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
51b6c6b3 | 830 | is("@len", "2 2 2", $message); |
e425a60b YO |
831 | } |
832 | ||
e425a60b | 833 | { |
a86da477 | 834 | my $message = '$& set on s///; Bug 18209'; |
e425a60b YO |
835 | my $text = ' word1 word2 word3 word4 word5 word6 '; |
836 | ||
837 | my @words = ('word1', 'word3', 'word5'); | |
838 | my $count; | |
839 | foreach my $word (@words) { | |
93f09d7b | 840 | $text =~ s/$word\s//gi; # Leave a space to separate words |
e425a60b YO |
841 | # in the resultant str. |
842 | # The following block is not working. | |
843 | if ($&) { | |
844 | $count ++; | |
845 | } | |
846 | # End bad block | |
847 | } | |
51b6c6b3 NC |
848 | is($count, 3, $message); |
849 | is($text, ' word2 word4 word6 ', $message); | |
e425a60b YO |
850 | } |
851 | ||
e425a60b YO |
852 | { |
853 | # RT#6893 | |
a86da477 | 854 | |
e425a60b YO |
855 | local $_ = qq (A\nB\nC\n); |
856 | my @res; | |
857 | while (m#(\G|\n)([^\n]*)\n#gsx) { | |
858 | push @res, "$2"; | |
859 | last if @res > 3; | |
860 | } | |
de26e0cc | 861 | is("@res", "A B C", "/g pattern shouldn't infinite loop; Bug 6893"); |
e425a60b YO |
862 | } |
863 | ||
e425a60b | 864 | { |
51b6c6b3 | 865 | # No optimizer bug |
e425a60b YO |
866 | my @tails = ('', '(?(1))', '(|)', '()?'); |
867 | my @quants = ('*','+'); | |
868 | my $doit = sub { | |
869 | my $pats = shift; | |
870 | for (@_) { | |
871 | for my $pat (@$pats) { | |
872 | for my $quant (@quants) { | |
873 | for my $tail (@tails) { | |
874 | my $re = "($pat$quant\$)$tail"; | |
a86da477 NC |
875 | ok(/$re/ && $1 eq $_, "'$_' =~ /$re/; Bug 41010"); |
876 | ok(/$re/m && $1 eq $_, "'$_' =~ /$re/m; Bug 41010"); | |
e425a60b YO |
877 | } |
878 | } | |
879 | } | |
880 | } | |
881 | }; | |
882 | ||
883 | my @dpats = ('\d', | |
884 | '[1234567890]', | |
885 | '(1|[23]|4|[56]|[78]|[90])', | |
886 | '(?:1|[23]|4|[56]|[78]|[90])', | |
887 | '(1|2|3|4|5|6|7|8|9|0)', | |
888 | '(?:1|2|3|4|5|6|7|8|9|0)'); | |
889 | my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); | |
890 | my @sstrs = (' '); | |
891 | my @dstrs = ('12345'); | |
892 | $doit -> (\@spats, @sstrs); | |
893 | $doit -> (\@dpats, @dstrs); | |
894 | } | |
895 | ||
e425a60b | 896 | { |
e425a60b YO |
897 | # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string |
898 | ||
899 | my $utf_8 = "\xd6schel"; | |
900 | utf8::upgrade ($utf_8); | |
901 | $utf_8 =~ m {(\xd6|Ö)schel}; | |
a86da477 | 902 | is($1, "\xd6", "Upgrade error; Bug 45605"); |
e425a60b YO |
903 | } |
904 | ||
905 | { | |
e425a60b YO |
906 | # Regardless of utf8ness any character matches itself when |
907 | # doing a case insensitive match. See also [perl #36207] | |
a86da477 | 908 | |
e425a60b YO |
909 | for my $o (0 .. 255) { |
910 | my @ch = (chr ($o), chr ($o)); | |
911 | utf8::upgrade ($ch [1]); | |
912 | for my $u_str (0, 1) { | |
913 | for my $u_pat (0, 1) { | |
a86da477 NC |
914 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E/i, |
915 | "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); | |
916 | like($ch[$u_str], qr/\Q$ch[$u_pat]\E|xyz/i, | |
917 | "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat; Bug 36207"); | |
e425a60b YO |
918 | } |
919 | } | |
920 | } | |
921 | } | |
922 | ||
e425a60b | 923 | { |
a86da477 | 924 | my $message = '$REGMARK in replacement; Bug 49190'; |
e425a60b YO |
925 | our $REGMARK; |
926 | my $_ = "A"; | |
51b6c6b3 NC |
927 | ok(s/(*:B)A/$REGMARK/, $message); |
928 | is($_, "B", $message); | |
e425a60b | 929 | $_ = "CCCCBAA"; |
51b6c6b3 NC |
930 | ok(s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g, $message); |
931 | is($_, "ZYX", $message); | |
e425a60b YO |
932 | } |
933 | ||
e425a60b | 934 | { |
a86da477 | 935 | my $message = 'Substitution evaluation in list context; Bug 52658'; |
e425a60b YO |
936 | my $reg = '../xxx/'; |
937 | my @te = ($reg =~ m{^(/?(?:\.\./)*)}, | |
938 | $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); | |
51b6c6b3 NC |
939 | is($reg, '../bbb/', $message); |
940 | is($te [0], '../', $message); | |
e425a60b YO |
941 | } |
942 | ||
e425a60b | 943 | { |
e425a60b | 944 | my $a = "xyzt" x 8192; |
a86da477 NC |
945 | like($a, qr/\A(?>[a-z])*\z/, |
946 | '(?>) does not cause wrongness on long string; Bug 60034'); | |
e425a60b YO |
947 | my $b = $a . chr 256; |
948 | chop $b; | |
4b0d13b9 | 949 | is($a, $b, 'Bug 60034'); |
a86da477 NC |
950 | like($b, qr/\A(?>[a-z])*\z/, |
951 | '(?>) does not cause wrongness on long string with UTF-8; Bug 60034'); | |
e425a60b YO |
952 | } |
953 | ||
e425a60b YO |
954 | # |
955 | # Keep the following tests last -- they may crash perl | |
956 | # | |
957 | print "# Tests that follow may crash perl\n"; | |
958 | { | |
a86da477 | 959 | |
51b6c6b3 | 960 | my $message = 'Pattern in a loop, failure should not ' . |
a86da477 | 961 | 'affect previous success; Bug 19049/38869'; |
e425a60b YO |
962 | my @list = ( |
963 | 'ab cdef', # Matches regex | |
964 | ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it | |
965 | ); | |
966 | my $y; | |
967 | my $x; | |
968 | foreach (@list) { | |
969 | m/ab(.+)cd/i; # The ignore-case seems to be important | |
970 | $y = $1; # Use $1, which might not be from the last match! | |
971 | $x = substr ($list [0], $- [0], $+ [0] - $- [0]); | |
972 | } | |
51b6c6b3 NC |
973 | is($y, ' ', $message); |
974 | is($x, 'ab cd', $message); | |
e425a60b YO |
975 | } |
976 | ||
e425a60b | 977 | { |
a86da477 | 978 | ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker; Bug 24274"); |
e425a60b | 979 | ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, |
a86da477 | 980 | "Regexp /^(??{'(.)'x 100})/ crashes older perls; Bug 24274"); |
e425a60b YO |
981 | } |
982 | ||
e425a60b | 983 | { |
e425a60b | 984 | # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache |
a86da477 | 985 | |
e425a60b | 986 | local ${^UTF8CACHE} = -1; |
a86da477 | 987 | my $message = "Shouldn't panic; Bug 45337"; |
e425a60b YO |
988 | my $s = "[a]a{2}"; |
989 | utf8::upgrade $s; | |
51b6c6b3 | 990 | like("aaa", qr/$s/, $message); |
e425a60b YO |
991 | } |
992 | { | |
a86da477 | 993 | my $message = "Check if tree logic breaks \$^R; Bug 57042"; |
e425a60b YO |
994 | my $cond_re = qr/\s* |
995 | \s* (?: | |
996 | \( \s* A (?{1}) | |
997 | | \( \s* B (?{2}) | |
998 | ) | |
999 | /x; | |
1000 | my @res; | |
1001 | for my $line ("(A)","(B)") { | |
1002 | if ($line =~ m/$cond_re/) { | |
1003 | push @res, $^R ? "#$^R" : "UNDEF"; | |
1004 | } | |
1005 | } | |
51b6c6b3 | 1006 | is("@res","#1 #2", $message); |
e425a60b YO |
1007 | } |
1008 | { | |
1009 | no warnings 'closure'; | |
1010 | my $re = qr/A(??{"1"})/; | |
1011 | ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; | |
1012 | ok $1 eq "A1"; | |
1013 | ok $2 eq "B"; | |
1014 | } | |
1015 | ||
e425a60b YO |
1016 | # This only works under -DEBUGGING because it relies on an assert(). |
1017 | { | |
51b6c6b3 | 1018 | # Check capture offset re-entrancy of utf8 code. |
e425a60b YO |
1019 | |
1020 | sub fswash { $_[0] =~ s/([>X])//g; } | |
1021 | ||
1022 | my $k1 = "." x 4 . ">>"; | |
1023 | fswash($k1); | |
1024 | ||
1025 | my $k2 = "\x{f1}\x{2022}"; | |
1026 | $k2 =~ s/([\360-\362])/>/g; | |
1027 | fswash($k2); | |
1028 | ||
a86da477 | 1029 | is($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks; Bug 60508"); |
e425a60b YO |
1030 | } |
1031 | ||
e425a60b | 1032 | { |
a86da477 | 1033 | # minimal CURLYM limited to 32767 matches |
e425a60b YO |
1034 | my @pat = ( |
1035 | qr{a(x|y)*b}, # CURLYM | |
1036 | qr{a(x|y)*?b}, # .. with minmod | |
1037 | qr{a([wx]|[yz])*b}, # .. and without tries | |
1038 | qr{a([wx]|[yz])*?b}, | |
1039 | ); | |
1040 | my $len = 32768; | |
1041 | my $s = join '', 'a', 'x' x $len, 'b'; | |
1042 | for my $pat (@pat) { | |
a86da477 | 1043 | like($s, $pat, "$pat; Bug 65372"); |
e425a60b YO |
1044 | } |
1045 | } | |
b4da7bae B |
1046 | |
1047 | { | |
04934b6d | 1048 | local $::TODO = "[perl #38133]"; |
b4da7bae B |
1049 | |
1050 | "A" =~ /(((?:A))?)+/; | |
1051 | my $first = $2; | |
1052 | ||
1053 | "A" =~ /(((A))?)+/; | |
1054 | my $second = $2; | |
1055 | ||
de26e0cc | 1056 | is($first, $second); |
b4da7bae | 1057 | } |
aca53033 FC |
1058 | |
1059 | { | |
51b6c6b3 | 1060 | my $message |
a86da477 | 1061 | = 'utf8 =~ /trie/ where trie matches a continuation octet; Bug 70998'; |
aca53033 FC |
1062 | |
1063 | # Catch warnings: | |
1064 | my $w; | |
1065 | local $SIG{__WARN__} = sub { $w .= shift }; | |
1066 | ||
1067 | # This bug can be reduced to | |
1068 | qq{\x{30ab}} =~ /\xab|\xa9/; | |
1069 | # but it's nice to have a more 'real-world' test. The original test | |
1070 | # case from the RT ticket follows: | |
1071 | ||
1072 | my %conv = ( | |
1073 | "\xab" => "<", | |
1074 | "\xa9" => "(c)", | |
1075 | ); | |
1076 | my $conv_rx = '(' . join('|', map { quotemeta } keys %conv) . ')'; | |
1077 | $conv_rx = qr{$conv_rx}; | |
1078 | ||
1079 | my $x | |
1080 | = qq{\x{3042}\x{304b}\x{3055}\x{305f}\x{306a}\x{306f}\x{307e}} | |
1081 | . qq{\x{3084}\x{3089}\x{308f}\x{3093}\x{3042}\x{304b}\x{3055}} | |
1082 | . qq{\x{305f}\x{306a}\x{306f}\x{307e}\x{3084}\x{3089}\x{308f}} | |
1083 | . qq{\x{3093}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}\x{30cf}} | |
1084 | . qq{\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}\x{30a2}\x{30ab}} | |
1085 | . qq{\x{30b5}\x{30bf}\x{30ca}\x{30cf}\x{30de}\x{30e4}\x{30e9}} | |
1086 | . qq{\x{30ef}\x{30f3}\x{30a2}\x{30ab}\x{30b5}\x{30bf}\x{30ca}} | |
1087 | . qq{\x{30cf}\x{30de}\x{30e4}\x{30e9}\x{30ef}\x{30f3}}; | |
1088 | ||
1089 | $x =~ s{$conv_rx}{$conv{$1}}eg; | |
1090 | ||
51b6c6b3 | 1091 | is($w, undef, $message); |
aca53033 FC |
1092 | } |
1093 | ||
e7f38d0f | 1094 | { |
a86da477 | 1095 | # minimal CURLYM limited to 32767 matches |
51b6c6b3 NC |
1096 | |
1097 | is(join("-", " abc def " =~ /(?=(\S+))/g), "abc-bc-c-def-ef-f", | |
a86da477 | 1098 | 'stclass optimisation does not break + inside (?=); Bug 68564'); |
e7f38d0f YO |
1099 | } |
1100 | ||
5c0a530f NC |
1101 | { |
1102 | use charnames ":full"; | |
1103 | # Delayed interpolation of \N' | |
1104 | my $r1 = qr/\N{THAI CHARACTER SARA I}/; | |
1105 | my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}"; | |
1106 | ||
1107 | # Bug #56444 | |
1108 | ok $s1 =~ /$r1+/, 'my $r1 = qr/\N{THAI CHARACTER SARA I}/; my $s1 = "\x{E34}\x{E34}\x{E34}\x{E34}; $s1 =~ /$r1+/'; | |
1109 | ||
1110 | # Bug #62056 | |
1111 | ok "${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/, '"${s1}A" =~ m/$s1\N{LATIN CAPITAL LETTER A}/'; | |
1112 | ||
1113 | ok "abbbbc" =~ m/\N{1}/ && $& eq "a", '"abbbbc" =~ m/\N{1}/ && $& eq "a"'; | |
1114 | ok "abbbbc" =~ m/\N{3,4}/ && $& eq "abbb", '"abbbbc" =~ m/\N{3,4}/ && $& eq "abbb"'; | |
1115 | } | |
1116 | ||
1117 | { | |
1118 | use charnames ":full"; | |
1119 | my $message = '[perl #74982] Period coming after \N{}'; | |
1120 | ok("\x{ff08}." =~ m/\N{FULLWIDTH LEFT PARENTHESIS}./ && $& eq "\x{ff08}.", $message); | |
1121 | ok("\x{ff08}." =~ m/[\N{FULLWIDTH LEFT PARENTHESIS}]./ && $& eq "\x{ff08}.", $message); | |
1122 | } | |
1123 | ||
453466ce NC |
1124 | SKIP: { |
1125 | ######## "Segfault using HTML::Entities", Richard Jolly <richardjolly@mac.com>, <A3C7D27E-C9F4-11D8-B294-003065AE00B6@mac.com> in perl-unicode@perl.org | |
1126 | ||
1127 | skip('Perl configured without Encode module', 1) | |
1128 | unless $Config{extensions} =~ / Encode /; | |
1129 | ||
1130 | # Test case cut down by jhi | |
1131 | fresh_perl_like(<<'EOP', qr!Malformed UTF-8 character \(unexpected end of string\) in substitution \(s///\) at!, 'Segfault using HTML::Entities'); | |
1132 | use Encode; | |
1133 | my $t = ord('A') == 193 ? "\xEA" : "\xE9"; | |
1134 | Encode::_utf8_on($t); | |
1135 | $t =~ s/([^a])//ge; | |
1136 | EOP | |
1137 | } | |
1138 | ||
e425a60b YO |
1139 | } # End of sub run_tests |
1140 | ||
1141 | 1; |