Commit | Line | Data |
---|---|---|
e425a60b YO |
1 | #!./perl |
2 | # | |
c5de0829 | 3 | # This is a home for regular expression tests that do not fit into |
e425a60b YO |
4 | # the format supported by re/regexp.t. If you want to add a test |
5 | # that does fit that format, add it to re/re_tests, not here. | |
6 | ||
7 | use strict; | |
8 | use warnings; | |
9 | use 5.010; | |
10 | ||
11 | ||
12 | sub run_tests; | |
13 | ||
14 | $| = 1; | |
15 | ||
e425a60b YO |
16 | |
17 | BEGIN { | |
18 | chdir 't' if -d 't'; | |
9d45b377 | 19 | @INC = ('../lib','.'); |
173ee337 | 20 | require './test.pl'; require './charset_tools.pl'; |
2fcf74c4 | 21 | skip_all_if_miniperl("miniperl can't load Tie::Hash::NamedCapture, need for %+ and %-"); |
e425a60b | 22 | } |
e425a60b | 23 | |
9d45b377 | 24 | run_tests() unless caller; |
e425a60b YO |
25 | |
26 | # | |
27 | # Tests start here. | |
28 | # | |
29 | sub run_tests { | |
30 | ||
e425a60b | 31 | { |
de946258 | 32 | my $message = '\C matches octet'; |
e425a60b | 33 | $_ = "a\x{100}b"; |
de946258 NC |
34 | ok(/(.)(\C)(\C)(.)/, $message); |
35 | is($1, "a", $message); | |
ef237063 | 36 | if ($::IS_ASCII) { # ASCII (or equivalent), should be UTF-8 |
de946258 NC |
37 | is($2, "\xC4", $message); |
38 | is($3, "\x80", $message); | |
e425a60b | 39 | } |
ef237063 | 40 | elsif ($::IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC |
de946258 NC |
41 | is($2, "\x8C", $message); |
42 | is($3, "\x41", $message); | |
e425a60b YO |
43 | } |
44 | else { | |
45 | SKIP: { | |
c1741bad | 46 | ok 0, "Unexpected platform", "ord ('A') =" . ord 'A'; |
e425a60b YO |
47 | skip "Unexpected platform"; |
48 | } | |
49 | } | |
de946258 | 50 | is($4, "b", $message); |
e425a60b YO |
51 | } |
52 | ||
e425a60b | 53 | { |
de946258 | 54 | my $message = '\C matches octet'; |
e425a60b | 55 | $_ = "\x{100}"; |
de946258 | 56 | ok(/(\C)/g, $message); |
ef237063 | 57 | if ($::IS_ASCII) { |
de946258 | 58 | is($1, "\xC4", $message); |
e425a60b | 59 | } |
ef237063 | 60 | elsif ($::IS_EBCDIC) { |
de946258 | 61 | is($1, "\x8C", $message); |
e425a60b YO |
62 | } |
63 | else { | |
c1741bad | 64 | ok 0, "Unexpected platform", "ord ('A') = " . ord 'A'; |
e425a60b | 65 | } |
de946258 | 66 | ok(/(\C)/g, $message); |
ef237063 | 67 | if ($::IS_ASCII) { |
de946258 | 68 | is($1, "\x80", $message); |
e425a60b | 69 | } |
ef237063 | 70 | elsif ($::IS_EBCDIC) { |
de946258 | 71 | is($1, "\x41", $message); |
e425a60b YO |
72 | } |
73 | else { | |
c1741bad | 74 | ok 0, "Unexpected platform", "ord ('A') = " . ord 'A'; |
e425a60b YO |
75 | } |
76 | } | |
77 | ||
e425a60b YO |
78 | { |
79 | # Japhy -- added 03/03/2001 | |
80 | () = (my $str = "abc") =~ /(...)/; | |
81 | $str = "def"; | |
de26e0cc | 82 | is($1, "abc", 'Changing subject does not modify $1'); |
e425a60b YO |
83 | } |
84 | ||
e425a60b YO |
85 | SKIP: |
86 | { | |
87 | # The trick is that in EBCDIC the explicit numeric range should | |
88 | # match (as also in non-EBCDIC) but the explicit alphabetic range | |
89 | # should not match. | |
90 | ok "\x8e" =~ /[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; | |
91 | ok "\xce" =~ /[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; | |
2ca8589c | 92 | ok "\xd0" =~ /[\xc9-\xd1]/, '"\xd0" =~ /[\xc9-\xd1]/'; |
e425a60b YO |
93 | |
94 | skip "Not an EBCDIC platform", 2 unless ord ('i') == 0x89 && | |
95 | ord ('J') == 0xd1; | |
96 | ||
97 | # In most places these tests would succeed since \x8e does not | |
98 | # in most character sets match 'i' or 'j' nor would \xce match | |
99 | # 'I' or 'J', but strictly speaking these tests are here for | |
100 | # the good of EBCDIC, so let's test these only there. | |
b33825c4 NC |
101 | unlike("\x8e", qr/[i-j]/, '"\x8e" !~ /[i-j]/'); |
102 | unlike("\xce", qr/[I-J]/, '"\xce" !~ /[I-J]/'); | |
2ca8589c | 103 | unlike("\xd0", qr/[I-J]/, '"\xd0" !~ /[I-J]/'); |
e425a60b YO |
104 | } |
105 | ||
e425a60b YO |
106 | { |
107 | ok "\x{ab}" =~ /\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; | |
108 | ok "\x{abcd}" =~ /\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; | |
109 | } | |
110 | ||
e425a60b | 111 | { |
de946258 | 112 | my $message = 'bug id 20001008.001'; |
e425a60b YO |
113 | |
114 | my @x = ("stra\337e 138", "stra\337e 138"); | |
115 | for (@x) { | |
de946258 NC |
116 | ok(s/(\d+)\s*([\w\-]+)/$1 . uc $2/e, $message); |
117 | ok(my ($latin) = /^(.+)(?:\s+\d)/, $message); | |
118 | is($latin, "stra\337e", $message); | |
119 | ok($latin =~ s/stra\337e/straße/, $message); | |
e425a60b YO |
120 | # |
121 | # Previous code follows, but outcommented - there were no tests. | |
122 | # | |
123 | # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
124 | # use utf8; # needed for the raw UTF-8 | |
125 | # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
126 | } | |
127 | } | |
128 | ||
e425a60b | 129 | { |
de946258 NC |
130 | my $message = 'Test \x escapes'; |
131 | ok("ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4", $message); | |
132 | ok("ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}", $message); | |
133 | ok("ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}", $message); | |
134 | ok("ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4", $message); | |
135 | ok("ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4", $message); | |
136 | ok("ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}", $message); | |
137 | ok("ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}", $message); | |
138 | ok("ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4", $message); | |
e425a60b YO |
139 | } |
140 | ||
e425a60b | 141 | { |
de946258 | 142 | my $message = 'Match code points > 255'; |
e425a60b | 143 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; |
de946258 NC |
144 | ok(/(.\x{300})./, $message); |
145 | ok($` eq "abc\x{100}" && length ($`) == 4, $message); | |
146 | ok($& eq "\x{200}\x{300}\x{380}" && length ($&) == 3, $message); | |
147 | ok($' eq "\x{400}defg" && length ($') == 5, $message); | |
148 | ok($1 eq "\x{200}\x{300}" && length ($1) == 2, $message); | |
e425a60b YO |
149 | } |
150 | ||
e425a60b YO |
151 | { |
152 | my $x = "\x{10FFFD}"; | |
153 | $x =~ s/(.)/$1/g; | |
154 | ok ord($x) == 0x10FFFD && length($x) == 1, "From Robin Houston"; | |
155 | } | |
156 | ||
e425a60b YO |
157 | { |
158 | my %d = ( | |
159 | "7f" => [0, 0, 0], | |
160 | "80" => [1, 1, 0], | |
161 | "ff" => [1, 1, 0], | |
162 | "100" => [0, 1, 1], | |
163 | ); | |
a2d248f9 | 164 | |
e425a60b | 165 | while (my ($code, $match) = each %d) { |
de946258 | 166 | my $message = "Properties of \\x$code"; |
e425a60b | 167 | my $char = eval qq ["\\x{$code}"]; |
a2d248f9 | 168 | |
de946258 NC |
169 | is(0 + ($char =~ /[\x80-\xff]/), $$match[0], $message); |
170 | is(0 + ($char =~ /[\x80-\x{100}]/), $$match[1], $message); | |
171 | is(0 + ($char =~ /[\x{100}]/), $$match[2], $message); | |
e425a60b YO |
172 | } |
173 | } | |
174 | ||
e425a60b YO |
175 | { |
176 | # From Japhy | |
e4e5d8ba | 177 | foreach (qw(c g o)) { |
4d18b353 NC |
178 | warning_like(sub {'' =~ "(?$_)"}, qr/^Useless \(\?$_\)/); |
179 | warning_like(sub {'' =~ "(?-$_)"}, qr/^Useless \(\?-$_\)/); | |
e4e5d8ba | 180 | } |
e425a60b YO |
181 | |
182 | # Now test multi-error regexes | |
f4554ed5 NC |
183 | foreach (['(?g-o)', qr/^Useless \(\?g\)/, qr/^Useless \(\?-o\)/], |
184 | ['(?g-c)', qr/^Useless \(\?g\)/, qr/^Useless \(\?-c\)/], | |
185 | # (?c) means (?g) error won't be thrown | |
186 | ['(?o-cg)', qr/^Useless \(\?o\)/, qr/^Useless \(\?-c\)/], | |
187 | ['(?ogc)', qr/^Useless \(\?o\)/, qr/^Useless \(\?g\)/, | |
188 | qr/^Useless \(\?c\)/], | |
189 | ) { | |
190 | my ($re, @warnings) = @$_; | |
191 | warnings_like(sub {eval "qr/$re/"}, \@warnings, "qr/$re/ warns"); | |
192 | } | |
e425a60b YO |
193 | } |
194 | ||
e425a60b | 195 | { |
de946258 | 196 | my $message = "/x tests"; |
e425a60b | 197 | $_ = "foo"; |
14358a41 | 198 | foreach my $pat (<<" --", <<" --") { |
e425a60b YO |
199 | /f |
200 | o\r | |
201 | o | |
202 | \$ | |
203 | /x | |
204 | -- | |
e425a60b YO |
205 | /f |
206 | o | |
207 | o | |
208 | \$\r | |
209 | /x | |
210 | -- | |
14358a41 NC |
211 | is(eval $pat, 1, $message); |
212 | is($@, '', $message); | |
213 | } | |
e425a60b YO |
214 | } |
215 | ||
e425a60b | 216 | { |
de946258 | 217 | my $message = "/o feature"; |
e425a60b | 218 | sub test_o {$_ [0] =~ /$_[1]/o; return $1} |
de946258 NC |
219 | is(test_o ('abc', '(.)..'), 'a', $message); |
220 | is(test_o ('abc', '..(.)'), 'a', $message); | |
e425a60b YO |
221 | } |
222 | ||
e425a60b YO |
223 | { |
224 | # Test basic $^N usage outside of a regex | |
de946258 | 225 | my $message = '$^N usage outside of a regex'; |
e425a60b | 226 | my $x = "abcdef"; |
de946258 NC |
227 | ok(($x =~ /cde/ and !defined $^N), $message); |
228 | ok(($x =~ /(cde)/ and $^N eq "cde"), $message); | |
229 | ok(($x =~ /(c)(d)(e)/ and $^N eq "e"), $message); | |
230 | ok(($x =~ /(c(d)e)/ and $^N eq "cde"), $message); | |
231 | ok(($x =~ /(foo)|(c(d)e)/ and $^N eq "cde"), $message); | |
232 | ok(($x =~ /(c(d)e)|(foo)/ and $^N eq "cde"), $message); | |
233 | ok(($x =~ /(c(d)e)|(abc)/ and $^N eq "abc"), $message); | |
234 | ok(($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde"), $message); | |
235 | ok(($x =~ /(c(d)e)(abc)?/ and $^N eq "cde"), $message); | |
236 | ok(($x =~ /(?:c(d)e)/ and $^N eq "d"), $message); | |
237 | ok(($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d"), $message); | |
238 | ok(($x =~ /(?:([abc])|([def]))*/ and $^N eq "f"), $message); | |
239 | ok(($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f"), $message); | |
240 | ok(($x =~ /(([ace])|([bd]))*/ and $^N eq "e"), $message); | |
241 | {ok(($x =~ /(([ace])|([bdf]))*/ and $^N eq "f"), $message);} | |
e425a60b YO |
242 | ## Test to see if $^N is automatically localized -- it should now |
243 | ## have the value set in the previous test. | |
de26e0cc | 244 | is($^N, "e", '$^N is automatically localized'); |
e425a60b YO |
245 | |
246 | # Now test inside (?{ ... }) | |
de946258 | 247 | $message = '$^N usage inside (?{ ... })'; |
e425a60b | 248 | our ($y, $z); |
de946258 NC |
249 | ok(($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b"), $message); |
250 | ok(($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"), $message); | |
251 | ok(($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"), $message); | |
252 | ok(($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" | |
253 | and $z eq "abcd"), $message); | |
254 | ok(($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" | |
255 | and $z eq "abcde"), $message); | |
e425a60b YO |
256 | |
257 | } | |
258 | ||
e425a60b YO |
259 | SKIP: |
260 | { | |
261 | ## Should probably put in tests for all the POSIX stuff, | |
262 | ## but not sure how to guarantee a specific locale...... | |
263 | ||
ef237063 | 264 | skip "Not an ASCII platform", 2 unless $::IS_ASCII; |
de946258 | 265 | my $message = 'Test [[:cntrl:]]'; |
e425a60b YO |
266 | my $AllBytes = join "" => map {chr} 0 .. 255; |
267 | (my $x = $AllBytes) =~ s/[[:cntrl:]]//g; | |
de946258 | 268 | is($x, join("", map {chr} 0x20 .. 0x7E, 0x80 .. 0xFF), $message); |
e425a60b YO |
269 | |
270 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; | |
de946258 | 271 | is($x, (join "", map {chr} 0x00 .. 0x1F, 0x7F), $message); |
e425a60b YO |
272 | } |
273 | ||
e425a60b YO |
274 | { |
275 | # With /s modifier UTF8 chars were interpreted as bytes | |
de946258 | 276 | my $message = "UTF-8 chars aren't bytes"; |
e425a60b YO |
277 | my $a = "Hello \x{263A} World"; |
278 | my @a = ($a =~ /./gs); | |
de946258 | 279 | is($#a, 12, $message); |
e425a60b YO |
280 | } |
281 | ||
e425a60b | 282 | { |
de946258 | 283 | my $message = '. matches \n with /s'; |
e425a60b YO |
284 | my $str1 = "foo\nbar"; |
285 | my $str2 = "foo\n\x{100}bar"; | |
ef237063 | 286 | my ($a, $b) = map {chr} $::IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41); |
e425a60b | 287 | my @a; |
de946258 NC |
288 | @a = $str1 =~ /./g; is(@a, 6, $message); is("@a", "f o o b a r", $message); |
289 | @a = $str1 =~ /./gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); | |
290 | @a = $str1 =~ /\C/g; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); | |
291 | @a = $str1 =~ /\C/gs; is(@a, 7, $message); is("@a", "f o o \n b a r", $message); | |
292 | @a = $str2 =~ /./g; is(@a, 7, $message); is("@a", "f o o \x{100} b a r", $message); | |
293 | @a = $str2 =~ /./gs; is(@a, 8, $message); is("@a", "f o o \n \x{100} b a r", $message); | |
294 | @a = $str2 =~ /\C/g; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message); | |
295 | @a = $str2 =~ /\C/gs; is(@a, 9, $message); is("@a", "f o o \n $a $b b a r", $message); | |
e425a60b YO |
296 | } |
297 | ||
e425a60b | 298 | { |
e425a60b YO |
299 | no warnings 'digit'; |
300 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. | |
301 | my $x; | |
302 | $x = "\x4e" . "E"; | |
303 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); | |
304 | ||
305 | $x = "\x4e" . "i"; | |
306 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); | |
307 | ||
308 | $x = "\x4" . "j"; | |
309 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); | |
310 | ||
311 | $x = "\x0" . "k"; | |
312 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); | |
313 | ||
314 | $x = "\x0" . "x"; | |
315 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); | |
316 | ||
317 | $x = "\x0" . "xa"; | |
318 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); | |
319 | ||
320 | $x = "\x9" . "_b"; | |
321 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); | |
322 | ||
323 | # and now again in [] ranges | |
324 | ||
325 | $x = "\x4e" . "E"; | |
326 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); | |
327 | ||
328 | $x = "\x4e" . "i"; | |
329 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); | |
330 | ||
331 | $x = "\x4" . "j"; | |
332 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); | |
333 | ||
334 | $x = "\x0" . "k"; | |
335 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); | |
336 | ||
337 | $x = "\x0" . "x"; | |
338 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); | |
339 | ||
340 | $x = "\x0" . "xa"; | |
341 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); | |
342 | ||
343 | $x = "\x9" . "_b"; | |
344 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); | |
345 | ||
346 | # Check that \x{##} works. 5.6.1 fails quite a few of these. | |
347 | ||
348 | $x = "\x9b"; | |
349 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); | |
350 | ||
351 | $x = "\x9b" . "y"; | |
352 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
353 | ||
354 | $x = "\x9b" . "y"; | |
355 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); | |
356 | ||
357 | $x = "\x9b" . "y"; | |
358 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); | |
359 | ||
360 | $x = "\x0" . "y"; | |
361 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); | |
362 | ||
363 | $x = "\x0" . "y"; | |
364 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); | |
365 | ||
366 | $x = "\x9b" . "y"; | |
367 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); | |
368 | ||
369 | $x = "\x9b"; | |
370 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); | |
371 | ||
372 | $x = "\x9b" . "y"; | |
373 | ok ($x =~ /^[\x{9_b}y]{2}$/, | |
374 | "\\x{9_b} is to be treated as \\x9b (again)"); | |
375 | ||
376 | $x = "\x9b" . "y"; | |
377 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); | |
378 | ||
379 | $x = "\x9b" . "y"; | |
380 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); | |
381 | ||
382 | $x = "\x0" . "y"; | |
383 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); | |
384 | ||
385 | $x = "\x0" . "y"; | |
386 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); | |
387 | ||
388 | $x = "\x9b" . "y"; | |
389 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); | |
390 | ||
391 | } | |
392 | ||
e425a60b YO |
393 | { |
394 | # High bit bug -- japhy | |
395 | my $x = "ab\200d"; | |
396 | ok $x =~ /.*?\200/, "High bit fine"; | |
397 | } | |
398 | ||
e425a60b YO |
399 | { |
400 | # The basic character classes and Unicode | |
401 | ok "\x{0100}" =~ /\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; | |
402 | ok "\x{0660}" =~ /\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; | |
403 | ok "\x{1680}" =~ /\s/, 'OGHAM SPACE MARK in /\s/'; | |
404 | } | |
405 | ||
e425a60b | 406 | { |
de946258 NC |
407 | my $message = "Folding matches and Unicode"; |
408 | like("a\x{100}", qr/A/i, $message); | |
409 | like("A\x{100}", qr/a/i, $message); | |
410 | like("a\x{100}", qr/a/i, $message); | |
411 | like("A\x{100}", qr/A/i, $message); | |
412 | like("\x{101}a", qr/\x{100}/i, $message); | |
413 | like("\x{100}a", qr/\x{100}/i, $message); | |
414 | like("\x{101}a", qr/\x{101}/i, $message); | |
415 | like("\x{100}a", qr/\x{101}/i, $message); | |
416 | like("a\x{100}", qr/A\x{100}/i, $message); | |
417 | like("A\x{100}", qr/a\x{100}/i, $message); | |
418 | like("a\x{100}", qr/a\x{100}/i, $message); | |
419 | like("A\x{100}", qr/A\x{100}/i, $message); | |
420 | like("a\x{100}", qr/[A]/i, $message); | |
421 | like("A\x{100}", qr/[a]/i, $message); | |
422 | like("a\x{100}", qr/[a]/i, $message); | |
423 | like("A\x{100}", qr/[A]/i, $message); | |
424 | like("\x{101}a", qr/[\x{100}]/i, $message); | |
425 | like("\x{100}a", qr/[\x{100}]/i, $message); | |
426 | like("\x{101}a", qr/[\x{101}]/i, $message); | |
427 | like("\x{100}a", qr/[\x{101}]/i, $message); | |
e425a60b YO |
428 | } |
429 | ||
e425a60b YO |
430 | { |
431 | use charnames ':full'; | |
de946258 | 432 | my $message = "Folding 'LATIN LETTER A WITH GRAVE'"; |
e425a60b YO |
433 | |
434 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; | |
435 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; | |
0f289c68 | 436 | |
de946258 NC |
437 | like($lower, qr/$UPPER/i, $message); |
438 | like($UPPER, qr/$lower/i, $message); | |
439 | like($lower, qr/[$UPPER]/i, $message); | |
440 | like($UPPER, qr/[$lower]/i, $message); | |
e425a60b | 441 | |
de946258 | 442 | $message = "Folding 'GREEK LETTER ALPHA WITH VRACHY'"; |
e425a60b YO |
443 | |
444 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; | |
445 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; | |
446 | ||
de946258 NC |
447 | like($lower, qr/$UPPER/i, $message); |
448 | like($UPPER, qr/$lower/i, $message); | |
449 | like($lower, qr/[$UPPER]/i, $message); | |
450 | like($UPPER, qr/[$lower]/i, $message); | |
e425a60b | 451 | |
de946258 | 452 | $message = "Folding 'LATIN LETTER Y WITH DIAERESIS'"; |
e425a60b YO |
453 | |
454 | $lower = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; | |
455 | $UPPER = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; | |
456 | ||
de946258 NC |
457 | like($lower, qr/$UPPER/i, $message); |
458 | like($UPPER, qr/$lower/i, $message); | |
459 | like($lower, qr/[$UPPER]/i, $message); | |
460 | like($UPPER, qr/[$lower]/i, $message); | |
e425a60b YO |
461 | } |
462 | ||
e425a60b YO |
463 | { |
464 | use charnames ':full'; | |
de946258 | 465 | my $message = "GREEK CAPITAL LETTER SIGMA vs " . |
e425a60b YO |
466 | "COMBINING GREEK PERISPOMENI"; |
467 | ||
468 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; | |
469 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; | |
470 | ||
c11a8df3 NC |
471 | warning_is(sub {unlike("_:$char:_", qr/_:$SIGMA:_/i, $message)}, undef, |
472 | 'Did not warn [change a5961de5f4215b5c]'); | |
e425a60b YO |
473 | } |
474 | ||
e425a60b | 475 | { |
de946258 | 476 | my $message = '\X'; |
e425a60b YO |
477 | use charnames ':full'; |
478 | ||
de946258 NC |
479 | ok("a!" =~ /^(\X)!/ && $1 eq "a", $message); |
480 | ok("\xDF!" =~ /^(\X)!/ && $1 eq "\xDF", $message); | |
481 | ok("\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}", $message); | |
482 | ok("\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}", $message); | |
483 | ok("\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && | |
484 | $1 eq "\N{LATIN CAPITAL LETTER E}", $message); | |
485 | ok("\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" | |
e425a60b | 486 | =~ /^(\X)!/ && |
de946258 | 487 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}", $message); |
e425a60b | 488 | |
de946258 NC |
489 | $message = '\C and \X'; |
490 | like("!abc!", qr/a\Cc/, $message); | |
491 | like("!abc!", qr/a\Xc/, $message); | |
e425a60b YO |
492 | } |
493 | ||
e425a60b | 494 | { |
de946258 | 495 | my $message = "Final Sigma"; |
e425a60b YO |
496 | |
497 | my $SIGMA = "\x{03A3}"; # CAPITAL | |
498 | my $Sigma = "\x{03C2}"; # SMALL FINAL | |
499 | my $sigma = "\x{03C3}"; # SMALL | |
500 | ||
de946258 NC |
501 | like($SIGMA, qr/$SIGMA/i, $message); |
502 | like($SIGMA, qr/$Sigma/i, $message); | |
503 | like($SIGMA, qr/$sigma/i, $message); | |
e425a60b | 504 | |
de946258 NC |
505 | like($Sigma, qr/$SIGMA/i, $message); |
506 | like($Sigma, qr/$Sigma/i, $message); | |
507 | like($Sigma, qr/$sigma/i, $message); | |
e425a60b | 508 | |
de946258 NC |
509 | like($sigma, qr/$SIGMA/i, $message); |
510 | like($sigma, qr/$Sigma/i, $message); | |
511 | like($sigma, qr/$sigma/i, $message); | |
0f289c68 | 512 | |
de946258 NC |
513 | like($SIGMA, qr/[$SIGMA]/i, $message); |
514 | like($SIGMA, qr/[$Sigma]/i, $message); | |
515 | like($SIGMA, qr/[$sigma]/i, $message); | |
e425a60b | 516 | |
de946258 NC |
517 | like($Sigma, qr/[$SIGMA]/i, $message); |
518 | like($Sigma, qr/[$Sigma]/i, $message); | |
519 | like($Sigma, qr/[$sigma]/i, $message); | |
e425a60b | 520 | |
de946258 NC |
521 | like($sigma, qr/[$SIGMA]/i, $message); |
522 | like($sigma, qr/[$Sigma]/i, $message); | |
523 | like($sigma, qr/[$sigma]/i, $message); | |
e425a60b | 524 | |
de946258 | 525 | $message = "More final Sigma"; |
e425a60b YO |
526 | |
527 | my $S3 = "$SIGMA$Sigma$sigma"; | |
528 | ||
de946258 NC |
529 | ok(":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma, $message); |
530 | ok(":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma, $message); | |
531 | ok(":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma, $message); | |
e425a60b | 532 | |
de946258 NC |
533 | ok(":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma, $message); |
534 | ok(":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma, $message); | |
535 | ok(":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma, $message); | |
e425a60b YO |
536 | } |
537 | ||
e425a60b YO |
538 | { |
539 | use charnames ':full'; | |
de946258 | 540 | my $message = "Parlez-Vous " . |
e425a60b YO |
541 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais?"; |
542 | ||
de946258 NC |
543 | ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran.ais/ && |
544 | $& eq "Francais", $message); | |
545 | ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ && | |
546 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", $message); | |
547 | ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ && | |
548 | $& eq "Francais", $message); | |
e425a60b | 549 | # COMBINING CEDILLA is two bytes when encoded |
de946258 NC |
550 | like("Franc\N{COMBINING CEDILLA}ais", qr/Franc\C\Cais/, $message); |
551 | ok("Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ && | |
552 | $& eq "Francais", $message); | |
553 | ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ && | |
554 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", $message); | |
555 | ok("Franc\N{COMBINING CEDILLA}ais" =~ /Fran\Xais/ && | |
556 | $& eq "Franc\N{COMBINING CEDILLA}ais", $message); | |
557 | ok("Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
e425a60b | 558 | /Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && |
de946258 NC |
559 | $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", $message); |
560 | ok("Franc\N{COMBINING CEDILLA}ais" =~ /Franc\N{COMBINING CEDILLA}ais/ && | |
561 | $& eq "Franc\N{COMBINING CEDILLA}ais", $message); | |
e425a60b YO |
562 | |
563 | my @f = ( | |
564 | ["Fran\N{LATIN SMALL LETTER C}ais", "Francais"], | |
565 | ["Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", | |
566 | "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"], | |
567 | ["Franc\N{COMBINING CEDILLA}ais", "Franc\N{COMBINING CEDILLA}ais"], | |
568 | ); | |
569 | foreach my $entry (@f) { | |
570 | my ($subject, $match) = @$entry; | |
de946258 | 571 | ok($subject =~ /Fran(?:c\N{COMBINING CEDILLA}?| |
e425a60b | 572 | \N{LATIN SMALL LETTER C WITH CEDILLA})ais/x && |
de946258 | 573 | $& eq $match, $message); |
e425a60b YO |
574 | } |
575 | } | |
576 | ||
e425a60b | 577 | { |
de946258 | 578 | my $message = "Lingering (and useless) UTF8 flag doesn't mess up /i"; |
e425a60b YO |
579 | my $pat = "ABcde"; |
580 | my $str = "abcDE\x{100}"; | |
581 | chop $str; | |
de946258 | 582 | like($str, qr/$pat/i, $message); |
e425a60b YO |
583 | |
584 | $pat = "ABcde\x{100}"; | |
585 | $str = "abcDE"; | |
586 | chop $pat; | |
de946258 | 587 | like($str, qr/$pat/i, $message); |
e425a60b YO |
588 | |
589 | $pat = "ABcde\x{100}"; | |
590 | $str = "abcDE\x{100}"; | |
591 | chop $pat; | |
592 | chop $str; | |
de946258 | 593 | like($str, qr/$pat/i, $message); |
e425a60b YO |
594 | } |
595 | ||
e425a60b YO |
596 | { |
597 | use charnames ':full'; | |
de946258 | 598 | my $message = "LATIN SMALL LETTER SHARP S " . |
e425a60b YO |
599 | "(\N{LATIN SMALL LETTER SHARP S})"; |
600 | ||
de946258 NC |
601 | like("\N{LATIN SMALL LETTER SHARP S}", |
602 | qr/\N{LATIN SMALL LETTER SHARP S}/, $message); | |
603 | like("\N{LATIN SMALL LETTER SHARP S}", | |
604 | qr/\N{LATIN SMALL LETTER SHARP S}/i, $message); | |
605 | like("\N{LATIN SMALL LETTER SHARP S}", | |
606 | qr/[\N{LATIN SMALL LETTER SHARP S}]/, $message); | |
607 | like("\N{LATIN SMALL LETTER SHARP S}", | |
608 | qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); | |
e425a60b | 609 | |
de946258 NC |
610 | like("ss", qr /\N{LATIN SMALL LETTER SHARP S}/i, $message); |
611 | like("SS", qr /\N{LATIN SMALL LETTER SHARP S}/i, $message); | |
612 | like("ss", qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); | |
613 | like("SS", qr/[\N{LATIN SMALL LETTER SHARP S}]/i, $message); | |
e425a60b | 614 | |
de946258 NC |
615 | like("\N{LATIN SMALL LETTER SHARP S}", qr/ss/i, $message); |
616 | like("\N{LATIN SMALL LETTER SHARP S}", qr/SS/i, $message); | |
0f289c68 | 617 | |
de946258 NC |
618 | $message = "Unoptimized named sequence in class"; |
619 | like("ss", qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); | |
620 | like("SS", qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); | |
621 | like("\N{LATIN SMALL LETTER SHARP S}", | |
622 | qr/[\N{LATIN SMALL LETTER SHARP S}x]/, $message); | |
623 | like("\N{LATIN SMALL LETTER SHARP S}", | |
624 | qr/[\N{LATIN SMALL LETTER SHARP S}x]/i, $message); | |
e425a60b YO |
625 | } |
626 | ||
e425a60b YO |
627 | { |
628 | # More whitespace: U+0085, U+2028, U+2029\n"; | |
629 | ||
630 | # U+0085, U+00A0 need to be forced to be Unicode, the \x{100} does that. | |
631 | SKIP: { | |
ef237063 | 632 | skip "EBCDIC platform", 4 if $::IS_EBCDIC; |
e425a60b YO |
633 | # Do \x{0015} and \x{0041} match \s in EBCDIC? |
634 | ok "<\x{100}\x{0085}>" =~ /<\x{100}\s>/, '\x{0085} in \s'; | |
635 | ok "<\x{0085}>" =~ /<\v>/, '\x{0085} in \v'; | |
636 | ok "<\x{100}\x{00A0}>" =~ /<\x{100}\s>/, '\x{00A0} in \s'; | |
637 | ok "<\x{00A0}>" =~ /<\h>/, '\x{00A0} in \h'; | |
638 | } | |
a9c9e371 | 639 | my @h = map {sprintf "%05x" => $_} 0x01680, 0x02000 .. 0x0200A, |
e425a60b YO |
640 | 0x0202F, 0x0205F, 0x03000; |
641 | my @v = map {sprintf "%05x" => $_} 0x02028, 0x02029; | |
642 | ||
643 | my @H = map {sprintf "%05x" => $_} 0x01361, 0x0200B, 0x02408, 0x02420, | |
a9c9e371 | 644 | 0x0303F, 0xE0020, 0x180E; |
e425a60b | 645 | my @V = map {sprintf "%05x" => $_} 0x0008A .. 0x0008D, 0x00348, 0x10100, |
a9c9e371 | 646 | 0xE005F, 0xE007C, 0x180E; |
e425a60b YO |
647 | |
648 | for my $hex (@h) { | |
649 | my $str = eval qq ["<\\x{$hex}>"]; | |
650 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
651 | ok $str =~ /<\h>/, "\\x{$hex} in \\h"; | |
652 | ok $str !~ /<\v>/, "\\x{$hex} not in \\v"; | |
653 | } | |
654 | ||
655 | for my $hex (@v) { | |
656 | my $str = eval qq ["<\\x{$hex}>"]; | |
657 | ok $str =~ /<\s>/, "\\x{$hex} in \\s"; | |
658 | ok $str =~ /<\v>/, "\\x{$hex} in \\v"; | |
659 | ok $str !~ /<\h>/, "\\x{$hex} not in \\h"; | |
660 | } | |
661 | ||
662 | for my $hex (@H) { | |
663 | my $str = eval qq ["<\\x{$hex}>"]; | |
664 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
665 | ok $str =~ /<\H>/, "\\x{$hex} in \\H"; | |
666 | } | |
667 | ||
668 | for my $hex (@V) { | |
669 | my $str = eval qq ["<\\x{$hex}>"]; | |
670 | ok $str =~ /<\S>/, "\\x{$hex} in \\S"; | |
671 | ok $str =~ /<\V>/, "\\x{$hex} in \\V"; | |
672 | } | |
673 | } | |
674 | ||
e425a60b YO |
675 | { |
676 | # . with /s should work on characters, as opposed to bytes | |
de946258 | 677 | my $message = ". with /s works on characters, not bytes"; |
e425a60b YO |
678 | |
679 | my $s = "\x{e4}\x{100}"; | |
680 | # This is not expected to match: the point is that | |
681 | # neither should we get "Malformed UTF-8" warnings. | |
c11a8df3 NC |
682 | warning_is(sub {$s =~ /\G(.+?)\n/gcs}, undef, |
683 | "No 'Malformed UTF-8' warning"); | |
e425a60b YO |
684 | |
685 | my @c; | |
686 | push @c => $1 while $s =~ /\G(.)/gs; | |
687 | ||
688 | local $" = ""; | |
de946258 | 689 | is("@c", $s, $message); |
e425a60b YO |
690 | |
691 | # Test only chars < 256 | |
692 | my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; | |
693 | my $r1 = ""; | |
694 | while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
0f289c68 | 695 | $r1 .= $1 . $2; |
e425a60b YO |
696 | } |
697 | ||
698 | my $t2 = $t1 . "\x{100}"; # Repeat with a larger char | |
699 | my $r2 = ""; | |
700 | while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
0f289c68 | 701 | $r2 .= $1 . $2; |
e425a60b YO |
702 | } |
703 | $r2 =~ s/\x{100}//; | |
704 | ||
de946258 | 705 | is($r1, $r2, $message); |
e425a60b YO |
706 | } |
707 | ||
e425a60b | 708 | { |
de946258 NC |
709 | my $message = "Unicode lookbehind"; |
710 | like("A\x{100}B" , qr/(?<=A.)B/, $message); | |
711 | like("A\x{200}\x{300}B", qr/(?<=A..)B/, $message); | |
712 | like("\x{400}AB" , qr/(?<=\x{400}.)B/, $message); | |
713 | like("\x{500}\x{600}B" , qr/(?<=\x{500}.)B/, $message); | |
e425a60b YO |
714 | |
715 | # Original code also contained: | |
716 | # ok "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/; | |
717 | # but that looks like a typo. | |
718 | } | |
719 | ||
e425a60b | 720 | { |
de946258 | 721 | my $message = 'UTF-8 hash keys and /$/'; |
e425a60b YO |
722 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters |
723 | # /2002-01/msg01327.html | |
724 | ||
725 | my $u = "a\x{100}"; | |
726 | my $v = substr ($u, 0, 1); | |
727 | my $w = substr ($u, 1, 1); | |
728 | my %u = ($u => $u, $v => $v, $w => $w); | |
729 | for (keys %u) { | |
730 | my $m1 = /^\w*$/ ? 1 : 0; | |
731 | my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; | |
de946258 | 732 | is($m1, $m2, $message); |
e425a60b YO |
733 | } |
734 | } | |
735 | ||
e425a60b | 736 | { |
de946258 | 737 | my $message = "No SEGV in s/// and UTF-8"; |
e425a60b | 738 | my $s = "s#\x{100}" x 4; |
de946258 | 739 | ok($s =~ s/[^\w]/ /g, $message); |
2e84be61 | 740 | if ( 1 or $ENV{PERL_TEST_LEGACY_POSIX_CC} ) { |
de946258 | 741 | is($s, "s \x{100}" x 4, $message); |
e425a60b YO |
742 | } |
743 | else { | |
de946258 | 744 | is($s, "s " x 4, $message); |
e425a60b YO |
745 | } |
746 | } | |
747 | ||
e425a60b | 748 | { |
de946258 | 749 | my $message = "UTF-8 bug (maybe already known?)"; |
e425a60b YO |
750 | my $u = "foo"; |
751 | $u =~ s/./\x{100}/g; | |
de946258 | 752 | is($u, "\x{100}\x{100}\x{100}", $message); |
e425a60b YO |
753 | |
754 | $u = "foobar"; | |
755 | $u =~ s/[ao]/\x{100}/g; | |
de946258 | 756 | is($u, "f\x{100}\x{100}b\x{100}r", $message); |
e425a60b YO |
757 | |
758 | $u =~ s/\x{100}/e/g; | |
de946258 | 759 | is($u, "feeber", $message); |
e425a60b YO |
760 | } |
761 | ||
e425a60b | 762 | { |
de946258 | 763 | my $message = "UTF-8 bug with s///"; |
e425a60b YO |
764 | # check utf8/non-utf8 mixtures |
765 | # try to force all float/anchored check combinations | |
766 | ||
767 | my $c = "\x{100}"; | |
768 | my $subst; | |
769 | for my $re ("xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", | |
770 | "xx.*(?=$c)", "(?=$c).*xx",) { | |
de946258 NC |
771 | unlike("xxx", qr/$re/, $message); |
772 | ok(+($subst = "xxx") !~ s/$re//, $message); | |
e425a60b YO |
773 | } |
774 | for my $re ("xx.*$c*", "$c*.*xx") { | |
de946258 NC |
775 | like("xxx", qr/$re/, $message); |
776 | ok(+($subst = "xxx") =~ s/$re//, $message); | |
777 | is($subst, "", $message); | |
e425a60b YO |
778 | } |
779 | for my $re ("xxy*", "y*xx") { | |
de946258 NC |
780 | like("xx$c", qr/$re/, $message); |
781 | ok(+($subst = "xx$c") =~ s/$re//, $message); | |
782 | is($subst, $c, $message); | |
783 | unlike("xy$c", qr/$re/, $message); | |
784 | ok(+($subst = "xy$c") !~ s/$re//, $message); | |
e425a60b YO |
785 | } |
786 | for my $re ("xy$c*z", "x$c*yz") { | |
de946258 NC |
787 | like("xyz", qr/$re/, $message); |
788 | ok(+($subst = "xyz") =~ s/$re//, $message); | |
789 | is($subst, "", $message); | |
e425a60b YO |
790 | } |
791 | } | |
792 | ||
e425a60b | 793 | { |
c72077c4 AC |
794 | # The second half of RT #114808 |
795 | warning_is(sub {'aa' =~ /.+\x{100}/}, undef, | |
796 | 'utf8-only floating substr, non-utf8 target, no warning'); | |
797 | } | |
798 | ||
799 | { | |
de946258 | 800 | my $message = "qr /.../x"; |
e425a60b | 801 | my $R = qr / A B C # D E/x; |
de946258 NC |
802 | ok("ABCDE" =~ $R && $& eq "ABC", $message); |
803 | ok("ABCDE" =~ /$R/ && $& eq "ABC", $message); | |
804 | ok("ABCDE" =~ m/$R/ && $& eq "ABC", $message); | |
805 | ok("ABCDE" =~ /($R)/ && $1 eq "ABC", $message); | |
806 | ok("ABCDE" =~ m/($R)/ && $1 eq "ABC", $message); | |
e425a60b YO |
807 | } |
808 | ||
e425a60b YO |
809 | { |
810 | local $\; | |
811 | $_ = 'aaaaaaaaaa'; | |
812 | utf8::upgrade($_); chop $_; $\="\n"; | |
813 | ok /[^\s]+/, 'm/[^\s]/ utf8'; | |
814 | ok /[^\d]+/, 'm/[^\d]/ utf8'; | |
815 | ok +($a = $_, $_ =~ s/[^\s]+/./g), 's/[^\s]/ utf8'; | |
816 | ok +($a = $_, $a =~ s/[^\d]+/./g), 's/[^\s]/ utf8'; | |
817 | } | |
818 | ||
e425a60b YO |
819 | { |
820 | # Subject: Odd regexp behavior | |
821 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
822 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
823 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
824 | # To: perl-unicode@perl.org | |
825 | ||
de946258 | 826 | my $message = 'Markus Kuhn 2003-02-26'; |
0f289c68 | 827 | |
e425a60b | 828 | my $x = "\x{2019}\nk"; |
de946258 NC |
829 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
830 | is($x, "\x{2019} k", $message); | |
e425a60b YO |
831 | |
832 | $x = "b\nk"; | |
de946258 NC |
833 | ok($x =~ s/(\S)\n(\S)/$1 $2/sg, $message); |
834 | is($x, "b k", $message); | |
e425a60b | 835 | |
de946258 | 836 | like("\x{2019}", qr/\S/, $message); |
e425a60b YO |
837 | } |
838 | ||
e425a60b | 839 | { |
e425a60b YO |
840 | ok "\x{100}\n" =~ /\x{100}\n$/, "UTF-8 length cache and fbm_compile"; |
841 | } | |
842 | ||
e425a60b YO |
843 | { |
844 | package Str; | |
845 | use overload q /""/ => sub {${$_ [0]};}; | |
846 | sub new {my ($c, $v) = @_; bless \$v, $c;} | |
847 | ||
848 | package main; | |
849 | $_ = Str -> new ("a\x{100}/\x{100}b"); | |
850 | ok join (":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"; | |
851 | } | |
852 | ||
e425a60b YO |
853 | { |
854 | my $re = qq /^([^X]*)X/; | |
855 | utf8::upgrade ($re); | |
856 | ok "\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; | |
0ab1ff8e KW |
857 | my $loc_re = qq /(?l:^([^X]*)X)/; |
858 | utf8::upgrade ($loc_re); | |
859 | ok "\x{100}X" =~ /$loc_re/, "locale, S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; | |
e425a60b YO |
860 | } |
861 | ||
e425a60b YO |
862 | { |
863 | ok "123\x{100}" =~ /^.*1.*23\x{100}$/, | |
864 | 'UTF-8 + multiple floating substr'; | |
865 | } | |
866 | ||
e425a60b | 867 | { |
de946258 | 868 | my $message = '<20030808193656.5109.1@llama.ni-s.u-net.com>'; |
e425a60b YO |
869 | |
870 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
de946258 | 871 | like(" \x{101}", qr/\x{100}/i, $message); |
e425a60b YO |
872 | |
873 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
de946258 | 874 | like(" \x{1E01}", qr/\x{1E00}/i, $message); |
e425a60b YO |
875 | |
876 | # DESERET SMALL/CAPITAL LETTER LONG I | |
de946258 | 877 | like(" \x{10428}", qr/\x{10400}/i, $message); |
e425a60b YO |
878 | |
879 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
de946258 | 880 | like(" \x{1E01}x", qr/\x{1E00}X/i, $message); |
e425a60b YO |
881 | } |
882 | ||
e425a60b | 883 | { |
26faadbd | 884 | for (120 .. 130, 240 .. 260) { |
e425a60b | 885 | my $head = 'x' x $_; |
de946258 | 886 | my $message = q [Don't misparse \x{...} in regexp ] . |
26faadbd | 887 | q [near EXACT char count limit]; |
e425a60b | 888 | for my $tail ('\x{0061}', '\x{1234}', '\x61') { |
14358a41 NC |
889 | eval qq{like("$head$tail", qr/$head$tail/, \$message)}; |
890 | is($@, '', $message); | |
e425a60b | 891 | } |
de946258 | 892 | $message = q [Don't misparse \N{...} in regexp ] . |
26faadbd | 893 | q [near EXACT char count limit]; |
e425a60b | 894 | for my $tail ('\N{SNOWFLAKE}') { |
14358a41 NC |
895 | eval qq {use charnames ':full'; |
896 | like("$head$tail", qr/$head$tail/, \$message)}; | |
897 | is($@, '', $message); | |
e425a60b YO |
898 | } |
899 | } | |
900 | } | |
901 | ||
e425a60b YO |
902 | { # TRIE related |
903 | our @got = (); | |
904 | "words" =~ /(word|word|word)(?{push @got, $1})s$/; | |
de26e0cc | 905 | is(@got, 1, "TRIE optimisation"); |
e425a60b YO |
906 | |
907 | @got = (); | |
908 | "words" =~ /(word|word|word)(?{push @got,$1})s$/i; | |
de26e0cc | 909 | is(@got, 1,"TRIEF optimisation"); |
e425a60b YO |
910 | |
911 | my @nums = map {int rand 1000} 1 .. 100; | |
912 | my $re = "(" . (join "|", @nums) . ")"; | |
913 | $re = qr/\b$re\b/; | |
914 | ||
915 | foreach (@nums) { | |
916 | ok $_ =~ /$re/, "Trie nums"; | |
917 | } | |
918 | ||
919 | $_ = join " ", @nums; | |
920 | @got = (); | |
921 | push @got, $1 while /$re/g; | |
922 | ||
923 | my %count; | |
924 | $count {$_} ++ for @got; | |
925 | my $ok = 1; | |
926 | for (@nums) { | |
927 | $ok = 0 if --$count {$_} < 0; | |
928 | } | |
929 | ok $ok, "Trie min count matches"; | |
930 | } | |
931 | ||
e425a60b YO |
932 | { |
933 | # TRIE related | |
934 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
935 | ok "foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i && | |
936 | $1 eq "\x{101}foo", | |
937 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"; | |
938 | ||
939 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
940 | ok "foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i && | |
941 | $1 eq "\x{1E01}foo", | |
942 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"; | |
943 | ||
944 | # DESERET SMALL/CAPITAL LETTER LONG I | |
945 | ok "foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i && | |
946 | $1 eq "\x{10428}foo", | |
947 | "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"; | |
948 | ||
949 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
950 | ok "foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i && | |
951 | $1 eq "\x{1E01}xfoo", | |
952 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"; | |
953 | ||
954 | use charnames ':full'; | |
955 | ||
956 | my $s = "\N{LATIN SMALL LETTER SHARP S}"; | |
957 | ok "foba ba$s" =~ qr/(foo|Ba$s|bar)/i && $1 eq "ba$s", | |
958 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
959 | ok "foba ba$s" =~ qr/(Ba$s|foo|bar)/i && $1 eq "ba$s", | |
960 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
961 | ok "foba ba$s" =~ qr/(foo|bar|Ba$s)/i && $1 eq "ba$s", | |
962 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
963 | ||
964 | ok "foba ba$s" =~ qr/(foo|Bass|bar)/i && $1 eq "ba$s", | |
965 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; | |
966 | ||
967 | ok "foba ba$s" =~ qr/(foo|BaSS|bar)/i && $1 eq "ba$s", | |
968 | "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"; | |
969 | ||
970 | ok "foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i | |
971 | && $1 eq "ba${s}pxySS$s$s", | |
972 | "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"; | |
973 | } | |
974 | ||
e425a60b | 975 | { |
0f289c68 YO |
976 | BEGIN { |
977 | unshift @INC, 'lib'; | |
978 | } | |
e425a60b | 979 | use Cname; |
0f289c68 | 980 | |
e425a60b | 981 | ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; |
94ca1619 KW |
982 | my $name = "foo\xDF"; |
983 | my $result = eval "'A${name}B' =~ /^A\\N{$name}B\$/"; | |
984 | ok !$@ && $result, "Passthrough charname of non-ASCII, Latin1"; | |
e425a60b YO |
985 | # |
986 | # Why doesn't must_warn work here? | |
987 | # | |
988 | my $w; | |
989 | local $SIG {__WARN__} = sub {$w .= "@_"}; | |
990 | eval 'q(xxWxx) =~ /[\N{WARN}]/'; | |
2a53d331 | 991 | ok $w && $w =~ /Using just the first character returned by \\N\{} in character class/, |
ff3f963a | 992 | "single character in [\\N{}] warning"; |
e425a60b YO |
993 | |
994 | undef $w; | |
995 | eval q [ok "\0" !~ /[\N{EMPTY-STR}XY]/, | |
5895685f | 996 | "Zerolength charname in charclass doesn't match \\\\0"]; |
ff3f963a KW |
997 | ok $w && $w =~ /Ignoring zero length/, |
998 | 'Ignoring zero length \N{} in character class warning'; | |
67048c1b KW |
999 | undef $w; |
1000 | eval q [ok 'xy' =~ /x[\N{EMPTY-STR} y]/x, | |
1001 | 'Empty string charname in [] is ignored; finds a following character']; | |
1002 | ok $w && $w =~ /Ignoring zero length/, | |
1003 | 'Ignoring zero length \N{} in character class warning'; | |
1004 | undef $w; | |
1005 | eval q [ok 'x ' =~ /x[\N{EMPTY-STR} y]/, | |
1006 | 'Empty string charname in [] is ignored; finds a following blank under /x']; | |
1007 | ok $w && $w =~ /Ignoring zero length/, | |
1008 | 'Ignoring zero length \N{} in character class warning'; | |
e425a60b YO |
1009 | |
1010 | ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; | |
1011 | ok 'ABC' =~ /(\N{EVIL})/, 'Charname caching $1'; | |
1012 | ok 'xy' =~ /x\N{EMPTY-STR}y/, | |
1013 | 'Empty string charname produces NOTHING node'; | |
1014 | ok '' =~ /\N{EMPTY-STR}/, | |
1015 | 'Empty string charname produces NOTHING node'; | |
ff3f963a KW |
1016 | ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/, 'Verify that long string works'; |
1017 | ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/i, 'Verify under folding that long string works'; | |
1018 | ||
902994e4 KW |
1019 | eval '/(?[[\N{EMPTY-STR}]])/'; |
1020 | ok $@ && $@ =~ /Zero length \\N\{}/; | |
1021 | ||
bd299e29 | 1022 | undef $w; |
bd299e29 | 1023 | { |
3036c853 | 1024 | () = eval q ["\N{TOO MANY SPACES}"]; |
2d8eb851 | 1025 | like ($@, qr/charnames alias definitions may not contain a sequence of multiple spaces/, "Multiple spaces in a row in a charnames alias is fatal"); |
3036c853 | 1026 | eval q [use utf8; () = "\N{TOO MANY SPACES}"]; |
2d8eb851 | 1027 | like ($@, qr/charnames alias definitions may not contain a sequence of multiple spaces/, "... same under utf8"); |
94ec3a20 | 1028 | } |
bd299e29 KW |
1029 | |
1030 | undef $w; | |
bd299e29 | 1031 | { |
94ec3a20 | 1032 | () = eval q ["\N{TRAILING SPACE }"]; |
2d8eb851 | 1033 | like ($@, qr/charnames alias definitions may not contain trailing white-space/, "Trailing white-space in a charnames alias is fatal"); |
94ec3a20 | 1034 | eval q [use utf8; () = "\N{TRAILING SPACE }"]; |
2d8eb851 | 1035 | like ($@, qr/charnames alias definitions may not contain trailing white-space/, "... same under utf8"); |
94ec3a20 | 1036 | } |
bd299e29 | 1037 | |
2d8eb851 | 1038 | undef $w; |
7fc82458 KW |
1039 | my $Cedilla_Latin1 = "GAR" |
1040 | . latin1_to_native("\xC7") | |
1041 | . "ON"; | |
1042 | my $Cedilla_utf8 = $Cedilla_Latin1; | |
1043 | utf8::upgrade($Cedilla_utf8); | |
1044 | eval qq[is("\\N{$Cedilla_Latin1}", "$Cedilla_Latin1", "A cedilla in character name works")]; | |
1045 | undef $w; | |
1046 | { | |
1047 | use feature 'unicode_eval'; | |
1048 | eval qq[use utf8; is("\\N{$Cedilla_utf8}", "$Cedilla_utf8", "... same under 'use utf8': they work")]; | |
1049 | } | |
1050 | ||
1051 | undef $w; | |
df758df2 KW |
1052 | my $NBSP_Latin1 = "NBSP" |
1053 | . latin1_to_native("\xA0") | |
1054 | . "SEPARATED" | |
1055 | . latin1_to_native("\xA0") | |
1056 | . "SPACE"; | |
1057 | my $NBSP_utf8 = $NBSP_Latin1; | |
1058 | utf8::upgrade($NBSP_utf8); | |
1059 | eval qq[is("\\N{$NBSP_Latin1}", "$NBSP_Latin1", "An NBSP in character name works")]; | |
1060 | like ($w, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "... but returns a deprecation warning"); | |
1061 | undef $w; | |
1062 | { | |
1063 | use feature 'unicode_eval'; | |
1064 | eval qq[use utf8; is("\\N{$NBSP_utf8}", "$NBSP_utf8", "Same under 'use utf8': they work")]; | |
1065 | like ($w, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "... but return a deprecation warning"); | |
1066 | } | |
1067 | { | |
1068 | # disable lexical warnings | |
1069 | BEGIN { ${^WARNING_BITS} = undef; $^W = 0 } | |
1070 | undef $w; | |
1071 | () = eval qq["\\N{$NBSP_Latin1}"]; | |
1072 | like ($w, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "And returns a deprecation warning outside of lexical warnings"); | |
1073 | undef $w; | |
1074 | use feature 'unicode_eval'; | |
1075 | eval qq[use utf8; () = "\\N{$NBSP_utf8}"]; | |
1076 | like ($w, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "... same under utf8"); | |
1077 | } | |
1078 | { | |
1079 | no warnings 'deprecated'; | |
1080 | undef $w; | |
1081 | eval qq["\\N{$NBSP_Latin1}"]; | |
1082 | ok (! defined $w, "... and no warning if warnings are off"); | |
1083 | use feature 'unicode_eval'; | |
1084 | eval qq[use utf8; "\\N{$NBSP_utf8}"]; | |
1085 | ok (! defined $w, "... same under 'use utf8'"); | |
1086 | } | |
1087 | { | |
1088 | use warnings FATAL=>'deprecated'; | |
1089 | () = eval qq["\\N{$NBSP_Latin1}"]; | |
1090 | like ($@, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "... the warning can be fatal"); | |
1091 | use feature 'unicode_eval'; | |
1092 | eval qq[use utf8; () = "\\N{$NBSP_utf8}"]; | |
1093 | like ($@, qr/NO-BREAK SPACE in a charnames alias definition is deprecated/, "... same under utf8"); | |
1094 | } | |
1095 | ||
6a642c21 FC |
1096 | { |
1097 | BEGIN { no strict; *CnameTest:: = *{"_charnames\0A::" } } | |
1098 | package CnameTest { sub translator { pop } } | |
1099 | BEGIN { $^H{charnames} = \&CnameTest::translator } | |
1100 | undef $w; | |
1101 | () = eval q ["\N{TOO MANY SPACES}"]; | |
2d8eb851 | 1102 | like ($@, qr/charnames alias definitions may not contain a sequence of multiple spaces/, |
6a642c21 FC |
1103 | 'translators in _charnames\0* packages get validated'); |
1104 | } | |
1105 | ||
ff3f963a KW |
1106 | # If remove the limitation in regcomp code these should work |
1107 | # differently | |
1108 | undef $w; | |
e2a7e165 | 1109 | eval q [ok "\N{TOO-LONG-STR}" =~ /^\N{TOO-LONG-STR}$/, 'Verify that what once was too long a string works']; |
ff3f963a KW |
1110 | eval 'q(syntax error) =~ /\N{MALFORMED}/'; |
1111 | ok $@ && $@ =~ /Malformed/, 'Verify that malformed utf8 gives an error'; | |
cb233ae3 | 1112 | eval 'q() =~ /\N{4F}/'; |
4d7cd482 | 1113 | ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in name gives error'; |
cb233ae3 | 1114 | eval 'q() =~ /\N{COM,MA}/'; |
4d7cd482 | 1115 | ok $@ && $@ =~ /Invalid character/, 'Verify that comma in name gives error'; |
94ca1619 | 1116 | $name = "A\x{D7}O"; |
cb233ae3 | 1117 | eval "q(W) =~ /\\N{$name}/"; |
4d7cd482 | 1118 | ok $@ && $@ =~ /Invalid character/, 'Verify that latin1 symbol in name gives error'; |
bde9e88d KW |
1119 | my $utf8_name = "7 CITIES OF GOLD"; |
1120 | utf8::upgrade($utf8_name); | |
1121 | eval "use utf8; q(W) =~ /\\N{$utf8_name}/"; | |
1122 | ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in utf8 name gives error'; | |
1123 | $utf8_name = "SHARP #"; | |
1124 | utf8::upgrade($utf8_name); | |
1125 | eval "use utf8; q(W) =~ /\\N{$utf8_name}/"; | |
1126 | ok $@ && $@ =~ /Invalid character/, 'Verify that ASCII symbol in utf8 name gives error'; | |
1127 | $utf8_name = "A HOUSE \xF7 AGAINST ITSELF"; | |
1128 | utf8::upgrade($utf8_name); | |
1129 | eval "use utf8; q(W) =~ /\\N{$utf8_name}/"; | |
1130 | ok $@ && $@ =~ /Invalid character/, 'Verify that latin1 symbol in utf8 name gives error'; | |
1131 | $utf8_name = "\x{664} HORSEMEN}"; | |
1132 | eval "use utf8; q(W) =~ /\\N{$utf8_name}/"; | |
1133 | ok $@ && $@ =~ /Invalid character/, 'Verify that leading above Latin1 digit in utf8 name gives error'; | |
1134 | $utf8_name = "A \x{1F4A9} WOULD SMELL AS SWEET}"; | |
1135 | eval "use utf8; q(W) =~ /\\N{$utf8_name}/"; | |
1136 | ok $@ && $@ =~ /Invalid character/, 'Verify that above Latin1 symbol in utf8 name gives error'; | |
1137 | ||
cb233ae3 KW |
1138 | undef $w; |
1139 | $name = "A\x{D1}O"; | |
1140 | eval "q(W) =~ /\\N{$name}/"; | |
1141 | ok ! $w, 'Verify that latin1 letter in name doesnt give warning'; | |
0f289c68 | 1142 | |
ba7b73c5 NC |
1143 | # This tests the code path that restarts the parse when the recursive |
1144 | # call to S_reg() from within S_grok_bslash_N() discovers that the | |
1145 | # pattern needs to be recalculated as UTF-8. use eval to avoid | |
1146 | # needing literal Unicode in this source file: | |
1147 | my $r = eval "qr/\\N{\x{100}\x{100}}/"; | |
1148 | isnt $r, undef, "Generated regex for multi-char UTF-8 charname" | |
1149 | or diag($@); | |
1150 | ok "\x{100}\x{100}" =~ $r, "which matches"; | |
e425a60b YO |
1151 | } |
1152 | ||
e425a60b YO |
1153 | { |
1154 | use charnames ':full'; | |
1155 | ||
1156 | ok 'aabc' !~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; | |
1157 | ok 'a+bc' =~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; | |
1158 | ||
1159 | ok ' A B' =~ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
1160 | 'Intermixed named and unicode escapes'; | |
1161 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
1162 | /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
1163 | 'Intermixed named and unicode escapes'; | |
1164 | ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ | |
1165 | /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, | |
0f289c68 | 1166 | 'Intermixed named and unicode escapes'; |
ff3f963a | 1167 | ok "\0" =~ /^\N{NULL}$/, 'Verify that \N{NULL} works; is not confused with an error'; |
e425a60b YO |
1168 | } |
1169 | ||
e425a60b YO |
1170 | { |
1171 | our $brackets; | |
1172 | $brackets = qr{ | |
1173 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
1174 | }x; | |
1175 | ||
1176 | ok "{b{c}d" !~ m/^((??{ $brackets }))/, "Bracket mismatch"; | |
1177 | ||
1178 | SKIP: { | |
1179 | our @stack = (); | |
1180 | my @expect = qw( | |
1181 | stuff1 | |
1182 | stuff2 | |
1183 | <stuff1>and<stuff2> | |
1184 | right | |
1185 | <right> | |
1186 | <<right>> | |
1187 | <<<right>>> | |
1188 | <<stuff1>and<stuff2>><<<<right>>>> | |
1189 | ); | |
1190 | ||
1191 | local $_ = '<<<stuff1>and<stuff2>><<<<right>>>>>'; | |
1192 | ok /^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/, | |
1193 | "Recursion matches"; | |
de26e0cc | 1194 | is(@stack, @expect, "Right amount of matches") |
e425a60b YO |
1195 | or skip "Won't test individual results as count isn't equal", |
1196 | 0 + @expect; | |
1197 | my $idx = 0; | |
1198 | foreach my $expect (@expect) { | |
de26e0cc NC |
1199 | is($stack [$idx], $expect, |
1200 | "Expecting '$expect' at stack pos #$idx"); | |
e425a60b YO |
1201 | $idx ++; |
1202 | } | |
1203 | } | |
1204 | } | |
1205 | ||
e425a60b YO |
1206 | { |
1207 | my $s = '123453456'; | |
1208 | $s =~ s/(?<digits>\d+)\k<digits>/$+{digits}/; | |
1209 | ok $s eq '123456', 'Named capture (angle brackets) s///'; | |
1210 | $s = '123453456'; | |
1211 | $s =~ s/(?'digits'\d+)\k'digits'/$+{digits}/; | |
0f289c68 | 1212 | ok $s eq '123456', 'Named capture (single quotes) s///'; |
e425a60b YO |
1213 | } |
1214 | ||
e425a60b YO |
1215 | { |
1216 | my @ary = ( | |
1217 | pack('U', 0x00F1), # n-tilde | |
1218 | '_'.pack('U', 0x00F1), # _ + n-tilde | |
1219 | 'c'.pack('U', 0x0327), # c + cedilla | |
1220 | pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla | |
e425a60b YO |
1221 | pack('U', 0x0391), # ALPHA |
1222 | pack('U', 0x0391).'2', # ALPHA + 2 | |
1223 | pack('U', 0x0391).'_', # ALPHA + _ | |
1224 | ); | |
1225 | ||
1226 | for my $uni (@ary) { | |
1227 | my ($r1, $c1, $r2, $c2) = eval qq { | |
1228 | use utf8; | |
1229 | scalar ("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), | |
1230 | \$+{${uni}}, | |
1231 | scalar ("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), | |
1232 | \$+{${uni}}; | |
1233 | }; | |
1234 | ok $r1, "Named capture UTF (?'')"; | |
1235 | ok defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"; | |
1236 | ok $r2, "Named capture UTF (?<>)"; | |
1237 | ok defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"; | |
1238 | } | |
1239 | } | |
1240 | ||
e425a60b YO |
1241 | { |
1242 | my $s = 'foo bar baz'; | |
1243 | my @res; | |
1244 | if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { | |
1245 | foreach my $name (sort keys(%-)) { | |
1246 | my $ary = $- {$name}; | |
1247 | foreach my $idx (0 .. $#$ary) { | |
1248 | push @res, "$name:$idx:$ary->[$idx]"; | |
1249 | } | |
1250 | } | |
1251 | } | |
1252 | my @expect = qw (A:0:1 A:1:3 B:0:2 B:1:4); | |
de26e0cc | 1253 | is("@res", "@expect", "Check %-"); |
e425a60b YO |
1254 | eval' |
1255 | no warnings "uninitialized"; | |
1256 | print for $- {this_key_doesnt_exist}; | |
1257 | '; | |
1258 | ok !$@,'lvalue $- {...} should not throw an exception'; | |
1259 | } | |
1260 | ||
e425a60b YO |
1261 | { |
1262 | # \, breaks {3,4} | |
412f55bb | 1263 | no warnings qw{deprecated regexp}; |
e425a60b YO |
1264 | ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern'; |
1265 | ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern'; | |
1266 | ||
1267 | # \c\ followed by _ | |
1268 | ok "x\c_y" !~ /x\c\_y/, '\_ in a pattern'; | |
1269 | ok "x\c\_y" =~ /x\c\_y/, '\_ in a pattern'; | |
1270 | ||
1271 | # \c\ followed by other characters | |
1272 | for my $c ("z", "\0", "!", chr(254), chr(256)) { | |
1273 | my $targ = "a\034$c"; | |
1274 | my $reg = "a\\c\\$c"; | |
1275 | ok eval ("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"; | |
1276 | } | |
1277 | } | |
1278 | ||
e425a60b YO |
1279 | { # Test the (*PRUNE) pattern |
1280 | our $count = 0; | |
1281 | 'aaab' =~ /a+b?(?{$count++})(*FAIL)/; | |
de26e0cc | 1282 | is($count, 9, "Expect 9 for no (*PRUNE)"); |
e425a60b YO |
1283 | $count = 0; |
1284 | 'aaab' =~ /a+b?(*PRUNE)(?{$count++})(*FAIL)/; | |
de26e0cc | 1285 | is($count, 3, "Expect 3 with (*PRUNE)"); |
e425a60b YO |
1286 | local $_ = 'aaab'; |
1287 | $count = 0; | |
1288 | 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g; | |
de26e0cc | 1289 | is($count, 4, "/.(*PRUNE)/"); |
e425a60b YO |
1290 | $count = 0; |
1291 | 'aaab' =~ /a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; | |
de26e0cc | 1292 | is($count, 3, "Expect 3 with (*PRUNE)"); |
e425a60b YO |
1293 | local $_ = 'aaab'; |
1294 | $count = 0; | |
1295 | 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; | |
de26e0cc | 1296 | is($count, 4, "/.(*PRUNE)/"); |
e425a60b YO |
1297 | } |
1298 | ||
e425a60b YO |
1299 | { # Test the (*SKIP) pattern |
1300 | our $count = 0; | |
1301 | 'aaab' =~ /a+b?(*SKIP)(?{$count++})(*FAIL)/; | |
de26e0cc | 1302 | is($count, 1, "Expect 1 with (*SKIP)"); |
e425a60b YO |
1303 | local $_ = 'aaab'; |
1304 | $count = 0; | |
1305 | 1 while /.(*SKIP)(?{$count++})(*FAIL)/g; | |
de26e0cc | 1306 | is($count, 4, "/.(*SKIP)/"); |
e425a60b YO |
1307 | $_ = 'aaabaaab'; |
1308 | $count = 0; | |
1309 | our @res = (); | |
1310 | 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
de26e0cc NC |
1311 | is($count, 2, "Expect 2 with (*SKIP)"); |
1312 | is("@res", "aaab aaab", "Adjacent (*SKIP) works as expected"); | |
e425a60b YO |
1313 | } |
1314 | ||
e425a60b YO |
1315 | { # Test the (*SKIP) pattern |
1316 | our $count = 0; | |
1317 | 'aaab' =~ /a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; | |
de26e0cc | 1318 | is($count, 1, "Expect 1 with (*SKIP)"); |
e425a60b YO |
1319 | local $_ = 'aaab'; |
1320 | $count = 0; | |
1321 | 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g; | |
de26e0cc | 1322 | is($count, 4, "/.(*SKIP)/"); |
e425a60b YO |
1323 | $_ = 'aaabaaab'; |
1324 | $count = 0; | |
1325 | our @res = (); | |
1326 | 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; | |
de26e0cc NC |
1327 | is($count, 2, "Expect 2 with (*SKIP)"); |
1328 | is("@res", "aaab aaab", "Adjacent (*SKIP) works as expected"); | |
e425a60b YO |
1329 | } |
1330 | ||
e425a60b YO |
1331 | { # Test the (*SKIP) pattern |
1332 | our $count = 0; | |
1333 | 'aaab' =~ /a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/; | |
de26e0cc | 1334 | is($count, 3, "Expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)"); |
e425a60b YO |
1335 | local $_ = 'aaabaaab'; |
1336 | $count = 0; | |
1337 | our @res = (); | |
1338 | 1 while | |
1339 | /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g; | |
de26e0cc NC |
1340 | is($count, 5, "Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)"); |
1341 | is("@res", "aaab b aaab b ", | |
1342 | "Adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected"); | |
e425a60b YO |
1343 | } |
1344 | ||
e425a60b YO |
1345 | { # Test the (*COMMIT) pattern |
1346 | our $count = 0; | |
1347 | 'aaabaaab' =~ /a+b?(*COMMIT)(?{$count++})(*FAIL)/; | |
de26e0cc | 1348 | is($count, 1, "Expect 1 with (*COMMIT)"); |
e425a60b YO |
1349 | local $_ = 'aaab'; |
1350 | $count = 0; | |
1351 | 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g; | |
de26e0cc | 1352 | is($count, 1, "/.(*COMMIT)/"); |
e425a60b YO |
1353 | $_ = 'aaabaaab'; |
1354 | $count = 0; | |
1355 | our @res = (); | |
1356 | 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g; | |
de26e0cc NC |
1357 | is($count, 1, "Expect 1 with (*COMMIT)"); |
1358 | is("@res", "aaab", "Adjacent (*COMMIT) works as expected"); | |
3542935d DM |
1359 | |
1360 | ok("1\n2a\n" !~ /^\d+(*COMMIT)\w+/m, "COMMIT and anchors"); | |
e425a60b YO |
1361 | } |
1362 | ||
e425a60b YO |
1363 | { |
1364 | # Test named commits and the $REGERROR var | |
1365 | our $REGERROR; | |
1366 | for my $name ('', ':foo') { | |
1367 | for my $pat ("(*PRUNE$name)", | |
1368 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
0f289c68 | 1369 | "(*COMMIT$name)") { |
e425a60b YO |
1370 | for my $suffix ('(*FAIL)', '') { |
1371 | 'aaaab' =~ /a+b$pat$suffix/; | |
de26e0cc | 1372 | is($REGERROR, |
e425a60b | 1373 | ($suffix ? ($name ? 'foo' : "1") : ""), |
de26e0cc | 1374 | "Test $pat and \$REGERROR $suffix"); |
e425a60b YO |
1375 | } |
1376 | } | |
1377 | } | |
1378 | } | |
1379 | ||
e425a60b YO |
1380 | { |
1381 | # Test named commits and the $REGERROR var | |
1382 | package Fnorble; | |
1383 | our $REGERROR; | |
1384 | for my $name ('', ':foo') { | |
1385 | for my $pat ("(*PRUNE$name)", | |
1386 | ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", | |
0f289c68 | 1387 | "(*COMMIT$name)") { |
e425a60b YO |
1388 | for my $suffix ('(*FAIL)','') { |
1389 | 'aaaab' =~ /a+b$pat$suffix/; | |
de26e0cc | 1390 | ::is($REGERROR, |
e425a60b | 1391 | ($suffix ? ($name ? 'foo' : "1") : ""), |
de26e0cc | 1392 | "Test $pat and \$REGERROR $suffix"); |
e425a60b YO |
1393 | } |
1394 | } | |
0f289c68 YO |
1395 | } |
1396 | } | |
e425a60b | 1397 | |
e425a60b YO |
1398 | { |
1399 | # Test named commits and the $REGERROR var | |
de946258 | 1400 | my $message = '$REGERROR'; |
e425a60b YO |
1401 | our $REGERROR; |
1402 | for my $word (qw (bar baz bop)) { | |
1403 | $REGERROR = ""; | |
1404 | "aaaaa$word" =~ | |
1405 | /a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/; | |
de946258 | 1406 | is($REGERROR, $word, $message); |
0f289c68 | 1407 | } |
e425a60b YO |
1408 | } |
1409 | ||
e425a60b YO |
1410 | { |
1411 | #Mindnumbingly simple test of (*THEN) | |
1412 | for ("ABC","BAX") { | |
1413 | ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test"; | |
1414 | } | |
1415 | } | |
1416 | ||
e425a60b | 1417 | { |
de946258 | 1418 | my $message = "Relative Recursion"; |
e425a60b YO |
1419 | my $parens = qr/(\((?:[^()]++|(?-1))*+\))/; |
1420 | local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
1421 | my ($all, $one, $two) = ('', '', ''); | |
de946258 NC |
1422 | ok(m/foo $parens \s* \+ \s* bar $parens/x, $message); |
1423 | is($1, '((2*3)+4-3)', $message); | |
1424 | is($2, '(2*(3+4)-1*(2-3))', $message); | |
1425 | is($&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))', $message); | |
1426 | is($&, $_, $message); | |
e425a60b YO |
1427 | } |
1428 | ||
1429 | { | |
1430 | my $spaces=" "; | |
1431 | local $_ = join 'bar', $spaces, $spaces; | |
1432 | our $count = 0; | |
1433 | s/(?>\s+bar)(?{$count++})//g; | |
de26e0cc NC |
1434 | is($_, $spaces, "SUSPEND final string"); |
1435 | is($count, 1, "Optimiser should have prevented more than one match"); | |
e425a60b YO |
1436 | } |
1437 | ||
e425a60b YO |
1438 | { |
1439 | # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus> | |
1440 | my $dow_name = "nada"; | |
1441 | my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " . | |
1442 | "C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/"; | |
1443 | my $time_string = "D\x{e9} C\x{e9}adaoin"; | |
1444 | eval $parser; | |
1445 | ok !$@, "Test Eval worked"; | |
de26e0cc | 1446 | is($dow_name, $time_string, "UTF-8 trie common prefix extraction"); |
e425a60b YO |
1447 | } |
1448 | ||
e425a60b YO |
1449 | { |
1450 | my $v; | |
1451 | ($v = 'bar') =~ /(\w+)/g; | |
1452 | $v = 'foo'; | |
de26e0cc NC |
1453 | is("$1", 'bar', |
1454 | '$1 is safe after /g - may fail due to specialized config in pp_hot.c'); | |
e425a60b YO |
1455 | } |
1456 | ||
e425a60b | 1457 | { |
de946258 | 1458 | my $message = "http://nntp.perl.org/group/perl.perl5.porters/118663"; |
e425a60b | 1459 | my $qr_barR1 = qr/(bar)\g-1/; |
de946258 NC |
1460 | like("foobarbarxyz", $qr_barR1, $message); |
1461 | like("foobarbarxyz", qr/foo${qr_barR1}xyz/, $message); | |
1462 | like("foobarbarxyz", qr/(foo)${qr_barR1}xyz/, $message); | |
1463 | like("foobarbarxyz", qr/(foo)(bar)\g{-1}xyz/, $message); | |
1464 | like("foobarbarxyz", qr/(foo${qr_barR1})xyz/, $message); | |
1465 | like("foobarbarxyz", qr/(foo(bar)\g{-1})xyz/, $message); | |
0f289c68 | 1466 | } |
e425a60b | 1467 | |
e425a60b | 1468 | { |
de946258 | 1469 | my $message = '$REGMARK'; |
e425a60b YO |
1470 | our @r = (); |
1471 | our ($REGMARK, $REGERROR); | |
de946258 NC |
1472 | like('foofoo', qr/foo (*MARK:foo) (?{push @r,$REGMARK}) /x, $message); |
1473 | is("@r","foo", $message); | |
1474 | is($REGMARK, "foo", $message); | |
1475 | unlike('foofoo', qr/foo (*MARK:foo) (*FAIL) /x, $message); | |
1476 | is($REGMARK, '', $message); | |
1477 | is($REGERROR, 'foo', $message); | |
e425a60b YO |
1478 | } |
1479 | ||
e425a60b | 1480 | { |
de946258 | 1481 | my $message = '\K test'; |
e425a60b YO |
1482 | my $x; |
1483 | $x = "abc.def.ghi.jkl"; | |
1484 | $x =~ s/.*\K\..*//; | |
de946258 | 1485 | is($x, "abc.def.ghi", $message); |
0f289c68 | 1486 | |
e425a60b YO |
1487 | $x = "one two three four"; |
1488 | $x =~ s/o+ \Kthree//g; | |
de946258 | 1489 | is($x, "one two four", $message); |
0f289c68 | 1490 | |
e425a60b YO |
1491 | $x = "abcde"; |
1492 | $x =~ s/(.)\K/$1/g; | |
de946258 | 1493 | is($x, "aabbccddee", $message); |
e425a60b YO |
1494 | } |
1495 | ||
e425a60b YO |
1496 | { |
1497 | sub kt { | |
1498 | return '4' if $_[0] eq '09028623'; | |
1499 | } | |
1500 | # Nested EVAL using PL_curpm (via $1 or friends) | |
1501 | my $re; | |
1502 | our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; | |
1503 | $re = qr/^ ( (??{ $grabit }) ) $ /x; | |
1504 | my @res = '0902862349' =~ $re; | |
de26e0cc NC |
1505 | is(join ("-", @res), "0902862349", |
1506 | 'PL_curpm is set properly on nested eval'); | |
e425a60b YO |
1507 | |
1508 | our $qr = qr/ (o) (??{ $1 }) /x; | |
1509 | ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval"; | |
1510 | } | |
1511 | ||
e425a60b YO |
1512 | { |
1513 | use charnames ":full"; | |
1514 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"; | |
1515 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"; | |
1516 | ok "\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"; | |
1517 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"; | |
1518 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"; | |
1519 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"; | |
1520 | ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"; | |
0ab1ff8e KW |
1521 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/i, "i =~ Uppercase under /i"; |
1522 | ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Titlecase}/, "i !~ Titlecase"; | |
1523 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Titlecase}/i, "i =~ Titlecase under /i"; | |
5895685f NC |
1524 | ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/i, "I =~ Lowercase under /i"; |
1525 | ||
e425a60b YO |
1526 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"; |
1527 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"; | |
1528 | ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue" | |
1529 | } | |
1530 | ||
0ab1ff8e KW |
1531 | { # More checking that /i works on the few properties that it makes a |
1532 | # difference. Uppercase, Lowercase, and Titlecase were done in the | |
1533 | # block above | |
1534 | ok "A" =~ /\p{PosixUpper}/, "A =~ PosixUpper"; | |
1535 | ok "A" =~ /\p{PosixUpper}/i, "A =~ PosixUpper under /i"; | |
1536 | ok "A" !~ /\p{PosixLower}/, "A !~ PosixLower"; | |
1537 | ok "A" =~ /\p{PosixLower}/i, "A =~ PosixLower under /i"; | |
1538 | ok "a" !~ /\p{PosixUpper}/, "a !~ PosixUpper"; | |
1539 | ok "a" =~ /\p{PosixUpper}/i, "a =~ PosixUpper under /i"; | |
1540 | ok "a" =~ /\p{PosixLower}/, "a =~ PosixLower"; | |
1541 | ok "a" =~ /\p{PosixLower}/i, "a =~ PosixLower under /i"; | |
1542 | ||
1543 | ok "\xC0" =~ /\p{XPosixUpper}/, "\\xC0 =~ XPosixUpper"; | |
1544 | ok "\xC0" =~ /\p{XPosixUpper}/i, "\\xC0 =~ XPosixUpper under /i"; | |
1545 | ok "\xC0" !~ /\p{XPosixLower}/, "\\xC0 !~ XPosixLower"; | |
1546 | ok "\xC0" =~ /\p{XPosixLower}/i, "\\xC0 =~ XPosixLower under /i"; | |
1547 | ok "\xE0" !~ /\p{XPosixUpper}/, "\\xE0 !~ XPosixUpper"; | |
1548 | ok "\xE0" =~ /\p{XPosixUpper}/i, "\\xE0 =~ XPosixUpper under /i"; | |
1549 | ok "\xE0" =~ /\p{XPosixLower}/, "\\xE0 =~ XPosixLower"; | |
1550 | ok "\xE0" =~ /\p{XPosixLower}/i, "\\xE0 =~ XPosixLower under /i"; | |
1551 | ||
1552 | ok "\xC0" =~ /\p{UppercaseLetter}/, "\\xC0 =~ UppercaseLetter"; | |
1553 | ok "\xC0" =~ /\p{UppercaseLetter}/i, "\\xC0 =~ UppercaseLetter under /i"; | |
1554 | ok "\xC0" !~ /\p{LowercaseLetter}/, "\\xC0 !~ LowercaseLetter"; | |
1555 | ok "\xC0" =~ /\p{LowercaseLetter}/i, "\\xC0 =~ LowercaseLetter under /i"; | |
1556 | ok "\xC0" !~ /\p{TitlecaseLetter}/, "\\xC0 !~ TitlecaseLetter"; | |
1557 | ok "\xC0" =~ /\p{TitlecaseLetter}/i, "\\xC0 =~ TitlecaseLetter under /i"; | |
1558 | ok "\xE0" !~ /\p{UppercaseLetter}/, "\\xE0 !~ UppercaseLetter"; | |
1559 | ok "\xE0" =~ /\p{UppercaseLetter}/i, "\\xE0 =~ UppercaseLetter under /i"; | |
1560 | ok "\xE0" =~ /\p{LowercaseLetter}/, "\\xE0 =~ LowercaseLetter"; | |
1561 | ok "\xE0" =~ /\p{LowercaseLetter}/i, "\\xE0 =~ LowercaseLetter under /i"; | |
1562 | ok "\xE0" !~ /\p{TitlecaseLetter}/, "\\xE0 !~ TitlecaseLetter"; | |
1563 | ok "\xE0" =~ /\p{TitlecaseLetter}/i, "\\xE0 =~ TitlecaseLetter under /i"; | |
1564 | ok "\x{1C5}" !~ /\p{UppercaseLetter}/, "\\x{1C5} !~ UppercaseLetter"; | |
1565 | ok "\x{1C5}" =~ /\p{UppercaseLetter}/i, "\\x{1C5} =~ UppercaseLetter under /i"; | |
1566 | ok "\x{1C5}" !~ /\p{LowercaseLetter}/, "\\x{1C5} !~ LowercaseLetter"; | |
1567 | ok "\x{1C5}" =~ /\p{LowercaseLetter}/i, "\\x{1C5} =~ LowercaseLetter under /i"; | |
1568 | ok "\x{1C5}" =~ /\p{TitlecaseLetter}/, "\\x{1C5} =~ TitlecaseLetter"; | |
1569 | ok "\x{1C5}" =~ /\p{TitlecaseLetter}/i, "\\x{1C5} =~ TitlecaseLetter under /i"; | |
1570 | } | |
1571 | ||
e425a60b YO |
1572 | { |
1573 | # requirement of Unicode Technical Standard #18, 1.7 Code Points | |
1574 | # cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters | |
1575 | for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { | |
1576 | no warnings 'utf8'; # oops | |
1577 | my $c = chr $u; | |
1578 | my $x = sprintf '%04X', $u; | |
1579 | ok "A${c}B" =~ /A[\0-\x{10000}]B/, "Unicode range - $x"; | |
1580 | } | |
1581 | } | |
1582 | ||
e425a60b YO |
1583 | { |
1584 | my $res=""; | |
1585 | ||
1586 | if ('1' =~ /(?|(?<digit>1)|(?<digit>2))/) { | |
1587 | $res = "@{$- {digit}}"; | |
1588 | } | |
de26e0cc NC |
1589 | is($res, "1", |
1590 | "Check that (?|...) doesnt cause dupe entries in the names array"); | |
0f289c68 | 1591 | |
e425a60b YO |
1592 | $res = ""; |
1593 | if ('11' =~ /(?|(?<digit>1)|(?<digit>2))(?&digit)/) { | |
1594 | $res = "@{$- {digit}}"; | |
1595 | } | |
de26e0cc NC |
1596 | is($res, "1", |
1597 | "Check that (?&..) to a buffer inside a (?|...) goes to the leftmost"); | |
e425a60b YO |
1598 | } |
1599 | ||
e425a60b YO |
1600 | { |
1601 | use warnings; | |
de946258 | 1602 | my $message = "ASCII pattern that really is UTF-8"; |
e425a60b YO |
1603 | my @w; |
1604 | local $SIG {__WARN__} = sub {push @w, "@_"}; | |
0f289c68 | 1605 | my $c = qq (\x{DF}); |
de946258 NC |
1606 | like($c, qr/${c}|\x{100}/, $message); |
1607 | is("@w", '', $message); | |
0f289c68 | 1608 | } |
e425a60b | 1609 | |
e425a60b | 1610 | { |
de946258 | 1611 | my $message = "Corruption of match results of qr// across scopes"; |
e425a60b YO |
1612 | my $qr = qr/(fo+)(ba+r)/; |
1613 | 'foobar' =~ /$qr/; | |
de946258 | 1614 | is("$1$2", "foobar", $message); |
e425a60b YO |
1615 | { |
1616 | 'foooooobaaaaar' =~ /$qr/; | |
de946258 | 1617 | is("$1$2", 'foooooobaaaaar', $message); |
e425a60b | 1618 | } |
de946258 | 1619 | is("$1$2", "foobar", $message); |
e425a60b YO |
1620 | } |
1621 | ||
e425a60b | 1622 | { |
de946258 | 1623 | my $message = "HORIZWS"; |
e425a60b YO |
1624 | local $_ = "\t \r\n \n \t".chr(11)."\n"; |
1625 | s/\H/H/g; | |
1626 | s/\h/h/g; | |
de946258 | 1627 | is($_, "hhHHhHhhHH", $message); |
e425a60b YO |
1628 | $_ = "\t \r\n \n \t" . chr (11) . "\n"; |
1629 | utf8::upgrade ($_); | |
1630 | s/\H/H/g; | |
1631 | s/\h/h/g; | |
de946258 | 1632 | is($_, "hhHHhHhhHH", $message); |
0f289c68 | 1633 | } |
e425a60b | 1634 | |
e425a60b | 1635 | { |
de946258 | 1636 | # Various whitespace special patterns |
a9c9e371 | 1637 | my @h = map {chr $_} 0x09, 0x20, 0xa0, 0x1680, 0x2000, |
e425a60b YO |
1638 | 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, |
1639 | 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, | |
1640 | 0x3000; | |
1641 | my @v = map {chr $_} 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, | |
1642 | 0x2029; | |
1643 | my @lb = ("\x0D\x0A", map {chr $_} 0x0A .. 0x0D, 0x85, 0x2028, 0x2029); | |
1644 | foreach my $t ([\@h, qr/\h/, qr/\h+/], | |
1645 | [\@v, qr/\v/, qr/\v+/], | |
1646 | [\@lb, qr/\R/, qr/\R+/],) { | |
1647 | my $ary = shift @$t; | |
1648 | foreach my $pat (@$t) { | |
1649 | foreach my $str (@$ary) { | |
df6841b6 KW |
1650 | my $temp_str = $str; |
1651 | $temp_str = display($temp_str); | |
1652 | ok $str =~ /($pat)/, $temp_str . " =~ /($pat)"; | |
1653 | my $temp_1 = $1; | |
1654 | is($1, $str, "\$1='" . display($temp_1) . "' eq '" . $temp_str . "' after ($pat)"); | |
e425a60b | 1655 | utf8::upgrade ($str); |
df6841b6 KW |
1656 | ok $str =~ /($pat)/, "Upgraded " . $temp_str . " =~ /($pat)/"; |
1657 | is($1, $str, "\$1='" . display($temp_1) . "' eq '" . $temp_str . "'(upgraded) after ($pat)"); | |
e425a60b YO |
1658 | } |
1659 | } | |
1660 | } | |
1661 | } | |
1662 | ||
e425a60b | 1663 | { |
de946258 | 1664 | # Check that \\xDF match properly in its various forms |
e425a60b YO |
1665 | # Test that \xDF matches properly. this is pretty hacky stuff, |
1666 | # but its actually needed. The malarky with '-' is to prevent | |
1667 | # compilation caching from playing any role in the test. | |
1668 | my @df = (chr (0xDF), '-', chr (0xDF)); | |
1669 | utf8::upgrade ($df [2]); | |
1670 | my @strs = ('ss', 'sS', 'Ss', 'SS', chr (0xDF)); | |
1671 | my @ss = map {("$_", "$_")} @strs; | |
1672 | utf8::upgrade ($ss [$_ * 2 + 1]) for 0 .. $#strs; | |
1673 | ||
1674 | for my $ssi (0 .. $#ss) { | |
1675 | for my $dfi (0 .. $#df) { | |
1676 | my $pat = $df [$dfi]; | |
1677 | my $str = $ss [$ssi]; | |
1678 | my $utf_df = ($dfi > 1) ? 'utf8' : ''; | |
1679 | my $utf_ss = ($ssi % 2) ? 'utf8' : ''; | |
1680 | (my $sstr = $str) =~ s/\xDF/\\xDF/; | |
1681 | ||
1682 | if ($utf_df || $utf_ss || length ($ss [$ssi]) == 1) { | |
1683 | my $ret = $str =~ /$pat/i; | |
1684 | next if $pat eq '-'; | |
1685 | ok $ret, "\"$sstr\" =~ /\\xDF/i " . | |
1686 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
1687 | "@{[$utf_df||'latin']})"; | |
1688 | } | |
1689 | else { | |
1690 | my $ret = $str !~ /$pat/i; | |
1691 | next if $pat eq '-'; | |
1692 | ok $ret, "\"$sstr\" !~ /\\xDF/i " . | |
1693 | "(str is @{[$utf_ss||'latin']}, pat is " . | |
1694 | "@{[$utf_df||'latin']})"; | |
1695 | } | |
1696 | } | |
1697 | } | |
1698 | } | |
1699 | ||
e425a60b | 1700 | { |
de946258 | 1701 | my $message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte"; |
e425a60b YO |
1702 | my $re = qr/(?:[\x00-\xFF]{4})/; |
1703 | my $hyp = "\0\0\0-"; | |
1704 | my $esc = "\0\0\0\\"; | |
1705 | ||
1706 | my $str = "$esc$hyp$hyp$esc$esc"; | |
1707 | my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); | |
1708 | ||
de946258 | 1709 | is(@a,3, $message); |
e425a60b | 1710 | local $" = "="; |
de946258 | 1711 | is("@a","$esc$hyp=$hyp=$esc$esc", $message); |
e425a60b YO |
1712 | } |
1713 | ||
e425a60b YO |
1714 | { |
1715 | # Test for keys in %+ and %- | |
de946258 | 1716 | my $message = 'Test keys in %+ and %-'; |
dcd695b6 | 1717 | no warnings 'uninitialized', 'deprecated', 'experimental::lexical_topic'; |
e425a60b YO |
1718 | my $_ = "abcdef"; |
1719 | /(?<foo>a)|(?<foo>b)/; | |
de946258 NC |
1720 | is((join ",", sort keys %+), "foo", $message); |
1721 | is((join ",", sort keys %-), "foo", $message); | |
1722 | is((join ",", sort values %+), "a", $message); | |
1723 | is((join ",", sort map "@$_", values %-), "a ", $message); | |
e425a60b | 1724 | /(?<bar>a)(?<bar>b)(?<quux>.)/; |
de946258 NC |
1725 | is((join ",", sort keys %+), "bar,quux", $message); |
1726 | is((join ",", sort keys %-), "bar,quux", $message); | |
1727 | is((join ",", sort values %+), "a,c", $message); # leftmost | |
1728 | is((join ",", sort map "@$_", values %-), "a b,c", $message); | |
e425a60b | 1729 | /(?<un>a)(?<deux>c)?/; # second buffer won't capture |
de946258 NC |
1730 | is((join ",", sort keys %+), "un", $message); |
1731 | is((join ",", sort keys %-), "deux,un", $message); | |
1732 | is((join ",", sort values %+), "a", $message); | |
1733 | is((join ",", sort map "@$_", values %-), ",a", $message); | |
e425a60b YO |
1734 | } |
1735 | ||
e425a60b YO |
1736 | { |
1737 | # length() on captures, the numbered ones end up in Perl_magic_len | |
dcd695b6 | 1738 | no warnings 'deprecated', 'experimental::lexical_topic'; |
e425a60b YO |
1739 | my $_ = "aoeu \xe6var ook"; |
1740 | /^ \w+ \s (?<eek>\S+)/x; | |
1741 | ||
de26e0cc NC |
1742 | is(length $`, 0, q[length $`]); |
1743 | is(length $', 4, q[length $']); | |
1744 | is(length $&, 9, q[length $&]); | |
1745 | is(length $1, 4, q[length $1]); | |
1746 | is(length $+{eek}, 4, q[length $+{eek} == length $1]); | |
e425a60b YO |
1747 | } |
1748 | ||
e425a60b YO |
1749 | { |
1750 | my $ok = -1; | |
1751 | ||
1752 | $ok = exists ($-{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
de26e0cc NC |
1753 | is($ok, 1, '$-{x} exists after "bar"=~/(?<x>foo)|bar/'); |
1754 | is(scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'); | |
1755 | is(scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'); | |
e425a60b YO |
1756 | |
1757 | $ok = -1; | |
1758 | $ok = exists ($+{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; | |
de26e0cc NC |
1759 | is($ok, 0, '$+{x} not exists after "bar"=~/(?<x>foo)|bar/'); |
1760 | is(scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'); | |
1761 | is(scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'); | |
e425a60b YO |
1762 | |
1763 | $ok = -1; | |
1764 | $ok = exists ($-{x}) ? 1 : 0 if 'foo' =~ /(?<x>foo)|bar/; | |
de26e0cc NC |
1765 | is($ok, 1, '$-{x} exists after "foo"=~/(?<x>foo)|bar/'); |
1766 | is(scalar (%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/'); | |
1767 | is(scalar (%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/'); | |
e425a60b YO |
1768 | |
1769 | $ok = -1; | |
1770 | $ok = exists ($+{x}) ? 1 : 0 if 'foo'=~/(?<x>foo)|bar/; | |
de26e0cc | 1771 | is($ok, 1, '$+{x} exists after "foo"=~/(?<x>foo)|bar/'); |
e425a60b YO |
1772 | } |
1773 | ||
e425a60b YO |
1774 | { |
1775 | local $_; | |
1776 | ($_ = 'abc') =~ /(abc)/g; | |
0f289c68 | 1777 | $_ = '123'; |
de26e0cc | 1778 | is("$1", 'abc', "/g leads to unsafe match vars: $1"); |
1b474ee3 NC |
1779 | |
1780 | fresh_perl_is(<<'EOP', ">abc<\n", {}, 'mention $&'); | |
1781 | $&; | |
1782 | my $x; | |
1783 | ($x='abc')=~/(abc)/g; | |
1784 | $x='123'; | |
1785 | print ">$1<\n"; | |
1786 | EOP | |
1787 | ||
1b474ee3 NC |
1788 | fresh_perl_is(<<'EOP', ">abc<\n", {}, 'no mention of $&'); |
1789 | my $x; | |
1790 | ($x='abc')=~/(abc)/g; | |
1791 | $x='123'; | |
1792 | print ">$1<\n"; | |
1793 | EOP | |
e425a60b YO |
1794 | } |
1795 | ||
e425a60b | 1796 | { |
de946258 | 1797 | # Message-ID: <20070818091501.7eff4831@r2d2> |
e425a60b YO |
1798 | my $str = ""; |
1799 | for (0 .. 5) { | |
1800 | my @x; | |
1801 | $str .= "@x"; # this should ALWAYS be the empty string | |
1802 | 'a' =~ /(a|)/; | |
1803 | push @x, 1; | |
1804 | } | |
de26e0cc | 1805 | is(length $str, 0, "Trie scope error, string should be empty"); |
e425a60b YO |
1806 | $str = ""; |
1807 | my @foo = ('a') x 5; | |
1808 | for (@foo) { | |
1809 | my @bar; | |
1810 | $str .= "@bar"; | |
1811 | s/a|/push @bar, 1/e; | |
1812 | } | |
de26e0cc | 1813 | is(length $str, 0, "Trie scope error, string should be empty"); |
e425a60b YO |
1814 | } |
1815 | ||
e425a60b | 1816 | { |
e425a60b | 1817 | # more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding |
0f289c68 YO |
1818 | for my $chr (160 .. 255) { |
1819 | my $chr_byte = chr($chr); | |
1820 | my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); | |
1821 | my $rx = qr{$chr_byte|X}i; | |
1822 | ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); | |
1823 | } | |
e425a60b YO |
1824 | } |
1825 | ||
1826 | { | |
e425a60b YO |
1827 | our $a = 3; "" =~ /(??{ $a })/; |
1828 | our $b = $a; | |
de26e0cc | 1829 | is($b, $a, "Copy of scalar used for postponed subexpression"); |
e425a60b YO |
1830 | } |
1831 | ||
e425a60b | 1832 | { |
e425a60b YO |
1833 | our @ctl_n = (); |
1834 | our @plus = (); | |
1835 | our $nested_tags; | |
1836 | $nested_tags = qr{ | |
1837 | < | |
1838 | (\w+) | |
1839 | (?{ | |
1840 | push @ctl_n,$^N; | |
1841 | push @plus,$+; | |
1842 | }) | |
1843 | > | |
1844 | (??{$nested_tags})* | |
1845 | </\s* \w+ \s*> | |
1846 | }x; | |
1847 | ||
1848 | my $match = '<bla><blubb></blubb></bla>' =~ m/^$nested_tags$/; | |
1849 | ok $match, 'nested construct matches'; | |
de26e0cc NC |
1850 | is("@ctl_n", "bla blubb", '$^N inside of (?{}) works as expected'); |
1851 | is("@plus", "bla blubb", '$+ inside of (?{}) works as expected'); | |
e425a60b YO |
1852 | } |
1853 | ||
e425a60b YO |
1854 | SKIP: { |
1855 | # XXX: This set of tests is essentially broken, POSIX character classes | |
0f289c68 | 1856 | # should not have differing definitions under Unicode. |
e425a60b | 1857 | # There are property names for that. |
ef237063 | 1858 | skip "Tests assume ASCII", 4 unless $::IS_ASCII; |
e425a60b YO |
1859 | |
1860 | my @notIsPunct = grep {/[[:punct:]]/ and not /\p{IsPunct}/} | |
1861 | map {chr} 0x20 .. 0x7f; | |
1a3f15c1 | 1862 | is(join ('', @notIsPunct), '$+<=>^`|~', |
de26e0cc | 1863 | '[:punct:] disagrees with IsPunct on Symbols'); |
e425a60b YO |
1864 | |
1865 | my @isPrint = grep {not /[[:print:]]/ and /\p{IsPrint}/} | |
1866 | map {chr} 0 .. 0x1f, 0x7f .. 0x9f; | |
de26e0cc NC |
1867 | is(join ('', @isPrint), "", |
1868 | 'IsPrint agrees with [:print:] on control characters'); | |
e425a60b YO |
1869 | |
1870 | my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} | |
1871 | map {chr} 0x80 .. 0xff; | |
7620cb10 | 1872 | is(join ('', @isPunct), "\xa1\xa7\xab\xb6\xb7\xbb\xbf", # ¡ « · » ¿ |
de26e0cc | 1873 | 'IsPunct disagrees with [:punct:] outside ASCII'); |
e425a60b YO |
1874 | |
1875 | my @isPunctLatin1 = eval q { | |
d3532481 KW |
1876 | no warnings 'deprecated'; |
1877 | use encoding 'latin1'; | |
e425a60b YO |
1878 | grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; |
1879 | }; | |
1880 | skip "Eval failed ($@)", 1 if $@; | |
1881 | skip "PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS set to 0", 1 | |
1882 | if !$ENV{PERL_TEST_LEGACY_POSIX_CC}; | |
de26e0cc NC |
1883 | is(join ('', @isPunctLatin1), '', |
1884 | 'IsPunct agrees with [:punct:] with explicit Latin1'); | |
0f289c68 | 1885 | } |
d1c771f5 | 1886 | |
d1c771f5 B |
1887 | { |
1888 | # Tests for [#perl 71942] | |
1889 | our $count_a; | |
1890 | our $count_b; | |
1891 | ||
1892 | my $c = 0; | |
1893 | for my $re ( | |
1894 | # [ | |
1895 | # should match?, | |
1896 | # input string, | |
1897 | # re 1, | |
1898 | # re 2, | |
1899 | # expected values of count_a and count_b, | |
1900 | # ] | |
1901 | [ | |
1902 | 0, | |
1903 | "xababz", | |
1904 | qr/a+(?{$count_a++})b?(*COMMIT)(*FAIL)/, | |
1905 | qr/a+(?{$count_b++})b?(*COMMIT)z/, | |
1906 | 1, | |
1907 | ], | |
1908 | [ | |
1909 | 0, | |
1910 | "xababz", | |
1911 | qr/a+(?{$count_a++})b?(*COMMIT)\s*(*FAIL)/, | |
1912 | qr/a+(?{$count_b++})b?(*COMMIT)\s*z/, | |
1913 | 1, | |
1914 | ], | |
1915 | [ | |
1916 | 0, | |
1917 | "xababz", | |
1918 | qr/a+(?{$count_a++})(?:b|)?(*COMMIT)(*FAIL)/, | |
1919 | qr/a+(?{$count_b++})(?:b|)?(*COMMIT)z/, | |
1920 | 1, | |
1921 | ], | |
1922 | [ | |
1923 | 0, | |
1924 | "xababz", | |
1925 | qr/a+(?{$count_a++})b{0,6}(*COMMIT)(*FAIL)/, | |
1926 | qr/a+(?{$count_b++})b{0,6}(*COMMIT)z/, | |
1927 | 1, | |
1928 | ], | |
1929 | [ | |
1930 | 0, | |
1931 | "xabcabcz", | |
1932 | qr/a+(?{$count_a++})(bc){0,6}(*COMMIT)(*FAIL)/, | |
1933 | qr/a+(?{$count_b++})(bc){0,6}(*COMMIT)z/, | |
1934 | 1, | |
1935 | ], | |
1936 | [ | |
1937 | 0, | |
1938 | "xabcabcz", | |
1939 | qr/a+(?{$count_a++})(bc*){0,6}(*COMMIT)(*FAIL)/, | |
1940 | qr/a+(?{$count_b++})(bc*){0,6}(*COMMIT)z/, | |
1941 | 1, | |
1942 | ], | |
1943 | ||
1944 | ||
1945 | [ | |
1946 | 0, | |
1947 | "aaaabtz", | |
1948 | qr/a+(?{$count_a++})b?(*PRUNE)(*FAIL)/, | |
1949 | qr/a+(?{$count_b++})b?(*PRUNE)z/, | |
1950 | 4, | |
1951 | ], | |
1952 | [ | |
1953 | 0, | |
1954 | "aaaabtz", | |
1955 | qr/a+(?{$count_a++})b?(*PRUNE)\s*(*FAIL)/, | |
1956 | qr/a+(?{$count_b++})b?(*PRUNE)\s*z/, | |
1957 | 4, | |
1958 | ], | |
1959 | [ | |
1960 | 0, | |
1961 | "aaaabtz", | |
1962 | qr/a+(?{$count_a++})(?:b|)(*PRUNE)(*FAIL)/, | |
1963 | qr/a+(?{$count_b++})(?:b|)(*PRUNE)z/, | |
1964 | 4, | |
1965 | ], | |
1966 | [ | |
1967 | 0, | |
1968 | "aaaabtz", | |
1969 | qr/a+(?{$count_a++})b{0,6}(*PRUNE)(*FAIL)/, | |
1970 | qr/a+(?{$count_b++})b{0,6}(*PRUNE)z/, | |
1971 | 4, | |
1972 | ], | |
1973 | [ | |
1974 | 0, | |
1975 | "aaaabctz", | |
1976 | qr/a+(?{$count_a++})(bc){0,6}(*PRUNE)(*FAIL)/, | |
1977 | qr/a+(?{$count_b++})(bc){0,6}(*PRUNE)z/, | |
1978 | 4, | |
1979 | ], | |
1980 | [ | |
1981 | 0, | |
1982 | "aaaabctz", | |
1983 | qr/a+(?{$count_a++})(bc*){0,6}(*PRUNE)(*FAIL)/, | |
1984 | qr/a+(?{$count_b++})(bc*){0,6}(*PRUNE)z/, | |
1985 | 4, | |
1986 | ], | |
1987 | ||
1988 | [ | |
1989 | 0, | |
1990 | "aaabaaab", | |
1991 | qr/a+(?{$count_a++;})b?(*SKIP)(*FAIL)/, | |
1992 | qr/a+(?{$count_b++;})b?(*SKIP)z/, | |
1993 | 2, | |
1994 | ], | |
1995 | [ | |
1996 | 0, | |
1997 | "aaabaaab", | |
1998 | qr/a+(?{$count_a++;})b?(*SKIP)\s*(*FAIL)/, | |
1999 | qr/a+(?{$count_b++;})b?(*SKIP)\s*z/, | |
2000 | 2, | |
2001 | ], | |
2002 | [ | |
2003 | 0, | |
2004 | "aaabaaab", | |
2005 | qr/a+(?{$count_a++;})(?:b|)(*SKIP)(*FAIL)/, | |
2006 | qr/a+(?{$count_b++;})(?:b|)(*SKIP)z/, | |
2007 | 2, | |
2008 | ], | |
2009 | [ | |
2010 | 0, | |
2011 | "aaabaaab", | |
2012 | qr/a+(?{$count_a++;})b{0,6}(*SKIP)(*FAIL)/, | |
2013 | qr/a+(?{$count_b++;})b{0,6}(*SKIP)z/, | |
2014 | 2, | |
2015 | ], | |
2016 | [ | |
2017 | 0, | |
2018 | "aaabcaaabc", | |
2019 | qr/a+(?{$count_a++;})(bc){0,6}(*SKIP)(*FAIL)/, | |
2020 | qr/a+(?{$count_b++;})(bc){0,6}(*SKIP)z/, | |
2021 | 2, | |
2022 | ], | |
2023 | [ | |
2024 | 0, | |
2025 | "aaabcaaabc", | |
2026 | qr/a+(?{$count_a++;})(bc*){0,6}(*SKIP)(*FAIL)/, | |
2027 | qr/a+(?{$count_b++;})(bc*){0,6}(*SKIP)z/, | |
2028 | 2, | |
2029 | ], | |
2030 | ||
2031 | ||
2032 | [ | |
2033 | 0, | |
2034 | "aaddbdaabyzc", | |
2035 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2036 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) z \s* c \1 /x, | |
2037 | 4, | |
2038 | ], | |
2039 | [ | |
2040 | 0, | |
2041 | "aaddbdaabyzc", | |
2042 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* (*FAIL) \s* c \1 /x, | |
2043 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b? (*SKIP:T1) \s* z \s* c \1 /x, | |
2044 | 4, | |
2045 | ], | |
2046 | [ | |
2047 | 0, | |
2048 | "aaddbdaabyzc", | |
2049 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2050 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (?:b|) (*SKIP:T1) z \s* c \1 /x, | |
2051 | 4, | |
2052 | ], | |
2053 | [ | |
2054 | 0, | |
2055 | "aaddbdaabyzc", | |
2056 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2057 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? b{0,6} (*SKIP:T1) z \s* c \1 /x, | |
2058 | 4, | |
2059 | ], | |
2060 | [ | |
2061 | 0, | |
2062 | "aaddbcdaabcyzc", | |
2063 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2064 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (bc){0,6} (*SKIP:T1) z \s* c \1 /x, | |
2065 | 4, | |
2066 | ], | |
2067 | [ | |
2068 | 0, | |
2069 | "aaddbcdaabcyzc", | |
2070 | qr/a (?{$count_a++;}) (*MARK:T1) (a*) .*? (bc*){0,6} (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2071 | qr/a (?{$count_b++;}) (*MARK:T1) (a*) .*? (bc*){0,6} (*SKIP:T1) z \s* c \1 /x, | |
2072 | 4, | |
2073 | ], | |
2074 | ||
2075 | ||
2076 | [ | |
2077 | 0, | |
2078 | "aaaaddbdaabyzc", | |
2079 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2080 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, | |
2081 | 2, | |
2082 | ], | |
2083 | [ | |
2084 | 0, | |
2085 | "aaaaddbdaabyzc", | |
2086 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* (*FAIL) \s* c \1 /x, | |
2087 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b? (*MARK:T1) (*SKIP:T1) \s* z \s* c \1 /x, | |
2088 | 2, | |
2089 | ], | |
2090 | [ | |
2091 | 0, | |
2092 | "aaaaddbdaabyzc", | |
2093 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2094 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (?:b|) (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, | |
2095 | 2, | |
2096 | ], | |
2097 | [ | |
2098 | 0, | |
2099 | "aaaaddbdaabyzc", | |
2100 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2101 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? b{0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, | |
2102 | 2, | |
2103 | ], | |
2104 | [ | |
2105 | 0, | |
2106 | "aaaaddbcdaabcyzc", | |
2107 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2108 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, | |
2109 | 2, | |
2110 | ], | |
2111 | [ | |
2112 | 0, | |
2113 | "aaaaddbcdaabcyzc", | |
2114 | qr/a (?{$count_a++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) (*FAIL) \s* c \1 /x, | |
2115 | qr/a (?{$count_b++;}) (a?) (*MARK:T1) (a*) .*? (bc*){0,6} (*MARK:T1) (*SKIP:T1) z \s* c \1 /x, | |
2116 | 2, | |
2117 | ], | |
2118 | ||
2119 | ||
2120 | [ | |
2121 | 0, | |
2122 | "AbcdCBefgBhiBqz", | |
2123 | qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) (*FAIL)/x, | |
2124 | qr/(A (.*) (?{ $count_b++ }) C? (*THEN) | A D) z/x, | |
2125 | 1, | |
2126 | ], | |
2127 | [ | |
2128 | 0, | |
2129 | "AbcdCBefgBhiBqz", | |
2130 | qr/(A (.*) (?{ $count_a++ }) C? (*THEN) | A D) \s* (*FAIL)/x, | |
2131 | qr/(A (.*) (?{ $count_b++ }) C? (*THEN) | A D) \s* z/x, | |
2132 | 1, | |
2133 | ], | |
2134 | [ | |
2135 | 0, | |
2136 | "AbcdCBefgBhiBqz", | |
2137 | qr/(A (.*) (?{ $count_a++ }) (?:C|) (*THEN) | A D) (*FAIL)/x, | |
2138 | qr/(A (.*) (?{ $count_b++ }) (?:C|) (*THEN) | A D) z/x, | |
2139 | 1, | |
2140 | ], | |
2141 | [ | |
2142 | 0, | |
2143 | "AbcdCBefgBhiBqz", | |
2144 | qr/(A (.*) (?{ $count_a++ }) C{0,6} (*THEN) | A D) (*FAIL)/x, | |
2145 | qr/(A (.*) (?{ $count_b++ }) C{0,6} (*THEN) | A D) z/x, | |
2146 | 1, | |
2147 | ], | |
2148 | [ | |
2149 | 0, | |
2150 | "AbcdCEBefgBhiBqz", | |
2151 | qr/(A (.*) (?{ $count_a++ }) (CE){0,6} (*THEN) | A D) (*FAIL)/x, | |
2152 | qr/(A (.*) (?{ $count_b++ }) (CE){0,6} (*THEN) | A D) z/x, | |
2153 | 1, | |
2154 | ], | |
2155 | [ | |
2156 | 0, | |
2157 | "AbcdCBefgBhiBqz", | |
2158 | qr/(A (.*) (?{ $count_a++ }) (CE*){0,6} (*THEN) | A D) (*FAIL)/x, | |
2159 | qr/(A (.*) (?{ $count_b++ }) (CE*){0,6} (*THEN) | A D) z/x, | |
2160 | 1, | |
2161 | ], | |
2162 | ) { | |
2163 | $c++; | |
2164 | $count_a = 0; | |
2165 | $count_b = 0; | |
2166 | ||
2167 | my $match_a = ($re->[1] =~ $re->[2]) || 0; | |
2168 | my $match_b = ($re->[1] =~ $re->[3]) || 0; | |
2169 | ||
de26e0cc NC |
2170 | is($match_a, $re->[0], "match a " . ($re->[0] ? "succeeded" : "failed") . " ($c)"); |
2171 | is($match_b, $re->[0], "match b " . ($re->[0] ? "succeeded" : "failed") . " ($c)"); | |
2172 | is($count_a, $re->[4], "count a ($c)"); | |
2173 | is($count_b, $re->[4], "count b ($c)"); | |
d1c771f5 B |
2174 | } |
2175 | } | |
e425a60b | 2176 | |
b57e4118 KW |
2177 | { # Bleadperl v5.13.8-292-gf56b639 breaks NEZUMI/Unicode-LineBreak-1.011 |
2178 | # \xdf in lookbehind failed to compile as is multi-char fold | |
14358a41 NC |
2179 | my $message = "Lookbehind with \\xdf matchable compiles"; |
2180 | my $r = eval 'qr{ | |
b57e4118 KW |
2181 | (?u: (?<=^url:) | |
2182 | (?<=[/]) (?=[^/]) | | |
2183 | (?<=[^-.]) (?=[-~.,_?\#%=&]) | | |
2184 | (?<=[=&]) (?=.) | |
14358a41 NC |
2185 | )}iox'; |
2186 | is($@, '', $message); | |
bbce3ca6 | 2187 | object_ok($r, 'Regexp', $message); |
b57e4118 KW |
2188 | } |
2189 | ||
84193928 KW |
2190 | # RT #82610 |
2191 | ok 'foo/file.fob' =~ m,^(?=[^\.])[^/]*/(?=[^\.])[^/]*\.fo[^/]$,; | |
2192 | ||
295c2f7d KW |
2193 | { # This was failing unless an explicit /d was added |
2194 | my $p = qr/[\xE0_]/i; | |
2195 | utf8::upgrade($p); | |
2196 | like("\xC0", $p, "Verify \"\\xC0\" =~ /[\\xE0_]/i; pattern in utf8"); | |
2197 | } | |
2198 | ||
a40630bf YO |
2199 | ok "x" =~ /\A(?>(?:(?:)A|B|C?x))\z/, |
2200 | "Check TRIE does not overwrite EXACT following NOTHING at start - RT #111842"; | |
c221a219 | 2201 | |
26faadbd KW |
2202 | { |
2203 | my $single = ":"; | |
2204 | my $upper = "\x{390}"; # Fold is 3 chars. | |
2205 | my $multi = CORE::fc($upper); | |
2206 | ||
2207 | my $failed = 0; | |
2208 | ||
2209 | # Try forcing a node to be split, with a multi-char fold at the | |
2210 | # boundary | |
2211 | for my $repeat (1 .. 300) { | |
2212 | my $string = $single x $repeat; | |
2213 | my $lhs = $string . $upper; | |
2214 | if ($lhs !~ m/$string$multi/i) { | |
2215 | $failed = $repeat; | |
2216 | last; | |
2217 | } | |
2218 | } | |
2219 | ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed"); | |
2220 | ||
b9c48b5b KW |
2221 | $failed = 0; |
2222 | for my $repeat (1 .. 300) { | |
2223 | my $string = $single x $repeat; | |
2224 | my $lhs = $string . "\N{LATIN SMALL LIGATURE FFI}"; | |
2225 | if ($lhs !~ m/${string}ff\N{LATIN SMALL LETTER I}/i) { | |
2226 | $failed = $repeat; | |
2227 | last; | |
2228 | } | |
2229 | } | |
2230 | ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed"); | |
2231 | ||
2232 | $failed = 0; | |
2233 | for my $repeat (1 .. 300) { | |
2234 | my $string = $single x $repeat; | |
2235 | my $lhs = $string . "\N{LATIN SMALL LIGATURE FFL}"; | |
2236 | if ($lhs !~ m/${string}ff\N{U+6c}/i) { | |
2237 | $failed = $repeat; | |
2238 | last; | |
2239 | } | |
2240 | } | |
2241 | ok(! $failed, "Matched multi-char fold across EXACTFish node boundaries; if failed, was at count $failed"); | |
88b3a463 KW |
2242 | |
2243 | # This tests that under /d matching that an 'ss' split across two | |
2244 | # parts of a node doesn't end up turning into something that matches | |
2245 | # \xDF unless it is in utf8. | |
2246 | $failed = 0; | |
2247 | $single = 'a'; # Is non-terminal multi-char fold char | |
2248 | for my $repeat (1 .. 300) { | |
2249 | my $string = $single x $repeat; | |
2250 | my $lhs = "$string\N{LATIN SMALL LETTER SHARP S}"; | |
2251 | utf8::downgrade($lhs); | |
2252 | $string .= "s"; | |
2253 | if ($lhs =~ m/${string}s/di) { | |
2254 | $failed = $repeat; | |
2255 | last; | |
2256 | } | |
2257 | } | |
2258 | ok(! $failed, "Matched multi-char fold 'ss' across EXACTF node boundaries; if failed, was at count $failed"); | |
26faadbd KW |
2259 | } |
2260 | ||
2b233a8f | 2261 | { |
2b233a8f KW |
2262 | fresh_perl_is('print eval "\"\x{101}\" =~ /[[:lower:]]/", "\n"; print eval "\"\x{100}\" =~ /[[:lower:]]/i", "\n";', |
2263 | "1\n1", # Both re's should match | |
20e5bab4 | 2264 | {}, |
2b233a8f KW |
2265 | "get [:lower:] swash in first eval; test under /i in second"); |
2266 | } | |
2267 | ||
c5de0829 JK |
2268 | { |
2269 | #' RT #119075 | |
fd2268bc | 2270 | no warnings 'regexp'; # Silence "has useless greediness modifier" |
c5de0829 JK |
2271 | local $@; |
2272 | eval { /a{0}?/; }; | |
2273 | ok(! $@, | |
2274 | "PCRE regression test: No 'Quantifier follows nothing in regex' warning"); | |
2275 | ||
2276 | } | |
2277 | ||
709d08c2 KW |
2278 | { |
2279 | unlike("\xB5", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2280 | like("\xB6", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2281 | unlike("\xB7", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2282 | like("\xB5", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2283 | unlike("\xB6", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2284 | like("\xB7", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2285 | ||
2286 | unlike("_\xB5", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2287 | like("_\xB6", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2288 | unlike("_\xB7", qr/^_?\p{IsMyRuntimeProperty}\z/, "yadayada"); | |
2289 | like("_\xB5", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2290 | unlike("_\xB6", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2291 | like("_\xB7", qr/^_?\P{IsMyRuntimeProperty}\z/, "yadayada"); | |
2292 | } | |
2293 | ||
2294 | # These are defined later, so won't be known at regex compile time above | |
2295 | sub IsMyRuntimeProperty { | |
2296 | return "B6\n"; | |
2297 | } | |
2298 | ||
2299 | sub IsntMyRuntimeProperty { | |
2300 | return "!B6\n"; | |
2301 | } | |
2302 | ||
80836a6e KW |
2303 | { # [perl 121777] |
2304 | my $regex; | |
2305 | { package Some; | |
2306 | # define a Unicode propertyIs_q | |
2307 | sub Is_q | |
2308 | { | |
2309 | sprintf '%x', ord 'q' | |
2310 | } | |
2311 | $regex = qr/\p{Is_q}/; | |
2312 | ||
2313 | # If we uncomment the following line, prior to the patch that | |
de68ac66 KW |
2314 | # fixed this, everything would work because we would have expanded |
2315 | # the property by the time the regex in the 'like' below got | |
2316 | # compiled. | |
80836a6e KW |
2317 | #'q' =~ $regex; |
2318 | } | |
2319 | ||
2320 | like('q', $regex, 'User-defined property matches outside package'); | |
c0611711 KW |
2321 | |
2322 | package Some { | |
2323 | main::like('abcq', qr/abc$regex/, 'Run-time compiled in-package user-defined property matches'); | |
2324 | } | |
80836a6e KW |
2325 | } |
2326 | ||
bc031a7d KW |
2327 | { # From Lingua::Stem::UniNE; no ticket filed but related to #121778 |
2328 | use utf8; | |
2329 | my $word = 'рабта'; | |
2330 | $word =~ s{ (?: | |
2331 | ия # definite articles for nouns: | |
2332 | | ът # ∙ masculine | |
2333 | | та # ∙ feminine | |
2334 | | то # ∙ neutral | |
2335 | | те # ∙ plural | |
2336 | ) $ }{}x; | |
2337 | is($word, 'раб', "Handles UTF8 trie correctly"); | |
2338 | } | |
2339 | ||
e425a60b YO |
2340 | # |
2341 | # Keep the following tests last -- they may crash perl | |
2342 | # | |
2343 | print "# Tests that follow may crash perl\n"; | |
e425a60b YO |
2344 | { |
2345 | eval '/\k/'; | |
2346 | ok $@ =~ /\QSequence \k... not terminated in regex;\E/, | |
2347 | 'Lone \k not allowed'; | |
2348 | } | |
2349 | ||
e425a60b | 2350 | { |
de946258 | 2351 | my $message = "Substitution with lookahead (possible segv)"; |
e425a60b YO |
2352 | $_ = "ns1ns1ns1"; |
2353 | s/ns(?=\d)/ns_/g; | |
de946258 | 2354 | is($_, "ns_1ns_1ns_1", $message); |
e425a60b YO |
2355 | $_ = "ns1"; |
2356 | s/ns(?=\d)/ns_/; | |
de946258 | 2357 | is($_, "ns_1", $message); |
e425a60b YO |
2358 | $_ = "123"; |
2359 | s/(?=\d+)|(?<=\d)/!Bang!/g; | |
de946258 | 2360 | is($_, "!Bang!1!Bang!2!Bang!3!Bang!", $message); |
e425a60b YO |
2361 | } |
2362 | ||
6182169b KW |
2363 | { |
2364 | # Earlier versions of Perl said this was fatal. | |
de946258 | 2365 | my $message = "U+0FFFF shouldn't crash the regex engine"; |
6182169b KW |
2366 | no warnings 'utf8'; |
2367 | my $a = eval "chr(65535)"; | |
2368 | use warnings; | |
2369 | my $warning_message; | |
2370 | local $SIG{__WARN__} = sub { $warning_message = $_[0] }; | |
2371 | eval $a =~ /[a-z]/; | |
de946258 | 2372 | ok(1, $message); # If it didn't crash, it worked. |
6182169b | 2373 | } |
b57e4118 | 2374 | |
06345901 NC |
2375 | TODO: { # Was looping |
2376 | todo_skip('Triggers thread clone SEGV. See #86550') | |
2377 | if $::running_as_thread && $::running_as_thread; | |
478c6b74 | 2378 | watchdog(10); # Use a bigger value for busy systems |
6438af90 KW |
2379 | like("\x{00DF}", qr/[\x{1E9E}_]*/i, "\"\\x{00DF}\" =~ /[\\x{1E9E}_]*/i was looping"); |
2380 | } | |
2381 | ||
f9126265 KW |
2382 | { # Bug #90536, caused failed assertion |
2383 | unlike("s\N{U+DF}", qr/^\x{00DF}/i, "\"s\\N{U+DF}\", qr/^\\x{00DF}/i"); | |
2384 | } | |
2385 | ||
361ee0fe KW |
2386 | # User-defined Unicode properties to match above-Unicode code points |
2387 | sub Is_32_Bit_Super { return "110000\tFFFFFFFF\n" } | |
2388 | sub Is_Portable_Super { return '!utf8::Any' } # Matches beyond 32 bits | |
45d91b83 KW |
2389 | |
2390 | { # Assertion was failing on on 64-bit platforms; just didn't work on 32. | |
2391 | no warnings qw(non_unicode portable); | |
2392 | use Config; | |
2393 | ||
2394 | # We use 'ok' instead of 'like' because the warnings are lexically | |
2395 | # scoped, and want to turn them off, so have to do the match in this | |
2396 | # scope | |
84ea5ef6 | 2397 | if ($Config{uvsize} < 8) { |
361ee0fe | 2398 | ok(chr(0xFFFF_FFFE) =~ /\p{Is_32_Bit_Super}/, |
84ea5ef6 | 2399 | "chr(0xFFFF_FFFE) can match a Unicode property"); |
6cb05c12 KW |
2400 | ok(chr(0xFFFF_FFFF) =~ /\p{Is_32_Bit_Super}/, |
2401 | "chr(0xFFFF_FFFF) can match a Unicode property"); | |
01c5845a KW |
2402 | my $p = qr/^[\x{FFFF_FFFF}]$/; |
2403 | ok(chr(0xFFFF_FFFF) =~ $p, | |
2404 | "chr(0xFFFF_FFFF) can match itself in a [class]"); | |
3ef5b9b5 KW |
2405 | ok(chr(0xFFFF_FFFF) =~ $p, # Tests any caching |
2406 | "chr(0xFFFF_FFFF) can match itself in a [class] subsequently"); | |
84ea5ef6 KW |
2407 | } |
2408 | else { | |
2409 | no warnings 'overflow'; | |
361ee0fe | 2410 | ok(chr(0xFFFF_FFFF_FFFF_FFFE) =~ qr/\p{Is_Portable_Super}/, |
84ea5ef6 | 2411 | "chr(0xFFFF_FFFF_FFFF_FFFE) can match a Unicode property"); |
6cb05c12 KW |
2412 | ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ qr/^\p{Is_Portable_Super}$/, |
2413 | "chr(0xFFFF_FFFF_FFFF_FFFF) can match a Unicode property"); | |
361ee0fe | 2414 | |
01c5845a KW |
2415 | my $p = qr/^[\x{FFFF_FFFF_FFFF_FFFF}]$/; |
2416 | ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ $p, | |
2417 | "chr(0xFFFF_FFFF_FFFF_FFFF) can match itself in a [class]"); | |
3ef5b9b5 KW |
2418 | ok(chr(0xFFFF_FFFF_FFFF_FFFF) =~ $p, # Tests any caching |
2419 | "chr(0xFFFF_FFFF_FFFF_FFFF) can match itself in a [class] subsequently"); | |
01c5845a | 2420 | |
361ee0fe KW |
2421 | # This test is because something was declared as 32 bits, but |
2422 | # should have been cast to 64; only a problem where | |
2423 | # sizeof(STRLEN) != sizeof(UV) | |
2424 | ok(chr(0xFFFF_FFFF_FFFF_FFFE) !~ qr/\p{Is_32_Bit_Super}/, "chr(0xFFFF_FFFF_FFFF_FFFE) shouldn't match a range ending in 0xFFFF_FFFF"); | |
45d91b83 KW |
2425 | } |
2426 | } | |
2427 | ||
9d501133 KW |
2428 | { # [perl #112530], the code below caused a panic |
2429 | sub InFoo { "a\tb\n9\ta\n" } | |
2430 | like("\n", qr/\p{InFoo}/, | |
2431 | "Overlapping ranges in user-defined properties"); | |
2432 | } | |
2433 | ||
2e3a23da KW |
2434 | { # Regexp:Grammars was broken: |
2435 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2013-06/msg01290.html | |
2436 | fresh_perl_like('use warnings; "abc" =~ qr{(?&foo){0}abc(?<foo>)}', | |
2437 | 'Quantifier unexpected on zero-length expression', | |
20e5bab4 | 2438 | {}, |
2e3a23da KW |
2439 | 'No segfault on qr{(?&foo){0}abc(?<foo>)}'); |
2440 | } | |
2441 | ||
b57e4118 KW |
2442 | # !!! NOTE that tests that aren't at all likely to crash perl should go |
2443 | # a ways above, above these last ones. | |
f4554ed5 NC |
2444 | |
2445 | done_testing(); | |
e425a60b YO |
2446 | } # End of sub run_tests |
2447 | ||
2448 | 1; |