Commit | Line | Data |
---|---|---|
8d063cd8 | 1 | #!./perl |
8d37f932 DD |
2 | # |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by op/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to op/re_tests, not here. | |
8d063cd8 | 6 | |
9133bbab | 7 | $| = 1; |
3568d838 | 8 | |
fc8cd66c | 9 | # Test counter output is generated by a BEGIN block at bottom of file |
8d37f932 | 10 | |
e4d48cc9 GS |
11 | BEGIN { |
12 | chdir 't' if -d 't'; | |
20822f61 | 13 | @INC = '../lib'; |
e4d48cc9 | 14 | } |
de734bd5 | 15 | our $Message = "Noname test"; |
ffbc6a93 | 16 | |
8d37f932 | 17 | eval 'use Config'; # Defaults assumed if this fails |
8d063cd8 LW |
18 | |
19 | $x = "abc\ndef\n"; | |
20 | ||
21 | if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";} | |
22 | if ($x !~ /^def/) {print "ok 2\n";} else {print "not ok 2\n";} | |
23 | ||
f02c194e RGS |
24 | # used to be a test for $* |
25 | if ($x =~ /^def/m) {print "ok 3\n";} else {print "not ok 3\n";} | |
8d063cd8 LW |
26 | |
27 | $_ = '123'; | |
28 | if (/^([0-9][0-9]*)/) {print "ok 4\n";} else {print "not ok 4\n";} | |
29 | ||
30 | if ($x =~ /^xxx/) {print "not ok 5\n";} else {print "ok 5\n";} | |
31 | if ($x !~ /^abc/) {print "not ok 6\n";} else {print "ok 6\n";} | |
32 | ||
33 | if ($x =~ /def/) {print "ok 7\n";} else {print "not ok 7\n";} | |
34 | if ($x !~ /def/) {print "not ok 8\n";} else {print "ok 8\n";} | |
35 | ||
36 | if ($x !~ /.def/) {print "ok 9\n";} else {print "not ok 9\n";} | |
37 | if ($x =~ /.def/) {print "not ok 10\n";} else {print "ok 10\n";} | |
38 | ||
39 | if ($x =~ /\ndef/) {print "ok 11\n";} else {print "not ok 11\n";} | |
40 | if ($x !~ /\ndef/) {print "not ok 12\n";} else {print "ok 12\n";} | |
41 | ||
42 | $_ = 'aaabbbccc'; | |
43 | if (/(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc') { | |
44 | print "ok 13\n"; | |
45 | } else { | |
46 | print "not ok 13\n"; | |
47 | } | |
48 | if (/(a+b+c+)/ && $1 eq 'aaabbbccc') { | |
49 | print "ok 14\n"; | |
50 | } else { | |
51 | print "not ok 14\n"; | |
52 | } | |
53 | ||
54 | if (/a+b?c+/) {print "not ok 15\n";} else {print "ok 15\n";} | |
55 | ||
56 | $_ = 'aaabccc'; | |
57 | if (/a+b?c+/) {print "ok 16\n";} else {print "not ok 16\n";} | |
58 | if (/a*b+c*/) {print "ok 17\n";} else {print "not ok 17\n";} | |
59 | ||
60 | $_ = 'aaaccc'; | |
61 | if (/a*b?c*/) {print "ok 18\n";} else {print "not ok 18\n";} | |
62 | if (/a*b+c*/) {print "not ok 19\n";} else {print "ok 19\n";} | |
63 | ||
64 | $_ = 'abcdef'; | |
65 | if (/bcd|xyz/) {print "ok 20\n";} else {print "not ok 20\n";} | |
66 | if (/xyz|bcd/) {print "ok 21\n";} else {print "not ok 21\n";} | |
67 | ||
68 | if (m|bc/*d|) {print "ok 22\n";} else {print "not ok 22\n";} | |
378cc40b LW |
69 | |
70 | if (/^$_$/) {print "ok 23\n";} else {print "not ok 23\n";} | |
71 | ||
f02c194e RGS |
72 | # used to be a test for $* |
73 | if ("ab\ncd\n" =~ /^cd/m) {print "ok 24\n";} else {print "not ok 24\n";} | |
378cc40b | 74 | |
cb55de95 JH |
75 | $XXX{123} = 123; |
76 | $XXX{234} = 234; | |
77 | $XXX{345} = 345; | |
78 | ||
79 | @XXX = ('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27'); | |
80 | while ($_ = shift(@XXX)) { | |
81 | ?(.*)? && (print $1,"\n"); | |
82 | /not/ && reset; | |
1606b0c7 JM |
83 | if (/not ok 26/) { |
84 | if ($^O eq 'VMS') { | |
85 | $_ = shift(@XXX); | |
86 | } | |
87 | else { | |
88 | reset 'X'; | |
89 | } | |
90 | } | |
cb55de95 JH |
91 | } |
92 | ||
1606b0c7 JM |
93 | if ($^O ne 'VMS') { |
94 | while (($key,$val) = each(%XXX)) { | |
cb55de95 JH |
95 | print "not ok 27\n"; |
96 | exit; | |
1606b0c7 | 97 | } |
cb55de95 JH |
98 | } |
99 | ||
100 | print "ok 27\n"; | |
378cc40b | 101 | |
378cc40b LW |
102 | 'cde' =~ /[^ab]*/; |
103 | 'xyz' =~ //; | |
104 | if ($& eq 'xyz') {print "ok 28\n";} else {print "not ok 28\n";} | |
105 | ||
106 | $foo = '[^ab]*'; | |
107 | 'cde' =~ /$foo/; | |
108 | 'xyz' =~ //; | |
109 | if ($& eq 'xyz') {print "ok 29\n";} else {print "not ok 29\n";} | |
110 | ||
111 | $foo = '[^ab]*'; | |
112 | 'cde' =~ /$foo/; | |
113 | 'xyz' =~ /$null/; | |
114 | if ($& eq 'xyz') {print "ok 30\n";} else {print "not ok 30\n";} | |
a687059c LW |
115 | |
116 | $_ = 'abcdefghi'; | |
117 | /def/; # optimized up to cmd | |
118 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 31\n";} else {print "not ok 31\n";} | |
119 | ||
120 | /cde/ + 0; # optimized only to spat | |
121 | if ("$`:$&:$'" eq 'ab:cde:fghi') {print "ok 32\n";} else {print "not ok 32\n";} | |
122 | ||
123 | /[d][e][f]/; # not optimized | |
124 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 33\n";} else {print "not ok 33\n";} | |
125 | ||
126 | $_ = 'now is the {time for all} good men to come to.'; | |
127 | / {([^}]*)}/; | |
128 | if ($1 eq 'time for all') {print "ok 34\n";} else {print "not ok 34 $1\n";} | |
129 | ||
130 | $_ = 'xxx {3,4} yyy zzz'; | |
131 | print /( {3,4})/ ? "ok 35\n" : "not ok 35\n"; | |
132 | print $1 eq ' ' ? "ok 36\n" : "not ok 36\n"; | |
133 | print /( {4,})/ ? "not ok 37\n" : "ok 37\n"; | |
134 | print /( {2,3}.)/ ? "ok 38\n" : "not ok 38\n"; | |
135 | print $1 eq ' y' ? "ok 39\n" : "not ok 39\n"; | |
136 | print /(y{2,3}.)/ ? "ok 40\n" : "not ok 40\n"; | |
137 | print $1 eq 'yyy ' ? "ok 41\n" : "not ok 41\n"; | |
138 | print /x {3,4}/ ? "not ok 42\n" : "ok 42\n"; | |
139 | print /^xxx {3,4}/ ? "not ok 43\n" : "ok 43\n"; | |
352d5a3a LW |
140 | |
141 | $_ = "now is the time for all good men to come to."; | |
142 | @words = /(\w+)/g; | |
143 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" | |
144 | ? "ok 44\n" | |
145 | : "not ok 44\n"; | |
146 | ||
147 | @words = (); | |
148 | while (/\w+/g) { | |
149 | push(@words, $&); | |
150 | } | |
151 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" | |
152 | ? "ok 45\n" | |
153 | : "not ok 45\n"; | |
154 | ||
155 | @words = (); | |
71be2cbc | 156 | pos = 0; |
352d5a3a LW |
157 | while (/to/g) { |
158 | push(@words, $&); | |
159 | } | |
160 | print join(':',@words) eq "to:to" | |
161 | ? "ok 46\n" | |
71be2cbc | 162 | : "not ok 46 `@words'\n"; |
352d5a3a | 163 | |
71be2cbc | 164 | pos $_ = 0; |
352d5a3a LW |
165 | @words = /to/g; |
166 | print join(':',@words) eq "to:to" | |
167 | ? "ok 47\n" | |
71be2cbc | 168 | : "not ok 47 `@words'\n"; |
352d5a3a LW |
169 | |
170 | $_ = "abcdefghi"; | |
171 | ||
172 | $pat1 = 'def'; | |
173 | $pat2 = '^def'; | |
174 | $pat3 = '.def.'; | |
175 | $pat4 = 'abc'; | |
176 | $pat5 = '^abc'; | |
177 | $pat6 = 'abc$'; | |
178 | $pat7 = 'ghi'; | |
179 | $pat8 = '\w*ghi'; | |
180 | $pat9 = 'ghi$'; | |
181 | ||
182 | $t1=$t2=$t3=$t4=$t5=$t6=$t7=$t8=$t9=0; | |
183 | ||
184 | for $iter (1..5) { | |
185 | $t1++ if /$pat1/o; | |
186 | $t2++ if /$pat2/o; | |
187 | $t3++ if /$pat3/o; | |
188 | $t4++ if /$pat4/o; | |
189 | $t5++ if /$pat5/o; | |
190 | $t6++ if /$pat6/o; | |
191 | $t7++ if /$pat7/o; | |
192 | $t8++ if /$pat8/o; | |
193 | $t9++ if /$pat9/o; | |
194 | } | |
195 | ||
196 | $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; | |
197 | print $x eq '505550555' ? "ok 48\n" : "not ok 48 $x\n"; | |
1462b684 LW |
198 | |
199 | $xyz = 'xyz'; | |
200 | print "abc" =~ /^abc$|$xyz/ ? "ok 49\n" : "not ok 49\n"; | |
201 | ||
202 | # perl 4.009 says "unmatched ()" | |
203 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; | |
204 | print $@ eq "" ? "ok 50\n" : "not ok 50\n"; | |
205 | print $result eq "abc:bc" ? "ok 51\n" : "not ok 51\n"; | |
a0d0e21e LW |
206 | |
207 | ||
208 | $_="abcfooabcbar"; | |
209 | $x=/abc/g; | |
210 | print $` eq "" ? "ok 52\n" : "not ok 52\n" if $x; | |
211 | $x=/abc/g; | |
212 | print $` eq "abcfoo" ? "ok 53\n" : "not ok 53\n" if $x; | |
213 | $x=/abc/g; | |
214 | print $x == 0 ? "ok 54\n" : "not ok 54\n"; | |
71be2cbc | 215 | pos = 0; |
a0d0e21e LW |
216 | $x=/ABC/gi; |
217 | print $` eq "" ? "ok 55\n" : "not ok 55\n" if $x; | |
218 | $x=/ABC/gi; | |
219 | print $` eq "abcfoo" ? "ok 56\n" : "not ok 56\n" if $x; | |
220 | $x=/ABC/gi; | |
221 | print $x == 0 ? "ok 57\n" : "not ok 57\n"; | |
71be2cbc | 222 | pos = 0; |
a0d0e21e LW |
223 | $x=/abc/g; |
224 | print $' eq "fooabcbar" ? "ok 58\n" : "not ok 58\n" if $x; | |
225 | $x=/abc/g; | |
226 | print $' eq "bar" ? "ok 59\n" : "not ok 59\n" if $x; | |
227 | $_ .= ''; | |
228 | @x=/abc/g; | |
229 | print scalar @x == 2 ? "ok 60\n" : "not ok 60\n"; | |
71be2cbc | 230 | |
231 | $_ = "abdc"; | |
232 | pos $_ = 2; | |
c90c0ff4 | 233 | /\Gc/gc; |
71be2cbc | 234 | print "not " if (pos $_) != 2; |
235 | print "ok 61\n"; | |
c90c0ff4 | 236 | /\Gc/g; |
237 | print "not " if defined pos $_; | |
238 | print "ok 62\n"; | |
c277df42 IZ |
239 | |
240 | $out = 1; | |
241 | 'abc' =~ m'a(?{ $out = 2 })b'; | |
242 | print "not " if $out != 2; | |
243 | print "ok 63\n"; | |
244 | ||
245 | $out = 1; | |
246 | 'abc' =~ m'a(?{ $out = 3 })c'; | |
247 | print "not " if $out != 1; | |
459f542a | 248 | print "ok 64\n"; |
c277df42 IZ |
249 | |
250 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; | |
251 | @out = /(?<!foo)bar./g; | |
252 | print "not " if "@out" ne 'bar2 barf'; | |
253 | print "ok 65\n"; | |
254 | ||
8d37f932 DD |
255 | # Tests which depend on REG_INFTY |
256 | $reg_infty = defined $Config{reg_infty} ? $Config{reg_infty} : 32767; | |
257 | $reg_infty_m = $reg_infty - 1; $reg_infty_p = $reg_infty + 1; | |
258 | ||
259 | # As well as failing if the pattern matches do unexpected things, the | |
260 | # next three tests will fail if you should have picked up a lower-than- | |
261 | # default value for $reg_infty from Config.pm, but have not. | |
262 | ||
263 | undef $@; | |
264 | print "not " if eval q(('aaa' =~ /(a{1,$reg_infty_m})/)[0] ne 'aaa') || $@; | |
265 | print "ok 66\n"; | |
266 | ||
267 | undef $@; | |
268 | print "not " if eval q(('a' x $reg_infty_m) !~ /a{$reg_infty_m}/) || $@; | |
269 | print "ok 67\n"; | |
270 | ||
271 | undef $@; | |
272 | print "not " if eval q(('a' x ($reg_infty_m - 1)) =~ /a{$reg_infty_m}/) || $@; | |
273 | print "ok 68\n"; | |
274 | ||
275 | undef $@; | |
276 | eval "'aaa' =~ /a{1,$reg_infty}/"; | |
9baa0206 | 277 | print "not " if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 DD |
278 | print "ok 69\n"; |
279 | ||
280 | eval "'aaa' =~ /a{1,$reg_infty_p}/"; | |
281 | print "not " | |
9baa0206 | 282 | if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 DD |
283 | print "ok 70\n"; |
284 | undef $@; | |
285 | ||
286 | # Poke a couple more parse failures | |
287 | ||
288 | $context = 'x' x 256; | |
289 | eval qq("${context}y" =~ /(?<=$context)y/); | |
9baa0206 | 290 | print "not " if $@ !~ m%^\QLookbehind longer than 255 not%; |
8d37f932 DD |
291 | print "ok 71\n"; |
292 | ||
b8c5462f | 293 | # removed test |
8d37f932 DD |
294 | print "ok 72\n"; |
295 | ||
c277df42 | 296 | # Long Monsters |
8d37f932 | 297 | $test = 73; |
c277df42 IZ |
298 | for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory |
299 | $a = 'a' x $l; | |
300 | print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/; | |
301 | print "ok $test\n"; | |
302 | $test++; | |
73d6d589 | 303 | |
c277df42 IZ |
304 | print "not " if "b$a=" =~ /a$a=/; |
305 | print "ok $test\n"; | |
306 | $test++; | |
307 | } | |
308 | ||
309 | # 20000 nodes, each taking 3 words per string, and 1 per branch | |
310 | $long_constant_len = join '|', 12120 .. 32645; | |
311 | $long_var_len = join '|', 8120 .. 28645; | |
312 | %ans = ( 'ax13876y25677lbc' => 1, | |
313 | 'ax13876y25677mcb' => 0, # not b. | |
314 | 'ax13876y35677nbc' => 0, # Num too big | |
315 | 'ax13876y25677y21378obc' => 1, | |
316 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] | |
317 | 'ax13876y25677y21378y21378kbc' => 1, | |
318 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. | |
319 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs | |
320 | ); | |
321 | ||
322 | for ( keys %ans ) { | |
73d6d589 | 323 | print "# const-len `$_' not => $ans{$_}\nnot " |
c277df42 IZ |
324 | if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o; |
325 | print "ok $test\n"; | |
326 | $test++; | |
73d6d589 | 327 | print "# var-len `$_' not => $ans{$_}\nnot " |
c277df42 IZ |
328 | if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o; |
329 | print "ok $test\n"; | |
330 | $test++; | |
331 | } | |
332 | ||
333 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; | |
334 | $expect = "(bla()) ((l)u((e))) (l(e)e)"; | |
335 | ||
73d6d589 | 336 | sub matchit { |
cc6b7395 | 337 | m/ |
c277df42 | 338 | ( |
73d6d589 | 339 | \( |
c277df42 IZ |
340 | (?{ $c = 1 }) # Initialize |
341 | (?: | |
342 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop | |
343 | (?! | |
344 | ) # Fail: will unwind one iteration back | |
73d6d589 | 345 | ) |
c277df42 IZ |
346 | (?: |
347 | [^()]+ # Match a big chunk | |
348 | (?= | |
349 | [()] | |
350 | ) # Do not try to match subchunks | |
351 | | | |
73d6d589 | 352 | \( |
c277df42 IZ |
353 | (?{ ++$c }) |
354 | | | |
73d6d589 | 355 | \) |
c277df42 IZ |
356 | (?{ --$c }) |
357 | ) | |
358 | )+ # This may not match with different subblocks | |
359 | ) | |
360 | (?(?{ $c != 0 }) | |
361 | (?! | |
362 | ) # Fail | |
363 | ) # Otherwise the chunk 1 may succeed with $c>0 | |
cc6b7395 | 364 | /xg; |
c277df42 IZ |
365 | } |
366 | ||
0f5d15d6 | 367 | @ans = (); |
c277df42 IZ |
368 | push @ans, $res while $res = matchit; |
369 | ||
370 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; | |
371 | print "ok $test\n"; | |
372 | $test++; | |
373 | ||
374 | @ans = matchit; | |
375 | ||
376 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; | |
377 | print "ok $test\n"; | |
378 | $test++; | |
379 | ||
96776eda GS |
380 | print "not " unless "abc" =~ /^(??{"a"})b/; |
381 | print "ok $test\n"; | |
382 | $test++; | |
383 | ||
0f5d15d6 | 384 | my $matched; |
14455d6c | 385 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; |
0f5d15d6 IZ |
386 | |
387 | @ans = @ans1 = (); | |
388 | push(@ans, $res), push(@ans1, $&) while $res = m/$matched/g; | |
389 | ||
390 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; | |
391 | print "ok $test\n"; | |
392 | $test++; | |
393 | ||
394 | print "# ans1='@ans1'\n# expect='$expect'\nnot " if "@ans1" ne $expect; | |
395 | print "ok $test\n"; | |
396 | $test++; | |
397 | ||
398 | @ans = m/$matched/g; | |
399 | ||
400 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; | |
401 | print "ok $test\n"; | |
402 | $test++; | |
403 | ||
c277df42 IZ |
404 | @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad |
405 | print "not " if "@ans" ne 'a/ b'; | |
406 | print "ok $test\n"; | |
407 | $test++; | |
408 | ||
cc6b7395 | 409 | $code = '{$blah = 45}'; |
c277df42 | 410 | $blah = 12; |
2cd61cdb IZ |
411 | eval { /(?$code)/ }; |
412 | print "not " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; | |
e4d48cc9 GS |
413 | print "ok $test\n"; |
414 | $test++; | |
415 | ||
2cd61cdb IZ |
416 | for $code ('{$blah = 45}','=xx') { |
417 | $blah = 12; | |
418 | $res = eval { "xx" =~ /(?$code)/o }; | |
419 | if ($code eq '=xx') { | |
420 | print "#'$@','$res','$blah'\nnot " unless not $@ and $res; | |
421 | } else { | |
73d6d589 | 422 | print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; |
2cd61cdb IZ |
423 | } |
424 | print "ok $test\n"; | |
425 | $test++; | |
426 | } | |
427 | ||
e4d48cc9 GS |
428 | $code = '{$blah = 45}'; |
429 | $blah = 12; | |
430 | eval "/(?$code)/"; | |
cc6b7395 IZ |
431 | print "not " if $blah != 45; |
432 | print "ok $test\n"; | |
433 | $test++; | |
434 | ||
435 | $blah = 12; | |
436 | /(?{$blah = 45})/; | |
c277df42 IZ |
437 | print "not " if $blah != 45; |
438 | print "ok $test\n"; | |
439 | $test++; | |
440 | ||
74d6a13a MB |
441 | $x = 'banana'; |
442 | $x =~ /.a/g; | |
443 | print "not " unless pos($x) == 2; | |
444 | print "ok $test\n"; | |
445 | $test++; | |
446 | ||
447 | $x =~ /.z/gc; | |
448 | print "not " unless pos($x) == 2; | |
449 | print "ok $test\n"; | |
450 | $test++; | |
451 | ||
452 | sub f { | |
453 | my $p = $_[0]; | |
454 | return $p; | |
455 | } | |
456 | ||
457 | $x =~ /.a/g; | |
458 | print "not " unless f(pos($x)) == 4; | |
459 | print "ok $test\n"; | |
460 | $test++; | |
4599a1de | 461 | |
ce862d02 IZ |
462 | $x = $^R = 67; |
463 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; | |
464 | print "not " unless $^R eq '75'; | |
465 | print "ok $test\n"; | |
466 | $test++; | |
467 | ||
468 | $x = $^R = 67; | |
469 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; | |
470 | print "not " unless $^R eq '67' and $x eq '12'; | |
471 | print "ok $test\n"; | |
472 | $test++; | |
473 | ||
474 | $x = $^R = 67; | |
475 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; | |
476 | print "not " unless $^R eq '79' and $x eq '12'; | |
477 | print "ok $test\n"; | |
478 | $test++; | |
479 | ||
cc74c5bd | 480 | print "not " unless qr/\b\v$/i eq '(?i-xsm:\b\v$)'; |
8782bef2 GB |
481 | print "ok $test\n"; |
482 | $test++; | |
483 | ||
cc74c5bd | 484 | print "not " unless qr/\b\v$/s eq '(?s-xim:\b\v$)'; |
8782bef2 GB |
485 | print "ok $test\n"; |
486 | $test++; | |
487 | ||
cc74c5bd | 488 | print "not " unless qr/\b\v$/m eq '(?m-xis:\b\v$)'; |
8782bef2 GB |
489 | print "ok $test\n"; |
490 | $test++; | |
491 | ||
cc74c5bd | 492 | print "not " unless qr/\b\v$/x eq '(?x-ism:\b\v$)'; |
8782bef2 GB |
493 | print "ok $test\n"; |
494 | $test++; | |
495 | ||
cc74c5bd | 496 | print "not " unless qr/\b\v$/xism eq '(?msix:\b\v$)'; |
8782bef2 GB |
497 | print "ok $test\n"; |
498 | $test++; | |
499 | ||
cc74c5bd | 500 | print "not " unless qr/\b\v$/ eq '(?-xism:\b\v$)'; |
97197631 IZ |
501 | print "ok $test\n"; |
502 | $test++; | |
503 | ||
7e5428c5 IZ |
504 | $_ = 'xabcx'; |
505 | foreach $ans ('', 'c') { | |
506 | /(?<=(?=a)..)((?=c)|.)/g; | |
02db2b7b | 507 | print "# \$1 ='$1'\n# \$ans='$ans'\nnot " unless $1 eq $ans; |
7e5428c5 IZ |
508 | print "ok $test\n"; |
509 | $test++; | |
510 | } | |
511 | ||
512 | $_ = 'a'; | |
513 | foreach $ans ('', 'a', '') { | |
514 | /^|a|$/g; | |
02db2b7b | 515 | print "# \$& ='$&'\n# \$ans='$ans'\nnot " unless $& eq $ans; |
7e5428c5 IZ |
516 | print "ok $test\n"; |
517 | $test++; | |
518 | } | |
519 | ||
09f25ae4 | 520 | sub prefixify { |
73d6d589 NIS |
521 | my($v,$a,$b,$res) = @_; |
522 | $v =~ s/\Q$a\E/$b/; | |
523 | print "not " unless $res eq $v; | |
09f25ae4 IZ |
524 | print "ok $test\n"; |
525 | $test++; | |
526 | } | |
527 | prefixify('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); | |
528 | prefixify('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); | |
529 | ||
530 | $_ = 'var="foo"'; | |
531 | /(\")/; | |
532 | print "not " unless $1 and /$1/; | |
533 | print "ok $test\n"; | |
534 | $test++; | |
535 | ||
73d6d589 | 536 | $a=qr/(?{++$b})/; |
2cd61cdb | 537 | $b = 7; |
73d6d589 NIS |
538 | /$a$a/; |
539 | print "not " unless $b eq '9'; | |
2cd61cdb IZ |
540 | print "ok $test\n"; |
541 | $test++; | |
542 | ||
73d6d589 NIS |
543 | $c="$a"; |
544 | /$a$a/; | |
545 | print "not " unless $b eq '11'; | |
2cd61cdb IZ |
546 | print "ok $test\n"; |
547 | $test++; | |
548 | ||
549 | { | |
73d6d589 NIS |
550 | use re "eval"; |
551 | /$a$c$a/; | |
552 | print "not " unless $b eq '14'; | |
2cd61cdb IZ |
553 | print "ok $test\n"; |
554 | $test++; | |
555 | ||
160cb429 JH |
556 | local $lex_a = 2; |
557 | my $lex_a = 43; | |
558 | my $lex_b = 17; | |
559 | my $lex_c = 27; | |
560 | my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); | |
561 | print "not " unless $lex_res eq '1'; | |
562 | print "ok $test\n"; | |
563 | $test++; | |
564 | print "not " unless $lex_a eq '44'; | |
565 | print "ok $test\n"; | |
566 | $test++; | |
567 | print "not " unless $lex_c eq '43'; | |
568 | print "ok $test\n"; | |
569 | $test++; | |
570 | ||
571 | ||
73d6d589 | 572 | no re "eval"; |
2cd61cdb | 573 | $match = eval { /$a$c$a/ }; |
73d6d589 | 574 | print "not " |
2cd61cdb IZ |
575 | unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match; |
576 | print "ok $test\n"; | |
577 | $test++; | |
578 | } | |
cbce877f IZ |
579 | |
580 | { | |
160cb429 JH |
581 | local $lex_a = 2; |
582 | my $lex_a = 43; | |
583 | my $lex_b = 17; | |
584 | my $lex_c = 27; | |
585 | my $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); | |
586 | print "not " unless $lex_res eq '1'; | |
587 | print "ok $test\n"; | |
588 | $test++; | |
589 | print "not " unless $lex_a eq '44'; | |
590 | print "ok $test\n"; | |
591 | $test++; | |
592 | print "not " unless $lex_c eq '43'; | |
593 | print "ok $test\n"; | |
594 | $test++; | |
595 | } | |
596 | ||
597 | { | |
cbce877f IZ |
598 | package aa; |
599 | $c = 2; | |
600 | $::c = 3; | |
601 | '' =~ /(?{ $c = 4 })/; | |
602 | print "not " unless $c == 4; | |
603 | } | |
604 | print "ok $test\n"; | |
605 | $test++; | |
606 | print "not " unless $c == 3; | |
607 | print "ok $test\n"; | |
73d6d589 NIS |
608 | $test++; |
609 | ||
4599a1de JH |
610 | sub must_warn_pat { |
611 | my $warn_pat = shift; | |
612 | return sub { print "not " unless $_[0] =~ /$warn_pat/ } | |
613 | } | |
614 | ||
615 | sub must_warn { | |
616 | my ($warn_pat, $code) = @_; | |
9f1b1f2d GS |
617 | local %SIG; |
618 | eval 'BEGIN { use warnings; $SIG{__WARN__} = $warn_pat };' . $code; | |
4599a1de JH |
619 | print "ok $test\n"; |
620 | $test++; | |
621 | } | |
622 | ||
623 | ||
624 | sub make_must_warn { | |
625 | my $warn_pat = shift; | |
626 | return sub { must_warn(must_warn_pat($warn_pat)) } | |
627 | } | |
628 | ||
629 | my $for_future = make_must_warn('reserved for future extensions'); | |
630 | ||
631 | &$for_future('q(a:[b]:) =~ /[x[:foo:]]/'); | |
9baa0206 HS |
632 | |
633 | #&$for_future('q(a=[b]=) =~ /[x[=foo=]]/'); | |
634 | print "ok $test\n"; $test++; # now a fatal croak | |
635 | ||
636 | #&$for_future('q(a.[b].) =~ /[x[.foo.]]/'); | |
637 | print "ok $test\n"; $test++; # now a fatal croak | |
f7e33566 GS |
638 | |
639 | # test if failure of patterns returns empty list | |
640 | $_ = 'aaa'; | |
641 | @_ = /bbb/; | |
642 | print "not " if @_; | |
643 | print "ok $test\n"; | |
644 | $test++; | |
645 | ||
646 | @_ = /bbb/g; | |
647 | print "not " if @_; | |
648 | print "ok $test\n"; | |
649 | $test++; | |
650 | ||
651 | @_ = /(bbb)/; | |
652 | print "not " if @_; | |
653 | print "ok $test\n"; | |
654 | $test++; | |
655 | ||
656 | @_ = /(bbb)/g; | |
657 | print "not " if @_; | |
658 | print "ok $test\n"; | |
659 | $test++; | |
660 | ||
6cef1e77 IZ |
661 | /a(?=.$)/; |
662 | print "not " if $#+ != 0 or $#- != 0; | |
663 | print "ok $test\n"; | |
664 | $test++; | |
665 | ||
666 | print "not " if $+[0] != 2 or $-[0] != 1; | |
667 | print "ok $test\n"; | |
668 | $test++; | |
669 | ||
73d6d589 | 670 | print "not " |
6cef1e77 IZ |
671 | if defined $+[1] or defined $-[1] or defined $+[2] or defined $-[2]; |
672 | print "ok $test\n"; | |
673 | $test++; | |
674 | ||
675 | /a(a)(a)/; | |
676 | print "not " if $#+ != 2 or $#- != 2; | |
677 | print "ok $test\n"; | |
678 | $test++; | |
679 | ||
680 | print "not " if $+[0] != 3 or $-[0] != 0; | |
681 | print "ok $test\n"; | |
682 | $test++; | |
683 | ||
684 | print "not " if $+[1] != 2 or $-[1] != 1; | |
685 | print "ok $test\n"; | |
686 | $test++; | |
687 | ||
688 | print "not " if $+[2] != 3 or $-[2] != 2; | |
689 | print "ok $test\n"; | |
690 | $test++; | |
691 | ||
73d6d589 | 692 | print "not " |
6cef1e77 IZ |
693 | if defined $+[3] or defined $-[3] or defined $+[4] or defined $-[4]; |
694 | print "ok $test\n"; | |
695 | $test++; | |
696 | ||
697 | /.(a)(b)?(a)/; | |
698 | print "not " if $#+ != 3 or $#- != 3; | |
699 | print "ok $test\n"; | |
700 | $test++; | |
701 | ||
702 | print "not " if $+[0] != 3 or $-[0] != 0; | |
703 | print "ok $test\n"; | |
704 | $test++; | |
705 | ||
706 | print "not " if $+[1] != 2 or $-[1] != 1; | |
707 | print "ok $test\n"; | |
708 | $test++; | |
709 | ||
710 | print "not " if $+[3] != 3 or $-[3] != 2; | |
711 | print "ok $test\n"; | |
712 | $test++; | |
713 | ||
73d6d589 | 714 | print "not " |
6cef1e77 IZ |
715 | if defined $+[2] or defined $-[2] or defined $+[4] or defined $-[4]; |
716 | print "ok $test\n"; | |
717 | $test++; | |
718 | ||
719 | /.(a)/; | |
720 | print "not " if $#+ != 1 or $#- != 1; | |
721 | print "ok $test\n"; | |
722 | $test++; | |
723 | ||
724 | print "not " if $+[0] != 2 or $-[0] != 0; | |
725 | print "ok $test\n"; | |
726 | $test++; | |
727 | ||
728 | print "not " if $+[1] != 2 or $-[1] != 1; | |
729 | print "ok $test\n"; | |
730 | $test++; | |
731 | ||
73d6d589 | 732 | print "not " |
6cef1e77 IZ |
733 | if defined $+[2] or defined $-[2] or defined $+[3] or defined $-[3]; |
734 | print "ok $test\n"; | |
735 | $test++; | |
736 | ||
03a27ae7 | 737 | eval { $+[0] = 13; }; |
73d6d589 | 738 | print "not " |
03a27ae7 MG |
739 | if $@ !~ /^Modification of a read-only value attempted/; |
740 | print "ok $test\n"; | |
741 | $test++; | |
742 | ||
743 | eval { $-[0] = 13; }; | |
73d6d589 | 744 | print "not " |
03a27ae7 MG |
745 | if $@ !~ /^Modification of a read-only value attempted/; |
746 | print "ok $test\n"; | |
747 | $test++; | |
748 | ||
749 | eval { @+ = (7, 6, 5); }; | |
73d6d589 | 750 | print "not " |
03a27ae7 MG |
751 | if $@ !~ /^Modification of a read-only value attempted/; |
752 | print "ok $test\n"; | |
753 | $test++; | |
754 | ||
755 | eval { @- = qw(foo bar); }; | |
73d6d589 | 756 | print "not " |
03a27ae7 MG |
757 | if $@ !~ /^Modification of a read-only value attempted/; |
758 | print "ok $test\n"; | |
759 | $test++; | |
760 | ||
8f580fb8 IZ |
761 | /.(a)(ba*)?/; |
762 | print "#$#-..$#+\nnot " if $#+ != 2 or $#- != 1; | |
763 | print "ok $test\n"; | |
764 | $test++; | |
765 | ||
ad94a511 IZ |
766 | $_ = 'aaa'; |
767 | pos = 1; | |
768 | @a = /\Ga/g; | |
769 | print "not " unless "@a" eq "a a"; | |
770 | print "ok $test\n"; | |
771 | $test++; | |
772 | ||
22e551b9 IZ |
773 | $str = 'abcde'; |
774 | pos $str = 2; | |
775 | ||
776 | print "not " if $str =~ /^\G/; | |
777 | print "ok $test\n"; | |
778 | $test++; | |
779 | ||
780 | print "not " if $str =~ /^.\G/; | |
781 | print "ok $test\n"; | |
782 | $test++; | |
783 | ||
784 | print "not " unless $str =~ /^..\G/; | |
785 | print "ok $test\n"; | |
786 | $test++; | |
787 | ||
788 | print "not " if $str =~ /^...\G/; | |
789 | print "ok $test\n"; | |
790 | $test++; | |
791 | ||
792 | print "not " unless $str =~ /.\G./ and $& eq 'bc'; | |
793 | print "ok $test\n"; | |
794 | $test++; | |
795 | ||
796 | print "not " unless $str =~ /\G../ and $& eq 'cd'; | |
797 | print "ok $test\n"; | |
798 | $test++; | |
799 | ||
9661b544 IZ |
800 | undef $foo; undef $bar; |
801 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 802 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/ |
9661b544 IZ |
803 | and $foo eq 'abcde' and $bar eq 2; |
804 | print "ok $test\n"; | |
805 | $test++; | |
806 | ||
807 | undef $foo; undef $bar; | |
808 | pos $str = undef; | |
809 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 810 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/g |
9661b544 IZ |
811 | and $foo eq 'abcde' and $bar eq 2 and pos $str eq 3; |
812 | print "ok $test\n"; | |
813 | $test++; | |
814 | ||
815 | $_ = $str; | |
816 | ||
817 | undef $foo; undef $bar; | |
818 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 819 | unless /b(?{$foo = $_; $bar = pos})c/ |
9661b544 IZ |
820 | and $foo eq 'abcde' and $bar eq 2; |
821 | print "ok $test\n"; | |
822 | $test++; | |
823 | ||
824 | undef $foo; undef $bar; | |
825 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 826 | unless /b(?{$foo = $_; $bar = pos})c/g |
9661b544 IZ |
827 | and $foo eq 'abcde' and $bar eq 2 and pos eq 3; |
828 | print "ok $test\n"; | |
829 | $test++; | |
830 | ||
831 | undef $foo; undef $bar; | |
832 | pos = undef; | |
833 | 1 while /b(?{$foo = $_; $bar = pos})c/g; | |
834 | print "#'$str','$foo','$bar'\nnot " | |
835 | unless $foo eq 'abcde' and $bar eq 2 and not defined pos; | |
836 | print "ok $test\n"; | |
837 | $test++; | |
838 | ||
839 | undef $foo; undef $bar; | |
840 | $_ = 'abcde|abcde'; | |
841 | print "#'$str','$foo','$bar','$_'\nnot " | |
73d6d589 | 842 | unless s/b(?{$foo = $_; $bar = pos})c/x/g and $foo eq 'abcde|abcde' |
9661b544 IZ |
843 | and $bar eq 8 and $_ eq 'axde|axde'; |
844 | print "ok $test\n"; | |
845 | $test++; | |
846 | ||
5c5e4c24 IZ |
847 | @res = (); |
848 | # List context: | |
849 | $_ = 'abcde|abcde'; | |
850 | @dummy = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; | |
851 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
852 | $res = "@res"; | |
853 | print "#'@res' '$_'\nnot " | |
854 | unless "@res" eq "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; | |
855 | print "ok $test\n"; | |
856 | $test++; | |
857 | ||
858 | @res = (); | |
859 | @dummy = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; | |
860 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
861 | $res = "@res"; | |
862 | print "#'@res' '$_'\nnot " | |
863 | unless "@res" eq | |
864 | "'' 'ab' 'cde|abcde' " . | |
865 | "'' 'abc' 'de|abcde' " . | |
866 | "'abcd' 'e|' 'abcde' " . | |
867 | "'abcde|' 'ab' 'cde' " . | |
868 | "'abcde|' 'abc' 'de'" ; | |
869 | print "ok $test\n"; | |
870 | $test++; | |
871 | ||
b7a35066 IZ |
872 | #Some more \G anchor checks |
873 | $foo='aabbccddeeffgg'; | |
874 | ||
875 | pos($foo)=1; | |
876 | ||
877 | $foo=~/.\G(..)/g; | |
de734bd5 | 878 | iseq($1,'ab'); |
b7a35066 IZ |
879 | |
880 | pos($foo) += 1; | |
881 | $foo=~/.\G(..)/g; | |
882 | print "not " unless($1 eq 'cc'); | |
883 | print "ok $test\n"; | |
884 | $test++; | |
885 | ||
886 | pos($foo) += 1; | |
887 | $foo=~/.\G(..)/g; | |
888 | print "not " unless($1 eq 'de'); | |
889 | print "ok $test\n"; | |
890 | $test++; | |
891 | ||
0ef3e39e HS |
892 | print "not " unless $foo =~ /\Gef/g; |
893 | print "ok $test\n"; | |
894 | $test++; | |
895 | ||
b7a35066 IZ |
896 | undef pos $foo; |
897 | ||
898 | $foo=~/\G(..)/g; | |
899 | print "not " unless($1 eq 'aa'); | |
900 | print "ok $test\n"; | |
901 | $test++; | |
902 | ||
903 | $foo=~/\G(..)/g; | |
904 | print "not " unless($1 eq 'bb'); | |
905 | print "ok $test\n"; | |
906 | $test++; | |
907 | ||
908 | pos($foo)=5; | |
909 | $foo=~/\G(..)/g; | |
910 | print "not " unless($1 eq 'cd'); | |
911 | print "ok $test\n"; | |
912 | $test++; | |
913 | ||
73d6d589 | 914 | $_='123x123'; |
e60df1fa IZ |
915 | @res = /(\d*|x)/g; |
916 | print "not " unless('123||x|123|' eq join '|', @res); | |
917 | print "ok $test\n"; | |
918 | $test++; | |
919 | ||
9d080a66 GS |
920 | # see if matching against temporaries (created via pp_helem()) is safe |
921 | { foo => "ok $test\n".$^X }->{foo} =~ /^(.*)\n/g; | |
922 | print "$1\n"; | |
923 | $test++; | |
924 | ||
cf93c79d IZ |
925 | # See if $i work inside (?{}) in the presense of saved substrings and |
926 | # changing $_ | |
927 | @a = qw(foo bar); | |
928 | @b = (); | |
929 | s/(\w)(?{push @b, $1})/,$1,/g for @a; | |
930 | ||
931 | print "# \@b='@b', expect 'f o o b a r'\nnot " unless("@b" eq "f o o b a r"); | |
932 | print "ok $test\n"; | |
933 | $test++; | |
934 | ||
935 | print "not " unless("@a" eq ",f,,o,,o, ,b,,a,,r,"); | |
936 | print "ok $test\n"; | |
937 | $test++; | |
938 | ||
2c914db6 | 939 | $brackets = qr{ |
14455d6c | 940 | { (?> [^{}]+ | (??{ $brackets }) )* } |
2c914db6 IZ |
941 | }x; |
942 | ||
943 | "{{}" =~ $brackets; | |
944 | print "ok $test\n"; # Did we survive? | |
945 | $test++; | |
946 | ||
947 | "something { long { and } hairy" =~ $brackets; | |
948 | print "ok $test\n"; # Did we survive? | |
949 | $test++; | |
950 | ||
14455d6c | 951 | "something { long { and } hairy" =~ m/((??{ $brackets }))/; |
2c914db6 IZ |
952 | print "not " unless $1 eq "{ and }"; |
953 | print "ok $test\n"; | |
954 | $test++; | |
955 | ||
30944b6d IZ |
956 | $_ = "a-a\nxbb"; |
957 | pos=1; | |
958 | m/^-.*bb/mg and print "not "; | |
959 | print "ok $test\n"; | |
960 | $test++; | |
30382c73 IZ |
961 | |
962 | $text = "aaXbXcc"; | |
963 | pos($text)=0; | |
964 | $text =~ /\GXb*X/g and print 'not '; | |
965 | print "ok $test\n"; | |
966 | $test++; | |
3cf5c195 IZ |
967 | |
968 | $text = "xA\n" x 500; | |
969 | $text =~ /^\s*A/m and print 'not '; | |
970 | print "ok $test\n"; | |
971 | $test++; | |
d506a20d IZ |
972 | |
973 | $text = "abc dbf"; | |
974 | @res = ($text =~ /.*?(b).*?\b/g); | |
975 | "@res" eq 'b b' or print 'not '; | |
976 | print "ok $test\n"; | |
977 | $test++; | |
978 | ||
9442cb0e | 979 | @a = map chr,0..255; |
aeaf5620 GS |
980 | |
981 | @b = grep(/\S/,@a); | |
982 | @c = grep(/[^\s]/,@a); | |
983 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
984 | print "ok $test\n"; |
985 | $test++; | |
986 | ||
aeaf5620 GS |
987 | @b = grep(/\S/,@a); |
988 | @c = grep(/[\S]/,@a); | |
989 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
990 | print "ok $test\n"; |
991 | $test++; | |
992 | ||
aeaf5620 GS |
993 | @b = grep(/\s/,@a); |
994 | @c = grep(/[^\S]/,@a); | |
995 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
996 | print "ok $test\n"; |
997 | $test++; | |
998 | ||
aeaf5620 GS |
999 | @b = grep(/\s/,@a); |
1000 | @c = grep(/[\s]/,@a); | |
1001 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1002 | print "ok $test\n"; |
1003 | $test++; | |
1004 | ||
aeaf5620 GS |
1005 | @b = grep(/\D/,@a); |
1006 | @c = grep(/[^\d]/,@a); | |
1007 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1008 | print "ok $test\n"; |
1009 | $test++; | |
1010 | ||
aeaf5620 GS |
1011 | @b = grep(/\D/,@a); |
1012 | @c = grep(/[\D]/,@a); | |
1013 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1014 | print "ok $test\n"; |
1015 | $test++; | |
1016 | ||
aeaf5620 GS |
1017 | @b = grep(/\d/,@a); |
1018 | @c = grep(/[^\D]/,@a); | |
1019 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1020 | print "ok $test\n"; |
1021 | $test++; | |
1022 | ||
aeaf5620 GS |
1023 | @b = grep(/\d/,@a); |
1024 | @c = grep(/[\d]/,@a); | |
1025 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1026 | print "ok $test\n"; |
1027 | $test++; | |
1028 | ||
aeaf5620 GS |
1029 | @b = grep(/\W/,@a); |
1030 | @c = grep(/[^\w]/,@a); | |
1031 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1032 | print "ok $test\n"; |
1033 | $test++; | |
1034 | ||
aeaf5620 GS |
1035 | @b = grep(/\W/,@a); |
1036 | @c = grep(/[\W]/,@a); | |
1037 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1038 | print "ok $test\n"; |
1039 | $test++; | |
1040 | ||
aeaf5620 GS |
1041 | @b = grep(/\w/,@a); |
1042 | @c = grep(/[^\W]/,@a); | |
1043 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1044 | print "ok $test\n"; |
1045 | $test++; | |
1046 | ||
aeaf5620 GS |
1047 | @b = grep(/\w/,@a); |
1048 | @c = grep(/[\w]/,@a); | |
de734bd5 | 1049 | iseq("@b","@c"); |
1aeab75a GS |
1050 | |
1051 | # see if backtracking optimization works correctly | |
1052 | "\n\n" =~ /\n $ \n/x or print "not "; | |
1053 | print "ok $test\n"; | |
1054 | $test++; | |
1055 | ||
1056 | "\n\n" =~ /\n* $ \n/x or print "not "; | |
1057 | print "ok $test\n"; | |
1058 | $test++; | |
1059 | ||
1060 | "\n\n" =~ /\n+ $ \n/x or print "not "; | |
1061 | print "ok $test\n"; | |
1062 | $test++; | |
05b4157f GS |
1063 | |
1064 | [] =~ /^ARRAY/ or print "# [] \nnot "; | |
1065 | print "ok $test\n"; | |
1066 | $test++; | |
1067 | ||
1068 | eval << 'EOE'; | |
1069 | { | |
1070 | package S; | |
1071 | use overload '""' => sub { 'Object S' }; | |
1072 | sub new { bless [] } | |
1073 | } | |
1074 | $a = 'S'->new; | |
1075 | EOE | |
1076 | ||
1077 | $a and $a =~ /^Object\sS/ or print "# '$a' \nnot "; | |
1078 | print "ok $test\n"; | |
1079 | $test++; | |
815d35b9 MG |
1080 | |
1081 | # test result of match used as match (!) | |
1082 | 'a1b' =~ ('xyz' =~ /y/) and $` eq 'a' or print "not "; | |
1083 | print "ok $test\n"; | |
1084 | $test++; | |
1085 | ||
1086 | 'a1b' =~ ('xyz' =~ /t/) and $` eq 'a' or print "not "; | |
1087 | print "ok $test\n"; | |
1088 | $test++; | |
5e39e1e5 HS |
1089 | |
1090 | $w = 0; | |
1091 | { | |
1092 | local $SIG{__WARN__} = sub { $w = 1 }; | |
1093 | local $^W = 1; | |
1094 | $w = 1 if ("1\n" x 102) =~ /^\s*\n/m; | |
1095 | } | |
1096 | print $w ? "not " : "", "ok $test\n"; | |
1097 | $test++; | |
aaa51d5e JF |
1098 | |
1099 | my %space = ( spc => " ", | |
1100 | tab => "\t", | |
1101 | cr => "\r", | |
1102 | lf => "\n", | |
1103 | ff => "\f", | |
75369ccb JH |
1104 | # There's no \v but the vertical tabulator seems miraculously |
1105 | # be 11 both in ASCII and EBCDIC. | |
aaa51d5e JF |
1106 | vt => chr(11), |
1107 | false => "space" ); | |
1108 | ||
1109 | my @space0 = sort grep { $space{$_} =~ /\s/ } keys %space; | |
1110 | my @space1 = sort grep { $space{$_} =~ /[[:space:]]/ } keys %space; | |
1111 | my @space2 = sort grep { $space{$_} =~ /[[:blank:]]/ } keys %space; | |
1112 | ||
1113 | print "not " unless "@space0" eq "cr ff lf spc tab"; | |
3bec3564 | 1114 | print "ok $test # @space0\n"; |
aaa51d5e JF |
1115 | $test++; |
1116 | ||
1117 | print "not " unless "@space1" eq "cr ff lf spc tab vt"; | |
3bec3564 | 1118 | print "ok $test # @space1\n"; |
aaa51d5e JF |
1119 | $test++; |
1120 | ||
1121 | print "not " unless "@space2" eq "spc tab"; | |
3bec3564 | 1122 | print "ok $test # @space2\n"; |
aaa51d5e | 1123 | $test++; |
73d6d589 | 1124 | |
a1933d95 HS |
1125 | # bugid 20001021.005 - this caused a SEGV |
1126 | print "not " unless undef =~ /^([^\/]*)(.*)$/; | |
1127 | print "ok $test\n"; | |
1128 | $test++; | |
b91bb191 JH |
1129 | |
1130 | # bugid 20000731.001 | |
1131 | ||
1132 | print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; | |
1133 | print "ok $test\n"; | |
1134 | $test++; | |
1135 | ||
5ae032e5 JH |
1136 | my $ordA = ord('A'); |
1137 | ||
3baa4c62 JH |
1138 | $_ = "a\x{100}b"; |
1139 | if (/(.)(\C)(\C)(.)/) { | |
1140 | print "ok 232\n"; | |
1141 | if ($1 eq "a") { | |
1142 | print "ok 233\n"; | |
1143 | } else { | |
1144 | print "not ok 233\n"; | |
1145 | } | |
5ae032e5 JH |
1146 | if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 |
1147 | if ($2 eq "\xC4") { | |
1148 | print "ok 234\n"; | |
1149 | } else { | |
1150 | print "not ok 234\n"; | |
1151 | } | |
1152 | if ($3 eq "\x80") { | |
1153 | print "ok 235\n"; | |
1154 | } else { | |
1155 | print "not ok 235\n"; | |
1156 | } | |
1157 | } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC | |
1158 | if ($2 eq "\x8C") { | |
1159 | print "ok 234\n"; | |
1160 | } else { | |
1161 | print "not ok 234\n"; | |
1162 | } | |
1163 | if ($3 eq "\x41") { | |
1164 | print "ok 235\n"; | |
1165 | } else { | |
1166 | print "not ok 235\n"; | |
1167 | } | |
3baa4c62 | 1168 | } else { |
5ae032e5 JH |
1169 | for (234..235) { |
1170 | print "not ok $_ # ord('A') == $ordA\n"; | |
1171 | } | |
3baa4c62 JH |
1172 | } |
1173 | if ($4 eq "b") { | |
1174 | print "ok 236\n"; | |
1175 | } else { | |
1176 | print "not ok 236\n"; | |
1177 | } | |
1178 | } else { | |
1179 | for (232..236) { | |
1180 | print "not ok $_\n"; | |
1181 | } | |
1182 | } | |
1183 | $_ = "\x{100}"; | |
1184 | if (/(\C)/g) { | |
1185 | print "ok 237\n"; | |
73d6d589 | 1186 | # currently \C are still tagged as UTF-8 |
5ae032e5 JH |
1187 | if ($ordA == 65) { |
1188 | if ($1 eq "\xC4") { | |
1189 | print "ok 238\n"; | |
1190 | } else { | |
1191 | print "not ok 238\n"; | |
1192 | } | |
1193 | } elsif ($ordA == 193) { | |
1194 | if ($1 eq "\x8C") { | |
1195 | print "ok 238\n"; | |
1196 | } else { | |
1197 | print "not ok 238\n"; | |
1198 | } | |
3baa4c62 | 1199 | } else { |
5ae032e5 | 1200 | print "not ok 238 # ord('A') == $ordA\n"; |
3baa4c62 JH |
1201 | } |
1202 | } else { | |
1203 | for (237..238) { | |
1204 | print "not ok $_\n"; | |
1205 | } | |
1206 | } | |
1207 | if (/(\C)/g) { | |
1208 | print "ok 239\n"; | |
73d6d589 | 1209 | # currently \C are still tagged as UTF-8 |
5ae032e5 JH |
1210 | if ($ordA == 65) { |
1211 | if ($1 eq "\x80") { | |
1212 | print "ok 240\n"; | |
1213 | } else { | |
1214 | print "not ok 240\n"; | |
1215 | } | |
1216 | } elsif ($ordA == 193) { | |
1217 | if ($1 eq "\x41") { | |
1218 | print "ok 240\n"; | |
1219 | } else { | |
1220 | print "not ok 240\n"; | |
1221 | } | |
3baa4c62 | 1222 | } else { |
5ae032e5 | 1223 | print "not ok 240 # ord('A') == $ordA\n"; |
3baa4c62 JH |
1224 | } |
1225 | } else { | |
1226 | for (239..240) { | |
1227 | print "not ok $_\n"; | |
1228 | } | |
1229 | } | |
b485d051 | 1230 | |
db615365 JP |
1231 | { |
1232 | # japhy -- added 03/03/2001 | |
1233 | () = (my $str = "abc") =~ /(...)/; | |
1234 | $str = "def"; | |
1235 | print "not " if $1 ne "abc"; | |
fd291da9 JH |
1236 | print "ok 241\n"; |
1237 | } | |
1238 | ||
1239 | # The 242 and 243 go with the 244 and 245. | |
1240 | # The trick is that in EBCDIC the explicit numeric range should match | |
1241 | # (as also in non-EBCDIC) but the explicit alphabetic range should not match. | |
1242 | ||
1243 | if ("\x8e" =~ /[\x89-\x91]/) { | |
1244 | print "ok 242\n"; | |
1245 | } else { | |
1246 | print "not ok 242\n"; | |
1247 | } | |
1248 | ||
1249 | if ("\xce" =~ /[\xc9-\xd1]/) { | |
db615365 | 1250 | print "ok 243\n"; |
fd291da9 JH |
1251 | } else { |
1252 | print "not ok 243\n"; | |
1253 | } | |
1254 | ||
1255 | # In most places these tests would succeed since \x8e does not | |
1256 | # in most character sets match 'i' or 'j' nor would \xce match | |
1257 | # 'I' or 'J', but strictly speaking these tests are here for | |
1258 | # the good of EBCDIC, so let's test these only there. | |
1259 | if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC | |
1260 | if ("\x8e" !~ /[i-j]/) { | |
1261 | print "ok 244\n"; | |
1262 | } else { | |
1263 | print "not ok 244\n"; | |
1264 | } | |
1265 | if ("\xce" !~ /[I-J]/) { | |
1266 | print "ok 245\n"; | |
1267 | } else { | |
1268 | print "not ok 245\n"; | |
1269 | } | |
1270 | } else { | |
1271 | for (244..245) { | |
60425c38 | 1272 | print "ok $_ # Skip: only in EBCDIC\n"; |
fd291da9 | 1273 | } |
db615365 | 1274 | } |
4765795a JH |
1275 | |
1276 | print "not " unless "\x{ab}" =~ /\x{ab}/; | |
1277 | print "ok 246\n"; | |
1278 | ||
1279 | print "not " unless "\x{abcd}" =~ /\x{abcd}/; | |
1280 | print "ok 247\n"; | |
1281 | ||
1282 | { | |
f9969324 JH |
1283 | # bug id 20001008.001 |
1284 | ||
fc8cd66c | 1285 | $test = 248; |
f9969324 JH |
1286 | my @x = ("stra\337e 138","stra\337e 138"); |
1287 | for (@x) { | |
1288 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; | |
1289 | my($latin) = /^(.+)(?:\s+\d)/; | |
1290 | print $latin eq "stra\337e" ? "ok $test\n" : # 248,249 | |
1291 | "#latin[$latin]\nnot ok $test\n"; | |
1292 | $test++; | |
1293 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
1294 | use utf8; # needed for the raw UTF-8 | |
1295 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a | |
4765795a JH |
1296 | } |
1297 | } | |
1298 | ||
1299 | { | |
1300 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1301 | print "ok 250\n"; | |
1302 | ||
1303 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1304 | print "ok 251\n"; | |
1305 | ||
1306 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1307 | print "ok 252\n"; | |
1308 | ||
1309 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1310 | print "ok 253\n"; | |
1311 | ||
1312 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1313 | print "ok 254\n"; | |
1314 | ||
1315 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1316 | print "ok 255\n"; | |
1317 | ||
1318 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1319 | print "ok 256\n"; | |
1320 | ||
1321 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1322 | print "ok 257\n"; | |
1323 | } | |
1324 | ||
1325 | { | |
1326 | # the first half of 20001028.003 | |
1327 | ||
1328 | my $X = chr(1448); | |
1329 | my ($Y) = $X =~ /(.*)/; | |
1330 | print "not " unless $Y eq v1448 && length($Y) == 1; | |
1331 | print "ok 258\n"; | |
1332 | } | |
1333 | ||
1334 | { | |
1335 | # 20001108.001 | |
1336 | ||
1337 | my $X = "Szab\x{f3},Bal\x{e1}zs"; | |
1338 | my $Y = $X; | |
1339 | $Y =~ s/(B)/$1/ for 0..3; | |
1340 | print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; | |
1341 | print "ok 259\n"; | |
1342 | } | |
1343 | ||
1344 | { | |
1345 | # the second half of 20001028.003 | |
1346 | ||
3568d838 | 1347 | my $X = ''; |
4765795a JH |
1348 | $X =~ s/^/chr(1488)/e; |
1349 | print "not " unless length $X == 1 && ord($X) == 1488; | |
1350 | print "ok 260\n"; | |
1351 | } | |
1352 | ||
1353 | { | |
1354 | # 20000517.001 | |
1355 | ||
1356 | my $x = "\x{100}A"; | |
1357 | ||
1358 | $x =~ s/A/B/; | |
1359 | ||
1360 | print "not " unless $x eq "\x{100}B" && length($x) == 2; | |
1361 | print "ok 261\n"; | |
1362 | } | |
1363 | ||
1364 | { | |
1365 | # bug id 20001230.002 | |
1366 | ||
1367 | print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; | |
1368 | print "ok 262\n"; | |
1369 | ||
1370 | print "not " unless "École" =~ /^\C\C(c)/; | |
1371 | print "ok 263\n"; | |
1372 | } | |
1373 | ||
8dcad4ad | 1374 | SKIP: { |
fc8cd66c | 1375 | $test = 264; # till 575 |
4765795a | 1376 | |
bb99e5f1 | 1377 | use charnames ":full"; |
4765795a JH |
1378 | |
1379 | # This is far from complete testing, there are dozens of character | |
1380 | # classes in Unicode. The mixing of literals and \N{...} is | |
1381 | # intentional so that in non-Latin-1 places we test the native | |
1382 | # characters, not the Unicode code points. | |
1383 | ||
1384 | my %s = ( | |
1385 | "a" => 'Ll', | |
1386 | "\N{CYRILLIC SMALL LETTER A}" => 'Ll', | |
1387 | "A" => 'Lu', | |
1388 | "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', | |
1389 | "\N{HIRAGANA LETTER SMALL A}" => 'Lo', | |
1390 | "\N{COMBINING GRAVE ACCENT}" => 'Mn', | |
1391 | "0" => 'Nd', | |
1392 | "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', | |
1393 | "_" => 'N', | |
1394 | "!" => 'P', | |
1395 | " " => 'Zs', | |
1396 | "\0" => 'Cc', | |
1397 | ); | |
73d6d589 | 1398 | |
3568d838 JH |
1399 | for my $char (map { s/^\S+ //; $_ } |
1400 | sort map { sprintf("%06x", ord($_))." $_" } keys %s) { | |
4765795a | 1401 | my $class = $s{$char}; |
3568d838 JH |
1402 | my $code = sprintf("%06x", ord($char)); |
1403 | printf "#\n# 0x$code\n#\n"; | |
4765795a JH |
1404 | print "# IsAlpha\n"; |
1405 | if ($class =~ /^[LM]/) { | |
1406 | print "not " unless $char =~ /\p{IsAlpha}/; | |
1407 | print "ok $test\n"; $test++; | |
1408 | print "not " if $char =~ /\P{IsAlpha}/; | |
1409 | print "ok $test\n"; $test++; | |
1410 | } else { | |
1411 | print "not " if $char =~ /\p{IsAlpha}/; | |
1412 | print "ok $test\n"; $test++; | |
1413 | print "not " unless $char =~ /\P{IsAlpha}/; | |
1414 | print "ok $test\n"; $test++; | |
1415 | } | |
1416 | print "# IsAlnum\n"; | |
1417 | if ($class =~ /^[LMN]/ && $char ne "_") { | |
1418 | print "not " unless $char =~ /\p{IsAlnum}/; | |
1419 | print "ok $test\n"; $test++; | |
1420 | print "not " if $char =~ /\P{IsAlnum}/; | |
1421 | print "ok $test\n"; $test++; | |
1422 | } else { | |
1423 | print "not " if $char =~ /\p{IsAlnum}/; | |
1424 | print "ok $test\n"; $test++; | |
1425 | print "not " unless $char =~ /\P{IsAlnum}/; | |
1426 | print "ok $test\n"; $test++; | |
1427 | } | |
1428 | print "# IsASCII\n"; | |
24817587 JH |
1429 | if (ord("A") == 193) { |
1430 | print "ok $test # Skip: in EBCDIC\n"; $test++; | |
1431 | print "ok $test # Skip: in EBCDIC\n"; $test++; | |
4765795a | 1432 | } else { |
24817587 JH |
1433 | if ($code le '00007f') { |
1434 | print "not " unless $char =~ /\p{IsASCII}/; | |
1435 | print "ok $test\n"; $test++; | |
1436 | print "not " if $char =~ /\P{IsASCII}/; | |
1437 | print "ok $test\n"; $test++; | |
1438 | } else { | |
1439 | print "not " if $char =~ /\p{IsASCII}/; | |
1440 | print "ok $test\n"; $test++; | |
1441 | print "not " unless $char =~ /\P{IsASCII}/; | |
1442 | print "ok $test\n"; $test++; | |
1443 | } | |
4765795a JH |
1444 | } |
1445 | print "# IsCntrl\n"; | |
1446 | if ($class =~ /^C/) { | |
1447 | print "not " unless $char =~ /\p{IsCntrl}/; | |
1448 | print "ok $test\n"; $test++; | |
1449 | print "not " if $char =~ /\P{IsCntrl}/; | |
1450 | print "ok $test\n"; $test++; | |
1451 | } else { | |
1452 | print "not " if $char =~ /\p{IsCntrl}/; | |
1453 | print "ok $test\n"; $test++; | |
1454 | print "not " unless $char =~ /\P{IsCntrl}/; | |
1455 | print "ok $test\n"; $test++; | |
1456 | } | |
1457 | print "# IsBlank\n"; | |
1458 | if ($class =~ /^Z[lp]/ || $char eq " ") { | |
1459 | print "not " unless $char =~ /\p{IsBlank}/; | |
1460 | print "ok $test\n"; $test++; | |
1461 | print "not " if $char =~ /\P{IsBlank}/; | |
1462 | print "ok $test\n"; $test++; | |
1463 | } else { | |
1464 | print "not " if $char =~ /\p{IsBlank}/; | |
1465 | print "ok $test\n"; $test++; | |
1466 | print "not " unless $char =~ /\P{IsBlank}/; | |
1467 | print "ok $test\n"; $test++; | |
1468 | } | |
1469 | print "# IsDigit\n"; | |
1470 | if ($class =~ /^Nd$/) { | |
1471 | print "not " unless $char =~ /\p{IsDigit}/; | |
1472 | print "ok $test\n"; $test++; | |
1473 | print "not " if $char =~ /\P{IsDigit}/; | |
1474 | print "ok $test\n"; $test++; | |
1475 | } else { | |
1476 | print "not " if $char =~ /\p{IsDigit}/; | |
1477 | print "ok $test\n"; $test++; | |
1478 | print "not " unless $char =~ /\P{IsDigit}/; | |
1479 | print "ok $test\n"; $test++; | |
1480 | } | |
1481 | print "# IsGraph\n"; | |
1482 | if ($class =~ /^([LMNPS])|Co/) { | |
1483 | print "not " unless $char =~ /\p{IsGraph}/; | |
1484 | print "ok $test\n"; $test++; | |
1485 | print "not " if $char =~ /\P{IsGraph}/; | |
1486 | print "ok $test\n"; $test++; | |
1487 | } else { | |
1488 | print "not " if $char =~ /\p{IsGraph}/; | |
1489 | print "ok $test\n"; $test++; | |
1490 | print "not " unless $char =~ /\P{IsGraph}/; | |
1491 | print "ok $test\n"; $test++; | |
1492 | } | |
1493 | print "# IsLower\n"; | |
1494 | if ($class =~ /^Ll$/) { | |
1495 | print "not " unless $char =~ /\p{IsLower}/; | |
1496 | print "ok $test\n"; $test++; | |
1497 | print "not " if $char =~ /\P{IsLower}/; | |
1498 | print "ok $test\n"; $test++; | |
1499 | } else { | |
1500 | print "not " if $char =~ /\p{IsLower}/; | |
1501 | print "ok $test\n"; $test++; | |
1502 | print "not " unless $char =~ /\P{IsLower}/; | |
1503 | print "ok $test\n"; $test++; | |
1504 | } | |
1505 | print "# IsPrint\n"; | |
1506 | if ($class =~ /^([LMNPS])|Co|Zs/) { | |
1507 | print "not " unless $char =~ /\p{IsPrint}/; | |
1508 | print "ok $test\n"; $test++; | |
1509 | print "not " if $char =~ /\P{IsPrint}/; | |
1510 | print "ok $test\n"; $test++; | |
1511 | } else { | |
1512 | print "not " if $char =~ /\p{IsPrint}/; | |
1513 | print "ok $test\n"; $test++; | |
1514 | print "not " unless $char =~ /\P{IsPrint}/; | |
1515 | print "ok $test\n"; $test++; | |
1516 | } | |
1517 | print "# IsPunct\n"; | |
1518 | if ($class =~ /^P/ || $char eq "_") { | |
1519 | print "not " unless $char =~ /\p{IsPunct}/; | |
1520 | print "ok $test\n"; $test++; | |
1521 | print "not " if $char =~ /\P{IsPunct}/; | |
1522 | print "ok $test\n"; $test++; | |
1523 | } else { | |
1524 | print "not " if $char =~ /\p{IsPunct}/; | |
1525 | print "ok $test\n"; $test++; | |
1526 | print "not " unless $char =~ /\P{IsPunct}/; | |
1527 | print "ok $test\n"; $test++; | |
1528 | } | |
1529 | print "# IsSpace\n"; | |
1530 | if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) { | |
1531 | print "not " unless $char =~ /\p{IsSpace}/; | |
1532 | print "ok $test\n"; $test++; | |
1533 | print "not " if $char =~ /\P{IsSpace}/; | |
1534 | print "ok $test\n"; $test++; | |
1535 | } else { | |
1536 | print "not " if $char =~ /\p{IsSpace}/; | |
1537 | print "ok $test\n"; $test++; | |
1538 | print "not " unless $char =~ /\P{IsSpace}/; | |
1539 | print "ok $test\n"; $test++; | |
1540 | } | |
1541 | print "# IsUpper\n"; | |
1542 | if ($class =~ /^L[ut]/) { | |
1543 | print "not " unless $char =~ /\p{IsUpper}/; | |
1544 | print "ok $test\n"; $test++; | |
1545 | print "not " if $char =~ /\P{IsUpper}/; | |
1546 | print "ok $test\n"; $test++; | |
1547 | } else { | |
1548 | print "not " if $char =~ /\p{IsUpper}/; | |
1549 | print "ok $test\n"; $test++; | |
1550 | print "not " unless $char =~ /\P{IsUpper}/; | |
1551 | print "ok $test\n"; $test++; | |
1552 | } | |
1553 | print "# IsWord\n"; | |
1554 | if ($class =~ /^[LMN]/ || $char eq "_") { | |
1555 | print "not " unless $char =~ /\p{IsWord}/; | |
1556 | print "ok $test\n"; $test++; | |
1557 | print "not " if $char =~ /\P{IsWord}/; | |
1558 | print "ok $test\n"; $test++; | |
1559 | } else { | |
1560 | print "not " if $char =~ /\p{IsWord}/; | |
1561 | print "ok $test\n"; $test++; | |
1562 | print "not " unless $char =~ /\P{IsWord}/; | |
1563 | print "ok $test\n"; $test++; | |
1564 | } | |
1565 | } | |
1566 | } | |
1567 | ||
1568 | { | |
1569 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; | |
1570 | ||
1571 | if (/(.\x{300})./) { | |
1572 | print "ok 576\n"; | |
1573 | ||
1574 | print "not " unless $` eq "abc\x{100}" && length($`) == 4; | |
73d6d589 | 1575 | print "ok 577\n"; |
4765795a JH |
1576 | |
1577 | print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3; | |
73d6d589 | 1578 | print "ok 578\n"; |
4765795a JH |
1579 | |
1580 | print "not " unless $' eq "\x{400}defg" && length($') == 5; | |
73d6d589 | 1581 | print "ok 579\n"; |
4765795a JH |
1582 | |
1583 | print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2; | |
73d6d589 | 1584 | print "ok 580\n"; |
a8a2fe91 JH |
1585 | } else { |
1586 | for (576..580) { print "not ok $_\n" } | |
4765795a JH |
1587 | } |
1588 | } | |
8269fa76 JH |
1589 | |
1590 | { | |
1591 | # bug id 20010306.008 | |
1592 | ||
1593 | $a = "a\x{1234}"; | |
1594 | # The original bug report had 'no utf8' here but that was irrelevant. | |
1595 | $a =~ m/\w/; # used to core dump | |
1596 | ||
1597 | print "ok 581\n"; | |
1598 | } | |
b8ef571c JH |
1599 | |
1600 | { | |
339e86bc JH |
1601 | $test = 582; |
1602 | ||
b8ef571c JH |
1603 | # bugid 20010410.006 |
1604 | for my $rx ( | |
1605 | '/(.*?)\{(.*?)\}/csg', | |
1606 | '/(.*?)\{(.*?)\}/cg', | |
1607 | '/(.*?)\{(.*?)\}/sg', | |
1608 | '/(.*?)\{(.*?)\}/g', | |
1609 | '/(.+?)\{(.+?)\}/csg', | |
1610 | ) | |
1611 | { | |
1612 | my($input, $i); | |
1613 | ||
1614 | $i = 0; | |
1615 | $input = "a{b}c{d}"; | |
1616 | eval <<EOT; | |
1617 | while (eval \$input =~ $rx) { | |
1618 | print "# \\\$1 = '\$1' \\\$2 = '\$2'\n"; | |
1619 | ++\$i; | |
1620 | } | |
1621 | EOT | |
1622 | print "not " unless $i == 2; | |
1623 | print "ok " . $test++ . "\n"; | |
1624 | } | |
1625 | } | |
209a9bc1 JH |
1626 | |
1627 | { | |
1628 | # from Robin Houston | |
1629 | ||
b851fbc1 | 1630 | my $x = "\x{10FFFD}"; |
209a9bc1 | 1631 | $x =~ s/(.)/$1/g; |
b851fbc1 | 1632 | print "not " unless ord($x) == 0x10FFFD && length($x) == 1; |
209a9bc1 JH |
1633 | print "ok 587\n"; |
1634 | } | |
3568d838 JH |
1635 | |
1636 | { | |
1637 | my $x = "\x7f"; | |
1638 | ||
1639 | print "not " if $x =~ /[\x80-\xff]/; | |
1640 | print "ok 588\n"; | |
1641 | ||
1642 | print "not " if $x =~ /[\x80-\x{100}]/; | |
1643 | print "ok 589\n"; | |
1644 | ||
1645 | print "not " if $x =~ /[\x{100}]/; | |
1646 | print "ok 590\n"; | |
1647 | ||
1648 | print "not " if $x =~ /\p{InLatin1Supplement}/; | |
1649 | print "ok 591\n"; | |
1650 | ||
1651 | print "not " unless $x =~ /\P{InLatin1Supplement}/; | |
1652 | print "ok 592\n"; | |
1653 | ||
1654 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1655 | print "ok 593\n"; | |
1656 | ||
1657 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1658 | print "ok 594\n"; | |
1659 | } | |
1660 | ||
1661 | { | |
1662 | my $x = "\x80"; | |
1663 | ||
1664 | print "not " unless $x =~ /[\x80-\xff]/; | |
1665 | print "ok 595\n"; | |
1666 | ||
1667 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1668 | print "ok 596\n"; | |
1669 | ||
1670 | print "not " if $x =~ /[\x{100}]/; | |
1671 | print "ok 597\n"; | |
1672 | ||
1673 | print "not " unless $x =~ /\p{InLatin1Supplement}/; | |
1674 | print "ok 598\n"; | |
1675 | ||
1676 | print "not " if $x =~ /\P{InLatin1Supplement}/; | |
1677 | print "ok 599\n"; | |
1678 | ||
1679 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1680 | print "ok 600\n"; | |
1681 | ||
1682 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1683 | print "ok 601\n"; | |
1684 | } | |
1685 | ||
1686 | { | |
1687 | my $x = "\xff"; | |
1688 | ||
1689 | print "not " unless $x =~ /[\x80-\xff]/; | |
1690 | print "ok 602\n"; | |
1691 | ||
1692 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1693 | print "ok 603\n"; | |
1694 | ||
1695 | print "not " if $x =~ /[\x{100}]/; | |
1696 | print "ok 604\n"; | |
1697 | ||
cc8040a1 TD |
1698 | # the next two tests must be ignored on EBCDIC |
1699 | print "not " unless $x =~ /\p{InLatin1Supplement}/ or ord("A") == 193; | |
3568d838 JH |
1700 | print "ok 605\n"; |
1701 | ||
cc8040a1 | 1702 | print "not " if $x =~ /\P{InLatin1Supplement}/ and ord("A") != 193; |
3568d838 JH |
1703 | print "ok 606\n"; |
1704 | ||
1705 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1706 | print "ok 607\n"; | |
1707 | ||
1708 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1709 | print "ok 608\n"; | |
1710 | } | |
1711 | ||
1712 | { | |
1713 | my $x = "\x{100}"; | |
1714 | ||
1715 | print "not " if $x =~ /[\x80-\xff]/; | |
1716 | print "ok 609\n"; | |
1717 | ||
1718 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1719 | print "ok 610\n"; | |
1720 | ||
1721 | print "not " unless $x =~ /[\x{100}]/; | |
1722 | print "ok 611\n"; | |
1723 | ||
1724 | print "not " if $x =~ /\p{InLatin1Supplement}/; | |
1725 | print "ok 612\n"; | |
1726 | ||
1727 | print "not " unless $x =~ /\P{InLatin1Supplement}/; | |
1728 | print "ok 613\n"; | |
1729 | ||
1730 | print "not " unless $x =~ /\p{InLatinExtendedA}/; | |
1731 | print "ok 614\n"; | |
1732 | ||
1733 | print "not " if $x =~ /\P{InLatinExtendedA}/; | |
1734 | print "ok 615\n"; | |
1735 | } | |
1736 | ||
9d1d55b5 JP |
1737 | { |
1738 | # from japhy | |
1739 | my $w; | |
1740 | use warnings; | |
1741 | local $SIG{__WARN__} = sub { $w .= shift }; | |
1742 | ||
1743 | $w = ""; | |
1744 | eval 'qr/(?c)/'; | |
1745 | print "not " if $w !~ /^Useless \(\?c\)/; | |
1746 | print "ok 616\n"; | |
1747 | ||
1748 | $w = ""; | |
1749 | eval 'qr/(?-c)/'; | |
1750 | print "not " if $w !~ /^Useless \(\?-c\)/; | |
1751 | print "ok 617\n"; | |
1752 | ||
1753 | $w = ""; | |
1754 | eval 'qr/(?g)/'; | |
1755 | print "not " if $w !~ /^Useless \(\?g\)/; | |
1756 | print "ok 618\n"; | |
1757 | ||
1758 | $w = ""; | |
1759 | eval 'qr/(?-g)/'; | |
1760 | print "not " if $w !~ /^Useless \(\?-g\)/; | |
1761 | print "ok 619\n"; | |
1762 | ||
1763 | $w = ""; | |
1764 | eval 'qr/(?o)/'; | |
1765 | print "not " if $w !~ /^Useless \(\?o\)/; | |
1766 | print "ok 620\n"; | |
1767 | ||
1768 | $w = ""; | |
1769 | eval 'qr/(?-o)/'; | |
1770 | print "not " if $w !~ /^Useless \(\?-o\)/; | |
1771 | print "ok 621\n"; | |
1772 | ||
1773 | # now test multi-error regexes | |
1774 | ||
1775 | $w = ""; | |
1776 | eval 'qr/(?g-o)/'; | |
1777 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-o\)/; | |
1778 | print "ok 622\n"; | |
1779 | ||
1780 | $w = ""; | |
1781 | eval 'qr/(?g-c)/'; | |
1782 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-c\)/; | |
1783 | print "ok 623\n"; | |
1784 | ||
1785 | $w = ""; | |
1786 | eval 'qr/(?o-cg)/'; # (?c) means (?g) error won't be thrown | |
1787 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?-c\)/; | |
1788 | print "ok 624\n"; | |
1789 | ||
1790 | $w = ""; | |
1791 | eval 'qr/(?ogc)/'; | |
1792 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?g\).*\nUseless \(\?c\)/; | |
1793 | print "ok 625\n"; | |
1794 | } | |
a72deede JH |
1795 | |
1796 | # More Unicode "class" tests | |
1797 | ||
1798 | { | |
1799 | use charnames ':full'; | |
1800 | ||
1801 | print "not " unless "\N{LATIN CAPITAL LETTER A}" =~ /\p{InBasicLatin}/; | |
1802 | print "ok 626\n"; | |
1803 | ||
1804 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH GRAVE}" =~ /\p{InLatin1Supplement}/; | |
1805 | print "ok 627\n"; | |
1806 | ||
1807 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH MACRON}" =~ /\p{InLatinExtendedA}/; | |
1808 | print "ok 628\n"; | |
1809 | ||
1810 | print "not " unless "\N{LATIN SMALL LETTER B WITH STROKE}" =~ /\p{InLatinExtendedB}/; | |
1811 | print "ok 629\n"; | |
1812 | ||
1813 | print "not " unless "\N{KATAKANA LETTER SMALL A}" =~ /\p{InKatakana}/; | |
1814 | print "ok 630\n"; | |
1815 | } | |
1816 | ||
6002328a JH |
1817 | $_ = "foo"; |
1818 | ||
1819 | eval <<"EOT"; die if $@; | |
1820 | /f | |
1821 | o\r | |
1822 | o | |
1823 | \$ | |
1824 | /x && print "ok 631\n"; | |
1825 | EOT | |
1826 | ||
1827 | eval <<"EOT"; die if $@; | |
1828 | /f | |
1829 | o | |
1830 | o | |
1831 | \$\r | |
1832 | /x && print "ok 632\n"; | |
1833 | EOT | |
1834 | ||
569b5e07 AB |
1835 | #test /o feature |
1836 | sub test_o { $_[0] =~/$_[1]/o; return $1} | |
1837 | if(test_o('abc','(.)..') eq 'a') { | |
395ddfe6 | 1838 | print "ok 633\n"; |
569b5e07 | 1839 | } else { |
395ddfe6 | 1840 | print "not ok 633\n"; |
569b5e07 AB |
1841 | } |
1842 | if(test_o('abc','..(.)') eq 'a') { | |
395ddfe6 | 1843 | print "ok 634\n"; |
569b5e07 | 1844 | } else { |
395ddfe6 | 1845 | print "not ok 634\n"; |
569b5e07 AB |
1846 | } |
1847 | ||
f79b3095 JH |
1848 | # 635..639: ID 20010619.003 (only the space character is |
1849 | # supposed to be [:print:], not the whole isprint()). | |
1850 | ||
1851 | print "not " if "\n" =~ /[[:print:]]/; | |
1852 | print "ok 635\n"; | |
1853 | ||
1854 | print "not " if "\t" =~ /[[:print:]]/; | |
1855 | print "ok 636\n"; | |
1856 | ||
e857312d | 1857 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
f79b3095 JH |
1858 | print "not " if "\014" =~ /[[:print:]]/; |
1859 | print "ok 637\n"; | |
1860 | ||
1861 | print "not " if "\r" =~ /[[:print:]]/; | |
1862 | print "ok 638\n"; | |
1863 | ||
1864 | print "not " unless " " =~ /[[:print:]]/; | |
1865 | print "ok 639\n"; | |
1866 | ||
a01268b5 JH |
1867 | ## |
1868 | ## Test basic $^N usage outside of a regex | |
1869 | ## | |
1870 | $x = "abcdef"; | |
1871 | $T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"}; | |
1872 | $T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1873 | $T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; | |
1874 | $T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1875 | $T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1876 | $T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1877 | $T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; | |
1878 | $T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1879 | $T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1880 | $T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; | |
1881 | $T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; | |
1882 | $T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1883 | $T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1884 | $T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; | |
1885 | { | |
1886 | $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1887 | } | |
1888 | ## test to see if $^N is automatically localized -- it should now | |
1889 | ## have the value set in test 653 | |
1890 | $T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; | |
1891 | ||
1892 | ## | |
1893 | ## Now test inside (?{...}) | |
1894 | ## | |
1895 | $T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; | |
1896 | $T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; | |
1897 | $T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; | |
1898 | $T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd") | |
1899 | {print $T} else {print "not $T"}; | |
1900 | $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") | |
1901 | {print $T} else {print "not $T"}; | |
2796c109 JH |
1902 | |
1903 | # Test the Unicode script classes | |
1904 | ||
cf25bb62 | 1905 | print "not " unless chr(0x100) =~ /\p{IsLatin}/; # outside Latin-1 |
2796c109 JH |
1906 | print "ok 661\n"; |
1907 | ||
cf25bb62 | 1908 | print "not " unless chr(0x212b) =~ /\p{IsLatin}/; # Angstrom sign, very outside |
2796c109 JH |
1909 | print "ok 662\n"; |
1910 | ||
cf25bb62 | 1911 | print "not " unless chr(0x5d0) =~ /\p{IsHebrew}/; # inside InHebrew |
2796c109 JH |
1912 | print "ok 663\n"; |
1913 | ||
cf25bb62 | 1914 | print "not " unless chr(0xfb4f) =~ /\p{IsHebrew}/; # outside InHebrew |
2796c109 JH |
1915 | print "ok 664\n"; |
1916 | ||
7be0dac3 NC |
1917 | # # singleton (not in a range, this test must be ignored on EBCDIC) |
1918 | # print "not " unless chr(0xb5) =~ /\p{IsGreek}/ or ord("A") == 193; | |
1919 | # print "ok 665\n"; | |
1920 | print "ok 665 # 0xb5 moved from Greek to Common with Unicode 4.0.1\n"; | |
5f9563ea | 1921 | |
cf25bb62 | 1922 | print "not " unless chr(0x37a) =~ /\p{IsGreek}/; # singleton |
5f9563ea JH |
1923 | print "ok 666\n"; |
1924 | ||
cf25bb62 | 1925 | print "not " unless chr(0x386) =~ /\p{IsGreek}/; # singleton |
5f9563ea JH |
1926 | print "ok 667\n"; |
1927 | ||
cf25bb62 | 1928 | print "not " unless chr(0x387) =~ /\P{IsGreek}/; # not there |
5f9563ea JH |
1929 | print "ok 668\n"; |
1930 | ||
cf25bb62 | 1931 | print "not " unless chr(0x388) =~ /\p{IsGreek}/; # range |
5f9563ea JH |
1932 | print "ok 669\n"; |
1933 | ||
cf25bb62 | 1934 | print "not " unless chr(0x38a) =~ /\p{IsGreek}/; # range |
5f9563ea JH |
1935 | print "ok 670\n"; |
1936 | ||
cf25bb62 | 1937 | print "not " unless chr(0x38b) =~ /\P{IsGreek}/; # not there |
5f9563ea JH |
1938 | print "ok 671\n"; |
1939 | ||
cf25bb62 | 1940 | print "not " unless chr(0x38c) =~ /\p{IsGreek}/; # singleton |
5f9563ea JH |
1941 | print "ok 672\n"; |
1942 | ||
550cec39 | 1943 | if (ord("A") == 65) { |
7be5a6cf JF |
1944 | ## |
1945 | ## Test [:cntrl:]... | |
1946 | ## | |
1947 | ## Should probably put in tests for all the POSIX stuff, but not sure how to | |
1948 | ## guarantee a specific locale...... | |
1949 | ## | |
550cec39 JH |
1950 | $AllBytes = join('', map { chr($_) } 0..255); |
1951 | ($x = $AllBytes) =~ s/[[:cntrl:]]//g; | |
1952 | if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { | |
1953 | print "not "; | |
1954 | } | |
1955 | print "ok 673\n"; | |
1956 | ||
1957 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; | |
1958 | if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " } | |
1959 | print "ok 674\n"; | |
1960 | } else { | |
1961 | print "ok $_ # Skip: EBCDIC\n" for 673..674; | |
1962 | } | |
f33976b4 DB |
1963 | |
1964 | # With /s modifier UTF8 chars were interpreted as bytes | |
1965 | { | |
1966 | my $a = "Hello \x{263A} World"; | |
1967 | ||
1968 | my @a = ($a =~ /./gs); | |
1969 | ||
1970 | print "not " unless $#a == 12; | |
1971 | print "ok 675\n"; | |
1972 | } | |
cce850e4 JH |
1973 | |
1974 | @a = ("foo\nbar" =~ /./g); | |
1975 | print "ok 676\n" if @a == 6 && "@a" eq "f o o b a r"; | |
1976 | ||
1977 | @a = ("foo\nbar" =~ /./gs); | |
1978 | print "ok 677\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1979 | ||
1980 | @a = ("foo\nbar" =~ /\C/g); | |
1981 | print "ok 678\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1982 | ||
1983 | @a = ("foo\nbar" =~ /\C/gs); | |
1984 | print "ok 679\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1985 | ||
1986 | @a = ("foo\n\x{100}bar" =~ /./g); | |
1987 | print "ok 680\n" if @a == 7 && "@a" eq "f o o \x{100} b a r"; | |
1988 | ||
1989 | @a = ("foo\n\x{100}bar" =~ /./gs); | |
1990 | print "ok 681\n" if @a == 8 && "@a" eq "f o o \n \x{100} b a r"; | |
1991 | ||
1992 | ($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41); | |
1993 | ||
1994 | @a = ("foo\n\x{100}bar" =~ /\C/g); | |
1995 | print "ok 682\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; | |
1996 | ||
1997 | @a = ("foo\n\x{100}bar" =~ /\C/gs); | |
1998 | print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; | |
1999 | ||
0af80b60 HS |
2000 | { |
2001 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
2002 | $_ = "ababacadaea"; | |
2003 | $a = join ":", /b./gc; | |
2004 | $b = join ":", /a./gc; | |
2005 | $c = pos; | |
2006 | print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; | |
2007 | } | |
d9f424b2 JH |
2008 | |
2009 | { | |
75685a94 JH |
2010 | # [ID 20010407.006] matching utf8 return values from functions does not work |
2011 | ||
d9f424b2 JH |
2012 | package ID_20010407_006; |
2013 | ||
2014 | sub x { | |
2015 | "a\x{1234}"; | |
2016 | } | |
2017 | ||
2018 | my $x = x; | |
2019 | my $y; | |
2020 | ||
2021 | $x =~ /(..)/; $y = $1; | |
2022 | print "not " unless length($y) == 2 && $y eq $x; | |
75685a94 | 2023 | print "ok 685\n"; |
d9f424b2 JH |
2024 | |
2025 | x =~ /(..)/; $y = $1; | |
2026 | print "not " unless length($y) == 2 && $y eq $x; | |
2027 | print "ok 686\n"; | |
2028 | } | |
a4c04bdc | 2029 | |
e2d8ce26 | 2030 | |
fc8cd66c | 2031 | $test = 687; |
a4c04bdc NC |
2032 | |
2033 | # Force scalar context on the patern match | |
fc8cd66c | 2034 | sub ok ($;$) { |
a4c04bdc | 2035 | my($ok, $name) = @_; |
03949bff | 2036 | my $todo = $TODO ? " # TODO $TODO" : ''; |
a4c04bdc | 2037 | |
f0852a51 | 2038 | printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, |
03949bff | 2039 | ($name||$Message)."$todo\tLine ".((caller)[2]); |
a4c04bdc NC |
2040 | |
2041 | printf "# Failed test at line %d\n", (caller)[2] unless $ok; | |
2042 | ||
2043 | $test++; | |
2044 | return $ok; | |
2045 | } | |
2046 | ||
2047 | { | |
2048 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. | |
2049 | $x = "\x4e" . "E"; | |
2050 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); | |
2051 | ||
2052 | $x = "\x4e" . "i"; | |
2053 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); | |
2054 | ||
2055 | $x = "\x4" . "j"; | |
2056 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); | |
2057 | ||
2058 | $x = "\x0" . "k"; | |
2059 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); | |
2060 | ||
2061 | $x = "\x0" . "x"; | |
2062 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); | |
2063 | ||
2064 | $x = "\x0" . "xa"; | |
2065 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); | |
2066 | ||
2067 | $x = "\x9" . "_b"; | |
2068 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); | |
2069 | ||
2070 | print "# and now again in [] ranges\n"; | |
2071 | ||
2072 | $x = "\x4e" . "E"; | |
2073 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); | |
2074 | ||
2075 | $x = "\x4e" . "i"; | |
2076 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); | |
2077 | ||
2078 | $x = "\x4" . "j"; | |
2079 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); | |
2080 | ||
2081 | $x = "\x0" . "k"; | |
2082 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); | |
2083 | ||
2084 | $x = "\x0" . "x"; | |
2085 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); | |
2086 | ||
2087 | $x = "\x0" . "xa"; | |
2088 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); | |
2089 | ||
2090 | $x = "\x9" . "_b"; | |
2091 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); | |
2092 | ||
2093 | } | |
2094 | ||
2095 | { | |
2096 | # Check that \x{##} works. 5.6.1 fails quite a few of these. | |
2097 | ||
2098 | $x = "\x9b"; | |
2099 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); | |
2100 | ||
2101 | $x = "\x9b" . "y"; | |
2102 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
2103 | ||
2104 | $x = "\x9b" . "y"; | |
2105 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); | |
2106 | ||
2107 | $x = "\x9b" . "y"; | |
2108 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); | |
2109 | ||
2110 | $x = "\x0" . "y"; | |
2111 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); | |
2112 | ||
2113 | $x = "\x0" . "y"; | |
2114 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); | |
2115 | ||
2116 | $x = "\x9b" . "y"; | |
2117 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); | |
2118 | ||
2119 | print "# and now again in [] ranges\n"; | |
2120 | ||
2121 | $x = "\x9b"; | |
2122 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); | |
2123 | ||
2124 | $x = "\x9b" . "y"; | |
2125 | ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
2126 | ||
2127 | $x = "\x9b" . "y"; | |
2128 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); | |
2129 | ||
2130 | $x = "\x9b" . "y"; | |
2131 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); | |
2132 | ||
2133 | $x = "\x0" . "y"; | |
2134 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); | |
2135 | ||
2136 | $x = "\x0" . "y"; | |
2137 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); | |
2138 | ||
2139 | $x = "\x9b" . "y"; | |
2140 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); | |
2141 | } | |
e2d8ce26 JP |
2142 | |
2143 | { | |
d9efae67 JH |
2144 | # high bit bug -- japhy |
2145 | my $x = "ab\200d"; | |
2146 | $x =~ /.*?\200/ or print "not "; | |
2147 | print "ok 715\n"; | |
e2d8ce26 JP |
2148 | } |
2149 | ||
4193bef7 JH |
2150 | print "# some Unicode properties\n"; |
2151 | ||
d9efae67 | 2152 | { |
4193bef7 | 2153 | # Dashes, underbars, case. |
d9efae67 JH |
2154 | print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; |
2155 | print "ok 716\n"; | |
ab13f0c7 | 2156 | |
4193bef7 | 2157 | # Complement, leading and trailing whitespace. |
ab13f0c7 JH |
2158 | print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; |
2159 | print "ok 717\n"; | |
4193bef7 | 2160 | |
f173cd49 JH |
2161 | # No ^In, dashes, case, dash, any intervening (word-break) whitespace. |
2162 | # (well, newlines don't work...) | |
2163 | print "not " unless "\x80" =~ /\p{latin-1 supplement}/; | |
4193bef7 JH |
2164 | print "ok 718\n"; |
2165 | } | |
2166 | ||
2167 | { | |
2168 | print "not " unless "a" =~ /\pL/; | |
2169 | print "ok 719\n"; | |
4193bef7 | 2170 | |
4193bef7 JH |
2171 | print "not " unless "a" =~ /\p{IsLl}/; |
2172 | print "ok 720\n"; | |
4193bef7 | 2173 | |
c87b7cc2 | 2174 | print "not " if "a" =~ /\p{IsLu}/; |
4193bef7 | 2175 | print "ok 721\n"; |
4193bef7 | 2176 | |
61247495 | 2177 | print "not " unless "a" =~ /\p{Ll}/; |
4193bef7 | 2178 | print "ok 722\n"; |
c87b7cc2 | 2179 | |
61247495 | 2180 | print "not " if "a" =~ /\p{Lu}/; |
c87b7cc2 JH |
2181 | print "ok 723\n"; |
2182 | ||
61247495 | 2183 | print "not " unless "A" =~ /\pL/; |
c87b7cc2 JH |
2184 | print "ok 724\n"; |
2185 | ||
61247495 | 2186 | print "not " unless "A" =~ /\p{IsLu}/; |
c87b7cc2 JH |
2187 | print "ok 725\n"; |
2188 | ||
61247495 | 2189 | print "not " if "A" =~ /\p{IsLl}/; |
c87b7cc2 JH |
2190 | print "ok 726\n"; |
2191 | ||
61247495 | 2192 | print "not " unless "A" =~ /\p{Lu}/; |
c87b7cc2 JH |
2193 | print "ok 727\n"; |
2194 | ||
61247495 | 2195 | print "not " if "A" =~ /\p{Ll}/; |
c87b7cc2 JH |
2196 | print "ok 728\n"; |
2197 | ||
61247495 | 2198 | print "not " if "a" =~ /\PL/; |
c87b7cc2 JH |
2199 | print "ok 729\n"; |
2200 | ||
61247495 | 2201 | print "not " if "a" =~ /\P{IsLl}/; |
c87b7cc2 | 2202 | print "ok 730\n"; |
61247495 JH |
2203 | |
2204 | print "not " unless "a" =~ /\P{IsLu}/; | |
2205 | print "ok 731\n"; | |
2206 | ||
2207 | print "not " if "a" =~ /\P{Ll}/; | |
2208 | print "ok 732\n"; | |
2209 | ||
2210 | print "not " unless "a" =~ /\P{Lu}/; | |
2211 | print "ok 733\n"; | |
2212 | ||
2213 | print "not " if "A" =~ /\PL/; | |
2214 | print "ok 734\n"; | |
2215 | ||
2216 | print "not " if "A" =~ /\P{IsLu}/; | |
2217 | print "ok 735\n"; | |
2218 | ||
2219 | print "not " unless "A" =~ /\P{IsLl}/; | |
2220 | print "ok 736\n"; | |
2221 | ||
2222 | print "not " if "A" =~ /\P{Lu}/; | |
2223 | print "ok 737\n"; | |
2224 | ||
2225 | print "not " unless "A" =~ /\P{Ll}/; | |
2226 | print "ok 738\n"; | |
2227 | ||
4193bef7 | 2228 | } |
9b4e380a JH |
2229 | |
2230 | { | |
2231 | print "not " if "a" =~ /\p{Common}/; | |
2232 | print "ok 739\n"; | |
2233 | ||
2234 | print "not " unless "1" =~ /\p{Common}/; | |
2235 | print "ok 740\n"; | |
2236 | } | |
2237 | ||
2238 | { | |
2239 | print "not " if "a" =~ /\p{Inherited}/; | |
2240 | print "ok 741\n"; | |
2241 | ||
2242 | print "not " unless "\x{300}" =~ /\p{Inherited}/; | |
2243 | print "ok 742\n"; | |
2244 | } | |
2245 | ||
2246 | { | |
12ac2576 JP |
2247 | # L& and LC are the same |
2248 | print "not " unless "a" =~ /\p{LC}/ and "a" =~ /\p{L&}/; | |
9b4e380a JH |
2249 | print "ok 743\n"; |
2250 | ||
12ac2576 | 2251 | print "not " if "1" =~ /\p{LC}/ or "1" =~ /\p{L&}/; |
9b4e380a JH |
2252 | print "ok 744\n"; |
2253 | } | |
d73e5302 JH |
2254 | |
2255 | { | |
904bc114 | 2256 | print "not " unless "a" =~ /\p{Lowercase Letter}/; |
d73e5302 JH |
2257 | print "ok 745\n"; |
2258 | ||
904bc114 | 2259 | print "not " if "A" =~ /\p{lowercaseletter}/; |
d73e5302 JH |
2260 | print "ok 746\n"; |
2261 | } | |
2262 | ||
2263 | { | |
cf25bb62 | 2264 | print "not " unless "\x{AC00}" =~ /\p{HangulSyllables}/; |
d73e5302 JH |
2265 | print "ok 747\n"; |
2266 | } | |
71d929cb JH |
2267 | |
2268 | { | |
701a277b JH |
2269 | # Script=, Block=, Category= |
2270 | ||
71d929cb JH |
2271 | print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; |
2272 | print "ok 748\n"; | |
2273 | ||
2274 | print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/; | |
2275 | print "ok 749\n"; | |
2276 | ||
2277 | print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/; | |
2278 | print "ok 750\n"; | |
2279 | } | |
2280 | ||
ef54fa25 | 2281 | { |
701a277b JH |
2282 | print "# the basic character classes and Unicode \n"; |
2283 | ||
ef54fa25 JH |
2284 | # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; |
2285 | print "not " unless "\x{0100}" =~ /\w/; | |
2286 | print "ok 751\n"; | |
2287 | ||
2288 | # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; | |
2289 | print "not " unless "\x{0660}" =~ /\d/; | |
2290 | print "ok 752\n"; | |
2291 | ||
2292 | # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; | |
2293 | print "not " unless "\x{1680}" =~ /\s/; | |
2294 | print "ok 753\n"; | |
2295 | } | |
701a277b JH |
2296 | |
2297 | { | |
2298 | print "# folding matches and Unicode\n"; | |
2299 | ||
2300 | print "not " unless "a\x{100}" =~ /A/i; | |
2301 | print "ok 754\n"; | |
2302 | ||
27e28630 | 2303 | print "not " unless "A\x{100}" =~ /a/i; |
701a277b JH |
2304 | print "ok 755\n"; |
2305 | ||
2306 | print "not " unless "a\x{100}" =~ /a/i; | |
2307 | print "ok 756\n"; | |
2308 | ||
2309 | print "not " unless "A\x{100}" =~ /A/i; | |
2310 | print "ok 757\n"; | |
bc517b45 JH |
2311 | |
2312 | print "not " unless "\x{101}a" =~ /\x{100}/i; | |
2313 | print "ok 758\n"; | |
2314 | ||
2315 | print "not " unless "\x{100}a" =~ /\x{100}/i; | |
2316 | print "ok 759\n"; | |
2317 | ||
2318 | print "not " unless "\x{101}a" =~ /\x{101}/i; | |
2319 | print "ok 760\n"; | |
2320 | ||
2321 | print "not " unless "\x{100}a" =~ /\x{101}/i; | |
2322 | print "ok 761\n"; | |
2323 | ||
2324 | print "not " unless "a\x{100}" =~ /A\x{100}/i; | |
2325 | print "ok 762\n"; | |
2326 | ||
27e28630 | 2327 | print "not " unless "A\x{100}" =~ /a\x{100}/i; |
bc517b45 JH |
2328 | print "ok 763\n"; |
2329 | ||
2330 | print "not " unless "a\x{100}" =~ /a\x{100}/i; | |
2331 | print "ok 764\n"; | |
2332 | ||
2333 | print "not " unless "A\x{100}" =~ /A\x{100}/i; | |
2334 | print "ok 765\n"; | |
2335 | ||
2336 | print "not " unless "a\x{100}" =~ /[A]/i; | |
2337 | print "ok 766\n"; | |
2338 | ||
27e28630 | 2339 | print "not " unless "A\x{100}" =~ /[a]/i; |
bc517b45 JH |
2340 | print "ok 767\n"; |
2341 | ||
2342 | print "not " unless "a\x{100}" =~ /[a]/i; | |
2343 | print "ok 768\n"; | |
2344 | ||
2345 | print "not " unless "A\x{100}" =~ /[A]/i; | |
2346 | print "ok 769\n"; | |
2347 | ||
2348 | print "not " unless "\x{101}a" =~ /[\x{100}]/i; | |
2349 | print "ok 770\n"; | |
2350 | ||
2351 | print "not " unless "\x{100}a" =~ /[\x{100}]/i; | |
2352 | print "ok 771\n"; | |
2353 | ||
2354 | print "not " unless "\x{101}a" =~ /[\x{101}]/i; | |
2355 | print "ok 772\n"; | |
2356 | ||
2357 | print "not " unless "\x{100}a" =~ /[\x{101}]/i; | |
2358 | print "ok 773\n"; | |
2359 | ||
701a277b | 2360 | } |
a5961de5 JH |
2361 | |
2362 | { | |
2363 | use charnames ':full'; | |
2364 | ||
2365 | print "# LATIN LETTER A WITH GRAVE\n"; | |
2366 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; | |
2367 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; | |
2368 | ||
bc517b45 JH |
2369 | print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n"; |
2370 | print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n"; | |
2371 | print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n"; | |
2372 | print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n"; | |
a5961de5 JH |
2373 | |
2374 | print "# GREEK LETTER ALPHA WITH VRACHY\n"; | |
2375 | ||
2376 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; | |
2377 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; | |
2378 | ||
bc517b45 JH |
2379 | print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n"; |
2380 | print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n"; | |
2381 | print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n"; | |
2382 | print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n"; | |
a5961de5 JH |
2383 | |
2384 | print "# LATIN LETTER Y WITH DIAERESIS\n"; | |
2385 | ||
2386 | $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; | |
2387 | $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; | |
bc517b45 JH |
2388 | print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n"; |
2389 | print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n"; | |
2390 | print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n"; | |
2391 | print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n"; | |
a5961de5 | 2392 | } |
55da9344 JH |
2393 | |
2394 | { | |
2395 | use warnings; | |
2396 | use charnames ':full'; | |
2397 | ||
2398 | print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; | |
2399 | ||
2400 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; | |
d07ddd77 | 2401 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; |
55da9344 | 2402 | |
bc517b45 JH |
2403 | # Before #13843 this was failing by matching falsely. |
2404 | print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n"; | |
55da9344 | 2405 | } |
b7c83a7e JH |
2406 | |
2407 | { | |
2408 | print "# \\X\n"; | |
2409 | ||
2410 | use charnames ':full'; | |
2411 | ||
eb08e2da JH |
2412 | print "a!" =~ /^(\X)!/ && $1 eq "a" ? |
2413 | "ok 787\n" : "not ok 787 # $1\n"; | |
2414 | print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? | |
2415 | "ok 788\n" : "not ok 788 # $1\n"; | |
2416 | print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? | |
2417 | "ok 789\n" : "not ok 789 # $1\n"; | |
2418 | print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? | |
2419 | "ok 790\n" : "not ok 790 # $1\n"; | |
2420 | print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && | |
2421 | $1 eq "\N{LATIN CAPITAL LETTER E}" ? | |
2422 | "ok 791\n" : "not ok 791 # $1\n"; | |
2423 | print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ | |
2424 | /^(\X)!/ && | |
2425 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? | |
2426 | "ok 792\n" : "not ok 792 # $1\n"; | |
b7c83a7e | 2427 | } |
112bedeb JH |
2428 | |
2429 | { | |
2430 | print "#\\C and \\X\n"; | |
2431 | ||
2432 | print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; | |
2433 | print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; | |
2434 | } | |
09091399 JH |
2435 | |
2436 | { | |
2437 | print "# FINAL SIGMA\n"; | |
2438 | ||
2439 | my $SIGMA = "\x{03A3}"; # CAPITAL | |
2440 | my $Sigma = "\x{03C2}"; # SMALL FINAL | |
2441 | my $sigma = "\x{03C3}"; # SMALL | |
2442 | ||
2443 | print $SIGMA =~ /$SIGMA/i ? "ok 795\n" : "not ok 795\n"; | |
2444 | print $SIGMA =~ /$Sigma/i ? "ok 796\n" : "not ok 796\n"; | |
2445 | print $SIGMA =~ /$sigma/i ? "ok 797\n" : "not ok 797\n"; | |
2446 | ||
2447 | print $Sigma =~ /$SIGMA/i ? "ok 798\n" : "not ok 798\n"; | |
2448 | print $Sigma =~ /$Sigma/i ? "ok 799\n" : "not ok 799\n"; | |
2449 | print $Sigma =~ /$sigma/i ? "ok 800\n" : "not ok 800\n"; | |
2450 | ||
2451 | print $sigma =~ /$SIGMA/i ? "ok 801\n" : "not ok 801\n"; | |
2452 | print $sigma =~ /$Sigma/i ? "ok 802\n" : "not ok 802\n"; | |
2453 | print $sigma =~ /$sigma/i ? "ok 803\n" : "not ok 803\n"; | |
2454 | ||
2455 | print $SIGMA =~ /[$SIGMA]/i ? "ok 804\n" : "not ok 804\n"; | |
2456 | print $SIGMA =~ /[$Sigma]/i ? "ok 805\n" : "not ok 805\n"; | |
2457 | print $SIGMA =~ /[$sigma]/i ? "ok 806\n" : "not ok 806\n"; | |
2458 | ||
2459 | print $Sigma =~ /[$SIGMA]/i ? "ok 807\n" : "not ok 807\n"; | |
2460 | print $Sigma =~ /[$Sigma]/i ? "ok 808\n" : "not ok 808\n"; | |
2461 | print $Sigma =~ /[$sigma]/i ? "ok 809\n" : "not ok 809\n"; | |
2462 | ||
2463 | print $sigma =~ /[$SIGMA]/i ? "ok 810\n" : "not ok 810\n"; | |
2464 | print $sigma =~ /[$Sigma]/i ? "ok 811\n" : "not ok 811\n"; | |
2465 | print $sigma =~ /[$sigma]/i ? "ok 812\n" : "not ok 812\n"; | |
2466 | } | |
e036fef9 JF |
2467 | |
2468 | { | |
2469 | print "# parlez-vous?\n"; | |
2470 | ||
2471 | use charnames ':full'; | |
2472 | ||
2473 | print "fran\N{LATIN SMALL LETTER C}ais" =~ | |
2474 | /fran.ais/ && | |
2475 | $& eq "francais" ? | |
2476 | "ok 813\n" : "not ok 813\n"; | |
2477 | ||
2478 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
2479 | /fran.ais/ && | |
2480 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? | |
2481 | "ok 814\n" : "not ok 814\n"; | |
2482 | ||
2483 | print "fran\N{LATIN SMALL LETTER C}ais" =~ | |
2484 | /fran\Cais/ && | |
2485 | $& eq "francais" ? | |
2486 | "ok 815\n" : "not ok 815\n"; | |
2487 | ||
2488 | print "franc\N{COMBINING CEDILLA}ais" =~ | |
2489 | /franc\C\Cais/ ? # COMBINING CEDILLA is two bytes when encoded | |
2490 | "ok 816\n" : "not ok 816\n"; | |
2491 | ||
2492 | print "fran\N{LATIN SMALL LETTER C}ais" =~ | |
2493 | /fran\Xais/ && | |
2494 | $& eq "francais" ? | |
2495 | "ok 817\n" : "not ok 817\n"; | |
2496 | ||
2497 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
2498 | /fran\Xais/ && | |
2499 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? | |
2500 | "ok 818\n" : "not ok 818\n"; | |
2501 | ||
2502 | print "franc\N{COMBINING CEDILLA}ais" =~ | |
2503 | /fran\Xais/ && | |
2504 | $& eq "franc\N{COMBINING CEDILLA}ais" ? | |
2505 | "ok 819\n" : "not ok 819\n"; | |
2506 | ||
2507 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
2508 | /fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && | |
2509 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? | |
2510 | "ok 820\n" : "not ok 820\n"; | |
2511 | ||
2512 | print "franc\N{COMBINING CEDILLA}ais" =~ | |
2513 | /franc\N{COMBINING CEDILLA}ais/ && | |
2514 | $& eq "franc\N{COMBINING CEDILLA}ais" ? | |
2515 | "ok 821\n" : "not ok 821\n"; | |
2516 | ||
2517 | print "fran\N{LATIN SMALL LETTER C}ais" =~ | |
2518 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && | |
2519 | $& eq "francais" ? | |
2520 | "ok 822\n" : "not ok 822\n"; | |
2521 | ||
2522 | print "fran\N{LATIN SMALL LETTER C}ais" =~ | |
2523 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && | |
2524 | $& eq "francais" ? | |
2525 | "ok 823\n" : "not ok 823\n"; | |
2526 | ||
2527 | print "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ | |
2528 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && | |
2529 | $& eq "fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" ? | |
2530 | "ok 824\n" : "not ok 824\n"; | |
2531 | ||
2532 | print "franc\N{COMBINING CEDILLA}ais" =~ | |
2533 | /fran(?:c\N{COMBINING CEDILLA}?|\N{LATIN SMALL LETTER C WITH CEDILLA})ais/ && | |
2534 | $& eq "franc\N{COMBINING CEDILLA}ais" ? | |
2535 | "ok 825\n" : "not ok 825\n"; | |
2536 | } | |
ffce6cc2 JH |
2537 | |
2538 | { | |
2539 | print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n"; | |
2540 | ||
2541 | { | |
2542 | my $regex = "ABcde"; | |
2543 | my $string = "abcDE\x{100}"; | |
2544 | chop($string); | |
2545 | if ($string =~ m/$regex/i) { | |
2546 | print "ok 826\n"; | |
2547 | } else { | |
2548 | print "not ok 826\n"; | |
2549 | } | |
2550 | } | |
2551 | ||
2552 | { | |
2553 | my $regex = "ABcde\x{100}"; | |
2554 | my $string = "abcDE"; | |
2555 | chop($regex); | |
2556 | if ($string =~ m/$regex/i) { | |
2557 | print "ok 827\n"; | |
2558 | } else { | |
2559 | print "not ok 827\n"; | |
2560 | } | |
2561 | } | |
2562 | ||
2563 | { | |
2564 | my $regex = "ABcde\x{100}"; | |
2565 | my $string = "abcDE\x{100}"; | |
2566 | chop($regex); | |
2567 | chop($string); | |
2568 | if ($string =~ m/$regex/i) { | |
2569 | print "ok 828\n"; | |
2570 | } else { | |
2571 | print "not ok 828\n"; | |
2572 | } | |
2573 | } | |
2574 | } | |
d07ddd77 JH |
2575 | |
2576 | { | |
2577 | print "# more SIGMAs\n"; | |
2578 | ||
2579 | my $SIGMA = "\x{03A3}"; # CAPITAL | |
2580 | my $Sigma = "\x{03C2}"; # SMALL FINAL | |
2581 | my $sigma = "\x{03C3}"; # SMALL | |
2582 | ||
2583 | my $S3 = "$SIGMA$Sigma$sigma"; | |
2584 | ||
2585 | print ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2586 | "ok 829\n" : "not ok 829\n"; | |
2587 | print ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2588 | "ok 830\n" : "not ok 830\n"; | |
2589 | print ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2590 | "ok 831\n" : "not ok 831\n"; | |
2591 | ||
2592 | print ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2593 | "ok 832\n" : "not ok 832\n"; | |
2594 | print ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2595 | "ok 833\n" : "not ok 833\n"; | |
2596 | print ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma ? | |
2597 | "ok 834\n" : "not ok 834\n"; | |
2598 | } | |
925f9e00 JH |
2599 | |
2600 | { | |
2601 | print "# LATIN SMALL LETTER SHARP S\n"; | |
2602 | ||
2603 | use charnames ':full'; | |
2604 | ||
fc8cd66c | 2605 | $test= 835; |
925f9e00 | 2606 | |
fc8cd66c YO |
2607 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/); |
2608 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /\N{LATIN SMALL LETTER SHARP S}/i); | |
925f9e00 | 2609 | |
fc8cd66c YO |
2610 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/); |
2611 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); | |
925f9e00 | 2612 | |
fc8cd66c YO |
2613 | ok("ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i); |
2614 | ok("SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i); | |
2615 | ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); | |
2616 | ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i); | |
925f9e00 | 2617 | |
fc8cd66c YO |
2618 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i); |
2619 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i); | |
925f9e00 | 2620 | } |
d8f6a732 JH |
2621 | |
2622 | { | |
2623 | print "# more whitespace: U+0085, U+2028, U+2029\n"; | |
2624 | ||
2625 | # U+0085 needs to be forced to be Unicode, the \x{100} does that. | |
250d67eb JH |
2626 | if ($ordA == 193) { |
2627 | print "<\x{100}\x{0085}>" =~ /<\x{100}e>/ ? "ok 845\n" : "not ok 845\n"; | |
2628 | } else { | |
2629 | print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n"; | |
2630 | } | |
5486206c JH |
2631 | print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n"; |
2632 | print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n"; | |
d8f6a732 JH |
2633 | } |
2634 | ||
def8e4ea | 2635 | { |
229196e5 | 2636 | print "# . with /s should work on characters, as opposed to bytes\n"; |
def8e4ea JH |
2637 | |
2638 | my $s = "\x{e4}\x{100}"; | |
2639 | ||
2640 | # This is not expected to match: the point is that | |
2641 | # neither should we get "Malformed UTF-8" warnings. | |
2642 | print $s =~ /\G(.+?)\n/gcs ? | |
5486206c | 2643 | "not ok 848\n" : "ok 848\n"; |
def8e4ea JH |
2644 | |
2645 | my @c; | |
2646 | ||
2647 | while ($s =~ /\G(.)/gs) { | |
2648 | push @c, $1; | |
2649 | } | |
2650 | ||
5486206c | 2651 | print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n"; |
a0804c9e A |
2652 | |
2653 | my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256 | |
2654 | my $r1 = ""; | |
2655 | while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2656 | $r1 .= $1 . $2; | |
2657 | } | |
2658 | ||
2659 | my $t2 = $t1 . "\x{100}"; # repeat with a larger char | |
2660 | my $r2 = ""; | |
2661 | while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { | |
2662 | $r2 .= $1 . $2; | |
2663 | } | |
2664 | $r2 =~ s/\x{100}//; | |
5486206c | 2665 | print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n"; |
def8e4ea | 2666 | } |
e54858b0 JH |
2667 | |
2668 | { | |
2669 | print "# Unicode lookbehind\n"; | |
2670 | ||
5486206c JH |
2671 | print "A\x{100}B" =~ /(?<=A.)B/ ? "ok 851\n" : "not ok 851\n"; |
2672 | print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n"; | |
2673 | print "\x{400}AB" =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n"; | |
2674 | print "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n"; | |
e54858b0 | 2675 | } |
c46248c1 JH |
2676 | |
2677 | { | |
85fd1718 JH |
2678 | print "# UTF-8 hash keys and /\$/\n"; |
2679 | # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2002-01/msg01327.html | |
2680 | ||
2681 | my $u = "a\x{100}"; | |
2682 | my $v = substr($u,0,1); | |
2683 | my $w = substr($u,1,1); | |
2684 | my %u = ( $u => $u, $v => $v, $w => $w ); | |
8d21bda2 | 2685 | my $i = 855; |
85fd1718 JH |
2686 | for (keys %u) { |
2687 | my $m1 = /^\w*$/ ? 1 : 0; | |
2688 | my $m2 = $u{$_}=~/^\w*$/ ? 1 : 0; | |
2689 | print $m1 == $m2 ? "ok $i\n" : "not ok $i # $m1 $m2\n"; | |
2690 | $i++; | |
2691 | } | |
2692 | } | |
8d21bda2 JH |
2693 | |
2694 | { | |
2695 | print "# [ID 20020124.005]\n"; | |
2696 | # Fixed by #14795. | |
2697 | my $i = 858; | |
2698 | for my $char ("a", "\x{df}", "\x{100}"){ | |
2699 | $x = "$char b $char"; | |
2700 | $x =~ s{($char)}{ | |
2701 | "c" =~ /c/; | |
2702 | "x"; | |
2703 | }ge; | |
2704 | print substr($x,0,1) eq substr($x,-1,1) ? | |
2705 | "ok $i\n" : "not ok $i # debug: $x\n"; | |
2706 | $i++; | |
2707 | } | |
2708 | } | |
6e602e29 A |
2709 | |
2710 | { | |
2711 | print "# SEGV in s/// and UTF-8\n"; | |
2712 | $s = "s#\x{100}" x 4; | |
2713 | $s =~ s/[^\w]/ /g; | |
2714 | print $s eq "s \x{100}" x 4 ? "ok 861\n" : "not ok 861\n"; | |
2715 | } | |
f272994b A |
2716 | |
2717 | { | |
2718 | print "# UTF-8 bug (maybe alreayd known?)\n"; | |
2719 | my $u; | |
2720 | ||
2721 | $u = "foo"; | |
2722 | $u =~ s/./\x{100}/g; | |
2723 | print $u eq "\x{100}\x{100}\x{100}" ? "ok 862\n" : "not ok 862\n"; | |
2724 | ||
2725 | $u = "foobar"; | |
2726 | $u =~ s/[ao]/\x{100}/g; | |
2727 | print $u eq "f\x{100}\x{100}b\x{100}r" ? "ok 863\n" : "not ok 863\n"; | |
2728 | ||
2729 | $u =~ s/\x{100}/e/g; | |
2730 | print $u eq "feeber" ? "ok 864\n" : "not ok 864\n"; | |
2731 | } | |
2732 | ||
33b8afdf JH |
2733 | { |
2734 | print "# UTF-8 bug with s///\n"; | |
2735 | # check utf8/non-utf8 mixtures | |
2736 | # try to force all float/anchored check combinations | |
2737 | my $c = "\x{100}"; | |
fc8cd66c | 2738 | $test = 865; |
33b8afdf JH |
2739 | my $subst; |
2740 | for my $re ( | |
2741 | "xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", "xx.*(?=$c)", "(?=$c).*xx", | |
2742 | ) { | |
2743 | print "xxx" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; | |
2744 | ++$test; | |
2745 | print +($subst = "xxx") =~ s/$re// ? "not ok $test\n" : "ok $test\n"; | |
2746 | ++$test; | |
2747 | } | |
2748 | for my $re ("xx.*$c*", "$c*.*xx") { | |
2749 | print "xxx" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; | |
2750 | ++$test; | |
2751 | ($subst = "xxx") =~ s/$re//; | |
2752 | print $subst eq '' ? "ok $test\n" : "not ok $test\t# $subst\n"; | |
2753 | ++$test; | |
2754 | } | |
2755 | for my $re ("xxy*", "y*xx") { | |
2756 | print "xx$c" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; | |
2757 | ++$test; | |
2758 | ($subst = "xx$c") =~ s/$re//; | |
2759 | print $subst eq $c ? "ok $test\n" : "not ok $test\n"; | |
2760 | ++$test; | |
2761 | print "xy$c" =~ /$re/ ? "not ok $test\n" : "ok $test\n"; | |
2762 | ++$test; | |
2763 | print +($subst = "xy$c") =~ /$re/ ? "not ok $test\n" : "ok $test\n"; | |
2764 | ++$test; | |
2765 | } | |
2766 | for my $re ("xy$c*z", "x$c*yz") { | |
2767 | print "xyz" =~ /$re/ ? "ok $test\n" : "not ok $test\n"; | |
2768 | ++$test; | |
2769 | ($subst = "xyz") =~ s/$re//; | |
2770 | print $subst eq '' ? "ok $test\n" : "not ok $test\n"; | |
2771 | ++$test; | |
2772 | } | |
2773 | } | |
574c8022 | 2774 | |
ff385a1b JF |
2775 | { |
2776 | print "# qr/.../x\n"; | |
fc8cd66c | 2777 | $test = 893; |
ff385a1b JF |
2778 | |
2779 | my $R = qr/ A B C # D E/x; | |
2780 | ||
2781 | print eval {"ABCDE" =~ $R} ? "ok $test\n" : "not ok $test\n"; | |
2782 | $test++; | |
2783 | ||
2784 | print eval {"ABCDE" =~ m/$R/} ? "ok $test\n" : "not ok $test\n"; | |
2785 | $test++; | |
2786 | ||
2787 | print eval {"ABCDE" =~ m/($R)/} ? "ok $test\n" : "not ok $test\n"; | |
2788 | $test++; | |
2789 | } | |
bc45ce41 JH |
2790 | |
2791 | { | |
2792 | print "# illegal Unicode properties\n"; | |
fc8cd66c | 2793 | $test = 896; |
bc45ce41 JH |
2794 | |
2795 | print eval { "a" =~ /\pq / } ? "not ok $test\n" : "ok $test\n"; | |
2796 | $test++; | |
2797 | ||
2798 | print eval { "a" =~ /\p{qrst} / } ? "not ok $test\n" : "ok $test\n"; | |
2799 | $test++; | |
2800 | } | |
d65afb4b HS |
2801 | |
2802 | { | |
491fd90a | 2803 | print "# [ID 20020412.005] wrong pmop flags checked when empty pattern\n"; |
d65afb4b | 2804 | # requires reuse of last successful pattern |
fc8cd66c | 2805 | $test = 898; |
d65afb4b HS |
2806 | $test =~ /\d/; |
2807 | for (0 .. 1) { | |
2808 | my $match = ?? + 0; | |
2809 | if ($match != $_) { | |
2810 | print "ok $test\n"; | |
2811 | } else { | |
2812 | printf "not ok %s\t# 'match once' %s on %s iteration\n", $test, | |
2813 | $match ? 'succeeded' : 'failed', $_ ? 'second' : 'first'; | |
2814 | } | |
2815 | ++$test; | |
2816 | } | |
2817 | $test =~ /(\d)/; | |
2818 | my $result = join '', $test =~ //g; | |
2819 | if ($result eq $test) { | |
2820 | print "ok $test\n"; | |
2821 | } else { | |
2822 | printf "not ok %s\t# expected '%s', got '%s'\n", $test, $test, $result; | |
2823 | } | |
2824 | ++$test; | |
2825 | } | |
491fd90a JH |
2826 | |
2827 | print "# user-defined character properties\n"; | |
2828 | ||
2829 | sub InKana1 { | |
2830 | return <<'END'; | |
2831 | 3040 309F | |
2832 | 30A0 30FF | |
2833 | END | |
2834 | } | |
2835 | ||
2836 | sub InKana2 { | |
2837 | return <<'END'; | |
2838 | +utf8::InHiragana | |
2839 | +utf8::InKatakana | |
2840 | END | |
2841 | } | |
2842 | ||
2843 | sub InKana3 { | |
2844 | return <<'END'; | |
2845 | +utf8::InHiragana | |
2846 | +utf8::InKatakana | |
2847 | -utf8::IsCn | |
2848 | END | |
2849 | } | |
2850 | ||
2851 | sub InNotKana { | |
2852 | return <<'END'; | |
2853 | !utf8::InHiragana | |
2854 | -utf8::InKatakana | |
2855 | +utf8::IsCn | |
2856 | END | |
2857 | } | |
2858 | ||
2859 | $test = 901; | |
2860 | ||
2861 | print "\x{3040}" =~ /\p{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2862 | print "\x{303F}" =~ /\P{InKana1}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2863 | ||
2864 | print "\x{3040}" =~ /\p{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2865 | print "\x{303F}" =~ /\P{InKana2}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2866 | ||
2867 | print "\x{3041}" =~ /\p{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2868 | print "\x{3040}" =~ /\P{InKana3}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2869 | ||
2870 | print "\x{3040}" =~ /\p{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2871 | print "\x{3041}" =~ /\P{InNotKana}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2872 | ||
11ef8fdd JH |
2873 | sub InConsonant { # Not EBCDIC-aware. |
2874 | return <<EOF; | |
2875 | 0061 007f | |
2876 | -0061 | |
2877 | -0065 | |
2878 | -0069 | |
2879 | -006f | |
2880 | -0075 | |
2881 | EOF | |
2882 | } | |
2883 | ||
2884 | print "d" =~ /\p{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2885 | print "e" =~ /\P{InConsonant}/ ? "ok $test\n" : "not ok $test\n"; $test++; | |
2886 | ||
faf11cac HS |
2887 | { |
2888 | print "# [ID 20020630.002] utf8 regex only matches 32k\n"; | |
2889 | $test = 911; | |
2890 | for ([ 'byte', "\x{ff}" ], [ 'utf8', "\x{1ff}" ]) { | |
2891 | my($type, $char) = @$_; | |
2892 | for my $len (32000, 32768, 33000) { | |
2893 | my $s = $char . "f" x $len; | |
2894 | my $r = $s =~ /$char([f]*)/gc; | |
2895 | print $r ? "ok $test\n" : "not ok $test\t# <$type x $len> fail\n"; | |
2896 | ++$test; | |
2897 | print +(!$r or pos($s) == $len + 1) ? "ok $test\n" | |
2898 | : "not ok $test\t# <$type x $len> pos @{[ pos($s) ]}\n"; | |
2899 | ++$test; | |
2900 | } | |
2901 | } | |
2902 | } | |
2903 | ||
2904 | $test = 923; | |
f3b1e556 YST |
2905 | |
2906 | $a = bless qr/foo/, 'Foo'; | |
2907 | print(('goodfood' =~ $a ? '' : 'not '), | |
2908 | "ok $test\t# reblessed qr// matches\n"); | |
2909 | ++$test; | |
2910 | ||
2911 | print(($a eq '(?-xism:foo)' ? '' : 'not '), | |
2912 | "ok $test\t# reblessed qr// stringizes\n"); | |
2913 | ++$test; | |
446eaa42 YST |
2914 | |
2915 | $x = "\x{3fe}"; | |
cb50f42d YST |
2916 | $z=$y = "\317\276"; # $y is byte representation of $x |
2917 | ||
446eaa42 YST |
2918 | $a = qr/$x/; |
2919 | print(($x =~ $a ? '' : 'not '), "ok $test - utf8 interpolation in qr//\n"); | |
2920 | ++$test; | |
2921 | ||
2922 | print(("a$a" =~ $x ? '' : 'not '), | |
cb50f42d YST |
2923 | "ok $test - stringifed qr// preserves utf8\n"); |
2924 | ++$test; | |
2925 | ||
2926 | print(("a$x" =~ /^a$a\z/ ? '' : 'not '), | |
2927 | "ok $test - interpolated qr// preserves utf8\n"); | |
2928 | ++$test; | |
2929 | ||
2930 | print(("a$x" =~ /^a(??{$a})\z/ ? '' : 'not '), | |
2931 | "ok $test - postponed interpolation of qr// preserves utf8\n"); | |
2932 | ++$test; | |
2933 | ||
ab01544f RGS |
2934 | print((length(qr/##/x) == 12 ? '' : 'not '), |
2935 | "ok $test - ## in qr// doesn't corrupt memory [perl #17776]\n"); | |
2936 | ++$test; | |
2937 | ||
cb50f42d YST |
2938 | { use re 'eval'; |
2939 | ||
2940 | print(("$x$x" =~ /^$x(??{$x})\z/ ? '' : 'not '), | |
2941 | "ok $test - postponed utf8 string in utf8 re matches utf8\n"); | |
2942 | ++$test; | |
2943 | ||
2944 | print(("$y$x" =~ /^$y(??{$x})\z/ ? '' : 'not '), | |
2945 | "ok $test - postponed utf8 string in non-utf8 re matches utf8\n"); | |
446eaa42 YST |
2946 | ++$test; |
2947 | ||
cb50f42d YST |
2948 | print(("$y$x" !~ /^$y(??{$y})\z/ ? '' : 'not '), |
2949 | "ok $test - postponed non-utf8 string in non-utf8 re doesn't match utf8\n"); | |
446eaa42 YST |
2950 | ++$test; |
2951 | ||
cb50f42d YST |
2952 | print(("$x$x" !~ /^$x(??{$y})\z/ ? '' : 'not '), |
2953 | "ok $test - postponed non-utf8 string in utf8 re doesn't match utf8\n"); | |
446eaa42 YST |
2954 | ++$test; |
2955 | ||
cb50f42d YST |
2956 | print(("$y$y" =~ /^$y(??{$y})\z/ ? '' : 'not '), |
2957 | "ok $test - postponed non-utf8 string in non-utf8 re matches non-utf8\n"); | |
2958 | ++$test; | |
2959 | ||
2960 | print(("$x$y" =~ /^$x(??{$y})\z/ ? '' : 'not '), | |
2961 | "ok $test - postponed non-utf8 string in utf8 re matches non-utf8\n"); | |
2962 | ++$test; | |
2963 | $y = $z; # reset $y after upgrade | |
2964 | ||
2965 | print(("$x$y" !~ /^$x(??{$x})\z/ ? '' : 'not '), | |
2966 | "ok $test - postponed utf8 string in utf8 re doesn't match non-utf8\n"); | |
2967 | ++$test; | |
2968 | $y = $z; # reset $y after upgrade | |
2969 | ||
2970 | print(("$y$y" !~ /^$y(??{$x})\z/ ? '' : 'not '), | |
2971 | "ok $test - postponed utf8 string in non-utf8 re doesn't match non-utf8\n"); | |
2972 | ++$test; | |
2973 | ||
2974 | } # no re 'eval' | |
2975 | ||
2d92f8a0 JH |
2976 | print "# more user-defined character properties\n"; |
2977 | ||
2978 | sub IsSyriac1 { | |
2979 | return <<'END'; | |
2980 | 0712 072C | |
2981 | 0730 074A | |
2982 | END | |
2983 | } | |
2984 | ||
711a919c TS |
2985 | ok("\x{0712}" =~ /\p{IsSyriac1}/, '\x{0712}, \p{IsSyriac1}'); |
2986 | ok("\x{072F}" =~ /\P{IsSyriac1}/, '\x{072F}, \P{IsSyriac1}'); | |
2d92f8a0 JH |
2987 | |
2988 | sub Syriac1 { | |
2989 | return <<'END'; | |
2990 | 0712 072C | |
2991 | 0730 074A | |
2992 | END | |
2993 | } | |
2994 | ||
711a919c TS |
2995 | ok("\x{0712}" =~ /\p{Syriac1}/, '\x{0712}, \p{Syriac1}'); |
2996 | ok("\x{072F}" =~ /\P{Syriac1}/, '\x{072F}, \p{Syriac1}'); | |
2d92f8a0 | 2997 | |
4a818d86 TS |
2998 | print "# user-defined character properties may lack \\n at the end\n"; |
2999 | sub InGreekSmall { return "03B1\t03C9" } | |
3000 | sub InGreekCapital { return "0391\t03A9\n-03A2" } | |
3001 | ||
3002 | ok("\x{03C0}" =~ /\p{InGreekSmall}/, "Small pi"); | |
3003 | ok("\x{03C2}" =~ /\p{InGreekSmall}/, "Final sigma"); | |
3004 | ok("\x{03A0}" =~ /\p{InGreekCapital}/, "Capital PI"); | |
3005 | ok("\x{03A2}" =~ /\P{InGreekCapital}/, "Reserved"); | |
3006 | ||
711a919c TS |
3007 | sub AsciiHexAndDash { |
3008 | return <<'END'; | |
3009 | +utf8::ASCII_Hex_Digit | |
3010 | +utf8::Dash | |
3011 | END | |
3012 | } | |
3013 | ||
3014 | ok("-" =~ /\p{Dash}/, "'-' is Dash"); | |
3015 | ok("A" =~ /\p{ASCII_Hex_Digit}/, "'A' is ASCII_Hex_Digit"); | |
3016 | ok("-" =~ /\p{AsciiHexAndDash}/, "'-' is AsciiHexAndDash"); | |
3017 | ok("A" =~ /\p{AsciiHexAndDash}/, "'A' is AsciiHexAndDash"); | |
3018 | ||
0e933229 | 3019 | { |
3a2263fe RGS |
3020 | print "# Change #18179\n"; |
3021 | # previously failed with "panic: end_shift | |
0e933229 IH |
3022 | my $s = "\x{100}" x 5; |
3023 | my $ok = $s =~ /(\x{100}{4})/; | |
3024 | my($ord, $len) = (ord $1, length $1); | |
3025 | print +($ok && $ord == 0x100 && $len == 4) | |
fc8cd66c | 3026 | ? "ok $test\n" : "not ok $test\t# [#18179] $ok/$ord/$len\n"; |
0e933229 IH |
3027 | ++$test; |
3028 | } | |
3029 | ||
f14c76ed RGS |
3030 | { |
3031 | print "# [perl #15763]\n"; | |
3032 | ||
3033 | $a = "x\x{100}"; | |
3034 | chop $a; # but leaves the UTF-8 flag | |
3035 | $a .= "y"; # 1 byte before "y" | |
3036 | ||
3037 | ok($a =~ /^\C/, 'match one \C on 1-byte UTF-8'); | |
3038 | ok($a =~ /^\C{1}/, 'match \C{1}'); | |
3039 | ||
3040 | ok($a =~ /^\Cy/, 'match \Cy'); | |
3041 | ok($a =~ /^\C{1}y/, 'match \C{1}y'); | |
3042 | ||
3043 | $a = "\x{100}y"; # 2 bytes before "y" | |
3044 | ||
3045 | ok($a =~ /^\C/, 'match one \C on 2-byte UTF-8'); | |
3046 | ok($a =~ /^\C{1}/, 'match \C{1}'); | |
3047 | ok($a =~ /^\C\C/, 'match two \C'); | |
3048 | ok($a =~ /^\C{2}/, 'match \C{2}'); | |
3049 | ||
3050 | ok($a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'); | |
3051 | ok($a =~ /^\C{3}/, 'match \C{3}'); | |
3052 | ||
3053 | ok($a =~ /^\C\Cy/, 'match two \C'); | |
3054 | ok($a =~ /^\C{2}y/, 'match \C{2}'); | |
3055 | ||
836995da RGS |
3056 | ok($a !~ /^\C\C\Cy/, q{don't match three \Cy}); |
3057 | ok($a !~ /^\C{2}\Cy/, q{don't match \C{3}y}); | |
f14c76ed RGS |
3058 | |
3059 | $a = "\x{1000}y"; # 3 bytes before "y" | |
3060 | ||
3061 | ok($a =~ /^\C/, 'match one \C on three-byte UTF-8'); | |
3062 | ok($a =~ /^\C{1}/, 'match \C{1}'); | |
3063 | ok($a =~ /^\C\C/, 'match two \C'); | |
3064 | ok($a =~ /^\C{2}/, 'match \C{2}'); | |
3065 | ok($a =~ /^\C\C\C/, 'match three \C'); | |
3066 | ok($a =~ /^\C{3}/, 'match \C{3}'); | |
3067 | ||
3068 | ok($a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'); | |
3069 | ok($a =~ /^\C{4}/, 'match \C{4}'); | |
3070 | ||
3071 | ok($a =~ /^\C\C\Cy/, 'match three \Cy'); | |
3072 | ok($a =~ /^\C{3}y/, 'match \C{3}y'); | |
3073 | ||
836995da RGS |
3074 | ok($a !~ /^\C\C\C\C\y/, q{don't match four \Cy}); |
3075 | ok($a !~ /^\C{4}y/, q{don't match \C{4}y}); | |
f14c76ed RGS |
3076 | } |
3077 | ||
88044e37 NC |
3078 | { |
3079 | local $\; | |
3080 | $_ = 'aaaaaaaaaa'; | |
3081 | utf8::upgrade($_); chop $_; $\="\n"; | |
3082 | ok(/[^\s]+/, "m/[^\s]/ utf8"); | |
3083 | ok(/[^\d]+/, "m/[^\d]/ utf8"); | |
3084 | ok(($a = $_, $_ =~ s/[^\s]+/./g), "s/[^\s]/ utf8"); | |
3085 | ok(($a = $_, $a =~ s/[^\d]+/./g), "s/[^\s]/ utf8"); | |
3086 | } | |
f14c76ed | 3087 | |
ce97f9d6 JH |
3088 | ok("\x{100}" =~ /\x{100}/, "[perl #15397]"); |
3089 | ok("\x{100}" =~ /(\x{100})/, "[perl #15397]"); | |
3090 | ok("\x{100}" =~ /(\x{100}){1}/, "[perl #15397]"); | |
3091 | ok("\x{100}\x{100}" =~ /(\x{100}){2}/, "[perl #15397]"); | |
3092 | ok("\x{100}\x{100}" =~ /(\x{100})(\x{100})/, "[perl #15397]"); | |
3093 | ||
fbeb8e69 JH |
3094 | $x = "CD"; |
3095 | $x =~ /(AB)*?CD/; | |
3096 | ok(!defined $1, "[perl #7471]"); | |
ce97f9d6 | 3097 | |
fbeb8e69 JH |
3098 | $x = "CD"; |
3099 | $x =~ /(AB)*CD/; | |
3100 | ok(!defined $1, "[perl #7471]"); | |
3101 | ||
ec391688 JH |
3102 | $pattern = "^(b+?|a){1,2}c"; |
3103 | ok("bac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); | |
3104 | ok("bbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); | |
3105 | ok("bbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); | |
3106 | ok("bbbbac" =~ /$pattern/ && $1 eq 'a', "[perl #3547]"); | |
3107 | ||
a30b2f1f RGS |
3108 | { |
3109 | # [perl #18232] | |
3110 | "\x{100}" =~ /(.)/; | |
3111 | ok( $1 eq "\x{100}", '$1 is utf-8 [perl #18232]' ); | |
3112 | { 'a' =~ /./; } | |
3113 | ok( $1 eq "\x{100}", '$1 is still utf-8' ); | |
3114 | ok( $1 ne "\xC4\x80", '$1 is not non-utf-8' ); | |
3115 | } | |
3116 | ||
f119b0fb JH |
3117 | { |
3118 | use utf8; | |
3119 | my $attr = 'Name-1' ; | |
3120 | ||
3121 | my $NormalChar = qr/[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; | |
3122 | my $NormalWord = qr/${NormalChar}+?/; | |
3123 | my $PredNameHyphen = qr/^${NormalWord}(\-${NormalWord})*?$/; | |
3124 | ||
3125 | $attr =~ /^$/; | |
3126 | ok( $attr =~ $PredNameHyphen, "[perl #19767] original test" ); | |
3127 | } | |
3128 | ||
3129 | { | |
3130 | use utf8; | |
3131 | "a" =~ m/[b]/; | |
3132 | ok ( "0" =~ /\p{N}+\z/, "[perl #19767] variant test" ); | |
3133 | } | |
3134 | ||
faf82a0b AE |
3135 | { |
3136 | ||
3137 | $p = 1; | |
3138 | foreach (1,2,3,4) { | |
3139 | $p++ if /(??{ $p })/ | |
3140 | } | |
3141 | ok ($p == 5, "[perl #20683] (??{ }) returns stale values"); | |
3142 | { package P; $a=1; sub TIESCALAR { bless[] } sub FETCH { $a++ } } | |
3143 | tie $p, P; | |
3144 | foreach (1,2,3,4) { | |
3145 | /(??{ $p })/ | |
3146 | } | |
3147 | ok ( $p == 5, "(??{ }) returns stale values"); | |
3148 | } | |
3149 | ||
351208f1 JH |
3150 | { |
3151 | # Subject: Odd regexp behavior | |
3152 | # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> | |
3153 | # Date: Wed, 26 Feb 2003 16:53:12 +0000 | |
3154 | # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> | |
3155 | # To: perl-unicode@perl.org | |
3156 | ||
3157 | $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
3158 | ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26"); | |
3159 | ||
3160 | $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; | |
3161 | ok($x eq "b k", "Markus Kuhn 2003-02-26"); | |
3162 | ||
3163 | ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); | |
3164 | } | |
3165 | ||
91e09a61 AE |
3166 | { |
3167 | my $i; | |
3168 | ok('-1-3-5-' eq join('', split /((??{$i++}))/, '-1-3-5-'), | |
080c2dec AE |
3169 | "[perl #21411] (??{ .. }) corrupts split's stack"); |
3170 | split /(?{'WOW'})/, 'abc'; | |
3171 | ok('a|b|c' eq join ('|', @_), | |
3172 | "[perl #21411] (?{ .. }) version of the above"); | |
3173 | } | |
3174 | ||
3175 | { | |
36daacbf DM |
3176 | # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it |
3177 | # hasn't been crashing. Disable this test until it is fixed properly. | |
3178 | # XXX also check what it returns rather than just doing ok(1,...) | |
3179 | # split /(?{ split "" })/, "abc"; | |
080c2dec | 3180 | ok(1,'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'); |
91e09a61 AE |
3181 | } |
3182 | ||
c517dc2b JH |
3183 | { |
3184 | ok("\x{100}\n" =~ /\x{100}\n$/, "UTF8 length cache and fbm_compile"); | |
3185 | } | |
ec391688 | 3186 | |
16e1b944 JH |
3187 | { |
3188 | package Str; | |
3189 | use overload q/""/ => sub { ${$_[0]}; }; | |
3190 | sub new { my ($c, $v) = @_; bless \$v, $c; } | |
3191 | ||
3192 | package main; | |
3193 | $_ = Str->new("a\x{100}/\x{100}b"); | |
3194 | ok(join(":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"); | |
3195 | } | |
3196 | ||
7ef91622 IH |
3197 | { |
3198 | $_ = "code: 'x' { '...' }\n"; study; | |
3199 | my @x; push @x, $& while m/'[^\']*'/gx; | |
3200 | ok(join(":", @x) eq "'x':'...'", | |
3201 | "[perl #17757] Parse::RecDescent triggers infinite loop"); | |
3202 | } | |
3203 | ||
14ebb1a2 JH |
3204 | { |
3205 | my $re = qq/^([^X]*)X/; | |
3206 | utf8::upgrade($re); | |
3207 | ok("\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"); | |
3208 | } | |
3209 | ||
f02c194e RGS |
3210 | # bug #22354 |
3211 | sub func ($) { | |
3212 | ok( "a\nb" !~ /^b/, $_[0] ); | |
3213 | ok( "a\nb" =~ /^b/m, "$_[0] - with /m" ); | |
3214 | } | |
3215 | func "standalone"; | |
3216 | $_ = "x"; s/x/func "in subst"/e; | |
3217 | $_ = "x"; s/x/func "in multiline subst"/em; | |
3218 | #$_ = "x"; /x(?{func "in regexp"})/; | |
3219 | #$_ = "x"; /x(?{func "in multiline regexp"})/m; | |
3220 | ||
c74340f9 | 3221 | # bug RT#19049 |
5aca4364 AG |
3222 | $_="abcdef\n"; |
3223 | @x = m/./g; | |
c74340f9 | 3224 | ok("abcde" eq "$`", 'RT#19049 - global match not setting $`'); |
5aca4364 | 3225 | |
6b43b216 DM |
3226 | ok("123\x{100}" =~ /^.*1.*23\x{100}$/, 'uft8 + multiple floating substr'); |
3227 | ||
89d0f8e1 | 3228 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON |
c1e0e3d2 NIS |
3229 | ok(" \x{101}" =~ qr/\x{100}/i, |
3230 | "<20030808193656.5109.1@llama.ni-s.u-net.com>"); | |
3231 | ||
89d0f8e1 | 3232 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW |
5dab1207 NIS |
3233 | ok(" \x{1E01}" =~ qr/\x{1E00}/i, |
3234 | "<20030808193656.5109.1@llama.ni-s.u-net.com>"); | |
3235 | ||
89d0f8e1 JH |
3236 | # DESERET SMALL/CAPITAL LETTER LONG I |
3237 | ok(" \x{10428}" =~ qr/\x{10400}/i, | |
3238 | "<20030808193656.5109.1@llama.ni-s.u-net.com>"); | |
3239 | ||
3240 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
3241 | ok(" \x{1E01}x" =~ qr/\x{1E00}X/i, | |
3242 | "<20030808193656.5109.1@llama.ni-s.u-net.com>"); | |
3243 | ||
a7913593 JH |
3244 | { |
3245 | # [perl #23769] Unicode regex broken on simple example | |
3246 | # regrepeat() didn't handle UTF-8 EXACT case right. | |
3247 | ||
3248 | my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; | |
3249 | ||
3250 | ok($s =~ /\x{a0}/, "[perl #23769]"); | |
3251 | ok($s =~ /\x{a0}+/, "[perl #23769]"); | |
3252 | ok($s =~ /\x{a0}\x{a0}/, "[perl #23769]"); | |
090f7165 JH |
3253 | |
3254 | ok("aaa\x{100}" =~ /(a+)/, "[perl #23769] easy invariant"); | |
3255 | ok($1 eq "aaa", "[perl #23769]"); | |
3256 | ||
3257 | ok("\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/, "[perl #23769] regrepeat invariant"); | |
3258 | ok($1 eq "\xa0\xa0\xa0", "[perl #23769]"); | |
3259 | ||
3260 | ok("ababab\x{100} " =~ /((?:ab)+)/, "[perl #23769] hard invariant"); | |
3261 | ok($1 eq "ababab", "[perl #23769]"); | |
3262 | ||
3263 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/, "[perl #23769] hard variant"); | |
3264 | ok($1 eq "\xa0\xa1\xa0\xa1\xa0\xa1", "[perl #23769]"); | |
3265 | ||
3266 | ok("aaa\x{100} " =~ /(a+?)/, "[perl #23769] easy invariant"); | |
3267 | ok($1 eq "a", "[perl #23769]"); | |
3268 | ||
3269 | ok("\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/, "[perl #23769] regrepeat variant"); | |
3270 | ok($1 eq "\xa0", "[perl #23769]"); | |
3271 | ||
3272 | ok("ababab\x{100} " =~ /((?:ab)+?)/, "[perl #23769] hard invariant"); | |
3273 | ok($1 eq "ab", "[perl #23769]"); | |
3274 | ||
3275 | ok("\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/, "[perl #23769] hard variant"); | |
3276 | ok($1 eq "\xa0\xa1", "[perl #23769]"); | |
3277 | ||
3278 | ok("\xc4\xc4\xc4" !~ /(\x{100}+)/, "[perl #23769] don't match first byte of utf8 representation"); | |
3279 | ok("\xc4\xc4\xc4" !~ /(\x{100}+?)/, "[perl #23769] don't match first byte of utf8 representation"); | |
a7913593 JH |
3280 | } |
3281 | ||
4be7a33f HS |
3282 | for (120 .. 130) { |
3283 | my $head = 'x' x $_; | |
3284 | for my $tail ('\x{0061}', '\x{1234}') { | |
3285 | ok( | |
3286 | eval qq{ "$head$tail" =~ /$head$tail/ }, | |
3287 | '\x{...} misparsed in regexp near 127 char EXACT limit' | |
3288 | ); | |
3289 | } | |
3290 | } | |
3291 | ||
289555fe HS |
3292 | # perl #25269: panic: pp_match start/end pointers |
3293 | ok("a-bc" eq eval { | |
3294 | my($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; | |
3295 | "$x-$y"; | |
3296 | }, 'captures can move backwards in string'); | |
3297 | ||
b6d5fef8 RGS |
3298 | # perl #27940: \cA not recognized in character classes |
3299 | ok("a\cAb" =~ /\cA/, '\cA in pattern'); | |
3300 | ok("a\cAb" =~ /[\cA]/, '\cA in character class'); | |
3301 | ok("a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'); | |
3302 | ok("abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'); | |
3303 | ok("a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'); | |
3304 | ok("a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'); | |
3305 | ok("a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'); | |
25f684f7 | 3306 | ok("ab" !~ /a\cIb/x, '\cI in pattern'); |
5dab1207 | 3307 | |
0e788c72 HS |
3308 | # perl #28532: optional zero-width match at end of string is ignored |
3309 | ok(("abc" =~ /^abc(\z)?/) && defined($1), | |
3310 | 'optional zero-width match at end of string'); | |
3311 | ok(("abc" =~ /^abc(\z)??/) && !defined($1), | |
3312 | 'optional zero-width match at end of string'); | |
3313 | ||
a3621e74 YO |
3314 | |
3315 | ||
3316 | { # TRIE related | |
3317 | my @got=(); | |
3318 | "words"=~/(word|word|word)(?{push @got,$1})s$/; | |
3319 | ok(@got==1,"TRIE optimation is working") or warn "# @got"; | |
3320 | @got=(); | |
3321 | "words"=~/(word|word|word)(?{push @got,$1})s$/i; | |
3322 | ok(@got==1,"TRIEF optimisation is working") or warn "# @got"; | |
3323 | ||
3324 | my @nums=map {int rand 1000} 1..100; | |
3325 | my $re="(".(join "|",@nums).")"; | |
3326 | $re=qr/\b$re\b/; | |
3327 | ||
3328 | foreach (@nums) { | |
3329 | ok($_=~/$re/,"Trie nums"); | |
3330 | } | |
3331 | $_=join " ", @nums; | |
3332 | @got=(); | |
3333 | push @got,$1 while /$re/g; | |
3334 | ||
3335 | my %count; | |
3336 | $count{$_}++ for @got; | |
3337 | my $ok=1; | |
3338 | for (@nums) { | |
3339 | $ok=0 if --$count{$_}<0; | |
3340 | } | |
3341 | ok($ok,"Trie min count matches"); | |
3342 | } | |
3343 | ||
3344 | ||
3345 | # TRIE related | |
3346 | # LATIN SMALL/CAPITAL LETTER A WITH MACRON | |
3347 | ok(("foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i) && $1 eq "\x{101}foo", | |
3348 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"); | |
3349 | ||
3350 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW | |
3351 | ok(("foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i) && $1 eq "\x{1E01}foo", | |
3352 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"); | |
3353 | ||
3354 | # DESERET SMALL/CAPITAL LETTER LONG I | |
3355 | ok(("foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i) && $1 eq "\x{10428}foo", | |
3356 | "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"); | |
3357 | ||
3358 | # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' | |
3359 | ok(("foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i) && $1 eq "\x{1E01}xfoo", | |
3360 | "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"); | |
3361 | ||
3362 | {# TRIE related | |
3363 | ||
3364 | use charnames ':full'; | |
3365 | ||
3366 | $s="\N{LATIN SMALL LETTER SHARP S}"; | |
3367 | ok(("foba ba$s" =~ qr/(foo|Ba$s|bar)/i) | |
3368 | && $1 eq "ba$s", | |
3369 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); | |
3370 | ok(("foba ba$s" =~ qr/(Ba$s|foo|bar)/i) | |
3371 | && $1 eq "ba$s", | |
3372 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); | |
3373 | ok(("foba ba$s" =~ qr/(foo|bar|Ba$s)/i) | |
3374 | && $1 eq "ba$s", | |
3375 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); | |
3376 | ||
3377 | ok(("foba ba$s" =~ qr/(foo|Bass|bar)/i) | |
3378 | && $1 eq "ba$s", | |
3379 | "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"); | |
3380 | ||
3381 | ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i) | |
3382 | && $1 eq "ba$s", | |
3383 | "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"); | |
786e8c11 YO |
3384 | |
3385 | ok(("foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i) | |
3386 | && $1 eq "ba${s}pxySS$s$s", | |
3387 | "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"); | |
3388 | ||
3389 | ||
a3621e74 YO |
3390 | } |
3391 | ||
3392 | ||
e1d1eefb | 3393 | print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n"; |
fc8cd66c | 3394 | if (!$ENV{PERL_SKIP_PSYCHO_TEST}){ |
a3621e74 YO |
3395 | my @normal=qw(these are some normal words); |
3396 | my $psycho=join "|",@normal,map chr $_,255..20000; | |
3397 | ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho'); | |
fc8cd66c YO |
3398 | } else { |
3399 | ok(1,'Skipped Psycho'); | |
a3621e74 | 3400 | } |
809e8e66 DM |
3401 | |
3402 | # [perl #36207] mixed utf8 / latin-1 and case folding | |
3403 | ||
3404 | { | |
108bb1ad YO |
3405 | my $utf8 = "\xe9\x{100}"; chop $utf8; |
3406 | my $latin1 = "\xe9"; | |
3407 | ||
3408 | ok($utf8 =~ /\xe9/i, "utf8/latin"); | |
3409 | ok($utf8 =~ /$latin1/i, "utf8/latin runtime"); | |
3410 | ok($utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"); | |
3411 | ok($utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"); | |
3412 | ||
a0a388a1 | 3413 | ok("\xe9" =~ /$utf8/i, "# latin/utf8"); |
108bb1ad | 3414 | ok("\xe9" =~ /(abc|$utf8)/i, "# latin/utf8 trie"); |
a0a388a1 | 3415 | ok($latin1 =~ /$utf8/i, "# latin/utf8 runtime"); |
108bb1ad | 3416 | ok($latin1 =~ /(abc|$utf8)/i, "# latin/utf8 trie runtime"); |
809e8e66 DM |
3417 | } |
3418 | ||
740266bf NC |
3419 | # [perl #37038] Global regular matches generate invalid pointers |
3420 | ||
3421 | { | |
3422 | my $s = "abcd"; | |
3423 | $s =~ /(..)(..)/g; | |
3424 | $s = $1; | |
3425 | $s = $2; | |
3426 | ok($s eq 'cd', | |
4f5010f2 | 3427 | "# assigning to original string should not corrupt match vars"); |
740266bf NC |
3428 | } |
3429 | ||
0523e772 NC |
3430 | { |
3431 | package wooosh; | |
3432 | sub gloople { | |
3433 | "!"; | |
3434 | } | |
3435 | package main; | |
3436 | ||
3437 | my $aeek = bless {}, 'wooosh'; | |
3438 | eval {$aeek->gloople() =~ /(.)/g;}; | |
547ba77e | 3439 | ok($@ eq "", "//g match against return value of sub") or print "# $@\n"; |
0523e772 | 3440 | } |
740266bf | 3441 | |
86f12da2 NC |
3442 | { |
3443 | sub gloople { | |
3444 | "!"; | |
3445 | } | |
3446 | eval {gloople() =~ /(.)/g;}; | |
3447 | ok($@ eq "", "# 26410 didn't affect sub calls for some reason") | |
3448 | or print "# $@\n"; | |
3449 | } | |
3450 | ||
3451 | { | |
03949bff | 3452 | local $TODO = "See changes 26925-26928, which reverted change 26410"; |
86f12da2 NC |
3453 | package lv; |
3454 | $var = "abc"; | |
3455 | sub variable : lvalue { $var } | |
3456 | ||
3457 | package main; | |
3458 | my $o = bless [], "lv"; | |
3459 | my $f = ""; | |
3460 | eval { for (1..2) { $f .= $1 if $o->variable =~ /(.)/g } }; | |
03949bff | 3461 | ok($f eq "ab", "pos retained between calls") or print "# $@\n"; |
86f12da2 NC |
3462 | } |
3463 | ||
3464 | { | |
03949bff | 3465 | local $TODO = "See changes 26925-26928, which reverted change 26410"; |
86f12da2 NC |
3466 | $var = "abc"; |
3467 | sub variable : lvalue { $var } | |
3468 | ||
3469 | my $f = ""; | |
3470 | eval { for (1..2) { $f .= $1 if variable() =~ /(.)/g } }; | |
03949bff | 3471 | ok($f eq "ab", "pos retained between calls") or print "# $@\n"; |
86f12da2 NC |
3472 | } |
3473 | ||
9f7f3913 TS |
3474 | # [perl #37836] Simple Regex causes SEGV when run on specific data |
3475 | if ($ordA == 193) { | |
3476 | print "ok $test # Skip: in EBCDIC\n"; $test++; | |
3477 | } else { | |
3478 | no warnings 'utf8'; | |
3479 | $_ = pack('U0C2', 0xa2, 0xf8); # ill-formed UTF-8 | |
3480 | my $ret = 0; | |
3481 | eval { $ret = s/[\0]+//g }; | |
3482 | ok($ret == 0, "ill-formed UTF-8 doesn't match NUL in class"); | |
3483 | } | |
3484 | ||
3485 | { # [perl #38293] chr(65535) should be allowed in regexes | |
3486 | no warnings 'utf8'; # to allow non-characters | |
3487 | my($c, $r, $s); | |
3488 | ||
3489 | $c = chr 0xffff; | |
3490 | $c =~ s/$c//g; | |
3491 | ok($c eq "", "U+FFFF, parsed as atom"); | |
3492 | ||
3493 | $c = chr 0xffff; | |
3494 | $r = "\\$c"; | |
3495 | $c =~ s/$r//g; | |
3496 | ok($c eq "", "U+FFFF backslashed, parsed as atom"); | |
3497 | ||
3498 | $c = chr 0xffff; | |
3499 | $c =~ s/[$c]//g; | |
3500 | ok($c eq "", "U+FFFF, parsed in class"); | |
3501 | ||
3502 | $c = chr 0xffff; | |
3503 | $r = "[\\$c]"; | |
3504 | $c =~ s/$r//g; | |
3505 | ok($c eq "", "U+FFFF backslashed, parsed in class"); | |
3506 | ||
3507 | $s = "A\x{ffff}B"; | |
3508 | $s =~ s/\x{ffff}//i; | |
3509 | ok($s eq "AB", "U+FFFF, EXACTF"); | |
3510 | ||
3511 | $s = "\x{ffff}A"; | |
3512 | $s =~ s/\bA//; | |
3513 | ok($s eq "\x{ffff}", "U+FFFF, BOUND"); | |
3514 | ||
3515 | $s = "\x{ffff}!"; | |
3516 | $s =~ s/\B!//; | |
3517 | ok($s eq "\x{ffff}", "U+FFFF, NBOUND"); | |
3518 | } # non-characters end | |
3519 | ||
a30987ac YO |
3520 | { |
3521 | # https://rt.perl.org/rt3/Ticket/Display.html?id=39583 | |
3522 | ||
3523 | # The printing characters | |
3524 | my @chars = ("A".."Z"); | |
3525 | my $delim = ","; | |
3526 | my $size = 32771 - 4; | |
fc8cd66c | 3527 | my $str = ''; |
a30987ac YO |
3528 | |
3529 | # create some random junk. Inefficient, but it works. | |
3530 | for ($i = 0 ; $i < $size ; $i++) { | |
fc8cd66c | 3531 | $str .= $chars[int(rand(@chars))]; |
a30987ac YO |
3532 | } |
3533 | ||
fc8cd66c | 3534 | $str .= ($delim x 4); |
a30987ac YO |
3535 | my $res; |
3536 | my $matched; | |
fc8cd66c | 3537 | if ($str =~ s/^(.*?)${delim}{4}//s) { |
a30987ac YO |
3538 | $res = $1; |
3539 | $matched=1; | |
3540 | } | |
3541 | ok($matched,'pattern matches'); | |
fc8cd66c | 3542 | ok(length($str)==0,"Empty string"); |
a30987ac YO |
3543 | ok(defined($res) && length($res)==$size,"\$1 is correct size"); |
3544 | } | |
9f7f3913 | 3545 | |
1749ea0d TS |
3546 | { # related to [perl #27940] |
3547 | ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'); | |
3548 | ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'); | |
3549 | ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'); | |
3550 | ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'); | |
3551 | ||
3552 | ok("X\0A" =~ /X\c@?A/, '\c@?'); | |
3553 | ok("X\0A" =~ /X\c@*A/, '\c@*'); | |
3554 | ok("X\0A" =~ /X\c@(A)/, '\c@('); | |
3555 | ok("X\0A" =~ /X(\c@)A/, '\c@)'); | |
3556 | ok("X\0A" =~ /X\c@|ZA/, '\c@|'); | |
3557 | ||
3558 | ok("X\@A" =~ /X@?A/, '@?'); | |
3559 | ok("X\@A" =~ /X@*A/, '@*'); | |
3560 | ok("X\@A" =~ /X@(A)/, '@('); | |
3561 | ok("X\@A" =~ /X(@)A/, '@)'); | |
3562 | ok("X\@A" =~ /X@|ZA/, '@|'); | |
3563 | ||
3564 | local $" = ','; # non-whitespace and non-RE-specific | |
3565 | ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus'); | |
3566 | ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/'); | |
3567 | ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/'); | |
3568 | ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x'); | |
3569 | ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x'); | |
3570 | } | |
3571 | ||
fc8cd66c YO |
3572 | { |
3573 | use lib 'lib'; | |
3574 | use Cname; | |
3575 | ||
3576 | ok('fooB'=~/\N{foo}[\N{B}\N{b}]/,"Passthrough charname"); | |
3577 | $test=1233; my $handle=make_must_warn('Ignoring excess chars from'); | |
3578 | $handle->('q(xxWxx) =~ /[\N{WARN}]/'); | |
3579 | { | |
3580 | my $code; | |
3581 | my $w=""; | |
3582 | local $SIG{__WARN__} = sub { $w.=shift }; | |
3583 | eval($code=<<'EOFTEST') or die "$@\n$code\n"; | |
3584 | { | |
3585 | use warnings; | |
3586 | ||
3587 | #1234 | |
3588 | ok("\0" !~ /[\N{EMPTY-STR}XY]/, | |
3589 | "Zerolength charname in charclass doesnt match \0"); | |
3590 | 1; | |
3591 | } | |
3592 | EOFTEST | |
3593 | ok($w=~/Ignoring zero length/, | |
3594 | "Got expected zero length warning"); | |
3595 | warn $code; | |
3596 | ||
3597 | } | |
3598 | $handle= make_must_warn('Ignoring zero length'); | |
3599 | $handle->('qq(\\0) =~ /[\N{EMPTY-STR}XY]/'); | |
3600 | ok('AB'=~/(\N{EVIL})/ && $1 eq 'A',"Charname caching $1"); | |
3601 | ok('ABC'=~/(\N{EVIL})/,"Charname caching $1"); | |
3602 | ok('xy'=~/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node'); | |
3603 | ok(''=~/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node 2'); | |
3604 | ||
3605 | } | |
3606 | { | |
3607 | print "# MORE LATIN SMALL LETTER SHARP S\n"; | |
3608 | ||
3609 | use charnames ':full'; | |
3610 | ||
3611 | #see also test #835 | |
3612 | ok("ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, | |
3613 | "unoptimized named sequence in class 1"); | |
3614 | ok("SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, | |
3615 | "unoptimized named sequence in class 2"); | |
3616 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/, | |
3617 | "unoptimized named sequence in class 3"); | |
3618 | ok("\N{LATIN SMALL LETTER SHARP S}" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i, | |
3619 | "unoptimized named sequence in class 4"); | |
3620 | ||
3621 | ok('aabc' !~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against aabc'); | |
3622 | ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); | |
3623 | ok('a+bc' =~ /a\N{PLUS SIGN}b/,'/a\N{PLUS SIGN}b/ against a+bc'); | |
3624 | ||
3625 | ok(' A B'=~/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
3626 | 'Intermixed named and unicode escapes 1'); | |
3627 | ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}"=~ | |
3628 | /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, | |
3629 | 'Intermixed named and unicode escapes 2'); | |
3630 | ok("\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042} 3"=~ | |
3631 | /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, | |
3632 | 'Intermixed named and unicode escapes'); | |
3633 | } | |
faec1544 DM |
3634 | $brackets = qr{ |
3635 | { (?> [^{}]+ | (??{ $brackets }) )* } | |
3636 | }x; | |
3637 | ok("{b{c}d" !~ m/^((??{ $brackets }))/, "bracket mismatch"); | |
3638 | ||
6bda09f9 YO |
3639 | SKIP:{ |
3640 | our @stack=(); | |
3641 | my @expect=qw( | |
3642 | stuff1 | |
3643 | stuff2 | |
3644 | <stuff1>and<stuff2> | |
3645 | right | |
3646 | <right> | |
3647 | <<right>> | |
3648 | <<<right>>> | |
3649 | <<stuff1>and<stuff2>><<<<right>>>> | |
3650 | ); | |
3651 | ||
3652 | local $_='<<<stuff1>and<stuff2>><<<<right>>>>>'; | |
3653 | ok(/^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/, | |
3654 | "Recursion should match"); | |
3655 | ok(@stack==@expect) | |
3656 | or skip("Won't test individual results as count isn't equal", | |
3657 | 0+@expect); | |
3658 | foreach my $idx (@expect) { | |
3659 | ok($expect[$idx] eq $stack[$idx], | |
3660 | "Expecting '$expect' at stack pos #$idx"); | |
3661 | } | |
3662 | ||
3663 | } | |
81714fb9 YO |
3664 | { |
3665 | my $s='123453456'; | |
3666 | $s=~s/(?<digits>\d+)\k<digits>/$+{digits}/; | |
3667 | ok($s eq '123456','Named capture (angle brackets) s///'); | |
3668 | $s='123453456'; | |
3669 | $s=~s/(?'digits'\d+)\k'digits'/$+{digits}/; | |
3670 | ok($s eq '123456','Named capture (single quotes) s///'); | |
3671 | } | |
1f1031fe YO |
3672 | |
3673 | { | |
1b0dc72f TS |
3674 | my @ary = ( |
3675 | pack('U', 0x00F1), # n-tilde | |
3676 | '_'.pack('U', 0x00F1), # _ + n-tilde | |
3677 | 'c'.pack('U', 0x0327), # c + cedilla | |
3678 | pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla | |
3679 | 'a'.pack('U', 0x00B2), # a + superscript two | |
3680 | pack('U', 0x0391), # ALPHA | |
3681 | pack('U', 0x0391).'2', # ALPHA + 2 | |
3682 | pack('U', 0x0391).'_', # ALPHA + _ | |
3683 | ); | |
3684 | for my $uni (@ary) { | |
3685 | my ($r1, $c1, $r2, $c2) = eval qq{ | |
3686 | use utf8; | |
3687 | scalar("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), | |
3688 | \$+{${uni}}, | |
3689 | scalar("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), | |
3690 | \$+{${uni}}; | |
3691 | }; | |
3692 | ok($r1, "Named capture UTF (?'')"); | |
3693 | ok(defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"); | |
3694 | ok($r2, "Named capture UTF (?<>)"); | |
3695 | ok(defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"); | |
1f1031fe YO |
3696 | } |
3697 | } | |
3698 | ||
e62cc96a YO |
3699 | sub iseq($$;$) { |
3700 | my ( $got, $expect, $name)=@_; | |
03949bff | 3701 | my $todo = $TODO ? " # TODO $TODO" : ''; |
e62cc96a YO |
3702 | |
3703 | $_=defined($_) ? "'$_'" : "undef" | |
3704 | for $got, $expect; | |
3705 | ||
3706 | my $ok= $got eq $expect; | |
3707 | ||
03949bff | 3708 | printf "%sok %d - %s$todo\n", ($ok ? "" : "not "), $test, |
de734bd5 | 3709 | ($name||$Message)."\tLine ".((caller)[2]); |
e62cc96a YO |
3710 | |
3711 | printf "# Failed test at line %d\n". | |
3712 | "# expected: %s\n". | |
3713 | "# result: %s\n", | |
3714 | (caller)[2], $expect, $got | |
3715 | unless $ok; | |
3716 | ||
3717 | $test++; | |
3718 | return $ok; | |
3719 | } | |
81714fb9 YO |
3720 | { |
3721 | my $s='foo bar baz'; | |
e62cc96a YO |
3722 | my (@k,@v,@fetch,$res); |
3723 | my $count= 0; | |
3724 | my @names=qw($+{A} $+{B} $+{C}); | |
81714fb9 YO |
3725 | if ($s=~/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { |
3726 | while (my ($k,$v)=each(%+)) { | |
3727 | $count++; | |
3728 | } | |
3729 | @k=sort keys(%+); | |
3730 | @v=sort values(%+); | |
e62cc96a YO |
3731 | $res=1; |
3732 | push @fetch, | |
3733 | [ "$+{A}", "$1" ], | |
3734 | [ "$+{B}", "$2" ], | |
3735 | [ "$+{C}", "$3" ], | |
3736 | ; | |
3737 | } | |
3738 | foreach (0..2) { | |
3739 | if ($fetch[$_]) { | |
3740 | iseq($fetch[$_][0],$fetch[$_][1],$names[$_]); | |
3741 | } else { | |
3742 | ok(0, $names[$_]); | |
3743 | } | |
81714fb9 | 3744 | } |
e62cc96a YO |
3745 | iseq($res,1,"$s~=/(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/"); |
3746 | iseq($count,3,"Got 3 keys in %+ via each"); | |
3747 | iseq(0+@k, 3, 'Got 3 keys in %+ via keys'); | |
3748 | iseq("@k","A B C", "Got expected keys"); | |
3749 | iseq("@v","bar baz foo", "Got expected values"); | |
0a4db386 YO |
3750 | eval' |
3751 | print for $+{this_key_doesnt_exist}; | |
3752 | '; | |
3753 | ok(!$@,'lvalue $+{...} should not throw an exception'); | |
81714fb9 | 3754 | } |
44a2ac75 YO |
3755 | { |
3756 | my $s='foo bar baz'; | |
3757 | my @res; | |
3758 | if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { | |
3759 | foreach my $name (sort keys(%-)) { | |
3760 | my $ary = $-{$name}; | |
3761 | foreach my $idx (0..$#$ary) { | |
3762 | push @res,"$name:$idx:$ary->[$idx]"; | |
3763 | } | |
3764 | } | |
3765 | } | |
3766 | my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4); | |
3767 | iseq("@res","@expect","Check %-"); | |
3768 | eval' | |
3769 | print for $-{this_key_doesnt_exist}; | |
3770 | '; | |
3771 | ok(!$@,'lvalue $-{...} should not throw an exception'); | |
3772 | } | |
bfac009d DM |
3773 | # stress test CURLYX/WHILEM. |
3774 | # | |
3775 | # This test includes varying levels of nesting, and according to | |
3776 | # profiling done against build 28905, exercises every code line in the | |
3777 | # CURLYX and WHILEM blocks, except those related to LONGJMP, the | |
3778 | # super-linear cache and warnings. It executes about 0.5M regexes | |
3779 | ||
24b23f37 YO |
3780 | if ($ENV{PERL_SKIP_PSYCHO_TEST}){ |
3781 | printf "ok %d Skip: No psycho tests\n", $test++; | |
3782 | } else { | |
e1d1eefb | 3783 | print "# set PERL_SKIP_PSYCHO_TEST to skip this test\n"; |
bfac009d DM |
3784 | my $r = qr/^ |
3785 | (?: | |
3786 | ( (?:a|z+)+ ) | |
3787 | (?: | |
3788 | ( (?:b|z+){3,}? ) | |
3789 | ( | |
3790 | (?: | |
c476f425 DM |
3791 | (?: |
3792 | (?:c|z+){1,1}?z | |
3793 | )? | |
bfac009d DM |
3794 | (?:c|z+){1,1} |
3795 | )* | |
3796 | ) | |
3797 | (?:z*){2,} | |
3798 | ( (?:z+|d)+ ) | |
3799 | (?: | |
3800 | ( (?:e|z+)+ ) | |
3801 | )* | |
3802 | ( (?:f|z+)+ ) | |
3803 | )* | |
3804 | ( (?:z+|g)+ ) | |
3805 | (?: | |
3806 | ( (?:h|z+)+ ) | |
3807 | )* | |
3808 | ( (?:i|z+)+ ) | |
3809 | )+ | |
3810 | ( (?:j|z+)+ ) | |
3811 | (?: | |
3812 | ( (?:k|z+)+ ) | |
3813 | )* | |
3814 | ( (?:l|z+)+ ) | |
3815 | $/x; | |
3816 | ||
3817 | ||
3818 | my $ok = 1; | |
3819 | my $msg = "CURLYX stress test"; | |
3820 | OUTER: | |
3821 | for my $a ("x","a","aa") { | |
3822 | for my $b ("x","bbb","bbbb") { | |
3823 | my $bs = $a.$b; | |
3824 | for my $c ("x","c","cc") { | |
3825 | my $cs = $bs.$c; | |
3826 | for my $d ("x","d","dd") { | |
3827 | my $ds = $cs.$d; | |
3828 | for my $e ("x","e","ee") { | |
3829 | my $es = $ds.$e; | |
3830 | for my $f ("x","f","ff") { | |
3831 | my $fs = $es.$f; | |
3832 | for my $g ("x","g","gg") { | |
3833 | my $gs = $fs.$g; | |
3834 | for my $h ("x","h","hh") { | |
3835 | my $hs = $gs.$h; | |
3836 | for my $i ("x","i","ii") { | |
3837 | my $is = $hs.$i; | |
3838 | for my $j ("x","j","jj") { | |
3839 | my $js = $is.$j; | |
3840 | for my $k ("x","k","kk") { | |
3841 | my $ks = $js.$k; | |
3842 | for my $l ("x","l","ll") { | |
3843 | my $ls = $ks.$l; | |
3844 | if ($ls =~ $r) { | |
3845 | if ($ls =~ /x/) { | |
3846 | $msg .= ": unexpected match for [$ls]"; | |
3847 | $ok = 0; | |
3848 | last OUTER; | |
3849 | } | |
3850 | my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12"; | |
3851 | unless ($ls eq $cap) { | |
3852 | $msg .= ": capture: [$ls], got [$cap]"; | |
3853 | $ok = 0; | |
3854 | last OUTER; | |
3855 | } | |
3856 | } | |
3857 | else { | |
3858 | unless ($ls =~ /x/) { | |
3859 | $msg = ": failed for [$ls]"; | |
3860 | $ok = 0; | |
3861 | last OUTER; | |
3862 | } | |
3863 | } | |
3864 | } | |
3865 | } | |
3866 | } | |
3867 | } | |
3868 | } | |
3869 | } | |
3870 | } | |
3871 | } | |
3872 | } | |
3873 | } | |
3874 | } | |
3875 | } | |
3876 | ok($ok, $msg); | |
3877 | } | |
3878 | ||
cc74c5bd TS |
3879 | # \, breaks {3,4} |
3880 | ok("xaaay" !~ /xa{3\,4}y/, "\, in a pattern"); | |
3881 | ok("xa{3,4}y" =~ /xa{3\,4}y/, "\, in a pattern"); | |
3882 | ||
3883 | # \c\ followed by _ | |
3884 | ok("x\c_y" !~ /x\c\_y/, "\_ in a pattern"); | |
3885 | ok("x\c\_y" =~ /x\c\_y/, "\_ in a pattern"); | |
3886 | ||
3887 | # \c\ followed by other characters | |
3888 | for my $c ("z", "\0", "!", chr(254), chr(256)) { | |
3889 | my $targ = "a\034$c"; | |
3890 | my $reg = "a\\c\\$c"; | |
3891 | ok(eval("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"); | |
3892 | } | |
bfac009d | 3893 | |
581d1b5b RGS |
3894 | { |
3895 | my $str='abc'; | |
3896 | my $count=0; | |
3897 | my $mval=0; | |
3898 | my $pval=0; | |
3899 | while ($str=~/b/g) { $mval=$#-; $pval=$#+; $count++ } | |
3900 | iseq($mval,0,"\@- should be empty [RT#36046]"); | |
3901 | iseq($pval,0,"\@+ should be empty [RT#36046]"); | |
3902 | iseq($count,1,"should have matched once only [RT#36046]"); | |
3903 | } | |
3904 | ||
5d458dd8 | 3905 | { # Test the (*PRUNE) pattern |
24b23f37 | 3906 | our $count = 0; |
e2e6a0f1 | 3907 | 'aaab'=~/a+b?(?{$count++})(*FAIL)/; |
5d458dd8 | 3908 | iseq($count,9,"expect 9 for no (*PRUNE)"); |
24b23f37 | 3909 | $count = 0; |
5d458dd8 YO |
3910 | 'aaab'=~/a+b?(*PRUNE)(?{$count++})(*FAIL)/; |
3911 | iseq($count,3,"expect 3 with (*PRUNE)"); | |
24b23f37 YO |
3912 | local $_='aaab'; |
3913 | $count=0; | |
5d458dd8 YO |
3914 | 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g; |
3915 | iseq($count,4,"/.(*PRUNE)/"); | |
24b23f37 | 3916 | $count = 0; |
5d458dd8 YO |
3917 | 'aaab'=~/a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; |
3918 | iseq($count,3,"expect 3 with (*PRUNE)"); | |
24b23f37 YO |
3919 | local $_='aaab'; |
3920 | $count=0; | |
5d458dd8 YO |
3921 | 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; |
3922 | iseq($count,4,"/.(*PRUNE)/"); | |
24b23f37 | 3923 | } |
5d458dd8 | 3924 | { # Test the (*SKIP) pattern |
24b23f37 | 3925 | our $count = 0; |
5d458dd8 YO |
3926 | 'aaab'=~/a+b?(*SKIP)(?{$count++})(*FAIL)/; |
3927 | iseq($count,1,"expect 1 with (*SKIP)"); | |
24b23f37 YO |
3928 | local $_='aaab'; |
3929 | $count=0; | |
5d458dd8 YO |
3930 | 1 while /.(*SKIP)(?{$count++})(*FAIL)/g; |
3931 | iseq($count,4,"/.(*SKIP)/"); | |
24b23f37 YO |
3932 | $_='aaabaaab'; |
3933 | $count=0; | |
3934 | our @res=(); | |
5d458dd8 YO |
3935 | 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; |
3936 | iseq($count,2,"Expect 2 with (*SKIP)" ); | |
3937 | iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" ); | |
24b23f37 | 3938 | } |
5d458dd8 | 3939 | { # Test the (*SKIP) pattern |
24b23f37 | 3940 | our $count = 0; |
5d458dd8 YO |
3941 | 'aaab'=~/a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; |
3942 | iseq($count,1,"expect 1 with (*SKIP)"); | |
24b23f37 YO |
3943 | local $_='aaab'; |
3944 | $count=0; | |
5d458dd8 YO |
3945 | 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g; |
3946 | iseq($count,4,"/.(*SKIP)/"); | |
24b23f37 YO |
3947 | $_='aaabaaab'; |
3948 | $count=0; | |
3949 | our @res=(); | |
5d458dd8 YO |
3950 | 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; |
3951 | iseq($count,2,"Expect 2 with (*SKIP)" ); | |
3952 | iseq("@res","aaab aaab","adjacent (*SKIP) works as expected" ); | |
e2e6a0f1 | 3953 | } |
5d458dd8 | 3954 | { # Test the (*SKIP) pattern |
e2e6a0f1 | 3955 | our $count = 0; |
5d458dd8 YO |
3956 | 'aaab'=~/a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/; |
3957 | iseq($count,3,"expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)"); | |
e2e6a0f1 YO |
3958 | local $_='aaabaaab'; |
3959 | $count=0; | |
3960 | our @res=(); | |
5d458dd8 YO |
3961 | 1 while /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g; |
3962 | iseq($count,5,"Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)" ); | |
3963 | iseq("@res","aaab b aaab b ","adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected" ); | |
e2e6a0f1 YO |
3964 | } |
3965 | { # Test the (*COMMIT) pattern | |
3966 | our $count = 0; | |
3967 | 'aaabaaab'=~/a+b?(*COMMIT)(?{$count++})(*FAIL)/; | |
3968 | iseq($count,1,"expect 1 with (*COMMIT)"); | |
3969 | local $_='aaab'; | |
3970 | $count=0; | |
3971 | 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g; | |
3972 | iseq($count,1,"/.(*COMMIT)/"); | |
3973 | $_='aaabaaab'; | |
3974 | $count=0; | |
3975 | our @res=(); | |
3976 | 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g; | |
3977 | iseq($count,1,"Expect 1 with (*COMMIT)" ); | |
3978 | iseq("@res","aaab","adjacent (*COMMIT) works as expected" ); | |
3979 | } | |
3980 | { | |
3981 | # Test named commits and the $REGERROR var | |
3982 | our $REGERROR; | |
3983 | for my $name ('',':foo') | |
3984 | { | |
5d458dd8 YO |
3985 | for my $pat ("(*PRUNE$name)", |
3986 | ($name? "(*MARK$name)" : "") | |
3987 | . "(*SKIP$name)", | |
3988 | "(*COMMIT$name)") | |
e2e6a0f1 YO |
3989 | { |
3990 | for my $suffix ('(*FAIL)','') | |
3991 | { | |
3992 | 'aaaab'=~/a+b$pat$suffix/; | |
3993 | iseq( | |
3994 | $REGERROR, | |
3995 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
3996 | "Test $pat and \$REGERROR $suffix" | |
3997 | ); | |
3998 | } | |
3999 | } | |
4000 | } | |
4001 | } | |
4002 | { | |
4003 | # Test named commits and the $REGERROR var | |
4004 | package Fnorble; | |
4005 | our $REGERROR; | |
4006 | for my $name ('',':foo') | |
4007 | { | |
5d458dd8 YO |
4008 | for my $pat ("(*PRUNE$name)", |
4009 | ($name? "(*MARK$name)" : "") | |
4010 | . "(*SKIP$name)", | |
4011 | "(*COMMIT$name)") | |
e2e6a0f1 YO |
4012 | { |
4013 | for my $suffix ('(*FAIL)','') | |
4014 | { | |
4015 | 'aaaab'=~/a+b$pat$suffix/; | |
4016 | ::iseq( | |
4017 | $REGERROR, | |
4018 | ($suffix ? ($name ? 'foo' : "1") : ""), | |
4019 | "Test $pat and \$REGERROR $suffix" | |
4020 | ); | |
4021 | } | |
4022 | } | |
4023 | } | |
4024 | } | |
4025 | { | |
4026 | # Test named commits and the $REGERROR var | |
f0852a51 | 4027 | local $Message = "\$REGERROR"; |
e2e6a0f1 YO |
4028 | our $REGERROR; |
4029 | for $word (qw(bar baz bop)) { | |
4030 | $REGERROR=""; | |
4031 | "aaaaa$word"=~/a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/; | |
4032 | iseq($REGERROR,$word); | |
4033 | } | |
4034 | } | |
4035 | { #Regression test for perlbug 40684 | |
f0852a51 | 4036 | local $Message = "RT#40684 tests:"; |
e2e6a0f1 YO |
4037 | my $s = "abc\ndef"; |
4038 | my $rex = qr'^abc$'m; | |
4039 | ok($s =~ m/$rex/); | |
4040 | ok($s =~ m/^abc$/m); | |
24b23f37 | 4041 | } |
5d458dd8 YO |
4042 | { |
4043 | #Mindnumbingly simple test of (*THEN) | |
4044 | for ("ABC","BAX") { | |
4045 | ok(/A (*THEN) X | B (*THEN) C/x,"Simple (*THEN) test"); | |
4046 | } | |
542fa716 YO |
4047 | } |
4048 | ||
4049 | { | |
f0852a51 | 4050 | local $Message = "Relative Recursion"; |
542fa716 YO |
4051 | my $parens=qr/(\((?:[^()]++|(?-1))*+\))/; |
4052 | local $_='foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; | |
4053 | my ($all,$one,$two)=('','',''); | |
4054 | if (/foo $parens \s* \+ \s* bar $parens/x) { | |
4055 | $all=$&; | |
4056 | $one=$1; | |
4057 | $two=$2; | |
4058 | } | |
4059 | iseq($one, '((2*3)+4-3)'); | |
4060 | iseq($two, '(2*(3+4)-1*(2-3))'); | |
4061 | iseq($all, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'); | |
4062 | iseq($all, $_); | |
4063 | } | |
b515a41d YO |
4064 | { |
4065 | my $spaces=" "; | |
4066 | local $_=join 'bar',$spaces,$spaces; | |
4067 | our $count=0; | |
4068 | s/(?>\s+bar)(?{$count++})//g; | |
4069 | iseq($_,$spaces,"SUSPEND final string"); | |
4070 | iseq($count,1,"Optimiser should have prevented more than one match"); | |
4071 | } | |
f0852a51 YO |
4072 | { |
4073 | local $Message = "RT#36909 test"; | |
4074 | $^R = 'Nothing'; | |
4075 | { | |
4076 | local $^R = "Bad"; | |
4077 | ok('x foofoo y' =~ m{ | |
4078 | (foo) # $^R correctly set | |
4079 | (?{ "last regexp code result" }) | |
4080 | }x); | |
4081 | iseq($^R,'last regexp code result'); | |
4082 | } | |
4083 | iseq($^R,'Nothing'); | |
4084 | { | |
4085 | local $^R = "Bad"; | |
4086 | ||
4087 | ok('x foofoo y' =~ m{ | |
4088 | (?:foo|bar)+ # $^R correctly set | |
4089 | (?{"last regexp code result"}) | |
4090 | }x); | |
4091 | iseq($^R,'last regexp code result'); | |
4092 | } | |
4093 | iseq($^R,'Nothing'); | |
4094 | ||
4095 | { | |
4096 | local $^R = "Bad"; | |
4097 | ok('x foofoo y' =~ m{ | |
4098 | (foo|bar)\1+ # $^R undefined | |
4099 | (?{"last regexp code result"}) | |
4100 | }x); | |
4101 | iseq($^R,'last regexp code result'); | |
4102 | } | |
4103 | iseq($^R,'Nothing'); | |
19b95bf0 DM |
4104 | |
4105 | { | |
4106 | local $^R = "Bad"; | |
4107 | ok('x foofoo y' =~ m{ | |
4108 | (foo|bar)\1 # this time without the + | |
4109 | (?{"last regexp code result"}) | |
4110 | }x); | |
4111 | iseq($^R,'last regexp code result'); | |
4112 | } | |
4113 | iseq($^R,'Nothing'); | |
f0852a51 | 4114 | } |
cf2a2b69 | 4115 | { |
03949bff NC |
4116 | local $Message="RT 22395"; |
4117 | local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; | |
cf2a2b69 | 4118 | our $count; |
58e23c8d | 4119 | for my $l (10,100,1000) { |
cf2a2b69 YO |
4120 | $count=0; |
4121 | ('a' x $l) =~ /(.*)(?{$count++})[bc]/; | |
03949bff | 4122 | iseq( $count, $l + 1); |
cf2a2b69 YO |
4123 | } |
4124 | } | |
04e1c60a | 4125 | { |
a678b0e4 YO |
4126 | local $Message = "RT#22614"; |
4127 | local $_='ab'; | |
4128 | our @len=(); | |
4129 | /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; | |
4130 | iseq("@len","2 2 2"); | |
4131 | } | |
4132 | { | |
04e1c60a YO |
4133 | local $Message = "RT#18209"; |
4134 | my $text = ' word1 word2 word3 word4 word5 word6 '; | |
4135 | ||
4136 | my @words = ('word1', 'word3', 'word5'); | |
4137 | my $count; | |
4138 | foreach my $word (@words){ | |
4139 | $text =~ s/$word\s//gi; # Leave a space to seperate words in the resultant str. | |
4140 | # The following block is not working. | |
4141 | if($&){ | |
4142 | $count++; | |
4143 | } | |
4144 | # End bad block | |
4145 | } | |
4146 | iseq($count,3); | |
4147 | iseq($text,' word2 word4 word6 '); | |
4148 | } | |
58e23c8d YO |