Commit | Line | Data |
---|---|---|
8d063cd8 | 1 | #!./perl |
8d37f932 DD |
2 | # |
3 | # This is a home for regular expression tests that don't fit into | |
4 | # the format supported by op/regexp.t. If you want to add a test | |
5 | # that does fit that format, add it to op/re_tests, not here. | |
8d063cd8 | 6 | |
9133bbab | 7 | $| = 1; |
3568d838 | 8 | |
112bedeb | 9 | print "1..794\n"; |
8d37f932 | 10 | |
e4d48cc9 GS |
11 | BEGIN { |
12 | chdir 't' if -d 't'; | |
20822f61 | 13 | @INC = '../lib'; |
e4d48cc9 | 14 | } |
ffbc6a93 | 15 | |
8d37f932 | 16 | eval 'use Config'; # Defaults assumed if this fails |
8d063cd8 LW |
17 | |
18 | $x = "abc\ndef\n"; | |
19 | ||
20 | if ($x =~ /^abc/) {print "ok 1\n";} else {print "not ok 1\n";} | |
21 | if ($x !~ /^def/) {print "ok 2\n";} else {print "not ok 2\n";} | |
22 | ||
23 | $* = 1; | |
24 | if ($x =~ /^def/) {print "ok 3\n";} else {print "not ok 3\n";} | |
25 | $* = 0; | |
26 | ||
27 | $_ = '123'; | |
28 | if (/^([0-9][0-9]*)/) {print "ok 4\n";} else {print "not ok 4\n";} | |
29 | ||
30 | if ($x =~ /^xxx/) {print "not ok 5\n";} else {print "ok 5\n";} | |
31 | if ($x !~ /^abc/) {print "not ok 6\n";} else {print "ok 6\n";} | |
32 | ||
33 | if ($x =~ /def/) {print "ok 7\n";} else {print "not ok 7\n";} | |
34 | if ($x !~ /def/) {print "not ok 8\n";} else {print "ok 8\n";} | |
35 | ||
36 | if ($x !~ /.def/) {print "ok 9\n";} else {print "not ok 9\n";} | |
37 | if ($x =~ /.def/) {print "not ok 10\n";} else {print "ok 10\n";} | |
38 | ||
39 | if ($x =~ /\ndef/) {print "ok 11\n";} else {print "not ok 11\n";} | |
40 | if ($x !~ /\ndef/) {print "not ok 12\n";} else {print "ok 12\n";} | |
41 | ||
42 | $_ = 'aaabbbccc'; | |
43 | if (/(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc') { | |
44 | print "ok 13\n"; | |
45 | } else { | |
46 | print "not ok 13\n"; | |
47 | } | |
48 | if (/(a+b+c+)/ && $1 eq 'aaabbbccc') { | |
49 | print "ok 14\n"; | |
50 | } else { | |
51 | print "not ok 14\n"; | |
52 | } | |
53 | ||
54 | if (/a+b?c+/) {print "not ok 15\n";} else {print "ok 15\n";} | |
55 | ||
56 | $_ = 'aaabccc'; | |
57 | if (/a+b?c+/) {print "ok 16\n";} else {print "not ok 16\n";} | |
58 | if (/a*b+c*/) {print "ok 17\n";} else {print "not ok 17\n";} | |
59 | ||
60 | $_ = 'aaaccc'; | |
61 | if (/a*b?c*/) {print "ok 18\n";} else {print "not ok 18\n";} | |
62 | if (/a*b+c*/) {print "not ok 19\n";} else {print "ok 19\n";} | |
63 | ||
64 | $_ = 'abcdef'; | |
65 | if (/bcd|xyz/) {print "ok 20\n";} else {print "not ok 20\n";} | |
66 | if (/xyz|bcd/) {print "ok 21\n";} else {print "not ok 21\n";} | |
67 | ||
68 | if (m|bc/*d|) {print "ok 22\n";} else {print "not ok 22\n";} | |
378cc40b LW |
69 | |
70 | if (/^$_$/) {print "ok 23\n";} else {print "not ok 23\n";} | |
71 | ||
72 | $* = 1; # test 3 only tested the optimized version--this one is for real | |
73 | if ("ab\ncd\n" =~ /^cd/) {print "ok 24\n";} else {print "not ok 24\n";} | |
74 | $* = 0; | |
75 | ||
cb55de95 JH |
76 | $XXX{123} = 123; |
77 | $XXX{234} = 234; | |
78 | $XXX{345} = 345; | |
79 | ||
80 | @XXX = ('ok 25','not ok 25', 'ok 26','not ok 26','not ok 27'); | |
81 | while ($_ = shift(@XXX)) { | |
82 | ?(.*)? && (print $1,"\n"); | |
83 | /not/ && reset; | |
84 | /not ok 26/ && reset 'X'; | |
85 | } | |
86 | ||
87 | while (($key,$val) = each(%XXX)) { | |
88 | print "not ok 27\n"; | |
89 | exit; | |
90 | } | |
91 | ||
92 | print "ok 27\n"; | |
378cc40b | 93 | |
378cc40b LW |
94 | 'cde' =~ /[^ab]*/; |
95 | 'xyz' =~ //; | |
96 | if ($& eq 'xyz') {print "ok 28\n";} else {print "not ok 28\n";} | |
97 | ||
98 | $foo = '[^ab]*'; | |
99 | 'cde' =~ /$foo/; | |
100 | 'xyz' =~ //; | |
101 | if ($& eq 'xyz') {print "ok 29\n";} else {print "not ok 29\n";} | |
102 | ||
103 | $foo = '[^ab]*'; | |
104 | 'cde' =~ /$foo/; | |
105 | 'xyz' =~ /$null/; | |
106 | if ($& eq 'xyz') {print "ok 30\n";} else {print "not ok 30\n";} | |
a687059c LW |
107 | |
108 | $_ = 'abcdefghi'; | |
109 | /def/; # optimized up to cmd | |
110 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 31\n";} else {print "not ok 31\n";} | |
111 | ||
112 | /cde/ + 0; # optimized only to spat | |
113 | if ("$`:$&:$'" eq 'ab:cde:fghi') {print "ok 32\n";} else {print "not ok 32\n";} | |
114 | ||
115 | /[d][e][f]/; # not optimized | |
116 | if ("$`:$&:$'" eq 'abc:def:ghi') {print "ok 33\n";} else {print "not ok 33\n";} | |
117 | ||
118 | $_ = 'now is the {time for all} good men to come to.'; | |
119 | / {([^}]*)}/; | |
120 | if ($1 eq 'time for all') {print "ok 34\n";} else {print "not ok 34 $1\n";} | |
121 | ||
122 | $_ = 'xxx {3,4} yyy zzz'; | |
123 | print /( {3,4})/ ? "ok 35\n" : "not ok 35\n"; | |
124 | print $1 eq ' ' ? "ok 36\n" : "not ok 36\n"; | |
125 | print /( {4,})/ ? "not ok 37\n" : "ok 37\n"; | |
126 | print /( {2,3}.)/ ? "ok 38\n" : "not ok 38\n"; | |
127 | print $1 eq ' y' ? "ok 39\n" : "not ok 39\n"; | |
128 | print /(y{2,3}.)/ ? "ok 40\n" : "not ok 40\n"; | |
129 | print $1 eq 'yyy ' ? "ok 41\n" : "not ok 41\n"; | |
130 | print /x {3,4}/ ? "not ok 42\n" : "ok 42\n"; | |
131 | print /^xxx {3,4}/ ? "not ok 43\n" : "ok 43\n"; | |
352d5a3a LW |
132 | |
133 | $_ = "now is the time for all good men to come to."; | |
134 | @words = /(\w+)/g; | |
135 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" | |
136 | ? "ok 44\n" | |
137 | : "not ok 44\n"; | |
138 | ||
139 | @words = (); | |
140 | while (/\w+/g) { | |
141 | push(@words, $&); | |
142 | } | |
143 | print join(':',@words) eq "now:is:the:time:for:all:good:men:to:come:to" | |
144 | ? "ok 45\n" | |
145 | : "not ok 45\n"; | |
146 | ||
147 | @words = (); | |
71be2cbc | 148 | pos = 0; |
352d5a3a LW |
149 | while (/to/g) { |
150 | push(@words, $&); | |
151 | } | |
152 | print join(':',@words) eq "to:to" | |
153 | ? "ok 46\n" | |
71be2cbc | 154 | : "not ok 46 `@words'\n"; |
352d5a3a | 155 | |
71be2cbc | 156 | pos $_ = 0; |
352d5a3a LW |
157 | @words = /to/g; |
158 | print join(':',@words) eq "to:to" | |
159 | ? "ok 47\n" | |
71be2cbc | 160 | : "not ok 47 `@words'\n"; |
352d5a3a LW |
161 | |
162 | $_ = "abcdefghi"; | |
163 | ||
164 | $pat1 = 'def'; | |
165 | $pat2 = '^def'; | |
166 | $pat3 = '.def.'; | |
167 | $pat4 = 'abc'; | |
168 | $pat5 = '^abc'; | |
169 | $pat6 = 'abc$'; | |
170 | $pat7 = 'ghi'; | |
171 | $pat8 = '\w*ghi'; | |
172 | $pat9 = 'ghi$'; | |
173 | ||
174 | $t1=$t2=$t3=$t4=$t5=$t6=$t7=$t8=$t9=0; | |
175 | ||
176 | for $iter (1..5) { | |
177 | $t1++ if /$pat1/o; | |
178 | $t2++ if /$pat2/o; | |
179 | $t3++ if /$pat3/o; | |
180 | $t4++ if /$pat4/o; | |
181 | $t5++ if /$pat5/o; | |
182 | $t6++ if /$pat6/o; | |
183 | $t7++ if /$pat7/o; | |
184 | $t8++ if /$pat8/o; | |
185 | $t9++ if /$pat9/o; | |
186 | } | |
187 | ||
188 | $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; | |
189 | print $x eq '505550555' ? "ok 48\n" : "not ok 48 $x\n"; | |
1462b684 LW |
190 | |
191 | $xyz = 'xyz'; | |
192 | print "abc" =~ /^abc$|$xyz/ ? "ok 49\n" : "not ok 49\n"; | |
193 | ||
194 | # perl 4.009 says "unmatched ()" | |
195 | eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; | |
196 | print $@ eq "" ? "ok 50\n" : "not ok 50\n"; | |
197 | print $result eq "abc:bc" ? "ok 51\n" : "not ok 51\n"; | |
a0d0e21e LW |
198 | |
199 | ||
200 | $_="abcfooabcbar"; | |
201 | $x=/abc/g; | |
202 | print $` eq "" ? "ok 52\n" : "not ok 52\n" if $x; | |
203 | $x=/abc/g; | |
204 | print $` eq "abcfoo" ? "ok 53\n" : "not ok 53\n" if $x; | |
205 | $x=/abc/g; | |
206 | print $x == 0 ? "ok 54\n" : "not ok 54\n"; | |
71be2cbc | 207 | pos = 0; |
a0d0e21e LW |
208 | $x=/ABC/gi; |
209 | print $` eq "" ? "ok 55\n" : "not ok 55\n" if $x; | |
210 | $x=/ABC/gi; | |
211 | print $` eq "abcfoo" ? "ok 56\n" : "not ok 56\n" if $x; | |
212 | $x=/ABC/gi; | |
213 | print $x == 0 ? "ok 57\n" : "not ok 57\n"; | |
71be2cbc | 214 | pos = 0; |
a0d0e21e LW |
215 | $x=/abc/g; |
216 | print $' eq "fooabcbar" ? "ok 58\n" : "not ok 58\n" if $x; | |
217 | $x=/abc/g; | |
218 | print $' eq "bar" ? "ok 59\n" : "not ok 59\n" if $x; | |
219 | $_ .= ''; | |
220 | @x=/abc/g; | |
221 | print scalar @x == 2 ? "ok 60\n" : "not ok 60\n"; | |
71be2cbc | 222 | |
223 | $_ = "abdc"; | |
224 | pos $_ = 2; | |
c90c0ff4 | 225 | /\Gc/gc; |
71be2cbc | 226 | print "not " if (pos $_) != 2; |
227 | print "ok 61\n"; | |
c90c0ff4 | 228 | /\Gc/g; |
229 | print "not " if defined pos $_; | |
230 | print "ok 62\n"; | |
c277df42 IZ |
231 | |
232 | $out = 1; | |
233 | 'abc' =~ m'a(?{ $out = 2 })b'; | |
234 | print "not " if $out != 2; | |
235 | print "ok 63\n"; | |
236 | ||
237 | $out = 1; | |
238 | 'abc' =~ m'a(?{ $out = 3 })c'; | |
239 | print "not " if $out != 1; | |
459f542a | 240 | print "ok 64\n"; |
c277df42 IZ |
241 | |
242 | $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; | |
243 | @out = /(?<!foo)bar./g; | |
244 | print "not " if "@out" ne 'bar2 barf'; | |
245 | print "ok 65\n"; | |
246 | ||
8d37f932 DD |
247 | # Tests which depend on REG_INFTY |
248 | $reg_infty = defined $Config{reg_infty} ? $Config{reg_infty} : 32767; | |
249 | $reg_infty_m = $reg_infty - 1; $reg_infty_p = $reg_infty + 1; | |
250 | ||
251 | # As well as failing if the pattern matches do unexpected things, the | |
252 | # next three tests will fail if you should have picked up a lower-than- | |
253 | # default value for $reg_infty from Config.pm, but have not. | |
254 | ||
255 | undef $@; | |
256 | print "not " if eval q(('aaa' =~ /(a{1,$reg_infty_m})/)[0] ne 'aaa') || $@; | |
257 | print "ok 66\n"; | |
258 | ||
259 | undef $@; | |
260 | print "not " if eval q(('a' x $reg_infty_m) !~ /a{$reg_infty_m}/) || $@; | |
261 | print "ok 67\n"; | |
262 | ||
263 | undef $@; | |
264 | print "not " if eval q(('a' x ($reg_infty_m - 1)) =~ /a{$reg_infty_m}/) || $@; | |
265 | print "ok 68\n"; | |
266 | ||
267 | undef $@; | |
268 | eval "'aaa' =~ /a{1,$reg_infty}/"; | |
9baa0206 | 269 | print "not " if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 DD |
270 | print "ok 69\n"; |
271 | ||
272 | eval "'aaa' =~ /a{1,$reg_infty_p}/"; | |
273 | print "not " | |
9baa0206 | 274 | if $@ !~ m%^\QQuantifier in {,} bigger than%; |
8d37f932 DD |
275 | print "ok 70\n"; |
276 | undef $@; | |
277 | ||
278 | # Poke a couple more parse failures | |
279 | ||
280 | $context = 'x' x 256; | |
281 | eval qq("${context}y" =~ /(?<=$context)y/); | |
9baa0206 | 282 | print "not " if $@ !~ m%^\QLookbehind longer than 255 not%; |
8d37f932 DD |
283 | print "ok 71\n"; |
284 | ||
b8c5462f | 285 | # removed test |
8d37f932 DD |
286 | print "ok 72\n"; |
287 | ||
c277df42 | 288 | # Long Monsters |
8d37f932 | 289 | $test = 73; |
c277df42 IZ |
290 | for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory |
291 | $a = 'a' x $l; | |
292 | print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/; | |
293 | print "ok $test\n"; | |
294 | $test++; | |
73d6d589 | 295 | |
c277df42 IZ |
296 | print "not " if "b$a=" =~ /a$a=/; |
297 | print "ok $test\n"; | |
298 | $test++; | |
299 | } | |
300 | ||
301 | # 20000 nodes, each taking 3 words per string, and 1 per branch | |
302 | $long_constant_len = join '|', 12120 .. 32645; | |
303 | $long_var_len = join '|', 8120 .. 28645; | |
304 | %ans = ( 'ax13876y25677lbc' => 1, | |
305 | 'ax13876y25677mcb' => 0, # not b. | |
306 | 'ax13876y35677nbc' => 0, # Num too big | |
307 | 'ax13876y25677y21378obc' => 1, | |
308 | 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] | |
309 | 'ax13876y25677y21378y21378kbc' => 1, | |
310 | 'ax13876y25677y21378y21378kcb' => 0, # Not b. | |
311 | 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs | |
312 | ); | |
313 | ||
314 | for ( keys %ans ) { | |
73d6d589 | 315 | print "# const-len `$_' not => $ans{$_}\nnot " |
c277df42 IZ |
316 | if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o; |
317 | print "ok $test\n"; | |
318 | $test++; | |
73d6d589 | 319 | print "# var-len `$_' not => $ans{$_}\nnot " |
c277df42 IZ |
320 | if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o; |
321 | print "ok $test\n"; | |
322 | $test++; | |
323 | } | |
324 | ||
325 | $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; | |
326 | $expect = "(bla()) ((l)u((e))) (l(e)e)"; | |
327 | ||
73d6d589 | 328 | sub matchit { |
cc6b7395 | 329 | m/ |
c277df42 | 330 | ( |
73d6d589 | 331 | \( |
c277df42 IZ |
332 | (?{ $c = 1 }) # Initialize |
333 | (?: | |
334 | (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop | |
335 | (?! | |
336 | ) # Fail: will unwind one iteration back | |
73d6d589 | 337 | ) |
c277df42 IZ |
338 | (?: |
339 | [^()]+ # Match a big chunk | |
340 | (?= | |
341 | [()] | |
342 | ) # Do not try to match subchunks | |
343 | | | |
73d6d589 | 344 | \( |
c277df42 IZ |
345 | (?{ ++$c }) |
346 | | | |
73d6d589 | 347 | \) |
c277df42 IZ |
348 | (?{ --$c }) |
349 | ) | |
350 | )+ # This may not match with different subblocks | |
351 | ) | |
352 | (?(?{ $c != 0 }) | |
353 | (?! | |
354 | ) # Fail | |
355 | ) # Otherwise the chunk 1 may succeed with $c>0 | |
cc6b7395 | 356 | /xg; |
c277df42 IZ |
357 | } |
358 | ||
0f5d15d6 | 359 | @ans = (); |
c277df42 IZ |
360 | push @ans, $res while $res = matchit; |
361 | ||
362 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; | |
363 | print "ok $test\n"; | |
364 | $test++; | |
365 | ||
366 | @ans = matchit; | |
367 | ||
368 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; | |
369 | print "ok $test\n"; | |
370 | $test++; | |
371 | ||
96776eda GS |
372 | print "not " unless "abc" =~ /^(??{"a"})b/; |
373 | print "ok $test\n"; | |
374 | $test++; | |
375 | ||
0f5d15d6 | 376 | my $matched; |
14455d6c | 377 | $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; |
0f5d15d6 IZ |
378 | |
379 | @ans = @ans1 = (); | |
380 | push(@ans, $res), push(@ans1, $&) while $res = m/$matched/g; | |
381 | ||
382 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; | |
383 | print "ok $test\n"; | |
384 | $test++; | |
385 | ||
386 | print "# ans1='@ans1'\n# expect='$expect'\nnot " if "@ans1" ne $expect; | |
387 | print "ok $test\n"; | |
388 | $test++; | |
389 | ||
390 | @ans = m/$matched/g; | |
391 | ||
392 | print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; | |
393 | print "ok $test\n"; | |
394 | $test++; | |
395 | ||
c277df42 IZ |
396 | @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad |
397 | print "not " if "@ans" ne 'a/ b'; | |
398 | print "ok $test\n"; | |
399 | $test++; | |
400 | ||
cc6b7395 | 401 | $code = '{$blah = 45}'; |
c277df42 | 402 | $blah = 12; |
2cd61cdb IZ |
403 | eval { /(?$code)/ }; |
404 | print "not " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; | |
e4d48cc9 GS |
405 | print "ok $test\n"; |
406 | $test++; | |
407 | ||
2cd61cdb IZ |
408 | for $code ('{$blah = 45}','=xx') { |
409 | $blah = 12; | |
410 | $res = eval { "xx" =~ /(?$code)/o }; | |
411 | if ($code eq '=xx') { | |
412 | print "#'$@','$res','$blah'\nnot " unless not $@ and $res; | |
413 | } else { | |
73d6d589 | 414 | print "#'$@','$res','$blah'\nnot " unless $@ and $@ =~ /not allowed at runtime/ and $blah == 12; |
2cd61cdb IZ |
415 | } |
416 | print "ok $test\n"; | |
417 | $test++; | |
418 | } | |
419 | ||
e4d48cc9 GS |
420 | $code = '{$blah = 45}'; |
421 | $blah = 12; | |
422 | eval "/(?$code)/"; | |
cc6b7395 IZ |
423 | print "not " if $blah != 45; |
424 | print "ok $test\n"; | |
425 | $test++; | |
426 | ||
427 | $blah = 12; | |
428 | /(?{$blah = 45})/; | |
c277df42 IZ |
429 | print "not " if $blah != 45; |
430 | print "ok $test\n"; | |
431 | $test++; | |
432 | ||
74d6a13a MB |
433 | $x = 'banana'; |
434 | $x =~ /.a/g; | |
435 | print "not " unless pos($x) == 2; | |
436 | print "ok $test\n"; | |
437 | $test++; | |
438 | ||
439 | $x =~ /.z/gc; | |
440 | print "not " unless pos($x) == 2; | |
441 | print "ok $test\n"; | |
442 | $test++; | |
443 | ||
444 | sub f { | |
445 | my $p = $_[0]; | |
446 | return $p; | |
447 | } | |
448 | ||
449 | $x =~ /.a/g; | |
450 | print "not " unless f(pos($x)) == 4; | |
451 | print "ok $test\n"; | |
452 | $test++; | |
4599a1de | 453 | |
ce862d02 IZ |
454 | $x = $^R = 67; |
455 | 'foot' =~ /foo(?{$x = 12; 75})[t]/; | |
456 | print "not " unless $^R eq '75'; | |
457 | print "ok $test\n"; | |
458 | $test++; | |
459 | ||
460 | $x = $^R = 67; | |
461 | 'foot' =~ /foo(?{$x = 12; 75})[xy]/; | |
462 | print "not " unless $^R eq '67' and $x eq '12'; | |
463 | print "ok $test\n"; | |
464 | $test++; | |
465 | ||
466 | $x = $^R = 67; | |
467 | 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; | |
468 | print "not " unless $^R eq '79' and $x eq '12'; | |
469 | print "ok $test\n"; | |
470 | $test++; | |
471 | ||
8782bef2 GB |
472 | print "not " unless qr/\b\v$/i eq '(?i-xsm:\bv$)'; |
473 | print "ok $test\n"; | |
474 | $test++; | |
475 | ||
476 | print "not " unless qr/\b\v$/s eq '(?s-xim:\bv$)'; | |
477 | print "ok $test\n"; | |
478 | $test++; | |
479 | ||
480 | print "not " unless qr/\b\v$/m eq '(?m-xis:\bv$)'; | |
481 | print "ok $test\n"; | |
482 | $test++; | |
483 | ||
484 | print "not " unless qr/\b\v$/x eq '(?x-ism:\bv$)'; | |
485 | print "ok $test\n"; | |
486 | $test++; | |
487 | ||
488 | print "not " unless qr/\b\v$/xism eq '(?msix:\bv$)'; | |
489 | print "ok $test\n"; | |
490 | $test++; | |
491 | ||
492 | print "not " unless qr/\b\v$/ eq '(?-xism:\bv$)'; | |
97197631 IZ |
493 | print "ok $test\n"; |
494 | $test++; | |
495 | ||
7e5428c5 IZ |
496 | $_ = 'xabcx'; |
497 | foreach $ans ('', 'c') { | |
498 | /(?<=(?=a)..)((?=c)|.)/g; | |
02db2b7b | 499 | print "# \$1 ='$1'\n# \$ans='$ans'\nnot " unless $1 eq $ans; |
7e5428c5 IZ |
500 | print "ok $test\n"; |
501 | $test++; | |
502 | } | |
503 | ||
504 | $_ = 'a'; | |
505 | foreach $ans ('', 'a', '') { | |
506 | /^|a|$/g; | |
02db2b7b | 507 | print "# \$& ='$&'\n# \$ans='$ans'\nnot " unless $& eq $ans; |
7e5428c5 IZ |
508 | print "ok $test\n"; |
509 | $test++; | |
510 | } | |
511 | ||
09f25ae4 | 512 | sub prefixify { |
73d6d589 NIS |
513 | my($v,$a,$b,$res) = @_; |
514 | $v =~ s/\Q$a\E/$b/; | |
515 | print "not " unless $res eq $v; | |
09f25ae4 IZ |
516 | print "ok $test\n"; |
517 | $test++; | |
518 | } | |
519 | prefixify('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); | |
520 | prefixify('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); | |
521 | ||
522 | $_ = 'var="foo"'; | |
523 | /(\")/; | |
524 | print "not " unless $1 and /$1/; | |
525 | print "ok $test\n"; | |
526 | $test++; | |
527 | ||
73d6d589 | 528 | $a=qr/(?{++$b})/; |
2cd61cdb | 529 | $b = 7; |
73d6d589 NIS |
530 | /$a$a/; |
531 | print "not " unless $b eq '9'; | |
2cd61cdb IZ |
532 | print "ok $test\n"; |
533 | $test++; | |
534 | ||
73d6d589 NIS |
535 | $c="$a"; |
536 | /$a$a/; | |
537 | print "not " unless $b eq '11'; | |
2cd61cdb IZ |
538 | print "ok $test\n"; |
539 | $test++; | |
540 | ||
541 | { | |
73d6d589 NIS |
542 | use re "eval"; |
543 | /$a$c$a/; | |
544 | print "not " unless $b eq '14'; | |
2cd61cdb IZ |
545 | print "ok $test\n"; |
546 | $test++; | |
547 | ||
160cb429 JH |
548 | local $lex_a = 2; |
549 | my $lex_a = 43; | |
550 | my $lex_b = 17; | |
551 | my $lex_c = 27; | |
552 | my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); | |
553 | print "not " unless $lex_res eq '1'; | |
554 | print "ok $test\n"; | |
555 | $test++; | |
556 | print "not " unless $lex_a eq '44'; | |
557 | print "ok $test\n"; | |
558 | $test++; | |
559 | print "not " unless $lex_c eq '43'; | |
560 | print "ok $test\n"; | |
561 | $test++; | |
562 | ||
563 | ||
73d6d589 | 564 | no re "eval"; |
2cd61cdb | 565 | $match = eval { /$a$c$a/ }; |
73d6d589 | 566 | print "not " |
2cd61cdb IZ |
567 | unless $b eq '14' and $@ =~ /Eval-group not allowed/ and not $match; |
568 | print "ok $test\n"; | |
569 | $test++; | |
570 | } | |
cbce877f IZ |
571 | |
572 | { | |
160cb429 JH |
573 | local $lex_a = 2; |
574 | my $lex_a = 43; | |
575 | my $lex_b = 17; | |
576 | my $lex_c = 27; | |
577 | my $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); | |
578 | print "not " unless $lex_res eq '1'; | |
579 | print "ok $test\n"; | |
580 | $test++; | |
581 | print "not " unless $lex_a eq '44'; | |
582 | print "ok $test\n"; | |
583 | $test++; | |
584 | print "not " unless $lex_c eq '43'; | |
585 | print "ok $test\n"; | |
586 | $test++; | |
587 | } | |
588 | ||
589 | { | |
cbce877f IZ |
590 | package aa; |
591 | $c = 2; | |
592 | $::c = 3; | |
593 | '' =~ /(?{ $c = 4 })/; | |
594 | print "not " unless $c == 4; | |
595 | } | |
596 | print "ok $test\n"; | |
597 | $test++; | |
598 | print "not " unless $c == 3; | |
599 | print "ok $test\n"; | |
73d6d589 NIS |
600 | $test++; |
601 | ||
4599a1de JH |
602 | sub must_warn_pat { |
603 | my $warn_pat = shift; | |
604 | return sub { print "not " unless $_[0] =~ /$warn_pat/ } | |
605 | } | |
606 | ||
607 | sub must_warn { | |
608 | my ($warn_pat, $code) = @_; | |
9f1b1f2d GS |
609 | local %SIG; |
610 | eval 'BEGIN { use warnings; $SIG{__WARN__} = $warn_pat };' . $code; | |
4599a1de JH |
611 | print "ok $test\n"; |
612 | $test++; | |
613 | } | |
614 | ||
615 | ||
616 | sub make_must_warn { | |
617 | my $warn_pat = shift; | |
618 | return sub { must_warn(must_warn_pat($warn_pat)) } | |
619 | } | |
620 | ||
621 | my $for_future = make_must_warn('reserved for future extensions'); | |
622 | ||
623 | &$for_future('q(a:[b]:) =~ /[x[:foo:]]/'); | |
9baa0206 HS |
624 | |
625 | #&$for_future('q(a=[b]=) =~ /[x[=foo=]]/'); | |
626 | print "ok $test\n"; $test++; # now a fatal croak | |
627 | ||
628 | #&$for_future('q(a.[b].) =~ /[x[.foo.]]/'); | |
629 | print "ok $test\n"; $test++; # now a fatal croak | |
f7e33566 GS |
630 | |
631 | # test if failure of patterns returns empty list | |
632 | $_ = 'aaa'; | |
633 | @_ = /bbb/; | |
634 | print "not " if @_; | |
635 | print "ok $test\n"; | |
636 | $test++; | |
637 | ||
638 | @_ = /bbb/g; | |
639 | print "not " if @_; | |
640 | print "ok $test\n"; | |
641 | $test++; | |
642 | ||
643 | @_ = /(bbb)/; | |
644 | print "not " if @_; | |
645 | print "ok $test\n"; | |
646 | $test++; | |
647 | ||
648 | @_ = /(bbb)/g; | |
649 | print "not " if @_; | |
650 | print "ok $test\n"; | |
651 | $test++; | |
652 | ||
6cef1e77 IZ |
653 | /a(?=.$)/; |
654 | print "not " if $#+ != 0 or $#- != 0; | |
655 | print "ok $test\n"; | |
656 | $test++; | |
657 | ||
658 | print "not " if $+[0] != 2 or $-[0] != 1; | |
659 | print "ok $test\n"; | |
660 | $test++; | |
661 | ||
73d6d589 | 662 | print "not " |
6cef1e77 IZ |
663 | if defined $+[1] or defined $-[1] or defined $+[2] or defined $-[2]; |
664 | print "ok $test\n"; | |
665 | $test++; | |
666 | ||
667 | /a(a)(a)/; | |
668 | print "not " if $#+ != 2 or $#- != 2; | |
669 | print "ok $test\n"; | |
670 | $test++; | |
671 | ||
672 | print "not " if $+[0] != 3 or $-[0] != 0; | |
673 | print "ok $test\n"; | |
674 | $test++; | |
675 | ||
676 | print "not " if $+[1] != 2 or $-[1] != 1; | |
677 | print "ok $test\n"; | |
678 | $test++; | |
679 | ||
680 | print "not " if $+[2] != 3 or $-[2] != 2; | |
681 | print "ok $test\n"; | |
682 | $test++; | |
683 | ||
73d6d589 | 684 | print "not " |
6cef1e77 IZ |
685 | if defined $+[3] or defined $-[3] or defined $+[4] or defined $-[4]; |
686 | print "ok $test\n"; | |
687 | $test++; | |
688 | ||
689 | /.(a)(b)?(a)/; | |
690 | print "not " if $#+ != 3 or $#- != 3; | |
691 | print "ok $test\n"; | |
692 | $test++; | |
693 | ||
694 | print "not " if $+[0] != 3 or $-[0] != 0; | |
695 | print "ok $test\n"; | |
696 | $test++; | |
697 | ||
698 | print "not " if $+[1] != 2 or $-[1] != 1; | |
699 | print "ok $test\n"; | |
700 | $test++; | |
701 | ||
702 | print "not " if $+[3] != 3 or $-[3] != 2; | |
703 | print "ok $test\n"; | |
704 | $test++; | |
705 | ||
73d6d589 | 706 | print "not " |
6cef1e77 IZ |
707 | if defined $+[2] or defined $-[2] or defined $+[4] or defined $-[4]; |
708 | print "ok $test\n"; | |
709 | $test++; | |
710 | ||
711 | /.(a)/; | |
712 | print "not " if $#+ != 1 or $#- != 1; | |
713 | print "ok $test\n"; | |
714 | $test++; | |
715 | ||
716 | print "not " if $+[0] != 2 or $-[0] != 0; | |
717 | print "ok $test\n"; | |
718 | $test++; | |
719 | ||
720 | print "not " if $+[1] != 2 or $-[1] != 1; | |
721 | print "ok $test\n"; | |
722 | $test++; | |
723 | ||
73d6d589 | 724 | print "not " |
6cef1e77 IZ |
725 | if defined $+[2] or defined $-[2] or defined $+[3] or defined $-[3]; |
726 | print "ok $test\n"; | |
727 | $test++; | |
728 | ||
03a27ae7 | 729 | eval { $+[0] = 13; }; |
73d6d589 | 730 | print "not " |
03a27ae7 MG |
731 | if $@ !~ /^Modification of a read-only value attempted/; |
732 | print "ok $test\n"; | |
733 | $test++; | |
734 | ||
735 | eval { $-[0] = 13; }; | |
73d6d589 | 736 | print "not " |
03a27ae7 MG |
737 | if $@ !~ /^Modification of a read-only value attempted/; |
738 | print "ok $test\n"; | |
739 | $test++; | |
740 | ||
741 | eval { @+ = (7, 6, 5); }; | |
73d6d589 | 742 | print "not " |
03a27ae7 MG |
743 | if $@ !~ /^Modification of a read-only value attempted/; |
744 | print "ok $test\n"; | |
745 | $test++; | |
746 | ||
747 | eval { @- = qw(foo bar); }; | |
73d6d589 | 748 | print "not " |
03a27ae7 MG |
749 | if $@ !~ /^Modification of a read-only value attempted/; |
750 | print "ok $test\n"; | |
751 | $test++; | |
752 | ||
8f580fb8 IZ |
753 | /.(a)(ba*)?/; |
754 | print "#$#-..$#+\nnot " if $#+ != 2 or $#- != 1; | |
755 | print "ok $test\n"; | |
756 | $test++; | |
757 | ||
ad94a511 IZ |
758 | $_ = 'aaa'; |
759 | pos = 1; | |
760 | @a = /\Ga/g; | |
761 | print "not " unless "@a" eq "a a"; | |
762 | print "ok $test\n"; | |
763 | $test++; | |
764 | ||
22e551b9 IZ |
765 | $str = 'abcde'; |
766 | pos $str = 2; | |
767 | ||
768 | print "not " if $str =~ /^\G/; | |
769 | print "ok $test\n"; | |
770 | $test++; | |
771 | ||
772 | print "not " if $str =~ /^.\G/; | |
773 | print "ok $test\n"; | |
774 | $test++; | |
775 | ||
776 | print "not " unless $str =~ /^..\G/; | |
777 | print "ok $test\n"; | |
778 | $test++; | |
779 | ||
780 | print "not " if $str =~ /^...\G/; | |
781 | print "ok $test\n"; | |
782 | $test++; | |
783 | ||
784 | print "not " unless $str =~ /.\G./ and $& eq 'bc'; | |
785 | print "ok $test\n"; | |
786 | $test++; | |
787 | ||
788 | print "not " unless $str =~ /\G../ and $& eq 'cd'; | |
789 | print "ok $test\n"; | |
790 | $test++; | |
791 | ||
9661b544 IZ |
792 | undef $foo; undef $bar; |
793 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 794 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/ |
9661b544 IZ |
795 | and $foo eq 'abcde' and $bar eq 2; |
796 | print "ok $test\n"; | |
797 | $test++; | |
798 | ||
799 | undef $foo; undef $bar; | |
800 | pos $str = undef; | |
801 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 802 | unless $str =~ /b(?{$foo = $_; $bar = pos})c/g |
9661b544 IZ |
803 | and $foo eq 'abcde' and $bar eq 2 and pos $str eq 3; |
804 | print "ok $test\n"; | |
805 | $test++; | |
806 | ||
807 | $_ = $str; | |
808 | ||
809 | undef $foo; undef $bar; | |
810 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 811 | unless /b(?{$foo = $_; $bar = pos})c/ |
9661b544 IZ |
812 | and $foo eq 'abcde' and $bar eq 2; |
813 | print "ok $test\n"; | |
814 | $test++; | |
815 | ||
816 | undef $foo; undef $bar; | |
817 | print "#'$str','$foo','$bar'\nnot " | |
73d6d589 | 818 | unless /b(?{$foo = $_; $bar = pos})c/g |
9661b544 IZ |
819 | and $foo eq 'abcde' and $bar eq 2 and pos eq 3; |
820 | print "ok $test\n"; | |
821 | $test++; | |
822 | ||
823 | undef $foo; undef $bar; | |
824 | pos = undef; | |
825 | 1 while /b(?{$foo = $_; $bar = pos})c/g; | |
826 | print "#'$str','$foo','$bar'\nnot " | |
827 | unless $foo eq 'abcde' and $bar eq 2 and not defined pos; | |
828 | print "ok $test\n"; | |
829 | $test++; | |
830 | ||
831 | undef $foo; undef $bar; | |
832 | $_ = 'abcde|abcde'; | |
833 | print "#'$str','$foo','$bar','$_'\nnot " | |
73d6d589 | 834 | unless s/b(?{$foo = $_; $bar = pos})c/x/g and $foo eq 'abcde|abcde' |
9661b544 IZ |
835 | and $bar eq 8 and $_ eq 'axde|axde'; |
836 | print "ok $test\n"; | |
837 | $test++; | |
838 | ||
5c5e4c24 IZ |
839 | @res = (); |
840 | # List context: | |
841 | $_ = 'abcde|abcde'; | |
842 | @dummy = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; | |
843 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
844 | $res = "@res"; | |
845 | print "#'@res' '$_'\nnot " | |
846 | unless "@res" eq "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; | |
847 | print "ok $test\n"; | |
848 | $test++; | |
849 | ||
850 | @res = (); | |
851 | @dummy = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; | |
852 | @res = map {defined $_ ? "'$_'" : 'undef'} @res; | |
853 | $res = "@res"; | |
854 | print "#'@res' '$_'\nnot " | |
855 | unless "@res" eq | |
856 | "'' 'ab' 'cde|abcde' " . | |
857 | "'' 'abc' 'de|abcde' " . | |
858 | "'abcd' 'e|' 'abcde' " . | |
859 | "'abcde|' 'ab' 'cde' " . | |
860 | "'abcde|' 'abc' 'de'" ; | |
861 | print "ok $test\n"; | |
862 | $test++; | |
863 | ||
b7a35066 IZ |
864 | #Some more \G anchor checks |
865 | $foo='aabbccddeeffgg'; | |
866 | ||
867 | pos($foo)=1; | |
868 | ||
869 | $foo=~/.\G(..)/g; | |
870 | print "not " unless($1 eq 'ab'); | |
871 | print "ok $test\n"; | |
872 | $test++; | |
873 | ||
874 | pos($foo) += 1; | |
875 | $foo=~/.\G(..)/g; | |
876 | print "not " unless($1 eq 'cc'); | |
877 | print "ok $test\n"; | |
878 | $test++; | |
879 | ||
880 | pos($foo) += 1; | |
881 | $foo=~/.\G(..)/g; | |
882 | print "not " unless($1 eq 'de'); | |
883 | print "ok $test\n"; | |
884 | $test++; | |
885 | ||
0ef3e39e HS |
886 | print "not " unless $foo =~ /\Gef/g; |
887 | print "ok $test\n"; | |
888 | $test++; | |
889 | ||
b7a35066 IZ |
890 | undef pos $foo; |
891 | ||
892 | $foo=~/\G(..)/g; | |
893 | print "not " unless($1 eq 'aa'); | |
894 | print "ok $test\n"; | |
895 | $test++; | |
896 | ||
897 | $foo=~/\G(..)/g; | |
898 | print "not " unless($1 eq 'bb'); | |
899 | print "ok $test\n"; | |
900 | $test++; | |
901 | ||
902 | pos($foo)=5; | |
903 | $foo=~/\G(..)/g; | |
904 | print "not " unless($1 eq 'cd'); | |
905 | print "ok $test\n"; | |
906 | $test++; | |
907 | ||
73d6d589 | 908 | $_='123x123'; |
e60df1fa IZ |
909 | @res = /(\d*|x)/g; |
910 | print "not " unless('123||x|123|' eq join '|', @res); | |
911 | print "ok $test\n"; | |
912 | $test++; | |
913 | ||
9d080a66 GS |
914 | # see if matching against temporaries (created via pp_helem()) is safe |
915 | { foo => "ok $test\n".$^X }->{foo} =~ /^(.*)\n/g; | |
916 | print "$1\n"; | |
917 | $test++; | |
918 | ||
cf93c79d IZ |
919 | # See if $i work inside (?{}) in the presense of saved substrings and |
920 | # changing $_ | |
921 | @a = qw(foo bar); | |
922 | @b = (); | |
923 | s/(\w)(?{push @b, $1})/,$1,/g for @a; | |
924 | ||
925 | print "# \@b='@b', expect 'f o o b a r'\nnot " unless("@b" eq "f o o b a r"); | |
926 | print "ok $test\n"; | |
927 | $test++; | |
928 | ||
929 | print "not " unless("@a" eq ",f,,o,,o, ,b,,a,,r,"); | |
930 | print "ok $test\n"; | |
931 | $test++; | |
932 | ||
2c914db6 | 933 | $brackets = qr{ |
14455d6c | 934 | { (?> [^{}]+ | (??{ $brackets }) )* } |
2c914db6 IZ |
935 | }x; |
936 | ||
937 | "{{}" =~ $brackets; | |
938 | print "ok $test\n"; # Did we survive? | |
939 | $test++; | |
940 | ||
941 | "something { long { and } hairy" =~ $brackets; | |
942 | print "ok $test\n"; # Did we survive? | |
943 | $test++; | |
944 | ||
14455d6c | 945 | "something { long { and } hairy" =~ m/((??{ $brackets }))/; |
2c914db6 IZ |
946 | print "not " unless $1 eq "{ and }"; |
947 | print "ok $test\n"; | |
948 | $test++; | |
949 | ||
30944b6d IZ |
950 | $_ = "a-a\nxbb"; |
951 | pos=1; | |
952 | m/^-.*bb/mg and print "not "; | |
953 | print "ok $test\n"; | |
954 | $test++; | |
30382c73 IZ |
955 | |
956 | $text = "aaXbXcc"; | |
957 | pos($text)=0; | |
958 | $text =~ /\GXb*X/g and print 'not '; | |
959 | print "ok $test\n"; | |
960 | $test++; | |
3cf5c195 IZ |
961 | |
962 | $text = "xA\n" x 500; | |
963 | $text =~ /^\s*A/m and print 'not '; | |
964 | print "ok $test\n"; | |
965 | $test++; | |
d506a20d IZ |
966 | |
967 | $text = "abc dbf"; | |
968 | @res = ($text =~ /.*?(b).*?\b/g); | |
969 | "@res" eq 'b b' or print 'not '; | |
970 | print "ok $test\n"; | |
971 | $test++; | |
972 | ||
9442cb0e | 973 | @a = map chr,0..255; |
aeaf5620 GS |
974 | |
975 | @b = grep(/\S/,@a); | |
976 | @c = grep(/[^\s]/,@a); | |
977 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
978 | print "ok $test\n"; |
979 | $test++; | |
980 | ||
aeaf5620 GS |
981 | @b = grep(/\S/,@a); |
982 | @c = grep(/[\S]/,@a); | |
983 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
984 | print "ok $test\n"; |
985 | $test++; | |
986 | ||
aeaf5620 GS |
987 | @b = grep(/\s/,@a); |
988 | @c = grep(/[^\S]/,@a); | |
989 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
990 | print "ok $test\n"; |
991 | $test++; | |
992 | ||
aeaf5620 GS |
993 | @b = grep(/\s/,@a); |
994 | @c = grep(/[\s]/,@a); | |
995 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
996 | print "ok $test\n"; |
997 | $test++; | |
998 | ||
aeaf5620 GS |
999 | @b = grep(/\D/,@a); |
1000 | @c = grep(/[^\d]/,@a); | |
1001 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1002 | print "ok $test\n"; |
1003 | $test++; | |
1004 | ||
aeaf5620 GS |
1005 | @b = grep(/\D/,@a); |
1006 | @c = grep(/[\D]/,@a); | |
1007 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1008 | print "ok $test\n"; |
1009 | $test++; | |
1010 | ||
aeaf5620 GS |
1011 | @b = grep(/\d/,@a); |
1012 | @c = grep(/[^\D]/,@a); | |
1013 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1014 | print "ok $test\n"; |
1015 | $test++; | |
1016 | ||
aeaf5620 GS |
1017 | @b = grep(/\d/,@a); |
1018 | @c = grep(/[\d]/,@a); | |
1019 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1020 | print "ok $test\n"; |
1021 | $test++; | |
1022 | ||
aeaf5620 GS |
1023 | @b = grep(/\W/,@a); |
1024 | @c = grep(/[^\w]/,@a); | |
1025 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1026 | print "ok $test\n"; |
1027 | $test++; | |
1028 | ||
aeaf5620 GS |
1029 | @b = grep(/\W/,@a); |
1030 | @c = grep(/[\W]/,@a); | |
1031 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1032 | print "ok $test\n"; |
1033 | $test++; | |
1034 | ||
aeaf5620 GS |
1035 | @b = grep(/\w/,@a); |
1036 | @c = grep(/[^\W]/,@a); | |
1037 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1038 | print "ok $test\n"; |
1039 | $test++; | |
1040 | ||
aeaf5620 GS |
1041 | @b = grep(/\w/,@a); |
1042 | @c = grep(/[\w]/,@a); | |
1043 | print "not " if "@b" ne "@c"; | |
9442cb0e GS |
1044 | print "ok $test\n"; |
1045 | $test++; | |
1aeab75a GS |
1046 | |
1047 | # see if backtracking optimization works correctly | |
1048 | "\n\n" =~ /\n $ \n/x or print "not "; | |
1049 | print "ok $test\n"; | |
1050 | $test++; | |
1051 | ||
1052 | "\n\n" =~ /\n* $ \n/x or print "not "; | |
1053 | print "ok $test\n"; | |
1054 | $test++; | |
1055 | ||
1056 | "\n\n" =~ /\n+ $ \n/x or print "not "; | |
1057 | print "ok $test\n"; | |
1058 | $test++; | |
05b4157f GS |
1059 | |
1060 | [] =~ /^ARRAY/ or print "# [] \nnot "; | |
1061 | print "ok $test\n"; | |
1062 | $test++; | |
1063 | ||
1064 | eval << 'EOE'; | |
1065 | { | |
1066 | package S; | |
1067 | use overload '""' => sub { 'Object S' }; | |
1068 | sub new { bless [] } | |
1069 | } | |
1070 | $a = 'S'->new; | |
1071 | EOE | |
1072 | ||
1073 | $a and $a =~ /^Object\sS/ or print "# '$a' \nnot "; | |
1074 | print "ok $test\n"; | |
1075 | $test++; | |
815d35b9 MG |
1076 | |
1077 | # test result of match used as match (!) | |
1078 | 'a1b' =~ ('xyz' =~ /y/) and $` eq 'a' or print "not "; | |
1079 | print "ok $test\n"; | |
1080 | $test++; | |
1081 | ||
1082 | 'a1b' =~ ('xyz' =~ /t/) and $` eq 'a' or print "not "; | |
1083 | print "ok $test\n"; | |
1084 | $test++; | |
5e39e1e5 HS |
1085 | |
1086 | $w = 0; | |
1087 | { | |
1088 | local $SIG{__WARN__} = sub { $w = 1 }; | |
1089 | local $^W = 1; | |
1090 | $w = 1 if ("1\n" x 102) =~ /^\s*\n/m; | |
1091 | } | |
1092 | print $w ? "not " : "", "ok $test\n"; | |
1093 | $test++; | |
aaa51d5e JF |
1094 | |
1095 | my %space = ( spc => " ", | |
1096 | tab => "\t", | |
1097 | cr => "\r", | |
1098 | lf => "\n", | |
1099 | ff => "\f", | |
75369ccb JH |
1100 | # There's no \v but the vertical tabulator seems miraculously |
1101 | # be 11 both in ASCII and EBCDIC. | |
aaa51d5e JF |
1102 | vt => chr(11), |
1103 | false => "space" ); | |
1104 | ||
1105 | my @space0 = sort grep { $space{$_} =~ /\s/ } keys %space; | |
1106 | my @space1 = sort grep { $space{$_} =~ /[[:space:]]/ } keys %space; | |
1107 | my @space2 = sort grep { $space{$_} =~ /[[:blank:]]/ } keys %space; | |
1108 | ||
1109 | print "not " unless "@space0" eq "cr ff lf spc tab"; | |
3bec3564 | 1110 | print "ok $test # @space0\n"; |
aaa51d5e JF |
1111 | $test++; |
1112 | ||
1113 | print "not " unless "@space1" eq "cr ff lf spc tab vt"; | |
3bec3564 | 1114 | print "ok $test # @space1\n"; |
aaa51d5e JF |
1115 | $test++; |
1116 | ||
1117 | print "not " unless "@space2" eq "spc tab"; | |
3bec3564 | 1118 | print "ok $test # @space2\n"; |
aaa51d5e | 1119 | $test++; |
73d6d589 | 1120 | |
a1933d95 HS |
1121 | # bugid 20001021.005 - this caused a SEGV |
1122 | print "not " unless undef =~ /^([^\/]*)(.*)$/; | |
1123 | print "ok $test\n"; | |
1124 | $test++; | |
b91bb191 JH |
1125 | |
1126 | # bugid 20000731.001 | |
1127 | ||
1128 | print "not " unless "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/; | |
1129 | print "ok $test\n"; | |
1130 | $test++; | |
1131 | ||
5ae032e5 JH |
1132 | my $ordA = ord('A'); |
1133 | ||
3baa4c62 JH |
1134 | $_ = "a\x{100}b"; |
1135 | if (/(.)(\C)(\C)(.)/) { | |
1136 | print "ok 232\n"; | |
1137 | if ($1 eq "a") { | |
1138 | print "ok 233\n"; | |
1139 | } else { | |
1140 | print "not ok 233\n"; | |
1141 | } | |
5ae032e5 JH |
1142 | if ($ordA == 65) { # ASCII (or equivalent), should be UTF-8 |
1143 | if ($2 eq "\xC4") { | |
1144 | print "ok 234\n"; | |
1145 | } else { | |
1146 | print "not ok 234\n"; | |
1147 | } | |
1148 | if ($3 eq "\x80") { | |
1149 | print "ok 235\n"; | |
1150 | } else { | |
1151 | print "not ok 235\n"; | |
1152 | } | |
1153 | } elsif ($ordA == 193) { # EBCDIC (or equivalent), should be UTF-EBCDIC | |
1154 | if ($2 eq "\x8C") { | |
1155 | print "ok 234\n"; | |
1156 | } else { | |
1157 | print "not ok 234\n"; | |
1158 | } | |
1159 | if ($3 eq "\x41") { | |
1160 | print "ok 235\n"; | |
1161 | } else { | |
1162 | print "not ok 235\n"; | |
1163 | } | |
3baa4c62 | 1164 | } else { |
5ae032e5 JH |
1165 | for (234..235) { |
1166 | print "not ok $_ # ord('A') == $ordA\n"; | |
1167 | } | |
3baa4c62 JH |
1168 | } |
1169 | if ($4 eq "b") { | |
1170 | print "ok 236\n"; | |
1171 | } else { | |
1172 | print "not ok 236\n"; | |
1173 | } | |
1174 | } else { | |
1175 | for (232..236) { | |
1176 | print "not ok $_\n"; | |
1177 | } | |
1178 | } | |
1179 | $_ = "\x{100}"; | |
1180 | if (/(\C)/g) { | |
1181 | print "ok 237\n"; | |
73d6d589 | 1182 | # currently \C are still tagged as UTF-8 |
5ae032e5 JH |
1183 | if ($ordA == 65) { |
1184 | if ($1 eq "\xC4") { | |
1185 | print "ok 238\n"; | |
1186 | } else { | |
1187 | print "not ok 238\n"; | |
1188 | } | |
1189 | } elsif ($ordA == 193) { | |
1190 | if ($1 eq "\x8C") { | |
1191 | print "ok 238\n"; | |
1192 | } else { | |
1193 | print "not ok 238\n"; | |
1194 | } | |
3baa4c62 | 1195 | } else { |
5ae032e5 | 1196 | print "not ok 238 # ord('A') == $ordA\n"; |
3baa4c62 JH |
1197 | } |
1198 | } else { | |
1199 | for (237..238) { | |
1200 | print "not ok $_\n"; | |
1201 | } | |
1202 | } | |
1203 | if (/(\C)/g) { | |
1204 | print "ok 239\n"; | |
73d6d589 | 1205 | # currently \C are still tagged as UTF-8 |
5ae032e5 JH |
1206 | if ($ordA == 65) { |
1207 | if ($1 eq "\x80") { | |
1208 | print "ok 240\n"; | |
1209 | } else { | |
1210 | print "not ok 240\n"; | |
1211 | } | |
1212 | } elsif ($ordA == 193) { | |
1213 | if ($1 eq "\x41") { | |
1214 | print "ok 240\n"; | |
1215 | } else { | |
1216 | print "not ok 240\n"; | |
1217 | } | |
3baa4c62 | 1218 | } else { |
5ae032e5 | 1219 | print "not ok 240 # ord('A') == $ordA\n"; |
3baa4c62 JH |
1220 | } |
1221 | } else { | |
1222 | for (239..240) { | |
1223 | print "not ok $_\n"; | |
1224 | } | |
1225 | } | |
b485d051 | 1226 | |
db615365 JP |
1227 | { |
1228 | # japhy -- added 03/03/2001 | |
1229 | () = (my $str = "abc") =~ /(...)/; | |
1230 | $str = "def"; | |
1231 | print "not " if $1 ne "abc"; | |
fd291da9 JH |
1232 | print "ok 241\n"; |
1233 | } | |
1234 | ||
1235 | # The 242 and 243 go with the 244 and 245. | |
1236 | # The trick is that in EBCDIC the explicit numeric range should match | |
1237 | # (as also in non-EBCDIC) but the explicit alphabetic range should not match. | |
1238 | ||
1239 | if ("\x8e" =~ /[\x89-\x91]/) { | |
1240 | print "ok 242\n"; | |
1241 | } else { | |
1242 | print "not ok 242\n"; | |
1243 | } | |
1244 | ||
1245 | if ("\xce" =~ /[\xc9-\xd1]/) { | |
db615365 | 1246 | print "ok 243\n"; |
fd291da9 JH |
1247 | } else { |
1248 | print "not ok 243\n"; | |
1249 | } | |
1250 | ||
1251 | # In most places these tests would succeed since \x8e does not | |
1252 | # in most character sets match 'i' or 'j' nor would \xce match | |
1253 | # 'I' or 'J', but strictly speaking these tests are here for | |
1254 | # the good of EBCDIC, so let's test these only there. | |
1255 | if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC | |
1256 | if ("\x8e" !~ /[i-j]/) { | |
1257 | print "ok 244\n"; | |
1258 | } else { | |
1259 | print "not ok 244\n"; | |
1260 | } | |
1261 | if ("\xce" !~ /[I-J]/) { | |
1262 | print "ok 245\n"; | |
1263 | } else { | |
1264 | print "not ok 245\n"; | |
1265 | } | |
1266 | } else { | |
1267 | for (244..245) { | |
60425c38 | 1268 | print "ok $_ # Skip: only in EBCDIC\n"; |
fd291da9 | 1269 | } |
db615365 | 1270 | } |
4765795a JH |
1271 | |
1272 | print "not " unless "\x{ab}" =~ /\x{ab}/; | |
1273 | print "ok 246\n"; | |
1274 | ||
1275 | print "not " unless "\x{abcd}" =~ /\x{abcd}/; | |
1276 | print "ok 247\n"; | |
1277 | ||
1278 | { | |
1279 | # bug id 20001008.001 | |
1280 | ||
4765795a JH |
1281 | my $test = 248; |
1282 | my @x = ("stra\337e 138","stra\337e 138"); | |
1283 | for (@x) { | |
1284 | s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; | |
1285 | my($latin) = /^(.+)(?:\s+\d)/; | |
1286 | print $latin eq "stra\337e" ? "ok $test\n" : # 248,249 | |
1287 | "#latin[$latin]\nnot ok $test\n"; | |
1288 | $test++; | |
1289 | $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a | |
169da838 | 1290 | use utf8; # needed for the raw UTF-8 |
4765795a JH |
1291 | $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a |
1292 | } | |
1293 | } | |
1294 | ||
1295 | { | |
1296 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1297 | print "ok 250\n"; | |
1298 | ||
1299 | print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1300 | print "ok 251\n"; | |
1301 | ||
1302 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; | |
1303 | print "ok 252\n"; | |
1304 | ||
1305 | print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; | |
1306 | print "ok 253\n"; | |
1307 | ||
1308 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1309 | print "ok 254\n"; | |
1310 | ||
1311 | print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1312 | print "ok 255\n"; | |
1313 | ||
1314 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; | |
1315 | print "ok 256\n"; | |
1316 | ||
1317 | print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; | |
1318 | print "ok 257\n"; | |
1319 | } | |
1320 | ||
1321 | { | |
1322 | # the first half of 20001028.003 | |
1323 | ||
1324 | my $X = chr(1448); | |
1325 | my ($Y) = $X =~ /(.*)/; | |
1326 | print "not " unless $Y eq v1448 && length($Y) == 1; | |
1327 | print "ok 258\n"; | |
1328 | } | |
1329 | ||
1330 | { | |
1331 | # 20001108.001 | |
1332 | ||
1333 | my $X = "Szab\x{f3},Bal\x{e1}zs"; | |
1334 | my $Y = $X; | |
1335 | $Y =~ s/(B)/$1/ for 0..3; | |
1336 | print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; | |
1337 | print "ok 259\n"; | |
1338 | } | |
1339 | ||
1340 | { | |
1341 | # the second half of 20001028.003 | |
1342 | ||
3568d838 | 1343 | my $X = ''; |
4765795a JH |
1344 | $X =~ s/^/chr(1488)/e; |
1345 | print "not " unless length $X == 1 && ord($X) == 1488; | |
1346 | print "ok 260\n"; | |
1347 | } | |
1348 | ||
1349 | { | |
1350 | # 20000517.001 | |
1351 | ||
1352 | my $x = "\x{100}A"; | |
1353 | ||
1354 | $x =~ s/A/B/; | |
1355 | ||
1356 | print "not " unless $x eq "\x{100}B" && length($x) == 2; | |
1357 | print "ok 261\n"; | |
1358 | } | |
1359 | ||
1360 | { | |
1361 | # bug id 20001230.002 | |
1362 | ||
1363 | print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; | |
1364 | print "ok 262\n"; | |
1365 | ||
1366 | print "not " unless "École" =~ /^\C\C(c)/; | |
1367 | print "ok 263\n"; | |
1368 | } | |
1369 | ||
1370 | { | |
1371 | my $test = 264; # till 575 | |
1372 | ||
1373 | use charnames ':full'; | |
1374 | ||
1375 | # This is far from complete testing, there are dozens of character | |
1376 | # classes in Unicode. The mixing of literals and \N{...} is | |
1377 | # intentional so that in non-Latin-1 places we test the native | |
1378 | # characters, not the Unicode code points. | |
1379 | ||
1380 | my %s = ( | |
1381 | "a" => 'Ll', | |
1382 | "\N{CYRILLIC SMALL LETTER A}" => 'Ll', | |
1383 | "A" => 'Lu', | |
1384 | "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', | |
1385 | "\N{HIRAGANA LETTER SMALL A}" => 'Lo', | |
1386 | "\N{COMBINING GRAVE ACCENT}" => 'Mn', | |
1387 | "0" => 'Nd', | |
1388 | "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', | |
1389 | "_" => 'N', | |
1390 | "!" => 'P', | |
1391 | " " => 'Zs', | |
1392 | "\0" => 'Cc', | |
1393 | ); | |
73d6d589 | 1394 | |
3568d838 JH |
1395 | for my $char (map { s/^\S+ //; $_ } |
1396 | sort map { sprintf("%06x", ord($_))." $_" } keys %s) { | |
4765795a | 1397 | my $class = $s{$char}; |
3568d838 JH |
1398 | my $code = sprintf("%06x", ord($char)); |
1399 | printf "#\n# 0x$code\n#\n"; | |
4765795a JH |
1400 | print "# IsAlpha\n"; |
1401 | if ($class =~ /^[LM]/) { | |
1402 | print "not " unless $char =~ /\p{IsAlpha}/; | |
1403 | print "ok $test\n"; $test++; | |
1404 | print "not " if $char =~ /\P{IsAlpha}/; | |
1405 | print "ok $test\n"; $test++; | |
1406 | } else { | |
1407 | print "not " if $char =~ /\p{IsAlpha}/; | |
1408 | print "ok $test\n"; $test++; | |
1409 | print "not " unless $char =~ /\P{IsAlpha}/; | |
1410 | print "ok $test\n"; $test++; | |
1411 | } | |
1412 | print "# IsAlnum\n"; | |
1413 | if ($class =~ /^[LMN]/ && $char ne "_") { | |
1414 | print "not " unless $char =~ /\p{IsAlnum}/; | |
1415 | print "ok $test\n"; $test++; | |
1416 | print "not " if $char =~ /\P{IsAlnum}/; | |
1417 | print "ok $test\n"; $test++; | |
1418 | } else { | |
1419 | print "not " if $char =~ /\p{IsAlnum}/; | |
1420 | print "ok $test\n"; $test++; | |
1421 | print "not " unless $char =~ /\P{IsAlnum}/; | |
1422 | print "ok $test\n"; $test++; | |
1423 | } | |
1424 | print "# IsASCII\n"; | |
3568d838 | 1425 | if ($code le '00007f') { |
4765795a JH |
1426 | print "not " unless $char =~ /\p{IsASCII}/; |
1427 | print "ok $test\n"; $test++; | |
1428 | print "not " if $char =~ /\P{IsASCII}/; | |
1429 | print "ok $test\n"; $test++; | |
1430 | } else { | |
1431 | print "not " if $char =~ /\p{IsASCII}/; | |
1432 | print "ok $test\n"; $test++; | |
1433 | print "not " unless $char =~ /\P{IsASCII}/; | |
1434 | print "ok $test\n"; $test++; | |
1435 | } | |
1436 | print "# IsCntrl\n"; | |
1437 | if ($class =~ /^C/) { | |
1438 | print "not " unless $char =~ /\p{IsCntrl}/; | |
1439 | print "ok $test\n"; $test++; | |
1440 | print "not " if $char =~ /\P{IsCntrl}/; | |
1441 | print "ok $test\n"; $test++; | |
1442 | } else { | |
1443 | print "not " if $char =~ /\p{IsCntrl}/; | |
1444 | print "ok $test\n"; $test++; | |
1445 | print "not " unless $char =~ /\P{IsCntrl}/; | |
1446 | print "ok $test\n"; $test++; | |
1447 | } | |
1448 | print "# IsBlank\n"; | |
1449 | if ($class =~ /^Z[lp]/ || $char eq " ") { | |
1450 | print "not " unless $char =~ /\p{IsBlank}/; | |
1451 | print "ok $test\n"; $test++; | |
1452 | print "not " if $char =~ /\P{IsBlank}/; | |
1453 | print "ok $test\n"; $test++; | |
1454 | } else { | |
1455 | print "not " if $char =~ /\p{IsBlank}/; | |
1456 | print "ok $test\n"; $test++; | |
1457 | print "not " unless $char =~ /\P{IsBlank}/; | |
1458 | print "ok $test\n"; $test++; | |
1459 | } | |
1460 | print "# IsDigit\n"; | |
1461 | if ($class =~ /^Nd$/) { | |
1462 | print "not " unless $char =~ /\p{IsDigit}/; | |
1463 | print "ok $test\n"; $test++; | |
1464 | print "not " if $char =~ /\P{IsDigit}/; | |
1465 | print "ok $test\n"; $test++; | |
1466 | } else { | |
1467 | print "not " if $char =~ /\p{IsDigit}/; | |
1468 | print "ok $test\n"; $test++; | |
1469 | print "not " unless $char =~ /\P{IsDigit}/; | |
1470 | print "ok $test\n"; $test++; | |
1471 | } | |
1472 | print "# IsGraph\n"; | |
1473 | if ($class =~ /^([LMNPS])|Co/) { | |
1474 | print "not " unless $char =~ /\p{IsGraph}/; | |
1475 | print "ok $test\n"; $test++; | |
1476 | print "not " if $char =~ /\P{IsGraph}/; | |
1477 | print "ok $test\n"; $test++; | |
1478 | } else { | |
1479 | print "not " if $char =~ /\p{IsGraph}/; | |
1480 | print "ok $test\n"; $test++; | |
1481 | print "not " unless $char =~ /\P{IsGraph}/; | |
1482 | print "ok $test\n"; $test++; | |
1483 | } | |
1484 | print "# IsLower\n"; | |
1485 | if ($class =~ /^Ll$/) { | |
1486 | print "not " unless $char =~ /\p{IsLower}/; | |
1487 | print "ok $test\n"; $test++; | |
1488 | print "not " if $char =~ /\P{IsLower}/; | |
1489 | print "ok $test\n"; $test++; | |
1490 | } else { | |
1491 | print "not " if $char =~ /\p{IsLower}/; | |
1492 | print "ok $test\n"; $test++; | |
1493 | print "not " unless $char =~ /\P{IsLower}/; | |
1494 | print "ok $test\n"; $test++; | |
1495 | } | |
1496 | print "# IsPrint\n"; | |
1497 | if ($class =~ /^([LMNPS])|Co|Zs/) { | |
1498 | print "not " unless $char =~ /\p{IsPrint}/; | |
1499 | print "ok $test\n"; $test++; | |
1500 | print "not " if $char =~ /\P{IsPrint}/; | |
1501 | print "ok $test\n"; $test++; | |
1502 | } else { | |
1503 | print "not " if $char =~ /\p{IsPrint}/; | |
1504 | print "ok $test\n"; $test++; | |
1505 | print "not " unless $char =~ /\P{IsPrint}/; | |
1506 | print "ok $test\n"; $test++; | |
1507 | } | |
1508 | print "# IsPunct\n"; | |
1509 | if ($class =~ /^P/ || $char eq "_") { | |
1510 | print "not " unless $char =~ /\p{IsPunct}/; | |
1511 | print "ok $test\n"; $test++; | |
1512 | print "not " if $char =~ /\P{IsPunct}/; | |
1513 | print "ok $test\n"; $test++; | |
1514 | } else { | |
1515 | print "not " if $char =~ /\p{IsPunct}/; | |
1516 | print "ok $test\n"; $test++; | |
1517 | print "not " unless $char =~ /\P{IsPunct}/; | |
1518 | print "ok $test\n"; $test++; | |
1519 | } | |
1520 | print "# IsSpace\n"; | |
1521 | if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) { | |
1522 | print "not " unless $char =~ /\p{IsSpace}/; | |
1523 | print "ok $test\n"; $test++; | |
1524 | print "not " if $char =~ /\P{IsSpace}/; | |
1525 | print "ok $test\n"; $test++; | |
1526 | } else { | |
1527 | print "not " if $char =~ /\p{IsSpace}/; | |
1528 | print "ok $test\n"; $test++; | |
1529 | print "not " unless $char =~ /\P{IsSpace}/; | |
1530 | print "ok $test\n"; $test++; | |
1531 | } | |
1532 | print "# IsUpper\n"; | |
1533 | if ($class =~ /^L[ut]/) { | |
1534 | print "not " unless $char =~ /\p{IsUpper}/; | |
1535 | print "ok $test\n"; $test++; | |
1536 | print "not " if $char =~ /\P{IsUpper}/; | |
1537 | print "ok $test\n"; $test++; | |
1538 | } else { | |
1539 | print "not " if $char =~ /\p{IsUpper}/; | |
1540 | print "ok $test\n"; $test++; | |
1541 | print "not " unless $char =~ /\P{IsUpper}/; | |
1542 | print "ok $test\n"; $test++; | |
1543 | } | |
1544 | print "# IsWord\n"; | |
1545 | if ($class =~ /^[LMN]/ || $char eq "_") { | |
1546 | print "not " unless $char =~ /\p{IsWord}/; | |
1547 | print "ok $test\n"; $test++; | |
1548 | print "not " if $char =~ /\P{IsWord}/; | |
1549 | print "ok $test\n"; $test++; | |
1550 | } else { | |
1551 | print "not " if $char =~ /\p{IsWord}/; | |
1552 | print "ok $test\n"; $test++; | |
1553 | print "not " unless $char =~ /\P{IsWord}/; | |
1554 | print "ok $test\n"; $test++; | |
1555 | } | |
1556 | } | |
1557 | } | |
1558 | ||
1559 | { | |
1560 | $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; | |
1561 | ||
1562 | if (/(.\x{300})./) { | |
1563 | print "ok 576\n"; | |
1564 | ||
1565 | print "not " unless $` eq "abc\x{100}" && length($`) == 4; | |
73d6d589 | 1566 | print "ok 577\n"; |
4765795a JH |
1567 | |
1568 | print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3; | |
73d6d589 | 1569 | print "ok 578\n"; |
4765795a JH |
1570 | |
1571 | print "not " unless $' eq "\x{400}defg" && length($') == 5; | |
73d6d589 | 1572 | print "ok 579\n"; |
4765795a JH |
1573 | |
1574 | print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2; | |
73d6d589 | 1575 | print "ok 580\n"; |
a8a2fe91 JH |
1576 | } else { |
1577 | for (576..580) { print "not ok $_\n" } | |
4765795a JH |
1578 | } |
1579 | } | |
8269fa76 JH |
1580 | |
1581 | { | |
1582 | # bug id 20010306.008 | |
1583 | ||
1584 | $a = "a\x{1234}"; | |
1585 | # The original bug report had 'no utf8' here but that was irrelevant. | |
1586 | $a =~ m/\w/; # used to core dump | |
1587 | ||
1588 | print "ok 581\n"; | |
1589 | } | |
b8ef571c JH |
1590 | |
1591 | { | |
339e86bc JH |
1592 | $test = 582; |
1593 | ||
b8ef571c JH |
1594 | # bugid 20010410.006 |
1595 | for my $rx ( | |
1596 | '/(.*?)\{(.*?)\}/csg', | |
1597 | '/(.*?)\{(.*?)\}/cg', | |
1598 | '/(.*?)\{(.*?)\}/sg', | |
1599 | '/(.*?)\{(.*?)\}/g', | |
1600 | '/(.+?)\{(.+?)\}/csg', | |
1601 | ) | |
1602 | { | |
1603 | my($input, $i); | |
1604 | ||
1605 | $i = 0; | |
1606 | $input = "a{b}c{d}"; | |
1607 | eval <<EOT; | |
1608 | while (eval \$input =~ $rx) { | |
1609 | print "# \\\$1 = '\$1' \\\$2 = '\$2'\n"; | |
1610 | ++\$i; | |
1611 | } | |
1612 | EOT | |
1613 | print "not " unless $i == 2; | |
1614 | print "ok " . $test++ . "\n"; | |
1615 | } | |
1616 | } | |
209a9bc1 JH |
1617 | |
1618 | { | |
1619 | # from Robin Houston | |
1620 | ||
b851fbc1 | 1621 | my $x = "\x{10FFFD}"; |
209a9bc1 | 1622 | $x =~ s/(.)/$1/g; |
b851fbc1 | 1623 | print "not " unless ord($x) == 0x10FFFD && length($x) == 1; |
209a9bc1 JH |
1624 | print "ok 587\n"; |
1625 | } | |
3568d838 JH |
1626 | |
1627 | { | |
1628 | my $x = "\x7f"; | |
1629 | ||
1630 | print "not " if $x =~ /[\x80-\xff]/; | |
1631 | print "ok 588\n"; | |
1632 | ||
1633 | print "not " if $x =~ /[\x80-\x{100}]/; | |
1634 | print "ok 589\n"; | |
1635 | ||
1636 | print "not " if $x =~ /[\x{100}]/; | |
1637 | print "ok 590\n"; | |
1638 | ||
1639 | print "not " if $x =~ /\p{InLatin1Supplement}/; | |
1640 | print "ok 591\n"; | |
1641 | ||
1642 | print "not " unless $x =~ /\P{InLatin1Supplement}/; | |
1643 | print "ok 592\n"; | |
1644 | ||
1645 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1646 | print "ok 593\n"; | |
1647 | ||
1648 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1649 | print "ok 594\n"; | |
1650 | } | |
1651 | ||
1652 | { | |
1653 | my $x = "\x80"; | |
1654 | ||
1655 | print "not " unless $x =~ /[\x80-\xff]/; | |
1656 | print "ok 595\n"; | |
1657 | ||
1658 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1659 | print "ok 596\n"; | |
1660 | ||
1661 | print "not " if $x =~ /[\x{100}]/; | |
1662 | print "ok 597\n"; | |
1663 | ||
1664 | print "not " unless $x =~ /\p{InLatin1Supplement}/; | |
1665 | print "ok 598\n"; | |
1666 | ||
1667 | print "not " if $x =~ /\P{InLatin1Supplement}/; | |
1668 | print "ok 599\n"; | |
1669 | ||
1670 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1671 | print "ok 600\n"; | |
1672 | ||
1673 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1674 | print "ok 601\n"; | |
1675 | } | |
1676 | ||
1677 | { | |
1678 | my $x = "\xff"; | |
1679 | ||
1680 | print "not " unless $x =~ /[\x80-\xff]/; | |
1681 | print "ok 602\n"; | |
1682 | ||
1683 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1684 | print "ok 603\n"; | |
1685 | ||
1686 | print "not " if $x =~ /[\x{100}]/; | |
1687 | print "ok 604\n"; | |
1688 | ||
1689 | print "not " unless $x =~ /\p{InLatin1Supplement}/; | |
1690 | print "ok 605\n"; | |
1691 | ||
1692 | print "not " if $x =~ /\P{InLatin1Supplement}/; | |
1693 | print "ok 606\n"; | |
1694 | ||
1695 | print "not " if $x =~ /\p{InLatinExtendedA}/; | |
1696 | print "ok 607\n"; | |
1697 | ||
1698 | print "not " unless $x =~ /\P{InLatinExtendedA}/; | |
1699 | print "ok 608\n"; | |
1700 | } | |
1701 | ||
1702 | { | |
1703 | my $x = "\x{100}"; | |
1704 | ||
1705 | print "not " if $x =~ /[\x80-\xff]/; | |
1706 | print "ok 609\n"; | |
1707 | ||
1708 | print "not " unless $x =~ /[\x80-\x{100}]/; | |
1709 | print "ok 610\n"; | |
1710 | ||
1711 | print "not " unless $x =~ /[\x{100}]/; | |
1712 | print "ok 611\n"; | |
1713 | ||
1714 | print "not " if $x =~ /\p{InLatin1Supplement}/; | |
1715 | print "ok 612\n"; | |
1716 | ||
1717 | print "not " unless $x =~ /\P{InLatin1Supplement}/; | |
1718 | print "ok 613\n"; | |
1719 | ||
1720 | print "not " unless $x =~ /\p{InLatinExtendedA}/; | |
1721 | print "ok 614\n"; | |
1722 | ||
1723 | print "not " if $x =~ /\P{InLatinExtendedA}/; | |
1724 | print "ok 615\n"; | |
1725 | } | |
1726 | ||
9d1d55b5 JP |
1727 | { |
1728 | # from japhy | |
1729 | my $w; | |
1730 | use warnings; | |
1731 | local $SIG{__WARN__} = sub { $w .= shift }; | |
1732 | ||
1733 | $w = ""; | |
1734 | eval 'qr/(?c)/'; | |
1735 | print "not " if $w !~ /^Useless \(\?c\)/; | |
1736 | print "ok 616\n"; | |
1737 | ||
1738 | $w = ""; | |
1739 | eval 'qr/(?-c)/'; | |
1740 | print "not " if $w !~ /^Useless \(\?-c\)/; | |
1741 | print "ok 617\n"; | |
1742 | ||
1743 | $w = ""; | |
1744 | eval 'qr/(?g)/'; | |
1745 | print "not " if $w !~ /^Useless \(\?g\)/; | |
1746 | print "ok 618\n"; | |
1747 | ||
1748 | $w = ""; | |
1749 | eval 'qr/(?-g)/'; | |
1750 | print "not " if $w !~ /^Useless \(\?-g\)/; | |
1751 | print "ok 619\n"; | |
1752 | ||
1753 | $w = ""; | |
1754 | eval 'qr/(?o)/'; | |
1755 | print "not " if $w !~ /^Useless \(\?o\)/; | |
1756 | print "ok 620\n"; | |
1757 | ||
1758 | $w = ""; | |
1759 | eval 'qr/(?-o)/'; | |
1760 | print "not " if $w !~ /^Useless \(\?-o\)/; | |
1761 | print "ok 621\n"; | |
1762 | ||
1763 | # now test multi-error regexes | |
1764 | ||
1765 | $w = ""; | |
1766 | eval 'qr/(?g-o)/'; | |
1767 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-o\)/; | |
1768 | print "ok 622\n"; | |
1769 | ||
1770 | $w = ""; | |
1771 | eval 'qr/(?g-c)/'; | |
1772 | print "not " if $w !~ /^Useless \(\?g\).*\nUseless \(\?-c\)/; | |
1773 | print "ok 623\n"; | |
1774 | ||
1775 | $w = ""; | |
1776 | eval 'qr/(?o-cg)/'; # (?c) means (?g) error won't be thrown | |
1777 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?-c\)/; | |
1778 | print "ok 624\n"; | |
1779 | ||
1780 | $w = ""; | |
1781 | eval 'qr/(?ogc)/'; | |
1782 | print "not " if $w !~ /^Useless \(\?o\).*\nUseless \(\?g\).*\nUseless \(\?c\)/; | |
1783 | print "ok 625\n"; | |
1784 | } | |
a72deede JH |
1785 | |
1786 | # More Unicode "class" tests | |
1787 | ||
1788 | { | |
1789 | use charnames ':full'; | |
1790 | ||
1791 | print "not " unless "\N{LATIN CAPITAL LETTER A}" =~ /\p{InBasicLatin}/; | |
1792 | print "ok 626\n"; | |
1793 | ||
1794 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH GRAVE}" =~ /\p{InLatin1Supplement}/; | |
1795 | print "ok 627\n"; | |
1796 | ||
1797 | print "not " unless "\N{LATIN CAPITAL LETTER A WITH MACRON}" =~ /\p{InLatinExtendedA}/; | |
1798 | print "ok 628\n"; | |
1799 | ||
1800 | print "not " unless "\N{LATIN SMALL LETTER B WITH STROKE}" =~ /\p{InLatinExtendedB}/; | |
1801 | print "ok 629\n"; | |
1802 | ||
1803 | print "not " unless "\N{KATAKANA LETTER SMALL A}" =~ /\p{InKatakana}/; | |
1804 | print "ok 630\n"; | |
1805 | } | |
1806 | ||
6002328a JH |
1807 | $_ = "foo"; |
1808 | ||
1809 | eval <<"EOT"; die if $@; | |
1810 | /f | |
1811 | o\r | |
1812 | o | |
1813 | \$ | |
1814 | /x && print "ok 631\n"; | |
1815 | EOT | |
1816 | ||
1817 | eval <<"EOT"; die if $@; | |
1818 | /f | |
1819 | o | |
1820 | o | |
1821 | \$\r | |
1822 | /x && print "ok 632\n"; | |
1823 | EOT | |
1824 | ||
569b5e07 AB |
1825 | #test /o feature |
1826 | sub test_o { $_[0] =~/$_[1]/o; return $1} | |
1827 | if(test_o('abc','(.)..') eq 'a') { | |
395ddfe6 | 1828 | print "ok 633\n"; |
569b5e07 | 1829 | } else { |
395ddfe6 | 1830 | print "not ok 633\n"; |
569b5e07 AB |
1831 | } |
1832 | if(test_o('abc','..(.)') eq 'a') { | |
395ddfe6 | 1833 | print "ok 634\n"; |
569b5e07 | 1834 | } else { |
395ddfe6 | 1835 | print "not ok 634\n"; |
569b5e07 AB |
1836 | } |
1837 | ||
f79b3095 JH |
1838 | # 635..639: ID 20010619.003 (only the space character is |
1839 | # supposed to be [:print:], not the whole isprint()). | |
1840 | ||
1841 | print "not " if "\n" =~ /[[:print:]]/; | |
1842 | print "ok 635\n"; | |
1843 | ||
1844 | print "not " if "\t" =~ /[[:print:]]/; | |
1845 | print "ok 636\n"; | |
1846 | ||
e857312d | 1847 | # Amazingly vertical tabulator is the same in ASCII and EBCDIC. |
f79b3095 JH |
1848 | print "not " if "\014" =~ /[[:print:]]/; |
1849 | print "ok 637\n"; | |
1850 | ||
1851 | print "not " if "\r" =~ /[[:print:]]/; | |
1852 | print "ok 638\n"; | |
1853 | ||
1854 | print "not " unless " " =~ /[[:print:]]/; | |
1855 | print "ok 639\n"; | |
1856 | ||
a01268b5 JH |
1857 | ## |
1858 | ## Test basic $^N usage outside of a regex | |
1859 | ## | |
1860 | $x = "abcdef"; | |
1861 | $T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"}; | |
1862 | $T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1863 | $T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"}; | |
1864 | $T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1865 | $T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1866 | $T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1867 | $T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"}; | |
1868 | $T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1869 | $T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"}; | |
1870 | $T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"}; | |
1871 | $T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"}; | |
1872 | $T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1873 | $T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1874 | $T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"}; | |
1875 | { | |
1876 | $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"}; | |
1877 | } | |
1878 | ## test to see if $^N is automatically localized -- it should now | |
1879 | ## have the value set in test 653 | |
1880 | $T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"}; | |
1881 | ||
1882 | ## | |
1883 | ## Now test inside (?{...}) | |
1884 | ## | |
1885 | $T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"}; | |
1886 | $T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; | |
1887 | $T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"}; | |
1888 | $T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd") | |
1889 | {print $T} else {print "not $T"}; | |
1890 | $T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde") | |
1891 | {print $T} else {print "not $T"}; | |
2796c109 JH |
1892 | |
1893 | # Test the Unicode script classes | |
1894 | ||
1895 | print "not " unless chr(0x100) =~ /\p{InLatin}/; # outside Latin-1 | |
1896 | print "ok 661\n"; | |
1897 | ||
1898 | print "not " unless chr(0x212b) =~ /\p{InLatin}/; # Angstrom sign, very outside | |
1899 | print "ok 662\n"; | |
1900 | ||
1901 | print "not " unless chr(0x5d0) =~ /\p{InHebrew}/; # inside HebrewBlock | |
1902 | print "ok 663\n"; | |
1903 | ||
1904 | print "not " unless chr(0xfb4f) =~ /\p{InHebrew}/; # outside HebrewBlock | |
1905 | print "ok 664\n"; | |
1906 | ||
5f9563ea JH |
1907 | print "not " unless chr(0xb5) =~ /\p{InGreek}/; # singleton (not in a range) |
1908 | print "ok 665\n"; | |
1909 | ||
1910 | print "not " unless chr(0x37a) =~ /\p{InGreek}/; # singleton | |
1911 | print "ok 666\n"; | |
1912 | ||
1913 | print "not " unless chr(0x386) =~ /\p{InGreek}/; # singleton | |
1914 | print "ok 667\n"; | |
1915 | ||
1916 | print "not " unless chr(0x387) =~ /\P{InGreek}/; # not there | |
1917 | print "ok 668\n"; | |
1918 | ||
1919 | print "not " unless chr(0x388) =~ /\p{InGreek}/; # range | |
1920 | print "ok 669\n"; | |
1921 | ||
1922 | print "not " unless chr(0x38a) =~ /\p{InGreek}/; # range | |
1923 | print "ok 670\n"; | |
1924 | ||
1925 | print "not " unless chr(0x38b) =~ /\P{InGreek}/; # not there | |
1926 | print "ok 671\n"; | |
1927 | ||
1928 | print "not " unless chr(0x38c) =~ /\p{InGreek}/; # singleton | |
1929 | print "ok 672\n"; | |
1930 | ||
7be5a6cf JF |
1931 | ## |
1932 | ## Test [:cntrl:]... | |
1933 | ## | |
1934 | ## Should probably put in tests for all the POSIX stuff, but not sure how to | |
1935 | ## guarantee a specific locale...... | |
1936 | ## | |
1937 | $AllBytes = join('', map { chr($_) } 0..255); | |
1938 | ($x = $AllBytes) =~ s/[[:cntrl:]]//g; | |
1939 | if ($x ne join('', map { chr($_) } 0x20..0x7E, 0x80..0xFF)) { print "not " }; | |
1940 | print "ok 673\n"; | |
1941 | ||
1942 | ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; | |
1943 | if ($x ne join('', map { chr($_) } 0..0x1F, 0x7F)) { print "not " }; | |
1944 | print "ok 674\n"; | |
f33976b4 DB |
1945 | |
1946 | # With /s modifier UTF8 chars were interpreted as bytes | |
1947 | { | |
1948 | my $a = "Hello \x{263A} World"; | |
1949 | ||
1950 | my @a = ($a =~ /./gs); | |
1951 | ||
1952 | print "not " unless $#a == 12; | |
1953 | print "ok 675\n"; | |
1954 | } | |
cce850e4 JH |
1955 | |
1956 | @a = ("foo\nbar" =~ /./g); | |
1957 | print "ok 676\n" if @a == 6 && "@a" eq "f o o b a r"; | |
1958 | ||
1959 | @a = ("foo\nbar" =~ /./gs); | |
1960 | print "ok 677\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1961 | ||
1962 | @a = ("foo\nbar" =~ /\C/g); | |
1963 | print "ok 678\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1964 | ||
1965 | @a = ("foo\nbar" =~ /\C/gs); | |
1966 | print "ok 679\n" if @a == 7 && "@a" eq "f o o \n b a r"; | |
1967 | ||
1968 | @a = ("foo\n\x{100}bar" =~ /./g); | |
1969 | print "ok 680\n" if @a == 7 && "@a" eq "f o o \x{100} b a r"; | |
1970 | ||
1971 | @a = ("foo\n\x{100}bar" =~ /./gs); | |
1972 | print "ok 681\n" if @a == 8 && "@a" eq "f o o \n \x{100} b a r"; | |
1973 | ||
1974 | ($a, $b) = map { chr } ord('A') == 65 ? (0xc4, 0x80) : (0x8c, 0x41); | |
1975 | ||
1976 | @a = ("foo\n\x{100}bar" =~ /\C/g); | |
1977 | print "ok 682\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; | |
1978 | ||
1979 | @a = ("foo\n\x{100}bar" =~ /\C/gs); | |
1980 | print "ok 683\n" if @a == 9 && "@a" eq "f o o \n $a $b b a r"; | |
1981 | ||
0af80b60 HS |
1982 | { |
1983 | # [ID 20010814.004] pos() doesn't work when using =~m// in list context | |
1984 | $_ = "ababacadaea"; | |
1985 | $a = join ":", /b./gc; | |
1986 | $b = join ":", /a./gc; | |
1987 | $c = pos; | |
1988 | print "$a $b $c" eq 'ba:ba ad:ae 10' ? "ok 684\n" : "not ok 684\t# $a $b $c\n"; | |
1989 | } | |
d9f424b2 JH |
1990 | |
1991 | { | |
75685a94 JH |
1992 | # [ID 20010407.006] matching utf8 return values from functions does not work |
1993 | ||
d9f424b2 JH |
1994 | package ID_20010407_006; |
1995 | ||
1996 | sub x { | |
1997 | "a\x{1234}"; | |
1998 | } | |
1999 | ||
2000 | my $x = x; | |
2001 | my $y; | |
2002 | ||
2003 | $x =~ /(..)/; $y = $1; | |
2004 | print "not " unless length($y) == 2 && $y eq $x; | |
75685a94 | 2005 | print "ok 685\n"; |
d9f424b2 JH |
2006 | |
2007 | x =~ /(..)/; $y = $1; | |
2008 | print "not " unless length($y) == 2 && $y eq $x; | |
2009 | print "ok 686\n"; | |
2010 | } | |
a4c04bdc | 2011 | |
e2d8ce26 | 2012 | |
a4c04bdc NC |
2013 | my $test = 687; |
2014 | ||
2015 | # Force scalar context on the patern match | |
2016 | sub ok ($$) { | |
2017 | my($ok, $name) = @_; | |
2018 | ||
2019 | printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; | |
2020 | ||
2021 | printf "# Failed test at line %d\n", (caller)[2] unless $ok; | |
2022 | ||
2023 | $test++; | |
2024 | return $ok; | |
2025 | } | |
2026 | ||
2027 | { | |
2028 | # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. | |
2029 | $x = "\x4e" . "E"; | |
2030 | ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); | |
2031 | ||
2032 | $x = "\x4e" . "i"; | |
2033 | ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); | |
2034 | ||
2035 | $x = "\x4" . "j"; | |
2036 | ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); | |
2037 | ||
2038 | $x = "\x0" . "k"; | |
2039 | ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); | |
2040 | ||
2041 | $x = "\x0" . "x"; | |
2042 | ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); | |
2043 | ||
2044 | $x = "\x0" . "xa"; | |
2045 | ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); | |
2046 | ||
2047 | $x = "\x9" . "_b"; | |
2048 | ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); | |
2049 | ||
2050 | print "# and now again in [] ranges\n"; | |
2051 | ||
2052 | $x = "\x4e" . "E"; | |
2053 | ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); | |
2054 | ||
2055 | $x = "\x4e" . "i"; | |
2056 | ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); | |
2057 | ||
2058 | $x = "\x4" . "j"; | |
2059 | ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); | |
2060 | ||
2061 | $x = "\x0" . "k"; | |
2062 | ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); | |
2063 | ||
2064 | $x = "\x0" . "x"; | |
2065 | ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); | |
2066 | ||
2067 | $x = "\x0" . "xa"; | |
2068 | ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); | |
2069 | ||
2070 | $x = "\x9" . "_b"; | |
2071 | ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); | |
2072 | ||
2073 | } | |
2074 | ||
2075 | { | |
2076 | # Check that \x{##} works. 5.6.1 fails quite a few of these. | |
2077 | ||
2078 | $x = "\x9b"; | |
2079 | ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); | |
2080 | ||
2081 | $x = "\x9b" . "y"; | |
2082 | ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
2083 | ||
2084 | $x = "\x9b" . "y"; | |
2085 | ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); | |
2086 | ||
2087 | $x = "\x9b" . "y"; | |
2088 | ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); | |
2089 | ||
2090 | $x = "\x0" . "y"; | |
2091 | ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); | |
2092 | ||
2093 | $x = "\x0" . "y"; | |
2094 | ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); | |
2095 | ||
2096 | $x = "\x9b" . "y"; | |
2097 | ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); | |
2098 | ||
2099 | print "# and now again in [] ranges\n"; | |
2100 | ||
2101 | $x = "\x9b"; | |
2102 | ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); | |
2103 | ||
2104 | $x = "\x9b" . "y"; | |
2105 | ok ($x =~ /^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); | |
2106 | ||
2107 | $x = "\x9b" . "y"; | |
2108 | ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); | |
2109 | ||
2110 | $x = "\x9b" . "y"; | |
2111 | ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); | |
2112 | ||
2113 | $x = "\x0" . "y"; | |
2114 | ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); | |
2115 | ||
2116 | $x = "\x0" . "y"; | |
2117 | ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); | |
2118 | ||
2119 | $x = "\x9b" . "y"; | |
2120 | ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); | |
2121 | } | |
e2d8ce26 JP |
2122 | |
2123 | { | |
d9efae67 JH |
2124 | # high bit bug -- japhy |
2125 | my $x = "ab\200d"; | |
2126 | $x =~ /.*?\200/ or print "not "; | |
2127 | print "ok 715\n"; | |
e2d8ce26 JP |
2128 | } |
2129 | ||
4193bef7 JH |
2130 | print "# some Unicode properties\n"; |
2131 | ||
d9efae67 | 2132 | { |
4193bef7 | 2133 | # Dashes, underbars, case. |
d9efae67 JH |
2134 | print "not " unless "\x80" =~ /\p{in-latin1_SUPPLEMENT}/; |
2135 | print "ok 716\n"; | |
ab13f0c7 | 2136 | |
4193bef7 | 2137 | # Complement, leading and trailing whitespace. |
ab13f0c7 JH |
2138 | print "not " unless "\x80" =~ /\P{ ^ In Latin 1 Supplement }/; |
2139 | print "ok 717\n"; | |
4193bef7 | 2140 | |
f173cd49 JH |
2141 | # No ^In, dashes, case, dash, any intervening (word-break) whitespace. |
2142 | # (well, newlines don't work...) | |
2143 | print "not " unless "\x80" =~ /\p{latin-1 supplement}/; | |
4193bef7 JH |
2144 | print "ok 718\n"; |
2145 | } | |
2146 | ||
2147 | { | |
2148 | print "not " unless "a" =~ /\pL/; | |
2149 | print "ok 719\n"; | |
4193bef7 | 2150 | |
4193bef7 JH |
2151 | print "not " unless "a" =~ /\p{IsLl}/; |
2152 | print "ok 720\n"; | |
4193bef7 | 2153 | |
c87b7cc2 | 2154 | print "not " if "a" =~ /\p{IsLu}/; |
4193bef7 | 2155 | print "ok 721\n"; |
4193bef7 | 2156 | |
61247495 | 2157 | print "not " unless "a" =~ /\p{Ll}/; |
4193bef7 | 2158 | print "ok 722\n"; |
c87b7cc2 | 2159 | |
61247495 | 2160 | print "not " if "a" =~ /\p{Lu}/; |
c87b7cc2 JH |
2161 | print "ok 723\n"; |
2162 | ||
61247495 | 2163 | print "not " unless "A" =~ /\pL/; |
c87b7cc2 JH |
2164 | print "ok 724\n"; |
2165 | ||
61247495 | 2166 | print "not " unless "A" =~ /\p{IsLu}/; |
c87b7cc2 JH |
2167 | print "ok 725\n"; |
2168 | ||
61247495 | 2169 | print "not " if "A" =~ /\p{IsLl}/; |
c87b7cc2 JH |
2170 | print "ok 726\n"; |
2171 | ||
61247495 | 2172 | print "not " unless "A" =~ /\p{Lu}/; |
c87b7cc2 JH |
2173 | print "ok 727\n"; |
2174 | ||
61247495 | 2175 | print "not " if "A" =~ /\p{Ll}/; |
c87b7cc2 JH |
2176 | print "ok 728\n"; |
2177 | ||
61247495 | 2178 | print "not " if "a" =~ /\PL/; |
c87b7cc2 JH |
2179 | print "ok 729\n"; |
2180 | ||
61247495 | 2181 | print "not " if "a" =~ /\P{IsLl}/; |
c87b7cc2 | 2182 | print "ok 730\n"; |
61247495 JH |
2183 | |
2184 | print "not " unless "a" =~ /\P{IsLu}/; | |
2185 | print "ok 731\n"; | |
2186 | ||
2187 | print "not " if "a" =~ /\P{Ll}/; | |
2188 | print "ok 732\n"; | |
2189 | ||
2190 | print "not " unless "a" =~ /\P{Lu}/; | |
2191 | print "ok 733\n"; | |
2192 | ||
2193 | print "not " if "A" =~ /\PL/; | |
2194 | print "ok 734\n"; | |
2195 | ||
2196 | print "not " if "A" =~ /\P{IsLu}/; | |
2197 | print "ok 735\n"; | |
2198 | ||
2199 | print "not " unless "A" =~ /\P{IsLl}/; | |
2200 | print "ok 736\n"; | |
2201 | ||
2202 | print "not " if "A" =~ /\P{Lu}/; | |
2203 | print "ok 737\n"; | |
2204 | ||
2205 | print "not " unless "A" =~ /\P{Ll}/; | |
2206 | print "ok 738\n"; | |
2207 | ||
4193bef7 | 2208 | } |
9b4e380a JH |
2209 | |
2210 | { | |
2211 | print "not " if "a" =~ /\p{Common}/; | |
2212 | print "ok 739\n"; | |
2213 | ||
2214 | print "not " unless "1" =~ /\p{Common}/; | |
2215 | print "ok 740\n"; | |
2216 | } | |
2217 | ||
2218 | { | |
2219 | print "not " if "a" =~ /\p{Inherited}/; | |
2220 | print "ok 741\n"; | |
2221 | ||
2222 | print "not " unless "\x{300}" =~ /\p{Inherited}/; | |
2223 | print "ok 742\n"; | |
2224 | } | |
2225 | ||
2226 | { | |
2227 | print "not " unless "a" =~ /\p{L&}/; | |
2228 | print "ok 743\n"; | |
2229 | ||
2230 | print "not " if "1" =~ /\p{L&}/; | |
2231 | print "ok 744\n"; | |
2232 | } | |
d73e5302 JH |
2233 | |
2234 | { | |
2235 | print "not " unless "a" =~ /\p{LowercaseLetter}/; | |
2236 | print "ok 745\n"; | |
2237 | ||
b23d7fc6 JH |
2238 | print "not " if "A" =~ /\p{ |
2239 | Lowercase | |
2240 | Letter | |
2241 | }/x; | |
d73e5302 JH |
2242 | print "ok 746\n"; |
2243 | } | |
2244 | ||
2245 | { | |
2246 | print "not " unless "\x{AC00}" =~ /\p{HangulSyllable}/; | |
2247 | print "ok 747\n"; | |
2248 | } | |
71d929cb JH |
2249 | |
2250 | { | |
701a277b JH |
2251 | # Script=, Block=, Category= |
2252 | ||
71d929cb JH |
2253 | print "not " unless "\x{0100}" =~ /\p{Script=Latin}/; |
2254 | print "ok 748\n"; | |
2255 | ||
2256 | print "not " unless "\x{0100}" =~ /\p{Block=LatinExtendedA}/; | |
2257 | print "ok 749\n"; | |
2258 | ||
2259 | print "not " unless "\x{0100}" =~ /\p{Category=UppercaseLetter}/; | |
2260 | print "ok 750\n"; | |
2261 | } | |
2262 | ||
ef54fa25 | 2263 | { |
701a277b JH |
2264 | print "# the basic character classes and Unicode \n"; |
2265 | ||
ef54fa25 JH |
2266 | # 0100;LATIN CAPITAL LETTER A WITH MACRON;Lu;0;L;0041 0304;;;;N;LATIN CAPITAL LETTER A MACRON;;;0101; |
2267 | print "not " unless "\x{0100}" =~ /\w/; | |
2268 | print "ok 751\n"; | |
2269 | ||
2270 | # 0660;ARABIC-INDIC DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; | |
2271 | print "not " unless "\x{0660}" =~ /\d/; | |
2272 | print "ok 752\n"; | |
2273 | ||
2274 | # 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;; | |
2275 | print "not " unless "\x{1680}" =~ /\s/; | |
2276 | print "ok 753\n"; | |
2277 | } | |
701a277b JH |
2278 | |
2279 | { | |
2280 | print "# folding matches and Unicode\n"; | |
2281 | ||
2282 | print "not " unless "a\x{100}" =~ /A/i; | |
2283 | print "ok 754\n"; | |
2284 | ||
2285 | print "not " unless "A\x{100}" =~ /A/i; | |
2286 | print "ok 755\n"; | |
2287 | ||
2288 | print "not " unless "a\x{100}" =~ /a/i; | |
2289 | print "ok 756\n"; | |
2290 | ||
2291 | print "not " unless "A\x{100}" =~ /A/i; | |
2292 | print "ok 757\n"; | |
bc517b45 JH |
2293 | |
2294 | print "not " unless "\x{101}a" =~ /\x{100}/i; | |
2295 | print "ok 758\n"; | |
2296 | ||
2297 | print "not " unless "\x{100}a" =~ /\x{100}/i; | |
2298 | print "ok 759\n"; | |
2299 | ||
2300 | print "not " unless "\x{101}a" =~ /\x{101}/i; | |
2301 | print "ok 760\n"; | |
2302 | ||
2303 | print "not " unless "\x{100}a" =~ /\x{101}/i; | |
2304 | print "ok 761\n"; | |
2305 | ||
2306 | print "not " unless "a\x{100}" =~ /A\x{100}/i; | |
2307 | print "ok 762\n"; | |
2308 | ||
2309 | print "not " unless "A\x{100}" =~ /A\x{100}/i; | |
2310 | print "ok 763\n"; | |
2311 | ||
2312 | print "not " unless "a\x{100}" =~ /a\x{100}/i; | |
2313 | print "ok 764\n"; | |
2314 | ||
2315 | print "not " unless "A\x{100}" =~ /A\x{100}/i; | |
2316 | print "ok 765\n"; | |
2317 | ||
2318 | print "not " unless "a\x{100}" =~ /[A]/i; | |
2319 | print "ok 766\n"; | |
2320 | ||
2321 | print "not " unless "A\x{100}" =~ /[A]/i; | |
2322 | print "ok 767\n"; | |
2323 | ||
2324 | print "not " unless "a\x{100}" =~ /[a]/i; | |
2325 | print "ok 768\n"; | |
2326 | ||
2327 | print "not " unless "A\x{100}" =~ /[A]/i; | |
2328 | print "ok 769\n"; | |
2329 | ||
2330 | print "not " unless "\x{101}a" =~ /[\x{100}]/i; | |
2331 | print "ok 770\n"; | |
2332 | ||
2333 | print "not " unless "\x{100}a" =~ /[\x{100}]/i; | |
2334 | print "ok 771\n"; | |
2335 | ||
2336 | print "not " unless "\x{101}a" =~ /[\x{101}]/i; | |
2337 | print "ok 772\n"; | |
2338 | ||
2339 | print "not " unless "\x{100}a" =~ /[\x{101}]/i; | |
2340 | print "ok 773\n"; | |
2341 | ||
701a277b | 2342 | } |
a5961de5 JH |
2343 | |
2344 | { | |
2345 | use charnames ':full'; | |
2346 | ||
2347 | print "# LATIN LETTER A WITH GRAVE\n"; | |
2348 | my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; | |
2349 | my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; | |
2350 | ||
bc517b45 JH |
2351 | print $lower =~ m/$UPPER/i ? "ok 774\n" : "not ok 774\n"; |
2352 | print $UPPER =~ m/$lower/i ? "ok 775\n" : "not ok 775\n"; | |
2353 | print $lower =~ m/[$UPPER]/i ? "ok 776\n" : "not ok 776\n"; | |
2354 | print $UPPER =~ m/[$lower]/i ? "ok 777\n" : "not ok 777\n"; | |
a5961de5 JH |
2355 | |
2356 | print "# GREEK LETTER ALPHA WITH VRACHY\n"; | |
2357 | ||
2358 | $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; | |
2359 | $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; | |
2360 | ||
bc517b45 JH |
2361 | print $lower =~ m/$UPPER/i ? "ok 778\n" : "not ok 778\n"; |
2362 | print $UPPER =~ m/$lower/i ? "ok 779\n" : "not ok 779\n"; | |
2363 | print $lower =~ m/[$UPPER]/i ? "ok 780\n" : "not ok 780\n"; | |
2364 | print $UPPER =~ m/[$lower]/i ? "ok 781\n" : "not ok 781\n"; | |
a5961de5 JH |
2365 | |
2366 | print "# LATIN LETTER Y WITH DIAERESIS\n"; | |
2367 | ||
2368 | $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; | |
2369 | $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; | |
bc517b45 JH |
2370 | print $lower =~ m/$UPPER/i ? "ok 782\n" : "not ok 782\n"; |
2371 | print $UPPER =~ m/$lower/i ? "ok 783\n" : "not ok 783\n"; | |
2372 | print $lower =~ m/[$UPPER]/i ? "ok 784\n" : "not ok 784\n"; | |
2373 | print $UPPER =~ m/[$lower]/i ? "ok 785\n" : "not ok 785\n"; | |
a5961de5 | 2374 | } |
55da9344 JH |
2375 | |
2376 | { | |
2377 | use warnings; | |
2378 | use charnames ':full'; | |
2379 | ||
2380 | print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; | |
2381 | ||
2382 | my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; | |
2383 | ||
2384 | my $hSIGMA = sprintf "%04x", ord $SIGMA; | |
2385 | ||
2386 | my $char = "\N{COMBINING GREEK PERISPOMENI}"; | |
2387 | my $code = sprintf "%04x", ord($char); | |
2388 | ||
bc517b45 JH |
2389 | # Before #13843 this was failing by matching falsely. |
2390 | print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 786\n" : "ok 786\n"; | |
55da9344 | 2391 | } |
b7c83a7e JH |
2392 | |
2393 | { | |
2394 | print "# \\X\n"; | |
2395 | ||
2396 | use charnames ':full'; | |
2397 | ||
eb08e2da JH |
2398 | print "a!" =~ /^(\X)!/ && $1 eq "a" ? |
2399 | "ok 787\n" : "not ok 787 # $1\n"; | |
2400 | print "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF" ? | |
2401 | "ok 788\n" : "not ok 788 # $1\n"; | |
2402 | print "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}" ? | |
2403 | "ok 789\n" : "not ok 789 # $1\n"; | |
2404 | print "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}" ? | |
2405 | "ok 790\n" : "not ok 790 # $1\n"; | |
2406 | print "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && | |
2407 | $1 eq "\N{LATIN CAPITAL LETTER E}" ? | |
2408 | "ok 791\n" : "not ok 791 # $1\n"; | |
2409 | print "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" =~ | |
2410 | /^(\X)!/ && | |
2411 | $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}" ? | |
2412 | "ok 792\n" : "not ok 792 # $1\n"; | |
b7c83a7e | 2413 | } |
112bedeb JH |
2414 | |
2415 | { | |
2416 | print "#\\C and \\X\n"; | |
2417 | ||
2418 | print "!abc!" =~ /a\Cc/ ? "ok 793\n" : "not ok 793\n"; | |
2419 | print "!abc!" =~ /a\Xc/ ? "ok 794\n" : "not ok 794\n"; | |
2420 | } |