Commit | Line | Data |
---|---|---|
07f72646 BF |
1 | #!./perl |
2 | ||
3 | # Checks if the parser behaves correctly in edge case | |
4 | # (including weird syntax errors) | |
5 | ||
6 | BEGIN { | |
b5efbd1f | 7 | chdir 't' if -d 't'; |
07f72646 | 8 | require './test.pl'; |
0b1b7115 | 9 | skip_all_if_miniperl("miniperl, no arybase"); |
971ab96b | 10 | skip_all_without_unicode_tables(); |
07f72646 BF |
11 | } |
12 | ||
13 | use 5.016; | |
14 | use utf8; | |
15 | use open qw( :utf8 :std ); | |
32833930 | 16 | no warnings qw(misc reserved); |
07f72646 | 17 | |
ce4793f1 | 18 | plan (tests => 66894); |
07f72646 | 19 | |
e660c409 FC |
20 | # ${single:colon} should not be treated as a simple variable, but as a |
21 | # block with a label inside. | |
07f72646 BF |
22 | { |
23 | no strict; | |
24 | ||
25 | local $@; | |
e660c409 FC |
26 | eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'"; |
27 | is ${"\x{30cd}colon"}, 'label, not var', | |
28 | '${\x{30cd}single:\x{30cd}colon} should be block-label'; | |
07f72646 BF |
29 | |
30 | local $@; | |
31 | no utf8; | |
e660c409 FC |
32 | evalbytes '${single:colon} = "block/label, not var"'; |
33 | is($::colon, | |
34 | 'block/label, not var', | |
07f72646 BF |
35 | '...same with ${single:colon}' |
36 | ); | |
37 | } | |
38 | ||
39 | # ${yadda'etc} and ${yadda::etc} should both work under strict | |
40 | { | |
41 | local $@; | |
42 | eval q<use strict; ${flark::fleem}>; | |
43 | is($@, '', q<${package::var} works>); | |
44 | ||
45 | local $@; | |
46 | eval q<use strict; ${fleem'flark}>; | |
47 | is($@, '', q<...as does ${package'var}>); | |
48 | } | |
49 | ||
50 | # The first character in ${...} should respect the rules | |
32833930 | 51 | { |
07f72646 BF |
52 | local $@; |
53 | use utf8; | |
54 | eval '${☭asd} = 1'; | |
55 | like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special)) | |
56 | } | |
32833930 BF |
57 | |
58 | # Checking that at least some of the special variables work | |
59 | for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 [ ] ! @ / \ = )) { | |
3a6b4f26 FC |
60 | SKIP: { |
61 | skip_if_miniperl('No $[ under miniperl', 2) if $v eq '['; | |
32833930 BF |
62 | local $@; |
63 | evalbytes "\$$v;"; | |
64 | is $@, '', "No syntax error for \$$v"; | |
65 | ||
66 | local $@; | |
67 | eval "use utf8; \$$v;"; | |
048c5953 | 68 | is $@, '', "No syntax error for \$$v under 'use utf8'"; |
3a6b4f26 | 69 | } |
32833930 BF |
70 | } |
71 | ||
72 | # Checking if the Latin-1 range behaves as expected, and that the behavior is the | |
73 | # same whenever under strict or not. | |
fef6cdc5 | 74 | for ( 0x0 .. 0xff ) { |
3027a07d KW |
75 | my @warnings; |
76 | local $SIG {__WARN__} = sub {push @warnings, @_ }; | |
048c5953 KW |
77 | my $ord = utf8::unicode_to_native($_); |
78 | my $chr = chr $ord; | |
fef6cdc5 KW |
79 | my $syntax_error = 0; # Do we expect this code point to generate a |
80 | # syntax error? Assume not, for now | |
3027a07d | 81 | my $deprecated = 0; |
048c5953 | 82 | my $name; |
2fb9f143 KW |
83 | |
84 | # A different number of tests are run depending on the branches in this | |
85 | # loop iteration. This allows us to add skips to make the reported total | |
86 | # the same for each iteration. | |
87 | my $tests = 0; | |
3027a07d | 88 | my $max_tests = 6; |
2fb9f143 | 89 | |
fef6cdc5 KW |
90 | if ($chr =~ /[[:graph:]]/a) { |
91 | $name = "'$chr'"; | |
92 | $syntax_error = 1 if $chr eq '{'; | |
93 | } | |
94 | elsif ($chr =~ /[[:space:]]/a) { | |
95 | $name = sprintf "\\x%02x, an ASCII space character", $ord; | |
96 | $syntax_error = 1; | |
97 | } | |
98 | elsif ($chr =~ /[[:cntrl:]]/a) { | |
ce4793f1 KW |
99 | $name = sprintf "\\x%02x, an ASCII control", $ord; |
100 | $syntax_error = 1; | |
fef6cdc5 | 101 | } |
4475d0d2 KW |
102 | elsif ($chr =~ /\pC/) { |
103 | if ($chr eq "\N{SHY}") { | |
104 | $name = sprintf "\\x%02x, SHY", $ord; | |
105 | } | |
106 | else { | |
107 | $name = sprintf "\\x%02x, a C1 control", $ord; | |
108 | } | |
acab2422 | 109 | $syntax_error = 1; |
e9229257 | 110 | $deprecated = ! $syntax_error; |
048c5953 KW |
111 | } |
112 | elsif ($chr =~ /\p{XIDStart}/) { | |
113 | $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord; | |
114 | } | |
4475d0d2 KW |
115 | elsif ($chr =~ /\p{XPosixSpace}/) { |
116 | $name = sprintf "\\x%02x, a non-ASCII space character", $ord; | |
acab2422 | 117 | $syntax_error = 1; |
e9229257 | 118 | $deprecated = ! $syntax_error; |
4475d0d2 | 119 | } |
048c5953 | 120 | else { |
4475d0d2 | 121 | $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord; |
048c5953 | 122 | } |
32833930 | 123 | no warnings 'closure'; |
048c5953 | 124 | my $esc = sprintf("%X", $ord); |
32833930 BF |
125 | utf8::downgrade($chr); |
126 | if ($chr !~ /\p{XIDS}/u) { | |
fef6cdc5 KW |
127 | if ($syntax_error) { |
128 | evalbytes "\$$chr"; | |
e9229257 KW |
129 | like($@, qr/ syntax\ error | Unrecognized\ character /x, |
130 | "$name as a length-1 variable generates a syntax error"); | |
fef6cdc5 | 131 | $tests++; |
97bf8a23 | 132 | utf8::upgrade($chr); |
4a493553 | 133 | eval "no strict; \$$chr = 4;", |
97bf8a23 KW |
134 | like($@, qr/ syntax\ error | Unrecognized\ character /x, |
135 | " ... and the same under 'use utf8'"); | |
136 | $tests++; | |
fef6cdc5 | 137 | } |
ce4793f1 | 138 | elsif ($chr =~ /[[:punct:][:digit:]]/a) { |
fef6cdc5 KW |
139 | |
140 | # Unlike other variables, we dare not try setting the length-1 | |
ce4793f1 KW |
141 | # variables that are ASCII punctuation and digits. This is |
142 | # because many of these variables have meaning to the system, and | |
143 | # setting them could have side effects or not work as expected | |
144 | # (And using fresh_perl() doesn't always help.) For all these we | |
145 | # just verify that they don't generate a syntax error. | |
fef6cdc5 KW |
146 | local $@; |
147 | evalbytes "\$$chr;"; | |
148 | is $@, '', "$name as a length-1 variable doesn't generate a syntax error"; | |
149 | $tests++; | |
150 | utf8::upgrade($chr); | |
151 | evalbytes "no strict; use utf8; \$$chr;", | |
152 | is $@, '', " ... and the same under 'use utf8'"; | |
153 | $tests++; | |
154 | } | |
155 | else { | |
40c554ff KW |
156 | is evalbytes "no strict; \$$chr = 10", |
157 | 10, | |
048c5953 | 158 | "$name is legal as a length-1 variable"; |
40c554ff | 159 | $tests++; |
fef6cdc5 KW |
160 | if ($chr =~ /[[:ascii:]]/) { |
161 | utf8::upgrade($chr); | |
162 | is evalbytes "no strict; use utf8; \$$chr = 1", | |
163 | 1, | |
164 | " ... and is legal under 'use utf8'"; | |
165 | $tests++; | |
166 | } | |
167 | else { | |
40c554ff KW |
168 | utf8::upgrade($chr); |
169 | local $@; | |
170 | eval "no strict; use utf8; \$$chr = 1"; | |
171 | like $@, | |
172 | qr/\QUnrecognized character \x{\E\L$esc/, | |
173 | " ... but is illegal as a length-1 variable under 'use utf8'"; | |
174 | $tests++; | |
175 | } | |
fef6cdc5 KW |
176 | } |
177 | } | |
32833930 BF |
178 | else { |
179 | { | |
180 | no utf8; | |
181 | local $@; | |
182 | evalbytes "no strict; \$$chr = 1"; | |
048c5953 | 183 | is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable"); |
2fb9f143 | 184 | $tests++; |
32833930 | 185 | |
fef6cdc5 | 186 | if ($chr !~ /[[:ascii:]]/) { |
40c554ff KW |
187 | local $@; |
188 | evalbytes "use strict; \$$chr = 1"; | |
189 | is($@, | |
190 | '', | |
191 | " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS" | |
192 | ); | |
193 | $tests++; | |
32833930 | 194 | |
40c554ff KW |
195 | local $@; |
196 | evalbytes "\$a$chr = 1"; | |
197 | like($@, | |
198 | qr/Unrecognized character /, | |
199 | " ... but under 'no utf8', it's not allowed in length-2+ variables" | |
200 | ); | |
201 | $tests++; | |
fef6cdc5 | 202 | } |
32833930 BF |
203 | } |
204 | { | |
205 | use utf8; | |
502bdc0f KW |
206 | my $utf8 = $chr; |
207 | utf8::upgrade($utf8); | |
32833930 | 208 | local $@; |
502bdc0f | 209 | eval "no strict; \$$utf8 = 1"; |
048c5953 | 210 | is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable"); |
2fb9f143 | 211 | $tests++; |
32833930 BF |
212 | |
213 | local $@; | |
502bdc0f | 214 | eval "use strict; \$$utf8 = 1"; |
fef6cdc5 KW |
215 | if ($chr =~ /[ab]/) { # These are special, for sort() |
216 | is($@, '', " ... and under 'use utf8', 'use strict'," | |
217 | . " is a valid length-1 variable (\$a and \$b are special)"); | |
218 | $tests++; | |
219 | } | |
220 | else { | |
40c554ff KW |
221 | like($@, |
222 | qr/Global symbol "\$$utf8" requires explicit package name/, | |
223 | " ... and under utf8 has to be required under strict" | |
224 | ); | |
225 | $tests++; | |
fef6cdc5 | 226 | } |
32833930 BF |
227 | } |
228 | } | |
2fb9f143 | 229 | |
3027a07d KW |
230 | if (! $deprecated) { |
231 | if ($chr =~ /[#*]/) { | |
232 | ||
233 | # Length-1 variables with these two characters used to be used by | |
dce12407 | 234 | # Perl, but now a warning is generated that they're gone. |
3027a07d KW |
235 | # Ignore such warnings. |
236 | for (my $i = @warnings - 1; $i >= 0; $i--) { | |
237 | splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/; | |
238 | } | |
239 | } | |
97bf8a23 KW |
240 | my $message = " ... and doesn't generate any warnings"; |
241 | $message = " TODO $message" if $ord == 0 | |
242 | || $chr =~ /\s/a; | |
243 | ||
244 | if (! ok(@warnings == 0, $message)) { | |
e68670ae KW |
245 | note join "\n", @warnings; |
246 | } | |
3027a07d KW |
247 | $tests++; |
248 | } | |
249 | elsif (! @warnings) { | |
250 | fail(" ... and generates deprecation warnings (since is deprecated)"); | |
251 | $tests++; | |
252 | } | |
253 | else { | |
254 | ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings), | |
255 | " ... and generates deprecation warnings (only)"); | |
256 | $tests++; | |
257 | } | |
258 | ||
2fb9f143 KW |
259 | SKIP: { |
260 | die "Wrong max count for tests" if $tests > $max_tests; | |
261 | skip("untaken tests", $max_tests - $tests) if $max_tests > $tests; | |
262 | } | |
32833930 BF |
263 | } |
264 | ||
265 | { | |
266 | use utf8; | |
267 | my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla | |
268 | is($@, '', "ASCII character + combining character works as a variable name"); | |
048c5953 | 269 | is($ret, 100, " ... and returns the correct value"); |
32833930 BF |
270 | } |
271 | ||
272 | # From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail | |
273 | for my $chr ( | |
274 | "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}", | |
275 | "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}", | |
276 | "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}", | |
277 | ) | |
278 | { | |
279 | no warnings 'non_unicode'; | |
280 | my $esc = sprintf("%x", ord $chr); | |
281 | local $@; | |
282 | eval "\$$chr = 1; \$$chr"; | |
283 | like($@, | |
284 | qr/\QUnrecognized character \x{$esc};/, | |
285 | "\\x{$esc} is illegal for a length-one identifier" | |
286 | ); | |
287 | } | |
288 | ||
289 | for my $i (0x100..0xffff) { | |
290 | my $chr = chr($i); | |
291 | my $esc = sprintf("%x", $i); | |
292 | local $@; | |
293 | eval "my \$$chr = q<test>; \$$chr;"; | |
294 | if ( $chr =~ /^\p{_Perl_IDStart}$/ ) { | |
295 | is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i)); | |
296 | } | |
297 | else { | |
298 | like($@, | |
299 | qr/\QUnrecognized character \x{$esc};/, | |
300 | "\\x{$esc} isn't XIDS, illegal as a length-1 variable", | |
301 | ) | |
302 | } | |
0a520fce BF |
303 | } |
304 | ||
305 | { | |
306 | # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz | |
307 | # https://rt.perl.org/rt3/Public/Bug/Display.html?id=117101 | |
308 | no strict; | |
309 | ||
310 | local $@; | |
311 | eval <<'EOP'; | |
312 | q{$} =~ /(.)/; | |
313 | is($$1, $$, q{$$1 parses as ${$1}}); | |
314 | ||
315 | $doof = "test"; | |
316 | $test = "Got here"; | |
317 | $::{+$$} = *doof; | |
318 | ||
319 | is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} ); | |
320 | EOP | |
321 | is($@, '', q{$$1 parses correctly}); | |
322 | ||
323 | for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) { | |
324 | my $esc = sprintf("\\x{%x}", ord $chr); | |
325 | local $@; | |
326 | eval <<" EOP"; | |
327 | \$$chr = q{\$}; | |
328 | \$\$$chr; | |
329 | EOP | |
330 | ||
331 | like($@, | |
332 | qr/syntax error|Unrecognized character/, | |
333 | qq{\$\$$esc is a syntax error} | |
334 | ); | |
335 | } | |
336 | } | |
a21046ad | 337 | |
f7bd557b | 338 | { |
a21046ad BF |
339 | # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz |
340 | # https://rt.perl.org/rt3/Ticket/Display.html?id=117145 | |
341 | local $@; | |
342 | my $var = 10; | |
343 | eval ' ${ var }'; | |
344 | ||
345 | is( | |
346 | $@, | |
347 | '', | |
348 | '${ var } works under strict' | |
349 | ); | |
350 | ||
351 | { | |
352 | no strict; | |
b29f65fc | 353 | |
ce4793f1 | 354 | for my $var ( '$', "^GLOBAL_PHASE", "^V" ) { |
a21046ad BF |
355 | eval "\${ $var}"; |
356 | is($@, '', "\${ $var} works" ); | |
357 | eval "\${$var }"; | |
358 | is($@, '', "\${$var } works" ); | |
359 | eval "\${ $var }"; | |
360 | is($@, '', "\${ $var } works" ); | |
361 | } | |
ce4793f1 KW |
362 | my $var = "\7LOBAL_PHASE"; |
363 | eval "\${ $var}"; | |
364 | like($@, qr/Unrecognized character \\x07/, | |
365 | "\${ $var} generates 'Unrecognized character' error" ); | |
366 | eval "\${$var }"; | |
367 | like($@, qr/Unrecognized character \\x07/, | |
368 | "\${$var } generates 'Unrecognized character' error" ); | |
369 | eval "\${ $var }"; | |
370 | like($@, qr/Unrecognized character \\x07/, | |
371 | "\${ $var } generates 'Unrecognized character' error" ); | |
a21046ad BF |
372 | } |
373 | } | |
f7bd557b BF |
374 | |
375 | { | |
376 | is( | |
377 | "".eval "*{\nOIN}", | |
378 | "*main::OIN", | |
379 | "Newlines at the start of an identifier should be skipped over" | |
380 | ); | |
381 | ||
382 | ||
e9229257 KW |
383 | SKIP: { |
384 | skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1) | |
385 | if $::IS_EBCDIC; | |
386 | is( | |
387 | "".eval "*{^JOIN}", | |
388 | "*main::\nOIN", | |
389 | " ... but \$^J is still legal" | |
390 | ); | |
391 | } | |
f7bd557b BF |
392 | |
393 | my $ret = eval "\${\cT\n}"; | |
ce4793f1 | 394 | like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message'); |
b29f65fc | 395 | } |
7bb20a13 BF |
396 | |
397 | { | |
398 | # Prior to 5.19.4, the following changed behavior depending | |
399 | # on the presence of the newline after '@{'. | |
400 | sub foo (&) { [1] } | |
401 | my %foo = (a=>2); | |
402 | my $ret = @{ foo { "a" } }; | |
403 | is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}'); | |
404 | ||
405 | $ret = @{ | |
406 | foo { "a" } | |
407 | }; | |
408 | is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}'); | |
409 | ||
410 | } |