Commit | Line | Data |
---|---|---|
07f72646 BF |
1 | #!./perl |
2 | ||
3 | # Checks if the parser behaves correctly in edge case | |
4 | # (including weird syntax errors) | |
5 | ||
6 | BEGIN { | |
b5efbd1f | 7 | chdir 't' if -d 't'; |
07f72646 | 8 | require './test.pl'; |
971ab96b | 9 | skip_all_without_unicode_tables(); |
07f72646 BF |
10 | } |
11 | ||
12 | use 5.016; | |
13 | use utf8; | |
14 | use open qw( :utf8 :std ); | |
32833930 | 15 | no warnings qw(misc reserved); |
07f72646 | 16 | |
dcb414ac | 17 | plan (tests => 66880); |
07f72646 | 18 | |
e660c409 FC |
19 | # ${single:colon} should not be treated as a simple variable, but as a |
20 | # block with a label inside. | |
07f72646 BF |
21 | { |
22 | no strict; | |
23 | ||
24 | local $@; | |
e660c409 FC |
25 | eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'"; |
26 | is ${"\x{30cd}colon"}, 'label, not var', | |
27 | '${\x{30cd}single:\x{30cd}colon} should be block-label'; | |
07f72646 BF |
28 | |
29 | local $@; | |
30 | no utf8; | |
e660c409 FC |
31 | evalbytes '${single:colon} = "block/label, not var"'; |
32 | is($::colon, | |
33 | 'block/label, not var', | |
07f72646 BF |
34 | '...same with ${single:colon}' |
35 | ); | |
36 | } | |
37 | ||
38 | # ${yadda'etc} and ${yadda::etc} should both work under strict | |
39 | { | |
40 | local $@; | |
41 | eval q<use strict; ${flark::fleem}>; | |
42 | is($@, '', q<${package::var} works>); | |
43 | ||
44 | local $@; | |
45 | eval q<use strict; ${fleem'flark}>; | |
46 | is($@, '', q<...as does ${package'var}>); | |
47 | } | |
48 | ||
49 | # The first character in ${...} should respect the rules | |
32833930 | 50 | { |
07f72646 BF |
51 | local $@; |
52 | use utf8; | |
53 | eval '${☭asd} = 1'; | |
54 | like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special)) | |
55 | } | |
32833930 BF |
56 | |
57 | # Checking that at least some of the special variables work | |
c22e17d0 | 58 | for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) { |
3a6b4f26 | 59 | SKIP: { |
32833930 BF |
60 | local $@; |
61 | evalbytes "\$$v;"; | |
62 | is $@, '', "No syntax error for \$$v"; | |
63 | ||
64 | local $@; | |
65 | eval "use utf8; \$$v;"; | |
048c5953 | 66 | is $@, '', "No syntax error for \$$v under 'use utf8'"; |
3a6b4f26 | 67 | } |
32833930 BF |
68 | } |
69 | ||
70 | # Checking if the Latin-1 range behaves as expected, and that the behavior is the | |
71 | # same whenever under strict or not. | |
fef6cdc5 | 72 | for ( 0x0 .. 0xff ) { |
3027a07d KW |
73 | my @warnings; |
74 | local $SIG {__WARN__} = sub {push @warnings, @_ }; | |
048c5953 KW |
75 | my $ord = utf8::unicode_to_native($_); |
76 | my $chr = chr $ord; | |
fef6cdc5 KW |
77 | my $syntax_error = 0; # Do we expect this code point to generate a |
78 | # syntax error? Assume not, for now | |
3027a07d | 79 | my $deprecated = 0; |
048c5953 | 80 | my $name; |
2fb9f143 KW |
81 | |
82 | # A different number of tests are run depending on the branches in this | |
83 | # loop iteration. This allows us to add skips to make the reported total | |
84 | # the same for each iteration. | |
85 | my $tests = 0; | |
3027a07d | 86 | my $max_tests = 6; |
2fb9f143 | 87 | |
fef6cdc5 KW |
88 | if ($chr =~ /[[:graph:]]/a) { |
89 | $name = "'$chr'"; | |
90 | $syntax_error = 1 if $chr eq '{'; | |
91 | } | |
92 | elsif ($chr =~ /[[:space:]]/a) { | |
93 | $name = sprintf "\\x%02x, an ASCII space character", $ord; | |
94 | $syntax_error = 1; | |
95 | } | |
96 | elsif ($chr =~ /[[:cntrl:]]/a) { | |
ce4793f1 KW |
97 | $name = sprintf "\\x%02x, an ASCII control", $ord; |
98 | $syntax_error = 1; | |
fef6cdc5 | 99 | } |
4475d0d2 KW |
100 | elsif ($chr =~ /\pC/) { |
101 | if ($chr eq "\N{SHY}") { | |
102 | $name = sprintf "\\x%02x, SHY", $ord; | |
103 | } | |
104 | else { | |
105 | $name = sprintf "\\x%02x, a C1 control", $ord; | |
106 | } | |
acab2422 | 107 | $syntax_error = 1; |
e9229257 | 108 | $deprecated = ! $syntax_error; |
048c5953 KW |
109 | } |
110 | elsif ($chr =~ /\p{XIDStart}/) { | |
111 | $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord; | |
112 | } | |
4475d0d2 KW |
113 | elsif ($chr =~ /\p{XPosixSpace}/) { |
114 | $name = sprintf "\\x%02x, a non-ASCII space character", $ord; | |
acab2422 | 115 | $syntax_error = 1; |
e9229257 | 116 | $deprecated = ! $syntax_error; |
4475d0d2 | 117 | } |
048c5953 | 118 | else { |
4475d0d2 | 119 | $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord; |
048c5953 | 120 | } |
32833930 | 121 | no warnings 'closure'; |
048c5953 | 122 | my $esc = sprintf("%X", $ord); |
32833930 BF |
123 | utf8::downgrade($chr); |
124 | if ($chr !~ /\p{XIDS}/u) { | |
fef6cdc5 KW |
125 | if ($syntax_error) { |
126 | evalbytes "\$$chr"; | |
e9229257 KW |
127 | like($@, qr/ syntax\ error | Unrecognized\ character /x, |
128 | "$name as a length-1 variable generates a syntax error"); | |
fef6cdc5 | 129 | $tests++; |
97bf8a23 | 130 | utf8::upgrade($chr); |
4a493553 | 131 | eval "no strict; \$$chr = 4;", |
97bf8a23 KW |
132 | like($@, qr/ syntax\ error | Unrecognized\ character /x, |
133 | " ... and the same under 'use utf8'"); | |
134 | $tests++; | |
fef6cdc5 | 135 | } |
ce4793f1 | 136 | elsif ($chr =~ /[[:punct:][:digit:]]/a) { |
dcb414ac | 137 | next if ($chr eq '#' or $chr eq '*'); # RT 133583 |
fef6cdc5 KW |
138 | |
139 | # Unlike other variables, we dare not try setting the length-1 | |
ce4793f1 KW |
140 | # variables that are ASCII punctuation and digits. This is |
141 | # because many of these variables have meaning to the system, and | |
142 | # setting them could have side effects or not work as expected | |
143 | # (And using fresh_perl() doesn't always help.) For all these we | |
144 | # just verify that they don't generate a syntax error. | |
fef6cdc5 KW |
145 | local $@; |
146 | evalbytes "\$$chr;"; | |
147 | is $@, '', "$name as a length-1 variable doesn't generate a syntax error"; | |
148 | $tests++; | |
149 | utf8::upgrade($chr); | |
150 | evalbytes "no strict; use utf8; \$$chr;", | |
151 | is $@, '', " ... and the same under 'use utf8'"; | |
152 | $tests++; | |
153 | } | |
154 | else { | |
40c554ff KW |
155 | is evalbytes "no strict; \$$chr = 10", |
156 | 10, | |
048c5953 | 157 | "$name is legal as a length-1 variable"; |
40c554ff | 158 | $tests++; |
fef6cdc5 KW |
159 | if ($chr =~ /[[:ascii:]]/) { |
160 | utf8::upgrade($chr); | |
161 | is evalbytes "no strict; use utf8; \$$chr = 1", | |
162 | 1, | |
163 | " ... and is legal under 'use utf8'"; | |
164 | $tests++; | |
165 | } | |
166 | else { | |
40c554ff KW |
167 | utf8::upgrade($chr); |
168 | local $@; | |
169 | eval "no strict; use utf8; \$$chr = 1"; | |
170 | like $@, | |
171 | qr/\QUnrecognized character \x{\E\L$esc/, | |
172 | " ... but is illegal as a length-1 variable under 'use utf8'"; | |
173 | $tests++; | |
174 | } | |
fef6cdc5 KW |
175 | } |
176 | } | |
32833930 BF |
177 | else { |
178 | { | |
179 | no utf8; | |
180 | local $@; | |
181 | evalbytes "no strict; \$$chr = 1"; | |
048c5953 | 182 | is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable"); |
2fb9f143 | 183 | $tests++; |
32833930 | 184 | |
fef6cdc5 | 185 | if ($chr !~ /[[:ascii:]]/) { |
40c554ff KW |
186 | local $@; |
187 | evalbytes "use strict; \$$chr = 1"; | |
188 | is($@, | |
189 | '', | |
190 | " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS" | |
191 | ); | |
192 | $tests++; | |
32833930 | 193 | |
40c554ff KW |
194 | local $@; |
195 | evalbytes "\$a$chr = 1"; | |
196 | like($@, | |
197 | qr/Unrecognized character /, | |
198 | " ... but under 'no utf8', it's not allowed in length-2+ variables" | |
199 | ); | |
200 | $tests++; | |
fef6cdc5 | 201 | } |
32833930 BF |
202 | } |
203 | { | |
204 | use utf8; | |
502bdc0f KW |
205 | my $utf8 = $chr; |
206 | utf8::upgrade($utf8); | |
32833930 | 207 | local $@; |
502bdc0f | 208 | eval "no strict; \$$utf8 = 1"; |
048c5953 | 209 | is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable"); |
2fb9f143 | 210 | $tests++; |
32833930 BF |
211 | |
212 | local $@; | |
502bdc0f | 213 | eval "use strict; \$$utf8 = 1"; |
fef6cdc5 KW |
214 | if ($chr =~ /[ab]/) { # These are special, for sort() |
215 | is($@, '', " ... and under 'use utf8', 'use strict'," | |
216 | . " is a valid length-1 variable (\$a and \$b are special)"); | |
217 | $tests++; | |
218 | } | |
219 | else { | |
40c554ff KW |
220 | like($@, |
221 | qr/Global symbol "\$$utf8" requires explicit package name/, | |
222 | " ... and under utf8 has to be required under strict" | |
223 | ); | |
224 | $tests++; | |
fef6cdc5 | 225 | } |
32833930 BF |
226 | } |
227 | } | |
2fb9f143 | 228 | |
3027a07d KW |
229 | if (! $deprecated) { |
230 | if ($chr =~ /[#*]/) { | |
231 | ||
232 | # Length-1 variables with these two characters used to be used by | |
dce12407 | 233 | # Perl, but now a warning is generated that they're gone. |
3027a07d KW |
234 | # Ignore such warnings. |
235 | for (my $i = @warnings - 1; $i >= 0; $i--) { | |
236 | splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/; | |
237 | } | |
238 | } | |
97bf8a23 KW |
239 | my $message = " ... and doesn't generate any warnings"; |
240 | $message = " TODO $message" if $ord == 0 | |
241 | || $chr =~ /\s/a; | |
242 | ||
243 | if (! ok(@warnings == 0, $message)) { | |
e68670ae KW |
244 | note join "\n", @warnings; |
245 | } | |
3027a07d KW |
246 | $tests++; |
247 | } | |
248 | elsif (! @warnings) { | |
249 | fail(" ... and generates deprecation warnings (since is deprecated)"); | |
250 | $tests++; | |
251 | } | |
252 | else { | |
253 | ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings), | |
254 | " ... and generates deprecation warnings (only)"); | |
255 | $tests++; | |
256 | } | |
257 | ||
2fb9f143 KW |
258 | SKIP: { |
259 | die "Wrong max count for tests" if $tests > $max_tests; | |
260 | skip("untaken tests", $max_tests - $tests) if $max_tests > $tests; | |
261 | } | |
32833930 BF |
262 | } |
263 | ||
264 | { | |
265 | use utf8; | |
266 | my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla | |
267 | is($@, '', "ASCII character + combining character works as a variable name"); | |
048c5953 | 268 | is($ret, 100, " ... and returns the correct value"); |
32833930 BF |
269 | } |
270 | ||
271 | # From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail | |
272 | for my $chr ( | |
273 | "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}", | |
274 | "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}", | |
275 | "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}", | |
276 | ) | |
277 | { | |
278 | no warnings 'non_unicode'; | |
279 | my $esc = sprintf("%x", ord $chr); | |
280 | local $@; | |
281 | eval "\$$chr = 1; \$$chr"; | |
282 | like($@, | |
283 | qr/\QUnrecognized character \x{$esc};/, | |
284 | "\\x{$esc} is illegal for a length-one identifier" | |
285 | ); | |
286 | } | |
287 | ||
288 | for my $i (0x100..0xffff) { | |
289 | my $chr = chr($i); | |
290 | my $esc = sprintf("%x", $i); | |
291 | local $@; | |
292 | eval "my \$$chr = q<test>; \$$chr;"; | |
293 | if ( $chr =~ /^\p{_Perl_IDStart}$/ ) { | |
294 | is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i)); | |
295 | } | |
296 | else { | |
297 | like($@, | |
298 | qr/\QUnrecognized character \x{$esc};/, | |
299 | "\\x{$esc} isn't XIDS, illegal as a length-1 variable", | |
300 | ) | |
301 | } | |
0a520fce BF |
302 | } |
303 | ||
304 | { | |
305 | # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz | |
8034715d | 306 | # https://github.com/Perl/perl5/issues/12841 |
0a520fce BF |
307 | no strict; |
308 | ||
309 | local $@; | |
310 | eval <<'EOP'; | |
311 | q{$} =~ /(.)/; | |
312 | is($$1, $$, q{$$1 parses as ${$1}}); | |
313 | ||
314 | $doof = "test"; | |
315 | $test = "Got here"; | |
316 | $::{+$$} = *doof; | |
317 | ||
318 | is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} ); | |
319 | EOP | |
320 | is($@, '', q{$$1 parses correctly}); | |
321 | ||
322 | for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) { | |
323 | my $esc = sprintf("\\x{%x}", ord $chr); | |
324 | local $@; | |
325 | eval <<" EOP"; | |
326 | \$$chr = q{\$}; | |
327 | \$\$$chr; | |
328 | EOP | |
329 | ||
330 | like($@, | |
331 | qr/syntax error|Unrecognized character/, | |
332 | qq{\$\$$esc is a syntax error} | |
333 | ); | |
334 | } | |
335 | } | |
a21046ad | 336 | |
f7bd557b | 337 | { |
a21046ad | 338 | # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz |
8034715d | 339 | # https://github.com/Perl/perl5/issues/12849 |
a21046ad BF |
340 | local $@; |
341 | my $var = 10; | |
342 | eval ' ${ var }'; | |
343 | ||
344 | is( | |
345 | $@, | |
346 | '', | |
347 | '${ var } works under strict' | |
348 | ); | |
349 | ||
350 | { | |
351 | no strict; | |
b29f65fc | 352 | |
ce4793f1 | 353 | for my $var ( '$', "^GLOBAL_PHASE", "^V" ) { |
a21046ad BF |
354 | eval "\${ $var}"; |
355 | is($@, '', "\${ $var} works" ); | |
356 | eval "\${$var }"; | |
357 | is($@, '', "\${$var } works" ); | |
358 | eval "\${ $var }"; | |
359 | is($@, '', "\${ $var } works" ); | |
360 | } | |
ce4793f1 KW |
361 | my $var = "\7LOBAL_PHASE"; |
362 | eval "\${ $var}"; | |
363 | like($@, qr/Unrecognized character \\x07/, | |
364 | "\${ $var} generates 'Unrecognized character' error" ); | |
365 | eval "\${$var }"; | |
366 | like($@, qr/Unrecognized character \\x07/, | |
367 | "\${$var } generates 'Unrecognized character' error" ); | |
368 | eval "\${ $var }"; | |
369 | like($@, qr/Unrecognized character \\x07/, | |
370 | "\${ $var } generates 'Unrecognized character' error" ); | |
a21046ad BF |
371 | } |
372 | } | |
f7bd557b BF |
373 | |
374 | { | |
375 | is( | |
376 | "".eval "*{\nOIN}", | |
377 | "*main::OIN", | |
378 | "Newlines at the start of an identifier should be skipped over" | |
379 | ); | |
380 | ||
381 | ||
e9229257 KW |
382 | SKIP: { |
383 | skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1) | |
384 | if $::IS_EBCDIC; | |
385 | is( | |
386 | "".eval "*{^JOIN}", | |
387 | "*main::\nOIN", | |
388 | " ... but \$^J is still legal" | |
389 | ); | |
390 | } | |
f7bd557b BF |
391 | |
392 | my $ret = eval "\${\cT\n}"; | |
ce4793f1 | 393 | like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message'); |
b29f65fc | 394 | } |
7bb20a13 BF |
395 | |
396 | { | |
397 | # Prior to 5.19.4, the following changed behavior depending | |
398 | # on the presence of the newline after '@{'. | |
399 | sub foo (&) { [1] } | |
400 | my %foo = (a=>2); | |
401 | my $ret = @{ foo { "a" } }; | |
402 | is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}'); | |
403 | ||
404 | $ret = @{ | |
405 | foo { "a" } | |
406 | }; | |
407 | is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}'); | |
408 | ||
409 | } |