3 # Checks if the parser behaves correctly in edge case
4 # (including weird syntax errors)
9 skip_all_if_miniperl("miniperl, no arybase");
10 skip_all_without_unicode_tables();
15 use open qw( :utf8 :std );
16 no warnings qw(misc reserved);
18 plan (tests => 66894);
20 # ${single:colon} should not be treated as a simple variable, but as a
21 # block with a label inside.
26 eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'";
27 is ${"\x{30cd}colon"}, 'label, not var',
28 '${\x{30cd}single:\x{30cd}colon} should be block-label';
32 evalbytes '${single:colon} = "block/label, not var"';
34 'block/label, not var',
35 '...same with ${single:colon}'
39 # ${yadda'etc} and ${yadda::etc} should both work under strict
42 eval q<use strict; ${flark::fleem}>;
43 is($@, '', q<${package::var} works>);
46 eval q<use strict; ${fleem'flark}>;
47 is($@, '', q<...as does ${package'var}>);
50 # The first character in ${...} should respect the rules
55 like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special))
58 # Checking that at least some of the special variables work
59 for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 [ ] ! @ / \ = )) {
61 skip_if_miniperl('No $[ under miniperl', 2) if $v eq '[';
64 is $@, '', "No syntax error for \$$v";
67 eval "use utf8; \$$v;";
68 is $@, '', "No syntax error for \$$v under 'use utf8'";
72 # Checking if the Latin-1 range behaves as expected, and that the behavior is the
73 # same whenever under strict or not.
76 local $SIG {__WARN__} = sub {push @warnings, @_ };
77 my $ord = utf8::unicode_to_native($_);
79 my $syntax_error = 0; # Do we expect this code point to generate a
80 # syntax error? Assume not, for now
84 # A different number of tests are run depending on the branches in this
85 # loop iteration. This allows us to add skips to make the reported total
86 # the same for each iteration.
90 if ($chr =~ /[[:graph:]]/a) {
92 $syntax_error = 1 if $chr eq '{';
94 elsif ($chr =~ /[[:space:]]/a) {
95 $name = sprintf "\\x%02x, an ASCII space character", $ord;
98 elsif ($chr =~ /[[:cntrl:]]/a) {
99 $name = sprintf "\\x%02x, an ASCII control", $ord;
102 elsif ($chr =~ /\pC/) {
103 if ($chr eq "\N{SHY}") {
104 $name = sprintf "\\x%02x, SHY", $ord;
107 $name = sprintf "\\x%02x, a C1 control", $ord;
110 $deprecated = ! $syntax_error;
112 elsif ($chr =~ /\p{XIDStart}/) {
113 $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord;
115 elsif ($chr =~ /\p{XPosixSpace}/) {
116 $name = sprintf "\\x%02x, a non-ASCII space character", $ord;
118 $deprecated = ! $syntax_error;
121 $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord;
123 no warnings 'closure';
124 my $esc = sprintf("%X", $ord);
125 utf8::downgrade($chr);
126 if ($chr !~ /\p{XIDS}/u) {
129 like($@, qr/ syntax\ error | Unrecognized\ character /x,
130 "$name as a length-1 variable generates a syntax error");
133 eval "no strict; \$$chr = 4;",
134 like($@, qr/ syntax\ error | Unrecognized\ character /x,
135 " ... and the same under 'use utf8'");
138 elsif ($chr =~ /[[:punct:][:digit:]]/a) {
140 # Unlike other variables, we dare not try setting the length-1
141 # variables that are ASCII punctuation and digits. This is
142 # because many of these variables have meaning to the system, and
143 # setting them could have side effects or not work as expected
144 # (And using fresh_perl() doesn't always help.) For all these we
145 # just verify that they don't generate a syntax error.
148 is $@, '', "$name as a length-1 variable doesn't generate a syntax error";
151 evalbytes "no strict; use utf8; \$$chr;",
152 is $@, '', " ... and the same under 'use utf8'";
156 is evalbytes "no strict; \$$chr = 10",
158 "$name is legal as a length-1 variable";
160 if ($chr =~ /[[:ascii:]]/) {
162 is evalbytes "no strict; use utf8; \$$chr = 1",
164 " ... and is legal under 'use utf8'";
170 eval "no strict; use utf8; \$$chr = 1";
172 qr/\QUnrecognized character \x{\E\L$esc/,
173 " ... but is illegal as a length-1 variable under 'use utf8'";
182 evalbytes "no strict; \$$chr = 1";
183 is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable");
186 if ($chr !~ /[[:ascii:]]/) {
188 evalbytes "use strict; \$$chr = 1";
191 " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS"
196 evalbytes "\$a$chr = 1";
198 qr/Unrecognized character /,
199 " ... but under 'no utf8', it's not allowed in length-2+ variables"
207 utf8::upgrade($utf8);
209 eval "no strict; \$$utf8 = 1";
210 is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable");
214 eval "use strict; \$$utf8 = 1";
215 if ($chr =~ /[ab]/) { # These are special, for sort()
216 is($@, '', " ... and under 'use utf8', 'use strict',"
217 . " is a valid length-1 variable (\$a and \$b are special)");
222 qr/Global symbol "\$$utf8" requires explicit package name/,
223 " ... and under utf8 has to be required under strict"
231 if ($chr =~ /[#*]/) {
233 # Length-1 variables with these two characters used to be used by
234 # Perl, but now a warning is generated that they're gone.
235 # Ignore such warnings.
236 for (my $i = @warnings - 1; $i >= 0; $i--) {
237 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
240 my $message = " ... and doesn't generate any warnings";
241 $message = " TODO $message" if $ord == 0
244 if (! ok(@warnings == 0, $message)) {
245 note join "\n", @warnings;
249 elsif (! @warnings) {
250 fail(" ... and generates deprecation warnings (since is deprecated)");
254 ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings),
255 " ... and generates deprecation warnings (only)");
260 die "Wrong max count for tests" if $tests > $max_tests;
261 skip("untaken tests", $max_tests - $tests) if $max_tests > $tests;
267 my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla
268 is($@, '', "ASCII character + combining character works as a variable name");
269 is($ret, 100, " ... and returns the correct value");
272 # From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail
274 "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}",
275 "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}",
276 "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}",
279 no warnings 'non_unicode';
280 my $esc = sprintf("%x", ord $chr);
282 eval "\$$chr = 1; \$$chr";
284 qr/\QUnrecognized character \x{$esc};/,
285 "\\x{$esc} is illegal for a length-one identifier"
289 for my $i (0x100..0xffff) {
291 my $esc = sprintf("%x", $i);
293 eval "my \$$chr = q<test>; \$$chr;";
294 if ( $chr =~ /^\p{_Perl_IDStart}$/ ) {
295 is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i));
299 qr/\QUnrecognized character \x{$esc};/,
300 "\\x{$esc} isn't XIDS, illegal as a length-1 variable",
306 # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz
307 # https://rt.perl.org/rt3/Public/Bug/Display.html?id=117101
313 is($$1, $$, q{$$1 parses as ${$1}});
319 is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} );
321 is($@, '', q{$$1 parses correctly});
323 for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) {
324 my $esc = sprintf("\\x{%x}", ord $chr);
332 qr/syntax error|Unrecognized character/,
333 qq{\$\$$esc is a syntax error}
339 # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz
340 # https://rt.perl.org/rt3/Ticket/Display.html?id=117145
348 '${ var } works under strict'
354 for my $var ( '$', "^GLOBAL_PHASE", "^V" ) {
356 is($@, '', "\${ $var} works" );
358 is($@, '', "\${$var } works" );
360 is($@, '', "\${ $var } works" );
362 my $var = "\7LOBAL_PHASE";
364 like($@, qr/Unrecognized character \\x07/,
365 "\${ $var} generates 'Unrecognized character' error" );
367 like($@, qr/Unrecognized character \\x07/,
368 "\${$var } generates 'Unrecognized character' error" );
370 like($@, qr/Unrecognized character \\x07/,
371 "\${ $var } generates 'Unrecognized character' error" );
379 "Newlines at the start of an identifier should be skipped over"
384 skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1)
389 " ... but \$^J is still legal"
393 my $ret = eval "\${\cT\n}";
394 like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message');
398 # Prior to 5.19.4, the following changed behavior depending
399 # on the presence of the newline after '@{'.
402 my $ret = @{ foo { "a" } };
403 is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}');
408 is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}');