This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
uni/variables.t: Fix grammar in comment
[perl5.git] / t / uni / variables.t
CommitLineData
07f72646
BF
1#!./perl
2
3# Checks if the parser behaves correctly in edge case
4# (including weird syntax errors)
5
6BEGIN {
b5efbd1f 7 chdir 't' if -d 't';
07f72646 8 require './test.pl';
0b1b7115 9 skip_all_if_miniperl("miniperl, no arybase");
971ab96b 10 skip_all_without_unicode_tables();
07f72646
BF
11}
12
13use 5.016;
14use utf8;
15use open qw( :utf8 :std );
32833930 16no warnings qw(misc reserved);
07f72646 17
3027a07d 18plan (tests => 66900);
07f72646 19
e660c409
FC
20# ${single:colon} should not be treated as a simple variable, but as a
21# block with a label inside.
07f72646
BF
22{
23 no strict;
24
25 local $@;
e660c409
FC
26 eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'";
27 is ${"\x{30cd}colon"}, 'label, not var',
28 '${\x{30cd}single:\x{30cd}colon} should be block-label';
07f72646
BF
29
30 local $@;
31 no utf8;
e660c409
FC
32 evalbytes '${single:colon} = "block/label, not var"';
33 is($::colon,
34 'block/label, not var',
07f72646
BF
35 '...same with ${single:colon}'
36 );
37}
38
39# ${yadda'etc} and ${yadda::etc} should both work under strict
40{
41 local $@;
42 eval q<use strict; ${flark::fleem}>;
43 is($@, '', q<${package::var} works>);
44
45 local $@;
46 eval q<use strict; ${fleem'flark}>;
47 is($@, '', q<...as does ${package'var}>);
48}
49
50# The first character in ${...} should respect the rules
32833930 51{
07f72646
BF
52 local $@;
53 use utf8;
54 eval '${☭asd} = 1';
55 like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special))
56}
32833930
BF
57
58# Checking that at least some of the special variables work
59for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 [ ] ! @ / \ = )) {
3a6b4f26
FC
60 SKIP: {
61 skip_if_miniperl('No $[ under miniperl', 2) if $v eq '[';
32833930
BF
62 local $@;
63 evalbytes "\$$v;";
64 is $@, '', "No syntax error for \$$v";
65
66 local $@;
67 eval "use utf8; \$$v;";
048c5953 68 is $@, '', "No syntax error for \$$v under 'use utf8'";
3a6b4f26 69 }
32833930
BF
70}
71
72# Checking if the Latin-1 range behaves as expected, and that the behavior is the
73# same whenever under strict or not.
fef6cdc5 74for ( 0x0 .. 0xff ) {
3027a07d
KW
75 my @warnings;
76 local $SIG {__WARN__} = sub {push @warnings, @_ };
048c5953
KW
77 my $ord = utf8::unicode_to_native($_);
78 my $chr = chr $ord;
fef6cdc5
KW
79 my $syntax_error = 0; # Do we expect this code point to generate a
80 # syntax error? Assume not, for now
3027a07d 81 my $deprecated = 0;
048c5953 82 my $name;
2fb9f143
KW
83
84 # A different number of tests are run depending on the branches in this
85 # loop iteration. This allows us to add skips to make the reported total
86 # the same for each iteration.
87 my $tests = 0;
3027a07d 88 my $max_tests = 6;
2fb9f143 89
fef6cdc5
KW
90 if ($chr =~ /[[:graph:]]/a) {
91 $name = "'$chr'";
92 $syntax_error = 1 if $chr eq '{';
93 }
94 elsif ($chr =~ /[[:space:]]/a) {
95 $name = sprintf "\\x%02x, an ASCII space character", $ord;
96 $syntax_error = 1;
97 }
98 elsif ($chr =~ /[[:cntrl:]]/a) {
99 if ($chr eq "\N{NULL}") {
100 $name = sprintf "\\x%02x, NUL", $ord;
101 $syntax_error = 1;
102 }
103 else {
104 $name = sprintf "\\x%02x, an ASCII control", $ord;
e9229257
KW
105 $syntax_error = $::IS_EBCDIC;
106 $deprecated = ! $syntax_error;
fef6cdc5
KW
107 }
108 }
4475d0d2
KW
109 elsif ($chr =~ /\pC/) {
110 if ($chr eq "\N{SHY}") {
111 $name = sprintf "\\x%02x, SHY", $ord;
112 }
113 else {
114 $name = sprintf "\\x%02x, a C1 control", $ord;
115 }
e9229257
KW
116 $syntax_error = $::IS_EBCDIC;
117 $deprecated = ! $syntax_error;
048c5953
KW
118 }
119 elsif ($chr =~ /\p{XIDStart}/) {
120 $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord;
121 }
4475d0d2
KW
122 elsif ($chr =~ /\p{XPosixSpace}/) {
123 $name = sprintf "\\x%02x, a non-ASCII space character", $ord;
e9229257
KW
124 $syntax_error = $::IS_EBCDIC;
125 $deprecated = ! $syntax_error;
4475d0d2 126 }
048c5953 127 else {
4475d0d2 128 $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord;
048c5953 129 }
32833930 130 no warnings 'closure';
048c5953 131 my $esc = sprintf("%X", $ord);
32833930
BF
132 utf8::downgrade($chr);
133 if ($chr !~ /\p{XIDS}/u) {
fef6cdc5
KW
134 if ($syntax_error) {
135 evalbytes "\$$chr";
e9229257
KW
136 like($@, qr/ syntax\ error | Unrecognized\ character /x,
137 "$name as a length-1 variable generates a syntax error");
fef6cdc5
KW
138 $tests++;
139 }
9c9bd585 140 elsif ($ord < 32 || $chr =~ /[[:punct:][:digit:]]/a) {
fef6cdc5
KW
141
142 # Unlike other variables, we dare not try setting the length-1
143 # variables that are \cX (for all valid X) nor ASCII ones that are
144 # punctuation nor digits. This is because many of these variables
145 # have meaning to the system, and setting them could have side
146 # effects or not work as expected (And using fresh_perl() doesn't
147 # always help.) For example, setting $^D (to use a visible
148 # representation of code point 0x04) turns on tracing, and setting
149 # $^E sets an error number, but what gets printed is instead a
150 # string associated with that number. For all these we just
151 # verify that they don't generate a syntax error.
152 local $@;
153 evalbytes "\$$chr;";
154 is $@, '', "$name as a length-1 variable doesn't generate a syntax error";
155 $tests++;
156 utf8::upgrade($chr);
157 evalbytes "no strict; use utf8; \$$chr;",
158 is $@, '', " ... and the same under 'use utf8'";
159 $tests++;
160 }
161 else {
40c554ff
KW
162 is evalbytes "no strict; \$$chr = 10",
163 10,
048c5953 164 "$name is legal as a length-1 variable";
40c554ff 165 $tests++;
fef6cdc5
KW
166 if ($chr =~ /[[:ascii:]]/) {
167 utf8::upgrade($chr);
168 is evalbytes "no strict; use utf8; \$$chr = 1",
169 1,
170 " ... and is legal under 'use utf8'";
171 $tests++;
172 }
173 else {
40c554ff
KW
174 utf8::upgrade($chr);
175 local $@;
176 eval "no strict; use utf8; \$$chr = 1";
177 like $@,
178 qr/\QUnrecognized character \x{\E\L$esc/,
179 " ... but is illegal as a length-1 variable under 'use utf8'";
180 $tests++;
181 }
fef6cdc5
KW
182 }
183 }
32833930
BF
184 else {
185 {
186 no utf8;
187 local $@;
188 evalbytes "no strict; \$$chr = 1";
048c5953 189 is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable");
2fb9f143 190 $tests++;
32833930 191
fef6cdc5 192 if ($chr !~ /[[:ascii:]]/) {
40c554ff
KW
193 local $@;
194 evalbytes "use strict; \$$chr = 1";
195 is($@,
196 '',
197 " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS"
198 );
199 $tests++;
32833930 200
40c554ff
KW
201 local $@;
202 evalbytes "\$a$chr = 1";
203 like($@,
204 qr/Unrecognized character /,
205 " ... but under 'no utf8', it's not allowed in length-2+ variables"
206 );
207 $tests++;
fef6cdc5 208 }
32833930
BF
209 }
210 {
211 use utf8;
502bdc0f
KW
212 my $utf8 = $chr;
213 utf8::upgrade($utf8);
32833930 214 local $@;
502bdc0f 215 eval "no strict; \$$utf8 = 1";
048c5953 216 is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable");
2fb9f143 217 $tests++;
32833930
BF
218
219 local $@;
502bdc0f 220 eval "use strict; \$$utf8 = 1";
fef6cdc5
KW
221 if ($chr =~ /[ab]/) { # These are special, for sort()
222 is($@, '', " ... and under 'use utf8', 'use strict',"
223 . " is a valid length-1 variable (\$a and \$b are special)");
224 $tests++;
225 }
226 else {
40c554ff
KW
227 like($@,
228 qr/Global symbol "\$$utf8" requires explicit package name/,
229 " ... and under utf8 has to be required under strict"
230 );
231 $tests++;
fef6cdc5 232 }
32833930
BF
233 }
234 }
2fb9f143 235
3027a07d
KW
236 if (! $deprecated) {
237 if ($chr =~ /[#*]/) {
238
239 # Length-1 variables with these two characters used to be used by
9100b351 240 # Perl, but now it generates a warning that they're gone.
3027a07d
KW
241 # Ignore such warnings.
242 for (my $i = @warnings - 1; $i >= 0; $i--) {
243 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
244 }
245 }
246 ok(@warnings == 0, " ... and doesn't generate any warnings");
247 $tests++;
248 }
249 elsif (! @warnings) {
250 fail(" ... and generates deprecation warnings (since is deprecated)");
251 $tests++;
252 }
253 else {
254 ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings),
255 " ... and generates deprecation warnings (only)");
256 $tests++;
257 }
258
2fb9f143
KW
259 SKIP: {
260 die "Wrong max count for tests" if $tests > $max_tests;
261 skip("untaken tests", $max_tests - $tests) if $max_tests > $tests;
262 }
32833930
BF
263}
264
265{
266 use utf8;
267 my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla
268 is($@, '', "ASCII character + combining character works as a variable name");
048c5953 269 is($ret, 100, " ... and returns the correct value");
32833930
BF
270}
271
272# From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail
273for my $chr (
274 "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}",
275 "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}",
276 "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}",
277 )
278{
279 no warnings 'non_unicode';
280 my $esc = sprintf("%x", ord $chr);
281 local $@;
282 eval "\$$chr = 1; \$$chr";
283 like($@,
284 qr/\QUnrecognized character \x{$esc};/,
285 "\\x{$esc} is illegal for a length-one identifier"
286 );
287}
288
289for my $i (0x100..0xffff) {
290 my $chr = chr($i);
291 my $esc = sprintf("%x", $i);
292 local $@;
293 eval "my \$$chr = q<test>; \$$chr;";
294 if ( $chr =~ /^\p{_Perl_IDStart}$/ ) {
295 is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i));
296 }
297 else {
298 like($@,
299 qr/\QUnrecognized character \x{$esc};/,
300 "\\x{$esc} isn't XIDS, illegal as a length-1 variable",
301 )
302 }
0a520fce
BF
303}
304
305{
306 # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz
307 # https://rt.perl.org/rt3/Public/Bug/Display.html?id=117101
308 no strict;
309
310 local $@;
311 eval <<'EOP';
312 q{$} =~ /(.)/;
313 is($$1, $$, q{$$1 parses as ${$1}});
314
315 $doof = "test";
316 $test = "Got here";
317 $::{+$$} = *doof;
318
319 is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} );
320EOP
321 is($@, '', q{$$1 parses correctly});
322
323 for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) {
324 my $esc = sprintf("\\x{%x}", ord $chr);
325 local $@;
326 eval <<" EOP";
327 \$$chr = q{\$};
328 \$\$$chr;
329 EOP
330
331 like($@,
332 qr/syntax error|Unrecognized character/,
333 qq{\$\$$esc is a syntax error}
334 );
335 }
336}
a21046ad 337
f7bd557b 338{
a21046ad
BF
339 # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz
340 # https://rt.perl.org/rt3/Ticket/Display.html?id=117145
341 local $@;
342 my $var = 10;
343 eval ' ${ var }';
344
345 is(
346 $@,
347 '',
348 '${ var } works under strict'
349 );
350
351 {
352 no strict;
b29f65fc
BF
353 # Silence the deprecation warning for literal controls
354 no warnings 'deprecated';
355
a21046ad 356 for my $var ( '$', "\7LOBAL_PHASE", "^GLOBAL_PHASE", "^V" ) {
e9229257
KW
357 SKIP: {
358 skip("Literal control characters in variable names forbidden on EBCDIC", 3)
359 if ($::IS_EBCDIC && ord substr($var, 0, 1) < 32);
a21046ad
BF
360 eval "\${ $var}";
361 is($@, '', "\${ $var} works" );
362 eval "\${$var }";
363 is($@, '', "\${$var } works" );
364 eval "\${ $var }";
365 is($@, '', "\${ $var } works" );
e9229257 366 }
a21046ad
BF
367 }
368 }
369}
f7bd557b
BF
370
371{
372 is(
373 "".eval "*{\nOIN}",
374 "*main::OIN",
375 "Newlines at the start of an identifier should be skipped over"
376 );
377
378
e9229257
KW
379 SKIP: {
380 skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1)
381 if $::IS_EBCDIC;
382 is(
383 "".eval "*{^JOIN}",
384 "*main::\nOIN",
385 " ... but \$^J is still legal"
386 );
387 }
f7bd557b 388
e9229257
KW
389 SKIP: {
390 skip("Literal control characters in variable names forbidden on EBCDIC", 2)
391 if $::IS_EBCDIC;
b29f65fc 392 no warnings 'deprecated';
f7bd557b
BF
393 my $ret = eval "\${\cT\n}";
394 is($@, "", 'No errors from using ${\n\cT\n}');
048c5953 395 is($ret, $^T, " ... and we got the right value");
e9229257 396 }
f7bd557b 397}
b29f65fc 398
e9229257
KW
399SKIP: {
400 skip("Literal control characters in variable names forbidden on EBCDIC", 5)
401 if $::IS_EBCDIC;
402
b29f65fc
BF
403 # Originally from t/base/lex.t, moved here since we can't
404 # turn deprecation warnings off in that file.
405 no strict;
406 no warnings 'deprecated';
407
408 my $CX = "\cX";
409 $ {$CX} = 17;
410
411 # Does the syntax where we use the literal control character still work?
412 is(
413 eval "\$ {\cX}",
414 17,
415 "Literal control character variables work"
416 );
417
418 eval "\$\cQ = 24"; # Literal control character
048c5953
KW
419 is($@, "", " ... and they can be assigned to without error");
420 is(${"\cQ"}, 24, " ... and the assignment works");
421 is($^Q, 24, " ... even if we access the variable through the caret name");
b29f65fc
BF
422 is(\${"\cQ"}, \$^Q, '\${\cQ} == \$^Q');
423}
7bb20a13
BF
424
425{
426 # Prior to 5.19.4, the following changed behavior depending
427 # on the presence of the newline after '@{'.
428 sub foo (&) { [1] }
429 my %foo = (a=>2);
430 my $ret = @{ foo { "a" } };
431 is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}');
432
433 $ret = @{
434 foo { "a" }
435 };
436 is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}');
437
438}