t/uni/variables.t

   1 #!./perl
   2
   3 # Checks if the parser behaves correctly in edge case
   4 # (including weird syntax errors)
   5
   6 BEGIN {
   7     chdir 't' if -d 't';
   8     require './test.pl';
   9     skip_all_without_unicode_tables();
  10 }
  11
  12 use 5.016;
  13 use utf8;
  14 use open qw( :utf8 :std );
  15 no warnings qw(misc reserved);
  16
  17 plan (tests => 66880);
  18
  19 # ${single:colon} should not be treated as a simple variable, but as a
  20 # block with a label inside.
  21 {
  22     no strict;
  23
  24     local $@;
  25     eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'";
  26     is ${"\x{30cd}colon"}, 'label, not var',
  27          '${\x{30cd}single:\x{30cd}colon} should be block-label';
  28
  29     local $@;
  30     no utf8;
  31     evalbytes '${single:colon} = "block/label, not var"';
  32     is($::colon,
  33          'block/label, not var',
  34          '...same with ${single:colon}'
  35         );
  36 }
  37
  38 # ${yadda'etc} and ${yadda::etc} should both work under strict
  39 {
  40     local $@;
  41     eval q<use strict; ${flark::fleem}>;
  42     is($@, '', q<${package::var} works>);
  43
  44     local $@;
  45     eval q<use strict; ${fleem'flark}>;
  46     is($@, '', q<...as does ${package'var}>);
  47 }
  48
  49 # The first character in ${...} should respect the rules
  50 {
  51    local $@;
  52    use utf8;
  53    eval '${☭asd} = 1';
  54    like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special))
  55 }
  56
  57 # Checking that at least some of the special variables work
  58 for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) {
  59   SKIP: {
  60     local $@;
  61     evalbytes "\$$v;";
  62     is $@, '', "No syntax error for \$$v";
  63
  64     local $@;
  65     eval "use utf8; \$$v;";
  66     is $@, '', "No syntax error for \$$v under 'use utf8'";
  67   }
  68 }
  69
  70 # Checking if the Latin-1 range behaves as expected, and that the behavior is the
  71 # same whenever under strict or not.
  72 for ( 0x0 .. 0xff ) {
  73     my @warnings;
  74     local $SIG {__WARN__} = sub {push @warnings, @_ };
  75     my $ord = utf8::unicode_to_native($_);
  76     my $chr = chr $ord;
  77     my $syntax_error = 0;   # Do we expect this code point to generate a
  78                             # syntax error?  Assume not, for now
  79     my $deprecated = 0;
  80     my $name;
  81
  82     # A different number of tests are run depending on the branches in this
  83     # loop iteration.  This allows us to add skips to make the reported total
  84     # the same for each iteration.
  85     my $tests = 0;
  86     my $max_tests = 6;
  87
  88     if ($chr =~ /[[:graph:]]/a) {
  89         $name = "'$chr'";
  90         $syntax_error = 1 if $chr eq '{';
  91     }
  92     elsif ($chr =~ /[[:space:]]/a) {
  93         $name = sprintf "\\x%02x, an ASCII space character", $ord;
  94         $syntax_error = 1;
  95     }
  96     elsif ($chr =~ /[[:cntrl:]]/a) {
  97         $name = sprintf "\\x%02x, an ASCII control", $ord;
  98         $syntax_error = 1;
  99     }
 100     elsif ($chr =~ /\pC/) {
 101         if ($chr eq "\N{SHY}") {
 102             $name = sprintf "\\x%02x, SHY", $ord;
 103         }
 104         else {
 105             $name = sprintf "\\x%02x, a C1 control", $ord;
 106         }
 107         $syntax_error = 1;
 108         $deprecated = ! $syntax_error;
 109     }
 110     elsif ($chr =~ /\p{XIDStart}/) {
 111         $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord;
 112     }
 113     elsif ($chr =~ /\p{XPosixSpace}/) {
 114         $name = sprintf "\\x%02x, a non-ASCII space character", $ord;
 115         $syntax_error = 1;
 116         $deprecated = ! $syntax_error;
 117     }
 118     else {
 119         $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord;
 120     }
 121     no warnings 'closure';
 122     my $esc = sprintf("%X", $ord);
 123     utf8::downgrade($chr);
 124     if ($chr !~ /\p{XIDS}/u) {
 125         if ($syntax_error) {
 126             evalbytes "\$$chr";
 127             like($@, qr/ syntax\ error | Unrecognized\ character /x,
 128                      "$name as a length-1 variable generates a syntax error");
 129             $tests++;
 130             utf8::upgrade($chr);
 131             eval "no strict; \$$chr = 4;",
 132             like($@, qr/ syntax\ error | Unrecognized\ character /x,
 133                      "  ... and the same under 'use utf8'");
 134             $tests++;
 135         }
 136         elsif ($chr =~ /[[:punct:][:digit:]]/a) {
 137             next if ($chr eq '#' or $chr eq '*'); # RT 133583
 138
 139             # Unlike other variables, we dare not try setting the length-1
 140             # variables that are ASCII punctuation and digits.  This is
 141             # because many of these variables have meaning to the system, and
 142             # setting them could have side effects or not work as expected
 143             # (And using fresh_perl() doesn't always help.) For all these we
 144             # just verify that they don't generate a syntax error.
 145             local $@;
 146             evalbytes "\$$chr;";
 147             is $@, '', "$name as a length-1 variable doesn't generate a syntax error";
 148             $tests++;
 149             utf8::upgrade($chr);
 150             evalbytes "no strict; use utf8; \$$chr;",
 151             is $@, '', "  ... and the same under 'use utf8'";
 152             $tests++;
 153         }
 154         else {
 155             is evalbytes "no strict; \$$chr = 10",
 156                 10,
 157                 "$name is legal as a length-1 variable";
 158             $tests++;
 159             if ($chr =~ /[[:ascii:]]/) {
 160                 utf8::upgrade($chr);
 161                 is evalbytes "no strict; use utf8; \$$chr = 1",
 162                     1,
 163                     "  ... and is legal under 'use utf8'";
 164                 $tests++;
 165             }
 166             else {
 167                 utf8::upgrade($chr);
 168                 local $@;
 169                 eval "no strict; use utf8; \$$chr = 1";
 170                 like $@,
 171                     qr/\QUnrecognized character \x{\E\L$esc/,
 172                     "  ... but is illegal as a length-1 variable under 'use utf8'";
 173                 $tests++;
 174             }
 175         }
 176     }
 177     else {
 178         {
 179             no utf8;
 180             local $@;
 181             evalbytes "no strict; \$$chr = 1";
 182             is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable");
 183             $tests++;
 184
 185             if ($chr !~ /[[:ascii:]]/) {
 186                 local $@;
 187                 evalbytes "use strict; \$$chr = 1";
 188                 is($@,
 189                     '',
 190                     "  ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS"
 191                 );
 192                 $tests++;
 193
 194                 local $@;
 195                 evalbytes "\$a$chr = 1";
 196                 like($@,
 197                     qr/Unrecognized character /,
 198                     "  ... but under 'no utf8', it's not allowed in length-2+ variables"
 199                 );
 200                 $tests++;
 201             }
 202         }
 203         {
 204             use utf8;
 205             my $utf8 = $chr;
 206             utf8::upgrade($utf8);
 207             local $@;
 208             eval "no strict; \$$utf8 = 1";
 209             is($@, '', "  ... and under 'use utf8', 'no strict', is a valid length-1 variable");
 210             $tests++;
 211
 212             local $@;
 213             eval "use strict; \$$utf8 = 1";
 214             if ($chr =~ /[ab]/) {   # These are special, for sort()
 215                 is($@, '', "  ... and under 'use utf8', 'use strict',"
 216                     . " is a valid length-1 variable (\$a and \$b are special)");
 217                 $tests++;
 218             }
 219             else {
 220                 like($@,
 221                     qr/Global symbol "\$$utf8" requires explicit package name/,
 222                     "  ... and under utf8 has to be required under strict"
 223                 );
 224                 $tests++;
 225             }
 226         }
 227     }
 228
 229     if (! $deprecated) {
 230         if ($chr =~ /[#*]/) {
 231
 232             # Length-1 variables with these two characters used to be used by
 233             # Perl, but now a warning is generated that they're gone.
 234             # Ignore such warnings.
 235             for (my $i = @warnings - 1; $i >= 0; $i--) {
 236                 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
 237             }
 238         }
 239         my $message = "  ... and doesn't generate any warnings";
 240         $message = "  TODO $message" if    $ord == 0
 241                                         || $chr =~ /\s/a;
 242
 243         if (! ok(@warnings == 0, $message)) {
 244             note join "\n", @warnings;
 245         }
 246         $tests++;
 247     }
 248     elsif (! @warnings) {
 249         fail("  ... and generates deprecation warnings (since is deprecated)");
 250         $tests++;
 251     }
 252     else {
 253         ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings),
 254            "  ... and generates deprecation warnings (only)");
 255         $tests++;
 256     }
 257
 258     SKIP: {
 259         die "Wrong max count for tests" if $tests > $max_tests;
 260         skip("untaken tests", $max_tests - $tests) if $max_tests > $tests;
 261     }
 262 }
 263
 264 {
 265     use utf8;
 266     my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla
 267     is($@, '', "ASCII character + combining character works as a variable name");
 268     is($ret, 100, "  ... and returns the correct value");
 269 }
 270
 271 # From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail
 272 for my $chr (
 273       "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}",
 274       "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}",
 275       "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}",
 276    )
 277 {
 278    no warnings 'non_unicode';
 279    my $esc = sprintf("%x", ord $chr);
 280    local $@;
 281    eval "\$$chr = 1; \$$chr";
 282    like($@,
 283         qr/\QUnrecognized character \x{$esc};/,
 284         "\\x{$esc} is illegal for a length-one identifier"
 285        );
 286 }
 287
 288 for my $i (0x100..0xffff) {
 289    my $chr = chr($i);
 290    my $esc = sprintf("%x", $i);
 291    local $@;
 292    eval "my \$$chr = q<test>; \$$chr;";
 293    if ( $chr =~ /^\p{_Perl_IDStart}$/ ) {
 294       is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i));
 295    }
 296    else {
 297       like($@,
 298            qr/\QUnrecognized character \x{$esc};/,
 299            "\\x{$esc} isn't XIDS, illegal as a length-1 variable",
 300           )
 301    }
 302 }
 303
 304 {
 305     # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz
 306     # https://github.com/Perl/perl5/issues/12841
 307     no strict;
 308
 309     local $@;
 310     eval <<'EOP';
 311     q{$} =~ /(.)/;
 312     is($$1, $$, q{$$1 parses as ${$1}});
 313
 314     $doof = "test";
 315     $test = "Got here";
 316     $::{+$$} = *doof;
 317
 318     is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} );
 319 EOP
 320     is($@, '', q{$$1 parses correctly});
 321
 322     for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) {
 323         my $esc = sprintf("\\x{%x}", ord $chr);
 324         local $@;
 325         eval <<"    EOP";
 326             \$$chr = q{\$};
 327             \$\$$chr;
 328     EOP
 329
 330         like($@,
 331              qr/syntax error|Unrecognized character/,
 332              qq{\$\$$esc is a syntax error}
 333         );
 334     }
 335 }
 336
 337 {
 338     # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz
 339     # https://github.com/Perl/perl5/issues/12849
 340     local $@;
 341     my $var = 10;
 342     eval ' ${  var  }';
 343
 344     is(
 345         $@,
 346         '',
 347         '${  var  } works under strict'
 348     );
 349
 350     {
 351         no strict;
 352
 353         for my $var ( '$', "^GLOBAL_PHASE", "^V" ) {
 354             eval "\${ $var}";
 355             is($@, '', "\${ $var} works" );
 356             eval "\${$var }";
 357             is($@, '', "\${$var } works" );
 358             eval "\${ $var }";
 359             is($@, '', "\${ $var } works" );
 360         }
 361         my $var = "\7LOBAL_PHASE";
 362         eval "\${ $var}";
 363         like($@, qr/Unrecognized character \\x07/,
 364              "\${ $var} generates 'Unrecognized character' error" );
 365         eval "\${$var }";
 366         like($@, qr/Unrecognized character \\x07/,
 367              "\${$var } generates 'Unrecognized character' error" );
 368         eval "\${ $var }";
 369         like($@, qr/Unrecognized character \\x07/,
 370              "\${ $var } generates 'Unrecognized character' error" );
 371     }
 372 }
 373
 374 {
 375     is(
 376         "".eval "*{\nOIN}",
 377         "*main::OIN",
 378         "Newlines at the start of an identifier should be skipped over"
 379     );
 380
 381
 382     SKIP: {
 383         skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1)
 384                                                                 if $::IS_EBCDIC;
 385         is(
 386             "".eval "*{^JOIN}",
 387             "*main::\nOIN",
 388             "  ... but \$^J is still legal"
 389         );
 390     }
 391
 392     my $ret = eval "\${\cT\n}";
 393     like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message');
 394 }
 395
 396 {
 397     # Prior to 5.19.4, the following changed behavior depending
 398     # on the presence of the newline after '@{'.
 399     sub foo (&) { [1] }
 400     my %foo = (a=>2);
 401     my $ret = @{ foo { "a" } };
 402     is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}');
 403
 404     $ret = @{
 405             foo { "a" }
 406         };
 407     is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}');
 408
 409 }