t/uni/parser.t

   1 #!./perl
   2
   3 # Checks if the parser behaves correctly in edge cases
   4 # (including weird syntax errors)
   5
   6 BEGIN {
   7     chdir 't' if -d 't';
   8     require './test.pl';
   9     require './charset_tools.pl';
  10     skip_all_without_unicode_tables();
  11 }
  12
  13 plan (tests => 58);
  14
  15 use utf8;
  16 use open qw( :utf8 :std );
  17
  18 is *tèst, "*main::tèst", "sanity check.";
  19 ok $::{"tèst"}, "gets the right glob in the stash.";
  20
  21 my $glob_by_sub = sub { *ｍａｉｎ::method }->();
  22
  23 is *ｍａｉｎ::method, "*ｍａｉｎ::method", "glob stringy works";
  24 is "" . *ｍａｉｎ::method, "*ｍａｉｎ::method", "glob stringify-through-concat works";
  25 is $glob_by_sub, "*ｍａｉｎ::method", "glob stringy works";
  26 is "" . $glob_by_sub, "*ｍａｉｎ::method", "";
  27
  28 sub gimme_glob {
  29     no strict 'refs';
  30     is *{$_[0]}, "*main::$_[0]";
  31     *{$_[0]};
  32 }
  33
  34 is "" . gimme_glob("下郎"), "*main::下郎";
  35 $a = *下郎;
  36 is "" . $a, "*main::下郎";
  37
  38 *{gimme_glob("下郎")} = sub {};
  39
  40 {
  41     ok defined *{"下郎"}{CODE};
  42     ok !defined *{"\344\270\213\351\203\216"}{CODE};
  43 }
  44
  45 $Lèon = 1;
  46 is ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,";
  47 ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";
  48
  49 my $a = "foo" . chr(190);
  50 my $b = $a    . chr(256);
  51 chop $b; # $b is $a with utf8 on
  52
  53 is $a, $b, '$a equals $b';
  54
  55 *$b = sub { 5 };
  56
  57 is eval { main->$a }, 5, q!$a can call $b's sub!;
  58 ok !$@, "..and there's no error.";
  59
  60 my $c = $b;
  61 utf8::encode($c);
  62 ok $b ne $c, '$b unequal $c';
  63 eval { main->$c };
  64 ok $@, q!$c can't call $b's sub.!;
  65
  66 # Now define another sub under the downgraded name:
  67 *$a = sub { 6 };
  68 # Call it:
  69 is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
  70 ok !$@, "..without errors.";
  71 eval { main->$c };
  72 ok $@, "but it's still unreachable through *c";
  73
  74 *$b = \10;
  75 is ${*$a{SCALAR}}, 10;
  76 is ${*$b{SCALAR}}, 10;
  77 is ${*$c{SCALAR}}, undef;
  78
  79 opendir FÒÒ, ".";
  80 closedir FÒÒ;
  81 ::ok($::{"FÒÒ"}, "Bareword generates the right glob.");
  82 ::ok(!$::{"F\303\222\303\222"});
  83
  84 sub участники { 1 }
  85
  86 ok $::{"участники"}, "non-const sub declarations generate the right glob";
  87 is $::{"участники"}->(), 1;
  88
  89 sub 原 () { 1 }
  90
  91 is grep({ $_ eq "\x{539f}"     } keys %::), 1, "Constant subs generate the right glob.";
  92 is grep({ $_ eq "\345\216\237" } keys %::), 0;
  93
  94 #These should probably go elsewhere.
  95 eval q{ sub wròng1 (_$); wròng1(1,2) };
  96 like( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' );
  97
  98 eval q{ sub ча::ики ($__); ча::ики(1,2) };
  99 like( $@, qr/Malformed prototype for ча::ики/ );
 100
 101 our $問 = 10;
 102 is $問, 10, "our works";
 103 is $main::問, 10, "...as does getting the same variable through the fully qualified name";
 104 is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
 105
 106 {
 107     use charnames qw( :full );
 108
 109     eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
 110     $@ =~ s/eval \d+/eval 11/;
 111     is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after  my $ニ <-- HERE near column 8 at (eval 11) line 1.
 112 ', "'Unrecognized character' croak is UTF-8 clean";
 113
 114     eval "q\0foobar\0 \x{FFFF}+1";
 115     $@ =~ s/eval \d+/eval 11/;
 116     is(
 117         $@,
 118        "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
 119        "...and nul-clean"
 120     );
 121
 122     {
 123         use re 'eval';
 124         my $f = qq{(?{\$ネ+ 1; \x{1F42A} })};
 125         eval { "a" =~ /^a$f/ };
 126         my $e = $@;
 127         $e =~ s/eval \d+/eval 11/;
 128         is(
 129             $e,
 130             "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
 131             "Messages from a re-eval are UTF-8 clean"
 132         );
 133
 134         $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
 135         eval { "a" =~ /^a$f/ };
 136         my $e = $@;
 137         $e =~ s/eval \d+/eval 11/;
 138         is(
 139             $e,
 140             "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
 141            "...and nul-clean"
 142         );
 143     }
 144
 145     {
 146         eval qq{\$ネ+ 1; \x{1F42A}};
 147         $@ =~ s/eval \d+/eval 11/;
 148         is(
 149             $@,
 150             "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
 151             "Unrecognized character error doesn't cut off in the middle of characters"
 152         )
 153     }
 154
 155 }
 156
 157 {
 158     use feature 'state';
 159     for ( qw( my state our ) ) {
 160         local $@;
 161         eval "$_ Ｆｏｏ $x = 1;";
 162         like $@, qr/No such class Ｆｏｏ/u, "'No such class' warning for $_ is UTF-8 clean";
 163     }
 164 }
 165
 166 {
 167     local $@;
 168     eval "our \$main::\x{30cb};";
 169     like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
 170 }
 171
 172 {
 173     use feature 'state';
 174     local $@;
 175     for ( qw( my state ) ) {
 176         eval "$_ \$::\x{30cb};";
 177         like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
 178     }
 179 }
 180
 181 {
 182     local $@;
 183     eval qq!print \x{30cb}, "comma""!;
 184     like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
 185 }
 186
 187 # tests for "Bad name"
 188 eval q{ Ｆｏｏ::$bar };
 189 like( $@, qr/Bad name after Ｆｏｏ::/, 'Bad name after Ｆｏｏ::' );
 190 eval q{ Ｆｏｏ''bar };
 191 like( $@, qr/Bad name after Ｆｏｏ'/, 'Bad name after Ｆｏｏ\'' );
 192
 193 {
 194     no warnings 'utf8';
 195     local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
 196                                     # which we ignore
 197     my $malformed_to_be = ($::IS_EBCDIC)   # Overlong sequence
 198                            ? "\x{74}\x{41}"
 199                            : "\x{c0}\x{a0}";
 200     CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
 201     like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}');
 202 }
 203
 204 # RT# 124216: Perl_sv_clear: Assertion
 205 # If a parsing error occurred during a forced token within an interpolated
 206 # context, the stack unwinding failed to restore PL_lex_defer and so after
 207 # error recovery the state restored after the forced token was processed
 208 # was the wrong one, resulting in the lexer thinking we're still inside a
 209 # quoted string and things getting freed multiple times.
 210 #
 211 # The \x{3030} char isn't a legal var name, and this triggers the error.
 212 #
 213 # NB: this only failed if the closing quote of the interpolated string is
 214 # the last char of the file (i.e. no trailing \n).
 215
 216 {
 217     my $bad = "\x{3030}";
 218     # Write out the individual utf8 bytes making up \x{3030}. This
 219     # avoids 'Wide char in print' warnings from test.pl. (We may still
 220     # get that warning when compiling the prog itself, since the
 221     # error it prints to stderr contains a wide char.)
 222     utf8::encode($bad);
 223
 224     fresh_perl_like(qq{use utf8; "\$$bad"},
 225         qr/
 226             \A
 227             ( \QWide character in print at - line 1.\E\n )?
 228             \Qsyntax error at - line 1, near \E"\$.*"\n
 229             \QExecution of - aborted due to compilation errors.\E\z
 230         /xm,
 231
 232         {stderr => 1}, "RT# 124216");
 233 }
 234
 235 SKIP: {
 236
 237     use Config;
 238     if ($Config{uvsize} < 8) {
 239         skip("test is only valid on 64-bit ints", 4);
 240     }
 241     else {
 242         my $a;
 243         my $b;
 244
 245         # This caused a memory fault [perl #128738]
 246         $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000
 247         eval "\$a = q ${b}abc${b}";
 248         is $@, "",
 249                "No errors in eval'ing a string with large code point delimiter";
 250         is $a, 'abc',
 251                "Got expected result in eval'ing a string with a large code point"
 252             . " delimiter";
 253
 254         $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF
 255         eval "\$a = q ${b}Hello, \\\\whirled!${b}";
 256         is $@, "",
 257                "No errors in eval'ing a string with large code point delimiter";
 258         is $a, 'Hello, \whirled!',
 259                "Got expected result in eval'ing a string with a large code point"
 260             . " delimiter";
 261     }
 262 }
 263
 264 fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)');
 265 use utf8;
 266 eval "sort \x{100}%";
 267 die $@;
 268 EOS
 269 syntax error at (eval 1) line 1, at EOF
 270 EXPECT
 271
 272 # New tests go here ^^^^^
 273
 274 # Keep this test last, as it will mess up line number reporting for any
 275 # subsequent tests.
 276
 277 <<END;
 278 ${
 279 #line 57
 280 qq ϟϟ }
 281 END
 282 is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';
 283
 284 # Put new tests above the line number tests.