This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use fnc to force out malformed warnings
[perl5.git] / t / uni / parser.t
CommitLineData
1a96a5b7
BF
1#!./perl
2
3# Checks if the parser behaves correctly in edge cases
4# (including weird syntax errors)
5
6BEGIN {
b5efbd1f 7 chdir 't' if -d 't';
1a96a5b7 8 require './test.pl';
41779348 9 skip_all_without_unicode_tables();
1a96a5b7
BF
10}
11
2aa61ac3 12plan (tests => 55);
1a96a5b7
BF
13
14use utf8;
15use open qw( :utf8 :std );
16
0435b362 17is *tèst, "*main::tèst", "sanity check.";
1a96a5b7
BF
18ok $::{"tèst"}, "gets the right glob in the stash.";
19
20my $glob_by_sub = sub { *main::method }->();
21
22is *main::method, "*main::method", "glob stringy works";
23is "" . *main::method, "*main::method", "glob stringify-through-concat works";
24is $glob_by_sub, "*main::method", "glob stringy works";
25is "" . $glob_by_sub, "*main::method", "";
26
27sub gimme_glob {
28 no strict 'refs';
29 is *{$_[0]}, "*main::$_[0]";
30 *{$_[0]};
31}
32
33is "" . gimme_glob("下郎"), "*main::下郎";
34$a = *下郎;
35is "" . $a, "*main::下郎";
36
37*{gimme_glob("下郎")} = sub {};
38
39{
40 ok defined *{"下郎"}{CODE};
41 ok !defined *{"\344\270\213\351\203\216"}{CODE};
42}
43
44$Lèon = 1;
45is ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,";
46ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";
47
48my $a = "foo" . chr(190);
49my $b = $a . chr(256);
50chop $b; # $b is $a with utf8 on
51
52is $a, $b, '$a equals $b';
53
54*$b = sub { 5 };
55
56is eval { main->$a }, 5, q!$a can call $b's sub!;
57ok !$@, "..and there's no error.";
58
59my $c = $b;
60utf8::encode($c);
61ok $b ne $c, '$b unequal $c';
62eval { main->$c };
63ok $@, q!$c can't call $b's sub.!;
64
65# Now define another sub under the downgraded name:
66*$a = sub { 6 };
67# Call it:
68is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
69ok !$@, "..without errors.";
70eval { main->$c };
71ok $@, "but it's still unreachable through *c";
72
73*$b = \10;
74is ${*$a{SCALAR}}, 10;
75is ${*$b{SCALAR}}, 10;
76is ${*$c{SCALAR}}, undef;
77
78opendir FÒÒ, ".";
79closedir FÒÒ;
80::ok($::{"FÒÒ"}, "Bareword generates the right glob.");
81::ok(!$::{"F\303\222\303\222"});
82
83sub участники { 1 }
84
85ok $::{"участники"}, "non-const sub declarations generate the right glob";
2eaf799e 86is $::{"участники"}->(), 1;
1a96a5b7 87
3453414d
BF
88sub 原 () { 1 }
89
90is grep({ $_ eq "\x{539f}" } keys %::), 1, "Constant subs generate the right glob.";
91is grep({ $_ eq "\345\216\237" } keys %::), 0;
92
108f32a5
BF
93#These should probably go elsewhere.
94eval q{ sub wròng1 (_$); wròng1(1,2) };
95like( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' );
96
97eval q{ sub ча::ики ($__); ча::ики(1,2) };
98like( $@, qr/Malformed prototype for ча::ики/ );
99
23b0eed2
BF
100our $問 = 10;
101is $問, 10, "our works";
102is $main::問, 10, "...as does getting the same variable through the fully qualified name";
103is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
e2f06df0
BF
104
105{
106 use charnames qw( :full );
107
108 eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
055745ba 109 $@ =~ s/eval \d+/eval 11/;
08454bd8 110 is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after my $ニ <-- HERE near column 8 at (eval 11) line 1.
e2f06df0 111', "'Unrecognized character' croak is UTF-8 clean";
84051091
BF
112
113 eval "q\0foobar\0 \x{FFFF}+1";
114 $@ =~ s/eval \d+/eval 11/;
115 is(
116 $@,
117 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
118 "...and nul-clean"
119 );
120
ce4bd586 121 {
84051091
BF
122 use re 'eval';
123 my $f = qq{(?{\$ネ+ 1; \x{1F42A} })};
124 eval { "a" =~ /^a$f/ };
125 my $e = $@;
126 $e =~ s/eval \d+/eval 11/;
127 is(
128 $e,
129 "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
130 "Messages from a re-eval are UTF-8 clean"
131 );
132
133 $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
134 eval { "a" =~ /^a$f/ };
135 my $e = $@;
136 $e =~ s/eval \d+/eval 11/;
137 is(
138 $e,
139 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
140 "...and nul-clean"
141 );
142 }
143
144 {
145 eval qq{\$ネ+ 1; \x{1F42A}};
146 $@ =~ s/eval \d+/eval 11/;
147 is(
148 $@,
149 "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
150 "Unrecognized character error doesn't cut off in the middle of characters"
151 )
152 }
153
e2f06df0 154}
3c54b17a
BF
155
156{
157 use feature 'state';
158 for ( qw( my state our ) ) {
159 local $@;
160 eval "$_ Foo $x = 1;";
161 like $@, qr/No such class Foo/u, "'No such class' warning for $_ is UTF-8 clean";
162 }
4bca4ee0
BF
163}
164
165{
166 local $@;
167 eval "our \$main::\x{30cb};";
168 like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
169}
58576270
BF
170
171{
172 use feature 'state';
173 local $@;
174 for ( qw( my state ) ) {
175 eval "$_ \$::\x{30cb};";
176 like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
177 }
178}
d0fb66e4
BF
179
180{
181 local $@;
182 eval qq!print \x{30cb}, "comma""!;
183 like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
184}
86fe3f36
BF
185
186# tests for "Bad name"
187eval q{ Foo::$bar };
188like( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' );
189eval q{ Foo''bar };
190like( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' );
107160e2
KW
191
192{
193 no warnings 'utf8';
75219bac
KW
194 local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
195 # which we ignore
28642c96
KW
196 my $malformed_to_be = ($::IS_EBCDIC) # Overlong sequence
197 ? "\x{74}\x{41}"
198 : "\x{c0}\x{a0}";
107160e2 199 CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
be192702 200 like( $@, qr/Malformed UTF-8 character immediately after '\\N\{abc' at .* within string/, 'Malformed UTF-8 input to \N{}');
107160e2 201}
c7f317a9
DM
202
203# RT# 124216: Perl_sv_clear: Assertion
204# If a parsing error occurred during a forced token within an interpolated
205# context, the stack unwinding failed to restore PL_lex_defer and so after
206# error recovery the state restored after the forced token was processed
207# was the wrong one, resulting in the lexer thinking we're still inside a
208# quoted string and things getting freed multiple times.
209#
c7f317a9
DM
210# The \x{3030} char isn't a legal var name, and this triggers the error.
211#
212# NB: this only failed if the closing quote of the interpolated string is
213# the last char of the file (i.e. no trailing \n).
214
215{
10574810
DM
216 my $bad = "\x{3030}";
217 # Write out the individual utf8 bytes making up \x{3030}. This
218 # avoids 'Wide char in print' warnings from test.pl. (We may still
219 # get that warning when compiling the prog itself, since the
220 # error it prints to stderr contains a wide char.)
221 utf8::encode($bad);
222
223 fresh_perl_like(qq{use utf8; "\$$bad"},
e7385084
DM
224 qr/
225 \A
226 ( \QWide character in print at - line 1.\E\n )?
227 \Qsyntax error at - line 1, near \E"\$.*"\n
228 \QExecution of - aborted due to compilation errors.\E\z
229 /xm,
230
231 {stderr => 1}, "RT# 124216");
c7f317a9 232}
2ca4363d 233
2aa61ac3
KW
234SKIP: { # [perl #128738]
235 use Config;
236 if ($Config{uvsize} < 8) {
237 skip("test is only valid on 64-bit ints", 2);
238 }
239 else {
7fc4e712 240 no warnings 'deprecated';
2aa61ac3
KW
241 my $a;
242 eval "\$a = q \x{ffffffff}Hello, \\\\whirled!\x{ffffffff}";
243 is $@, "",
244 "No errors in eval'ing a string with large code point delimiter";
245 is $a, 'Hello, \whirled!',
246 "Got expected result in eval'ing a string with a large code point"
247 . " delimiter";
248 }
249}
250
2ca4363d
FC
251
252# New tests go here ^^^^^
253
254# Keep this test last, as it will mess up line number reporting for any
255# subsequent tests.
256
257<<END;
258${
259#line 57
260qq ϟϟ }
261END
262is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';
263
264# Put new tests above the line number tests.