This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix spelling: precede
[perl5.git] / t / uni / parser.t
CommitLineData
1a96a5b7
BF
1#!./perl
2
3# Checks if the parser behaves correctly in edge cases
4# (including weird syntax errors)
5
6BEGIN {
b5efbd1f 7 chdir 't' if -d 't';
1a96a5b7 8 require './test.pl';
f70e3f26 9 require './charset_tools.pl';
41779348 10 skip_all_without_unicode_tables();
1a96a5b7
BF
11}
12
5a621908 13plan (tests => 58);
1a96a5b7
BF
14
15use utf8;
16use open qw( :utf8 :std );
17
0435b362 18is *tèst, "*main::tèst", "sanity check.";
1a96a5b7
BF
19ok $::{"tèst"}, "gets the right glob in the stash.";
20
21my $glob_by_sub = sub { *main::method }->();
22
23is *main::method, "*main::method", "glob stringy works";
24is "" . *main::method, "*main::method", "glob stringify-through-concat works";
25is $glob_by_sub, "*main::method", "glob stringy works";
26is "" . $glob_by_sub, "*main::method", "";
27
28sub gimme_glob {
29 no strict 'refs';
30 is *{$_[0]}, "*main::$_[0]";
31 *{$_[0]};
32}
33
34is "" . gimme_glob("下郎"), "*main::下郎";
35$a = *下郎;
36is "" . $a, "*main::下郎";
37
38*{gimme_glob("下郎")} = sub {};
39
40{
41 ok defined *{"下郎"}{CODE};
42 ok !defined *{"\344\270\213\351\203\216"}{CODE};
43}
44
45$Lèon = 1;
46is ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,";
47ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";
48
49my $a = "foo" . chr(190);
50my $b = $a . chr(256);
51chop $b; # $b is $a with utf8 on
52
53is $a, $b, '$a equals $b';
54
55*$b = sub { 5 };
56
57is eval { main->$a }, 5, q!$a can call $b's sub!;
58ok !$@, "..and there's no error.";
59
60my $c = $b;
61utf8::encode($c);
62ok $b ne $c, '$b unequal $c';
63eval { main->$c };
64ok $@, q!$c can't call $b's sub.!;
65
66# Now define another sub under the downgraded name:
67*$a = sub { 6 };
68# Call it:
69is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
70ok !$@, "..without errors.";
71eval { main->$c };
72ok $@, "but it's still unreachable through *c";
73
74*$b = \10;
75is ${*$a{SCALAR}}, 10;
76is ${*$b{SCALAR}}, 10;
77is ${*$c{SCALAR}}, undef;
78
79opendir FÒÒ, ".";
80closedir FÒÒ;
81::ok($::{"FÒÒ"}, "Bareword generates the right glob.");
82::ok(!$::{"F\303\222\303\222"});
83
84sub участники { 1 }
85
86ok $::{"участники"}, "non-const sub declarations generate the right glob";
2eaf799e 87is $::{"участники"}->(), 1;
1a96a5b7 88
3453414d
BF
89sub 原 () { 1 }
90
91is grep({ $_ eq "\x{539f}" } keys %::), 1, "Constant subs generate the right glob.";
92is grep({ $_ eq "\345\216\237" } keys %::), 0;
93
108f32a5
BF
94#These should probably go elsewhere.
95eval q{ sub wròng1 (_$); wròng1(1,2) };
96like( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' );
97
98eval q{ sub ча::ики ($__); ча::ики(1,2) };
99like( $@, qr/Malformed prototype for ча::ики/ );
100
23b0eed2
BF
101our $問 = 10;
102is $問, 10, "our works";
103is $main::問, 10, "...as does getting the same variable through the fully qualified name";
104is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
e2f06df0
BF
105
106{
107 use charnames qw( :full );
108
109 eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
055745ba 110 $@ =~ s/eval \d+/eval 11/;
08454bd8 111 is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after my $ニ <-- HERE near column 8 at (eval 11) line 1.
e2f06df0 112', "'Unrecognized character' croak is UTF-8 clean";
84051091
BF
113
114 eval "q\0foobar\0 \x{FFFF}+1";
115 $@ =~ s/eval \d+/eval 11/;
116 is(
117 $@,
118 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
119 "...and nul-clean"
120 );
121
ce4bd586 122 {
84051091
BF
123 use re 'eval';
124 my $f = qq{(?{\$ネ+ 1; \x{1F42A} })};
125 eval { "a" =~ /^a$f/ };
126 my $e = $@;
127 $e =~ s/eval \d+/eval 11/;
128 is(
129 $e,
130 "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
131 "Messages from a re-eval are UTF-8 clean"
132 );
133
134 $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
135 eval { "a" =~ /^a$f/ };
136 my $e = $@;
137 $e =~ s/eval \d+/eval 11/;
138 is(
139 $e,
140 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
141 "...and nul-clean"
142 );
143 }
144
145 {
146 eval qq{\$ネ+ 1; \x{1F42A}};
147 $@ =~ s/eval \d+/eval 11/;
148 is(
149 $@,
150 "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
151 "Unrecognized character error doesn't cut off in the middle of characters"
152 )
153 }
154
e2f06df0 155}
3c54b17a
BF
156
157{
158 use feature 'state';
159 for ( qw( my state our ) ) {
160 local $@;
161 eval "$_ Foo $x = 1;";
162 like $@, qr/No such class Foo/u, "'No such class' warning for $_ is UTF-8 clean";
163 }
4bca4ee0
BF
164}
165
166{
167 local $@;
168 eval "our \$main::\x{30cb};";
169 like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
170}
58576270
BF
171
172{
173 use feature 'state';
174 local $@;
175 for ( qw( my state ) ) {
176 eval "$_ \$::\x{30cb};";
177 like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
178 }
179}
d0fb66e4
BF
180
181{
182 local $@;
183 eval qq!print \x{30cb}, "comma""!;
184 like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
185}
86fe3f36
BF
186
187# tests for "Bad name"
188eval q{ Foo::$bar };
189like( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' );
190eval q{ Foo''bar };
191like( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' );
107160e2
KW
192
193{
194 no warnings 'utf8';
75219bac
KW
195 local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
196 # which we ignore
28642c96
KW
197 my $malformed_to_be = ($::IS_EBCDIC) # Overlong sequence
198 ? "\x{74}\x{41}"
199 : "\x{c0}\x{a0}";
107160e2 200 CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
efa571ab 201 like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}');
107160e2 202}
c7f317a9
DM
203
204# RT# 124216: Perl_sv_clear: Assertion
205# If a parsing error occurred during a forced token within an interpolated
206# context, the stack unwinding failed to restore PL_lex_defer and so after
207# error recovery the state restored after the forced token was processed
208# was the wrong one, resulting in the lexer thinking we're still inside a
209# quoted string and things getting freed multiple times.
210#
c7f317a9
DM
211# The \x{3030} char isn't a legal var name, and this triggers the error.
212#
213# NB: this only failed if the closing quote of the interpolated string is
214# the last char of the file (i.e. no trailing \n).
215
216{
10574810
DM
217 my $bad = "\x{3030}";
218 # Write out the individual utf8 bytes making up \x{3030}. This
219 # avoids 'Wide char in print' warnings from test.pl. (We may still
220 # get that warning when compiling the prog itself, since the
221 # error it prints to stderr contains a wide char.)
222 utf8::encode($bad);
223
224 fresh_perl_like(qq{use utf8; "\$$bad"},
e7385084
DM
225 qr/
226 \A
227 ( \QWide character in print at - line 1.\E\n )?
228 \Qsyntax error at - line 1, near \E"\$.*"\n
229 \QExecution of - aborted due to compilation errors.\E\z
230 /xm,
231
232 {stderr => 1}, "RT# 124216");
c7f317a9 233}
2ca4363d 234
f70e3f26 235SKIP: {
51316de8 236
2aa61ac3
KW
237 use Config;
238 if ($Config{uvsize} < 8) {
f70e3f26 239 skip("test is only valid on 64-bit ints", 4);
2aa61ac3
KW
240 }
241 else {
242 my $a;
f70e3f26
KW
243 my $b;
244
245 # This caused a memory fault [perl #128738]
246 $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000
247 eval "\$a = q ${b}abc${b}";
248 is $@, "",
249 "No errors in eval'ing a string with large code point delimiter";
250 is $a, 'abc',
251 "Got expected result in eval'ing a string with a large code point"
252 . " delimiter";
253
254 $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF
255 eval "\$a = q ${b}Hello, \\\\whirled!${b}";
2aa61ac3
KW
256 is $@, "",
257 "No errors in eval'ing a string with large code point delimiter";
258 is $a, 'Hello, \whirled!',
259 "Got expected result in eval'ing a string with a large code point"
260 . " delimiter";
261 }
262}
263
5a621908
TC
264fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)');
265use utf8;
266eval "sort \x{100}%";
267die $@;
268EOS
269syntax error at (eval 1) line 1, at EOF
270EXPECT
2ca4363d
FC
271
272# New tests go here ^^^^^
273
274# Keep this test last, as it will mess up line number reporting for any
275# subsequent tests.
276
277<<END;
278${
279#line 57
280qq ϟϟ }
281END
282is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';
283
284# Put new tests above the line number tests.