This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make pad names always UTF8
[perl5.git] / ext / XS-APItest / t / fetch_pad_names.t
CommitLineData
2435e5d3
BF
1use strict;
2use warnings;
3use Encode ();
4
5use Test::More tests => 77;
6
7use XS::APItest qw( fetch_pad_names pad_scalar );
8
9local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /Wide character in print at/ };
10
11ok defined &fetch_pad_names, "sub imported";
12ok defined &pad_scalar;
13
14my $cv = sub {
15 my $test;
16};
17
18ok fetch_pad_names($cv), "Fetch working.";
19is ref fetch_pad_names($cv), ref [], 'Fetch returns an arrayref';
20is @{fetch_pad_names($cv)}, 1, 'Sub has one lexical.';
21is fetch_pad_names($cv)->[0], '$test', "Fetching a simple scalar works.";
22
23$cv = sub {
24 use utf8;
25
26 my $zest = 'invariant';
27 my $zèst = 'latin-1';
28
29 return [pad_scalar(1, "zèst"), pad_scalar(1, "z\350st"), pad_scalar(1, "z\303\250st")];
30};
31
32my $names_av = fetch_pad_names($cv);
33my $flagged = my $unflagged = "\$z\x{c3}\x{a8}st";
34Encode::_utf8_on($flagged);
35
36general_tests( $cv->(), $names_av, {
37 results => [
38 { cmp => 'latin-1', msg => 'Fetches through UTF-8.' },
39 { cmp => 'latin-1', msg => 'Fetches through Latin-1.' },
40 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
41 ],
42 pad_size => {
43 total => { cmp => 2, msg => 'Sub has two lexicals.' },
2502ffdf
FC
44 utf8 => { cmp => 2, msg => 'Sub has only UTF-8 vars.' },
45 invariant => { cmp => 0, msg => 'Sub has no invariant vars.' },
2435e5d3
BF
46 },
47 vars => [
48 { name => '$zest', msg => 'Sub has [\$zest].', type => 'ok' },
49 { name => "\$z\x{e8}st", msg => "Sub has [\$t\x{e8}st].", type => 'ok' },
50 { name => $unflagged, msg => "Sub doesn't have [$unflagged].", type => 'not ok' },
51 { name => $flagged, msg => "But does have it when flagged.", type => 'ok' },
52 ],
53});
54
55$cv = do {
56 my $ascii = 'Defined';
57 sub {
58 use utf8;
59 my $партнеры = $ascii;
60 return [$партнеры, pad_scalar(1, "партнеры"), pad_scalar(1, "\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213")];
61 };
62};
63
64$names_av = fetch_pad_names($cv);
65my $hex_var = "\$\x{43f}\x{430}\x{440}\x{442}\x{43d}\x{435}\x{440}\x{44b}";
66$flagged = $unflagged = "\$\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213";
67Encode::_utf8_on($flagged);
68
69my $russian_var = do {
70 use utf8;
71 '$партнеры';
72};
73
74general_tests( $cv->(), $names_av, {
75 results => [
76 { cmp => 'Defined', msg => 'UTF-8 fetching works.' },
77 { cmp => 'Defined', msg => 'pad_scalar fetch.' },
78 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
79 ],
80 pad_size => {
81 total => { cmp => 2, msg => 'Sub has two lexicals, including those it closed over.' },
2502ffdf
FC
82 utf8 => { cmp => 2, msg => 'UTF-8 in the pad.' },
83 invariant => { cmp => 0, msg => '' },
2435e5d3
BF
84 },
85 vars => [
86 { name => '$ascii', msg => 'Sub has [$ascii].', type => 'ok' },
87 { name => $russian_var, msg => "Sub has [$russian_var].", type => 'ok' },
88 { name => $hex_var, msg => "Sub has [$hex_var].", type => 'ok' },
89 { name => $unflagged, msg => "Sub doesn't have [$unflagged]", type => 'not ok' },
90 { name => $flagged, msg => "But does have it when flagged.", type => 'ok' },
91 ],
92});
93
94my $leon1 = "\$L\x{e9}on";
95my $leon2 = my $leon3 = "\$L\x{c3}\x{a9}on";
96Encode::_utf8_on($leon2);
97
98local $@;
99$cv = eval <<"END";
100 sub {
101 use utf8;
102 my \$Leon = 'Invariant';
103 my $leon1 = 'Latin-1';
104 return [ \$Leon, $leon1, $leon2, pad_scalar(1, "L\x{e9}on"), pad_scalar(1, "L\x{c3}\x{a9}on")];
105 };
106END
107
108my $err = $@;
109ok !$err, $@;
110
111$names_av = fetch_pad_names($cv);
112
113general_tests( $cv->(), $names_av, {
114 results => [
115 { cmp => 'Invariant', msg => '' },
116 { cmp => 'Latin-1', msg => "Fetched through [$leon1]" },
117 { cmp => 'Latin-1', msg => "Fetched through [$leon2]" },
118 { cmp => 'Latin-1', msg => 'pad_scalar fetch.' },
119 { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." },
120 ],
121 pad_size => {
122 total => { cmp => 2, msg => 'Sub has two lexicals' },
2502ffdf
FC
123 utf8 => { cmp => 2, msg => 'Latin-1 got upgraded to UTF-8.' },
124 invariant => { cmp => 0, msg => '' },
2435e5d3
BF
125 },
126 vars => [
127 { name => '$Leon', msg => 'Sub has [$Leon].', type => 'ok' },
128 { name => $leon1, msg => "Sub has [$leon1].", type => 'ok' },
129 { name => $leon2, msg => "Sub has [$leon2].", type => 'ok' },
130 { name => $leon3, msg => "Sub doesn't have [$leon3]", type => 'not ok' },
131 ],
132});
133
134
135{
136 use utf8;
137 my $Cèon = 4;
138 my $str1 = "\$C\x{e8}on";
139 my $str2 = my $str3 = "\$C\x{c3}\x{a8}on";
140 Encode::_utf8_on($str2);
141
142 local $@;
143 $cv = eval <<"END_EVAL";
144 sub { [ \$Cèon, $str1, $str2 ] };
145END_EVAL
146
147 $err = $@;
148 ok !$err;
149
150 $names_av = fetch_pad_names($cv);
151
152 general_tests( $cv->(), $names_av, {
153 results => [ ({ SKIP => 1 }) x 3 ],
154 pad_size => {
155 total => { cmp => 1, msg => 'Sub has one lexical, which it closed over.' },
2502ffdf
FC
156 utf8 => { cmp => 1, msg => '' },
157 invariant => { cmp => 0, msg => '' },
2435e5d3
BF
158 },
159 vars => [
160 { name => '$Ceon', msg => "Sub doesn't have [\$Ceon].", type => 'not ok' },
161 map({ { name => $_, msg => "Sub has [$_].", type => 'ok' } } $str1, $str2 ),
162 { name => $str3, msg => "Sub doesn't have [$str3]", type => 'not ok' },
163 ],
164 });
165
166}
167
2435e5d3
BF
168$cv = sub {
169 use utf8;
170 our $戦国 = 10;
171 {
172 no strict 'refs';
173 my ($symref, $encoded_sym) = (__PACKAGE__ . "::戦国") x 2;
174 utf8::encode($encoded_sym);
175 return [ $戦国, ${$symref}, ${$encoded_sym} ];
176 }
177};
178
179my $flagged_our = my $unflagged_our = "\$\346\210\246\345\233\275";
180Encode::_utf8_on($flagged_our);
181
182$names_av = fetch_pad_names($cv);
183
184general_tests( $cv->(), $names_av, {
185 results => [
186 { cmp => '10', msg => 'Fetched UTF-8 our var.' },
0be4d16f
BF
187 { cmp => '10', msg => "Symref fetch of an our works." },
188 { cmp => undef, msg => "..and using the encoded form yields undef." },
2435e5d3
BF
189 ],
190 pad_size => {
191 total => { cmp => 3, msg => 'Sub has three lexicals.' },
2502ffdf
FC
192 utf8 => { cmp => 3, msg => 'Japanese stored as UTF-8.' },
193 invariant => { cmp => 0, msg => '' },
2435e5d3
BF
194 },
195 vars => [
196 { name => "\$\x{6226}\x{56fd}", msg => "Sub has [\$\x{6226}\x{56fd}].", type => 'ok' },
197 { name => $flagged_our, msg => "Sub has [$flagged_our].", type => 'ok' },
198 { name => $unflagged_our, msg => "Sub doesn't have [$unflagged_our]", type => 'not ok' },
199 ],
200});
201
202
203{
204
205use utf8;
206{
207 my $test;
208 BEGIN {
209 $test = "t\x{c3}\x{a8}st";
210 Encode::_utf8_on($test);
211 }
212 use constant test => $test;
213}
214
215$cv = sub {
216 my $tèst = 'Good';
217
218 return [
219 $tèst,
220 pad_scalar(1, "tèst"), #"UTF-8"
221 pad_scalar(1, "t\350st"), #"Latin-1"
222 pad_scalar(1, "t\x{c3}\x{a8}st"), #"Octal"
223 pad_scalar(1, test()), #'UTF-8 enc'
224 ];
225};
226
227$names_av = fetch_pad_names($cv);
228
229general_tests( $cv->(), $names_av, {
230 results => [
231 { cmp => 'Good', msg => 'Fetched through Perl.' },
232 { cmp => 'Good', msg => "pad_scalar: UTF-8 works." },
233 { cmp => 'Good', msg => "pad_scalar: Latin-1 works." },
234 { cmp => 'NOT_IN_PAD', msg => "pad_scalar: Doesn't fetch through octets." },
235 { cmp => 'Good', msg => "pad_scalar: UTF-8-through-encoding works." },
236 ],
237 pad_size => {
238 total => { cmp => 1, msg => 'Sub has one lexical.' },
2502ffdf
FC
239 utf8 => { cmp => 1, msg => '' },
240 invariant => { cmp => 0, msg => '' },
2435e5d3
BF
241 },
242 vars => [],
243});
244
245}
246
247$cv = do {
248 use utf8;
249 sub {
250 my $ニコニコ = 'katakana';
251 my $にこにこ = 'hiragana';
252
253 return [
254 $ニコニコ,
255 $にこにこ,
256 pad_scalar(1, "にこにこ"),
257 pad_scalar(1, "\x{306b}\x{3053}\x{306b}\x{3053}"),
258 pad_scalar(1, "\343\201\253\343\201\223\343\201\253\343\201\223"),
259 pad_scalar(1, "ニコニコ"),
260 pad_scalar(1, "\x{30cb}\x{30b3}\x{30cb}\x{30b3}"),
261 pad_scalar(1, "\343\203\213\343\202\263\343\203\213\343\202\263"),
262 ];
263 }
264};
265
266$names_av = fetch_pad_names($cv);
267
268general_tests( $cv->(), $names_av, {
269 results => [
270 { cmp => 'katakana', msg => '' },
271 { cmp => 'hiragana', msg => '' },
272 { cmp => 'hiragana', msg => '' },
273 { cmp => 'hiragana', msg => '' },
274 { cmp => 'NOT_IN_PAD', msg => '' },
275 { cmp => 'katakana', msg => '' },
276 { cmp => 'katakana', msg => '' },
277 { cmp => 'NOT_IN_PAD', msg => '' },
278 ],
279 pad_size => {
280 total => { cmp => 2, msg => 'Sub has two lexicals.' },
281 utf8 => { cmp => 2, msg => '' },
282 invariant => { cmp => 0, msg => '' },
283 },
284 vars => [],
285});
286
287{
288 {
289 my $utf8_e;
290 BEGIN {
291 $utf8_e = "e";
292 Encode::_utf8_on($utf8_e);
293 }
294 use constant utf8_e => $utf8_e;
295 }
296 my $e = 'Invariant';
297 is pad_scalar(1, "e"), pad_scalar(1, utf8_e), 'Fetches the same thing, even if invariant but with differing utf8ness.';
298}
299
300
301sub general_tests {
302 my ($results, $names_av, $tests) = @_;
303
304 for my $i (0..$#$results) {
305 next if $tests->{results}[$i]{SKIP};
306 is $results->[$i], $tests->{results}[$i]{cmp}, $tests->{results}[$i]{msg};
307 }
308
309 is @$names_av, $tests->{pad_size}{total}{cmp}, $tests->{pad_size}{total}{msg};
310 is grep( Encode::is_utf8($_), @$names_av), $tests->{pad_size}{utf8}{cmp};
311 is grep( !Encode::is_utf8($_), @$names_av), $tests->{pad_size}{invariant}{cmp};
312
313 for my $var (@{$tests->{vars}}) {
0f539b13 314 no warnings 'experimental::smartmatch';
2435e5d3
BF
315 if ($var->{type} eq 'ok') {
316 ok $var->{name} ~~ $names_av, $var->{msg};
317 } else {
318 ok !($var->{name} ~~ $names_av), $var->{msg};
319 }
320 }
321
322}