Commit | Line | Data |
---|---|---|
2435e5d3 BF |
1 | use strict; |
2 | use warnings; | |
3 | use Encode (); | |
4 | ||
efc41636 KW |
5 | use Test::More; |
6 | if (ord("A") != 65) { | |
f3b09d0b KW |
7 | # pad_scalar() requires constant input. To port this to EBCDIC would |
8 | # require copy, paste, and changing all the values for each code page. | |
9 | plan skip_all => "ASCII-centric tests"; | |
efc41636 KW |
10 | } |
11 | else { | |
12 | plan tests => 77; | |
13 | } | |
2435e5d3 BF |
14 | |
15 | use XS::APItest qw( fetch_pad_names pad_scalar ); | |
16 | ||
17 | local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /Wide character in print at/ }; | |
18 | ||
19 | ok defined &fetch_pad_names, "sub imported"; | |
20 | ok defined &pad_scalar; | |
21 | ||
22 | my $cv = sub { | |
23 | my $test; | |
24 | }; | |
25 | ||
26 | ok fetch_pad_names($cv), "Fetch working."; | |
27 | is ref fetch_pad_names($cv), ref [], 'Fetch returns an arrayref'; | |
28 | is @{fetch_pad_names($cv)}, 1, 'Sub has one lexical.'; | |
29 | is fetch_pad_names($cv)->[0], '$test', "Fetching a simple scalar works."; | |
30 | ||
31 | $cv = sub { | |
32 | use utf8; | |
33 | ||
34 | my $zest = 'invariant'; | |
35 | my $zèst = 'latin-1'; | |
36 | ||
37 | return [pad_scalar(1, "zèst"), pad_scalar(1, "z\350st"), pad_scalar(1, "z\303\250st")]; | |
38 | }; | |
39 | ||
40 | my $names_av = fetch_pad_names($cv); | |
41 | my $flagged = my $unflagged = "\$z\x{c3}\x{a8}st"; | |
42 | Encode::_utf8_on($flagged); | |
43 | ||
44 | general_tests( $cv->(), $names_av, { | |
45 | results => [ | |
46 | { cmp => 'latin-1', msg => 'Fetches through UTF-8.' }, | |
47 | { cmp => 'latin-1', msg => 'Fetches through Latin-1.' }, | |
48 | { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, | |
49 | ], | |
50 | pad_size => { | |
51 | total => { cmp => 2, msg => 'Sub has two lexicals.' }, | |
2502ffdf FC |
52 | utf8 => { cmp => 2, msg => 'Sub has only UTF-8 vars.' }, |
53 | invariant => { cmp => 0, msg => 'Sub has no invariant vars.' }, | |
2435e5d3 BF |
54 | }, |
55 | vars => [ | |
56 | { name => '$zest', msg => 'Sub has [\$zest].', type => 'ok' }, | |
57 | { name => "\$z\x{e8}st", msg => "Sub has [\$t\x{e8}st].", type => 'ok' }, | |
58 | { name => $unflagged, msg => "Sub doesn't have [$unflagged].", type => 'not ok' }, | |
59 | { name => $flagged, msg => "But does have it when flagged.", type => 'ok' }, | |
60 | ], | |
61 | }); | |
62 | ||
63 | $cv = do { | |
64 | my $ascii = 'Defined'; | |
65 | sub { | |
66 | use utf8; | |
67 | my $партнеры = $ascii; | |
68 | return [$партнеры, pad_scalar(1, "партнеры"), pad_scalar(1, "\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213")]; | |
69 | }; | |
70 | }; | |
71 | ||
72 | $names_av = fetch_pad_names($cv); | |
73 | my $hex_var = "\$\x{43f}\x{430}\x{440}\x{442}\x{43d}\x{435}\x{440}\x{44b}"; | |
74 | $flagged = $unflagged = "\$\320\277\320\260\321\200\321\202\320\275\320\265\321\200\321\213"; | |
75 | Encode::_utf8_on($flagged); | |
76 | ||
77 | my $russian_var = do { | |
78 | use utf8; | |
79 | '$партнеры'; | |
80 | }; | |
81 | ||
82 | general_tests( $cv->(), $names_av, { | |
83 | results => [ | |
84 | { cmp => 'Defined', msg => 'UTF-8 fetching works.' }, | |
85 | { cmp => 'Defined', msg => 'pad_scalar fetch.' }, | |
86 | { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, | |
87 | ], | |
88 | pad_size => { | |
89 | total => { cmp => 2, msg => 'Sub has two lexicals, including those it closed over.' }, | |
2502ffdf FC |
90 | utf8 => { cmp => 2, msg => 'UTF-8 in the pad.' }, |
91 | invariant => { cmp => 0, msg => '' }, | |
2435e5d3 BF |
92 | }, |
93 | vars => [ | |
94 | { name => '$ascii', msg => 'Sub has [$ascii].', type => 'ok' }, | |
95 | { name => $russian_var, msg => "Sub has [$russian_var].", type => 'ok' }, | |
96 | { name => $hex_var, msg => "Sub has [$hex_var].", type => 'ok' }, | |
97 | { name => $unflagged, msg => "Sub doesn't have [$unflagged]", type => 'not ok' }, | |
98 | { name => $flagged, msg => "But does have it when flagged.", type => 'ok' }, | |
99 | ], | |
100 | }); | |
101 | ||
102 | my $leon1 = "\$L\x{e9}on"; | |
103 | my $leon2 = my $leon3 = "\$L\x{c3}\x{a9}on"; | |
104 | Encode::_utf8_on($leon2); | |
105 | ||
106 | local $@; | |
107 | $cv = eval <<"END"; | |
108 | sub { | |
109 | use utf8; | |
110 | my \$Leon = 'Invariant'; | |
111 | my $leon1 = 'Latin-1'; | |
112 | return [ \$Leon, $leon1, $leon2, pad_scalar(1, "L\x{e9}on"), pad_scalar(1, "L\x{c3}\x{a9}on")]; | |
113 | }; | |
114 | END | |
115 | ||
116 | my $err = $@; | |
117 | ok !$err, $@; | |
118 | ||
119 | $names_av = fetch_pad_names($cv); | |
120 | ||
121 | general_tests( $cv->(), $names_av, { | |
122 | results => [ | |
123 | { cmp => 'Invariant', msg => '' }, | |
124 | { cmp => 'Latin-1', msg => "Fetched through [$leon1]" }, | |
125 | { cmp => 'Latin-1', msg => "Fetched through [$leon2]" }, | |
126 | { cmp => 'Latin-1', msg => 'pad_scalar fetch.' }, | |
127 | { cmp => 'NOT_IN_PAD', msg => "Doesn't fetch using octets." }, | |
128 | ], | |
129 | pad_size => { | |
130 | total => { cmp => 2, msg => 'Sub has two lexicals' }, | |
2502ffdf FC |
131 | utf8 => { cmp => 2, msg => 'Latin-1 got upgraded to UTF-8.' }, |
132 | invariant => { cmp => 0, msg => '' }, | |
2435e5d3 BF |
133 | }, |
134 | vars => [ | |
135 | { name => '$Leon', msg => 'Sub has [$Leon].', type => 'ok' }, | |
136 | { name => $leon1, msg => "Sub has [$leon1].", type => 'ok' }, | |
137 | { name => $leon2, msg => "Sub has [$leon2].", type => 'ok' }, | |
138 | { name => $leon3, msg => "Sub doesn't have [$leon3]", type => 'not ok' }, | |
139 | ], | |
140 | }); | |
141 | ||
142 | ||
143 | { | |
144 | use utf8; | |
145 | my $Cèon = 4; | |
146 | my $str1 = "\$C\x{e8}on"; | |
147 | my $str2 = my $str3 = "\$C\x{c3}\x{a8}on"; | |
148 | Encode::_utf8_on($str2); | |
149 | ||
150 | local $@; | |
151 | $cv = eval <<"END_EVAL"; | |
152 | sub { [ \$Cèon, $str1, $str2 ] }; | |
153 | END_EVAL | |
154 | ||
155 | $err = $@; | |
156 | ok !$err; | |
157 | ||
158 | $names_av = fetch_pad_names($cv); | |
159 | ||
160 | general_tests( $cv->(), $names_av, { | |
161 | results => [ ({ SKIP => 1 }) x 3 ], | |
162 | pad_size => { | |
163 | total => { cmp => 1, msg => 'Sub has one lexical, which it closed over.' }, | |
2502ffdf FC |
164 | utf8 => { cmp => 1, msg => '' }, |
165 | invariant => { cmp => 0, msg => '' }, | |
2435e5d3 BF |
166 | }, |
167 | vars => [ | |
168 | { name => '$Ceon', msg => "Sub doesn't have [\$Ceon].", type => 'not ok' }, | |
169 | map({ { name => $_, msg => "Sub has [$_].", type => 'ok' } } $str1, $str2 ), | |
170 | { name => $str3, msg => "Sub doesn't have [$str3]", type => 'not ok' }, | |
171 | ], | |
172 | }); | |
173 | ||
174 | } | |
175 | ||
2435e5d3 BF |
176 | $cv = sub { |
177 | use utf8; | |
178 | our $戦国 = 10; | |
179 | { | |
180 | no strict 'refs'; | |
181 | my ($symref, $encoded_sym) = (__PACKAGE__ . "::戦国") x 2; | |
182 | utf8::encode($encoded_sym); | |
183 | return [ $戦国, ${$symref}, ${$encoded_sym} ]; | |
184 | } | |
185 | }; | |
186 | ||
187 | my $flagged_our = my $unflagged_our = "\$\346\210\246\345\233\275"; | |
188 | Encode::_utf8_on($flagged_our); | |
189 | ||
190 | $names_av = fetch_pad_names($cv); | |
191 | ||
192 | general_tests( $cv->(), $names_av, { | |
193 | results => [ | |
194 | { cmp => '10', msg => 'Fetched UTF-8 our var.' }, | |
0be4d16f BF |
195 | { cmp => '10', msg => "Symref fetch of an our works." }, |
196 | { cmp => undef, msg => "..and using the encoded form yields undef." }, | |
2435e5d3 BF |
197 | ], |
198 | pad_size => { | |
199 | total => { cmp => 3, msg => 'Sub has three lexicals.' }, | |
2502ffdf FC |
200 | utf8 => { cmp => 3, msg => 'Japanese stored as UTF-8.' }, |
201 | invariant => { cmp => 0, msg => '' }, | |
2435e5d3 BF |
202 | }, |
203 | vars => [ | |
204 | { name => "\$\x{6226}\x{56fd}", msg => "Sub has [\$\x{6226}\x{56fd}].", type => 'ok' }, | |
205 | { name => $flagged_our, msg => "Sub has [$flagged_our].", type => 'ok' }, | |
206 | { name => $unflagged_our, msg => "Sub doesn't have [$unflagged_our]", type => 'not ok' }, | |
207 | ], | |
208 | }); | |
209 | ||
210 | ||
211 | { | |
212 | ||
213 | use utf8; | |
214 | { | |
215 | my $test; | |
216 | BEGIN { | |
217 | $test = "t\x{c3}\x{a8}st"; | |
218 | Encode::_utf8_on($test); | |
219 | } | |
220 | use constant test => $test; | |
221 | } | |
222 | ||
223 | $cv = sub { | |
224 | my $tèst = 'Good'; | |
225 | ||
226 | return [ | |
227 | $tèst, | |
228 | pad_scalar(1, "tèst"), #"UTF-8" | |
229 | pad_scalar(1, "t\350st"), #"Latin-1" | |
230 | pad_scalar(1, "t\x{c3}\x{a8}st"), #"Octal" | |
231 | pad_scalar(1, test()), #'UTF-8 enc' | |
232 | ]; | |
233 | }; | |
234 | ||
235 | $names_av = fetch_pad_names($cv); | |
236 | ||
237 | general_tests( $cv->(), $names_av, { | |
238 | results => [ | |
239 | { cmp => 'Good', msg => 'Fetched through Perl.' }, | |
240 | { cmp => 'Good', msg => "pad_scalar: UTF-8 works." }, | |
241 | { cmp => 'Good', msg => "pad_scalar: Latin-1 works." }, | |
242 | { cmp => 'NOT_IN_PAD', msg => "pad_scalar: Doesn't fetch through octets." }, | |
243 | { cmp => 'Good', msg => "pad_scalar: UTF-8-through-encoding works." }, | |
244 | ], | |
245 | pad_size => { | |
246 | total => { cmp => 1, msg => 'Sub has one lexical.' }, | |
2502ffdf FC |
247 | utf8 => { cmp => 1, msg => '' }, |
248 | invariant => { cmp => 0, msg => '' }, | |
2435e5d3 BF |
249 | }, |
250 | vars => [], | |
251 | }); | |
252 | ||
253 | } | |
254 | ||
255 | $cv = do { | |
256 | use utf8; | |
257 | sub { | |
258 | my $ニコニコ = 'katakana'; | |
259 | my $にこにこ = 'hiragana'; | |
260 | ||
261 | return [ | |
262 | $ニコニコ, | |
263 | $にこにこ, | |
264 | pad_scalar(1, "にこにこ"), | |
265 | pad_scalar(1, "\x{306b}\x{3053}\x{306b}\x{3053}"), | |
266 | pad_scalar(1, "\343\201\253\343\201\223\343\201\253\343\201\223"), | |
267 | pad_scalar(1, "ニコニコ"), | |
268 | pad_scalar(1, "\x{30cb}\x{30b3}\x{30cb}\x{30b3}"), | |
269 | pad_scalar(1, "\343\203\213\343\202\263\343\203\213\343\202\263"), | |
270 | ]; | |
271 | } | |
272 | }; | |
273 | ||
274 | $names_av = fetch_pad_names($cv); | |
275 | ||
276 | general_tests( $cv->(), $names_av, { | |
277 | results => [ | |
278 | { cmp => 'katakana', msg => '' }, | |
279 | { cmp => 'hiragana', msg => '' }, | |
280 | { cmp => 'hiragana', msg => '' }, | |
281 | { cmp => 'hiragana', msg => '' }, | |
282 | { cmp => 'NOT_IN_PAD', msg => '' }, | |
283 | { cmp => 'katakana', msg => '' }, | |
284 | { cmp => 'katakana', msg => '' }, | |
285 | { cmp => 'NOT_IN_PAD', msg => '' }, | |
286 | ], | |
287 | pad_size => { | |
288 | total => { cmp => 2, msg => 'Sub has two lexicals.' }, | |
289 | utf8 => { cmp => 2, msg => '' }, | |
290 | invariant => { cmp => 0, msg => '' }, | |
291 | }, | |
292 | vars => [], | |
293 | }); | |
294 | ||
295 | { | |
296 | { | |
297 | my $utf8_e; | |
298 | BEGIN { | |
299 | $utf8_e = "e"; | |
300 | Encode::_utf8_on($utf8_e); | |
301 | } | |
302 | use constant utf8_e => $utf8_e; | |
303 | } | |
304 | my $e = 'Invariant'; | |
305 | is pad_scalar(1, "e"), pad_scalar(1, utf8_e), 'Fetches the same thing, even if invariant but with differing utf8ness.'; | |
306 | } | |
307 | ||
308 | ||
309 | sub general_tests { | |
310 | my ($results, $names_av, $tests) = @_; | |
311 | ||
312 | for my $i (0..$#$results) { | |
313 | next if $tests->{results}[$i]{SKIP}; | |
314 | is $results->[$i], $tests->{results}[$i]{cmp}, $tests->{results}[$i]{msg}; | |
315 | } | |
316 | ||
317 | is @$names_av, $tests->{pad_size}{total}{cmp}, $tests->{pad_size}{total}{msg}; | |
1648ed58 FC |
318 | is grep( Encode::is_utf8($_), @$names_av), |
319 | $tests->{pad_size}{utf8}{cmp}, $tests->{pad_size}{utf8}{msg}; | |
320 | is grep( !Encode::is_utf8($_), @$names_av), $tests->{pad_size}{invariant}{cmp}, | |
321 | $tests->{pad_size}{invariant}{msg}; | |
2435e5d3 BF |
322 | |
323 | for my $var (@{$tests->{vars}}) { | |
324 | if ($var->{type} eq 'ok') { | |
5f3202fa | 325 | ok +(grep { $_ eq $var->{name} } @$names_av), $var->{msg}; |
2435e5d3 | 326 | } else { |
5f3202fa | 327 | ok !(grep { $_ eq $var->{name} } @$names_av), $var->{msg}; |
2435e5d3 BF |
328 | } |
329 | } | |
330 | ||
331 | } |