Commit | Line | Data |
---|---|---|
6aa905cf KW |
1 | #!perl -w |
2 | ||
3 | # This is a base file to be used by various .t's in its directory | |
4 | ||
5 | use strict; | |
6 | use Test::More; | |
7 | ||
8 | BEGIN { | |
9 | use_ok('XS::APItest'); | |
10 | require 'charset_tools.pl'; | |
11 | require './t/utf8_setup.pl'; | |
12 | }; | |
13 | ||
14 | $|=1; | |
15 | ||
16 | no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit | |
17 | # machines, and that is tested elsewhere | |
18 | ||
19 | use XS::APItest; | |
20 | use Data::Dumper; | |
21 | ||
9cdc3054 | 22 | my @warnings_gotten; |
6aa905cf KW |
23 | |
24 | use warnings 'utf8'; | |
8d6f1506 KW |
25 | local $SIG{__WARN__} = sub { my @copy = @_; |
26 | push @warnings_gotten, map { chomp; $_ } @copy; | |
27 | }; | |
6aa905cf | 28 | |
6aa905cf KW |
29 | |
30 | # Now test the cases where a legal code point is generated, but may or may not | |
31 | # be allowed/warned on. | |
32 | my @tests = ( | |
af816908 | 33 | # ($testname, $bytes, $controlling_warning_category, |
b7e1f4b2 | 34 | # $allowed_uv, $needed_to_discern_len ) |
6aa905cf KW |
35 | [ "lowest surrogate", |
36 | (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), | |
6aa905cf | 37 | 'surrogate', 0xD800, |
6aa905cf KW |
38 | ], |
39 | [ "a middle surrogate", | |
40 | (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), | |
6aa905cf | 41 | 'surrogate', 0xD90D, |
6aa905cf KW |
42 | ], |
43 | [ "highest surrogate", | |
44 | (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), | |
6aa905cf | 45 | 'surrogate', 0xDFFF, |
6aa905cf KW |
46 | ], |
47 | [ "first non_unicode", | |
48 | (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), | |
6aa905cf | 49 | 'non_unicode', 0x110000, |
6aa905cf | 50 | 2, |
6aa905cf KW |
51 | ], |
52 | [ "non_unicode whose first byte tells that", | |
53 | (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), | |
6aa905cf KW |
54 | 'non_unicode', |
55 | (isASCII) ? 0x140000 : 0x200000, | |
6aa905cf | 56 | 1, |
6aa905cf KW |
57 | ], |
58 | [ "first of 32 consecutive non-character code points", | |
59 | (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), | |
6aa905cf | 60 | 'nonchar', 0xFDD0, |
6aa905cf KW |
61 | ], |
62 | [ "a mid non-character code point of the 32 consecutive ones", | |
63 | (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), | |
6aa905cf | 64 | 'nonchar', 0xFDE0, |
6aa905cf KW |
65 | ], |
66 | [ "final of 32 consecutive non-character code points", | |
67 | (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), | |
6aa905cf | 68 | 'nonchar', 0xFDEF, |
6aa905cf KW |
69 | ], |
70 | [ "non-character code point U+FFFE", | |
71 | (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), | |
6aa905cf | 72 | 'nonchar', 0xFFFE, |
6aa905cf KW |
73 | ], |
74 | [ "non-character code point U+FFFF", | |
75 | (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), | |
6aa905cf | 76 | 'nonchar', 0xFFFF, |
6aa905cf KW |
77 | ], |
78 | [ "non-character code point U+1FFFE", | |
79 | (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), | |
6aa905cf | 80 | 'nonchar', 0x1FFFE, |
6aa905cf KW |
81 | ], |
82 | [ "non-character code point U+1FFFF", | |
83 | (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), | |
6aa905cf | 84 | 'nonchar', 0x1FFFF, |
6aa905cf KW |
85 | ], |
86 | [ "non-character code point U+2FFFE", | |
87 | (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), | |
6aa905cf | 88 | 'nonchar', 0x2FFFE, |
6aa905cf KW |
89 | ], |
90 | [ "non-character code point U+2FFFF", | |
91 | (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), | |
6aa905cf | 92 | 'nonchar', 0x2FFFF, |
6aa905cf KW |
93 | ], |
94 | [ "non-character code point U+3FFFE", | |
95 | (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), | |
6aa905cf | 96 | 'nonchar', 0x3FFFE, |
6aa905cf KW |
97 | ], |
98 | [ "non-character code point U+3FFFF", | |
99 | (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), | |
6aa905cf | 100 | 'nonchar', 0x3FFFF, |
6aa905cf KW |
101 | ], |
102 | [ "non-character code point U+4FFFE", | |
103 | (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), | |
6aa905cf | 104 | 'nonchar', 0x4FFFE, |
6aa905cf KW |
105 | ], |
106 | [ "non-character code point U+4FFFF", | |
107 | (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), | |
6aa905cf | 108 | 'nonchar', 0x4FFFF, |
6aa905cf KW |
109 | ], |
110 | [ "non-character code point U+5FFFE", | |
111 | (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), | |
6aa905cf | 112 | 'nonchar', 0x5FFFE, |
6aa905cf KW |
113 | ], |
114 | [ "non-character code point U+5FFFF", | |
115 | (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), | |
6aa905cf | 116 | 'nonchar', 0x5FFFF, |
6aa905cf KW |
117 | ], |
118 | [ "non-character code point U+6FFFE", | |
119 | (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), | |
6aa905cf | 120 | 'nonchar', 0x6FFFE, |
6aa905cf KW |
121 | ], |
122 | [ "non-character code point U+6FFFF", | |
123 | (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), | |
6aa905cf | 124 | 'nonchar', 0x6FFFF, |
6aa905cf KW |
125 | ], |
126 | [ "non-character code point U+7FFFE", | |
127 | (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), | |
6aa905cf | 128 | 'nonchar', 0x7FFFE, |
6aa905cf KW |
129 | ], |
130 | [ "non-character code point U+7FFFF", | |
131 | (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), | |
6aa905cf | 132 | 'nonchar', 0x7FFFF, |
6aa905cf KW |
133 | ], |
134 | [ "non-character code point U+8FFFE", | |
135 | (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), | |
6aa905cf | 136 | 'nonchar', 0x8FFFE, |
6aa905cf KW |
137 | ], |
138 | [ "non-character code point U+8FFFF", | |
139 | (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), | |
6aa905cf | 140 | 'nonchar', 0x8FFFF, |
6aa905cf KW |
141 | ], |
142 | [ "non-character code point U+9FFFE", | |
143 | (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), | |
6aa905cf | 144 | 'nonchar', 0x9FFFE, |
6aa905cf KW |
145 | ], |
146 | [ "non-character code point U+9FFFF", | |
147 | (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), | |
6aa905cf | 148 | 'nonchar', 0x9FFFF, |
6aa905cf KW |
149 | ], |
150 | [ "non-character code point U+AFFFE", | |
151 | (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), | |
6aa905cf | 152 | 'nonchar', 0xAFFFE, |
6aa905cf KW |
153 | ], |
154 | [ "non-character code point U+AFFFF", | |
155 | (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), | |
6aa905cf | 156 | 'nonchar', 0xAFFFF, |
6aa905cf KW |
157 | ], |
158 | [ "non-character code point U+BFFFE", | |
159 | (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), | |
6aa905cf | 160 | 'nonchar', 0xBFFFE, |
6aa905cf KW |
161 | ], |
162 | [ "non-character code point U+BFFFF", | |
163 | (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), | |
6aa905cf | 164 | 'nonchar', 0xBFFFF, |
6aa905cf KW |
165 | ], |
166 | [ "non-character code point U+CFFFE", | |
167 | (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), | |
6aa905cf | 168 | 'nonchar', 0xCFFFE, |
6aa905cf KW |
169 | ], |
170 | [ "non-character code point U+CFFFF", | |
171 | (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), | |
6aa905cf | 172 | 'nonchar', 0xCFFFF, |
6aa905cf KW |
173 | ], |
174 | [ "non-character code point U+DFFFE", | |
175 | (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), | |
6aa905cf | 176 | 'nonchar', 0xDFFFE, |
6aa905cf KW |
177 | ], |
178 | [ "non-character code point U+DFFFF", | |
179 | (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), | |
6aa905cf | 180 | 'nonchar', 0xDFFFF, |
6aa905cf KW |
181 | ], |
182 | [ "non-character code point U+EFFFE", | |
183 | (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), | |
6aa905cf | 184 | 'nonchar', 0xEFFFE, |
6aa905cf KW |
185 | ], |
186 | [ "non-character code point U+EFFFF", | |
187 | (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), | |
6aa905cf | 188 | 'nonchar', 0xEFFFF, |
6aa905cf KW |
189 | ], |
190 | [ "non-character code point U+FFFFE", | |
191 | (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), | |
6aa905cf | 192 | 'nonchar', 0xFFFFE, |
6aa905cf KW |
193 | ], |
194 | [ "non-character code point U+FFFFF", | |
195 | (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), | |
6aa905cf | 196 | 'nonchar', 0xFFFFF, |
6aa905cf KW |
197 | ], |
198 | [ "non-character code point U+10FFFE", | |
199 | (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), | |
6aa905cf | 200 | 'nonchar', 0x10FFFE, |
6aa905cf KW |
201 | ], |
202 | [ "non-character code point U+10FFFF", | |
203 | (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), | |
6aa905cf | 204 | 'nonchar', 0x10FFFF, |
6aa905cf | 205 | ], |
76513bdc KW |
206 | [ "requires at least 32 bits", |
207 | (isASCII) | |
208 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
209 | : I8_to_native( | |
210 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
211 | # This code point is chosen so that it is representable in a UV on | |
212 | # 32-bit machines | |
76513bdc | 213 | 'utf8', 0x80000000, |
76513bdc | 214 | (isASCII) ? 1 : 8, |
76513bdc KW |
215 | ], |
216 | [ "highest 32 bit code point", | |
217 | (isASCII) | |
218 | ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" | |
219 | : I8_to_native( | |
220 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), | |
76513bdc | 221 | 'utf8', 0xFFFFFFFF, |
76513bdc | 222 | (isASCII) ? 1 : 8, |
76513bdc KW |
223 | ], |
224 | [ "requires at least 32 bits, and use SUPER-type flags, instead of" | |
225 | . " ABOVE_31_BIT", | |
226 | (isASCII) | |
227 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
228 | : I8_to_native( | |
229 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
76513bdc | 230 | 'utf8', 0x80000000, |
76513bdc | 231 | 1, |
76513bdc KW |
232 | ], |
233 | [ "overflow with warnings/disallow for more than 31 bits", | |
234 | # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT | |
235 | # with overflow. The overflow malformation is never allowed, so | |
236 | # preventing it takes precedence if the ABOVE_31_BIT options would | |
237 | # otherwise allow in an overflowing value. The ASCII code points (1 | |
238 | # for 32-bits; 1 for 64) were chosen because the old overflow | |
239 | # detection algorithm did not catch them; this means this test also | |
240 | # checks for that fix. The EBCDIC are arbitrary overflowing ones | |
241 | # since we have no reports of failures with it. | |
242 | (($::is64bit) | |
243 | ? ((isASCII) | |
244 | ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" | |
245 | : I8_to_native( | |
246 | "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) | |
247 | : ((isASCII) | |
248 | ? "\xfe\x86\x80\x80\x80\x80\x80" | |
249 | : I8_to_native( | |
250 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), | |
04f42bf6 | 251 | 'utf8', -1, |
76513bdc | 252 | (isASCII || $::is64bit) ? 2 : 8, |
76513bdc | 253 | ], |
6aa905cf KW |
254 | ); |
255 | ||
256 | if (! $::is64bit) { | |
257 | if (isASCII) { | |
258 | no warnings qw{portable overflow}; | |
259 | push @tests, | |
260 | [ "Lowest 33 bit code point: overflow", | |
261 | "\xFE\x84\x80\x80\x80\x80\x80", | |
04f42bf6 | 262 | 'utf8', -1, |
b3169593 | 263 | 1, |
6aa905cf KW |
264 | ]; |
265 | } | |
266 | } | |
267 | else { | |
268 | no warnings qw{portable overflow}; | |
269 | push @tests, | |
270 | [ "More than 32 bits", | |
271 | (isASCII) | |
272 | ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" | |
273 | : I8_to_native( | |
274 | "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 275 | 'utf8', 0x1000000000, |
b3169593 | 276 | (isASCII) ? 1 : 7, |
6aa905cf KW |
277 | ]; |
278 | if (! isASCII) { | |
279 | push @tests, # These could falsely show wrongly in a naive | |
280 | # implementation | |
281 | [ "requires at least 32 bits", | |
282 | I8_to_native( | |
283 | "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 284 | 'utf8', 0x800000000, |
b3169593 | 285 | 7, |
6aa905cf KW |
286 | ], |
287 | [ "requires at least 32 bits", | |
288 | I8_to_native( | |
289 | "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 290 | 'utf8', 0x10000000000, |
b3169593 | 291 | 6, |
6aa905cf KW |
292 | ], |
293 | [ "requires at least 32 bits", | |
294 | I8_to_native( | |
295 | "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 296 | 'utf8', 0x200000000000, |
b3169593 | 297 | 5, |
6aa905cf KW |
298 | ], |
299 | [ "requires at least 32 bits", | |
300 | I8_to_native( | |
301 | "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 302 | 'utf8', 0x4000000000000, |
b3169593 | 303 | 4, |
6aa905cf KW |
304 | ], |
305 | [ "requires at least 32 bits", | |
306 | I8_to_native( | |
307 | "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 308 | 'utf8', 0x80000000000000, |
b3169593 | 309 | 3, |
6aa905cf KW |
310 | ], |
311 | [ "requires at least 32 bits", | |
312 | I8_to_native( | |
313 | "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
6aa905cf | 314 | 'utf8', 0x1000000000000000, |
b3169593 | 315 | 2, |
6aa905cf KW |
316 | ]; |
317 | } | |
318 | } | |
319 | ||
5722c46d KW |
320 | sub flags_to_text($$) |
321 | { | |
322 | my ($flags, $flags_to_text_ref) = @_; | |
323 | ||
324 | # Returns a string containing a mnemonic representation of the bits that | |
325 | # are set in the $flags. These are assumed to be flag bits. The return | |
326 | # looks like "FOO|BAR|BAZ". The second parameter is a reference to an | |
327 | # array that gives the textual representation of all the possible flags. | |
328 | # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If | |
329 | # no bits at all are set the string "0" is returned; | |
330 | ||
331 | my @flag_text; | |
332 | my $shift = 0; | |
333 | ||
334 | return "0" if $flags == 0; | |
335 | ||
336 | while ($flags) { | |
337 | #diag sprintf "%x", $flags; | |
338 | if ($flags & 1) { | |
339 | push @flag_text, $flags_to_text_ref->[$shift]; | |
340 | } | |
341 | $shift++; | |
342 | $flags >>= 1; | |
343 | } | |
344 | ||
345 | return join "|", @flag_text; | |
346 | } | |
347 | ||
348 | # Possible flag returns from utf8n_to_uvchr_error(). These should have G_, | |
349 | # instead of A_, D_, but the prefixes will be used in a a later commit, so | |
350 | # minimize churn by having them here. | |
351 | my @utf8n_flags_to_text = ( qw( | |
352 | A_EMPTY | |
353 | A_CONTINUATION | |
354 | A_NON_CONTINUATION | |
355 | A_SHORT | |
356 | A_LONG | |
357 | A_LONG_AND_ITS_VALUE | |
358 | PLACEHOLDER | |
359 | A_OVERFLOW | |
360 | D_SURROGATE | |
361 | W_SURROGATE | |
362 | D_NONCHAR | |
363 | W_NONCHAR | |
364 | D_SUPER | |
365 | W_SUPER | |
366 | D_ABOVE_31_BIT | |
367 | W_ABOVE_31_BIT | |
368 | CHECK_ONLY | |
369 | NO_CONFIDENCE_IN_CURLEN_ | |
370 | ) ); | |
371 | ||
a8ee5133 KW |
372 | sub utf8n_display_call($) |
373 | { | |
374 | # Converts an eval string that calls test_utf8n_to_uvchr into a more human | |
375 | # readable form, and returns it. Doesn't work if the byte string contains | |
376 | # an apostrophe. The return will look something like: | |
377 | # test_utf8n_to_uvchr_error('$bytes', $length, $flags) | |
378 | #diag $_[0]; | |
379 | ||
380 | $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x; | |
381 | my $text1 = $1; # Everything before the byte string | |
382 | my $bytes = $2; | |
383 | my $text2 = $3; # Includes the length | |
384 | my $flags = $4; | |
385 | ||
386 | return $text1 | |
387 | . display_bytes($bytes) | |
388 | . $text2 | |
389 | . flags_to_text($flags, \@utf8n_flags_to_text) | |
390 | . ')'; | |
391 | } | |
392 | ||
d884ea32 KW |
393 | sub uvchr_display_call($) |
394 | { | |
395 | # Converts an eval string that calls test_uvchr_to_utf8 into a more human | |
396 | # readable form, and returns it. The return will look something like: | |
397 | # test_uvchr_to_utf8n_flags($uv, $flags) | |
398 | #diag $_[0]; | |
399 | ||
400 | my @flags_to_text = ( qw( | |
401 | W_SURROGATE | |
402 | W_NONCHAR | |
403 | W_SUPER | |
404 | W_ABOVE_31_BIT | |
405 | D_SURROGATE | |
406 | D_NONCHAR | |
407 | D_SUPER | |
408 | D_ABOVE_31_BIT | |
409 | ) ); | |
410 | ||
411 | $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x; | |
412 | my $text = $1; | |
413 | my $cp = sprintf "%X", $2; | |
414 | my $flags = $3; | |
415 | ||
416 | return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')'; | |
417 | } | |
418 | ||
6aa905cf KW |
419 | # This test is split into this number of files. |
420 | my $num_test_files = $ENV{TEST_JOBS} || 1; | |
421 | $num_test_files = 10 if $num_test_files > 10; | |
422 | ||
423 | my $test_count = -1; | |
424 | foreach my $test (@tests) { | |
425 | $test_count++; | |
426 | next if $test_count % $num_test_files != $::TEST_CHUNK; | |
427 | ||
af816908 | 428 | my ($testname, $bytes, |
9cdc3054 | 429 | $controlling_warning_category, $allowed_uv, $needed_to_discern_len |
6aa905cf KW |
430 | ) = @$test; |
431 | ||
432 | my $length = length $bytes; | |
04f42bf6 | 433 | my $will_overflow = $allowed_uv < 0; |
6aa905cf | 434 | |
2c511c58 KW |
435 | my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv); |
436 | ||
af816908 KW |
437 | my $utf8n_flag_to_warn; |
438 | my $utf8n_flag_to_disallow; | |
439 | my $uvchr_flag_to_warn; | |
440 | my $uvchr_flag_to_disallow; | |
b7e1f4b2 | 441 | |
bf422d6a KW |
442 | # Many of the code points being tested are middling in that if code point |
443 | # edge cases work, these are very likely to as well. Because this test | |
444 | # file takes a while to execute, we skip testing the edge effects of code | |
445 | # points deemed middling, while testing their basics and continuing to | |
446 | # fully test the non-middling code points. | |
447 | my $skip_most_tests = 0; | |
448 | ||
601e92f1 KW |
449 | my $cp_message_qr; # Pattern that matches the message raised when |
450 | # that message contains the problematic code | |
451 | # point. The message is the same (currently) both | |
452 | # when going from/to utf8. | |
453 | my $non_cp_trailing_text; # The suffix text when the message doesn't | |
454 | # contain a code point. (This is a result of | |
455 | # some sort of malformation that means we | |
456 | # can't get an exact code poin | |
457 | ||
af816908 KW |
458 | if ($will_overflow || $allowed_uv > 0x10FFFF) { |
459 | ||
460 | $utf8n_flag_to_warn = $::UTF8_WARN_SUPER; | |
461 | $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER; | |
462 | $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER; | |
463 | $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;; | |
464 | ||
465 | if ($will_overflow) { | |
601e92f1 KW |
466 | $non_cp_trailing_text = "overflows"; |
467 | $cp_message_qr = qr/\Q$non_cp_trailing_text\E/; | |
af816908 KW |
468 | } |
469 | elsif ($allowed_uv > 0x7FFFFFFF) { | |
601e92f1 KW |
470 | $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E |
471 | \Q and not portable\E/x; | |
472 | $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable"; | |
af816908 | 473 | } |
601e92f1 KW |
474 | else { |
475 | $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E | |
476 | \Q may not be portable\E/x; | |
477 | $non_cp_trailing_text = "is for a non-Unicode code point, may not" | |
478 | . " be portable"; | |
af816908 | 479 | } |
3022ad00 KW |
480 | } |
481 | elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) { | |
601e92f1 KW |
482 | $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/; |
483 | $non_cp_trailing_text = "is for a surrogate"; | |
67e45424 | 484 | $needed_to_discern_len = 2 unless defined $needed_to_discern_len; |
bf422d6a | 485 | $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF; |
af816908 KW |
486 | |
487 | $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE; | |
488 | $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE; | |
489 | $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE; | |
490 | $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;; | |
3022ad00 KW |
491 | } |
492 | elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF) | |
493 | || ($allowed_uv & 0xFFFE) == 0xFFFE) | |
494 | { | |
601e92f1 KW |
495 | $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E |
496 | \Q is not recommended for open interchange\E/x; | |
497 | $non_cp_trailing_text = "if you see this, there is an error"; | |
67e45424 | 498 | $needed_to_discern_len = $length unless defined $needed_to_discern_len; |
bf422d6a KW |
499 | if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF) |
500 | || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE)) | |
501 | { | |
502 | $skip_most_tests = 1; | |
503 | } | |
af816908 KW |
504 | |
505 | $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR; | |
506 | $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR; | |
507 | $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR; | |
508 | $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;; | |
3022ad00 KW |
509 | } |
510 | else { | |
511 | die "Can't figure out what type of warning to test for $testname" | |
512 | } | |
513 | ||
67e45424 KW |
514 | die 'Didn\'t set $needed_to_discern_len for ' . $testname |
515 | unless defined $needed_to_discern_len; | |
af816908 KW |
516 | my $disallow_flags = $utf8n_flag_to_disallow; |
517 | my $warn_flags = $disallow_flags << 1; | |
518 | ||
519 | # The convention is that the got flag is the same value as the disallow | |
520 | # one, and the warn flag is the next bit over. If this were violated, the | |
521 | # tests here should start failing. We could do an eval under no strict to | |
522 | # be sure. | |
523 | my $expected_error_flags = $disallow_flags; | |
67e45424 | 524 | |
6aa905cf KW |
525 | { |
526 | use warnings; | |
9cdc3054 | 527 | undef @warnings_gotten; |
6aa905cf KW |
528 | my $ret = test_isUTF8_CHAR($bytes, $length); |
529 | my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); | |
530 | if ($will_overflow) { | |
d402d77f KW |
531 | is($ret, 0, "For $testname: isUTF8_CHAR() returns 0"); |
532 | is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0"); | |
6aa905cf KW |
533 | } |
534 | else { | |
535 | is($ret, $length, | |
d402d77f KW |
536 | "For $testname: isUTF8_CHAR() returns expected length: $length"); |
537 | is($ret_flags, $length, " And isUTF8_CHAR_flags(...,0)" | |
6aa905cf KW |
538 | . " returns expected length: $length"); |
539 | } | |
9cdc3054 | 540 | is(scalar @warnings_gotten, 0, |
d402d77f KW |
541 | " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated" |
542 | . " any warnings") | |
9cdc3054 | 543 | or output_warnings(@warnings_gotten); |
6aa905cf | 544 | |
9cdc3054 | 545 | undef @warnings_gotten; |
6aa905cf KW |
546 | $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); |
547 | if ($will_overflow) { | |
d402d77f | 548 | is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0"); |
6aa905cf KW |
549 | } |
550 | else { | |
551 | my $expected_ret = ( $testname =~ /surrogate|non-character/ | |
552 | || $allowed_uv > 0x10FFFF) | |
553 | ? 0 | |
554 | : $length; | |
d402d77f | 555 | is($ret, $expected_ret, " And isSTRICT_UTF8_CHAR() returns" |
6aa905cf KW |
556 | . " expected length: $expected_ret"); |
557 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
558 | $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); | |
559 | is($ret, $expected_ret, | |
d402d77f KW |
560 | " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" |
561 | . " acts like isSTRICT_UTF8_CHAR"); | |
6aa905cf | 562 | } |
9cdc3054 | 563 | is(scalar @warnings_gotten, 0, |
d402d77f KW |
564 | " And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags" |
565 | . " generated any warnings") | |
9cdc3054 | 566 | or output_warnings(@warnings_gotten); |
6aa905cf | 567 | |
9cdc3054 | 568 | undef @warnings_gotten; |
6aa905cf KW |
569 | $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); |
570 | if ($will_overflow) { | |
d402d77f | 571 | is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0"); |
6aa905cf KW |
572 | } |
573 | else { | |
574 | my $expected_ret = ( $testname =~ /surrogate/ | |
575 | || $allowed_uv > 0x10FFFF) | |
576 | ? 0 | |
577 | : $length; | |
d402d77f | 578 | is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()" |
6aa905cf KW |
579 | ." returns expected length: $expected_ret"); |
580 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
581 | $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); | |
582 | is($ret, $expected_ret, | |
d402d77f KW |
583 | " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" |
584 | . " acts like isC9_STRICT_UTF8_CHAR"); | |
6aa905cf | 585 | } |
9cdc3054 | 586 | is(scalar @warnings_gotten, 0, |
d402d77f KW |
587 | " And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags" |
588 | . " generated any warnings") | |
9cdc3054 | 589 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
590 | |
591 | # Test partial character handling, for each byte not a full character | |
592 | for my $j (1.. $length - 1) { | |
593 | ||
594 | # Skip the test for the interaction between overflow and above-31 | |
595 | # bit. It is really testing other things than the partial | |
596 | # character tests, for which other tests in this file are | |
597 | # sufficient | |
04f42bf6 | 598 | last if $will_overflow; |
6aa905cf KW |
599 | |
600 | foreach my $disallow_flag (0, $disallow_flags) { | |
601 | my $partial = substr($bytes, 0, $j); | |
602 | my $ret_should_be; | |
603 | my $comment; | |
604 | if ($disallow_flag) { | |
605 | $ret_should_be = 0; | |
606 | $comment = "disallowed"; | |
607 | if ($j < $needed_to_discern_len) { | |
608 | $ret_should_be = 1; | |
609 | $comment .= ", but need $needed_to_discern_len bytes" | |
610 | . " to discern:"; | |
611 | } | |
612 | } | |
613 | else { | |
614 | $ret_should_be = 1; | |
615 | $comment = "allowed"; | |
616 | } | |
617 | ||
9cdc3054 | 618 | undef @warnings_gotten; |
6aa905cf KW |
619 | |
620 | $ret = test_is_utf8_valid_partial_char_flags($partial, $j, | |
621 | $disallow_flag); | |
622 | is($ret, $ret_should_be, | |
d402d77f KW |
623 | " And is_utf8_valid_partial_char_flags(" |
624 | . display_bytes($partial) | |
625 | . "), $comment: returns $ret_should_be"); | |
9cdc3054 | 626 | is(scalar @warnings_gotten, 0, |
d402d77f | 627 | " And is_utf8_valid_partial_char_flags()" |
6aa905cf | 628 | . " generated no warnings") |
9cdc3054 | 629 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
630 | } |
631 | } | |
632 | } | |
633 | ||
634 | # This is more complicated than the malformations tested earlier, as there | |
635 | # are several orthogonal variables involved. We test all the subclasses | |
636 | # of utf8 warnings to verify they work with and without the utf8 class, | |
637 | # and don't have effects on other sublass warnings | |
9cdc3054 | 638 | foreach my $trial_warning_category ('utf8', 'surrogate', 'nonchar', 'non_unicode') { |
bf422d6a | 639 | next if $skip_most_tests && $trial_warning_category ne $controlling_warning_category; |
6aa905cf | 640 | foreach my $warn_flag (0, $warn_flags) { |
bf422d6a | 641 | next if $skip_most_tests && ! $warn_flag; |
6aa905cf | 642 | foreach my $disallow_flag (0, $disallow_flags) { |
bf422d6a | 643 | next if $skip_most_tests && ! $disallow_flag; |
6aa905cf | 644 | foreach my $do_warning (0, 1) { |
bf422d6a | 645 | next if $skip_most_tests && ! $do_warning; |
6aa905cf KW |
646 | |
647 | # We try each of the above with various combinations of | |
648 | # malformations that can occur on the same input sequence. | |
649 | foreach my $short ("", "short") { | |
bf422d6a | 650 | next if $skip_most_tests && $short; |
6aa905cf KW |
651 | foreach my $unexpected_noncont ("", |
652 | "unexpected non-continuation") | |
653 | { | |
bf422d6a | 654 | next if $skip_most_tests && $unexpected_noncont; |
6aa905cf | 655 | foreach my $overlong ("", "overlong") { |
bf422d6a | 656 | next if $overlong && $skip_most_tests; |
6aa905cf | 657 | |
b3169593 KW |
658 | # If we're creating an overlong, it can't be longer than |
659 | # the maximum length, so skip if we're already at that | |
660 | # length. | |
661 | next if $overlong && $length >= $::max_bytes; | |
6aa905cf KW |
662 | |
663 | my @malformations; | |
9cdc3054 | 664 | my @expected_return_flags; |
6aa905cf KW |
665 | push @malformations, $short if $short; |
666 | push @malformations, $unexpected_noncont | |
667 | if $unexpected_noncont; | |
668 | push @malformations, $overlong if $overlong; | |
669 | ||
670 | # The overflow malformation test in the input | |
671 | # array is coerced into being treated like one of | |
672 | # the others. | |
673 | if ($will_overflow) { | |
674 | push @malformations, 'overflow'; | |
9cdc3054 | 675 | push @expected_return_flags, $::UTF8_GOT_OVERFLOW; |
6aa905cf KW |
676 | } |
677 | ||
601e92f1 KW |
678 | my $message; |
679 | if (@malformations && grep { $_ !~ /overlong/ } @malformations) { | |
680 | $message = qr/\Q$non_cp_trailing_text\E/; | |
681 | } | |
682 | else { | |
683 | $message = $cp_message_qr; | |
684 | } | |
685 | ||
6aa905cf KW |
686 | my $malformations_name = join "/", @malformations; |
687 | $malformations_name .= " malformation" | |
688 | if $malformations_name; | |
689 | $malformations_name .= "s" if @malformations > 1; | |
690 | my $this_bytes = $bytes; | |
691 | my $this_length = $length; | |
692 | my $expected_uv = $allowed_uv; | |
b3169593 | 693 | my $this_expected_len = $length; |
6aa905cf KW |
694 | my $this_needed_to_discern_len = $needed_to_discern_len; |
695 | if ($malformations_name) { | |
696 | $expected_uv = 0; | |
697 | ||
698 | # Coerce the input into the desired | |
699 | # malformation | |
700 | if ($malformations_name =~ /overlong/) { | |
701 | ||
702 | # For an overlong, we convert the original | |
703 | # start byte into a continuation byte with | |
704 | # the same data bits as originally. ... | |
705 | substr($this_bytes, 0, 1) | |
706 | = start_byte_to_cont(substr($this_bytes, | |
707 | 0, 1)); | |
708 | ||
709 | # ... Then we prepend it with a known | |
710 | # overlong sequence. This should evaluate | |
711 | # to the exact same code point as the | |
712 | # original. | |
713 | $this_bytes | |
714 | = I8_to_native("\xff") | |
dbb8d798 | 715 | . (I8_to_native(chr $::lowest_continuation) |
6aa905cf KW |
716 | x ( $::max_bytes - 1 - length($this_bytes))) |
717 | . $this_bytes; | |
718 | $this_length = length($this_bytes); | |
719 | $this_needed_to_discern_len | |
720 | = $::max_bytes - ($this_expected_len | |
721 | - $this_needed_to_discern_len); | |
722 | $this_expected_len = $::max_bytes; | |
9cdc3054 | 723 | push @expected_return_flags, $::UTF8_GOT_LONG; |
6aa905cf KW |
724 | } |
725 | if ($malformations_name =~ /short/) { | |
726 | ||
727 | # Just tell the test to not look far | |
728 | # enough into the input. | |
729 | $this_length--; | |
730 | $this_expected_len--; | |
9cdc3054 | 731 | push @expected_return_flags, $::UTF8_GOT_SHORT; |
6aa905cf KW |
732 | } |
733 | if ($malformations_name | |
734 | =~ /non-continuation/) | |
735 | { | |
736 | # Change the final continuation byte into | |
737 | # a non one. | |
738 | my $pos = ($short) ? -2 : -1; | |
739 | substr($this_bytes, $pos, 1) = '?'; | |
740 | $this_expected_len--; | |
9cdc3054 | 741 | push @expected_return_flags, |
6aa905cf KW |
742 | $::UTF8_GOT_NON_CONTINUATION; |
743 | } | |
744 | } | |
745 | ||
746 | my $eval_warn = $do_warning | |
9cdc3054 KW |
747 | ? "use warnings '$trial_warning_category'" |
748 | : $trial_warning_category eq "utf8" | |
6aa905cf KW |
749 | ? "no warnings 'utf8'" |
750 | : ( "use warnings 'utf8';" | |
9cdc3054 | 751 | . " no warnings '$trial_warning_category'"); |
6aa905cf KW |
752 | |
753 | # Is effectively disallowed if we've set up a | |
754 | # malformation, even if the flag indicates it is | |
755 | # allowed. Fix up test name to indicate this as | |
756 | # well | |
757 | my $disallowed = $disallow_flag | |
758 | || $malformations_name; | |
759 | my $this_name = "utf8n_to_uvchr_error() $testname: " | |
760 | . (($disallow_flag) | |
761 | ? 'disallowed' | |
762 | : $disallowed | |
763 | ? $disallowed | |
764 | : 'allowed'); | |
765 | $this_name .= ", $eval_warn"; | |
766 | $this_name .= ", " . (($warn_flag) | |
767 | ? 'with warning flag' | |
768 | : 'no warning flag'); | |
769 | ||
9cdc3054 | 770 | undef @warnings_gotten; |
6aa905cf | 771 | my $ret_ref; |
a8ee5133 | 772 | my $this_flags = $warn_flag | $disallow_flag; |
6aa905cf KW |
773 | my $eval_text = "$eval_warn; \$ret_ref" |
774 | . " = test_utf8n_to_uvchr_error(" | |
775 | . "'$this_bytes'," | |
a8ee5133 | 776 | . " $this_length, $this_flags)"; |
6aa905cf KW |
777 | eval "$eval_text"; |
778 | if (! ok ("$@ eq ''", | |
779 | "$this_name: eval succeeded")) | |
780 | { | |
a8ee5133 KW |
781 | diag "\$@='$@'; call was: " |
782 | . utf8n_display_call($eval_text); | |
6aa905cf KW |
783 | next; |
784 | } | |
785 | if ($disallowed) { | |
d402d77f | 786 | is($ret_ref->[0], 0, " And returns 0") |
a8ee5133 | 787 | or diag "Call was: " . utf8n_display_call($eval_text); |
6aa905cf KW |
788 | } |
789 | else { | |
790 | is($ret_ref->[0], $expected_uv, | |
d402d77f KW |
791 | " And returns expected uv: " |
792 | . $uv_string) | |
a8ee5133 | 793 | or diag "Call was: " . utf8n_display_call($eval_text); |
6aa905cf KW |
794 | } |
795 | is($ret_ref->[1], $this_expected_len, | |
d402d77f | 796 | " And returns expected length:" |
6aa905cf | 797 | . " $this_expected_len") |
a8ee5133 | 798 | or diag "Call was: " . utf8n_display_call($eval_text); |
6aa905cf | 799 | |
9cdc3054 | 800 | my $returned_flags = $ret_ref->[2]; |
6aa905cf | 801 | |
9cdc3054 KW |
802 | for (my $i = @expected_return_flags - 1; $i >= 0; $i--) { |
803 | if (ok($expected_return_flags[$i] & $returned_flags, | |
d402d77f | 804 | " Expected and got return flag" |
6aa905cf KW |
805 | . " for $malformations[$i] malformation")) |
806 | { | |
9cdc3054 | 807 | $returned_flags &= ~$expected_return_flags[$i]; |
6aa905cf | 808 | } |
9cdc3054 | 809 | splice @expected_return_flags, $i, 1; |
6aa905cf | 810 | } |
9cdc3054 | 811 | is(scalar @expected_return_flags, 0, |
d402d77f | 812 | " Got all the expected malformation errors") |
9cdc3054 | 813 | or diag Dumper \@expected_return_flags; |
6aa905cf KW |
814 | |
815 | if ( $this_expected_len >= $this_needed_to_discern_len | |
816 | && ($warn_flag || $disallow_flag)) | |
817 | { | |
9cdc3054 | 818 | is($returned_flags, $expected_error_flags, |
d402d77f | 819 | " Got the correct error flag") |
a8ee5133 | 820 | or diag "Call was: " . utf8n_display_call($eval_text); |
6aa905cf KW |
821 | } |
822 | else { | |
d402d77f | 823 | is($returned_flags, 0, " Got no other error flag") |
5722c46d KW |
824 | or |
825 | ||
826 | # We strip off any prefixes from the flag names | |
827 | diag "The unexpected flags were: " | |
828 | . (flags_to_text($returned_flags, | |
829 | \@utf8n_flags_to_text) | |
830 | =~ s/ \b [A-Z] _ //xgr); | |
6aa905cf KW |
831 | } |
832 | ||
833 | if (@malformations) { | |
9cdc3054 | 834 | if (! $do_warning && $trial_warning_category eq 'utf8') { |
6aa905cf KW |
835 | goto no_warnings_expected; |
836 | } | |
837 | ||
838 | # Check that each malformation generates a | |
839 | # warning, removing that warning if found | |
840 | MALFORMATION: | |
841 | foreach my $malformation (@malformations) { | |
9cdc3054 KW |
842 | foreach (my $i = 0; $i < @warnings_gotten; $i++) { |
843 | if ($warnings_gotten[$i] =~ /$malformation/) { | |
d402d77f | 844 | pass(" Expected and got" |
6aa905cf | 845 | . "'$malformation' warning"); |
9cdc3054 | 846 | splice @warnings_gotten, $i, 1; |
6aa905cf KW |
847 | next MALFORMATION; |
848 | } | |
849 | } | |
d402d77f KW |
850 | fail(" Expected '$malformation' warning" |
851 | . " but didn't get it"); | |
6aa905cf KW |
852 | |
853 | } | |
854 | } | |
855 | ||
856 | # Any overflow will override any super or above-31 | |
857 | # warnings. | |
858 | goto no_warnings_expected | |
859 | if $will_overflow || $this_expected_len | |
860 | < $this_needed_to_discern_len; | |
861 | ||
862 | if ( ! $do_warning | |
9cdc3054 KW |
863 | && ( $trial_warning_category eq 'utf8' |
864 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf KW |
865 | { |
866 | goto no_warnings_expected; | |
867 | } | |
868 | elsif ($warn_flag) { | |
9cdc3054 | 869 | if (is(scalar @warnings_gotten, 1, |
d402d77f | 870 | " Got a single warning ")) |
6aa905cf | 871 | { |
9cdc3054 | 872 | like($warnings_gotten[0], $message, |
d402d77f | 873 | " Got expected warning") |
a8ee5133 | 874 | or diag "Call was: " |
d402d77f | 875 | . utf8n_display_call($eval_text); |
6aa905cf KW |
876 | } |
877 | else { | |
a8ee5133 | 878 | diag "Call was: " . utf8n_display_call($eval_text); |
9cdc3054 KW |
879 | if (scalar @warnings_gotten) { |
880 | output_warnings(@warnings_gotten); | |
6aa905cf KW |
881 | } |
882 | } | |
883 | } | |
884 | else { | |
9cdc3054 | 885 | |
6aa905cf | 886 | no_warnings_expected: |
9cdc3054 | 887 | unless (is(scalar @warnings_gotten, 0, |
d402d77f | 888 | " Got no warnings")) |
6aa905cf | 889 | { |
a8ee5133 | 890 | diag "Call was: " . utf8n_display_call($eval_text); |
9cdc3054 | 891 | output_warnings(@warnings_gotten); |
6aa905cf KW |
892 | } |
893 | } | |
894 | ||
895 | # Check CHECK_ONLY results when the input is | |
896 | # disallowed. Do this when actually disallowed, | |
897 | # not just when the $disallow_flag is set | |
898 | if ($disallowed) { | |
9cdc3054 | 899 | undef @warnings_gotten; |
a8ee5133 KW |
900 | $this_flags = $disallow_flag|$::UTF8_CHECK_ONLY; |
901 | $eval_text = "\$ret_ref = test_utf8n_to_uvchr_error(" | |
902 | . "'$this_bytes', $this_length, $this_flags)"; | |
903 | eval "$eval_text"; | |
904 | if (! ok ("$@ eq ''", | |
905 | " And eval succeeded with CHECK_ONLY")) | |
906 | { | |
907 | diag "\$@='$@'; Call was: " | |
908 | . utf8n_display_call($eval_text); | |
909 | next; | |
910 | } | |
d402d77f | 911 | is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0") |
a8ee5133 | 912 | or diag "Call was: " . utf8n_display_call($eval_text); |
6aa905cf | 913 | is($ret_ref->[1], -1, |
d402d77f | 914 | " CHECK_ONLY: returns -1 for length") |
a8ee5133 | 915 | or diag "Call was: " . utf8n_display_call($eval_text); |
9cdc3054 | 916 | if (! is(scalar @warnings_gotten, 0, |
d402d77f | 917 | " CHECK_ONLY: no warnings generated")) |
6aa905cf | 918 | { |
a8ee5133 | 919 | diag "Call was: " . utf8n_display_call($eval_text); |
9cdc3054 | 920 | output_warnings(@warnings_gotten); |
6aa905cf KW |
921 | } |
922 | } | |
923 | ||
924 | # Now repeat some of the above, but for | |
925 | # uvchr_to_utf8_flags(). Since this comes from an | |
926 | # existing code point, it hasn't overflowed, and | |
927 | # isn't malformed. | |
928 | next if @malformations; | |
929 | ||
930 | # The warning and disallow flags passed in are for | |
931 | # utf8n_to_uvchr_error(). Convert them for | |
932 | # uvchr_to_utf8_flags(). | |
933 | my $uvchr_warn_flag = 0; | |
934 | my $uvchr_disallow_flag = 0; | |
935 | if ($warn_flag) { | |
af816908 | 936 | $uvchr_warn_flag = $uvchr_flag_to_warn; |
6aa905cf KW |
937 | } |
938 | if ($disallow_flag) { | |
af816908 | 939 | $uvchr_disallow_flag = $uvchr_flag_to_disallow; |
6aa905cf KW |
940 | } |
941 | ||
942 | $disallowed = $uvchr_disallow_flag; | |
943 | ||
944 | $this_name = "uvchr_to_utf8_flags() $testname: " | |
945 | . (($uvchr_disallow_flag) | |
946 | ? 'disallowed' | |
947 | : ($disallowed) | |
948 | ? 'ABOVE_31_BIT allowed' | |
949 | : 'allowed'); | |
950 | $this_name .= ", $eval_warn"; | |
951 | $this_name .= ", " . (($uvchr_warn_flag) | |
952 | ? 'with warning flag' | |
953 | : 'no warning flag'); | |
954 | ||
9cdc3054 | 955 | undef @warnings_gotten; |
6aa905cf | 956 | my $ret; |
a8ee5133 | 957 | $this_flags = $uvchr_warn_flag | $uvchr_disallow_flag; |
6aa905cf KW |
958 | $eval_text = "$eval_warn; \$ret =" |
959 | . " test_uvchr_to_utf8_flags(" | |
d884ea32 | 960 | . "$allowed_uv, $this_flags)"; |
6aa905cf KW |
961 | eval "$eval_text"; |
962 | if (! ok ("$@ eq ''", "$this_name: eval succeeded")) | |
963 | { | |
d884ea32 KW |
964 | diag "\$@='$@'; call was: " |
965 | . uvchr_display_call($eval_text); | |
6aa905cf KW |
966 | next; |
967 | } | |
968 | if ($disallowed) { | |
d402d77f | 969 | is($ret, undef, " And returns undef") |
d884ea32 | 970 | or diag "Call was: " . uvchr_display_call($eval_text); |
6aa905cf KW |
971 | } |
972 | else { | |
d402d77f | 973 | is($ret, $this_bytes, " And returns expected string") |
d884ea32 | 974 | or diag "Call was: " . uvchr_display_call($eval_text); |
6aa905cf KW |
975 | } |
976 | if (! $do_warning | |
9cdc3054 | 977 | && ($trial_warning_category eq 'utf8' || $trial_warning_category eq $controlling_warning_category)) |
6aa905cf | 978 | { |
9cdc3054 | 979 | if (!is(scalar @warnings_gotten, 0, |
d402d77f | 980 | " No warnings generated")) |
6aa905cf | 981 | { |
d884ea32 | 982 | diag "Call was: " . uvchr_display_call($eval_text); |
9cdc3054 | 983 | output_warnings(@warnings_gotten); |
6aa905cf KW |
984 | } |
985 | } | |
986 | elsif ( $uvchr_warn_flag | |
9cdc3054 KW |
987 | && ( $trial_warning_category eq 'utf8' |
988 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf | 989 | { |
9cdc3054 | 990 | if (is(scalar @warnings_gotten, 1, |
d402d77f | 991 | " Got a single warning ")) |
6aa905cf | 992 | { |
9cdc3054 | 993 | like($warnings_gotten[0], $message, |
d402d77f | 994 | " Got expected warning") |
d884ea32 KW |
995 | or diag "Call was: " |
996 | . uvchr_display_call($eval_text); | |
6aa905cf KW |
997 | } |
998 | else { | |
d884ea32 | 999 | diag "Call was: " . uvchr_display_call($eval_text); |
9cdc3054 KW |
1000 | output_warnings(@warnings_gotten) |
1001 | if scalar @warnings_gotten; | |
6aa905cf KW |
1002 | } |
1003 | } | |
1004 | } | |
1005 | } | |
1006 | } | |
1007 | } | |
1008 | } | |
1009 | } | |
1010 | } | |
1011 | } | |
1012 | ||
1013 | done_testing; |