Commit | Line | Data |
---|---|---|
6aa905cf KW |
1 | #!perl -w |
2 | ||
3 | # This is a base file to be used by various .t's in its directory | |
4 | ||
5 | use strict; | |
6 | use Test::More; | |
7 | ||
8 | BEGIN { | |
9 | use_ok('XS::APItest'); | |
10 | require 'charset_tools.pl'; | |
11 | require './t/utf8_setup.pl'; | |
12 | }; | |
13 | ||
14 | $|=1; | |
15 | ||
16 | no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit | |
17 | # machines, and that is tested elsewhere | |
18 | ||
19 | use XS::APItest; | |
20 | use Data::Dumper; | |
21 | ||
9cdc3054 | 22 | my @warnings_gotten; |
6aa905cf KW |
23 | |
24 | use warnings 'utf8'; | |
9cdc3054 | 25 | local $SIG{__WARN__} = sub { push @warnings_gotten, @_ }; |
6aa905cf KW |
26 | |
27 | sub nonportable_regex ($) { | |
28 | ||
29 | # Returns a pattern that matches the non-portable message raised either | |
30 | # for the specific input code point, or the one generated when there | |
31 | # is some malformation that precludes the message containing the specific | |
32 | # code point | |
33 | ||
34 | my $code_point = shift; | |
35 | ||
36 | my $string = sprintf '(Code point 0x%X is not Unicode, and' | |
37 | . '|Any UTF-8 sequence that starts with' | |
38 | . ' "(\\\x[[:xdigit:]]{2})+" is for a' | |
39 | . ' non-Unicode code point, and is) not portable', | |
40 | $code_point; | |
41 | return qr/$string/; | |
42 | } | |
43 | ||
44 | # Now test the cases where a legal code point is generated, but may or may not | |
45 | # be allowed/warned on. | |
46 | my @tests = ( | |
9cdc3054 | 47 | # ($testname, $bytes, $disallow_flags, $controlling_warning_category, |
b7e1f4b2 | 48 | # $allowed_uv, $needed_to_discern_len ) |
6aa905cf KW |
49 | [ "lowest surrogate", |
50 | (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), | |
b7e1f4b2 | 51 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 52 | 'surrogate', 0xD800, |
6aa905cf KW |
53 | ], |
54 | [ "a middle surrogate", | |
55 | (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), | |
b7e1f4b2 | 56 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 57 | 'surrogate', 0xD90D, |
6aa905cf KW |
58 | ], |
59 | [ "highest surrogate", | |
60 | (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), | |
b7e1f4b2 | 61 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 62 | 'surrogate', 0xDFFF, |
6aa905cf KW |
63 | ], |
64 | [ "first non_unicode", | |
65 | (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), | |
b7e1f4b2 | 66 | $::UTF8_DISALLOW_SUPER, |
6aa905cf | 67 | 'non_unicode', 0x110000, |
6aa905cf | 68 | 2, |
6aa905cf KW |
69 | ], |
70 | [ "non_unicode whose first byte tells that", | |
71 | (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 72 | $::UTF8_DISALLOW_SUPER, |
6aa905cf KW |
73 | 'non_unicode', |
74 | (isASCII) ? 0x140000 : 0x200000, | |
6aa905cf | 75 | 1, |
6aa905cf KW |
76 | ], |
77 | [ "first of 32 consecutive non-character code points", | |
78 | (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), | |
b7e1f4b2 | 79 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 80 | 'nonchar', 0xFDD0, |
6aa905cf KW |
81 | ], |
82 | [ "a mid non-character code point of the 32 consecutive ones", | |
83 | (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), | |
b7e1f4b2 | 84 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 85 | 'nonchar', 0xFDE0, |
6aa905cf KW |
86 | ], |
87 | [ "final of 32 consecutive non-character code points", | |
88 | (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), | |
b7e1f4b2 | 89 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 90 | 'nonchar', 0xFDEF, |
6aa905cf KW |
91 | ], |
92 | [ "non-character code point U+FFFE", | |
93 | (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), | |
b7e1f4b2 | 94 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 95 | 'nonchar', 0xFFFE, |
6aa905cf KW |
96 | ], |
97 | [ "non-character code point U+FFFF", | |
98 | (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), | |
b7e1f4b2 | 99 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 100 | 'nonchar', 0xFFFF, |
6aa905cf KW |
101 | ], |
102 | [ "non-character code point U+1FFFE", | |
103 | (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), | |
b7e1f4b2 | 104 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 105 | 'nonchar', 0x1FFFE, |
6aa905cf KW |
106 | ], |
107 | [ "non-character code point U+1FFFF", | |
108 | (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), | |
b7e1f4b2 | 109 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 110 | 'nonchar', 0x1FFFF, |
6aa905cf KW |
111 | ], |
112 | [ "non-character code point U+2FFFE", | |
113 | (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), | |
b7e1f4b2 | 114 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 115 | 'nonchar', 0x2FFFE, |
6aa905cf KW |
116 | ], |
117 | [ "non-character code point U+2FFFF", | |
118 | (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), | |
b7e1f4b2 | 119 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 120 | 'nonchar', 0x2FFFF, |
6aa905cf KW |
121 | ], |
122 | [ "non-character code point U+3FFFE", | |
123 | (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), | |
b7e1f4b2 | 124 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 125 | 'nonchar', 0x3FFFE, |
6aa905cf KW |
126 | ], |
127 | [ "non-character code point U+3FFFF", | |
128 | (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), | |
b7e1f4b2 | 129 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 130 | 'nonchar', 0x3FFFF, |
6aa905cf KW |
131 | ], |
132 | [ "non-character code point U+4FFFE", | |
133 | (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), | |
b7e1f4b2 | 134 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 135 | 'nonchar', 0x4FFFE, |
6aa905cf KW |
136 | ], |
137 | [ "non-character code point U+4FFFF", | |
138 | (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), | |
b7e1f4b2 | 139 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 140 | 'nonchar', 0x4FFFF, |
6aa905cf KW |
141 | ], |
142 | [ "non-character code point U+5FFFE", | |
143 | (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), | |
b7e1f4b2 | 144 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 145 | 'nonchar', 0x5FFFE, |
6aa905cf KW |
146 | ], |
147 | [ "non-character code point U+5FFFF", | |
148 | (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), | |
b7e1f4b2 | 149 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 150 | 'nonchar', 0x5FFFF, |
6aa905cf KW |
151 | ], |
152 | [ "non-character code point U+6FFFE", | |
153 | (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), | |
b7e1f4b2 | 154 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 155 | 'nonchar', 0x6FFFE, |
6aa905cf KW |
156 | ], |
157 | [ "non-character code point U+6FFFF", | |
158 | (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), | |
b7e1f4b2 | 159 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 160 | 'nonchar', 0x6FFFF, |
6aa905cf KW |
161 | ], |
162 | [ "non-character code point U+7FFFE", | |
163 | (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), | |
b7e1f4b2 | 164 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 165 | 'nonchar', 0x7FFFE, |
6aa905cf KW |
166 | ], |
167 | [ "non-character code point U+7FFFF", | |
168 | (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), | |
b7e1f4b2 | 169 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 170 | 'nonchar', 0x7FFFF, |
6aa905cf KW |
171 | ], |
172 | [ "non-character code point U+8FFFE", | |
173 | (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), | |
b7e1f4b2 | 174 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 175 | 'nonchar', 0x8FFFE, |
6aa905cf KW |
176 | ], |
177 | [ "non-character code point U+8FFFF", | |
178 | (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), | |
b7e1f4b2 | 179 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 180 | 'nonchar', 0x8FFFF, |
6aa905cf KW |
181 | ], |
182 | [ "non-character code point U+9FFFE", | |
183 | (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), | |
b7e1f4b2 | 184 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 185 | 'nonchar', 0x9FFFE, |
6aa905cf KW |
186 | ], |
187 | [ "non-character code point U+9FFFF", | |
188 | (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), | |
b7e1f4b2 | 189 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 190 | 'nonchar', 0x9FFFF, |
6aa905cf KW |
191 | ], |
192 | [ "non-character code point U+AFFFE", | |
193 | (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), | |
b7e1f4b2 | 194 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 195 | 'nonchar', 0xAFFFE, |
6aa905cf KW |
196 | ], |
197 | [ "non-character code point U+AFFFF", | |
198 | (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), | |
b7e1f4b2 | 199 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 200 | 'nonchar', 0xAFFFF, |
6aa905cf KW |
201 | ], |
202 | [ "non-character code point U+BFFFE", | |
203 | (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), | |
b7e1f4b2 | 204 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 205 | 'nonchar', 0xBFFFE, |
6aa905cf KW |
206 | ], |
207 | [ "non-character code point U+BFFFF", | |
208 | (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), | |
b7e1f4b2 | 209 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 210 | 'nonchar', 0xBFFFF, |
6aa905cf KW |
211 | ], |
212 | [ "non-character code point U+CFFFE", | |
213 | (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), | |
b7e1f4b2 | 214 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 215 | 'nonchar', 0xCFFFE, |
6aa905cf KW |
216 | ], |
217 | [ "non-character code point U+CFFFF", | |
218 | (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), | |
b7e1f4b2 | 219 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 220 | 'nonchar', 0xCFFFF, |
6aa905cf KW |
221 | ], |
222 | [ "non-character code point U+DFFFE", | |
223 | (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), | |
b7e1f4b2 | 224 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 225 | 'nonchar', 0xDFFFE, |
6aa905cf KW |
226 | ], |
227 | [ "non-character code point U+DFFFF", | |
228 | (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), | |
b7e1f4b2 | 229 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 230 | 'nonchar', 0xDFFFF, |
6aa905cf KW |
231 | ], |
232 | [ "non-character code point U+EFFFE", | |
233 | (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), | |
b7e1f4b2 | 234 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 235 | 'nonchar', 0xEFFFE, |
6aa905cf KW |
236 | ], |
237 | [ "non-character code point U+EFFFF", | |
238 | (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), | |
b7e1f4b2 | 239 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 240 | 'nonchar', 0xEFFFF, |
6aa905cf KW |
241 | ], |
242 | [ "non-character code point U+FFFFE", | |
243 | (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), | |
b7e1f4b2 | 244 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 245 | 'nonchar', 0xFFFFE, |
6aa905cf KW |
246 | ], |
247 | [ "non-character code point U+FFFFF", | |
248 | (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), | |
b7e1f4b2 | 249 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 250 | 'nonchar', 0xFFFFF, |
6aa905cf KW |
251 | ], |
252 | [ "non-character code point U+10FFFE", | |
253 | (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), | |
b7e1f4b2 | 254 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 255 | 'nonchar', 0x10FFFE, |
6aa905cf KW |
256 | ], |
257 | [ "non-character code point U+10FFFF", | |
258 | (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), | |
b7e1f4b2 | 259 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 260 | 'nonchar', 0x10FFFF, |
6aa905cf | 261 | ], |
76513bdc KW |
262 | [ "requires at least 32 bits", |
263 | (isASCII) | |
264 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
265 | : I8_to_native( | |
266 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
267 | # This code point is chosen so that it is representable in a UV on | |
268 | # 32-bit machines | |
b7e1f4b2 | 269 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
76513bdc | 270 | 'utf8', 0x80000000, |
76513bdc | 271 | (isASCII) ? 1 : 8, |
76513bdc KW |
272 | ], |
273 | [ "highest 32 bit code point", | |
274 | (isASCII) | |
275 | ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" | |
276 | : I8_to_native( | |
277 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), | |
b7e1f4b2 | 278 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
76513bdc | 279 | 'utf8', 0xFFFFFFFF, |
76513bdc | 280 | (isASCII) ? 1 : 8, |
76513bdc KW |
281 | ], |
282 | [ "requires at least 32 bits, and use SUPER-type flags, instead of" | |
283 | . " ABOVE_31_BIT", | |
284 | (isASCII) | |
285 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
286 | : I8_to_native( | |
287 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 288 | $::UTF8_DISALLOW_SUPER, |
76513bdc | 289 | 'utf8', 0x80000000, |
76513bdc | 290 | 1, |
76513bdc KW |
291 | ], |
292 | [ "overflow with warnings/disallow for more than 31 bits", | |
293 | # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT | |
294 | # with overflow. The overflow malformation is never allowed, so | |
295 | # preventing it takes precedence if the ABOVE_31_BIT options would | |
296 | # otherwise allow in an overflowing value. The ASCII code points (1 | |
297 | # for 32-bits; 1 for 64) were chosen because the old overflow | |
298 | # detection algorithm did not catch them; this means this test also | |
299 | # checks for that fix. The EBCDIC are arbitrary overflowing ones | |
300 | # since we have no reports of failures with it. | |
301 | (($::is64bit) | |
302 | ? ((isASCII) | |
303 | ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" | |
304 | : I8_to_native( | |
305 | "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) | |
306 | : ((isASCII) | |
307 | ? "\xfe\x86\x80\x80\x80\x80\x80" | |
308 | : I8_to_native( | |
309 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), | |
76513bdc | 310 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
04f42bf6 | 311 | 'utf8', -1, |
76513bdc | 312 | (isASCII || $::is64bit) ? 2 : 8, |
76513bdc | 313 | ], |
6aa905cf KW |
314 | ); |
315 | ||
316 | if (! $::is64bit) { | |
317 | if (isASCII) { | |
318 | no warnings qw{portable overflow}; | |
319 | push @tests, | |
320 | [ "Lowest 33 bit code point: overflow", | |
321 | "\xFE\x84\x80\x80\x80\x80\x80", | |
b7e1f4b2 | 322 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
04f42bf6 | 323 | 'utf8', -1, |
b3169593 | 324 | 1, |
6aa905cf KW |
325 | ]; |
326 | } | |
327 | } | |
328 | else { | |
329 | no warnings qw{portable overflow}; | |
330 | push @tests, | |
331 | [ "More than 32 bits", | |
332 | (isASCII) | |
333 | ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" | |
334 | : I8_to_native( | |
335 | "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 336 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 337 | 'utf8', 0x1000000000, |
b3169593 | 338 | (isASCII) ? 1 : 7, |
6aa905cf KW |
339 | ]; |
340 | if (! isASCII) { | |
341 | push @tests, # These could falsely show wrongly in a naive | |
342 | # implementation | |
343 | [ "requires at least 32 bits", | |
344 | I8_to_native( | |
345 | "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 346 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 347 | 'utf8', 0x800000000, |
b3169593 | 348 | 7, |
6aa905cf KW |
349 | ], |
350 | [ "requires at least 32 bits", | |
351 | I8_to_native( | |
352 | "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 353 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 354 | 'utf8', 0x10000000000, |
b3169593 | 355 | 6, |
6aa905cf KW |
356 | ], |
357 | [ "requires at least 32 bits", | |
358 | I8_to_native( | |
359 | "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 360 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 361 | 'utf8', 0x200000000000, |
b3169593 | 362 | 5, |
6aa905cf KW |
363 | ], |
364 | [ "requires at least 32 bits", | |
365 | I8_to_native( | |
366 | "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 367 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 368 | 'utf8', 0x4000000000000, |
b3169593 | 369 | 4, |
6aa905cf KW |
370 | ], |
371 | [ "requires at least 32 bits", | |
372 | I8_to_native( | |
373 | "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 374 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 375 | 'utf8', 0x80000000000000, |
b3169593 | 376 | 3, |
6aa905cf KW |
377 | ], |
378 | [ "requires at least 32 bits", | |
379 | I8_to_native( | |
380 | "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 381 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 382 | 'utf8', 0x1000000000000000, |
b3169593 | 383 | 2, |
6aa905cf KW |
384 | ]; |
385 | } | |
386 | } | |
387 | ||
5722c46d KW |
388 | sub flags_to_text($$) |
389 | { | |
390 | my ($flags, $flags_to_text_ref) = @_; | |
391 | ||
392 | # Returns a string containing a mnemonic representation of the bits that | |
393 | # are set in the $flags. These are assumed to be flag bits. The return | |
394 | # looks like "FOO|BAR|BAZ". The second parameter is a reference to an | |
395 | # array that gives the textual representation of all the possible flags. | |
396 | # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If | |
397 | # no bits at all are set the string "0" is returned; | |
398 | ||
399 | my @flag_text; | |
400 | my $shift = 0; | |
401 | ||
402 | return "0" if $flags == 0; | |
403 | ||
404 | while ($flags) { | |
405 | #diag sprintf "%x", $flags; | |
406 | if ($flags & 1) { | |
407 | push @flag_text, $flags_to_text_ref->[$shift]; | |
408 | } | |
409 | $shift++; | |
410 | $flags >>= 1; | |
411 | } | |
412 | ||
413 | return join "|", @flag_text; | |
414 | } | |
415 | ||
416 | # Possible flag returns from utf8n_to_uvchr_error(). These should have G_, | |
417 | # instead of A_, D_, but the prefixes will be used in a a later commit, so | |
418 | # minimize churn by having them here. | |
419 | my @utf8n_flags_to_text = ( qw( | |
420 | A_EMPTY | |
421 | A_CONTINUATION | |
422 | A_NON_CONTINUATION | |
423 | A_SHORT | |
424 | A_LONG | |
425 | A_LONG_AND_ITS_VALUE | |
426 | PLACEHOLDER | |
427 | A_OVERFLOW | |
428 | D_SURROGATE | |
429 | W_SURROGATE | |
430 | D_NONCHAR | |
431 | W_NONCHAR | |
432 | D_SUPER | |
433 | W_SUPER | |
434 | D_ABOVE_31_BIT | |
435 | W_ABOVE_31_BIT | |
436 | CHECK_ONLY | |
437 | NO_CONFIDENCE_IN_CURLEN_ | |
438 | ) ); | |
439 | ||
d884ea32 KW |
440 | sub uvchr_display_call($) |
441 | { | |
442 | # Converts an eval string that calls test_uvchr_to_utf8 into a more human | |
443 | # readable form, and returns it. The return will look something like: | |
444 | # test_uvchr_to_utf8n_flags($uv, $flags) | |
445 | #diag $_[0]; | |
446 | ||
447 | my @flags_to_text = ( qw( | |
448 | W_SURROGATE | |
449 | W_NONCHAR | |
450 | W_SUPER | |
451 | W_ABOVE_31_BIT | |
452 | D_SURROGATE | |
453 | D_NONCHAR | |
454 | D_SUPER | |
455 | D_ABOVE_31_BIT | |
456 | ) ); | |
457 | ||
458 | $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x; | |
459 | my $text = $1; | |
460 | my $cp = sprintf "%X", $2; | |
461 | my $flags = $3; | |
462 | ||
463 | return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')'; | |
464 | } | |
465 | ||
6aa905cf KW |
466 | # This test is split into this number of files. |
467 | my $num_test_files = $ENV{TEST_JOBS} || 1; | |
468 | $num_test_files = 10 if $num_test_files > 10; | |
469 | ||
470 | my $test_count = -1; | |
471 | foreach my $test (@tests) { | |
472 | $test_count++; | |
473 | next if $test_count % $num_test_files != $::TEST_CHUNK; | |
474 | ||
b7e1f4b2 | 475 | my ($testname, $bytes, $disallow_flags, |
9cdc3054 | 476 | $controlling_warning_category, $allowed_uv, $needed_to_discern_len |
6aa905cf KW |
477 | ) = @$test; |
478 | ||
479 | my $length = length $bytes; | |
04f42bf6 | 480 | my $will_overflow = $allowed_uv < 0; |
6aa905cf | 481 | |
b7e1f4b2 KW |
482 | # The convention is that the got flag is the same value as the disallow |
483 | # one, and the warn flag is the next bit over. If this were violated, the | |
484 | # tests here should start failing. We could do an eval under no strict to | |
485 | # be sure. | |
486 | my $expected_error_flags = $disallow_flags; | |
487 | my $warn_flags = $disallow_flags << 1; | |
488 | ||
3022ad00 KW |
489 | my $message; |
490 | if ($allowed_uv > 0x7FFFFFFF) { | |
491 | $message = nonportable_regex($allowed_uv); | |
492 | } | |
493 | elsif ($allowed_uv > 0x10FFFF) { | |
494 | $message = qr/(not Unicode|for a non-Unicode code point).* may not be portable/; | |
495 | } | |
496 | elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) { | |
497 | $message = qr/surrogate/; | |
67e45424 | 498 | $needed_to_discern_len = 2 unless defined $needed_to_discern_len; |
3022ad00 KW |
499 | } |
500 | elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF) | |
501 | || ($allowed_uv & 0xFFFE) == 0xFFFE) | |
502 | { | |
503 | $message = qr/Unicode non-character.*is not recommended for open interchange/; | |
67e45424 | 504 | $needed_to_discern_len = $length unless defined $needed_to_discern_len; |
3022ad00 KW |
505 | } |
506 | elsif ($will_overflow) { | |
507 | $message = qr/overflows/; | |
508 | } | |
509 | else { | |
510 | die "Can't figure out what type of warning to test for $testname" | |
511 | } | |
512 | ||
67e45424 KW |
513 | die 'Didn\'t set $needed_to_discern_len for ' . $testname |
514 | unless defined $needed_to_discern_len; | |
515 | ||
6aa905cf KW |
516 | { |
517 | use warnings; | |
9cdc3054 | 518 | undef @warnings_gotten; |
6aa905cf KW |
519 | my $ret = test_isUTF8_CHAR($bytes, $length); |
520 | my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); | |
521 | if ($will_overflow) { | |
522 | is($ret, 0, "isUTF8_CHAR() $testname: returns 0"); | |
523 | is($ret_flags, 0, "isUTF8_CHAR_flags() $testname: returns 0"); | |
524 | } | |
525 | else { | |
526 | is($ret, $length, | |
527 | "isUTF8_CHAR() $testname: returns expected length: $length"); | |
528 | is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" | |
529 | . " returns expected length: $length"); | |
530 | } | |
9cdc3054 | 531 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
532 | "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" |
533 | . " no warnings") | |
9cdc3054 | 534 | or output_warnings(@warnings_gotten); |
6aa905cf | 535 | |
9cdc3054 | 536 | undef @warnings_gotten; |
6aa905cf KW |
537 | $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); |
538 | if ($will_overflow) { | |
539 | is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); | |
540 | } | |
541 | else { | |
542 | my $expected_ret = ( $testname =~ /surrogate|non-character/ | |
543 | || $allowed_uv > 0x10FFFF) | |
544 | ? 0 | |
545 | : $length; | |
546 | is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" | |
547 | . " expected length: $expected_ret"); | |
548 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
549 | $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); | |
550 | is($ret, $expected_ret, | |
551 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" | |
552 | . " acts like isSTRICT_UTF8_CHAR"); | |
553 | } | |
9cdc3054 | 554 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
555 | "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" |
556 | . " generated no warnings") | |
9cdc3054 | 557 | or output_warnings(@warnings_gotten); |
6aa905cf | 558 | |
9cdc3054 | 559 | undef @warnings_gotten; |
6aa905cf KW |
560 | $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); |
561 | if ($will_overflow) { | |
562 | is($ret, 0, "isC9_STRICT_UTF8_CHAR() $testname: returns 0"); | |
563 | } | |
564 | else { | |
565 | my $expected_ret = ( $testname =~ /surrogate/ | |
566 | || $allowed_uv > 0x10FFFF) | |
567 | ? 0 | |
568 | : $length; | |
569 | is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" | |
570 | ." returns expected length: $expected_ret"); | |
571 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
572 | $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); | |
573 | is($ret, $expected_ret, | |
574 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" | |
575 | . " acts like isC9_STRICT_UTF8_CHAR"); | |
576 | } | |
9cdc3054 | 577 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
578 | "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" |
579 | . " generated no warnings") | |
9cdc3054 | 580 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
581 | |
582 | # Test partial character handling, for each byte not a full character | |
583 | for my $j (1.. $length - 1) { | |
584 | ||
585 | # Skip the test for the interaction between overflow and above-31 | |
586 | # bit. It is really testing other things than the partial | |
587 | # character tests, for which other tests in this file are | |
588 | # sufficient | |
04f42bf6 | 589 | last if $will_overflow; |
6aa905cf KW |
590 | |
591 | foreach my $disallow_flag (0, $disallow_flags) { | |
592 | my $partial = substr($bytes, 0, $j); | |
593 | my $ret_should_be; | |
594 | my $comment; | |
595 | if ($disallow_flag) { | |
596 | $ret_should_be = 0; | |
597 | $comment = "disallowed"; | |
598 | if ($j < $needed_to_discern_len) { | |
599 | $ret_should_be = 1; | |
600 | $comment .= ", but need $needed_to_discern_len bytes" | |
601 | . " to discern:"; | |
602 | } | |
603 | } | |
604 | else { | |
605 | $ret_should_be = 1; | |
606 | $comment = "allowed"; | |
607 | } | |
608 | ||
9cdc3054 | 609 | undef @warnings_gotten; |
6aa905cf KW |
610 | |
611 | $ret = test_is_utf8_valid_partial_char_flags($partial, $j, | |
612 | $disallow_flag); | |
613 | is($ret, $ret_should_be, | |
614 | "$testname: is_utf8_valid_partial_char_flags(" | |
615 | . display_bytes($partial) | |
616 | . "), $comment: returns $ret_should_be"); | |
9cdc3054 | 617 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
618 | "$testname: is_utf8_valid_partial_char_flags()" |
619 | . " generated no warnings") | |
9cdc3054 | 620 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
621 | } |
622 | } | |
623 | } | |
624 | ||
625 | # This is more complicated than the malformations tested earlier, as there | |
626 | # are several orthogonal variables involved. We test all the subclasses | |
627 | # of utf8 warnings to verify they work with and without the utf8 class, | |
628 | # and don't have effects on other sublass warnings | |
9cdc3054 | 629 | foreach my $trial_warning_category ('utf8', 'surrogate', 'nonchar', 'non_unicode') { |
6aa905cf KW |
630 | foreach my $warn_flag (0, $warn_flags) { |
631 | foreach my $disallow_flag (0, $disallow_flags) { | |
632 | foreach my $do_warning (0, 1) { | |
633 | ||
634 | # We try each of the above with various combinations of | |
635 | # malformations that can occur on the same input sequence. | |
636 | foreach my $short ("", "short") { | |
637 | foreach my $unexpected_noncont ("", | |
638 | "unexpected non-continuation") | |
639 | { | |
640 | foreach my $overlong ("", "overlong") { | |
641 | ||
b3169593 KW |
642 | # If we're creating an overlong, it can't be longer than |
643 | # the maximum length, so skip if we're already at that | |
644 | # length. | |
645 | next if $overlong && $length >= $::max_bytes; | |
6aa905cf KW |
646 | |
647 | my @malformations; | |
9cdc3054 | 648 | my @expected_return_flags; |
6aa905cf KW |
649 | push @malformations, $short if $short; |
650 | push @malformations, $unexpected_noncont | |
651 | if $unexpected_noncont; | |
652 | push @malformations, $overlong if $overlong; | |
653 | ||
654 | # The overflow malformation test in the input | |
655 | # array is coerced into being treated like one of | |
656 | # the others. | |
657 | if ($will_overflow) { | |
658 | push @malformations, 'overflow'; | |
9cdc3054 | 659 | push @expected_return_flags, $::UTF8_GOT_OVERFLOW; |
6aa905cf KW |
660 | } |
661 | ||
662 | my $malformations_name = join "/", @malformations; | |
663 | $malformations_name .= " malformation" | |
664 | if $malformations_name; | |
665 | $malformations_name .= "s" if @malformations > 1; | |
666 | my $this_bytes = $bytes; | |
667 | my $this_length = $length; | |
668 | my $expected_uv = $allowed_uv; | |
b3169593 | 669 | my $this_expected_len = $length; |
6aa905cf KW |
670 | my $this_needed_to_discern_len = $needed_to_discern_len; |
671 | if ($malformations_name) { | |
672 | $expected_uv = 0; | |
673 | ||
674 | # Coerce the input into the desired | |
675 | # malformation | |
676 | if ($malformations_name =~ /overlong/) { | |
677 | ||
678 | # For an overlong, we convert the original | |
679 | # start byte into a continuation byte with | |
680 | # the same data bits as originally. ... | |
681 | substr($this_bytes, 0, 1) | |
682 | = start_byte_to_cont(substr($this_bytes, | |
683 | 0, 1)); | |
684 | ||
685 | # ... Then we prepend it with a known | |
686 | # overlong sequence. This should evaluate | |
687 | # to the exact same code point as the | |
688 | # original. | |
689 | $this_bytes | |
690 | = I8_to_native("\xff") | |
dbb8d798 | 691 | . (I8_to_native(chr $::lowest_continuation) |
6aa905cf KW |
692 | x ( $::max_bytes - 1 - length($this_bytes))) |
693 | . $this_bytes; | |
694 | $this_length = length($this_bytes); | |
695 | $this_needed_to_discern_len | |
696 | = $::max_bytes - ($this_expected_len | |
697 | - $this_needed_to_discern_len); | |
698 | $this_expected_len = $::max_bytes; | |
9cdc3054 | 699 | push @expected_return_flags, $::UTF8_GOT_LONG; |
6aa905cf KW |
700 | } |
701 | if ($malformations_name =~ /short/) { | |
702 | ||
703 | # Just tell the test to not look far | |
704 | # enough into the input. | |
705 | $this_length--; | |
706 | $this_expected_len--; | |
9cdc3054 | 707 | push @expected_return_flags, $::UTF8_GOT_SHORT; |
6aa905cf KW |
708 | } |
709 | if ($malformations_name | |
710 | =~ /non-continuation/) | |
711 | { | |
712 | # Change the final continuation byte into | |
713 | # a non one. | |
714 | my $pos = ($short) ? -2 : -1; | |
715 | substr($this_bytes, $pos, 1) = '?'; | |
716 | $this_expected_len--; | |
9cdc3054 | 717 | push @expected_return_flags, |
6aa905cf KW |
718 | $::UTF8_GOT_NON_CONTINUATION; |
719 | } | |
720 | } | |
721 | ||
722 | my $eval_warn = $do_warning | |
9cdc3054 KW |
723 | ? "use warnings '$trial_warning_category'" |
724 | : $trial_warning_category eq "utf8" | |
6aa905cf KW |
725 | ? "no warnings 'utf8'" |
726 | : ( "use warnings 'utf8';" | |
9cdc3054 | 727 | . " no warnings '$trial_warning_category'"); |
6aa905cf KW |
728 | |
729 | # Is effectively disallowed if we've set up a | |
730 | # malformation, even if the flag indicates it is | |
731 | # allowed. Fix up test name to indicate this as | |
732 | # well | |
733 | my $disallowed = $disallow_flag | |
734 | || $malformations_name; | |
735 | my $this_name = "utf8n_to_uvchr_error() $testname: " | |
736 | . (($disallow_flag) | |
737 | ? 'disallowed' | |
738 | : $disallowed | |
739 | ? $disallowed | |
740 | : 'allowed'); | |
741 | $this_name .= ", $eval_warn"; | |
742 | $this_name .= ", " . (($warn_flag) | |
743 | ? 'with warning flag' | |
744 | : 'no warning flag'); | |
745 | ||
9cdc3054 | 746 | undef @warnings_gotten; |
6aa905cf KW |
747 | my $ret_ref; |
748 | my $display_bytes = display_bytes($this_bytes); | |
749 | my $call = " Call was: $eval_warn; \$ret_ref" | |
750 | . " = test_utf8n_to_uvchr_error(" | |
751 | . "'$display_bytes', $this_length," | |
752 | . "$warn_flag" | |
753 | . "|$disallow_flag)"; | |
754 | my $eval_text = "$eval_warn; \$ret_ref" | |
755 | . " = test_utf8n_to_uvchr_error(" | |
756 | . "'$this_bytes'," | |
757 | . " $this_length, $warn_flag" | |
758 | . "|$disallow_flag)"; | |
759 | eval "$eval_text"; | |
760 | if (! ok ("$@ eq ''", | |
761 | "$this_name: eval succeeded")) | |
762 | { | |
763 | diag "\$!='$!'; eval'd=\"$call\""; | |
764 | next; | |
765 | } | |
766 | if ($disallowed) { | |
767 | is($ret_ref->[0], 0, "$this_name: Returns 0") | |
768 | or diag $call; | |
769 | } | |
770 | else { | |
771 | is($ret_ref->[0], $expected_uv, | |
772 | "$this_name: Returns expected uv: " | |
773 | . sprintf("0x%04X", $expected_uv)) | |
774 | or diag $call; | |
775 | } | |
776 | is($ret_ref->[1], $this_expected_len, | |
777 | "$this_name: Returns expected length:" | |
778 | . " $this_expected_len") | |
779 | or diag $call; | |
780 | ||
9cdc3054 | 781 | my $returned_flags = $ret_ref->[2]; |
6aa905cf | 782 | |
9cdc3054 KW |
783 | for (my $i = @expected_return_flags - 1; $i >= 0; $i--) { |
784 | if (ok($expected_return_flags[$i] & $returned_flags, | |
6aa905cf KW |
785 | "Expected and got error bit return" |
786 | . " for $malformations[$i] malformation")) | |
787 | { | |
9cdc3054 | 788 | $returned_flags &= ~$expected_return_flags[$i]; |
6aa905cf | 789 | } |
9cdc3054 | 790 | splice @expected_return_flags, $i, 1; |
6aa905cf | 791 | } |
9cdc3054 | 792 | is(scalar @expected_return_flags, 0, |
6aa905cf | 793 | "Got all the expected malformation errors") |
9cdc3054 | 794 | or diag Dumper \@expected_return_flags; |
6aa905cf KW |
795 | |
796 | if ( $this_expected_len >= $this_needed_to_discern_len | |
797 | && ($warn_flag || $disallow_flag)) | |
798 | { | |
9cdc3054 | 799 | is($returned_flags, $expected_error_flags, |
6aa905cf KW |
800 | "Got the correct error flag") |
801 | or diag $call; | |
802 | } | |
803 | else { | |
5722c46d KW |
804 | is($returned_flags, 0, "Got no other error flag") |
805 | or | |
806 | ||
807 | # We strip off any prefixes from the flag names | |
808 | diag "The unexpected flags were: " | |
809 | . (flags_to_text($returned_flags, | |
810 | \@utf8n_flags_to_text) | |
811 | =~ s/ \b [A-Z] _ //xgr); | |
6aa905cf KW |
812 | } |
813 | ||
814 | if (@malformations) { | |
9cdc3054 | 815 | if (! $do_warning && $trial_warning_category eq 'utf8') { |
6aa905cf KW |
816 | goto no_warnings_expected; |
817 | } | |
818 | ||
819 | # Check that each malformation generates a | |
820 | # warning, removing that warning if found | |
821 | MALFORMATION: | |
822 | foreach my $malformation (@malformations) { | |
9cdc3054 KW |
823 | foreach (my $i = 0; $i < @warnings_gotten; $i++) { |
824 | if ($warnings_gotten[$i] =~ /$malformation/) { | |
6aa905cf KW |
825 | pass("Expected and got" |
826 | . "'$malformation' warning"); | |
9cdc3054 | 827 | splice @warnings_gotten, $i, 1; |
6aa905cf KW |
828 | next MALFORMATION; |
829 | } | |
830 | } | |
831 | fail("Expected '$malformation' warning" | |
832 | . " but didn't get it"); | |
833 | ||
834 | } | |
835 | } | |
836 | ||
837 | # Any overflow will override any super or above-31 | |
838 | # warnings. | |
839 | goto no_warnings_expected | |
840 | if $will_overflow || $this_expected_len | |
841 | < $this_needed_to_discern_len; | |
842 | ||
843 | if ( ! $do_warning | |
9cdc3054 KW |
844 | && ( $trial_warning_category eq 'utf8' |
845 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf KW |
846 | { |
847 | goto no_warnings_expected; | |
848 | } | |
849 | elsif ($warn_flag) { | |
9cdc3054 | 850 | if (is(scalar @warnings_gotten, 1, |
6aa905cf KW |
851 | "$this_name: Got a single warning ")) |
852 | { | |
9cdc3054 | 853 | like($warnings_gotten[0], $message, |
6aa905cf KW |
854 | "$this_name: Got expected warning") |
855 | or diag $call; | |
856 | } | |
857 | else { | |
858 | diag $call; | |
9cdc3054 KW |
859 | if (scalar @warnings_gotten) { |
860 | output_warnings(@warnings_gotten); | |
6aa905cf KW |
861 | } |
862 | } | |
863 | } | |
864 | else { | |
9cdc3054 | 865 | |
6aa905cf | 866 | no_warnings_expected: |
9cdc3054 | 867 | unless (is(scalar @warnings_gotten, 0, |
6aa905cf KW |
868 | "$this_name: Got no warnings")) |
869 | { | |
870 | diag $call; | |
9cdc3054 | 871 | output_warnings(@warnings_gotten); |
6aa905cf KW |
872 | } |
873 | } | |
874 | ||
875 | # Check CHECK_ONLY results when the input is | |
876 | # disallowed. Do this when actually disallowed, | |
877 | # not just when the $disallow_flag is set | |
878 | if ($disallowed) { | |
9cdc3054 | 879 | undef @warnings_gotten; |
6aa905cf KW |
880 | $ret_ref = test_utf8n_to_uvchr_error( |
881 | $this_bytes, $this_length, | |
882 | $disallow_flag|$::UTF8_CHECK_ONLY); | |
883 | is($ret_ref->[0], 0, | |
884 | "$this_name, CHECK_ONLY: Returns 0") | |
885 | or diag $call; | |
886 | is($ret_ref->[1], -1, | |
887 | "$this_name: CHECK_ONLY: returns -1 for length") | |
888 | or diag $call; | |
9cdc3054 | 889 | if (! is(scalar @warnings_gotten, 0, |
6aa905cf KW |
890 | "$this_name, CHECK_ONLY: no warnings" |
891 | . " generated")) | |
892 | { | |
893 | diag $call; | |
9cdc3054 | 894 | output_warnings(@warnings_gotten); |
6aa905cf KW |
895 | } |
896 | } | |
897 | ||
898 | # Now repeat some of the above, but for | |
899 | # uvchr_to_utf8_flags(). Since this comes from an | |
900 | # existing code point, it hasn't overflowed, and | |
901 | # isn't malformed. | |
902 | next if @malformations; | |
903 | ||
904 | # The warning and disallow flags passed in are for | |
905 | # utf8n_to_uvchr_error(). Convert them for | |
906 | # uvchr_to_utf8_flags(). | |
907 | my $uvchr_warn_flag = 0; | |
908 | my $uvchr_disallow_flag = 0; | |
909 | if ($warn_flag) { | |
910 | if ($warn_flag == $::UTF8_WARN_SURROGATE) { | |
911 | $uvchr_warn_flag = $::UNICODE_WARN_SURROGATE | |
912 | } | |
913 | elsif ($warn_flag == $::UTF8_WARN_NONCHAR) { | |
914 | $uvchr_warn_flag = $::UNICODE_WARN_NONCHAR | |
915 | } | |
916 | elsif ($warn_flag == $::UTF8_WARN_SUPER) { | |
917 | $uvchr_warn_flag = $::UNICODE_WARN_SUPER | |
918 | } | |
919 | elsif ($warn_flag == $::UTF8_WARN_ABOVE_31_BIT) { | |
920 | $uvchr_warn_flag | |
921 | = $::UNICODE_WARN_ABOVE_31_BIT; | |
922 | } | |
923 | else { | |
924 | fail(sprintf "Unexpected warn flag: %x", | |
925 | $warn_flag); | |
926 | next; | |
927 | } | |
928 | } | |
929 | if ($disallow_flag) { | |
930 | if ($disallow_flag == $::UTF8_DISALLOW_SURROGATE) | |
931 | { | |
932 | $uvchr_disallow_flag | |
933 | = $::UNICODE_DISALLOW_SURROGATE; | |
934 | } | |
935 | elsif ($disallow_flag == $::UTF8_DISALLOW_NONCHAR) | |
936 | { | |
937 | $uvchr_disallow_flag | |
938 | = $::UNICODE_DISALLOW_NONCHAR; | |
939 | } | |
940 | elsif ($disallow_flag == $::UTF8_DISALLOW_SUPER) { | |
941 | $uvchr_disallow_flag | |
942 | = $::UNICODE_DISALLOW_SUPER; | |
943 | } | |
944 | elsif ($disallow_flag | |
945 | == $::UTF8_DISALLOW_ABOVE_31_BIT) | |
946 | { | |
947 | $uvchr_disallow_flag = | |
948 | $::UNICODE_DISALLOW_ABOVE_31_BIT; | |
949 | } | |
950 | else { | |
951 | fail(sprintf "Unexpected disallow flag: %x", | |
952 | $disallow_flag); | |
953 | next; | |
954 | } | |
955 | } | |
956 | ||
957 | $disallowed = $uvchr_disallow_flag; | |
958 | ||
959 | $this_name = "uvchr_to_utf8_flags() $testname: " | |
960 | . (($uvchr_disallow_flag) | |
961 | ? 'disallowed' | |
962 | : ($disallowed) | |
963 | ? 'ABOVE_31_BIT allowed' | |
964 | : 'allowed'); | |
965 | $this_name .= ", $eval_warn"; | |
966 | $this_name .= ", " . (($uvchr_warn_flag) | |
967 | ? 'with warning flag' | |
968 | : 'no warning flag'); | |
969 | ||
9cdc3054 | 970 | undef @warnings_gotten; |
6aa905cf | 971 | my $ret; |
d884ea32 | 972 | my $this_flags = $uvchr_warn_flag | $uvchr_disallow_flag; |
6aa905cf KW |
973 | $eval_text = "$eval_warn; \$ret =" |
974 | . " test_uvchr_to_utf8_flags(" | |
d884ea32 | 975 | . "$allowed_uv, $this_flags)"; |
6aa905cf KW |
976 | eval "$eval_text"; |
977 | if (! ok ("$@ eq ''", "$this_name: eval succeeded")) | |
978 | { | |
d884ea32 KW |
979 | diag "\$@='$@'; call was: " |
980 | . uvchr_display_call($eval_text); | |
6aa905cf KW |
981 | next; |
982 | } | |
983 | if ($disallowed) { | |
984 | is($ret, undef, "$this_name: Returns undef") | |
d884ea32 | 985 | or diag "Call was: " . uvchr_display_call($eval_text); |
6aa905cf KW |
986 | } |
987 | else { | |
d884ea32 KW |
988 | is($ret, $this_bytes, "$this_name: Returns expected string") |
989 | or diag "Call was: " . uvchr_display_call($eval_text); | |
6aa905cf KW |
990 | } |
991 | if (! $do_warning | |
9cdc3054 | 992 | && ($trial_warning_category eq 'utf8' || $trial_warning_category eq $controlling_warning_category)) |
6aa905cf | 993 | { |
9cdc3054 | 994 | if (!is(scalar @warnings_gotten, 0, |
6aa905cf KW |
995 | "$this_name: No warnings generated")) |
996 | { | |
d884ea32 | 997 | diag "Call was: " . uvchr_display_call($eval_text); |
9cdc3054 | 998 | output_warnings(@warnings_gotten); |
6aa905cf KW |
999 | } |
1000 | } | |
1001 | elsif ( $uvchr_warn_flag | |
9cdc3054 KW |
1002 | && ( $trial_warning_category eq 'utf8' |
1003 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf | 1004 | { |
9cdc3054 | 1005 | if (is(scalar @warnings_gotten, 1, |
6aa905cf KW |
1006 | "$this_name: Got a single warning ")) |
1007 | { | |
9cdc3054 | 1008 | like($warnings_gotten[0], $message, |
6aa905cf | 1009 | "$this_name: Got expected warning") |
d884ea32 KW |
1010 | or diag "Call was: " |
1011 | . uvchr_display_call($eval_text); | |
6aa905cf KW |
1012 | } |
1013 | else { | |
d884ea32 | 1014 | diag "Call was: " . uvchr_display_call($eval_text); |
9cdc3054 KW |
1015 | output_warnings(@warnings_gotten) |
1016 | if scalar @warnings_gotten; | |
6aa905cf KW |
1017 | } |
1018 | } | |
1019 | } | |
1020 | } | |
1021 | } | |
1022 | } | |
1023 | } | |
1024 | } | |
1025 | } | |
1026 | } | |
1027 | ||
1028 | done_testing; |