Commit | Line | Data |
---|---|---|
6aa905cf KW |
1 | #!perl -w |
2 | ||
3 | # This is a base file to be used by various .t's in its directory | |
4 | ||
5 | use strict; | |
6 | use Test::More; | |
7 | ||
8 | BEGIN { | |
9 | use_ok('XS::APItest'); | |
10 | require 'charset_tools.pl'; | |
11 | require './t/utf8_setup.pl'; | |
12 | }; | |
13 | ||
14 | $|=1; | |
15 | ||
16 | no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit | |
17 | # machines, and that is tested elsewhere | |
18 | ||
19 | use XS::APItest; | |
20 | use Data::Dumper; | |
21 | ||
22 | my @warnings; | |
23 | ||
24 | use warnings 'utf8'; | |
25 | local $SIG{__WARN__} = sub { push @warnings, @_ }; | |
26 | ||
27 | sub nonportable_regex ($) { | |
28 | ||
29 | # Returns a pattern that matches the non-portable message raised either | |
30 | # for the specific input code point, or the one generated when there | |
31 | # is some malformation that precludes the message containing the specific | |
32 | # code point | |
33 | ||
34 | my $code_point = shift; | |
35 | ||
36 | my $string = sprintf '(Code point 0x%X is not Unicode, and' | |
37 | . '|Any UTF-8 sequence that starts with' | |
38 | . ' "(\\\x[[:xdigit:]]{2})+" is for a' | |
39 | . ' non-Unicode code point, and is) not portable', | |
40 | $code_point; | |
41 | return qr/$string/; | |
42 | } | |
43 | ||
44 | # Now test the cases where a legal code point is generated, but may or may not | |
45 | # be allowed/warned on. | |
46 | my @tests = ( | |
47 | # ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, | |
48 | # $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message ) | |
49 | [ "lowest surrogate", | |
50 | (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), | |
51 | $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, | |
52 | 'surrogate', 0xD800, | |
53 | (isASCII) ? 3 : 4, | |
54 | 2, | |
55 | qr/surrogate/ | |
56 | ], | |
57 | [ "a middle surrogate", | |
58 | (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), | |
59 | $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, | |
60 | 'surrogate', 0xD90D, | |
61 | (isASCII) ? 3 : 4, | |
62 | 2, | |
63 | qr/surrogate/ | |
64 | ], | |
65 | [ "highest surrogate", | |
66 | (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), | |
67 | $::UTF8_WARN_SURROGATE, $::UTF8_DISALLOW_SURROGATE, $::UTF8_GOT_SURROGATE, | |
68 | 'surrogate', 0xDFFF, | |
69 | (isASCII) ? 3 : 4, | |
70 | 2, | |
71 | qr/surrogate/ | |
72 | ], | |
73 | [ "first non_unicode", | |
74 | (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), | |
75 | $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, | |
76 | 'non_unicode', 0x110000, | |
77 | (isASCII) ? 4 : 5, | |
78 | 2, | |
79 | qr/(not Unicode|for a non-Unicode code point).* may not be portable/ | |
80 | ], | |
81 | [ "non_unicode whose first byte tells that", | |
82 | (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), | |
83 | $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, | |
84 | 'non_unicode', | |
85 | (isASCII) ? 0x140000 : 0x200000, | |
86 | (isASCII) ? 4 : 5, | |
87 | 1, | |
88 | qr/(not Unicode|for a non-Unicode code point).* may not be portable/ | |
89 | ], | |
90 | [ "first of 32 consecutive non-character code points", | |
91 | (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), | |
92 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
93 | 'nonchar', 0xFDD0, | |
94 | (isASCII) ? 3 : 4, | |
95 | (isASCII) ? 3 : 4, | |
96 | qr/Unicode non-character.*is not recommended for open interchange/ | |
97 | ], | |
98 | [ "a mid non-character code point of the 32 consecutive ones", | |
99 | (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), | |
100 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
101 | 'nonchar', 0xFDE0, | |
102 | (isASCII) ? 3 : 4, | |
103 | (isASCII) ? 3 : 4, | |
104 | qr/Unicode non-character.*is not recommended for open interchange/ | |
105 | ], | |
106 | [ "final of 32 consecutive non-character code points", | |
107 | (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), | |
108 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
109 | 'nonchar', 0xFDEF, | |
110 | (isASCII) ? 3 : 4, | |
111 | (isASCII) ? 3 : 4, | |
112 | qr/Unicode non-character.*is not recommended for open interchange/ | |
113 | ], | |
114 | [ "non-character code point U+FFFE", | |
115 | (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), | |
116 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
117 | 'nonchar', 0xFFFE, | |
118 | (isASCII) ? 3 : 4, | |
119 | (isASCII) ? 3 : 4, | |
120 | qr/Unicode non-character.*is not recommended for open interchange/ | |
121 | ], | |
122 | [ "non-character code point U+FFFF", | |
123 | (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), | |
124 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
125 | 'nonchar', 0xFFFF, | |
126 | (isASCII) ? 3 : 4, | |
127 | (isASCII) ? 3 : 4, | |
128 | qr/Unicode non-character.*is not recommended for open interchange/ | |
129 | ], | |
130 | [ "non-character code point U+1FFFE", | |
131 | (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), | |
132 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
133 | 'nonchar', 0x1FFFE, | |
134 | 4, 4, | |
135 | qr/Unicode non-character.*is not recommended for open interchange/ | |
136 | ], | |
137 | [ "non-character code point U+1FFFF", | |
138 | (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), | |
139 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
140 | 'nonchar', 0x1FFFF, | |
141 | 4, 4, | |
142 | qr/Unicode non-character.*is not recommended for open interchange/ | |
143 | ], | |
144 | [ "non-character code point U+2FFFE", | |
145 | (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), | |
146 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
147 | 'nonchar', 0x2FFFE, | |
148 | 4, 4, | |
149 | qr/Unicode non-character.*is not recommended for open interchange/ | |
150 | ], | |
151 | [ "non-character code point U+2FFFF", | |
152 | (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), | |
153 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
154 | 'nonchar', 0x2FFFF, | |
155 | 4, 4, | |
156 | qr/Unicode non-character.*is not recommended for open interchange/ | |
157 | ], | |
158 | [ "non-character code point U+3FFFE", | |
159 | (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), | |
160 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
161 | 'nonchar', 0x3FFFE, | |
162 | 4, 4, | |
163 | qr/Unicode non-character.*is not recommended for open interchange/ | |
164 | ], | |
165 | [ "non-character code point U+3FFFF", | |
166 | (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), | |
167 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
168 | 'nonchar', 0x3FFFF, | |
169 | 4, 4, | |
170 | qr/Unicode non-character.*is not recommended for open interchange/ | |
171 | ], | |
172 | [ "non-character code point U+4FFFE", | |
173 | (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), | |
174 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
175 | 'nonchar', 0x4FFFE, | |
176 | (isASCII) ? 4 : 5, | |
177 | (isASCII) ? 4 : 5, | |
178 | qr/Unicode non-character.*is not recommended for open interchange/ | |
179 | ], | |
180 | [ "non-character code point U+4FFFF", | |
181 | (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), | |
182 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
183 | 'nonchar', 0x4FFFF, | |
184 | (isASCII) ? 4 : 5, | |
185 | (isASCII) ? 4 : 5, | |
186 | qr/Unicode non-character.*is not recommended for open interchange/ | |
187 | ], | |
188 | [ "non-character code point U+5FFFE", | |
189 | (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), | |
190 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
191 | 'nonchar', 0x5FFFE, | |
192 | (isASCII) ? 4 : 5, | |
193 | (isASCII) ? 4 : 5, | |
194 | qr/Unicode non-character.*is not recommended for open interchange/ | |
195 | ], | |
196 | [ "non-character code point U+5FFFF", | |
197 | (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), | |
198 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
199 | 'nonchar', 0x5FFFF, | |
200 | (isASCII) ? 4 : 5, | |
201 | (isASCII) ? 4 : 5, | |
202 | qr/Unicode non-character.*is not recommended for open interchange/ | |
203 | ], | |
204 | [ "non-character code point U+6FFFE", | |
205 | (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), | |
206 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
207 | 'nonchar', 0x6FFFE, | |
208 | (isASCII) ? 4 : 5, | |
209 | (isASCII) ? 4 : 5, | |
210 | qr/Unicode non-character.*is not recommended for open interchange/ | |
211 | ], | |
212 | [ "non-character code point U+6FFFF", | |
213 | (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), | |
214 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
215 | 'nonchar', 0x6FFFF, | |
216 | (isASCII) ? 4 : 5, | |
217 | (isASCII) ? 4 : 5, | |
218 | qr/Unicode non-character.*is not recommended for open interchange/ | |
219 | ], | |
220 | [ "non-character code point U+7FFFE", | |
221 | (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), | |
222 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
223 | 'nonchar', 0x7FFFE, | |
224 | (isASCII) ? 4 : 5, | |
225 | (isASCII) ? 4 : 5, | |
226 | qr/Unicode non-character.*is not recommended for open interchange/ | |
227 | ], | |
228 | [ "non-character code point U+7FFFF", | |
229 | (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), | |
230 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
231 | 'nonchar', 0x7FFFF, | |
232 | (isASCII) ? 4 : 5, | |
233 | (isASCII) ? 4 : 5, | |
234 | qr/Unicode non-character.*is not recommended for open interchange/ | |
235 | ], | |
236 | [ "non-character code point U+8FFFE", | |
237 | (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), | |
238 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
239 | 'nonchar', 0x8FFFE, | |
240 | (isASCII) ? 4 : 5, | |
241 | (isASCII) ? 4 : 5, | |
242 | qr/Unicode non-character.*is not recommended for open interchange/ | |
243 | ], | |
244 | [ "non-character code point U+8FFFF", | |
245 | (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), | |
246 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
247 | 'nonchar', 0x8FFFF, | |
248 | (isASCII) ? 4 : 5, | |
249 | (isASCII) ? 4 : 5, | |
250 | qr/Unicode non-character.*is not recommended for open interchange/ | |
251 | ], | |
252 | [ "non-character code point U+9FFFE", | |
253 | (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), | |
254 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
255 | 'nonchar', 0x9FFFE, | |
256 | (isASCII) ? 4 : 5, | |
257 | (isASCII) ? 4 : 5, | |
258 | qr/Unicode non-character.*is not recommended for open interchange/ | |
259 | ], | |
260 | [ "non-character code point U+9FFFF", | |
261 | (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), | |
262 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
263 | 'nonchar', 0x9FFFF, | |
264 | (isASCII) ? 4 : 5, | |
265 | (isASCII) ? 4 : 5, | |
266 | qr/Unicode non-character.*is not recommended for open interchange/ | |
267 | ], | |
268 | [ "non-character code point U+AFFFE", | |
269 | (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), | |
270 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
271 | 'nonchar', 0xAFFFE, | |
272 | (isASCII) ? 4 : 5, | |
273 | (isASCII) ? 4 : 5, | |
274 | qr/Unicode non-character.*is not recommended for open interchange/ | |
275 | ], | |
276 | [ "non-character code point U+AFFFF", | |
277 | (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), | |
278 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
279 | 'nonchar', 0xAFFFF, | |
280 | (isASCII) ? 4 : 5, | |
281 | (isASCII) ? 4 : 5, | |
282 | qr/Unicode non-character.*is not recommended for open interchange/ | |
283 | ], | |
284 | [ "non-character code point U+BFFFE", | |
285 | (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), | |
286 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
287 | 'nonchar', 0xBFFFE, | |
288 | (isASCII) ? 4 : 5, | |
289 | (isASCII) ? 4 : 5, | |
290 | qr/Unicode non-character.*is not recommended for open interchange/ | |
291 | ], | |
292 | [ "non-character code point U+BFFFF", | |
293 | (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), | |
294 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
295 | 'nonchar', 0xBFFFF, | |
296 | (isASCII) ? 4 : 5, | |
297 | (isASCII) ? 4 : 5, | |
298 | qr/Unicode non-character.*is not recommended for open interchange/ | |
299 | ], | |
300 | [ "non-character code point U+CFFFE", | |
301 | (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), | |
302 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
303 | 'nonchar', 0xCFFFE, | |
304 | (isASCII) ? 4 : 5, | |
305 | (isASCII) ? 4 : 5, | |
306 | qr/Unicode non-character.*is not recommended for open interchange/ | |
307 | ], | |
308 | [ "non-character code point U+CFFFF", | |
309 | (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), | |
310 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
311 | 'nonchar', 0xCFFFF, | |
312 | (isASCII) ? 4 : 5, | |
313 | (isASCII) ? 4 : 5, | |
314 | qr/Unicode non-character.*is not recommended for open interchange/ | |
315 | ], | |
316 | [ "non-character code point U+DFFFE", | |
317 | (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), | |
318 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
319 | 'nonchar', 0xDFFFE, | |
320 | (isASCII) ? 4 : 5, | |
321 | (isASCII) ? 4 : 5, | |
322 | qr/Unicode non-character.*is not recommended for open interchange/ | |
323 | ], | |
324 | [ "non-character code point U+DFFFF", | |
325 | (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), | |
326 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
327 | 'nonchar', 0xDFFFF, | |
328 | (isASCII) ? 4 : 5, | |
329 | (isASCII) ? 4 : 5, | |
330 | qr/Unicode non-character.*is not recommended for open interchange/ | |
331 | ], | |
332 | [ "non-character code point U+EFFFE", | |
333 | (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), | |
334 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
335 | 'nonchar', 0xEFFFE, | |
336 | (isASCII) ? 4 : 5, | |
337 | (isASCII) ? 4 : 5, | |
338 | qr/Unicode non-character.*is not recommended for open interchange/ | |
339 | ], | |
340 | [ "non-character code point U+EFFFF", | |
341 | (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), | |
342 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
343 | 'nonchar', 0xEFFFF, | |
344 | (isASCII) ? 4 : 5, | |
345 | (isASCII) ? 4 : 5, | |
346 | qr/Unicode non-character.*is not recommended for open interchange/ | |
347 | ], | |
348 | [ "non-character code point U+FFFFE", | |
349 | (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), | |
350 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
351 | 'nonchar', 0xFFFFE, | |
352 | (isASCII) ? 4 : 5, | |
353 | (isASCII) ? 4 : 5, | |
354 | qr/Unicode non-character.*is not recommended for open interchange/ | |
355 | ], | |
356 | [ "non-character code point U+FFFFF", | |
357 | (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), | |
358 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
359 | 'nonchar', 0xFFFFF, | |
360 | (isASCII) ? 4 : 5, | |
361 | (isASCII) ? 4 : 5, | |
362 | qr/Unicode non-character.*is not recommended for open interchange/ | |
363 | ], | |
364 | [ "non-character code point U+10FFFE", | |
365 | (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), | |
366 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
367 | 'nonchar', 0x10FFFE, | |
368 | (isASCII) ? 4 : 5, | |
369 | (isASCII) ? 4 : 5, | |
370 | qr/Unicode non-character.*is not recommended for open interchange/ | |
371 | ], | |
372 | [ "non-character code point U+10FFFF", | |
373 | (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), | |
374 | $::UTF8_WARN_NONCHAR, $::UTF8_DISALLOW_NONCHAR, $::UTF8_GOT_NONCHAR, | |
375 | 'nonchar', 0x10FFFF, | |
376 | (isASCII) ? 4 : 5, | |
377 | (isASCII) ? 4 : 5, | |
378 | qr/Unicode non-character.*is not recommended for open interchange/ | |
379 | ], | |
380 | [ "requires at least 32 bits", | |
381 | (isASCII) | |
382 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
383 | : I8_to_native( | |
384 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
385 | # This code point is chosen so that it is representable in a UV on | |
386 | # 32-bit machines | |
387 | $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, | |
388 | $::UTF8_GOT_ABOVE_31_BIT, | |
389 | 'utf8', 0x80000000, | |
390 | (isASCII) ? 7 : $::max_bytes, | |
391 | (isASCII) ? 1 : 8, | |
392 | nonportable_regex(0x80000000) | |
393 | ], | |
394 | [ "highest 32 bit code point", | |
395 | (isASCII) | |
396 | ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" | |
397 | : I8_to_native( | |
398 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), | |
399 | $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, | |
400 | $::UTF8_GOT_ABOVE_31_BIT, | |
401 | 'utf8', 0xFFFFFFFF, | |
402 | (isASCII) ? 7 : $::max_bytes, | |
403 | (isASCII) ? 1 : 8, | |
404 | nonportable_regex(0xffffffff) | |
405 | ], | |
406 | [ "requires at least 32 bits, and use SUPER-type flags, instead of" | |
407 | . " ABOVE_31_BIT", | |
408 | (isASCII) | |
409 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
410 | : I8_to_native( | |
411 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
412 | $::UTF8_WARN_SUPER, $::UTF8_DISALLOW_SUPER, $::UTF8_GOT_SUPER, | |
413 | 'utf8', 0x80000000, | |
414 | (isASCII) ? 7 : $::max_bytes, | |
415 | 1, | |
416 | nonportable_regex(0x80000000) | |
417 | ], | |
418 | [ "overflow with warnings/disallow for more than 31 bits", | |
419 | # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT | |
420 | # with overflow. The overflow malformation is never allowed, so | |
421 | # preventing it takes precedence if the ABOVE_31_BIT options would | |
422 | # otherwise allow in an overflowing value. The ASCII code points (1 | |
423 | # for 32-bits; 1 for 64) were chosen because the old overflow | |
424 | # detection algorithm did not catch them; this means this test also | |
425 | # checks for that fix. The EBCDIC are arbitrary overflowing ones | |
426 | # since we have no reports of failures with it. | |
427 | (($::is64bit) | |
428 | ? ((isASCII) | |
429 | ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" | |
430 | : I8_to_native( | |
431 | "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) | |
432 | : ((isASCII) | |
433 | ? "\xfe\x86\x80\x80\x80\x80\x80" | |
434 | : I8_to_native( | |
435 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), | |
436 | $::UTF8_WARN_ABOVE_31_BIT, | |
437 | $::UTF8_DISALLOW_ABOVE_31_BIT, | |
438 | $::UTF8_GOT_ABOVE_31_BIT, | |
439 | 'utf8', 0, | |
440 | (! isASCII || $::is64bit) ? $::max_bytes : 7, | |
441 | (isASCII || $::is64bit) ? 2 : 8, | |
442 | qr/overflows/ | |
443 | ], | |
444 | ); | |
445 | ||
446 | if (! $::is64bit) { | |
447 | if (isASCII) { | |
448 | no warnings qw{portable overflow}; | |
449 | push @tests, | |
450 | [ "Lowest 33 bit code point: overflow", | |
451 | "\xFE\x84\x80\x80\x80\x80\x80", | |
452 | $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, | |
453 | $::UTF8_GOT_ABOVE_31_BIT, | |
454 | 'utf8', 0x100000000, | |
455 | 7, 1, | |
456 | qr/and( is)? not portable/ | |
457 | ]; | |
458 | } | |
459 | } | |
460 | else { | |
461 | no warnings qw{portable overflow}; | |
462 | push @tests, | |
463 | [ "More than 32 bits", | |
464 | (isASCII) | |
465 | ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" | |
466 | : I8_to_native( | |
467 | "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
468 | $::UTF8_WARN_ABOVE_31_BIT, $::UTF8_DISALLOW_ABOVE_31_BIT, | |
469 | $::UTF8_GOT_ABOVE_31_BIT, | |
470 | 'utf8', 0x1000000000, | |
471 | $::max_bytes, (isASCII) ? 1 : 7, | |
472 | qr/and( is)? not portable/ | |
473 | ]; | |
474 | if (! isASCII) { | |
475 | push @tests, # These could falsely show wrongly in a naive | |
476 | # implementation | |
477 | [ "requires at least 32 bits", | |
478 | I8_to_native( | |
479 | "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
480 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
481 | $::UTF8_GOT_ABOVE_31_BIT, | |
482 | 'utf8', 0x800000000, | |
483 | $::max_bytes, 7, | |
484 | nonportable_regex(0x80000000) | |
485 | ], | |
486 | [ "requires at least 32 bits", | |
487 | I8_to_native( | |
488 | "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
489 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
490 | $::UTF8_GOT_ABOVE_31_BIT, | |
491 | 'utf8', 0x10000000000, | |
492 | $::max_bytes, 6, | |
493 | nonportable_regex(0x10000000000) | |
494 | ], | |
495 | [ "requires at least 32 bits", | |
496 | I8_to_native( | |
497 | "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
498 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
499 | $::UTF8_GOT_ABOVE_31_BIT, | |
500 | 'utf8', 0x200000000000, | |
501 | $::max_bytes, 5, | |
502 | nonportable_regex(0x20000000000) | |
503 | ], | |
504 | [ "requires at least 32 bits", | |
505 | I8_to_native( | |
506 | "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
507 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
508 | $::UTF8_GOT_ABOVE_31_BIT, | |
509 | 'utf8', 0x4000000000000, | |
510 | $::max_bytes, 4, | |
511 | nonportable_regex(0x4000000000000) | |
512 | ], | |
513 | [ "requires at least 32 bits", | |
514 | I8_to_native( | |
515 | "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
516 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
517 | $::UTF8_GOT_ABOVE_31_BIT, | |
518 | 'utf8', 0x80000000000000, | |
519 | $::max_bytes, 3, | |
520 | nonportable_regex(0x80000000000000) | |
521 | ], | |
522 | [ "requires at least 32 bits", | |
523 | I8_to_native( | |
524 | "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
525 | $::UTF8_WARN_ABOVE_31_BIT,$::UTF8_DISALLOW_ABOVE_31_BIT, | |
526 | $::UTF8_GOT_ABOVE_31_BIT, | |
527 | 'utf8', 0x1000000000000000, | |
528 | $::max_bytes, 2, | |
529 | nonportable_regex(0x1000000000000000) | |
530 | ]; | |
531 | } | |
532 | } | |
533 | ||
534 | # This test is split into this number of files. | |
535 | my $num_test_files = $ENV{TEST_JOBS} || 1; | |
536 | $num_test_files = 10 if $num_test_files > 10; | |
537 | ||
538 | my $test_count = -1; | |
539 | foreach my $test (@tests) { | |
540 | $test_count++; | |
541 | next if $test_count % $num_test_files != $::TEST_CHUNK; | |
542 | ||
543 | my ($testname, $bytes, $warn_flags, $disallow_flags, $expected_error_flags, | |
544 | $category, $allowed_uv, $expected_len, $needed_to_discern_len, $message | |
545 | ) = @$test; | |
546 | ||
547 | my $length = length $bytes; | |
548 | my $will_overflow = $testname =~ /overflow/ ? 'overflow' : ""; | |
549 | ||
550 | { | |
551 | use warnings; | |
552 | undef @warnings; | |
553 | my $ret = test_isUTF8_CHAR($bytes, $length); | |
554 | my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); | |
555 | if ($will_overflow) { | |
556 | is($ret, 0, "isUTF8_CHAR() $testname: returns 0"); | |
557 | is($ret_flags, 0, "isUTF8_CHAR_flags() $testname: returns 0"); | |
558 | } | |
559 | else { | |
560 | is($ret, $length, | |
561 | "isUTF8_CHAR() $testname: returns expected length: $length"); | |
562 | is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" | |
563 | . " returns expected length: $length"); | |
564 | } | |
565 | is(scalar @warnings, 0, | |
566 | "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" | |
567 | . " no warnings") | |
568 | or output_warnings(@warnings); | |
569 | ||
570 | undef @warnings; | |
571 | $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); | |
572 | if ($will_overflow) { | |
573 | is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); | |
574 | } | |
575 | else { | |
576 | my $expected_ret = ( $testname =~ /surrogate|non-character/ | |
577 | || $allowed_uv > 0x10FFFF) | |
578 | ? 0 | |
579 | : $length; | |
580 | is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" | |
581 | . " expected length: $expected_ret"); | |
582 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
583 | $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); | |
584 | is($ret, $expected_ret, | |
585 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" | |
586 | . " acts like isSTRICT_UTF8_CHAR"); | |
587 | } | |
588 | is(scalar @warnings, 0, | |
589 | "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" | |
590 | . " generated no warnings") | |
591 | or output_warnings(@warnings); | |
592 | ||
593 | undef @warnings; | |
594 | $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); | |
595 | if ($will_overflow) { | |
596 | is($ret, 0, "isC9_STRICT_UTF8_CHAR() $testname: returns 0"); | |
597 | } | |
598 | else { | |
599 | my $expected_ret = ( $testname =~ /surrogate/ | |
600 | || $allowed_uv > 0x10FFFF) | |
601 | ? 0 | |
602 | : $length; | |
603 | is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" | |
604 | ." returns expected length: $expected_ret"); | |
605 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
606 | $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); | |
607 | is($ret, $expected_ret, | |
608 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" | |
609 | . " acts like isC9_STRICT_UTF8_CHAR"); | |
610 | } | |
611 | is(scalar @warnings, 0, | |
612 | "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" | |
613 | . " generated no warnings") | |
614 | or output_warnings(@warnings); | |
615 | ||
616 | # Test partial character handling, for each byte not a full character | |
617 | for my $j (1.. $length - 1) { | |
618 | ||
619 | # Skip the test for the interaction between overflow and above-31 | |
620 | # bit. It is really testing other things than the partial | |
621 | # character tests, for which other tests in this file are | |
622 | # sufficient | |
623 | last if $testname =~ /overflow/; | |
624 | ||
625 | foreach my $disallow_flag (0, $disallow_flags) { | |
626 | my $partial = substr($bytes, 0, $j); | |
627 | my $ret_should_be; | |
628 | my $comment; | |
629 | if ($disallow_flag) { | |
630 | $ret_should_be = 0; | |
631 | $comment = "disallowed"; | |
632 | if ($j < $needed_to_discern_len) { | |
633 | $ret_should_be = 1; | |
634 | $comment .= ", but need $needed_to_discern_len bytes" | |
635 | . " to discern:"; | |
636 | } | |
637 | } | |
638 | else { | |
639 | $ret_should_be = 1; | |
640 | $comment = "allowed"; | |
641 | } | |
642 | ||
643 | undef @warnings; | |
644 | ||
645 | $ret = test_is_utf8_valid_partial_char_flags($partial, $j, | |
646 | $disallow_flag); | |
647 | is($ret, $ret_should_be, | |
648 | "$testname: is_utf8_valid_partial_char_flags(" | |
649 | . display_bytes($partial) | |
650 | . "), $comment: returns $ret_should_be"); | |
651 | is(scalar @warnings, 0, | |
652 | "$testname: is_utf8_valid_partial_char_flags()" | |
653 | . " generated no warnings") | |
654 | or output_warnings(@warnings); | |
655 | } | |
656 | } | |
657 | } | |
658 | ||
659 | # This is more complicated than the malformations tested earlier, as there | |
660 | # are several orthogonal variables involved. We test all the subclasses | |
661 | # of utf8 warnings to verify they work with and without the utf8 class, | |
662 | # and don't have effects on other sublass warnings | |
663 | foreach my $warning ('utf8', 'surrogate', 'nonchar', 'non_unicode') { | |
664 | foreach my $warn_flag (0, $warn_flags) { | |
665 | foreach my $disallow_flag (0, $disallow_flags) { | |
666 | foreach my $do_warning (0, 1) { | |
667 | ||
668 | # We try each of the above with various combinations of | |
669 | # malformations that can occur on the same input sequence. | |
670 | foreach my $short ("", "short") { | |
671 | foreach my $unexpected_noncont ("", | |
672 | "unexpected non-continuation") | |
673 | { | |
674 | foreach my $overlong ("", "overlong") { | |
675 | ||
676 | # If we're already at the longest possible, we | |
677 | # can't create an overlong (which would be longer) | |
678 | # can't handle anything larger. | |
679 | next if $overlong && $expected_len >= $::max_bytes; | |
680 | ||
681 | my @malformations; | |
682 | my @expected_errors; | |
683 | push @malformations, $short if $short; | |
684 | push @malformations, $unexpected_noncont | |
685 | if $unexpected_noncont; | |
686 | push @malformations, $overlong if $overlong; | |
687 | ||
688 | # The overflow malformation test in the input | |
689 | # array is coerced into being treated like one of | |
690 | # the others. | |
691 | if ($will_overflow) { | |
692 | push @malformations, 'overflow'; | |
693 | push @expected_errors, $::UTF8_GOT_OVERFLOW; | |
694 | } | |
695 | ||
696 | my $malformations_name = join "/", @malformations; | |
697 | $malformations_name .= " malformation" | |
698 | if $malformations_name; | |
699 | $malformations_name .= "s" if @malformations > 1; | |
700 | my $this_bytes = $bytes; | |
701 | my $this_length = $length; | |
702 | my $expected_uv = $allowed_uv; | |
703 | my $this_expected_len = $expected_len; | |
704 | my $this_needed_to_discern_len = $needed_to_discern_len; | |
705 | if ($malformations_name) { | |
706 | $expected_uv = 0; | |
707 | ||
708 | # Coerce the input into the desired | |
709 | # malformation | |
710 | if ($malformations_name =~ /overlong/) { | |
711 | ||
712 | # For an overlong, we convert the original | |
713 | # start byte into a continuation byte with | |
714 | # the same data bits as originally. ... | |
715 | substr($this_bytes, 0, 1) | |
716 | = start_byte_to_cont(substr($this_bytes, | |
717 | 0, 1)); | |
718 | ||
719 | # ... Then we prepend it with a known | |
720 | # overlong sequence. This should evaluate | |
721 | # to the exact same code point as the | |
722 | # original. | |
723 | $this_bytes | |
724 | = I8_to_native("\xff") | |
725 | . (I8_to_native(chr $::first_continuation) | |
726 | x ( $::max_bytes - 1 - length($this_bytes))) | |
727 | . $this_bytes; | |
728 | $this_length = length($this_bytes); | |
729 | $this_needed_to_discern_len | |
730 | = $::max_bytes - ($this_expected_len | |
731 | - $this_needed_to_discern_len); | |
732 | $this_expected_len = $::max_bytes; | |
733 | push @expected_errors, $::UTF8_GOT_LONG; | |
734 | } | |
735 | if ($malformations_name =~ /short/) { | |
736 | ||
737 | # Just tell the test to not look far | |
738 | # enough into the input. | |
739 | $this_length--; | |
740 | $this_expected_len--; | |
741 | push @expected_errors, $::UTF8_GOT_SHORT; | |
742 | } | |
743 | if ($malformations_name | |
744 | =~ /non-continuation/) | |
745 | { | |
746 | # Change the final continuation byte into | |
747 | # a non one. | |
748 | my $pos = ($short) ? -2 : -1; | |
749 | substr($this_bytes, $pos, 1) = '?'; | |
750 | $this_expected_len--; | |
751 | push @expected_errors, | |
752 | $::UTF8_GOT_NON_CONTINUATION; | |
753 | } | |
754 | } | |
755 | ||
756 | my $eval_warn = $do_warning | |
757 | ? "use warnings '$warning'" | |
758 | : $warning eq "utf8" | |
759 | ? "no warnings 'utf8'" | |
760 | : ( "use warnings 'utf8';" | |
761 | . " no warnings '$warning'"); | |
762 | ||
763 | # Is effectively disallowed if we've set up a | |
764 | # malformation, even if the flag indicates it is | |
765 | # allowed. Fix up test name to indicate this as | |
766 | # well | |
767 | my $disallowed = $disallow_flag | |
768 | || $malformations_name; | |
769 | my $this_name = "utf8n_to_uvchr_error() $testname: " | |
770 | . (($disallow_flag) | |
771 | ? 'disallowed' | |
772 | : $disallowed | |
773 | ? $disallowed | |
774 | : 'allowed'); | |
775 | $this_name .= ", $eval_warn"; | |
776 | $this_name .= ", " . (($warn_flag) | |
777 | ? 'with warning flag' | |
778 | : 'no warning flag'); | |
779 | ||
780 | undef @warnings; | |
781 | my $ret_ref; | |
782 | my $display_bytes = display_bytes($this_bytes); | |
783 | my $call = " Call was: $eval_warn; \$ret_ref" | |
784 | . " = test_utf8n_to_uvchr_error(" | |
785 | . "'$display_bytes', $this_length," | |
786 | . "$warn_flag" | |
787 | . "|$disallow_flag)"; | |
788 | my $eval_text = "$eval_warn; \$ret_ref" | |
789 | . " = test_utf8n_to_uvchr_error(" | |
790 | . "'$this_bytes'," | |
791 | . " $this_length, $warn_flag" | |
792 | . "|$disallow_flag)"; | |
793 | eval "$eval_text"; | |
794 | if (! ok ("$@ eq ''", | |
795 | "$this_name: eval succeeded")) | |
796 | { | |
797 | diag "\$!='$!'; eval'd=\"$call\""; | |
798 | next; | |
799 | } | |
800 | if ($disallowed) { | |
801 | is($ret_ref->[0], 0, "$this_name: Returns 0") | |
802 | or diag $call; | |
803 | } | |
804 | else { | |
805 | is($ret_ref->[0], $expected_uv, | |
806 | "$this_name: Returns expected uv: " | |
807 | . sprintf("0x%04X", $expected_uv)) | |
808 | or diag $call; | |
809 | } | |
810 | is($ret_ref->[1], $this_expected_len, | |
811 | "$this_name: Returns expected length:" | |
812 | . " $this_expected_len") | |
813 | or diag $call; | |
814 | ||
815 | my $errors = $ret_ref->[2]; | |
816 | ||
817 | for (my $i = @expected_errors - 1; $i >= 0; $i--) { | |
818 | if (ok($expected_errors[$i] & $errors, | |
819 | "Expected and got error bit return" | |
820 | . " for $malformations[$i] malformation")) | |
821 | { | |
822 | $errors &= ~$expected_errors[$i]; | |
823 | } | |
824 | splice @expected_errors, $i, 1; | |
825 | } | |
826 | is(scalar @expected_errors, 0, | |
827 | "Got all the expected malformation errors") | |
828 | or diag Dumper \@expected_errors; | |
829 | ||
830 | if ( $this_expected_len >= $this_needed_to_discern_len | |
831 | && ($warn_flag || $disallow_flag)) | |
832 | { | |
833 | is($errors, $expected_error_flags, | |
834 | "Got the correct error flag") | |
835 | or diag $call; | |
836 | } | |
837 | else { | |
838 | is($errors, 0, "Got no other error flag"); | |
839 | } | |
840 | ||
841 | if (@malformations) { | |
842 | if (! $do_warning && $warning eq 'utf8') { | |
843 | goto no_warnings_expected; | |
844 | } | |
845 | ||
846 | # Check that each malformation generates a | |
847 | # warning, removing that warning if found | |
848 | MALFORMATION: | |
849 | foreach my $malformation (@malformations) { | |
850 | foreach (my $i = 0; $i < @warnings; $i++) { | |
851 | if ($warnings[$i] =~ /$malformation/) { | |
852 | pass("Expected and got" | |
853 | . "'$malformation' warning"); | |
854 | splice @warnings, $i, 1; | |
855 | next MALFORMATION; | |
856 | } | |
857 | } | |
858 | fail("Expected '$malformation' warning" | |
859 | . " but didn't get it"); | |
860 | ||
861 | } | |
862 | } | |
863 | ||
864 | # Any overflow will override any super or above-31 | |
865 | # warnings. | |
866 | goto no_warnings_expected | |
867 | if $will_overflow || $this_expected_len | |
868 | < $this_needed_to_discern_len; | |
869 | ||
870 | if ( ! $do_warning | |
871 | && ( $warning eq 'utf8' | |
872 | || $warning eq $category)) | |
873 | { | |
874 | goto no_warnings_expected; | |
875 | } | |
876 | elsif ($warn_flag) { | |
877 | if (is(scalar @warnings, 1, | |
878 | "$this_name: Got a single warning ")) | |
879 | { | |
880 | like($warnings[0], $message, | |
881 | "$this_name: Got expected warning") | |
882 | or diag $call; | |
883 | } | |
884 | else { | |
885 | diag $call; | |
886 | if (scalar @warnings) { | |
887 | output_warnings(@warnings); | |
888 | } | |
889 | } | |
890 | } | |
891 | else { | |
892 | no_warnings_expected: | |
893 | unless (is(scalar @warnings, 0, | |
894 | "$this_name: Got no warnings")) | |
895 | { | |
896 | diag $call; | |
897 | output_warnings(@warnings); | |
898 | } | |
899 | } | |
900 | ||
901 | # Check CHECK_ONLY results when the input is | |
902 | # disallowed. Do this when actually disallowed, | |
903 | # not just when the $disallow_flag is set | |
904 | if ($disallowed) { | |
905 | undef @warnings; | |
906 | $ret_ref = test_utf8n_to_uvchr_error( | |
907 | $this_bytes, $this_length, | |
908 | $disallow_flag|$::UTF8_CHECK_ONLY); | |
909 | is($ret_ref->[0], 0, | |
910 | "$this_name, CHECK_ONLY: Returns 0") | |
911 | or diag $call; | |
912 | is($ret_ref->[1], -1, | |
913 | "$this_name: CHECK_ONLY: returns -1 for length") | |
914 | or diag $call; | |
915 | if (! is(scalar @warnings, 0, | |
916 | "$this_name, CHECK_ONLY: no warnings" | |
917 | . " generated")) | |
918 | { | |
919 | diag $call; | |
920 | output_warnings(@warnings); | |
921 | } | |
922 | } | |
923 | ||
924 | # Now repeat some of the above, but for | |
925 | # uvchr_to_utf8_flags(). Since this comes from an | |
926 | # existing code point, it hasn't overflowed, and | |
927 | # isn't malformed. | |
928 | next if @malformations; | |
929 | ||
930 | # The warning and disallow flags passed in are for | |
931 | # utf8n_to_uvchr_error(). Convert them for | |
932 | # uvchr_to_utf8_flags(). | |
933 | my $uvchr_warn_flag = 0; | |
934 | my $uvchr_disallow_flag = 0; | |
935 | if ($warn_flag) { | |
936 | if ($warn_flag == $::UTF8_WARN_SURROGATE) { | |
937 | $uvchr_warn_flag = $::UNICODE_WARN_SURROGATE | |
938 | } | |
939 | elsif ($warn_flag == $::UTF8_WARN_NONCHAR) { | |
940 | $uvchr_warn_flag = $::UNICODE_WARN_NONCHAR | |
941 | } | |
942 | elsif ($warn_flag == $::UTF8_WARN_SUPER) { | |
943 | $uvchr_warn_flag = $::UNICODE_WARN_SUPER | |
944 | } | |
945 | elsif ($warn_flag == $::UTF8_WARN_ABOVE_31_BIT) { | |
946 | $uvchr_warn_flag | |
947 | = $::UNICODE_WARN_ABOVE_31_BIT; | |
948 | } | |
949 | else { | |
950 | fail(sprintf "Unexpected warn flag: %x", | |
951 | $warn_flag); | |
952 | next; | |
953 | } | |
954 | } | |
955 | if ($disallow_flag) { | |
956 | if ($disallow_flag == $::UTF8_DISALLOW_SURROGATE) | |
957 | { | |
958 | $uvchr_disallow_flag | |
959 | = $::UNICODE_DISALLOW_SURROGATE; | |
960 | } | |
961 | elsif ($disallow_flag == $::UTF8_DISALLOW_NONCHAR) | |
962 | { | |
963 | $uvchr_disallow_flag | |
964 | = $::UNICODE_DISALLOW_NONCHAR; | |
965 | } | |
966 | elsif ($disallow_flag == $::UTF8_DISALLOW_SUPER) { | |
967 | $uvchr_disallow_flag | |
968 | = $::UNICODE_DISALLOW_SUPER; | |
969 | } | |
970 | elsif ($disallow_flag | |
971 | == $::UTF8_DISALLOW_ABOVE_31_BIT) | |
972 | { | |
973 | $uvchr_disallow_flag = | |
974 | $::UNICODE_DISALLOW_ABOVE_31_BIT; | |
975 | } | |
976 | else { | |
977 | fail(sprintf "Unexpected disallow flag: %x", | |
978 | $disallow_flag); | |
979 | next; | |
980 | } | |
981 | } | |
982 | ||
983 | $disallowed = $uvchr_disallow_flag; | |
984 | ||
985 | $this_name = "uvchr_to_utf8_flags() $testname: " | |
986 | . (($uvchr_disallow_flag) | |
987 | ? 'disallowed' | |
988 | : ($disallowed) | |
989 | ? 'ABOVE_31_BIT allowed' | |
990 | : 'allowed'); | |
991 | $this_name .= ", $eval_warn"; | |
992 | $this_name .= ", " . (($uvchr_warn_flag) | |
993 | ? 'with warning flag' | |
994 | : 'no warning flag'); | |
995 | ||
996 | undef @warnings; | |
997 | my $ret; | |
998 | my $warn_flag = sprintf "0x%x", $uvchr_warn_flag; | |
999 | my $disallow_flag = sprintf "0x%x", | |
1000 | $uvchr_disallow_flag; | |
1001 | $call = sprintf(" Call was: $eval_warn; \$ret" | |
1002 | . " = test_uvchr_to_utf8_flags(" | |
1003 | . " 0x%x, $warn_flag|$disallow_flag)", | |
1004 | $allowed_uv); | |
1005 | $eval_text = "$eval_warn; \$ret =" | |
1006 | . " test_uvchr_to_utf8_flags(" | |
1007 | . "$allowed_uv, $warn_flag|" | |
1008 | . "$disallow_flag)"; | |
1009 | eval "$eval_text"; | |
1010 | if (! ok ("$@ eq ''", "$this_name: eval succeeded")) | |
1011 | { | |
1012 | diag "\$!='$!'; eval'd=\"$eval_text\""; | |
1013 | next; | |
1014 | } | |
1015 | if ($disallowed) { | |
1016 | is($ret, undef, "$this_name: Returns undef") | |
1017 | or diag $call; | |
1018 | } | |
1019 | else { | |
1020 | is($ret, $bytes, "$this_name: Returns expected string") | |
1021 | or diag $call; | |
1022 | } | |
1023 | if (! $do_warning | |
1024 | && ($warning eq 'utf8' || $warning eq $category)) | |
1025 | { | |
1026 | if (!is(scalar @warnings, 0, | |
1027 | "$this_name: No warnings generated")) | |
1028 | { | |
1029 | diag $call; | |
1030 | output_warnings(@warnings); | |
1031 | } | |
1032 | } | |
1033 | elsif ( $uvchr_warn_flag | |
1034 | && ( $warning eq 'utf8' | |
1035 | || $warning eq $category)) | |
1036 | { | |
1037 | if (is(scalar @warnings, 1, | |
1038 | "$this_name: Got a single warning ")) | |
1039 | { | |
1040 | like($warnings[0], $message, | |
1041 | "$this_name: Got expected warning") | |
1042 | or diag $call; | |
1043 | } | |
1044 | else { | |
1045 | diag $call; | |
1046 | output_warnings(@warnings) | |
1047 | if scalar @warnings; | |
1048 | } | |
1049 | } | |
1050 | } | |
1051 | } | |
1052 | } | |
1053 | } | |
1054 | } | |
1055 | } | |
1056 | } | |
1057 | } | |
1058 | ||
1059 | done_testing; |