Commit | Line | Data |
---|---|---|
6aa905cf KW |
1 | #!perl -w |
2 | ||
3 | # This is a base file to be used by various .t's in its directory | |
4 | ||
5 | use strict; | |
6 | use Test::More; | |
7 | ||
8 | BEGIN { | |
9 | use_ok('XS::APItest'); | |
10 | require 'charset_tools.pl'; | |
11 | require './t/utf8_setup.pl'; | |
12 | }; | |
13 | ||
14 | $|=1; | |
15 | ||
16 | no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit | |
17 | # machines, and that is tested elsewhere | |
18 | ||
19 | use XS::APItest; | |
20 | use Data::Dumper; | |
21 | ||
9cdc3054 | 22 | my @warnings_gotten; |
6aa905cf KW |
23 | |
24 | use warnings 'utf8'; | |
9cdc3054 | 25 | local $SIG{__WARN__} = sub { push @warnings_gotten, @_ }; |
6aa905cf KW |
26 | |
27 | sub nonportable_regex ($) { | |
28 | ||
29 | # Returns a pattern that matches the non-portable message raised either | |
30 | # for the specific input code point, or the one generated when there | |
31 | # is some malformation that precludes the message containing the specific | |
32 | # code point | |
33 | ||
34 | my $code_point = shift; | |
35 | ||
36 | my $string = sprintf '(Code point 0x%X is not Unicode, and' | |
37 | . '|Any UTF-8 sequence that starts with' | |
38 | . ' "(\\\x[[:xdigit:]]{2})+" is for a' | |
39 | . ' non-Unicode code point, and is) not portable', | |
40 | $code_point; | |
41 | return qr/$string/; | |
42 | } | |
43 | ||
44 | # Now test the cases where a legal code point is generated, but may or may not | |
45 | # be allowed/warned on. | |
46 | my @tests = ( | |
9cdc3054 | 47 | # ($testname, $bytes, $disallow_flags, $controlling_warning_category, |
b7e1f4b2 | 48 | # $allowed_uv, $needed_to_discern_len ) |
6aa905cf KW |
49 | [ "lowest surrogate", |
50 | (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"), | |
b7e1f4b2 | 51 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 52 | 'surrogate', 0xD800, |
6aa905cf KW |
53 | ], |
54 | [ "a middle surrogate", | |
55 | (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"), | |
b7e1f4b2 | 56 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 57 | 'surrogate', 0xD90D, |
6aa905cf KW |
58 | ], |
59 | [ "highest surrogate", | |
60 | (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"), | |
b7e1f4b2 | 61 | $::UTF8_DISALLOW_SURROGATE, |
6aa905cf | 62 | 'surrogate', 0xDFFF, |
6aa905cf KW |
63 | ], |
64 | [ "first non_unicode", | |
65 | (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"), | |
b7e1f4b2 | 66 | $::UTF8_DISALLOW_SUPER, |
6aa905cf | 67 | 'non_unicode', 0x110000, |
6aa905cf | 68 | 2, |
6aa905cf KW |
69 | ], |
70 | [ "non_unicode whose first byte tells that", | |
71 | (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 72 | $::UTF8_DISALLOW_SUPER, |
6aa905cf KW |
73 | 'non_unicode', |
74 | (isASCII) ? 0x140000 : 0x200000, | |
6aa905cf | 75 | 1, |
6aa905cf KW |
76 | ], |
77 | [ "first of 32 consecutive non-character code points", | |
78 | (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"), | |
b7e1f4b2 | 79 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 80 | 'nonchar', 0xFDD0, |
6aa905cf KW |
81 | ], |
82 | [ "a mid non-character code point of the 32 consecutive ones", | |
83 | (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"), | |
b7e1f4b2 | 84 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 85 | 'nonchar', 0xFDE0, |
6aa905cf KW |
86 | ], |
87 | [ "final of 32 consecutive non-character code points", | |
88 | (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"), | |
b7e1f4b2 | 89 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 90 | 'nonchar', 0xFDEF, |
6aa905cf KW |
91 | ], |
92 | [ "non-character code point U+FFFE", | |
93 | (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"), | |
b7e1f4b2 | 94 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 95 | 'nonchar', 0xFFFE, |
6aa905cf KW |
96 | ], |
97 | [ "non-character code point U+FFFF", | |
98 | (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"), | |
b7e1f4b2 | 99 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 100 | 'nonchar', 0xFFFF, |
6aa905cf KW |
101 | ], |
102 | [ "non-character code point U+1FFFE", | |
103 | (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"), | |
b7e1f4b2 | 104 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 105 | 'nonchar', 0x1FFFE, |
6aa905cf KW |
106 | ], |
107 | [ "non-character code point U+1FFFF", | |
108 | (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"), | |
b7e1f4b2 | 109 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 110 | 'nonchar', 0x1FFFF, |
6aa905cf KW |
111 | ], |
112 | [ "non-character code point U+2FFFE", | |
113 | (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"), | |
b7e1f4b2 | 114 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 115 | 'nonchar', 0x2FFFE, |
6aa905cf KW |
116 | ], |
117 | [ "non-character code point U+2FFFF", | |
118 | (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"), | |
b7e1f4b2 | 119 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 120 | 'nonchar', 0x2FFFF, |
6aa905cf KW |
121 | ], |
122 | [ "non-character code point U+3FFFE", | |
123 | (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"), | |
b7e1f4b2 | 124 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 125 | 'nonchar', 0x3FFFE, |
6aa905cf KW |
126 | ], |
127 | [ "non-character code point U+3FFFF", | |
128 | (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"), | |
b7e1f4b2 | 129 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 130 | 'nonchar', 0x3FFFF, |
6aa905cf KW |
131 | ], |
132 | [ "non-character code point U+4FFFE", | |
133 | (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"), | |
b7e1f4b2 | 134 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 135 | 'nonchar', 0x4FFFE, |
6aa905cf KW |
136 | ], |
137 | [ "non-character code point U+4FFFF", | |
138 | (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"), | |
b7e1f4b2 | 139 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 140 | 'nonchar', 0x4FFFF, |
6aa905cf KW |
141 | ], |
142 | [ "non-character code point U+5FFFE", | |
143 | (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"), | |
b7e1f4b2 | 144 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 145 | 'nonchar', 0x5FFFE, |
6aa905cf KW |
146 | ], |
147 | [ "non-character code point U+5FFFF", | |
148 | (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"), | |
b7e1f4b2 | 149 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 150 | 'nonchar', 0x5FFFF, |
6aa905cf KW |
151 | ], |
152 | [ "non-character code point U+6FFFE", | |
153 | (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"), | |
b7e1f4b2 | 154 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 155 | 'nonchar', 0x6FFFE, |
6aa905cf KW |
156 | ], |
157 | [ "non-character code point U+6FFFF", | |
158 | (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"), | |
b7e1f4b2 | 159 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 160 | 'nonchar', 0x6FFFF, |
6aa905cf KW |
161 | ], |
162 | [ "non-character code point U+7FFFE", | |
163 | (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"), | |
b7e1f4b2 | 164 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 165 | 'nonchar', 0x7FFFE, |
6aa905cf KW |
166 | ], |
167 | [ "non-character code point U+7FFFF", | |
168 | (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"), | |
b7e1f4b2 | 169 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 170 | 'nonchar', 0x7FFFF, |
6aa905cf KW |
171 | ], |
172 | [ "non-character code point U+8FFFE", | |
173 | (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"), | |
b7e1f4b2 | 174 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 175 | 'nonchar', 0x8FFFE, |
6aa905cf KW |
176 | ], |
177 | [ "non-character code point U+8FFFF", | |
178 | (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"), | |
b7e1f4b2 | 179 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 180 | 'nonchar', 0x8FFFF, |
6aa905cf KW |
181 | ], |
182 | [ "non-character code point U+9FFFE", | |
183 | (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"), | |
b7e1f4b2 | 184 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 185 | 'nonchar', 0x9FFFE, |
6aa905cf KW |
186 | ], |
187 | [ "non-character code point U+9FFFF", | |
188 | (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"), | |
b7e1f4b2 | 189 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 190 | 'nonchar', 0x9FFFF, |
6aa905cf KW |
191 | ], |
192 | [ "non-character code point U+AFFFE", | |
193 | (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"), | |
b7e1f4b2 | 194 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 195 | 'nonchar', 0xAFFFE, |
6aa905cf KW |
196 | ], |
197 | [ "non-character code point U+AFFFF", | |
198 | (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"), | |
b7e1f4b2 | 199 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 200 | 'nonchar', 0xAFFFF, |
6aa905cf KW |
201 | ], |
202 | [ "non-character code point U+BFFFE", | |
203 | (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"), | |
b7e1f4b2 | 204 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 205 | 'nonchar', 0xBFFFE, |
6aa905cf KW |
206 | ], |
207 | [ "non-character code point U+BFFFF", | |
208 | (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"), | |
b7e1f4b2 | 209 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 210 | 'nonchar', 0xBFFFF, |
6aa905cf KW |
211 | ], |
212 | [ "non-character code point U+CFFFE", | |
213 | (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"), | |
b7e1f4b2 | 214 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 215 | 'nonchar', 0xCFFFE, |
6aa905cf KW |
216 | ], |
217 | [ "non-character code point U+CFFFF", | |
218 | (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"), | |
b7e1f4b2 | 219 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 220 | 'nonchar', 0xCFFFF, |
6aa905cf KW |
221 | ], |
222 | [ "non-character code point U+DFFFE", | |
223 | (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"), | |
b7e1f4b2 | 224 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 225 | 'nonchar', 0xDFFFE, |
6aa905cf KW |
226 | ], |
227 | [ "non-character code point U+DFFFF", | |
228 | (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"), | |
b7e1f4b2 | 229 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 230 | 'nonchar', 0xDFFFF, |
6aa905cf KW |
231 | ], |
232 | [ "non-character code point U+EFFFE", | |
233 | (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"), | |
b7e1f4b2 | 234 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 235 | 'nonchar', 0xEFFFE, |
6aa905cf KW |
236 | ], |
237 | [ "non-character code point U+EFFFF", | |
238 | (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"), | |
b7e1f4b2 | 239 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 240 | 'nonchar', 0xEFFFF, |
6aa905cf KW |
241 | ], |
242 | [ "non-character code point U+FFFFE", | |
243 | (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"), | |
b7e1f4b2 | 244 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 245 | 'nonchar', 0xFFFFE, |
6aa905cf KW |
246 | ], |
247 | [ "non-character code point U+FFFFF", | |
248 | (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"), | |
b7e1f4b2 | 249 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 250 | 'nonchar', 0xFFFFF, |
6aa905cf KW |
251 | ], |
252 | [ "non-character code point U+10FFFE", | |
253 | (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"), | |
b7e1f4b2 | 254 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 255 | 'nonchar', 0x10FFFE, |
6aa905cf KW |
256 | ], |
257 | [ "non-character code point U+10FFFF", | |
258 | (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"), | |
b7e1f4b2 | 259 | $::UTF8_DISALLOW_NONCHAR, |
6aa905cf | 260 | 'nonchar', 0x10FFFF, |
6aa905cf | 261 | ], |
76513bdc KW |
262 | [ "requires at least 32 bits", |
263 | (isASCII) | |
264 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
265 | : I8_to_native( | |
266 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
267 | # This code point is chosen so that it is representable in a UV on | |
268 | # 32-bit machines | |
b7e1f4b2 | 269 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
76513bdc | 270 | 'utf8', 0x80000000, |
76513bdc | 271 | (isASCII) ? 1 : 8, |
76513bdc KW |
272 | ], |
273 | [ "highest 32 bit code point", | |
274 | (isASCII) | |
275 | ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf" | |
276 | : I8_to_native( | |
277 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"), | |
b7e1f4b2 | 278 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
76513bdc | 279 | 'utf8', 0xFFFFFFFF, |
76513bdc | 280 | (isASCII) ? 1 : 8, |
76513bdc KW |
281 | ], |
282 | [ "requires at least 32 bits, and use SUPER-type flags, instead of" | |
283 | . " ABOVE_31_BIT", | |
284 | (isASCII) | |
285 | ? "\xfe\x82\x80\x80\x80\x80\x80" | |
286 | : I8_to_native( | |
287 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 288 | $::UTF8_DISALLOW_SUPER, |
76513bdc | 289 | 'utf8', 0x80000000, |
76513bdc | 290 | 1, |
76513bdc KW |
291 | ], |
292 | [ "overflow with warnings/disallow for more than 31 bits", | |
293 | # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT | |
294 | # with overflow. The overflow malformation is never allowed, so | |
295 | # preventing it takes precedence if the ABOVE_31_BIT options would | |
296 | # otherwise allow in an overflowing value. The ASCII code points (1 | |
297 | # for 32-bits; 1 for 64) were chosen because the old overflow | |
298 | # detection algorithm did not catch them; this means this test also | |
299 | # checks for that fix. The EBCDIC are arbitrary overflowing ones | |
300 | # since we have no reports of failures with it. | |
301 | (($::is64bit) | |
302 | ? ((isASCII) | |
303 | ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf" | |
304 | : I8_to_native( | |
305 | "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0")) | |
306 | : ((isASCII) | |
307 | ? "\xfe\x86\x80\x80\x80\x80\x80" | |
308 | : I8_to_native( | |
309 | "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))), | |
76513bdc | 310 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
04f42bf6 | 311 | 'utf8', -1, |
76513bdc | 312 | (isASCII || $::is64bit) ? 2 : 8, |
76513bdc | 313 | ], |
6aa905cf KW |
314 | ); |
315 | ||
316 | if (! $::is64bit) { | |
317 | if (isASCII) { | |
318 | no warnings qw{portable overflow}; | |
319 | push @tests, | |
320 | [ "Lowest 33 bit code point: overflow", | |
321 | "\xFE\x84\x80\x80\x80\x80\x80", | |
b7e1f4b2 | 322 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
04f42bf6 | 323 | 'utf8', -1, |
b3169593 | 324 | 1, |
6aa905cf KW |
325 | ]; |
326 | } | |
327 | } | |
328 | else { | |
329 | no warnings qw{portable overflow}; | |
330 | push @tests, | |
331 | [ "More than 32 bits", | |
332 | (isASCII) | |
333 | ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80" | |
334 | : I8_to_native( | |
335 | "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 336 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 337 | 'utf8', 0x1000000000, |
b3169593 | 338 | (isASCII) ? 1 : 7, |
6aa905cf KW |
339 | ]; |
340 | if (! isASCII) { | |
341 | push @tests, # These could falsely show wrongly in a naive | |
342 | # implementation | |
343 | [ "requires at least 32 bits", | |
344 | I8_to_native( | |
345 | "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 346 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 347 | 'utf8', 0x800000000, |
b3169593 | 348 | 7, |
6aa905cf KW |
349 | ], |
350 | [ "requires at least 32 bits", | |
351 | I8_to_native( | |
352 | "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 353 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 354 | 'utf8', 0x10000000000, |
b3169593 | 355 | 6, |
6aa905cf KW |
356 | ], |
357 | [ "requires at least 32 bits", | |
358 | I8_to_native( | |
359 | "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 360 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 361 | 'utf8', 0x200000000000, |
b3169593 | 362 | 5, |
6aa905cf KW |
363 | ], |
364 | [ "requires at least 32 bits", | |
365 | I8_to_native( | |
366 | "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 367 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 368 | 'utf8', 0x4000000000000, |
b3169593 | 369 | 4, |
6aa905cf KW |
370 | ], |
371 | [ "requires at least 32 bits", | |
372 | I8_to_native( | |
373 | "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 374 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 375 | 'utf8', 0x80000000000000, |
b3169593 | 376 | 3, |
6aa905cf KW |
377 | ], |
378 | [ "requires at least 32 bits", | |
379 | I8_to_native( | |
380 | "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"), | |
b7e1f4b2 | 381 | $::UTF8_DISALLOW_ABOVE_31_BIT, |
6aa905cf | 382 | 'utf8', 0x1000000000000000, |
b3169593 | 383 | 2, |
6aa905cf KW |
384 | ]; |
385 | } | |
386 | } | |
387 | ||
388 | # This test is split into this number of files. | |
389 | my $num_test_files = $ENV{TEST_JOBS} || 1; | |
390 | $num_test_files = 10 if $num_test_files > 10; | |
391 | ||
392 | my $test_count = -1; | |
393 | foreach my $test (@tests) { | |
394 | $test_count++; | |
395 | next if $test_count % $num_test_files != $::TEST_CHUNK; | |
396 | ||
b7e1f4b2 | 397 | my ($testname, $bytes, $disallow_flags, |
9cdc3054 | 398 | $controlling_warning_category, $allowed_uv, $needed_to_discern_len |
6aa905cf KW |
399 | ) = @$test; |
400 | ||
401 | my $length = length $bytes; | |
04f42bf6 | 402 | my $will_overflow = $allowed_uv < 0; |
6aa905cf | 403 | |
b7e1f4b2 KW |
404 | # The convention is that the got flag is the same value as the disallow |
405 | # one, and the warn flag is the next bit over. If this were violated, the | |
406 | # tests here should start failing. We could do an eval under no strict to | |
407 | # be sure. | |
408 | my $expected_error_flags = $disallow_flags; | |
409 | my $warn_flags = $disallow_flags << 1; | |
410 | ||
3022ad00 KW |
411 | my $message; |
412 | if ($allowed_uv > 0x7FFFFFFF) { | |
413 | $message = nonportable_regex($allowed_uv); | |
414 | } | |
415 | elsif ($allowed_uv > 0x10FFFF) { | |
416 | $message = qr/(not Unicode|for a non-Unicode code point).* may not be portable/; | |
417 | } | |
418 | elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) { | |
419 | $message = qr/surrogate/; | |
67e45424 | 420 | $needed_to_discern_len = 2 unless defined $needed_to_discern_len; |
3022ad00 KW |
421 | } |
422 | elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF) | |
423 | || ($allowed_uv & 0xFFFE) == 0xFFFE) | |
424 | { | |
425 | $message = qr/Unicode non-character.*is not recommended for open interchange/; | |
67e45424 | 426 | $needed_to_discern_len = $length unless defined $needed_to_discern_len; |
3022ad00 KW |
427 | } |
428 | elsif ($will_overflow) { | |
429 | $message = qr/overflows/; | |
430 | } | |
431 | else { | |
432 | die "Can't figure out what type of warning to test for $testname" | |
433 | } | |
434 | ||
67e45424 KW |
435 | die 'Didn\'t set $needed_to_discern_len for ' . $testname |
436 | unless defined $needed_to_discern_len; | |
437 | ||
6aa905cf KW |
438 | { |
439 | use warnings; | |
9cdc3054 | 440 | undef @warnings_gotten; |
6aa905cf KW |
441 | my $ret = test_isUTF8_CHAR($bytes, $length); |
442 | my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0); | |
443 | if ($will_overflow) { | |
444 | is($ret, 0, "isUTF8_CHAR() $testname: returns 0"); | |
445 | is($ret_flags, 0, "isUTF8_CHAR_flags() $testname: returns 0"); | |
446 | } | |
447 | else { | |
448 | is($ret, $length, | |
449 | "isUTF8_CHAR() $testname: returns expected length: $length"); | |
450 | is($ret_flags, $length, "isUTF8_CHAR_flags(...,0) $testname:" | |
451 | . " returns expected length: $length"); | |
452 | } | |
9cdc3054 | 453 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
454 | "isUTF8_CHAR() and isUTF8_CHAR()_flags $testname: generated" |
455 | . " no warnings") | |
9cdc3054 | 456 | or output_warnings(@warnings_gotten); |
6aa905cf | 457 | |
9cdc3054 | 458 | undef @warnings_gotten; |
6aa905cf KW |
459 | $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); |
460 | if ($will_overflow) { | |
461 | is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); | |
462 | } | |
463 | else { | |
464 | my $expected_ret = ( $testname =~ /surrogate|non-character/ | |
465 | || $allowed_uv > 0x10FFFF) | |
466 | ? 0 | |
467 | : $length; | |
468 | is($ret, $expected_ret, "isSTRICT_UTF8_CHAR() $testname: returns" | |
469 | . " expected length: $expected_ret"); | |
470 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
471 | $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE); | |
472 | is($ret, $expected_ret, | |
473 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')" | |
474 | . " acts like isSTRICT_UTF8_CHAR"); | |
475 | } | |
9cdc3054 | 476 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
477 | "isSTRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" |
478 | . " generated no warnings") | |
9cdc3054 | 479 | or output_warnings(@warnings_gotten); |
6aa905cf | 480 | |
9cdc3054 | 481 | undef @warnings_gotten; |
6aa905cf KW |
482 | $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length); |
483 | if ($will_overflow) { | |
484 | is($ret, 0, "isC9_STRICT_UTF8_CHAR() $testname: returns 0"); | |
485 | } | |
486 | else { | |
487 | my $expected_ret = ( $testname =~ /surrogate/ | |
488 | || $allowed_uv > 0x10FFFF) | |
489 | ? 0 | |
490 | : $length; | |
491 | is($ret, $expected_ret, "isC9_STRICT_UTF8_CHAR() $testname:" | |
492 | ." returns expected length: $expected_ret"); | |
493 | $ret = test_isUTF8_CHAR_flags($bytes, $length, | |
494 | $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE); | |
495 | is($ret, $expected_ret, | |
496 | "isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')" | |
497 | . " acts like isC9_STRICT_UTF8_CHAR"); | |
498 | } | |
9cdc3054 | 499 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
500 | "isC9_STRICT_UTF8_CHAR() and isUTF8_CHAR_flags $testname:" |
501 | . " generated no warnings") | |
9cdc3054 | 502 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
503 | |
504 | # Test partial character handling, for each byte not a full character | |
505 | for my $j (1.. $length - 1) { | |
506 | ||
507 | # Skip the test for the interaction between overflow and above-31 | |
508 | # bit. It is really testing other things than the partial | |
509 | # character tests, for which other tests in this file are | |
510 | # sufficient | |
04f42bf6 | 511 | last if $will_overflow; |
6aa905cf KW |
512 | |
513 | foreach my $disallow_flag (0, $disallow_flags) { | |
514 | my $partial = substr($bytes, 0, $j); | |
515 | my $ret_should_be; | |
516 | my $comment; | |
517 | if ($disallow_flag) { | |
518 | $ret_should_be = 0; | |
519 | $comment = "disallowed"; | |
520 | if ($j < $needed_to_discern_len) { | |
521 | $ret_should_be = 1; | |
522 | $comment .= ", but need $needed_to_discern_len bytes" | |
523 | . " to discern:"; | |
524 | } | |
525 | } | |
526 | else { | |
527 | $ret_should_be = 1; | |
528 | $comment = "allowed"; | |
529 | } | |
530 | ||
9cdc3054 | 531 | undef @warnings_gotten; |
6aa905cf KW |
532 | |
533 | $ret = test_is_utf8_valid_partial_char_flags($partial, $j, | |
534 | $disallow_flag); | |
535 | is($ret, $ret_should_be, | |
536 | "$testname: is_utf8_valid_partial_char_flags(" | |
537 | . display_bytes($partial) | |
538 | . "), $comment: returns $ret_should_be"); | |
9cdc3054 | 539 | is(scalar @warnings_gotten, 0, |
6aa905cf KW |
540 | "$testname: is_utf8_valid_partial_char_flags()" |
541 | . " generated no warnings") | |
9cdc3054 | 542 | or output_warnings(@warnings_gotten); |
6aa905cf KW |
543 | } |
544 | } | |
545 | } | |
546 | ||
547 | # This is more complicated than the malformations tested earlier, as there | |
548 | # are several orthogonal variables involved. We test all the subclasses | |
549 | # of utf8 warnings to verify they work with and without the utf8 class, | |
550 | # and don't have effects on other sublass warnings | |
9cdc3054 | 551 | foreach my $trial_warning_category ('utf8', 'surrogate', 'nonchar', 'non_unicode') { |
6aa905cf KW |
552 | foreach my $warn_flag (0, $warn_flags) { |
553 | foreach my $disallow_flag (0, $disallow_flags) { | |
554 | foreach my $do_warning (0, 1) { | |
555 | ||
556 | # We try each of the above with various combinations of | |
557 | # malformations that can occur on the same input sequence. | |
558 | foreach my $short ("", "short") { | |
559 | foreach my $unexpected_noncont ("", | |
560 | "unexpected non-continuation") | |
561 | { | |
562 | foreach my $overlong ("", "overlong") { | |
563 | ||
b3169593 KW |
564 | # If we're creating an overlong, it can't be longer than |
565 | # the maximum length, so skip if we're already at that | |
566 | # length. | |
567 | next if $overlong && $length >= $::max_bytes; | |
6aa905cf KW |
568 | |
569 | my @malformations; | |
9cdc3054 | 570 | my @expected_return_flags; |
6aa905cf KW |
571 | push @malformations, $short if $short; |
572 | push @malformations, $unexpected_noncont | |
573 | if $unexpected_noncont; | |
574 | push @malformations, $overlong if $overlong; | |
575 | ||
576 | # The overflow malformation test in the input | |
577 | # array is coerced into being treated like one of | |
578 | # the others. | |
579 | if ($will_overflow) { | |
580 | push @malformations, 'overflow'; | |
9cdc3054 | 581 | push @expected_return_flags, $::UTF8_GOT_OVERFLOW; |
6aa905cf KW |
582 | } |
583 | ||
584 | my $malformations_name = join "/", @malformations; | |
585 | $malformations_name .= " malformation" | |
586 | if $malformations_name; | |
587 | $malformations_name .= "s" if @malformations > 1; | |
588 | my $this_bytes = $bytes; | |
589 | my $this_length = $length; | |
590 | my $expected_uv = $allowed_uv; | |
b3169593 | 591 | my $this_expected_len = $length; |
6aa905cf KW |
592 | my $this_needed_to_discern_len = $needed_to_discern_len; |
593 | if ($malformations_name) { | |
594 | $expected_uv = 0; | |
595 | ||
596 | # Coerce the input into the desired | |
597 | # malformation | |
598 | if ($malformations_name =~ /overlong/) { | |
599 | ||
600 | # For an overlong, we convert the original | |
601 | # start byte into a continuation byte with | |
602 | # the same data bits as originally. ... | |
603 | substr($this_bytes, 0, 1) | |
604 | = start_byte_to_cont(substr($this_bytes, | |
605 | 0, 1)); | |
606 | ||
607 | # ... Then we prepend it with a known | |
608 | # overlong sequence. This should evaluate | |
609 | # to the exact same code point as the | |
610 | # original. | |
611 | $this_bytes | |
612 | = I8_to_native("\xff") | |
dbb8d798 | 613 | . (I8_to_native(chr $::lowest_continuation) |
6aa905cf KW |
614 | x ( $::max_bytes - 1 - length($this_bytes))) |
615 | . $this_bytes; | |
616 | $this_length = length($this_bytes); | |
617 | $this_needed_to_discern_len | |
618 | = $::max_bytes - ($this_expected_len | |
619 | - $this_needed_to_discern_len); | |
620 | $this_expected_len = $::max_bytes; | |
9cdc3054 | 621 | push @expected_return_flags, $::UTF8_GOT_LONG; |
6aa905cf KW |
622 | } |
623 | if ($malformations_name =~ /short/) { | |
624 | ||
625 | # Just tell the test to not look far | |
626 | # enough into the input. | |
627 | $this_length--; | |
628 | $this_expected_len--; | |
9cdc3054 | 629 | push @expected_return_flags, $::UTF8_GOT_SHORT; |
6aa905cf KW |
630 | } |
631 | if ($malformations_name | |
632 | =~ /non-continuation/) | |
633 | { | |
634 | # Change the final continuation byte into | |
635 | # a non one. | |
636 | my $pos = ($short) ? -2 : -1; | |
637 | substr($this_bytes, $pos, 1) = '?'; | |
638 | $this_expected_len--; | |
9cdc3054 | 639 | push @expected_return_flags, |
6aa905cf KW |
640 | $::UTF8_GOT_NON_CONTINUATION; |
641 | } | |
642 | } | |
643 | ||
644 | my $eval_warn = $do_warning | |
9cdc3054 KW |
645 | ? "use warnings '$trial_warning_category'" |
646 | : $trial_warning_category eq "utf8" | |
6aa905cf KW |
647 | ? "no warnings 'utf8'" |
648 | : ( "use warnings 'utf8';" | |
9cdc3054 | 649 | . " no warnings '$trial_warning_category'"); |
6aa905cf KW |
650 | |
651 | # Is effectively disallowed if we've set up a | |
652 | # malformation, even if the flag indicates it is | |
653 | # allowed. Fix up test name to indicate this as | |
654 | # well | |
655 | my $disallowed = $disallow_flag | |
656 | || $malformations_name; | |
657 | my $this_name = "utf8n_to_uvchr_error() $testname: " | |
658 | . (($disallow_flag) | |
659 | ? 'disallowed' | |
660 | : $disallowed | |
661 | ? $disallowed | |
662 | : 'allowed'); | |
663 | $this_name .= ", $eval_warn"; | |
664 | $this_name .= ", " . (($warn_flag) | |
665 | ? 'with warning flag' | |
666 | : 'no warning flag'); | |
667 | ||
9cdc3054 | 668 | undef @warnings_gotten; |
6aa905cf KW |
669 | my $ret_ref; |
670 | my $display_bytes = display_bytes($this_bytes); | |
671 | my $call = " Call was: $eval_warn; \$ret_ref" | |
672 | . " = test_utf8n_to_uvchr_error(" | |
673 | . "'$display_bytes', $this_length," | |
674 | . "$warn_flag" | |
675 | . "|$disallow_flag)"; | |
676 | my $eval_text = "$eval_warn; \$ret_ref" | |
677 | . " = test_utf8n_to_uvchr_error(" | |
678 | . "'$this_bytes'," | |
679 | . " $this_length, $warn_flag" | |
680 | . "|$disallow_flag)"; | |
681 | eval "$eval_text"; | |
682 | if (! ok ("$@ eq ''", | |
683 | "$this_name: eval succeeded")) | |
684 | { | |
685 | diag "\$!='$!'; eval'd=\"$call\""; | |
686 | next; | |
687 | } | |
688 | if ($disallowed) { | |
689 | is($ret_ref->[0], 0, "$this_name: Returns 0") | |
690 | or diag $call; | |
691 | } | |
692 | else { | |
693 | is($ret_ref->[0], $expected_uv, | |
694 | "$this_name: Returns expected uv: " | |
695 | . sprintf("0x%04X", $expected_uv)) | |
696 | or diag $call; | |
697 | } | |
698 | is($ret_ref->[1], $this_expected_len, | |
699 | "$this_name: Returns expected length:" | |
700 | . " $this_expected_len") | |
701 | or diag $call; | |
702 | ||
9cdc3054 | 703 | my $returned_flags = $ret_ref->[2]; |
6aa905cf | 704 | |
9cdc3054 KW |
705 | for (my $i = @expected_return_flags - 1; $i >= 0; $i--) { |
706 | if (ok($expected_return_flags[$i] & $returned_flags, | |
6aa905cf KW |
707 | "Expected and got error bit return" |
708 | . " for $malformations[$i] malformation")) | |
709 | { | |
9cdc3054 | 710 | $returned_flags &= ~$expected_return_flags[$i]; |
6aa905cf | 711 | } |
9cdc3054 | 712 | splice @expected_return_flags, $i, 1; |
6aa905cf | 713 | } |
9cdc3054 | 714 | is(scalar @expected_return_flags, 0, |
6aa905cf | 715 | "Got all the expected malformation errors") |
9cdc3054 | 716 | or diag Dumper \@expected_return_flags; |
6aa905cf KW |
717 | |
718 | if ( $this_expected_len >= $this_needed_to_discern_len | |
719 | && ($warn_flag || $disallow_flag)) | |
720 | { | |
9cdc3054 | 721 | is($returned_flags, $expected_error_flags, |
6aa905cf KW |
722 | "Got the correct error flag") |
723 | or diag $call; | |
724 | } | |
725 | else { | |
9cdc3054 | 726 | is($returned_flags, 0, "Got no other error flag"); |
6aa905cf KW |
727 | } |
728 | ||
729 | if (@malformations) { | |
9cdc3054 | 730 | if (! $do_warning && $trial_warning_category eq 'utf8') { |
6aa905cf KW |
731 | goto no_warnings_expected; |
732 | } | |
733 | ||
734 | # Check that each malformation generates a | |
735 | # warning, removing that warning if found | |
736 | MALFORMATION: | |
737 | foreach my $malformation (@malformations) { | |
9cdc3054 KW |
738 | foreach (my $i = 0; $i < @warnings_gotten; $i++) { |
739 | if ($warnings_gotten[$i] =~ /$malformation/) { | |
6aa905cf KW |
740 | pass("Expected and got" |
741 | . "'$malformation' warning"); | |
9cdc3054 | 742 | splice @warnings_gotten, $i, 1; |
6aa905cf KW |
743 | next MALFORMATION; |
744 | } | |
745 | } | |
746 | fail("Expected '$malformation' warning" | |
747 | . " but didn't get it"); | |
748 | ||
749 | } | |
750 | } | |
751 | ||
752 | # Any overflow will override any super or above-31 | |
753 | # warnings. | |
754 | goto no_warnings_expected | |
755 | if $will_overflow || $this_expected_len | |
756 | < $this_needed_to_discern_len; | |
757 | ||
758 | if ( ! $do_warning | |
9cdc3054 KW |
759 | && ( $trial_warning_category eq 'utf8' |
760 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf KW |
761 | { |
762 | goto no_warnings_expected; | |
763 | } | |
764 | elsif ($warn_flag) { | |
9cdc3054 | 765 | if (is(scalar @warnings_gotten, 1, |
6aa905cf KW |
766 | "$this_name: Got a single warning ")) |
767 | { | |
9cdc3054 | 768 | like($warnings_gotten[0], $message, |
6aa905cf KW |
769 | "$this_name: Got expected warning") |
770 | or diag $call; | |
771 | } | |
772 | else { | |
773 | diag $call; | |
9cdc3054 KW |
774 | if (scalar @warnings_gotten) { |
775 | output_warnings(@warnings_gotten); | |
6aa905cf KW |
776 | } |
777 | } | |
778 | } | |
779 | else { | |
9cdc3054 | 780 | |
6aa905cf | 781 | no_warnings_expected: |
9cdc3054 | 782 | unless (is(scalar @warnings_gotten, 0, |
6aa905cf KW |
783 | "$this_name: Got no warnings")) |
784 | { | |
785 | diag $call; | |
9cdc3054 | 786 | output_warnings(@warnings_gotten); |
6aa905cf KW |
787 | } |
788 | } | |
789 | ||
790 | # Check CHECK_ONLY results when the input is | |
791 | # disallowed. Do this when actually disallowed, | |
792 | # not just when the $disallow_flag is set | |
793 | if ($disallowed) { | |
9cdc3054 | 794 | undef @warnings_gotten; |
6aa905cf KW |
795 | $ret_ref = test_utf8n_to_uvchr_error( |
796 | $this_bytes, $this_length, | |
797 | $disallow_flag|$::UTF8_CHECK_ONLY); | |
798 | is($ret_ref->[0], 0, | |
799 | "$this_name, CHECK_ONLY: Returns 0") | |
800 | or diag $call; | |
801 | is($ret_ref->[1], -1, | |
802 | "$this_name: CHECK_ONLY: returns -1 for length") | |
803 | or diag $call; | |
9cdc3054 | 804 | if (! is(scalar @warnings_gotten, 0, |
6aa905cf KW |
805 | "$this_name, CHECK_ONLY: no warnings" |
806 | . " generated")) | |
807 | { | |
808 | diag $call; | |
9cdc3054 | 809 | output_warnings(@warnings_gotten); |
6aa905cf KW |
810 | } |
811 | } | |
812 | ||
813 | # Now repeat some of the above, but for | |
814 | # uvchr_to_utf8_flags(). Since this comes from an | |
815 | # existing code point, it hasn't overflowed, and | |
816 | # isn't malformed. | |
817 | next if @malformations; | |
818 | ||
819 | # The warning and disallow flags passed in are for | |
820 | # utf8n_to_uvchr_error(). Convert them for | |
821 | # uvchr_to_utf8_flags(). | |
822 | my $uvchr_warn_flag = 0; | |
823 | my $uvchr_disallow_flag = 0; | |
824 | if ($warn_flag) { | |
825 | if ($warn_flag == $::UTF8_WARN_SURROGATE) { | |
826 | $uvchr_warn_flag = $::UNICODE_WARN_SURROGATE | |
827 | } | |
828 | elsif ($warn_flag == $::UTF8_WARN_NONCHAR) { | |
829 | $uvchr_warn_flag = $::UNICODE_WARN_NONCHAR | |
830 | } | |
831 | elsif ($warn_flag == $::UTF8_WARN_SUPER) { | |
832 | $uvchr_warn_flag = $::UNICODE_WARN_SUPER | |
833 | } | |
834 | elsif ($warn_flag == $::UTF8_WARN_ABOVE_31_BIT) { | |
835 | $uvchr_warn_flag | |
836 | = $::UNICODE_WARN_ABOVE_31_BIT; | |
837 | } | |
838 | else { | |
839 | fail(sprintf "Unexpected warn flag: %x", | |
840 | $warn_flag); | |
841 | next; | |
842 | } | |
843 | } | |
844 | if ($disallow_flag) { | |
845 | if ($disallow_flag == $::UTF8_DISALLOW_SURROGATE) | |
846 | { | |
847 | $uvchr_disallow_flag | |
848 | = $::UNICODE_DISALLOW_SURROGATE; | |
849 | } | |
850 | elsif ($disallow_flag == $::UTF8_DISALLOW_NONCHAR) | |
851 | { | |
852 | $uvchr_disallow_flag | |
853 | = $::UNICODE_DISALLOW_NONCHAR; | |
854 | } | |
855 | elsif ($disallow_flag == $::UTF8_DISALLOW_SUPER) { | |
856 | $uvchr_disallow_flag | |
857 | = $::UNICODE_DISALLOW_SUPER; | |
858 | } | |
859 | elsif ($disallow_flag | |
860 | == $::UTF8_DISALLOW_ABOVE_31_BIT) | |
861 | { | |
862 | $uvchr_disallow_flag = | |
863 | $::UNICODE_DISALLOW_ABOVE_31_BIT; | |
864 | } | |
865 | else { | |
866 | fail(sprintf "Unexpected disallow flag: %x", | |
867 | $disallow_flag); | |
868 | next; | |
869 | } | |
870 | } | |
871 | ||
872 | $disallowed = $uvchr_disallow_flag; | |
873 | ||
874 | $this_name = "uvchr_to_utf8_flags() $testname: " | |
875 | . (($uvchr_disallow_flag) | |
876 | ? 'disallowed' | |
877 | : ($disallowed) | |
878 | ? 'ABOVE_31_BIT allowed' | |
879 | : 'allowed'); | |
880 | $this_name .= ", $eval_warn"; | |
881 | $this_name .= ", " . (($uvchr_warn_flag) | |
882 | ? 'with warning flag' | |
883 | : 'no warning flag'); | |
884 | ||
9cdc3054 | 885 | undef @warnings_gotten; |
6aa905cf KW |
886 | my $ret; |
887 | my $warn_flag = sprintf "0x%x", $uvchr_warn_flag; | |
888 | my $disallow_flag = sprintf "0x%x", | |
889 | $uvchr_disallow_flag; | |
890 | $call = sprintf(" Call was: $eval_warn; \$ret" | |
891 | . " = test_uvchr_to_utf8_flags(" | |
892 | . " 0x%x, $warn_flag|$disallow_flag)", | |
893 | $allowed_uv); | |
894 | $eval_text = "$eval_warn; \$ret =" | |
895 | . " test_uvchr_to_utf8_flags(" | |
896 | . "$allowed_uv, $warn_flag|" | |
897 | . "$disallow_flag)"; | |
898 | eval "$eval_text"; | |
899 | if (! ok ("$@ eq ''", "$this_name: eval succeeded")) | |
900 | { | |
901 | diag "\$!='$!'; eval'd=\"$eval_text\""; | |
902 | next; | |
903 | } | |
904 | if ($disallowed) { | |
905 | is($ret, undef, "$this_name: Returns undef") | |
906 | or diag $call; | |
907 | } | |
908 | else { | |
909 | is($ret, $bytes, "$this_name: Returns expected string") | |
910 | or diag $call; | |
911 | } | |
912 | if (! $do_warning | |
9cdc3054 | 913 | && ($trial_warning_category eq 'utf8' || $trial_warning_category eq $controlling_warning_category)) |
6aa905cf | 914 | { |
9cdc3054 | 915 | if (!is(scalar @warnings_gotten, 0, |
6aa905cf KW |
916 | "$this_name: No warnings generated")) |
917 | { | |
918 | diag $call; | |
9cdc3054 | 919 | output_warnings(@warnings_gotten); |
6aa905cf KW |
920 | } |
921 | } | |
922 | elsif ( $uvchr_warn_flag | |
9cdc3054 KW |
923 | && ( $trial_warning_category eq 'utf8' |
924 | || $trial_warning_category eq $controlling_warning_category)) | |
6aa905cf | 925 | { |
9cdc3054 | 926 | if (is(scalar @warnings_gotten, 1, |
6aa905cf KW |
927 | "$this_name: Got a single warning ")) |
928 | { | |
9cdc3054 | 929 | like($warnings_gotten[0], $message, |
6aa905cf KW |
930 | "$this_name: Got expected warning") |
931 | or diag $call; | |
932 | } | |
933 | else { | |
934 | diag $call; | |
9cdc3054 KW |
935 | output_warnings(@warnings_gotten) |
936 | if scalar @warnings_gotten; | |
6aa905cf KW |
937 | } |
938 | } | |
939 | } | |
940 | } | |
941 | } | |
942 | } | |
943 | } | |
944 | } | |
945 | } | |
946 | } | |
947 | ||
948 | done_testing; |