This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: Add a test
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
CommitLineData
6aa905cf
KW
1#!perl -w
2
3# This is a base file to be used by various .t's in its directory
3f055917
KW
4# It tests various malformed UTF-8 sequences and some code points that are
5# "problematic", and verifies that the correct warnings/flags etc are
6# generated when using them. For the code points, it also takes the UTF-8 and
7# perturbs it to be malformed in various ways, and tests that this gets
8# appropriately detected.
6aa905cf
KW
9
10use strict;
11use Test::More;
12
13BEGIN {
14 use_ok('XS::APItest');
15 require 'charset_tools.pl';
16 require './t/utf8_setup.pl';
17};
18
19$|=1;
20
21no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
22 # machines, and that is tested elsewhere
6aa905cf 23use XS::APItest;
6aa905cf 24
9cdc3054 25my @warnings_gotten;
6aa905cf
KW
26
27use warnings 'utf8';
8d6f1506
KW
28local $SIG{__WARN__} = sub { my @copy = @_;
29 push @warnings_gotten, map { chomp; $_ } @copy;
30 };
6d736463 31
57ff5f59 32my $highest_non_extended_utf8_cp = (isASCII) ? 0x7FFFFFFF : 0x3FFFFFFF;
9e35eec9
KW
33my $native_lowest_continuation_chr = I8_to_native(chr $::lowest_continuation);
34
6d736463
KW
35sub requires_extended_utf8($) {
36
37 # Returns a boolean as to whether or not the code point parameter fits
38 # into 31 bits, subject to the convention that a negative code point
39 # stands for one that overflows the word size, so won't fit in 31 bits.
40
57ff5f59 41 return shift > $highest_non_extended_utf8_cp;
6d736463
KW
42}
43
3f055917
KW
44sub is_extended_utf8($) {
45
46 # Returns a boolean as to whether or not the input UTF-8 sequence uses
47 # Perl extended UTF-8.
48
49 my $byte = substr(shift, 0, 1);
50 return ord $byte >= 0xFE if isASCII;
51 return $byte == I8_to_native("\xFF");
52}
53
404a1403
KW
54sub overflow_discern_len($) {
55
8e0327af
KW
56 # Returns how many bytes are needed to tell if a non-overlong UTF-8
57 # sequence is for a code point that won't fit in the platform's word size.
58 # Only the length of the sequence representing a single code point is
59 # needed.
404a1403
KW
60
61 if (isASCII) {
62 return ($::is64bit) ? 3 : ((shift == $::max_bytes)
63 ? 1
64 : 2);
65 }
66
67 return ($::is64bit) ? 2 : 8;
68}
69
8e0327af
KW
70sub overlong_discern_len($) {
71
72 # Returns how many bytes are needed to tell if the input UTF-8 sequence
73 # for a code point is overlong
74
75 my $string = shift;
76 my $length = length $string;
77 my $byte = ord native_to_I8(substr($string, 0, 1));
78 if (isASCII) {
79 return ($length == $::max_bytes)
80 # This is constrained to 1 on 32-bit machines, as it
81 # overflows there
82 ? (($::is64bit) ? 7 : 1)
83 : (($length == 2) ? 1 : 2);
84 }
85
86 return ($length == $::max_bytes) ? 8 : (($length <= 3) ? 1 : 2);
87}
88
c7f9e4bb
KW
89my @tests;
90{
91 no warnings qw(portable overflow);
92 @tests = (
8f79178b
KW
93 # $testname,
94 # $bytes, UTF-8 string
95 # $allowed_uv, code point $bytes evaluates to; -1 if
96 # overflows
97 # $needed_to_discern_len optional, how long an initial substring do
98 # we need to tell that the string must be for
99 # a code point in the category it falls in,
100 # like being a surrogate; 0 indicates we need
101 # the whole string. Some categories have a
102 # default that is used if this is omitted.
3f055917
KW
103 [ "overlong malformation, lowest 2-byte",
104 (isASCII) ? "\xc0\x80" : I8_to_native("\xc0\xa0"),
105 0, # NUL
106 ],
107 [ "overlong malformation, highest 2-byte",
108 (isASCII) ? "\xc1\xbf" : I8_to_native("\xc4\xbf"),
109 (isASCII) ? 0x7F : utf8::unicode_to_native(0x9F),
110 ],
111 [ "overlong malformation, lowest 3-byte",
112 (isASCII) ? "\xe0\x80\x80" : I8_to_native("\xe0\xa0\xa0"),
113 0, # NUL
114 ],
115 [ "overlong malformation, highest 3-byte",
116 (isASCII) ? "\xe0\x9f\xbf" : I8_to_native("\xe0\xbf\xbf"),
117 (isASCII) ? 0x7FF : 0x3FF,
118 ],
8f79178b
KW
119 [ "lowest surrogate",
120 (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
121 0xD800,
122 ],
123 [ "a middle surrogate",
124 (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
125 0xD90D,
126 ],
127 [ "highest surrogate",
128 (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
129 0xDFFF,
130 ],
8f79178b
KW
131 [ "first of 32 consecutive non-character code points",
132 (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
133 0xFDD0,
134 ],
135 [ "a mid non-character code point of the 32 consecutive ones",
136 (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
137 0xFDE0,
138 ],
139 [ "final of 32 consecutive non-character code points",
140 (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
141 0xFDEF,
142 ],
143 [ "non-character code point U+FFFE",
144 (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
145 0xFFFE,
146 ],
147 [ "non-character code point U+FFFF",
148 (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
149 0xFFFF,
150 ],
3f055917
KW
151 [ "overlong malformation, lowest 4-byte",
152 (isASCII) ? "\xf0\x80\x80\x80" : I8_to_native("\xf0\xa0\xa0\xa0"),
153 0, # NUL
154 ],
155 [ "overlong malformation, highest 4-byte",
156 (isASCII) ? "\xf0\x8F\xbf\xbf" : I8_to_native("\xf0\xaf\xbf\xbf"),
157 (isASCII) ? 0xFFFF : 0x3FFF,
158 ],
8f79178b
KW
159 [ "non-character code point U+1FFFE",
160 (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
161 0x1FFFE,
162 ],
163 [ "non-character code point U+1FFFF",
164 (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
165 0x1FFFF,
166 ],
167 [ "non-character code point U+2FFFE",
168 (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
169 0x2FFFE,
170 ],
171 [ "non-character code point U+2FFFF",
172 (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
173 0x2FFFF,
174 ],
175 [ "non-character code point U+3FFFE",
176 (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
177 0x3FFFE,
178 ],
179 [ "non-character code point U+3FFFF",
180 (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
181 0x3FFFF,
182 ],
183 [ "non-character code point U+4FFFE",
184 (isASCII)
185 ? "\xf1\x8f\xbf\xbe"
186 : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
187 0x4FFFE,
188 ],
189 [ "non-character code point U+4FFFF",
190 (isASCII)
191 ? "\xf1\x8f\xbf\xbf"
192 : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
193 0x4FFFF,
194 ],
195 [ "non-character code point U+5FFFE",
196 (isASCII)
197 ? "\xf1\x9f\xbf\xbe"
198 : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
199 0x5FFFE,
200 ],
201 [ "non-character code point U+5FFFF",
202 (isASCII)
203 ? "\xf1\x9f\xbf\xbf"
204 : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
205 0x5FFFF,
206 ],
207 [ "non-character code point U+6FFFE",
208 (isASCII)
209 ? "\xf1\xaf\xbf\xbe"
210 : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
211 0x6FFFE,
212 ],
213 [ "non-character code point U+6FFFF",
214 (isASCII)
215 ? "\xf1\xaf\xbf\xbf"
216 : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
217 0x6FFFF,
218 ],
219 [ "non-character code point U+7FFFE",
220 (isASCII)
221 ? "\xf1\xbf\xbf\xbe"
222 : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
223 0x7FFFE,
224 ],
225 [ "non-character code point U+7FFFF",
226 (isASCII)
227 ? "\xf1\xbf\xbf\xbf"
228 : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
229 0x7FFFF,
230 ],
231 [ "non-character code point U+8FFFE",
232 (isASCII)
233 ? "\xf2\x8f\xbf\xbe"
234 : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
235 0x8FFFE,
236 ],
237 [ "non-character code point U+8FFFF",
238 (isASCII)
239 ? "\xf2\x8f\xbf\xbf"
240 : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
241 0x8FFFF,
242 ],
243 [ "non-character code point U+9FFFE",
244 (isASCII)
245 ? "\xf2\x9f\xbf\xbe"
246 : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
247 0x9FFFE,
248 ],
249 [ "non-character code point U+9FFFF",
250 (isASCII)
251 ? "\xf2\x9f\xbf\xbf"
252 : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
253 0x9FFFF,
254 ],
255 [ "non-character code point U+AFFFE",
256 (isASCII)
257 ? "\xf2\xaf\xbf\xbe"
258 : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
259 0xAFFFE,
260 ],
261 [ "non-character code point U+AFFFF",
262 (isASCII)
263 ? "\xf2\xaf\xbf\xbf"
264 : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
265 0xAFFFF,
266 ],
267 [ "non-character code point U+BFFFE",
268 (isASCII)
269 ? "\xf2\xbf\xbf\xbe"
270 : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
271 0xBFFFE,
272 ],
273 [ "non-character code point U+BFFFF",
274 (isASCII)
275 ? "\xf2\xbf\xbf\xbf"
276 : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
277 0xBFFFF,
278 ],
279 [ "non-character code point U+CFFFE",
280 (isASCII)
281 ? "\xf3\x8f\xbf\xbe"
282 : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
283 0xCFFFE,
284 ],
285 [ "non-character code point U+CFFFF",
286 (isASCII)
287 ? "\xf3\x8f\xbf\xbf"
288 : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
289 0xCFFFF,
290 ],
291 [ "non-character code point U+DFFFE",
292 (isASCII)
293 ? "\xf3\x9f\xbf\xbe"
294 : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
295 0xDFFFE,
296 ],
297 [ "non-character code point U+DFFFF",
298 (isASCII)
299 ? "\xf3\x9f\xbf\xbf"
300 : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
301 0xDFFFF,
302 ],
303 [ "non-character code point U+EFFFE",
304 (isASCII)
305 ? "\xf3\xaf\xbf\xbe"
306 : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
307 0xEFFFE,
308 ],
309 [ "non-character code point U+EFFFF",
310 (isASCII)
311 ? "\xf3\xaf\xbf\xbf"
312 : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
313 0xEFFFF,
314 ],
315 [ "non-character code point U+FFFFE",
316 (isASCII)
317 ? "\xf3\xbf\xbf\xbe"
318 : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
319 0xFFFFE,
320 ],
321 [ "non-character code point U+FFFFF",
322 (isASCII)
323 ? "\xf3\xbf\xbf\xbf"
324 : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
325 0xFFFFF,
326 ],
327 [ "non-character code point U+10FFFE",
328 (isASCII)
329 ? "\xf4\x8f\xbf\xbe"
330 : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
331 0x10FFFE,
332 ],
333 [ "non-character code point U+10FFFF",
334 (isASCII)
335 ? "\xf4\x8f\xbf\xbf"
336 : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
337 0x10FFFF,
338 ],
7c9f4ec6
KW
339 [ "first non_unicode",
340 (isASCII)
341 ? "\xf4\x90\x80\x80"
342 : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
343 0x110000,
344 2,
345 ],
346 [ "non_unicode whose first byte tells that",
347 (isASCII)
348 ? "\xf5\x80\x80\x80"
349 : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
350 (isASCII) ? 0x140000 : 0x200000,
351 1,
352 ],
3f055917
KW
353 [ "overlong malformation, lowest 5-byte",
354 (isASCII)
355 ? "\xf8\x80\x80\x80\x80"
356 : I8_to_native("\xf8\xa0\xa0\xa0\xa0"),
357 0, # NUL
358 ],
359 [ "overlong malformation, highest 5-byte",
360 (isASCII)
361 ? "\xf8\x87\xbf\xbf\xbf"
362 : I8_to_native("\xf8\xa7\xbf\xbf\xbf"),
363 (isASCII) ? 0x1FFFFF : 0x3FFFF,
364 ],
365 [ "overlong malformation, lowest 6-byte",
366 (isASCII)
367 ? "\xfc\x80\x80\x80\x80\x80"
368 : I8_to_native("\xfc\xa0\xa0\xa0\xa0\xa0"),
369 0, # NUL
370 ],
371 [ "overlong malformation, highest 6-byte",
372 (isASCII)
373 ? "\xfc\x83\xbf\xbf\xbf\xbf"
374 : I8_to_native("\xfc\xa3\xbf\xbf\xbf\xbf"),
375 (isASCII) ? 0x3FFFFFF : 0x3FFFFF,
376 ],
377 [ "overlong malformation, lowest 7-byte",
378 (isASCII)
379 ? "\xfe\x80\x80\x80\x80\x80\x80"
380 : I8_to_native("\xfe\xa0\xa0\xa0\xa0\xa0\xa0"),
381 0, # NUL
382 ],
383 [ "overlong malformation, highest 7-byte",
384 (isASCII)
385 ? "\xfe\x81\xbf\xbf\xbf\xbf\xbf"
386 : I8_to_native("\xfe\xa1\xbf\xbf\xbf\xbf\xbf"),
387 (isASCII) ? 0x7FFFFFFF : 0x3FFFFFF,
388 ],
48ceb60a 389 [ "lowest 32 bit code point",
8f79178b
KW
390 (isASCII)
391 ? "\xfe\x82\x80\x80\x80\x80\x80"
392 : I8_to_native(
393 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 394 0x80000000,
8f79178b
KW
395 ],
396 [ "highest 32 bit code point",
397 (isASCII)
398 ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
399 : I8_to_native(
400 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
401 0xFFFFFFFF,
8f79178b 402 ],
f4da64d4
KW
403 [ "Lowest 33 bit code point",
404 (isASCII)
405 ? "\xfe\x84\x80\x80\x80\x80\x80"
406 : I8_to_native(
407 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"),
408 ($::is64bit) ? 0x100000000 : -1, # Overflows on 32-bit systems
f4da64d4 409 ],
8f79178b
KW
410 );
411
412 if (! $::is64bit) {
413 if (isASCII) {
8f79178b 414 push @tests,
cf8a8202
KW
415 [ "overlong malformation, but naively looks like overflow",
416 "\xff\x80\x80\x80\x80\x80\x80\x83\xbf\xbf\xbf\xbf\xbf",
417 0xFFFFFFFF,
418 ],
c81d4d83
KW
419 [ "overflow that old algorithm failed to detect",
420 "\xfe\x86\x80\x80\x80\x80\x80",
421 -1,
8f79178b
KW
422 ];
423 }
424 }
c81d4d83 425
3f055917
KW
426 push @tests,
427 [ "overlong malformation, lowest max-byte",
428 (isASCII)
429 ? "\xff\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
430 : I8_to_native(
431 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
432 0, # NUL
433 ],
434 [ "overlong malformation, highest max-byte",
435 (isASCII) # 2**36-1 on ASCII; 2**30-1 on EBCDIC
436 ? "\xff\x80\x80\x80\x80\x80\x80\xbf\xbf\xbf\xbf\xbf\xbf"
437 : I8_to_native(
438 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xbf\xbf\xbf\xbf\xbf\xbf"),
439 (isASCII) ? (($::is64bit) ? 0xFFFFFFFFF : -1) : 0x3FFFFFFF,
440 ];
441
45d8ef8c
KW
442 if (isASCII) {
443 push @tests,
444 [ "Lowest code point requiring 13 bytes to represent", # 2**36
445 "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80",
446 ($::is64bit) ? 0x1000000000 : -1, # overflows on 32bit
447 ],
448 };
449
c81d4d83 450 if ($::is64bit) {
ba627a0b
KW
451 push @tests,
452 [ "highest 64 bit code point",
453 (isASCII)
454 ? "\xff\x80\x8f\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"
455 : I8_to_native(
456 "\xff\xaf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"),
457 0xFFFFFFFFFFFFFFFF,
458 (isASCII) ? 1 : 2,
459 ],
460 [ "first 65 bit code point",
461 (isASCII)
462 ? "\xff\x80\x9f\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
463 : I8_to_native(
464 "\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
465 -1,
ba627a0b 466 ];
c81d4d83
KW
467 if (isASCII) {
468 push @tests,
469 [ "overflow that old algorithm failed to detect",
470 "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
471 -1,
c81d4d83
KW
472 ];
473 }
474 else {
8f79178b
KW
475 push @tests, # These could falsely show wrongly in a naive
476 # implementation
477 [ "requires at least 32 bits",
478 I8_to_native(
6aa905cf 479 "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 480 0x800000000,
8f79178b
KW
481 ],
482 [ "requires at least 32 bits",
483 I8_to_native(
6aa905cf 484 "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 485 0x10000000000,
8f79178b
KW
486 ],
487 [ "requires at least 32 bits",
488 I8_to_native(
6aa905cf 489 "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 490 0x200000000000,
8f79178b
KW
491 ],
492 [ "requires at least 32 bits",
493 I8_to_native(
6aa905cf 494 "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 495 0x4000000000000,
8f79178b
KW
496 ],
497 [ "requires at least 32 bits",
498 I8_to_native(
6aa905cf 499 "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 500 0x80000000000000,
8f79178b
KW
501 ],
502 [ "requires at least 32 bits",
503 I8_to_native(
6aa905cf 504 "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b 505 0x1000000000000000,
8f79178b
KW
506 ];
507 }
6aa905cf 508 }
c7f9e4bb 509}
6aa905cf 510
5722c46d
KW
511sub flags_to_text($$)
512{
513 my ($flags, $flags_to_text_ref) = @_;
514
515 # Returns a string containing a mnemonic representation of the bits that
516 # are set in the $flags. These are assumed to be flag bits. The return
517 # looks like "FOO|BAR|BAZ". The second parameter is a reference to an
518 # array that gives the textual representation of all the possible flags.
519 # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If
520 # no bits at all are set the string "0" is returned;
521
522 my @flag_text;
523 my $shift = 0;
524
525 return "0" if $flags == 0;
526
527 while ($flags) {
528 #diag sprintf "%x", $flags;
529 if ($flags & 1) {
530 push @flag_text, $flags_to_text_ref->[$shift];
531 }
532 $shift++;
533 $flags >>= 1;
534 }
535
536 return join "|", @flag_text;
537}
538
539# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
540# instead of A_, D_, but the prefixes will be used in a a later commit, so
541# minimize churn by having them here.
542my @utf8n_flags_to_text = ( qw(
543 A_EMPTY
544 A_CONTINUATION
545 A_NON_CONTINUATION
546 A_SHORT
547 A_LONG
548 A_LONG_AND_ITS_VALUE
549 PLACEHOLDER
550 A_OVERFLOW
551 D_SURROGATE
552 W_SURROGATE
553 D_NONCHAR
554 W_NONCHAR
555 D_SUPER
556 W_SUPER
d044b7a7
KW
557 D_PERL_EXTENDED
558 W_PERL_EXTENDED
5722c46d
KW
559 CHECK_ONLY
560 NO_CONFIDENCE_IN_CURLEN_
561 ) );
562
a8ee5133
KW
563sub utf8n_display_call($)
564{
565 # Converts an eval string that calls test_utf8n_to_uvchr into a more human
566 # readable form, and returns it. Doesn't work if the byte string contains
567 # an apostrophe. The return will look something like:
568 # test_utf8n_to_uvchr_error('$bytes', $length, $flags)
569 #diag $_[0];
570
571 $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
572 my $text1 = $1; # Everything before the byte string
573 my $bytes = $2;
574 my $text2 = $3; # Includes the length
575 my $flags = $4;
576
577 return $text1
578 . display_bytes($bytes)
579 . $text2
580 . flags_to_text($flags, \@utf8n_flags_to_text)
581 . ')';
582}
583
d884ea32
KW
584sub uvchr_display_call($)
585{
586 # Converts an eval string that calls test_uvchr_to_utf8 into a more human
587 # readable form, and returns it. The return will look something like:
588 # test_uvchr_to_utf8n_flags($uv, $flags)
589 #diag $_[0];
590
591 my @flags_to_text = ( qw(
592 W_SURROGATE
593 W_NONCHAR
594 W_SUPER
d044b7a7 595 W_PERL_EXTENDED
d884ea32
KW
596 D_SURROGATE
597 D_NONCHAR
598 D_SUPER
d044b7a7 599 D_PERL_EXTENDED
d884ea32
KW
600 ) );
601
602 $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
603 my $text = $1;
604 my $cp = sprintf "%X", $2;
605 my $flags = $3;
606
607 return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
608}
609
69485e19
KW
610sub do_warnings_test(@)
611{
612 my @expected_warnings = @_;
613
614 # Compares the input expected warnings array with @warnings_gotten,
615 # generating a pass for each found, removing it from @warnings_gotten.
616 # Any discrepancies generate test failures. Returns TRUE if no
617 # discrepcancies; otherwise FALSE.
618
619 my $succeeded = 1;
620
621 if (@expected_warnings == 0) {
622 if (! is(@warnings_gotten, 0, " Expected and got no warnings")) {
623 output_warnings(@warnings_gotten);
624 $succeeded = 0;
625 }
626 return $succeeded;
627 }
628
629 # Check that we got all the expected warnings,
630 # removing each one found
631 WARNING:
632 foreach my $expected (@expected_warnings) {
633 foreach (my $i = 0; $i < @warnings_gotten; $i++) {
634 if ($warnings_gotten[$i] =~ $expected) {
635 pass(" Expected and got warning: "
636 . " $warnings_gotten[$i]");
637 splice @warnings_gotten, $i, 1;
638 next WARNING;
639 }
640 }
641 fail(" Expected a warning that matches "
642 . $expected . " but didn't get it");
643 $succeeded = 0;
644 }
645
646 if (! is(@warnings_gotten, 0, " Got no unexpected warnings")) {
647 output_warnings(@warnings_gotten);
648 $succeeded = 0;
649 }
650
651 return $succeeded;
652}
653
6aa905cf
KW
654# This test is split into this number of files.
655my $num_test_files = $ENV{TEST_JOBS} || 1;
656$num_test_files = 10 if $num_test_files > 10;
657
658my $test_count = -1;
659foreach my $test (@tests) {
660 $test_count++;
661 next if $test_count % $num_test_files != $::TEST_CHUNK;
662
d42d7565 663 my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
6aa905cf
KW
664
665 my $length = length $bytes;
3f055917 666 my $initially_overlong = $testname =~ /overlong/;
04f42bf6 667 my $will_overflow = $allowed_uv < 0;
6aa905cf 668
2c511c58 669 my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
c0e5ee4b 670 my $display_bytes = display_bytes($bytes);
2c511c58 671
d42d7565 672 my $controlling_warning_category;
af816908
KW
673 my $utf8n_flag_to_warn;
674 my $utf8n_flag_to_disallow;
675 my $uvchr_flag_to_warn;
676 my $uvchr_flag_to_disallow;
b7e1f4b2 677
e4e140b4
KW
678 # We want to test that the independent flags are actually independent.
679 # For example, that a surrogate doesn't trigger a non-character warning,
680 # and conversely, turning off an above-Unicode flag doesn't suppress a
681 # surrogate warning. Earlier versions of this file used nested loops to
682 # test all possible combinations. But that creates lots of tests, making
683 # this run too long. What is now done instead is to use the complement of
684 # the category we are testing to greatly reduce the combinatorial
685 # explosion. For example, if we have a surrogate and we aren't expecting
686 # a warning about it, we set all the flags for non-surrogates to raise
687 # warnings. If one shows up, it indicates the flags aren't independent.
688 my $utf8n_flag_to_warn_complement;
689 my $utf8n_flag_to_disallow_complement;
690 my $uvchr_flag_to_warn_complement;
691 my $uvchr_flag_to_disallow_complement;
692
bf422d6a
KW
693 # Many of the code points being tested are middling in that if code point
694 # edge cases work, these are very likely to as well. Because this test
695 # file takes a while to execute, we skip testing the edge effects of code
696 # points deemed middling, while testing their basics and continuing to
697 # fully test the non-middling code points.
698 my $skip_most_tests = 0;
699
601e92f1
KW
700 my $cp_message_qr; # Pattern that matches the message raised when
701 # that message contains the problematic code
702 # point. The message is the same (currently) both
703 # when going from/to utf8.
704 my $non_cp_trailing_text; # The suffix text when the message doesn't
705 # contain a code point. (This is a result of
706 # some sort of malformation that means we
707 # can't get an exact code poin
57ff5f59
KW
708 my $extended_cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
709 \Q requires a Perl extension, and so is not\E
710 \Q portable\E/x;
711 my $extended_non_cp_trailing_text
712 = "is a Perl extension, and so is not portable";
601e92f1 713
3f055917
KW
714 # What bytes should have been used to specify a code point that has been
715 # specified as an overlong.
716 my $correct_bytes_for_overlong;
717
c0e5ee4b
KW
718 # Is this test malformed from the beginning? If so, we know to generally
719 # expect that the tests will show it isn't valid.
720 my $initially_malformed = 0;
721
3f055917
KW
722 if ($initially_overlong) {
723 $non_cp_trailing_text = "if you see this, there is an error";
724 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
725 if (! defined $needed_to_discern_len) {
726 $needed_to_discern_len = overlong_discern_len($bytes);
727 }
728 $initially_malformed = 1;
729 $correct_bytes_for_overlong = display_bytes_no_quotes(chr $allowed_uv);
730 $utf8n_flag_to_warn = 0;
731 $utf8n_flag_to_disallow = 0;
732
733 $utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE;
734 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE;
735 if (! $will_overflow && $allowed_uv <= 0x10FFFF) {
736 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_SUPER;
737 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_SUPER;
738 if (($allowed_uv & 0xFFFF) != 0xFFFF) {
739 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_NONCHAR;
740 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_NONCHAR;
741 }
742 }
743 if (! is_extended_utf8($bytes)) {
744 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_PERL_EXTENDED;
745 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_PERL_EXTENDED;
746 }
747 $controlling_warning_category = 'utf8';
748 }
749 elsif($will_overflow || $allowed_uv > 0x10FFFF) {
af816908 750
d044b7a7 751 # Set the SUPER flags; later, we test for PERL_EXTENDED as well.
af816908
KW
752 $utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
753 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
754 $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
755 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
756
d044b7a7
KW
757 # Below, we add the flags for non-perl_extended to the code points
758 # that don't fit that category. Special tests are done for this
759 # category in the inner loop.
e4e140b4
KW
760 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
761 |$::UTF8_WARN_SURROGATE;
762 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
763 |$::UTF8_DISALLOW_SURROGATE;
764 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
765 |$::UNICODE_WARN_SURROGATE;
766 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
767 |$::UNICODE_DISALLOW_SURROGATE;
768 $controlling_warning_category = 'non_unicode';
769
8f79178b 770 if ($will_overflow) { # This is realy a malformation
69485e19 771 $non_cp_trailing_text = "if you see this, there is an error";
601e92f1 772 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
c0e5ee4b 773 $initially_malformed = 1;
404a1403
KW
774 if (! defined $needed_to_discern_len) {
775 $needed_to_discern_len = overflow_discern_len($length);
776 }
af816908 777 }
6d736463 778 elsif (requires_extended_utf8($allowed_uv)) {
57ff5f59
KW
779 $cp_message_qr = $extended_cp_message_qr;
780 $non_cp_trailing_text = $extended_non_cp_trailing_text;
781 $needed_to_discern_len = 1 unless defined $needed_to_discern_len;
af816908 782 }
601e92f1
KW
783 else {
784 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
785 \Q may not be portable\E/x;
786 $non_cp_trailing_text = "is for a non-Unicode code point, may not"
787 . " be portable";
d044b7a7
KW
788 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_PERL_EXTENDED;
789 $utf8n_flag_to_disallow_complement
790 |= $::UTF8_DISALLOW_PERL_EXTENDED;
791 $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_PERL_EXTENDED;
e4e140b4 792 $uvchr_flag_to_disallow_complement
d044b7a7 793 |= $::UNICODE_DISALLOW_PERL_EXTENDED;
af816908 794 }
3022ad00
KW
795 }
796 elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
601e92f1
KW
797 $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
798 $non_cp_trailing_text = "is for a surrogate";
67e45424 799 $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
bf422d6a 800 $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
af816908
KW
801
802 $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE;
803 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
804 $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE;
805 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
e4e140b4
KW
806
807 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
808 |$::UTF8_WARN_SUPER
d044b7a7 809 |$::UTF8_WARN_PERL_EXTENDED;
e4e140b4
KW
810 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
811 |$::UTF8_DISALLOW_SUPER
d044b7a7 812 |$::UTF8_DISALLOW_PERL_EXTENDED;
e4e140b4
KW
813 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
814 |$::UNICODE_WARN_SUPER
d044b7a7 815 |$::UNICODE_WARN_PERL_EXTENDED;
e4e140b4
KW
816 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
817 |$::UNICODE_DISALLOW_SUPER
d044b7a7 818 |$::UNICODE_DISALLOW_PERL_EXTENDED;
d42d7565 819 $controlling_warning_category = 'surrogate';
3022ad00
KW
820 }
821 elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
822 || ($allowed_uv & 0xFFFE) == 0xFFFE)
823 {
601e92f1
KW
824 $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
825 \Q is not recommended for open interchange\E/x;
826 $non_cp_trailing_text = "if you see this, there is an error";
67e45424 827 $needed_to_discern_len = $length unless defined $needed_to_discern_len;
bf422d6a
KW
828 if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
829 || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
830 {
831 $skip_most_tests = 1;
832 }
af816908
KW
833
834 $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR;
835 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
836 $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR;
837 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
e4e140b4
KW
838
839 $utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE
840 |$::UTF8_WARN_SUPER
d044b7a7 841 |$::UTF8_WARN_PERL_EXTENDED;
e4e140b4
KW
842 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
843 |$::UTF8_DISALLOW_SUPER
d044b7a7 844 |$::UTF8_DISALLOW_PERL_EXTENDED;
e4e140b4
KW
845 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_SURROGATE
846 |$::UNICODE_WARN_SUPER
d044b7a7 847 |$::UNICODE_WARN_PERL_EXTENDED;
e4e140b4
KW
848 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
849 |$::UNICODE_DISALLOW_SUPER
d044b7a7 850 |$::UNICODE_DISALLOW_PERL_EXTENDED;
e4e140b4 851
d42d7565 852 $controlling_warning_category = 'nonchar';
3022ad00
KW
853 }
854 else {
855 die "Can't figure out what type of warning to test for $testname"
856 }
857
67e45424
KW
858 die 'Didn\'t set $needed_to_discern_len for ' . $testname
859 unless defined $needed_to_discern_len;
c0e5ee4b 860
8139d291
KW
861 # We try various combinations of malformations that can occur
862 foreach my $short (0, 1) {
863 next if $skip_most_tests && $short;
864 foreach my $unexpected_noncont (0, 1) {
865 next if $skip_most_tests && $unexpected_noncont;
866 foreach my $overlong (0, 1) {
867 next if $overlong && $skip_most_tests;
3f055917 868 next if $initially_overlong && ! $overlong;
8f79178b 869
8139d291
KW
870 # If we're creating an overlong, it can't be longer than the
871 # maximum length, so skip if we're already at that length.
3f055917
KW
872 next if (! $initially_overlong && $overlong)
873 && $length >= $::max_bytes;
8139d291 874
57ff5f59
KW
875 my $this_cp_message_qr = $cp_message_qr;
876 my $this_non_cp_trailing_text = $non_cp_trailing_text;
877
717dd9f9
KW
878 foreach my $malformed_allow_type (0..2) {
879 # 0 don't allow this malformation; ignored if no malformation
880 # 1 allow, with REPLACEMENT CHARACTER returned
881 # 2 allow, with intended code point returned. All malformations
882 # other than overlong can't determine the intended code point,
883 # so this isn't valid for them.
884 next if $malformed_allow_type == 2
885 && ($will_overflow || $short || $unexpected_noncont);
886 next if $skip_most_tests && $malformed_allow_type;
717dd9f9 887
4816e15f
KW
888 # Here we are in the innermost loop for malformations. So we
889 # know which ones are in effect. Can now change the input to be
890 # appropriately malformed. We also can set up certain other
891 # things now, like whether we expect a return flag from this
892 # malformation, and which flag.
893
894 my $this_bytes = $bytes;
895 my $this_length = $length;
896 my $this_expected_len = $length;
897 my $this_needed_to_discern_len = $needed_to_discern_len;
898
899 my @malformation_names;
900 my @expected_malformation_warnings;
901 my @expected_malformation_return_flags;
902
717dd9f9
KW
903 # Contains the flags for any allowed malformations. Currently no
904 # combinations of on/off are tested for. It's either all are
905 # allowed, or none are.
906 my $allow_flags = 0;
57ff5f59
KW
907 my $overlong_is_in_perl_extended_utf8 = 0;
908 my $dont_use_overlong_cp = 0;
717dd9f9 909
4816e15f 910 if ($overlong) {
3f055917 911 if (! $initially_overlong) {
9e35eec9 912 my $new_expected_len;
4816e15f
KW
913
914 # To force this malformation, we convert the original start
915 # byte into a continuation byte with the same data bits as
916 # originally. ...
9e35eec9
KW
917 my $start_byte = substr($this_bytes, 0, 1);
918 my $converted_to_continuation_byte
919 = start_byte_to_cont($start_byte);
4816e15f
KW
920
921 # ... Then we prepend it with a known overlong sequence. This
922 # should evaluate to the exact same code point as the
9e35eec9
KW
923 # original. We try to avoid an overlong using Perl extended
924 # UTF-8. The code points are the highest representable as
925 # overlongs on the respective platform without using extended
926 # UTF-8.
927 if (native_to_I8($start_byte) lt "\xFC") {
928 $start_byte = I8_to_native("\xFC");
929 $new_expected_len = 6;
930 }
931 elsif (! isASCII && native_to_I8($start_byte) lt "\xFE") {
932
933 # FE is not extended UTF-8 on EBCDIC
934 $start_byte = I8_to_native("\xFE");
935 $new_expected_len = 7;
936 }
937 else { # Must use extended UTF-8. On ASCII platforms, we
938 # could express some overlongs here starting with
939 # \xFE, but there's no real reason to do so.
57ff5f59 940 $overlong_is_in_perl_extended_utf8 = 1;
9e35eec9
KW
941 $start_byte = I8_to_native("\xFF");
942 $new_expected_len = $::max_bytes;
57ff5f59
KW
943 $this_cp_message_qr = $extended_cp_message_qr;
944
945 # The warning that gets raised doesn't include the code
946 # point in the message if the code point can be expressed
947 # without using extended UTF-8, but the particular
948 # overlong sequence used is in extended UTF-8. To do
949 # otherwise would be confusing to the user, as it would
950 # claim the code point requires extended, when it doesn't.
951 $dont_use_overlong_cp = 1
952 unless requires_extended_utf8($allowed_uv);
953 $this_non_cp_trailing_text = $extended_non_cp_trailing_text;
9e35eec9
KW
954 }
955
956 # Splice in the revise continuation byte, preceded by the
957 # start byte and the proper number of the lowest continuation
958 # bytes.
959 $this_bytes = $start_byte
960 . ($native_lowest_continuation_chr
961 x ( $new_expected_len - 1 - length($this_bytes)))
962 . $converted_to_continuation_byte
963 . substr($this_bytes, 1);
4816e15f 964 $this_length = length($this_bytes);
9e35eec9
KW
965 $this_needed_to_discern_len = $new_expected_len
966 - ( $this_expected_len
4816e15f 967 - $this_needed_to_discern_len);
9e35eec9 968 $this_expected_len = $new_expected_len;
3f055917 969 }
4816e15f
KW
970 }
971
972 if ($short) {
4816e15f
KW
973
974 # To force this malformation, just tell the test to not look
975 # as far as it should into the input.
976 $this_length--;
977 $this_expected_len--;
717dd9f9
KW
978
979 $allow_flags |= $::UTF8_ALLOW_SHORT if $malformed_allow_type;
4816e15f
KW
980 }
981
982 if ($unexpected_noncont) {
4816e15f
KW
983
984 # To force this malformation, change the final continuation
985 # byte into a non continuation.
986 my $pos = ($short) ? -2 : -1;
987 substr($this_bytes, $pos, 1) = '?';
988 $this_expected_len--;
4816e15f
KW
989 }
990
b39a3694
KW
991 # The whole point of a test that is malformed from the beginning
992 # is to test for that malformation. If we've modified things so
993 # much that we don't have enough information to detect that
994 # malformation, there's no point in testing.
995 next if $initially_malformed
996 && $this_expected_len < $this_needed_to_discern_len;
997
4816e15f
KW
998 # Here, we've transformed the input with all of the desired
999 # non-overflow malformations. We are now in a position to
1000 # construct any potential warnings for those malformations. But
1001 # it's a pain to get the detailed messages exactly right, so for
1002 # now XXX, only do so for those that return an explicit code
1003 # point.
1004
1005 if ($overlong) {
153bcbd6
KW
1006 push @malformation_names, 'overlong';
1007 push @expected_malformation_return_flags, $::UTF8_GOT_LONG;
4816e15f
KW
1008
1009 # If one of the other malformation types is also in effect, we
1010 # don't know what the intended code point was.
1011 if ($short || $unexpected_noncont || $will_overflow) {
1012 push @expected_malformation_warnings, qr/overlong/;
1013 }
1014 else {
1015 my $wrong_bytes = display_bytes_no_quotes(
1016 substr($this_bytes, 0, $this_length));
3f055917
KW
1017 if (! defined $correct_bytes_for_overlong) {
1018 $correct_bytes_for_overlong
1019 = display_bytes_no_quotes($bytes);
1020 }
4816e15f
KW
1021 my $prefix = ($allowed_uv > 0x10FFFF) ? "0x" : "U+";
1022 push @expected_malformation_warnings,
1023 qr/\QMalformed UTF-8 character: $wrong_bytes\E
1024 \Q (overlong; instead use\E
3f055917 1025 \Q $correct_bytes_for_overlong to\E
4816e15f
KW
1026 \Q represent $prefix$uv_string)/x;
1027 }
153bcbd6
KW
1028
1029 if ($malformed_allow_type == 2) {
1030 $allow_flags |= $::UTF8_ALLOW_LONG_AND_ITS_VALUE;
1031 }
1032 elsif ($malformed_allow_type) {
1033 $allow_flags |= $::UTF8_ALLOW_LONG;
1034 }
4816e15f
KW
1035 }
1036 if ($short) {
153bcbd6
KW
1037 push @malformation_names, 'short';
1038 push @expected_malformation_return_flags, $::UTF8_GOT_SHORT;
4816e15f
KW
1039 push @expected_malformation_warnings, qr/too short/;
1040 }
1041 if ($unexpected_noncont) {
153bcbd6
KW
1042 push @malformation_names, 'unexpected non-continuation';
1043 push @expected_malformation_return_flags,
1044 $::UTF8_GOT_NON_CONTINUATION;
1045 $allow_flags |= $::UTF8_ALLOW_NON_CONTINUATION
1046 if $malformed_allow_type;
4816e15f
KW
1047 push @expected_malformation_warnings,
1048 qr/unexpected non-continuation byte/;
1049 }
1050
1051 # The overflow malformation is done differently than other
1052 # malformations. It comes from manually typed tests in the test
1053 # array. We now make it be treated like one of the other
1054 # malformations. But some has to be deferred until the inner loop
1055 my $overflow_msg_pattern;
1056 if ($will_overflow) {
1057 push @malformation_names, 'overflow';
1058
1059 $overflow_msg_pattern = display_bytes_no_quotes(
1060 substr($this_bytes, 0, $this_expected_len));
1061 $overflow_msg_pattern = qr/\QMalformed UTF-8 character:\E
1062 \Q $overflow_msg_pattern\E
1063 \Q (overflows)\E/x;
1064 push @expected_malformation_return_flags, $::UTF8_GOT_OVERFLOW;
717dd9f9 1065 $allow_flags |= $::UTF8_ALLOW_OVERFLOW if $malformed_allow_type;
4816e15f
KW
1066 }
1067
1068 # And we can create the malformation-related text for the the test
1069 # names we eventually will generate.
1070 my $malformations_name = "";
1071 if (@malformation_names) {
717dd9f9
KW
1072 $malformations_name .= "dis" unless $malformed_allow_type;
1073 $malformations_name .= "allowed ";
4816e15f
KW
1074 $malformations_name .= "malformation";
1075 $malformations_name .= "s" if @malformation_names > 1;
1076 $malformations_name .= ": ";
1077 $malformations_name .= join "/", @malformation_names;
1078 $malformations_name = " ($malformations_name)";
1079 }
1080
1081 # Done setting up the malformation related stuff
1082
8e0e76af
KW
1083 { # First test the isFOO calls
1084 use warnings; # XXX no warnings 'deprecated'; # Make sure these don't raise warnings
1085 undef @warnings_gotten;
1086
8e0327af
KW
1087 my $ret = test_isUTF8_CHAR($this_bytes, $this_length);
1088 my $ret_flags
1089 = test_isUTF8_CHAR_flags($this_bytes, $this_length, 0);
1090 if ($malformations_name) {
1091 is($ret, 0, "For $testname$malformations_name: isUTF8_CHAR() returns 0");
8e0e76af
KW
1092 is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0");
1093 }
1094 else {
8e0327af
KW
1095 is($ret, $this_length, "For $testname: isUTF8_CHAR() returns"
1096 . " expected length: $this_length");
1097 is($ret_flags, $this_length,
8e0e76af 1098 " And isUTF8_CHAR_flags(...,0) returns expected"
8e0327af 1099 . " length: $this_length");
8e0e76af
KW
1100 }
1101 is(scalar @warnings_gotten, 0,
1102 " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags"
1103 . " generated any warnings")
1104 or output_warnings(@warnings_gotten);
1105
1106 undef @warnings_gotten;
8e0327af
KW
1107 $ret = test_isSTRICT_UTF8_CHAR($this_bytes, $this_length);
1108 if ($malformations_name) {
8e0e76af
KW
1109 is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0");
1110 }
1111 else {
1112 my $expected_ret
1113 = ( $testname =~ /surrogate|non-character/
1114 || $allowed_uv > 0x10FFFF)
1115 ? 0
8e0327af 1116 : $this_length;
8e0e76af
KW
1117 is($ret, $expected_ret,
1118 " And isSTRICT_UTF8_CHAR() returns expected"
1119 . " length: $expected_ret");
8e0327af 1120 $ret = test_isUTF8_CHAR_flags($this_bytes, $this_length,
8e0e76af
KW
1121 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
1122 is($ret, $expected_ret,
1123 " And isUTF8_CHAR_flags('"
1124 . "DISALLOW_ILLEGAL_INTERCHANGE') acts like"
1125 . " isSTRICT_UTF8_CHAR");
1126 }
1127 is(scalar @warnings_gotten, 0,
1128 " And neither isSTRICT_UTF8_CHAR() nor"
1129 . " isUTF8_CHAR_flags generated any warnings")
1130 or output_warnings(@warnings_gotten);
1131
1132 undef @warnings_gotten;
8e0327af
KW
1133 $ret = test_isC9_STRICT_UTF8_CHAR($this_bytes, $this_length);
1134 if ($malformations_name) {
8e0e76af
KW
1135 is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0");
1136 }
1137 else {
1138 my $expected_ret = ( $testname =~ /surrogate/
1139 || $allowed_uv > 0x10FFFF)
1140 ? 0
8e0327af 1141 : $this_expected_len;
8e0e76af
KW
1142 is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()"
1143 . " returns expected length:"
1144 . " $expected_ret");
8e0327af 1145 $ret = test_isUTF8_CHAR_flags($this_bytes, $this_length,
8e0e76af
KW
1146 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
1147 is($ret, $expected_ret,
1148 " And isUTF8_CHAR_flags('"
1149 . "DISALLOW_ILLEGAL_C9_INTERCHANGE') acts like"
1150 . " isC9_STRICT_UTF8_CHAR");
1151 }
1152 is(scalar @warnings_gotten, 0,
1153 " And neither isC9_STRICT_UTF8_CHAR() nor"
1154 . " isUTF8_CHAR_flags generated any warnings")
1155 or output_warnings(@warnings_gotten);
1156
1157 foreach my $disallow_type (0..2) {
1158 # 0 is don't disallow this type of code point
1159 # 1 is do disallow
1160 # 2 is do disallow, but only code points requiring
1161 # perl-extended-UTF8
1162
1163 my $disallow_flags;
1164 my $expected_ret;
1165
8e0327af 1166 if ($malformations_name) {
8e0e76af
KW
1167
1168 # Malformations are by default disallowed, so testing
1169 # with $disallow_type equal to 0 is sufficicient.
1170 next if $disallow_type;
1171
1172 $disallow_flags = 0;
1173 $expected_ret = 0;
1174 }
1175 elsif ($disallow_type == 1) {
1176 $disallow_flags = $utf8n_flag_to_disallow;
1177 $expected_ret = 0;
1178 }
1179 elsif ($disallow_type == 2) {
1180 next if ! requires_extended_utf8($allowed_uv);
1181 $disallow_flags = $::UTF8_DISALLOW_PERL_EXTENDED;
1182 $expected_ret = 0;
1183 }
1184 else { # type is 0
1185 $disallow_flags = $utf8n_flag_to_disallow_complement;
8e0327af 1186 $expected_ret = $this_length;
8e0e76af
KW
1187 }
1188
8e0327af 1189 $ret = test_isUTF8_CHAR_flags($this_bytes, $this_length,
8e0e76af
KW
1190 $disallow_flags);
1191 is($ret, $expected_ret,
1192 " And isUTF8_CHAR_flags($display_bytes,"
1193 . " $disallow_flags) returns $expected_ret")
1194 or diag "The flags mean "
1195 . flags_to_text($disallow_flags,
1196 \@utf8n_flags_to_text);
1197 is(scalar @warnings_gotten, 0,
1198 " And isUTF8_CHAR_flags(...) generated"
1199 . " no warnings")
1200 or output_warnings(@warnings_gotten);
1201
1202 # Test partial character handling, for each byte not a
1203 # full character
1204 my $did_test_partial = 0;
8e0327af 1205 for (my $j = 1; $j < $this_length - 1; $j++) {
8e0e76af 1206 $did_test_partial = 1;
8e0327af 1207 my $partial = substr($this_bytes, 0, $j);
8e0e76af
KW
1208 my $ret_should_be;
1209 my $comment;
8e0327af 1210 if ($disallow_type || $malformations_name) {
8e0e76af
KW
1211 $ret_should_be = 0;
1212 $comment = "disallowed";
8e0327af
KW
1213
1214 # The number of bytes required to tell if a
1215 # sequence has something wrong is the smallest of
1216 # all the things wrong with it. We start with the
1217 # number for this type of code point, if that is
1218 # disallowed; or the whole length if not. The
1219 # latter is what a couple of the malformations
1220 # require.
1221 my $needed_to_tell = ($disallow_type)
1222 ? $this_needed_to_discern_len
1223 : $this_expected_len;
1224
1225 # Then we see if the malformations that are
1226 # detectable early in the string are present.
1227 if ($overlong) {
1228 my $dl = overlong_discern_len($this_bytes);
1229 $needed_to_tell = $dl if $dl < $needed_to_tell;
1230 }
1231 if ($will_overflow) {
1232 my $dl = overflow_discern_len($length);
1233 $needed_to_tell = $dl if $dl < $needed_to_tell;
1234 }
1235
1236 if ($j < $needed_to_tell) {
8e0e76af 1237 $ret_should_be = 1;
8e0327af 1238 $comment .= ", but need $needed_to_tell"
8e0e76af
KW
1239 . " bytes to discern:";
1240 }
1241 }
1242 else {
1243 $ret_should_be = 1;
1244 $comment = "allowed";
1245 }
1246
1247 undef @warnings_gotten;
1248
1249 $ret = test_is_utf8_valid_partial_char_flags($partial,
1250 $j, $disallow_flags);
1251 is($ret, $ret_should_be,
1252 " And is_utf8_valid_partial_char_flags("
1253 . display_bytes($partial)
1254 . ", $disallow_flags), $comment: returns"
1255 . " $ret_should_be")
1256 or diag "The flags mean "
1257 . flags_to_text($disallow_flags, \@utf8n_flags_to_text);
1258 }
1259
1260 if ($did_test_partial) {
1261 is(scalar @warnings_gotten, 0,
1262 " And is_utf8_valid_partial_char_flags()"
1263 . " generated no warnings for any of the lengths")
1264 or output_warnings(@warnings_gotten);
1265 }
1266 }
1267 }
1268
1269 # Now test the to/from UTF-8 calls. There are several orthogonal
1270 # variables involved. We test most possible combinations
1271
8139d291 1272 foreach my $do_disallow (0, 1) {
3f055917
KW
1273 if ($do_disallow) {
1274 next if $initially_overlong;
1275 }
1276 else {
1277 next if $skip_most_tests;
1278 }
6aa905cf 1279
69485e19
KW
1280 # We classify the warnings into certain "interesting" types,
1281 # described later
1282 foreach my $warning_type (0..4) {
1283 next if $skip_most_tests && $warning_type != 1;
1284 foreach my $use_warn_flag (0, 1) {
3f055917
KW
1285 if ($use_warn_flag) {
1286 next if $initially_overlong;
1287 }
1288 else {
1289 next if $skip_most_tests;
1290 }
69485e19 1291
8f79178b
KW
1292 # Finally, here is the inner loop
1293
69485e19
KW
1294 my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
1295 my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
1296 my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
1297 my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
1298
1299 my $eval_warn;
1300 my $expect_regular_warnings;
1301 my $expect_warnings_for_malformed;
1302 my $expect_warnings_for_overflow;
1303
1304 if ($warning_type == 0) {
1305 $eval_warn = "use warnings; no warnings 'deprecated'";
1306 $expect_regular_warnings = $use_warn_flag;
717dd9f9
KW
1307
1308 # We ordinarily expect overflow warnings here. But it
1309 # is somewhat more complicated, and the final
1310 # determination is deferred to one place in the filw
1311 # where we handle overflow.
69485e19 1312 $expect_warnings_for_overflow = 1;
717dd9f9
KW
1313
1314 # We would ordinarily expect malformed warnings in
1315 # this case, but not if malformations are allowed.
1316 $expect_warnings_for_malformed
1317 = $malformed_allow_type == 0;
6aa905cf 1318 }
69485e19
KW
1319 elsif ($warning_type == 1) {
1320 $eval_warn = "no warnings";
1321 $expect_regular_warnings = 0;
1322 $expect_warnings_for_overflow = 0;
1323 $expect_warnings_for_malformed = 0;
1324 }
1325 elsif ($warning_type == 2) {
1326 $eval_warn = "no warnings; use warnings 'utf8'";
1327 $expect_regular_warnings = $use_warn_flag;
1328 $expect_warnings_for_overflow = 1;
717dd9f9
KW
1329 $expect_warnings_for_malformed
1330 = $malformed_allow_type == 0;
69485e19
KW
1331 }
1332 elsif ($warning_type == 3) {
1333 $eval_warn = "no warnings; use warnings"
e4e140b4
KW
1334 . " '$controlling_warning_category'";
1335 $expect_regular_warnings = $use_warn_flag;
69485e19 1336 $expect_warnings_for_overflow
e4e140b4 1337 = $controlling_warning_category eq 'non_unicode';
69485e19
KW
1338 $expect_warnings_for_malformed = 0;
1339 }
1340 elsif ($warning_type == 4) { # Like type 3, but uses the
d044b7a7 1341 # PERL_EXTENDED flags
69485e19 1342 # The complement flags were set up so that the
d044b7a7 1343 # PERL_EXTENDED flags have been tested that they don't
69485e19
KW
1344 # trigger wrongly for too small code points. And the
1345 # flags have been set up so that those small code
1346 # points are tested for being above Unicode. What's
1347 # left to test is that the large code points do
d044b7a7 1348 # trigger the PERL_EXTENDED flags.
6d736463 1349 next if ! requires_extended_utf8($allowed_uv);
e4e140b4 1350 next if $controlling_warning_category ne 'non_unicode';
69485e19
KW
1351 $eval_warn = "no warnings; use warnings 'non_unicode'";
1352 $expect_regular_warnings = 1;
1353 $expect_warnings_for_overflow = 1;
1354 $expect_warnings_for_malformed = 0;
d044b7a7 1355 $this_utf8n_flag_to_warn = $::UTF8_WARN_PERL_EXTENDED;
69485e19 1356 $this_utf8n_flag_to_disallow
d044b7a7
KW
1357 = $::UTF8_DISALLOW_PERL_EXTENDED;
1358 $this_uvchr_flag_to_warn
1359 = $::UNICODE_WARN_PERL_EXTENDED;
69485e19 1360 $this_uvchr_flag_to_disallow
d044b7a7 1361 = $::UNICODE_DISALLOW_PERL_EXTENDED;
601e92f1
KW
1362 }
1363 else {
69485e19 1364 die "Unexpected warning type '$warning_type'";
601e92f1
KW
1365 }
1366
69485e19
KW
1367 # We only need to test the case where all warnings are
1368 # enabled (type 0) to see if turning off the warning flag
1369 # causes things to not be output. If those pass, then
1370 # turning on some sub-category of warnings, or turning off
1371 # warnings altogether are extremely likely to not output
1372 # warnings either, given how the warnings subsystem is
1373 # supposed to work, and this file assumes it does work.
1374 next if $warning_type != 0 && ! $use_warn_flag;
1375
1376 # The convention is that the 'got' flag is the same value
1377 # as the disallow one. If this were violated, the tests
1378 # here should start failing.
1379 my $return_flag = $this_utf8n_flag_to_disallow;
1380
e4e140b4
KW
1381 # If we aren't expecting warnings/disallow for this, turn
1382 # on all the other flags. That makes sure that they all
1383 # are independent of this flag, and so we don't need to
1384 # test them individually.
57ff5f59
KW
1385 my $this_warning_flags
1386 = ($use_warn_flag)
1387 ? $this_utf8n_flag_to_warn
1388 : ($overlong_is_in_perl_extended_utf8
1389 ? ($utf8n_flag_to_warn_complement
1390 & ~$::UTF8_WARN_PERL_EXTENDED)
1391 : $utf8n_flag_to_warn_complement);
1392 my $this_disallow_flags
1393 = ($do_disallow)
1394 ? $this_utf8n_flag_to_disallow
1395 : ($overlong_is_in_perl_extended_utf8
1396 ? ($utf8n_flag_to_disallow_complement
1397 & ~$::UTF8_DISALLOW_PERL_EXTENDED)
1398 : $utf8n_flag_to_disallow_complement);
6aa905cf 1399 my $expected_uv = $allowed_uv;
717dd9f9 1400 my $this_uv_string = $uv_string;
6aa905cf 1401
4816e15f
KW
1402 my @expected_return_flags
1403 = @expected_malformation_return_flags;
69485e19 1404 my @expected_warnings;
4816e15f 1405 push @expected_warnings, @expected_malformation_warnings
69485e19
KW
1406 if $expect_warnings_for_malformed;
1407
69485e19
KW
1408 # The overflow malformation is done differently than other
1409 # malformations. It comes from manually typed tests in
1410 # the test array, but it also is above Unicode and uses
1411 # Perl extended UTF-8, so affects some of the flags being
1412 # tested. We now make it be treated like one of the other
1413 # generated malformations.
1414 if ($will_overflow) {
1415
1416 # An overflow is (way) above Unicode, and overrides
1417 # everything else.
1418 $expect_regular_warnings = 0;
1419
717dd9f9
KW
1420 # Earlier, we tentatively calculated whether this
1421 # should emit a message or not. It's tentative
1422 # because, even if we ordinarily would output it, we
1423 # don't if malformations are allowed -- except an
d044b7a7 1424 # overflow is also a SUPER and PERL_EXTENDED, and if
717dd9f9
KW
1425 # warnings for those are enabled, the overflow
1426 # warning does get raised.
1427 if ( $expect_warnings_for_overflow
1428 && ( $malformed_allow_type == 0
1429 || ( $this_warning_flags
1430 & ($::UTF8_WARN_SUPER
d044b7a7 1431 |$::UTF8_WARN_PERL_EXTENDED))))
717dd9f9 1432 {
4816e15f 1433 push @expected_warnings, $overflow_msg_pattern;
6aa905cf 1434 }
69485e19
KW
1435 }
1436
1437 # It may be that the malformations have shortened the
1438 # amount of input we look at so much that we can't tell
1439 # what the category the code point was in. Otherwise, set
1440 # up the expected return flags based on the warnings and
1441 # disallowments.
1442 if ($this_expected_len < $this_needed_to_discern_len) {
1443 $expect_regular_warnings = 0;
1444 }
1445 elsif ( ($this_warning_flags & $this_utf8n_flag_to_warn)
1446 || ( $this_disallow_flags
1447 & $this_utf8n_flag_to_disallow))
1448 {
1449 push @expected_return_flags, $return_flag;
1450 }
1451
1452 # Finish setting up the expected warning.
1453 if ($expect_regular_warnings) {
1454
1455 # So far the array contains warnings generated by
1456 # malformations. Add the expected regular one.
57ff5f59 1457 unshift @expected_warnings, $this_cp_message_qr;
69485e19
KW
1458
1459 # But it may need to be modified, because either of
1460 # these malformations means we can't determine the
1461 # expected code point.
57ff5f59
KW
1462 if ( $short || $unexpected_noncont
1463 || $dont_use_overlong_cp)
1464 {
69485e19
KW
1465 my $first_byte = substr($this_bytes, 0, 1);
1466 $expected_warnings[0] = display_bytes(
1467 substr($this_bytes, 0, $this_expected_len));
1468 $expected_warnings[0]
1469 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
1470 \Q $expected_warnings[0]\E
57ff5f59 1471 \Q $this_non_cp_trailing_text\E/x;
69485e19
KW
1472 }
1473 }
6aa905cf 1474
717dd9f9
KW
1475 # Is effectively disallowed if we've set up a malformation
1476 # (unless malformations are allowed), even if the flag
1477 # indicates it is allowed. Fix up test name to indicate
1478 # this as well
1479 my $disallowed = 0;
1480 if ( $this_disallow_flags & $this_utf8n_flag_to_disallow
1481 && $this_expected_len >= $this_needed_to_discern_len)
1482 {
1483 $disallowed = 1;
1484 }
1485 if ($malformations_name) {
1486 if ($malformed_allow_type == 0) {
1487 $disallowed = 1;
1488 }
1489 elsif ($malformed_allow_type == 1) {
1490
1491 # Even if allowed, the malformation returns the
1492 # REPLACEMENT CHARACTER.
1493 $expected_uv = 0xFFFD;
1494 $this_uv_string = "0xFFFD"
1495 }
1496 }
1497
a93dd121
KW
1498 my $this_name = "utf8n_to_uvchr_error() $testname: ";
1499 if (! $initially_malformed) {
1500 $this_name .= ($disallowed)
1501 ? 'disallowed, '
1502 : 'allowed, ';
1503 }
1504 $this_name .= "$eval_warn";
69485e19
KW
1505 $this_name .= ", " . (( $this_warning_flags
1506 & $this_utf8n_flag_to_warn)
1507 ? 'with flag for raising warnings'
1508 : 'no flag for raising warnings');
1509 $this_name .= $malformations_name;
db0f09e6 1510
8f79178b 1511 # Do the actual test using an eval
9cdc3054 1512 undef @warnings_gotten;
6aa905cf 1513 my $ret_ref;
717dd9f9
KW
1514 my $this_flags
1515 = $allow_flags|$this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1516 my $eval_text = "$eval_warn; \$ret_ref"
1517 . " = test_utf8n_to_uvchr_error("
69485e19 1518 . "'$this_bytes', $this_length, $this_flags)";
6aa905cf 1519 eval "$eval_text";
69485e19 1520 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
6aa905cf 1521 {
a8ee5133
KW
1522 diag "\$@='$@'; call was: "
1523 . utf8n_display_call($eval_text);
6aa905cf
KW
1524 next;
1525 }
1526 if ($disallowed) {
d402d77f 1527 is($ret_ref->[0], 0, " And returns 0")
a8ee5133 1528 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1529 }
1530 else {
1531 is($ret_ref->[0], $expected_uv,
d402d77f 1532 " And returns expected uv: "
717dd9f9 1533 . $this_uv_string)
a8ee5133 1534 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1535 }
1536 is($ret_ref->[1], $this_expected_len,
d402d77f 1537 " And returns expected length:"
6aa905cf 1538 . " $this_expected_len")
a8ee5133 1539 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1540
9cdc3054 1541 my $returned_flags = $ret_ref->[2];
6aa905cf 1542
9cdc3054 1543 for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
69485e19
KW
1544 if ($expected_return_flags[$i] & $returned_flags) {
1545 if ($expected_return_flags[$i]
d044b7a7 1546 == $::UTF8_GOT_PERL_EXTENDED)
69485e19
KW
1547 {
1548 pass(" Expected and got return flag for"
d044b7a7 1549 . " PERL_EXTENDED");
69485e19
KW
1550 }
1551 # The first entries in this are
1552 # malformations
1553 elsif ($i > @malformation_names - 1) {
1554 pass(" Expected and got return flag"
e4e140b4 1555 . " for " . $controlling_warning_category);
69485e19
KW
1556 }
1557 else {
1558 pass(" Expected and got return flag for "
1559 . $malformation_names[$i]
1560 . " malformation");
1561 }
9cdc3054 1562 $returned_flags &= ~$expected_return_flags[$i];
69485e19 1563 splice @expected_return_flags, $i, 1;
6aa905cf 1564 }
6aa905cf 1565 }
6aa905cf 1566
69485e19
KW
1567 is($returned_flags, 0,
1568 " Got no unexpected return flags")
1569 or diag "The unexpected flags gotten were: "
5722c46d
KW
1570 . (flags_to_text($returned_flags,
1571 \@utf8n_flags_to_text)
69485e19
KW
1572 # We strip off any prefixes from the flag
1573 # names
1574 =~ s/ \b [A-Z] _ //xgr);
1575 is (scalar @expected_return_flags, 0,
1576 " Got all expected return flags")
1577 or diag "The expected flags not gotten were: "
1578 . (flags_to_text(eval join("|",
1579 @expected_return_flags),
1580 \@utf8n_flags_to_text)
1581 # We strip off any prefixes from the flag
1582 # names
5722c46d 1583 =~ s/ \b [A-Z] _ //xgr);
9cdc3054 1584
69485e19
KW
1585 do_warnings_test(@expected_warnings)
1586 or diag "Call was: " . utf8n_display_call($eval_text);
1587 undef @warnings_gotten;
6aa905cf
KW
1588
1589 # Check CHECK_ONLY results when the input is
1590 # disallowed. Do this when actually disallowed,
69485e19 1591 # not just when the $this_disallow_flags is set
6aa905cf 1592 if ($disallowed) {
69485e19
KW
1593 my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1594 my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1595 . " test_utf8n_to_uvchr_error('"
1596 . "$this_bytes', $this_length,"
1597 . " $this_flags)";
1598 eval $eval_text;
a8ee5133
KW
1599 if (! ok ("$@ eq ''",
1600 " And eval succeeded with CHECK_ONLY"))
1601 {
1602 diag "\$@='$@'; Call was: "
1603 . utf8n_display_call($eval_text);
1604 next;
1605 }
d402d77f 1606 is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0")
a8ee5133 1607 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1608 is($ret_ref->[1], -1,
d402d77f 1609 " CHECK_ONLY: returns -1 for length")
a8ee5133 1610 or diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1611 if (! is(scalar @warnings_gotten, 0,
d402d77f 1612 " CHECK_ONLY: no warnings generated"))
6aa905cf 1613 {
a8ee5133 1614 diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1615 output_warnings(@warnings_gotten);
6aa905cf
KW
1616 }
1617 }
1618
1619 # Now repeat some of the above, but for
1620 # uvchr_to_utf8_flags(). Since this comes from an
8f79178b
KW
1621 # existing code point, it hasn't overflowed, and isn't
1622 # malformed.
69485e19
KW
1623 next if @malformation_names;
1624
1625 $this_warning_flags = ($use_warn_flag)
1626 ? $this_uvchr_flag_to_warn
1627 : 0;
1628 $this_disallow_flags = ($do_disallow)
1629 ? $this_uvchr_flag_to_disallow
1630 : 0;
1631
1632 $disallowed = $this_disallow_flags
1633 & $this_uvchr_flag_to_disallow;
1634 $this_name .= ", " . (( $this_warning_flags
1635 & $this_utf8n_flag_to_warn)
1636 ? 'with flag for raising warnings'
1637 : 'no flag for raising warnings');
6aa905cf
KW
1638
1639 $this_name = "uvchr_to_utf8_flags() $testname: "
69485e19 1640 . (($disallowed)
6aa905cf 1641 ? 'disallowed'
6aa905cf
KW
1642 : 'allowed');
1643 $this_name .= ", $eval_warn";
69485e19
KW
1644 $this_name .= ", " . (( $this_warning_flags
1645 & $this_uvchr_flag_to_warn)
6aa905cf
KW
1646 ? 'with warning flag'
1647 : 'no warning flag');
1648
9cdc3054 1649 undef @warnings_gotten;
6aa905cf 1650 my $ret;
69485e19 1651 $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1652 $eval_text = "$eval_warn; \$ret ="
1653 . " test_uvchr_to_utf8_flags("
d884ea32 1654 . "$allowed_uv, $this_flags)";
6aa905cf
KW
1655 eval "$eval_text";
1656 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1657 {
d884ea32
KW
1658 diag "\$@='$@'; call was: "
1659 . uvchr_display_call($eval_text);
6aa905cf
KW
1660 next;
1661 }
1662 if ($disallowed) {
d402d77f 1663 is($ret, undef, " And returns undef")
d884ea32 1664 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1665 }
1666 else {
d402d77f 1667 is($ret, $this_bytes, " And returns expected string")
d884ea32 1668 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf 1669 }
69485e19
KW
1670
1671 do_warnings_test(@expected_warnings)
1672 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1673 }
1674 }
717dd9f9 1675 }
6aa905cf
KW
1676 }
1677 }
8f79178b 1678 }
6aa905cf
KW
1679 }
1680}
1681
1682done_testing;