This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: Remove hash element
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
CommitLineData
6aa905cf
KW
1#!perl -w
2
3# This is a base file to be used by various .t's in its directory
4
5use strict;
6use Test::More;
7
8BEGIN {
9 use_ok('XS::APItest');
10 require 'charset_tools.pl';
11 require './t/utf8_setup.pl';
12};
13
14$|=1;
15
16no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
17 # machines, and that is tested elsewhere
18
19use XS::APItest;
20use Data::Dumper;
21
9cdc3054 22my @warnings_gotten;
6aa905cf
KW
23
24use warnings 'utf8';
8d6f1506
KW
25local $SIG{__WARN__} = sub { my @copy = @_;
26 push @warnings_gotten, map { chomp; $_ } @copy;
27 };
6aa905cf 28
6aa905cf
KW
29
30# Now test the cases where a legal code point is generated, but may or may not
31# be allowed/warned on.
32my @tests = (
d42d7565 33 # ($testname, $bytes, $allowed_uv, $needed_to_discern_len )
6aa905cf
KW
34 [ "lowest surrogate",
35 (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
d42d7565 36 0xD800,
6aa905cf
KW
37 ],
38 [ "a middle surrogate",
39 (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
d42d7565 40 0xD90D,
6aa905cf
KW
41 ],
42 [ "highest surrogate",
43 (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
d42d7565 44 0xDFFF,
6aa905cf
KW
45 ],
46 [ "first non_unicode",
47 (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
d42d7565 48 0x110000,
6aa905cf 49 2,
6aa905cf
KW
50 ],
51 [ "non_unicode whose first byte tells that",
52 (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
6aa905cf 53 (isASCII) ? 0x140000 : 0x200000,
6aa905cf 54 1,
6aa905cf
KW
55 ],
56 [ "first of 32 consecutive non-character code points",
57 (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
d42d7565 58 0xFDD0,
6aa905cf
KW
59 ],
60 [ "a mid non-character code point of the 32 consecutive ones",
61 (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
d42d7565 62 0xFDE0,
6aa905cf
KW
63 ],
64 [ "final of 32 consecutive non-character code points",
65 (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
d42d7565 66 0xFDEF,
6aa905cf
KW
67 ],
68 [ "non-character code point U+FFFE",
69 (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
d42d7565 70 0xFFFE,
6aa905cf
KW
71 ],
72 [ "non-character code point U+FFFF",
73 (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
d42d7565 74 0xFFFF,
6aa905cf
KW
75 ],
76 [ "non-character code point U+1FFFE",
77 (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
d42d7565 78 0x1FFFE,
6aa905cf
KW
79 ],
80 [ "non-character code point U+1FFFF",
81 (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
d42d7565 82 0x1FFFF,
6aa905cf
KW
83 ],
84 [ "non-character code point U+2FFFE",
85 (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
d42d7565 86 0x2FFFE,
6aa905cf
KW
87 ],
88 [ "non-character code point U+2FFFF",
89 (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
d42d7565 90 0x2FFFF,
6aa905cf
KW
91 ],
92 [ "non-character code point U+3FFFE",
93 (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
d42d7565 94 0x3FFFE,
6aa905cf
KW
95 ],
96 [ "non-character code point U+3FFFF",
97 (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
d42d7565 98 0x3FFFF,
6aa905cf
KW
99 ],
100 [ "non-character code point U+4FFFE",
101 (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
d42d7565 102 0x4FFFE,
6aa905cf
KW
103 ],
104 [ "non-character code point U+4FFFF",
105 (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
d42d7565 106 0x4FFFF,
6aa905cf
KW
107 ],
108 [ "non-character code point U+5FFFE",
109 (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
d42d7565 110 0x5FFFE,
6aa905cf
KW
111 ],
112 [ "non-character code point U+5FFFF",
113 (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
d42d7565 114 0x5FFFF,
6aa905cf
KW
115 ],
116 [ "non-character code point U+6FFFE",
117 (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
d42d7565 118 0x6FFFE,
6aa905cf
KW
119 ],
120 [ "non-character code point U+6FFFF",
121 (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
d42d7565 122 0x6FFFF,
6aa905cf
KW
123 ],
124 [ "non-character code point U+7FFFE",
125 (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
d42d7565 126 0x7FFFE,
6aa905cf
KW
127 ],
128 [ "non-character code point U+7FFFF",
129 (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
d42d7565 130 0x7FFFF,
6aa905cf
KW
131 ],
132 [ "non-character code point U+8FFFE",
133 (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
d42d7565 134 0x8FFFE,
6aa905cf
KW
135 ],
136 [ "non-character code point U+8FFFF",
137 (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
d42d7565 138 0x8FFFF,
6aa905cf
KW
139 ],
140 [ "non-character code point U+9FFFE",
141 (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
d42d7565 142 0x9FFFE,
6aa905cf
KW
143 ],
144 [ "non-character code point U+9FFFF",
145 (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
d42d7565 146 0x9FFFF,
6aa905cf
KW
147 ],
148 [ "non-character code point U+AFFFE",
149 (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
d42d7565 150 0xAFFFE,
6aa905cf
KW
151 ],
152 [ "non-character code point U+AFFFF",
153 (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
d42d7565 154 0xAFFFF,
6aa905cf
KW
155 ],
156 [ "non-character code point U+BFFFE",
157 (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
d42d7565 158 0xBFFFE,
6aa905cf
KW
159 ],
160 [ "non-character code point U+BFFFF",
161 (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
d42d7565 162 0xBFFFF,
6aa905cf
KW
163 ],
164 [ "non-character code point U+CFFFE",
165 (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
d42d7565 166 0xCFFFE,
6aa905cf
KW
167 ],
168 [ "non-character code point U+CFFFF",
169 (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
d42d7565 170 0xCFFFF,
6aa905cf
KW
171 ],
172 [ "non-character code point U+DFFFE",
173 (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
d42d7565 174 0xDFFFE,
6aa905cf
KW
175 ],
176 [ "non-character code point U+DFFFF",
177 (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
d42d7565 178 0xDFFFF,
6aa905cf
KW
179 ],
180 [ "non-character code point U+EFFFE",
181 (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
d42d7565 182 0xEFFFE,
6aa905cf
KW
183 ],
184 [ "non-character code point U+EFFFF",
185 (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
d42d7565 186 0xEFFFF,
6aa905cf
KW
187 ],
188 [ "non-character code point U+FFFFE",
189 (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
d42d7565 190 0xFFFFE,
6aa905cf
KW
191 ],
192 [ "non-character code point U+FFFFF",
193 (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
d42d7565 194 0xFFFFF,
6aa905cf
KW
195 ],
196 [ "non-character code point U+10FFFE",
197 (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
d42d7565 198 0x10FFFE,
6aa905cf
KW
199 ],
200 [ "non-character code point U+10FFFF",
201 (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
d42d7565 202 0x10FFFF,
6aa905cf 203 ],
76513bdc
KW
204 [ "requires at least 32 bits",
205 (isASCII)
206 ? "\xfe\x82\x80\x80\x80\x80\x80"
207 : I8_to_native(
208 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
209 # This code point is chosen so that it is representable in a UV on
210 # 32-bit machines
d42d7565 211 0x80000000,
76513bdc 212 (isASCII) ? 1 : 8,
76513bdc
KW
213 ],
214 [ "highest 32 bit code point",
215 (isASCII)
216 ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
217 : I8_to_native(
218 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
d42d7565 219 0xFFFFFFFF,
76513bdc 220 (isASCII) ? 1 : 8,
76513bdc
KW
221 ],
222 [ "requires at least 32 bits, and use SUPER-type flags, instead of"
223 . " ABOVE_31_BIT",
224 (isASCII)
225 ? "\xfe\x82\x80\x80\x80\x80\x80"
226 : I8_to_native(
227 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 228 0x80000000,
76513bdc 229 1,
76513bdc
KW
230 ],
231 [ "overflow with warnings/disallow for more than 31 bits",
232 # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT
233 # with overflow. The overflow malformation is never allowed, so
234 # preventing it takes precedence if the ABOVE_31_BIT options would
235 # otherwise allow in an overflowing value. The ASCII code points (1
236 # for 32-bits; 1 for 64) were chosen because the old overflow
237 # detection algorithm did not catch them; this means this test also
238 # checks for that fix. The EBCDIC are arbitrary overflowing ones
239 # since we have no reports of failures with it.
240 (($::is64bit)
241 ? ((isASCII)
242 ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"
243 : I8_to_native(
244 "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"))
245 : ((isASCII)
246 ? "\xfe\x86\x80\x80\x80\x80\x80"
247 : I8_to_native(
248 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))),
d42d7565 249 -1,
76513bdc 250 (isASCII || $::is64bit) ? 2 : 8,
76513bdc 251 ],
6aa905cf
KW
252);
253
254if (! $::is64bit) {
255 if (isASCII) {
256 no warnings qw{portable overflow};
257 push @tests,
258 [ "Lowest 33 bit code point: overflow",
259 "\xFE\x84\x80\x80\x80\x80\x80",
d42d7565 260 -1,
b3169593 261 1,
6aa905cf
KW
262 ];
263 }
264}
265else {
266 no warnings qw{portable overflow};
267 push @tests,
268 [ "More than 32 bits",
269 (isASCII)
270 ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80"
271 : I8_to_native(
272 "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 273 0x1000000000,
b3169593 274 (isASCII) ? 1 : 7,
6aa905cf
KW
275 ];
276 if (! isASCII) {
277 push @tests, # These could falsely show wrongly in a naive
278 # implementation
279 [ "requires at least 32 bits",
280 I8_to_native(
281 "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 282 0x800000000,
b3169593 283 7,
6aa905cf
KW
284 ],
285 [ "requires at least 32 bits",
286 I8_to_native(
287 "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 288 0x10000000000,
b3169593 289 6,
6aa905cf
KW
290 ],
291 [ "requires at least 32 bits",
292 I8_to_native(
293 "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 294 0x200000000000,
b3169593 295 5,
6aa905cf
KW
296 ],
297 [ "requires at least 32 bits",
298 I8_to_native(
299 "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 300 0x4000000000000,
b3169593 301 4,
6aa905cf
KW
302 ],
303 [ "requires at least 32 bits",
304 I8_to_native(
305 "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 306 0x80000000000000,
b3169593 307 3,
6aa905cf
KW
308 ],
309 [ "requires at least 32 bits",
310 I8_to_native(
311 "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
d42d7565 312 0x1000000000000000,
b3169593 313 2,
6aa905cf
KW
314 ];
315 }
316}
317
5722c46d
KW
318sub flags_to_text($$)
319{
320 my ($flags, $flags_to_text_ref) = @_;
321
322 # Returns a string containing a mnemonic representation of the bits that
323 # are set in the $flags. These are assumed to be flag bits. The return
324 # looks like "FOO|BAR|BAZ". The second parameter is a reference to an
325 # array that gives the textual representation of all the possible flags.
326 # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If
327 # no bits at all are set the string "0" is returned;
328
329 my @flag_text;
330 my $shift = 0;
331
332 return "0" if $flags == 0;
333
334 while ($flags) {
335 #diag sprintf "%x", $flags;
336 if ($flags & 1) {
337 push @flag_text, $flags_to_text_ref->[$shift];
338 }
339 $shift++;
340 $flags >>= 1;
341 }
342
343 return join "|", @flag_text;
344}
345
346# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
347# instead of A_, D_, but the prefixes will be used in a a later commit, so
348# minimize churn by having them here.
349my @utf8n_flags_to_text = ( qw(
350 A_EMPTY
351 A_CONTINUATION
352 A_NON_CONTINUATION
353 A_SHORT
354 A_LONG
355 A_LONG_AND_ITS_VALUE
356 PLACEHOLDER
357 A_OVERFLOW
358 D_SURROGATE
359 W_SURROGATE
360 D_NONCHAR
361 W_NONCHAR
362 D_SUPER
363 W_SUPER
364 D_ABOVE_31_BIT
365 W_ABOVE_31_BIT
366 CHECK_ONLY
367 NO_CONFIDENCE_IN_CURLEN_
368 ) );
369
a8ee5133
KW
370sub utf8n_display_call($)
371{
372 # Converts an eval string that calls test_utf8n_to_uvchr into a more human
373 # readable form, and returns it. Doesn't work if the byte string contains
374 # an apostrophe. The return will look something like:
375 # test_utf8n_to_uvchr_error('$bytes', $length, $flags)
376 #diag $_[0];
377
378 $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
379 my $text1 = $1; # Everything before the byte string
380 my $bytes = $2;
381 my $text2 = $3; # Includes the length
382 my $flags = $4;
383
384 return $text1
385 . display_bytes($bytes)
386 . $text2
387 . flags_to_text($flags, \@utf8n_flags_to_text)
388 . ')';
389}
390
d884ea32
KW
391sub uvchr_display_call($)
392{
393 # Converts an eval string that calls test_uvchr_to_utf8 into a more human
394 # readable form, and returns it. The return will look something like:
395 # test_uvchr_to_utf8n_flags($uv, $flags)
396 #diag $_[0];
397
398 my @flags_to_text = ( qw(
399 W_SURROGATE
400 W_NONCHAR
401 W_SUPER
402 W_ABOVE_31_BIT
403 D_SURROGATE
404 D_NONCHAR
405 D_SUPER
406 D_ABOVE_31_BIT
407 ) );
408
409 $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
410 my $text = $1;
411 my $cp = sprintf "%X", $2;
412 my $flags = $3;
413
414 return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
415}
416
69485e19
KW
417sub do_warnings_test(@)
418{
419 my @expected_warnings = @_;
420
421 # Compares the input expected warnings array with @warnings_gotten,
422 # generating a pass for each found, removing it from @warnings_gotten.
423 # Any discrepancies generate test failures. Returns TRUE if no
424 # discrepcancies; otherwise FALSE.
425
426 my $succeeded = 1;
427
428 if (@expected_warnings == 0) {
429 if (! is(@warnings_gotten, 0, " Expected and got no warnings")) {
430 output_warnings(@warnings_gotten);
431 $succeeded = 0;
432 }
433 return $succeeded;
434 }
435
436 # Check that we got all the expected warnings,
437 # removing each one found
438 WARNING:
439 foreach my $expected (@expected_warnings) {
440 foreach (my $i = 0; $i < @warnings_gotten; $i++) {
441 if ($warnings_gotten[$i] =~ $expected) {
442 pass(" Expected and got warning: "
443 . " $warnings_gotten[$i]");
444 splice @warnings_gotten, $i, 1;
445 next WARNING;
446 }
447 }
448 fail(" Expected a warning that matches "
449 . $expected . " but didn't get it");
450 $succeeded = 0;
451 }
452
453 if (! is(@warnings_gotten, 0, " Got no unexpected warnings")) {
454 output_warnings(@warnings_gotten);
455 $succeeded = 0;
456 }
457
458 return $succeeded;
459}
460
6aa905cf
KW
461# This test is split into this number of files.
462my $num_test_files = $ENV{TEST_JOBS} || 1;
463$num_test_files = 10 if $num_test_files > 10;
464
465my $test_count = -1;
466foreach my $test (@tests) {
467 $test_count++;
468 next if $test_count % $num_test_files != $::TEST_CHUNK;
469
d42d7565 470 my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
6aa905cf
KW
471
472 my $length = length $bytes;
04f42bf6 473 my $will_overflow = $allowed_uv < 0;
6aa905cf 474
2c511c58
KW
475 my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
476
d42d7565 477 my $controlling_warning_category;
af816908
KW
478 my $utf8n_flag_to_warn;
479 my $utf8n_flag_to_disallow;
480 my $uvchr_flag_to_warn;
481 my $uvchr_flag_to_disallow;
b7e1f4b2 482
e4e140b4
KW
483 # We want to test that the independent flags are actually independent.
484 # For example, that a surrogate doesn't trigger a non-character warning,
485 # and conversely, turning off an above-Unicode flag doesn't suppress a
486 # surrogate warning. Earlier versions of this file used nested loops to
487 # test all possible combinations. But that creates lots of tests, making
488 # this run too long. What is now done instead is to use the complement of
489 # the category we are testing to greatly reduce the combinatorial
490 # explosion. For example, if we have a surrogate and we aren't expecting
491 # a warning about it, we set all the flags for non-surrogates to raise
492 # warnings. If one shows up, it indicates the flags aren't independent.
493 my $utf8n_flag_to_warn_complement;
494 my $utf8n_flag_to_disallow_complement;
495 my $uvchr_flag_to_warn_complement;
496 my $uvchr_flag_to_disallow_complement;
497
bf422d6a
KW
498 # Many of the code points being tested are middling in that if code point
499 # edge cases work, these are very likely to as well. Because this test
500 # file takes a while to execute, we skip testing the edge effects of code
501 # points deemed middling, while testing their basics and continuing to
502 # fully test the non-middling code points.
503 my $skip_most_tests = 0;
504
601e92f1
KW
505 my $cp_message_qr; # Pattern that matches the message raised when
506 # that message contains the problematic code
507 # point. The message is the same (currently) both
508 # when going from/to utf8.
509 my $non_cp_trailing_text; # The suffix text when the message doesn't
510 # contain a code point. (This is a result of
511 # some sort of malformation that means we
512 # can't get an exact code poin
513
af816908
KW
514 if ($will_overflow || $allowed_uv > 0x10FFFF) {
515
e4e140b4 516 # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
af816908
KW
517 $utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
518 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
519 $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
520 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
521
e4e140b4
KW
522 # Below, we add the flags for non-above-31 bit to the code points that
523 # don't fit that category. Special tests are done for this category
524 # in the inner loop.
525 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
526 |$::UTF8_WARN_SURROGATE;
527 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
528 |$::UTF8_DISALLOW_SURROGATE;
529 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
530 |$::UNICODE_WARN_SURROGATE;
531 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
532 |$::UNICODE_DISALLOW_SURROGATE;
533 $controlling_warning_category = 'non_unicode';
534
af816908 535 if ($will_overflow) {
69485e19 536 $non_cp_trailing_text = "if you see this, there is an error";
601e92f1 537 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
af816908
KW
538 }
539 elsif ($allowed_uv > 0x7FFFFFFF) {
601e92f1
KW
540 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
541 \Q and not portable\E/x;
542 $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
af816908 543 }
601e92f1
KW
544 else {
545 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
546 \Q may not be portable\E/x;
547 $non_cp_trailing_text = "is for a non-Unicode code point, may not"
548 . " be portable";
e4e140b4
KW
549 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_ABOVE_31_BIT;
550 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
551 $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_ABOVE_31_BIT;
552 $uvchr_flag_to_disallow_complement
553 |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
af816908 554 }
3022ad00
KW
555 }
556 elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
601e92f1
KW
557 $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
558 $non_cp_trailing_text = "is for a surrogate";
67e45424 559 $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
bf422d6a 560 $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
af816908
KW
561
562 $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE;
563 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
564 $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE;
565 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
e4e140b4
KW
566
567 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
568 |$::UTF8_WARN_SUPER
569 |$::UTF8_WARN_ABOVE_31_BIT;
570 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
571 |$::UTF8_DISALLOW_SUPER
572 |$::UTF8_DISALLOW_ABOVE_31_BIT;
573 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
574 |$::UNICODE_WARN_SUPER
575 |$::UNICODE_WARN_ABOVE_31_BIT;
576 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
577 |$::UNICODE_DISALLOW_SUPER
578 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
d42d7565 579 $controlling_warning_category = 'surrogate';
3022ad00
KW
580 }
581 elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
582 || ($allowed_uv & 0xFFFE) == 0xFFFE)
583 {
601e92f1
KW
584 $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
585 \Q is not recommended for open interchange\E/x;
586 $non_cp_trailing_text = "if you see this, there is an error";
67e45424 587 $needed_to_discern_len = $length unless defined $needed_to_discern_len;
bf422d6a
KW
588 if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
589 || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
590 {
591 $skip_most_tests = 1;
592 }
af816908
KW
593
594 $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR;
595 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
596 $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR;
597 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
e4e140b4
KW
598
599 $utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE
600 |$::UTF8_WARN_SUPER
601 |$::UTF8_WARN_ABOVE_31_BIT;
602 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
603 |$::UTF8_DISALLOW_SUPER
604 |$::UTF8_DISALLOW_ABOVE_31_BIT;
605 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_SURROGATE
606 |$::UNICODE_WARN_SUPER
607 |$::UNICODE_WARN_ABOVE_31_BIT;
608 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
609 |$::UNICODE_DISALLOW_SUPER
610 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
611
d42d7565 612 $controlling_warning_category = 'nonchar';
3022ad00
KW
613 }
614 else {
615 die "Can't figure out what type of warning to test for $testname"
616 }
617
67e45424
KW
618 die 'Didn\'t set $needed_to_discern_len for ' . $testname
619 unless defined $needed_to_discern_len;
6aa905cf
KW
620 {
621 use warnings;
9cdc3054 622 undef @warnings_gotten;
6aa905cf
KW
623 my $ret = test_isUTF8_CHAR($bytes, $length);
624 my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
625 if ($will_overflow) {
d402d77f
KW
626 is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
627 is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0");
6aa905cf
KW
628 }
629 else {
630 is($ret, $length,
d402d77f
KW
631 "For $testname: isUTF8_CHAR() returns expected length: $length");
632 is($ret_flags, $length, " And isUTF8_CHAR_flags(...,0)"
6aa905cf
KW
633 . " returns expected length: $length");
634 }
9cdc3054 635 is(scalar @warnings_gotten, 0,
d402d77f
KW
636 " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
637 . " any warnings")
9cdc3054 638 or output_warnings(@warnings_gotten);
6aa905cf 639
9cdc3054 640 undef @warnings_gotten;
6aa905cf
KW
641 $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
642 if ($will_overflow) {
d402d77f 643 is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
644 }
645 else {
646 my $expected_ret = ( $testname =~ /surrogate|non-character/
647 || $allowed_uv > 0x10FFFF)
648 ? 0
649 : $length;
d402d77f 650 is($ret, $expected_ret, " And isSTRICT_UTF8_CHAR() returns"
6aa905cf
KW
651 . " expected length: $expected_ret");
652 $ret = test_isUTF8_CHAR_flags($bytes, $length,
653 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
654 is($ret, $expected_ret,
d402d77f
KW
655 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
656 . " acts like isSTRICT_UTF8_CHAR");
6aa905cf 657 }
9cdc3054 658 is(scalar @warnings_gotten, 0,
d402d77f
KW
659 " And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
660 . " generated any warnings")
9cdc3054 661 or output_warnings(@warnings_gotten);
6aa905cf 662
9cdc3054 663 undef @warnings_gotten;
6aa905cf
KW
664 $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
665 if ($will_overflow) {
d402d77f 666 is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
667 }
668 else {
669 my $expected_ret = ( $testname =~ /surrogate/
670 || $allowed_uv > 0x10FFFF)
671 ? 0
672 : $length;
d402d77f 673 is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()"
6aa905cf
KW
674 ." returns expected length: $expected_ret");
675 $ret = test_isUTF8_CHAR_flags($bytes, $length,
676 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
677 is($ret, $expected_ret,
d402d77f
KW
678 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
679 . " acts like isC9_STRICT_UTF8_CHAR");
6aa905cf 680 }
9cdc3054 681 is(scalar @warnings_gotten, 0,
d402d77f
KW
682 " And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
683 . " generated any warnings")
9cdc3054 684 or output_warnings(@warnings_gotten);
6aa905cf
KW
685
686 # Test partial character handling, for each byte not a full character
687 for my $j (1.. $length - 1) {
688
689 # Skip the test for the interaction between overflow and above-31
690 # bit. It is really testing other things than the partial
691 # character tests, for which other tests in this file are
692 # sufficient
04f42bf6 693 last if $will_overflow;
6aa905cf 694
69485e19 695 foreach my $disallow_flag (0, $utf8n_flag_to_disallow) {
6aa905cf
KW
696 my $partial = substr($bytes, 0, $j);
697 my $ret_should_be;
698 my $comment;
699 if ($disallow_flag) {
700 $ret_should_be = 0;
701 $comment = "disallowed";
702 if ($j < $needed_to_discern_len) {
703 $ret_should_be = 1;
704 $comment .= ", but need $needed_to_discern_len bytes"
705 . " to discern:";
706 }
707 }
708 else {
709 $ret_should_be = 1;
710 $comment = "allowed";
711 }
712
9cdc3054 713 undef @warnings_gotten;
6aa905cf
KW
714
715 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
716 $disallow_flag);
717 is($ret, $ret_should_be,
d402d77f
KW
718 " And is_utf8_valid_partial_char_flags("
719 . display_bytes($partial)
720 . "), $comment: returns $ret_should_be");
9cdc3054 721 is(scalar @warnings_gotten, 0,
d402d77f 722 " And is_utf8_valid_partial_char_flags()"
6aa905cf 723 . " generated no warnings")
9cdc3054 724 or output_warnings(@warnings_gotten);
6aa905cf
KW
725 }
726 }
727 }
728
69485e19
KW
729 # This is more complicated than the malformations tested in other files in
730 # this directory, as there are several orthogonal variables involved. We
731 # test most possible combinations
69485e19
KW
732 foreach my $do_disallow (0, 1) {
733 next if $skip_most_tests && ! $do_disallow;
6aa905cf 734
e4e140b4 735 # We try various combinations of malformations that can occur
6aa905cf 736 foreach my $short ("", "short") {
bf422d6a 737 next if $skip_most_tests && $short;
6aa905cf
KW
738 foreach my $unexpected_noncont ("",
739 "unexpected non-continuation")
740 {
bf422d6a 741 next if $skip_most_tests && $unexpected_noncont;
6aa905cf 742 foreach my $overlong ("", "overlong") {
bf422d6a 743 next if $overlong && $skip_most_tests;
6aa905cf 744
b3169593
KW
745 # If we're creating an overlong, it can't be longer than
746 # the maximum length, so skip if we're already at that
747 # length.
748 next if $overlong && $length >= $::max_bytes;
6aa905cf 749
69485e19
KW
750 # We classify the warnings into certain "interesting" types,
751 # described later
752 foreach my $warning_type (0..4) {
753 next if $skip_most_tests && $warning_type != 1;
754 foreach my $use_warn_flag (0, 1) {
755 next if $skip_most_tests && ! $use_warn_flag;
756
757 my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
758 my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
759 my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
760 my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
761
762 my $eval_warn;
763 my $expect_regular_warnings;
764 my $expect_warnings_for_malformed;
765 my $expect_warnings_for_overflow;
766
767 if ($warning_type == 0) {
768 $eval_warn = "use warnings; no warnings 'deprecated'";
769 $expect_regular_warnings = $use_warn_flag;
770 $expect_warnings_for_overflow = 1;
771 $expect_warnings_for_malformed = 1;
6aa905cf 772 }
69485e19
KW
773 elsif ($warning_type == 1) {
774 $eval_warn = "no warnings";
775 $expect_regular_warnings = 0;
776 $expect_warnings_for_overflow = 0;
777 $expect_warnings_for_malformed = 0;
778 }
779 elsif ($warning_type == 2) {
780 $eval_warn = "no warnings; use warnings 'utf8'";
781 $expect_regular_warnings = $use_warn_flag;
782 $expect_warnings_for_overflow = 1;
783 $expect_warnings_for_malformed = 1;
784 }
785 elsif ($warning_type == 3) {
786 $eval_warn = "no warnings; use warnings"
e4e140b4
KW
787 . " '$controlling_warning_category'";
788 $expect_regular_warnings = $use_warn_flag;
69485e19 789 $expect_warnings_for_overflow
e4e140b4 790 = $controlling_warning_category eq 'non_unicode';
69485e19
KW
791 $expect_warnings_for_malformed = 0;
792 }
793 elsif ($warning_type == 4) { # Like type 3, but uses the
794 # above-31-bit flags
795 # The complement flags were set up so that the
796 # above-31-bit flags have been tested that they don't
797 # trigger wrongly for too small code points. And the
798 # flags have been set up so that those small code
799 # points are tested for being above Unicode. What's
800 # left to test is that the large code points do
801 # trigger the above-31-bit flags.
802 next if ! $will_overflow && $allowed_uv < 0x80000000;
e4e140b4 803 next if $controlling_warning_category ne 'non_unicode';
69485e19
KW
804 $eval_warn = "no warnings; use warnings 'non_unicode'";
805 $expect_regular_warnings = 1;
806 $expect_warnings_for_overflow = 1;
807 $expect_warnings_for_malformed = 0;
808 $this_utf8n_flag_to_warn = $::UTF8_WARN_ABOVE_31_BIT;
809 $this_utf8n_flag_to_disallow
810 = $::UTF8_DISALLOW_ABOVE_31_BIT;
811 $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
812 $this_uvchr_flag_to_disallow
813 = $::UNICODE_DISALLOW_ABOVE_31_BIT;
601e92f1
KW
814 }
815 else {
69485e19 816 die "Unexpected warning type '$warning_type'";
601e92f1
KW
817 }
818
69485e19
KW
819 # We only need to test the case where all warnings are
820 # enabled (type 0) to see if turning off the warning flag
821 # causes things to not be output. If those pass, then
822 # turning on some sub-category of warnings, or turning off
823 # warnings altogether are extremely likely to not output
824 # warnings either, given how the warnings subsystem is
825 # supposed to work, and this file assumes it does work.
826 next if $warning_type != 0 && ! $use_warn_flag;
827
828 # The convention is that the 'got' flag is the same value
829 # as the disallow one. If this were violated, the tests
830 # here should start failing.
831 my $return_flag = $this_utf8n_flag_to_disallow;
832
e4e140b4
KW
833 # If we aren't expecting warnings/disallow for this, turn
834 # on all the other flags. That makes sure that they all
835 # are independent of this flag, and so we don't need to
836 # test them individually.
837 my $this_warning_flags = ($use_warn_flag)
838 ? $this_utf8n_flag_to_warn
839 : $utf8n_flag_to_warn_complement;
69485e19 840 my $this_disallow_flags = ($do_disallow)
e4e140b4
KW
841 ? $this_utf8n_flag_to_disallow
842 : $utf8n_flag_to_disallow_complement;
6aa905cf
KW
843 my $this_bytes = $bytes;
844 my $this_length = $length;
845 my $expected_uv = $allowed_uv;
b3169593 846 my $this_expected_len = $length;
6aa905cf 847 my $this_needed_to_discern_len = $needed_to_discern_len;
6aa905cf 848
69485e19
KW
849 my @malformation_names;
850 my @expected_warnings;
851 my @expected_return_flags;
6aa905cf 852
69485e19
KW
853 # Now go through the possible malformations wanted, and
854 # change the input accordingly. We also can set up
855 # certain other things now, like whether we expect a
856 # return flag from this malformation and which flag.
857 if ($overlong) {
6aa905cf
KW
858 # For an overlong, we convert the original
859 # start byte into a continuation byte with
860 # the same data bits as originally. ...
861 substr($this_bytes, 0, 1)
862 = start_byte_to_cont(substr($this_bytes,
863 0, 1));
864
865 # ... Then we prepend it with a known
866 # overlong sequence. This should evaluate
867 # to the exact same code point as the
868 # original.
869 $this_bytes
870 = I8_to_native("\xff")
dbb8d798 871 . (I8_to_native(chr $::lowest_continuation)
6aa905cf
KW
872 x ( $::max_bytes - 1 - length($this_bytes)))
873 . $this_bytes;
874 $this_length = length($this_bytes);
875 $this_needed_to_discern_len
876 = $::max_bytes - ($this_expected_len
877 - $this_needed_to_discern_len);
878 $this_expected_len = $::max_bytes;
9cdc3054 879 push @expected_return_flags, $::UTF8_GOT_LONG;
69485e19
KW
880 push @malformation_names, $overlong;
881 if ($expect_warnings_for_malformed) {
882 if ( ! $short
883 && ! $unexpected_noncont
884 && ! $will_overflow)
885 {
886 my $overlong_bytes
887 = display_bytes_no_quotes($this_bytes);
888 my $correct_bytes
889 = display_bytes_no_quotes($bytes);
890 push @expected_warnings,
891 qr/\QMalformed UTF-8 character:\E
892 \Q $overlong_bytes (overlong;\E
893 \Q instead use $correct_bytes to\E
894 \Q represent U+$uv_string)/x;
895 }
896 else {
897 push @expected_warnings, qr/overlong/;
898 }
6aa905cf 899 }
69485e19 900 }
6aa905cf 901
69485e19
KW
902 if ($short) {
903 push @malformation_names, $short;
904 push @expected_warnings, qr/short/
905 if $expect_warnings_for_malformed;
906
907 # To force this malformation, just tell the test to
908 # not look as far as it should into the input.
909 $this_length--;
910 $this_expected_len--;
911 push @expected_return_flags, $::UTF8_GOT_SHORT;
912 }
913
914 if ($unexpected_noncont) {
915 push @malformation_names, $unexpected_noncont;
916 push @expected_warnings, qr/$unexpected_noncont/
917 if $expect_warnings_for_malformed;
918
919 # To force this malformation, change the final
920 # continuation byte into a non continuation.
921 my $pos = ($short) ? -2 : -1;
922 substr($this_bytes, $pos, 1) = '?';
923 $this_expected_len--;
924 push @expected_return_flags,
925 $::UTF8_GOT_NON_CONTINUATION;
926 }
927
928 # The overflow malformation is done differently than other
929 # malformations. It comes from manually typed tests in
930 # the test array, but it also is above Unicode and uses
931 # Perl extended UTF-8, so affects some of the flags being
932 # tested. We now make it be treated like one of the other
933 # generated malformations.
934 if ($will_overflow) {
935
936 # An overflow is (way) above Unicode, and overrides
937 # everything else.
938 $expect_regular_warnings = 0;
939
940 push @malformation_names, 'overflow';
941 if ($expect_warnings_for_overflow) {
942 my $qr = display_bytes_no_quotes(
943 substr($this_bytes, 0, $this_expected_len));
944 $qr = qr/\QMalformed UTF-8 character: \E
945 \Q$qr (overflows)\E/x;
946 push @expected_warnings, $qr;
6aa905cf 947 }
69485e19 948 push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
6aa905cf
KW
949 }
950
69485e19
KW
951 # Here, we've set things up based on the malformations.
952 # Now generate the text for them for the test name.
953 my $malformations_name = "";
954 if (@malformation_names) {
955 $malformations_name .= "malformation";
956 $malformations_name .= "s" if @malformation_names > 1;
957 $malformations_name .= ": ";
958 $malformations_name .= join "/", @malformation_names;
959 $malformations_name = " ($malformations_name)";
960 }
961
962 # It may be that the malformations have shortened the
963 # amount of input we look at so much that we can't tell
964 # what the category the code point was in. Otherwise, set
965 # up the expected return flags based on the warnings and
966 # disallowments.
967 if ($this_expected_len < $this_needed_to_discern_len) {
968 $expect_regular_warnings = 0;
969 }
970 elsif ( ($this_warning_flags & $this_utf8n_flag_to_warn)
971 || ( $this_disallow_flags
972 & $this_utf8n_flag_to_disallow))
973 {
974 push @expected_return_flags, $return_flag;
975 }
976
977 # Finish setting up the expected warning.
978 if ($expect_regular_warnings) {
979
980 # So far the array contains warnings generated by
981 # malformations. Add the expected regular one.
982 unshift @expected_warnings, $cp_message_qr;
983
984 # But it may need to be modified, because either of
985 # these malformations means we can't determine the
986 # expected code point.
987 if ($short || $unexpected_noncont) {
988 my $first_byte = substr($this_bytes, 0, 1);
989 $expected_warnings[0] = display_bytes(
990 substr($this_bytes, 0, $this_expected_len));
991 $expected_warnings[0]
992 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
993 \Q $expected_warnings[0]\E
994 \Q $non_cp_trailing_text\E/x;
995 }
996 }
6aa905cf
KW
997
998 # Is effectively disallowed if we've set up a
999 # malformation, even if the flag indicates it is
1000 # allowed. Fix up test name to indicate this as
1001 # well
69485e19
KW
1002 my $disallowed = ( $this_disallow_flags
1003 & $this_utf8n_flag_to_disallow)
1004 || $malformations_name;
6aa905cf 1005 my $this_name = "utf8n_to_uvchr_error() $testname: "
69485e19
KW
1006 . (($disallowed)
1007 ? 'disallowed'
1008 : 'allowed');
6aa905cf 1009 $this_name .= ", $eval_warn";
69485e19
KW
1010 $this_name .= ", " . (( $this_warning_flags
1011 & $this_utf8n_flag_to_warn)
1012 ? 'with flag for raising warnings'
1013 : 'no flag for raising warnings');
1014 $this_name .= $malformations_name;
db0f09e6 1015
9cdc3054 1016 undef @warnings_gotten;
6aa905cf 1017 my $ret_ref;
69485e19 1018 my $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1019 my $eval_text = "$eval_warn; \$ret_ref"
1020 . " = test_utf8n_to_uvchr_error("
69485e19 1021 . "'$this_bytes', $this_length, $this_flags)";
6aa905cf 1022 eval "$eval_text";
69485e19 1023 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
6aa905cf 1024 {
a8ee5133
KW
1025 diag "\$@='$@'; call was: "
1026 . utf8n_display_call($eval_text);
6aa905cf
KW
1027 next;
1028 }
1029 if ($disallowed) {
d402d77f 1030 is($ret_ref->[0], 0, " And returns 0")
a8ee5133 1031 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1032 }
1033 else {
1034 is($ret_ref->[0], $expected_uv,
d402d77f
KW
1035 " And returns expected uv: "
1036 . $uv_string)
a8ee5133 1037 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1038 }
1039 is($ret_ref->[1], $this_expected_len,
d402d77f 1040 " And returns expected length:"
6aa905cf 1041 . " $this_expected_len")
a8ee5133 1042 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1043
9cdc3054 1044 my $returned_flags = $ret_ref->[2];
6aa905cf 1045
9cdc3054 1046 for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
69485e19
KW
1047 if ($expected_return_flags[$i] & $returned_flags) {
1048 if ($expected_return_flags[$i]
1049 == $::UTF8_DISALLOW_ABOVE_31_BIT)
1050 {
1051 pass(" Expected and got return flag for"
1052 . " above_31_bit");
1053 }
1054 # The first entries in this are
1055 # malformations
1056 elsif ($i > @malformation_names - 1) {
1057 pass(" Expected and got return flag"
e4e140b4 1058 . " for " . $controlling_warning_category);
69485e19
KW
1059 }
1060 else {
1061 pass(" Expected and got return flag for "
1062 . $malformation_names[$i]
1063 . " malformation");
1064 }
9cdc3054 1065 $returned_flags &= ~$expected_return_flags[$i];
69485e19 1066 splice @expected_return_flags, $i, 1;
6aa905cf 1067 }
6aa905cf 1068 }
6aa905cf 1069
69485e19
KW
1070 is($returned_flags, 0,
1071 " Got no unexpected return flags")
1072 or diag "The unexpected flags gotten were: "
5722c46d
KW
1073 . (flags_to_text($returned_flags,
1074 \@utf8n_flags_to_text)
69485e19
KW
1075 # We strip off any prefixes from the flag
1076 # names
1077 =~ s/ \b [A-Z] _ //xgr);
1078 is (scalar @expected_return_flags, 0,
1079 " Got all expected return flags")
1080 or diag "The expected flags not gotten were: "
1081 . (flags_to_text(eval join("|",
1082 @expected_return_flags),
1083 \@utf8n_flags_to_text)
1084 # We strip off any prefixes from the flag
1085 # names
5722c46d 1086 =~ s/ \b [A-Z] _ //xgr);
9cdc3054 1087
69485e19
KW
1088 do_warnings_test(@expected_warnings)
1089 or diag "Call was: " . utf8n_display_call($eval_text);
1090 undef @warnings_gotten;
6aa905cf
KW
1091
1092 # Check CHECK_ONLY results when the input is
1093 # disallowed. Do this when actually disallowed,
69485e19 1094 # not just when the $this_disallow_flags is set
6aa905cf 1095 if ($disallowed) {
69485e19
KW
1096 my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1097 my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1098 . " test_utf8n_to_uvchr_error('"
1099 . "$this_bytes', $this_length,"
1100 . " $this_flags)";
1101 eval $eval_text;
a8ee5133
KW
1102 if (! ok ("$@ eq ''",
1103 " And eval succeeded with CHECK_ONLY"))
1104 {
1105 diag "\$@='$@'; Call was: "
1106 . utf8n_display_call($eval_text);
1107 next;
1108 }
d402d77f 1109 is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0")
a8ee5133 1110 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1111 is($ret_ref->[1], -1,
d402d77f 1112 " CHECK_ONLY: returns -1 for length")
a8ee5133 1113 or diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1114 if (! is(scalar @warnings_gotten, 0,
d402d77f 1115 " CHECK_ONLY: no warnings generated"))
6aa905cf 1116 {
a8ee5133 1117 diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1118 output_warnings(@warnings_gotten);
6aa905cf
KW
1119 }
1120 }
1121
1122 # Now repeat some of the above, but for
1123 # uvchr_to_utf8_flags(). Since this comes from an
1124 # existing code point, it hasn't overflowed, and
1125 # isn't malformed.
69485e19
KW
1126 next if @malformation_names;
1127
1128 $this_warning_flags = ($use_warn_flag)
1129 ? $this_uvchr_flag_to_warn
1130 : 0;
1131 $this_disallow_flags = ($do_disallow)
1132 ? $this_uvchr_flag_to_disallow
1133 : 0;
1134
1135 $disallowed = $this_disallow_flags
1136 & $this_uvchr_flag_to_disallow;
1137 $this_name .= ", " . (( $this_warning_flags
1138 & $this_utf8n_flag_to_warn)
1139 ? 'with flag for raising warnings'
1140 : 'no flag for raising warnings');
6aa905cf
KW
1141
1142 $this_name = "uvchr_to_utf8_flags() $testname: "
69485e19 1143 . (($disallowed)
6aa905cf 1144 ? 'disallowed'
6aa905cf
KW
1145 : 'allowed');
1146 $this_name .= ", $eval_warn";
69485e19
KW
1147 $this_name .= ", " . (( $this_warning_flags
1148 & $this_uvchr_flag_to_warn)
6aa905cf
KW
1149 ? 'with warning flag'
1150 : 'no warning flag');
1151
9cdc3054 1152 undef @warnings_gotten;
6aa905cf 1153 my $ret;
69485e19 1154 $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1155 $eval_text = "$eval_warn; \$ret ="
1156 . " test_uvchr_to_utf8_flags("
d884ea32 1157 . "$allowed_uv, $this_flags)";
6aa905cf
KW
1158 eval "$eval_text";
1159 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1160 {
d884ea32
KW
1161 diag "\$@='$@'; call was: "
1162 . uvchr_display_call($eval_text);
6aa905cf
KW
1163 next;
1164 }
1165 if ($disallowed) {
d402d77f 1166 is($ret, undef, " And returns undef")
d884ea32 1167 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1168 }
1169 else {
d402d77f 1170 is($ret, $this_bytes, " And returns expected string")
d884ea32 1171 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf 1172 }
69485e19
KW
1173
1174 do_warnings_test(@expected_warnings)
1175 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1176 }
1177 }
1178 }
1179 }
1180 }
6aa905cf
KW
1181 }
1182}
1183
1184done_testing;