This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: Do test on all platforms
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
CommitLineData
6aa905cf
KW
1#!perl -w
2
3# This is a base file to be used by various .t's in its directory
8f79178b
KW
4# It tests various code points that are "problematic", and verifies that the
5# correct warnings/flags etc are generated when using them. It also takes the
6# UTF-8 for some of them and perturbs it to be malformed in various ways, and
7# tests that this gets appropriately detected.
6aa905cf
KW
8
9use strict;
10use Test::More;
11
12BEGIN {
13 use_ok('XS::APItest');
14 require 'charset_tools.pl';
15 require './t/utf8_setup.pl';
16};
17
18$|=1;
19
20no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
21 # machines, and that is tested elsewhere
22
23use XS::APItest;
24use Data::Dumper;
25
9cdc3054 26my @warnings_gotten;
6aa905cf
KW
27
28use warnings 'utf8';
8d6f1506
KW
29local $SIG{__WARN__} = sub { my @copy = @_;
30 push @warnings_gotten, map { chomp; $_ } @copy;
31 };
c7f9e4bb
KW
32my @tests;
33{
34 no warnings qw(portable overflow);
35 @tests = (
8f79178b
KW
36 # $testname,
37 # $bytes, UTF-8 string
38 # $allowed_uv, code point $bytes evaluates to; -1 if
39 # overflows
40 # $needed_to_discern_len optional, how long an initial substring do
41 # we need to tell that the string must be for
42 # a code point in the category it falls in,
43 # like being a surrogate; 0 indicates we need
44 # the whole string. Some categories have a
45 # default that is used if this is omitted.
46 [ "lowest surrogate",
47 (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
48 0xD800,
49 ],
50 [ "a middle surrogate",
51 (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
52 0xD90D,
53 ],
54 [ "highest surrogate",
55 (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
56 0xDFFF,
57 ],
8f79178b
KW
58 [ "first of 32 consecutive non-character code points",
59 (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
60 0xFDD0,
61 ],
62 [ "a mid non-character code point of the 32 consecutive ones",
63 (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
64 0xFDE0,
65 ],
66 [ "final of 32 consecutive non-character code points",
67 (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
68 0xFDEF,
69 ],
70 [ "non-character code point U+FFFE",
71 (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
72 0xFFFE,
73 ],
74 [ "non-character code point U+FFFF",
75 (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
76 0xFFFF,
77 ],
78 [ "non-character code point U+1FFFE",
79 (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
80 0x1FFFE,
81 ],
82 [ "non-character code point U+1FFFF",
83 (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
84 0x1FFFF,
85 ],
86 [ "non-character code point U+2FFFE",
87 (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
88 0x2FFFE,
89 ],
90 [ "non-character code point U+2FFFF",
91 (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
92 0x2FFFF,
93 ],
94 [ "non-character code point U+3FFFE",
95 (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
96 0x3FFFE,
97 ],
98 [ "non-character code point U+3FFFF",
99 (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
100 0x3FFFF,
101 ],
102 [ "non-character code point U+4FFFE",
103 (isASCII)
104 ? "\xf1\x8f\xbf\xbe"
105 : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
106 0x4FFFE,
107 ],
108 [ "non-character code point U+4FFFF",
109 (isASCII)
110 ? "\xf1\x8f\xbf\xbf"
111 : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
112 0x4FFFF,
113 ],
114 [ "non-character code point U+5FFFE",
115 (isASCII)
116 ? "\xf1\x9f\xbf\xbe"
117 : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
118 0x5FFFE,
119 ],
120 [ "non-character code point U+5FFFF",
121 (isASCII)
122 ? "\xf1\x9f\xbf\xbf"
123 : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
124 0x5FFFF,
125 ],
126 [ "non-character code point U+6FFFE",
127 (isASCII)
128 ? "\xf1\xaf\xbf\xbe"
129 : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
130 0x6FFFE,
131 ],
132 [ "non-character code point U+6FFFF",
133 (isASCII)
134 ? "\xf1\xaf\xbf\xbf"
135 : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
136 0x6FFFF,
137 ],
138 [ "non-character code point U+7FFFE",
139 (isASCII)
140 ? "\xf1\xbf\xbf\xbe"
141 : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
142 0x7FFFE,
143 ],
144 [ "non-character code point U+7FFFF",
145 (isASCII)
146 ? "\xf1\xbf\xbf\xbf"
147 : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
148 0x7FFFF,
149 ],
150 [ "non-character code point U+8FFFE",
151 (isASCII)
152 ? "\xf2\x8f\xbf\xbe"
153 : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
154 0x8FFFE,
155 ],
156 [ "non-character code point U+8FFFF",
157 (isASCII)
158 ? "\xf2\x8f\xbf\xbf"
159 : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
160 0x8FFFF,
161 ],
162 [ "non-character code point U+9FFFE",
163 (isASCII)
164 ? "\xf2\x9f\xbf\xbe"
165 : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
166 0x9FFFE,
167 ],
168 [ "non-character code point U+9FFFF",
169 (isASCII)
170 ? "\xf2\x9f\xbf\xbf"
171 : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
172 0x9FFFF,
173 ],
174 [ "non-character code point U+AFFFE",
175 (isASCII)
176 ? "\xf2\xaf\xbf\xbe"
177 : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
178 0xAFFFE,
179 ],
180 [ "non-character code point U+AFFFF",
181 (isASCII)
182 ? "\xf2\xaf\xbf\xbf"
183 : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
184 0xAFFFF,
185 ],
186 [ "non-character code point U+BFFFE",
187 (isASCII)
188 ? "\xf2\xbf\xbf\xbe"
189 : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
190 0xBFFFE,
191 ],
192 [ "non-character code point U+BFFFF",
193 (isASCII)
194 ? "\xf2\xbf\xbf\xbf"
195 : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
196 0xBFFFF,
197 ],
198 [ "non-character code point U+CFFFE",
199 (isASCII)
200 ? "\xf3\x8f\xbf\xbe"
201 : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
202 0xCFFFE,
203 ],
204 [ "non-character code point U+CFFFF",
205 (isASCII)
206 ? "\xf3\x8f\xbf\xbf"
207 : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
208 0xCFFFF,
209 ],
210 [ "non-character code point U+DFFFE",
211 (isASCII)
212 ? "\xf3\x9f\xbf\xbe"
213 : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
214 0xDFFFE,
215 ],
216 [ "non-character code point U+DFFFF",
217 (isASCII)
218 ? "\xf3\x9f\xbf\xbf"
219 : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
220 0xDFFFF,
221 ],
222 [ "non-character code point U+EFFFE",
223 (isASCII)
224 ? "\xf3\xaf\xbf\xbe"
225 : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
226 0xEFFFE,
227 ],
228 [ "non-character code point U+EFFFF",
229 (isASCII)
230 ? "\xf3\xaf\xbf\xbf"
231 : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
232 0xEFFFF,
233 ],
234 [ "non-character code point U+FFFFE",
235 (isASCII)
236 ? "\xf3\xbf\xbf\xbe"
237 : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
238 0xFFFFE,
239 ],
240 [ "non-character code point U+FFFFF",
241 (isASCII)
242 ? "\xf3\xbf\xbf\xbf"
243 : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
244 0xFFFFF,
245 ],
246 [ "non-character code point U+10FFFE",
247 (isASCII)
248 ? "\xf4\x8f\xbf\xbe"
249 : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
250 0x10FFFE,
251 ],
252 [ "non-character code point U+10FFFF",
253 (isASCII)
254 ? "\xf4\x8f\xbf\xbf"
255 : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
256 0x10FFFF,
257 ],
7c9f4ec6
KW
258 [ "first non_unicode",
259 (isASCII)
260 ? "\xf4\x90\x80\x80"
261 : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
262 0x110000,
263 2,
264 ],
265 [ "non_unicode whose first byte tells that",
266 (isASCII)
267 ? "\xf5\x80\x80\x80"
268 : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
269 (isASCII) ? 0x140000 : 0x200000,
270 1,
271 ],
48ceb60a 272 [ "lowest 32 bit code point",
8f79178b
KW
273 (isASCII)
274 ? "\xfe\x82\x80\x80\x80\x80\x80"
275 : I8_to_native(
276 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
277 0x80000000,
278 (isASCII) ? 1 : 8,
279 ],
280 [ "highest 32 bit code point",
281 (isASCII)
282 ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
283 : I8_to_native(
284 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
285 0xFFFFFFFF,
286 (isASCII) ? 1 : 8,
287 ],
f4da64d4
KW
288 [ "Lowest 33 bit code point",
289 (isASCII)
290 ? "\xfe\x84\x80\x80\x80\x80\x80"
291 : I8_to_native(
292 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"),
293 ($::is64bit) ? 0x100000000 : -1, # Overflows on 32-bit systems
294 (isASCII && ! $::is64bit) ? 2 : 1,
295 ],
8f79178b
KW
296 );
297
298 if (! $::is64bit) {
299 if (isASCII) {
8f79178b 300 push @tests,
c81d4d83
KW
301 [ "overflow that old algorithm failed to detect",
302 "\xfe\x86\x80\x80\x80\x80\x80",
303 -1,
304 2,
8f79178b
KW
305 ];
306 }
307 }
c81d4d83
KW
308
309 if ($::is64bit) {
c81d4d83
KW
310 if (isASCII) {
311 push @tests,
f6104ed7
KW
312 [ "Lowest code point requiring 13 bytes to represent",
313 "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80",
314 0x1000000000,
315 1,
316 ],
c81d4d83
KW
317 [ "overflow that old algorithm failed to detect",
318 "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
319 -1,
320 3,
321 ];
322 }
323 else {
8f79178b
KW
324 push @tests, # These could falsely show wrongly in a naive
325 # implementation
326 [ "requires at least 32 bits",
327 I8_to_native(
6aa905cf 328 "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
329 0x800000000,
330 7,
331 ],
332 [ "requires at least 32 bits",
333 I8_to_native(
6aa905cf 334 "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
335 0x10000000000,
336 6,
337 ],
338 [ "requires at least 32 bits",
339 I8_to_native(
6aa905cf 340 "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
341 0x200000000000,
342 5,
343 ],
344 [ "requires at least 32 bits",
345 I8_to_native(
6aa905cf 346 "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
347 0x4000000000000,
348 4,
349 ],
350 [ "requires at least 32 bits",
351 I8_to_native(
6aa905cf 352 "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
353 0x80000000000000,
354 3,
355 ],
356 [ "requires at least 32 bits",
357 I8_to_native(
6aa905cf 358 "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
359 0x1000000000000000,
360 2,
361 ];
362 }
6aa905cf 363 }
c7f9e4bb 364}
6aa905cf 365
5722c46d
KW
366sub flags_to_text($$)
367{
368 my ($flags, $flags_to_text_ref) = @_;
369
370 # Returns a string containing a mnemonic representation of the bits that
371 # are set in the $flags. These are assumed to be flag bits. The return
372 # looks like "FOO|BAR|BAZ". The second parameter is a reference to an
373 # array that gives the textual representation of all the possible flags.
374 # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If
375 # no bits at all are set the string "0" is returned;
376
377 my @flag_text;
378 my $shift = 0;
379
380 return "0" if $flags == 0;
381
382 while ($flags) {
383 #diag sprintf "%x", $flags;
384 if ($flags & 1) {
385 push @flag_text, $flags_to_text_ref->[$shift];
386 }
387 $shift++;
388 $flags >>= 1;
389 }
390
391 return join "|", @flag_text;
392}
393
394# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
395# instead of A_, D_, but the prefixes will be used in a a later commit, so
396# minimize churn by having them here.
397my @utf8n_flags_to_text = ( qw(
398 A_EMPTY
399 A_CONTINUATION
400 A_NON_CONTINUATION
401 A_SHORT
402 A_LONG
403 A_LONG_AND_ITS_VALUE
404 PLACEHOLDER
405 A_OVERFLOW
406 D_SURROGATE
407 W_SURROGATE
408 D_NONCHAR
409 W_NONCHAR
410 D_SUPER
411 W_SUPER
412 D_ABOVE_31_BIT
413 W_ABOVE_31_BIT
414 CHECK_ONLY
415 NO_CONFIDENCE_IN_CURLEN_
416 ) );
417
a8ee5133
KW
418sub utf8n_display_call($)
419{
420 # Converts an eval string that calls test_utf8n_to_uvchr into a more human
421 # readable form, and returns it. Doesn't work if the byte string contains
422 # an apostrophe. The return will look something like:
423 # test_utf8n_to_uvchr_error('$bytes', $length, $flags)
424 #diag $_[0];
425
426 $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
427 my $text1 = $1; # Everything before the byte string
428 my $bytes = $2;
429 my $text2 = $3; # Includes the length
430 my $flags = $4;
431
432 return $text1
433 . display_bytes($bytes)
434 . $text2
435 . flags_to_text($flags, \@utf8n_flags_to_text)
436 . ')';
437}
438
d884ea32
KW
439sub uvchr_display_call($)
440{
441 # Converts an eval string that calls test_uvchr_to_utf8 into a more human
442 # readable form, and returns it. The return will look something like:
443 # test_uvchr_to_utf8n_flags($uv, $flags)
444 #diag $_[0];
445
446 my @flags_to_text = ( qw(
447 W_SURROGATE
448 W_NONCHAR
449 W_SUPER
450 W_ABOVE_31_BIT
451 D_SURROGATE
452 D_NONCHAR
453 D_SUPER
454 D_ABOVE_31_BIT
455 ) );
456
457 $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
458 my $text = $1;
459 my $cp = sprintf "%X", $2;
460 my $flags = $3;
461
462 return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
463}
464
69485e19
KW
465sub do_warnings_test(@)
466{
467 my @expected_warnings = @_;
468
469 # Compares the input expected warnings array with @warnings_gotten,
470 # generating a pass for each found, removing it from @warnings_gotten.
471 # Any discrepancies generate test failures. Returns TRUE if no
472 # discrepcancies; otherwise FALSE.
473
474 my $succeeded = 1;
475
476 if (@expected_warnings == 0) {
477 if (! is(@warnings_gotten, 0, " Expected and got no warnings")) {
478 output_warnings(@warnings_gotten);
479 $succeeded = 0;
480 }
481 return $succeeded;
482 }
483
484 # Check that we got all the expected warnings,
485 # removing each one found
486 WARNING:
487 foreach my $expected (@expected_warnings) {
488 foreach (my $i = 0; $i < @warnings_gotten; $i++) {
489 if ($warnings_gotten[$i] =~ $expected) {
490 pass(" Expected and got warning: "
491 . " $warnings_gotten[$i]");
492 splice @warnings_gotten, $i, 1;
493 next WARNING;
494 }
495 }
496 fail(" Expected a warning that matches "
497 . $expected . " but didn't get it");
498 $succeeded = 0;
499 }
500
501 if (! is(@warnings_gotten, 0, " Got no unexpected warnings")) {
502 output_warnings(@warnings_gotten);
503 $succeeded = 0;
504 }
505
506 return $succeeded;
507}
508
6aa905cf
KW
509# This test is split into this number of files.
510my $num_test_files = $ENV{TEST_JOBS} || 1;
511$num_test_files = 10 if $num_test_files > 10;
512
513my $test_count = -1;
514foreach my $test (@tests) {
515 $test_count++;
516 next if $test_count % $num_test_files != $::TEST_CHUNK;
517
d42d7565 518 my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
6aa905cf
KW
519
520 my $length = length $bytes;
04f42bf6 521 my $will_overflow = $allowed_uv < 0;
6aa905cf 522
2c511c58
KW
523 my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
524
d42d7565 525 my $controlling_warning_category;
af816908
KW
526 my $utf8n_flag_to_warn;
527 my $utf8n_flag_to_disallow;
528 my $uvchr_flag_to_warn;
529 my $uvchr_flag_to_disallow;
b7e1f4b2 530
e4e140b4
KW
531 # We want to test that the independent flags are actually independent.
532 # For example, that a surrogate doesn't trigger a non-character warning,
533 # and conversely, turning off an above-Unicode flag doesn't suppress a
534 # surrogate warning. Earlier versions of this file used nested loops to
535 # test all possible combinations. But that creates lots of tests, making
536 # this run too long. What is now done instead is to use the complement of
537 # the category we are testing to greatly reduce the combinatorial
538 # explosion. For example, if we have a surrogate and we aren't expecting
539 # a warning about it, we set all the flags for non-surrogates to raise
540 # warnings. If one shows up, it indicates the flags aren't independent.
541 my $utf8n_flag_to_warn_complement;
542 my $utf8n_flag_to_disallow_complement;
543 my $uvchr_flag_to_warn_complement;
544 my $uvchr_flag_to_disallow_complement;
545
bf422d6a
KW
546 # Many of the code points being tested are middling in that if code point
547 # edge cases work, these are very likely to as well. Because this test
548 # file takes a while to execute, we skip testing the edge effects of code
549 # points deemed middling, while testing their basics and continuing to
550 # fully test the non-middling code points.
551 my $skip_most_tests = 0;
552
601e92f1
KW
553 my $cp_message_qr; # Pattern that matches the message raised when
554 # that message contains the problematic code
555 # point. The message is the same (currently) both
556 # when going from/to utf8.
557 my $non_cp_trailing_text; # The suffix text when the message doesn't
558 # contain a code point. (This is a result of
559 # some sort of malformation that means we
560 # can't get an exact code poin
561
af816908
KW
562 if ($will_overflow || $allowed_uv > 0x10FFFF) {
563
e4e140b4 564 # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
af816908
KW
565 $utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
566 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
567 $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
568 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
569
e4e140b4
KW
570 # Below, we add the flags for non-above-31 bit to the code points that
571 # don't fit that category. Special tests are done for this category
572 # in the inner loop.
573 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
574 |$::UTF8_WARN_SURROGATE;
575 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
576 |$::UTF8_DISALLOW_SURROGATE;
577 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
578 |$::UNICODE_WARN_SURROGATE;
579 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
580 |$::UNICODE_DISALLOW_SURROGATE;
581 $controlling_warning_category = 'non_unicode';
582
8f79178b 583 if ($will_overflow) { # This is realy a malformation
69485e19 584 $non_cp_trailing_text = "if you see this, there is an error";
601e92f1 585 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
af816908
KW
586 }
587 elsif ($allowed_uv > 0x7FFFFFFF) {
601e92f1
KW
588 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
589 \Q and not portable\E/x;
590 $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
af816908 591 }
601e92f1
KW
592 else {
593 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
594 \Q may not be portable\E/x;
595 $non_cp_trailing_text = "is for a non-Unicode code point, may not"
596 . " be portable";
e4e140b4
KW
597 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_ABOVE_31_BIT;
598 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
599 $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_ABOVE_31_BIT;
600 $uvchr_flag_to_disallow_complement
601 |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
af816908 602 }
3022ad00
KW
603 }
604 elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
601e92f1
KW
605 $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
606 $non_cp_trailing_text = "is for a surrogate";
67e45424 607 $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
bf422d6a 608 $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
af816908
KW
609
610 $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE;
611 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
612 $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE;
613 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
e4e140b4
KW
614
615 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
616 |$::UTF8_WARN_SUPER
617 |$::UTF8_WARN_ABOVE_31_BIT;
618 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
619 |$::UTF8_DISALLOW_SUPER
620 |$::UTF8_DISALLOW_ABOVE_31_BIT;
621 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
622 |$::UNICODE_WARN_SUPER
623 |$::UNICODE_WARN_ABOVE_31_BIT;
624 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
625 |$::UNICODE_DISALLOW_SUPER
626 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
d42d7565 627 $controlling_warning_category = 'surrogate';
3022ad00
KW
628 }
629 elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
630 || ($allowed_uv & 0xFFFE) == 0xFFFE)
631 {
601e92f1
KW
632 $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
633 \Q is not recommended for open interchange\E/x;
634 $non_cp_trailing_text = "if you see this, there is an error";
67e45424 635 $needed_to_discern_len = $length unless defined $needed_to_discern_len;
bf422d6a
KW
636 if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
637 || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
638 {
639 $skip_most_tests = 1;
640 }
af816908
KW
641
642 $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR;
643 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
644 $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR;
645 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
e4e140b4
KW
646
647 $utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE
648 |$::UTF8_WARN_SUPER
649 |$::UTF8_WARN_ABOVE_31_BIT;
650 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
651 |$::UTF8_DISALLOW_SUPER
652 |$::UTF8_DISALLOW_ABOVE_31_BIT;
653 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_SURROGATE
654 |$::UNICODE_WARN_SUPER
655 |$::UNICODE_WARN_ABOVE_31_BIT;
656 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
657 |$::UNICODE_DISALLOW_SUPER
658 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
659
d42d7565 660 $controlling_warning_category = 'nonchar';
3022ad00
KW
661 }
662 else {
663 die "Can't figure out what type of warning to test for $testname"
664 }
665
67e45424
KW
666 die 'Didn\'t set $needed_to_discern_len for ' . $testname
667 unless defined $needed_to_discern_len;
8f79178b
KW
668 { # First test the isFOO calls
669 use warnings; # Make sure these don't raise warnings
9cdc3054 670 undef @warnings_gotten;
8f79178b 671
6aa905cf
KW
672 my $ret = test_isUTF8_CHAR($bytes, $length);
673 my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
674 if ($will_overflow) {
d402d77f
KW
675 is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
676 is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0");
6aa905cf
KW
677 }
678 else {
679 is($ret, $length,
d402d77f
KW
680 "For $testname: isUTF8_CHAR() returns expected length: $length");
681 is($ret_flags, $length, " And isUTF8_CHAR_flags(...,0)"
6aa905cf
KW
682 . " returns expected length: $length");
683 }
9cdc3054 684 is(scalar @warnings_gotten, 0,
d402d77f
KW
685 " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
686 . " any warnings")
9cdc3054 687 or output_warnings(@warnings_gotten);
6aa905cf 688
9cdc3054 689 undef @warnings_gotten;
6aa905cf
KW
690 $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
691 if ($will_overflow) {
d402d77f 692 is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
693 }
694 else {
695 my $expected_ret = ( $testname =~ /surrogate|non-character/
696 || $allowed_uv > 0x10FFFF)
697 ? 0
698 : $length;
d402d77f 699 is($ret, $expected_ret, " And isSTRICT_UTF8_CHAR() returns"
6aa905cf
KW
700 . " expected length: $expected_ret");
701 $ret = test_isUTF8_CHAR_flags($bytes, $length,
702 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
703 is($ret, $expected_ret,
d402d77f
KW
704 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
705 . " acts like isSTRICT_UTF8_CHAR");
6aa905cf 706 }
9cdc3054 707 is(scalar @warnings_gotten, 0,
d402d77f
KW
708 " And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
709 . " generated any warnings")
9cdc3054 710 or output_warnings(@warnings_gotten);
6aa905cf 711
9cdc3054 712 undef @warnings_gotten;
6aa905cf
KW
713 $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
714 if ($will_overflow) {
d402d77f 715 is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
716 }
717 else {
718 my $expected_ret = ( $testname =~ /surrogate/
719 || $allowed_uv > 0x10FFFF)
720 ? 0
721 : $length;
d402d77f 722 is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()"
6aa905cf
KW
723 ." returns expected length: $expected_ret");
724 $ret = test_isUTF8_CHAR_flags($bytes, $length,
8f79178b 725 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
6aa905cf 726 is($ret, $expected_ret,
d402d77f
KW
727 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
728 . " acts like isC9_STRICT_UTF8_CHAR");
6aa905cf 729 }
9cdc3054 730 is(scalar @warnings_gotten, 0,
d402d77f
KW
731 " And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
732 . " generated any warnings")
9cdc3054 733 or output_warnings(@warnings_gotten);
6aa905cf
KW
734
735 # Test partial character handling, for each byte not a full character
736 for my $j (1.. $length - 1) {
737
738 # Skip the test for the interaction between overflow and above-31
739 # bit. It is really testing other things than the partial
740 # character tests, for which other tests in this file are
741 # sufficient
04f42bf6 742 last if $will_overflow;
6aa905cf 743
69485e19 744 foreach my $disallow_flag (0, $utf8n_flag_to_disallow) {
6aa905cf
KW
745 my $partial = substr($bytes, 0, $j);
746 my $ret_should_be;
747 my $comment;
748 if ($disallow_flag) {
749 $ret_should_be = 0;
750 $comment = "disallowed";
751 if ($j < $needed_to_discern_len) {
752 $ret_should_be = 1;
753 $comment .= ", but need $needed_to_discern_len bytes"
754 . " to discern:";
755 }
756 }
757 else {
758 $ret_should_be = 1;
759 $comment = "allowed";
760 }
761
9cdc3054 762 undef @warnings_gotten;
6aa905cf
KW
763
764 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
765 $disallow_flag);
766 is($ret, $ret_should_be,
d402d77f
KW
767 " And is_utf8_valid_partial_char_flags("
768 . display_bytes($partial)
769 . "), $comment: returns $ret_should_be");
9cdc3054 770 is(scalar @warnings_gotten, 0,
d402d77f 771 " And is_utf8_valid_partial_char_flags()"
6aa905cf 772 . " generated no warnings")
9cdc3054 773 or output_warnings(@warnings_gotten);
6aa905cf
KW
774 }
775 }
776 }
777
8f79178b 778 # Now test the to/from UTF-8 calls
69485e19
KW
779 # This is more complicated than the malformations tested in other files in
780 # this directory, as there are several orthogonal variables involved. We
781 # test most possible combinations
8f79178b
KW
782 foreach my $do_disallow (0, 1) {
783 next if $skip_most_tests && ! $do_disallow;
784
785 # We try various combinations of malformations that can occur
786 foreach my $short ("", "short") {
787 next if $skip_most_tests && $short;
788 foreach my $unexpected_noncont ("", "unexpected non-continuation") {
789 next if $skip_most_tests && $unexpected_noncont;
790 foreach my $overlong ("", "overlong") {
791 next if $overlong && $skip_most_tests;
792
793 # If we're creating an overlong, it can't be longer than the
794 # maximum length, so skip if we're already at that length.
795 next if $overlong && $length >= $::max_bytes;
6aa905cf 796
69485e19
KW
797 # We classify the warnings into certain "interesting" types,
798 # described later
799 foreach my $warning_type (0..4) {
800 next if $skip_most_tests && $warning_type != 1;
801 foreach my $use_warn_flag (0, 1) {
802 next if $skip_most_tests && ! $use_warn_flag;
803
8f79178b
KW
804 # Finally, here is the inner loop
805
69485e19
KW
806 my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
807 my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
808 my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
809 my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
810
811 my $eval_warn;
812 my $expect_regular_warnings;
813 my $expect_warnings_for_malformed;
814 my $expect_warnings_for_overflow;
815
816 if ($warning_type == 0) {
817 $eval_warn = "use warnings; no warnings 'deprecated'";
818 $expect_regular_warnings = $use_warn_flag;
819 $expect_warnings_for_overflow = 1;
820 $expect_warnings_for_malformed = 1;
6aa905cf 821 }
69485e19
KW
822 elsif ($warning_type == 1) {
823 $eval_warn = "no warnings";
824 $expect_regular_warnings = 0;
825 $expect_warnings_for_overflow = 0;
826 $expect_warnings_for_malformed = 0;
827 }
828 elsif ($warning_type == 2) {
829 $eval_warn = "no warnings; use warnings 'utf8'";
830 $expect_regular_warnings = $use_warn_flag;
831 $expect_warnings_for_overflow = 1;
832 $expect_warnings_for_malformed = 1;
833 }
834 elsif ($warning_type == 3) {
835 $eval_warn = "no warnings; use warnings"
e4e140b4
KW
836 . " '$controlling_warning_category'";
837 $expect_regular_warnings = $use_warn_flag;
69485e19 838 $expect_warnings_for_overflow
e4e140b4 839 = $controlling_warning_category eq 'non_unicode';
69485e19
KW
840 $expect_warnings_for_malformed = 0;
841 }
842 elsif ($warning_type == 4) { # Like type 3, but uses the
843 # above-31-bit flags
844 # The complement flags were set up so that the
845 # above-31-bit flags have been tested that they don't
846 # trigger wrongly for too small code points. And the
847 # flags have been set up so that those small code
848 # points are tested for being above Unicode. What's
849 # left to test is that the large code points do
850 # trigger the above-31-bit flags.
851 next if ! $will_overflow && $allowed_uv < 0x80000000;
e4e140b4 852 next if $controlling_warning_category ne 'non_unicode';
69485e19
KW
853 $eval_warn = "no warnings; use warnings 'non_unicode'";
854 $expect_regular_warnings = 1;
855 $expect_warnings_for_overflow = 1;
856 $expect_warnings_for_malformed = 0;
857 $this_utf8n_flag_to_warn = $::UTF8_WARN_ABOVE_31_BIT;
858 $this_utf8n_flag_to_disallow
859 = $::UTF8_DISALLOW_ABOVE_31_BIT;
860 $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
861 $this_uvchr_flag_to_disallow
862 = $::UNICODE_DISALLOW_ABOVE_31_BIT;
601e92f1
KW
863 }
864 else {
69485e19 865 die "Unexpected warning type '$warning_type'";
601e92f1
KW
866 }
867
69485e19
KW
868 # We only need to test the case where all warnings are
869 # enabled (type 0) to see if turning off the warning flag
870 # causes things to not be output. If those pass, then
871 # turning on some sub-category of warnings, or turning off
872 # warnings altogether are extremely likely to not output
873 # warnings either, given how the warnings subsystem is
874 # supposed to work, and this file assumes it does work.
875 next if $warning_type != 0 && ! $use_warn_flag;
876
877 # The convention is that the 'got' flag is the same value
878 # as the disallow one. If this were violated, the tests
879 # here should start failing.
880 my $return_flag = $this_utf8n_flag_to_disallow;
881
e4e140b4
KW
882 # If we aren't expecting warnings/disallow for this, turn
883 # on all the other flags. That makes sure that they all
884 # are independent of this flag, and so we don't need to
885 # test them individually.
886 my $this_warning_flags = ($use_warn_flag)
887 ? $this_utf8n_flag_to_warn
888 : $utf8n_flag_to_warn_complement;
69485e19 889 my $this_disallow_flags = ($do_disallow)
e4e140b4
KW
890 ? $this_utf8n_flag_to_disallow
891 : $utf8n_flag_to_disallow_complement;
6aa905cf
KW
892 my $this_bytes = $bytes;
893 my $this_length = $length;
894 my $expected_uv = $allowed_uv;
b3169593 895 my $this_expected_len = $length;
6aa905cf 896 my $this_needed_to_discern_len = $needed_to_discern_len;
6aa905cf 897
69485e19
KW
898 my @malformation_names;
899 my @expected_warnings;
900 my @expected_return_flags;
6aa905cf 901
69485e19
KW
902 # Now go through the possible malformations wanted, and
903 # change the input accordingly. We also can set up
904 # certain other things now, like whether we expect a
905 # return flag from this malformation and which flag.
906 if ($overlong) {
8f79178b
KW
907
908 # To force this malformation, we convert the original
909 # start byte into a continuation byte with the same
910 # data bits as originally. ...
911 substr($this_bytes, 0, 1)
912 = start_byte_to_cont(substr($this_bytes,
913 0, 1));
914
915 # ... Then we prepend it with a known overlong
916 # sequence. This should evaluate to the exact same
917 # code point as the original.
918 $this_bytes
919 = I8_to_native("\xff")
920 . (I8_to_native(chr $::lowest_continuation)
921 x ( $::max_bytes - 1 - length($this_bytes)))
922 . $this_bytes;
923 $this_length = length($this_bytes);
924 $this_needed_to_discern_len
925 = $::max_bytes - ($this_expected_len
6aa905cf 926 - $this_needed_to_discern_len);
8f79178b
KW
927 $this_expected_len = $::max_bytes;
928 push @expected_return_flags, $::UTF8_GOT_LONG;
69485e19
KW
929 push @malformation_names, $overlong;
930 if ($expect_warnings_for_malformed) {
931 if ( ! $short
932 && ! $unexpected_noncont
933 && ! $will_overflow)
934 {
935 my $overlong_bytes
936 = display_bytes_no_quotes($this_bytes);
937 my $correct_bytes
938 = display_bytes_no_quotes($bytes);
939 push @expected_warnings,
940 qr/\QMalformed UTF-8 character:\E
941 \Q $overlong_bytes (overlong;\E
942 \Q instead use $correct_bytes to\E
943 \Q represent U+$uv_string)/x;
944 }
945 else {
946 push @expected_warnings, qr/overlong/;
947 }
6aa905cf 948 }
69485e19 949 }
6aa905cf 950
69485e19
KW
951 if ($short) {
952 push @malformation_names, $short;
953 push @expected_warnings, qr/short/
954 if $expect_warnings_for_malformed;
955
956 # To force this malformation, just tell the test to
957 # not look as far as it should into the input.
958 $this_length--;
959 $this_expected_len--;
960 push @expected_return_flags, $::UTF8_GOT_SHORT;
961 }
962
963 if ($unexpected_noncont) {
964 push @malformation_names, $unexpected_noncont;
965 push @expected_warnings, qr/$unexpected_noncont/
966 if $expect_warnings_for_malformed;
967
968 # To force this malformation, change the final
969 # continuation byte into a non continuation.
970 my $pos = ($short) ? -2 : -1;
971 substr($this_bytes, $pos, 1) = '?';
972 $this_expected_len--;
973 push @expected_return_flags,
974 $::UTF8_GOT_NON_CONTINUATION;
975 }
976
977 # The overflow malformation is done differently than other
978 # malformations. It comes from manually typed tests in
979 # the test array, but it also is above Unicode and uses
980 # Perl extended UTF-8, so affects some of the flags being
981 # tested. We now make it be treated like one of the other
982 # generated malformations.
983 if ($will_overflow) {
984
985 # An overflow is (way) above Unicode, and overrides
986 # everything else.
987 $expect_regular_warnings = 0;
988
989 push @malformation_names, 'overflow';
990 if ($expect_warnings_for_overflow) {
991 my $qr = display_bytes_no_quotes(
992 substr($this_bytes, 0, $this_expected_len));
993 $qr = qr/\QMalformed UTF-8 character: \E
994 \Q$qr (overflows)\E/x;
995 push @expected_warnings, $qr;
6aa905cf 996 }
69485e19 997 push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
6aa905cf
KW
998 }
999
69485e19
KW
1000 # Here, we've set things up based on the malformations.
1001 # Now generate the text for them for the test name.
1002 my $malformations_name = "";
1003 if (@malformation_names) {
1004 $malformations_name .= "malformation";
1005 $malformations_name .= "s" if @malformation_names > 1;
1006 $malformations_name .= ": ";
1007 $malformations_name .= join "/", @malformation_names;
1008 $malformations_name = " ($malformations_name)";
1009 }
1010
1011 # It may be that the malformations have shortened the
1012 # amount of input we look at so much that we can't tell
1013 # what the category the code point was in. Otherwise, set
1014 # up the expected return flags based on the warnings and
1015 # disallowments.
1016 if ($this_expected_len < $this_needed_to_discern_len) {
1017 $expect_regular_warnings = 0;
1018 }
1019 elsif ( ($this_warning_flags & $this_utf8n_flag_to_warn)
1020 || ( $this_disallow_flags
1021 & $this_utf8n_flag_to_disallow))
1022 {
1023 push @expected_return_flags, $return_flag;
1024 }
1025
1026 # Finish setting up the expected warning.
1027 if ($expect_regular_warnings) {
1028
1029 # So far the array contains warnings generated by
1030 # malformations. Add the expected regular one.
1031 unshift @expected_warnings, $cp_message_qr;
1032
1033 # But it may need to be modified, because either of
1034 # these malformations means we can't determine the
1035 # expected code point.
1036 if ($short || $unexpected_noncont) {
1037 my $first_byte = substr($this_bytes, 0, 1);
1038 $expected_warnings[0] = display_bytes(
1039 substr($this_bytes, 0, $this_expected_len));
1040 $expected_warnings[0]
1041 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
1042 \Q $expected_warnings[0]\E
1043 \Q $non_cp_trailing_text\E/x;
1044 }
1045 }
6aa905cf
KW
1046
1047 # Is effectively disallowed if we've set up a
1048 # malformation, even if the flag indicates it is
1049 # allowed. Fix up test name to indicate this as
1050 # well
69485e19
KW
1051 my $disallowed = ( $this_disallow_flags
1052 & $this_utf8n_flag_to_disallow)
1053 || $malformations_name;
6aa905cf 1054 my $this_name = "utf8n_to_uvchr_error() $testname: "
69485e19
KW
1055 . (($disallowed)
1056 ? 'disallowed'
1057 : 'allowed');
6aa905cf 1058 $this_name .= ", $eval_warn";
69485e19
KW
1059 $this_name .= ", " . (( $this_warning_flags
1060 & $this_utf8n_flag_to_warn)
1061 ? 'with flag for raising warnings'
1062 : 'no flag for raising warnings');
1063 $this_name .= $malformations_name;
db0f09e6 1064
8f79178b 1065 # Do the actual test using an eval
9cdc3054 1066 undef @warnings_gotten;
6aa905cf 1067 my $ret_ref;
69485e19 1068 my $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1069 my $eval_text = "$eval_warn; \$ret_ref"
1070 . " = test_utf8n_to_uvchr_error("
69485e19 1071 . "'$this_bytes', $this_length, $this_flags)";
6aa905cf 1072 eval "$eval_text";
69485e19 1073 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
6aa905cf 1074 {
a8ee5133
KW
1075 diag "\$@='$@'; call was: "
1076 . utf8n_display_call($eval_text);
6aa905cf
KW
1077 next;
1078 }
1079 if ($disallowed) {
d402d77f 1080 is($ret_ref->[0], 0, " And returns 0")
a8ee5133 1081 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1082 }
1083 else {
1084 is($ret_ref->[0], $expected_uv,
d402d77f
KW
1085 " And returns expected uv: "
1086 . $uv_string)
a8ee5133 1087 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1088 }
1089 is($ret_ref->[1], $this_expected_len,
d402d77f 1090 " And returns expected length:"
6aa905cf 1091 . " $this_expected_len")
a8ee5133 1092 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1093
9cdc3054 1094 my $returned_flags = $ret_ref->[2];
6aa905cf 1095
9cdc3054 1096 for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
69485e19
KW
1097 if ($expected_return_flags[$i] & $returned_flags) {
1098 if ($expected_return_flags[$i]
1099 == $::UTF8_DISALLOW_ABOVE_31_BIT)
1100 {
1101 pass(" Expected and got return flag for"
1102 . " above_31_bit");
1103 }
1104 # The first entries in this are
1105 # malformations
1106 elsif ($i > @malformation_names - 1) {
1107 pass(" Expected and got return flag"
e4e140b4 1108 . " for " . $controlling_warning_category);
69485e19
KW
1109 }
1110 else {
1111 pass(" Expected and got return flag for "
1112 . $malformation_names[$i]
1113 . " malformation");
1114 }
9cdc3054 1115 $returned_flags &= ~$expected_return_flags[$i];
69485e19 1116 splice @expected_return_flags, $i, 1;
6aa905cf 1117 }
6aa905cf 1118 }
6aa905cf 1119
69485e19
KW
1120 is($returned_flags, 0,
1121 " Got no unexpected return flags")
1122 or diag "The unexpected flags gotten were: "
5722c46d
KW
1123 . (flags_to_text($returned_flags,
1124 \@utf8n_flags_to_text)
69485e19
KW
1125 # We strip off any prefixes from the flag
1126 # names
1127 =~ s/ \b [A-Z] _ //xgr);
1128 is (scalar @expected_return_flags, 0,
1129 " Got all expected return flags")
1130 or diag "The expected flags not gotten were: "
1131 . (flags_to_text(eval join("|",
1132 @expected_return_flags),
1133 \@utf8n_flags_to_text)
1134 # We strip off any prefixes from the flag
1135 # names
5722c46d 1136 =~ s/ \b [A-Z] _ //xgr);
9cdc3054 1137
69485e19
KW
1138 do_warnings_test(@expected_warnings)
1139 or diag "Call was: " . utf8n_display_call($eval_text);
1140 undef @warnings_gotten;
6aa905cf
KW
1141
1142 # Check CHECK_ONLY results when the input is
1143 # disallowed. Do this when actually disallowed,
69485e19 1144 # not just when the $this_disallow_flags is set
6aa905cf 1145 if ($disallowed) {
69485e19
KW
1146 my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1147 my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1148 . " test_utf8n_to_uvchr_error('"
1149 . "$this_bytes', $this_length,"
1150 . " $this_flags)";
1151 eval $eval_text;
a8ee5133
KW
1152 if (! ok ("$@ eq ''",
1153 " And eval succeeded with CHECK_ONLY"))
1154 {
1155 diag "\$@='$@'; Call was: "
1156 . utf8n_display_call($eval_text);
1157 next;
1158 }
d402d77f 1159 is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0")
a8ee5133 1160 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1161 is($ret_ref->[1], -1,
d402d77f 1162 " CHECK_ONLY: returns -1 for length")
a8ee5133 1163 or diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1164 if (! is(scalar @warnings_gotten, 0,
d402d77f 1165 " CHECK_ONLY: no warnings generated"))
6aa905cf 1166 {
a8ee5133 1167 diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1168 output_warnings(@warnings_gotten);
6aa905cf
KW
1169 }
1170 }
1171
1172 # Now repeat some of the above, but for
1173 # uvchr_to_utf8_flags(). Since this comes from an
8f79178b
KW
1174 # existing code point, it hasn't overflowed, and isn't
1175 # malformed.
69485e19
KW
1176 next if @malformation_names;
1177
1178 $this_warning_flags = ($use_warn_flag)
1179 ? $this_uvchr_flag_to_warn
1180 : 0;
1181 $this_disallow_flags = ($do_disallow)
1182 ? $this_uvchr_flag_to_disallow
1183 : 0;
1184
1185 $disallowed = $this_disallow_flags
1186 & $this_uvchr_flag_to_disallow;
1187 $this_name .= ", " . (( $this_warning_flags
1188 & $this_utf8n_flag_to_warn)
1189 ? 'with flag for raising warnings'
1190 : 'no flag for raising warnings');
6aa905cf
KW
1191
1192 $this_name = "uvchr_to_utf8_flags() $testname: "
69485e19 1193 . (($disallowed)
6aa905cf 1194 ? 'disallowed'
6aa905cf
KW
1195 : 'allowed');
1196 $this_name .= ", $eval_warn";
69485e19
KW
1197 $this_name .= ", " . (( $this_warning_flags
1198 & $this_uvchr_flag_to_warn)
6aa905cf
KW
1199 ? 'with warning flag'
1200 : 'no warning flag');
1201
9cdc3054 1202 undef @warnings_gotten;
6aa905cf 1203 my $ret;
69485e19 1204 $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1205 $eval_text = "$eval_warn; \$ret ="
1206 . " test_uvchr_to_utf8_flags("
d884ea32 1207 . "$allowed_uv, $this_flags)";
6aa905cf
KW
1208 eval "$eval_text";
1209 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1210 {
d884ea32
KW
1211 diag "\$@='$@'; call was: "
1212 . uvchr_display_call($eval_text);
6aa905cf
KW
1213 next;
1214 }
1215 if ($disallowed) {
d402d77f 1216 is($ret, undef, " And returns undef")
d884ea32 1217 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1218 }
1219 else {
d402d77f 1220 is($ret, $this_bytes, " And returns expected string")
d884ea32 1221 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf 1222 }
69485e19
KW
1223
1224 do_warnings_test(@expected_warnings)
1225 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1226 }
1227 }
6aa905cf
KW
1228 }
1229 }
8f79178b 1230 }
6aa905cf
KW
1231 }
1232}
1233
1234done_testing;