This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: Revamp testing isFOO
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
1 #!perl -w
2
3 # This is a base file to be used by various .t's in its directory
4 # It tests various code points that are "problematic", and verifies that the
5 # correct warnings/flags etc are generated when using them.  It also takes the
6 # UTF-8 for some of them and perturbs it to be malformed in various ways, and
7 # tests that this gets appropriately detected.
8
9 use strict;
10 use Test::More;
11
12 BEGIN {
13     use_ok('XS::APItest');
14     require 'charset_tools.pl';
15     require './t/utf8_setup.pl';
16 };
17
18 $|=1;
19
20 no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
21                           # machines, and that is tested elsewhere
22
23 use XS::APItest;
24 use Data::Dumper;
25
26 my @warnings_gotten;
27
28 use warnings 'utf8';
29 local $SIG{__WARN__} = sub { my @copy = @_;
30                              push @warnings_gotten, map { chomp; $_ } @copy;
31                            };
32 my @tests;
33 {
34     no warnings qw(portable overflow);
35     @tests = (
36         # $testname,
37         # $bytes,                  UTF-8 string
38         # $allowed_uv,             code point $bytes evaluates to; -1 if
39         #                          overflows
40         # $needed_to_discern_len   optional, how long an initial substring do
41         #                          we need to tell that the string must be for
42         #                          a code point in the category it falls in,
43         #                          like being a surrogate; 0 indicates we need
44         #                          the whole string.  Some categories have a
45         #                          default that is used if this is omitted.
46         [ "lowest surrogate",
47             (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
48             0xD800,
49         ],
50         [ "a middle surrogate",
51             (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
52             0xD90D,
53         ],
54         [ "highest surrogate",
55             (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
56             0xDFFF,
57         ],
58         [ "first of 32 consecutive non-character code points",
59             (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
60             0xFDD0,
61         ],
62         [ "a mid non-character code point of the 32 consecutive ones",
63             (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
64             0xFDE0,
65         ],
66         [ "final of 32 consecutive non-character code points",
67             (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
68             0xFDEF,
69         ],
70         [ "non-character code point U+FFFE",
71             (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
72             0xFFFE,
73         ],
74         [ "non-character code point U+FFFF",
75             (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
76             0xFFFF,
77         ],
78         [ "non-character code point U+1FFFE",
79             (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
80             0x1FFFE,
81         ],
82         [ "non-character code point U+1FFFF",
83             (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
84             0x1FFFF,
85         ],
86         [ "non-character code point U+2FFFE",
87             (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
88             0x2FFFE,
89         ],
90         [ "non-character code point U+2FFFF",
91             (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
92             0x2FFFF,
93         ],
94         [ "non-character code point U+3FFFE",
95             (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
96             0x3FFFE,
97         ],
98         [ "non-character code point U+3FFFF",
99             (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
100             0x3FFFF,
101         ],
102         [ "non-character code point U+4FFFE",
103             (isASCII)
104             ?               "\xf1\x8f\xbf\xbe"
105             : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
106             0x4FFFE,
107         ],
108         [ "non-character code point U+4FFFF",
109             (isASCII)
110             ?               "\xf1\x8f\xbf\xbf"
111             : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
112             0x4FFFF,
113         ],
114         [ "non-character code point U+5FFFE",
115             (isASCII)
116             ?              "\xf1\x9f\xbf\xbe"
117             : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
118             0x5FFFE,
119         ],
120         [ "non-character code point U+5FFFF",
121             (isASCII)
122             ?              "\xf1\x9f\xbf\xbf"
123             : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
124             0x5FFFF,
125         ],
126         [ "non-character code point U+6FFFE",
127             (isASCII)
128             ?              "\xf1\xaf\xbf\xbe"
129             : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
130             0x6FFFE,
131         ],
132         [ "non-character code point U+6FFFF",
133             (isASCII)
134             ?              "\xf1\xaf\xbf\xbf"
135             : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
136             0x6FFFF,
137         ],
138         [ "non-character code point U+7FFFE",
139             (isASCII)
140             ?              "\xf1\xbf\xbf\xbe"
141             : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
142             0x7FFFE,
143         ],
144         [ "non-character code point U+7FFFF",
145             (isASCII)
146             ?              "\xf1\xbf\xbf\xbf"
147             : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
148             0x7FFFF,
149         ],
150         [ "non-character code point U+8FFFE",
151             (isASCII)
152             ?              "\xf2\x8f\xbf\xbe"
153             : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
154             0x8FFFE,
155         ],
156         [ "non-character code point U+8FFFF",
157             (isASCII)
158             ?              "\xf2\x8f\xbf\xbf"
159             : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
160             0x8FFFF,
161         ],
162         [ "non-character code point U+9FFFE",
163             (isASCII)
164             ?              "\xf2\x9f\xbf\xbe"
165             : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
166             0x9FFFE,
167         ],
168         [ "non-character code point U+9FFFF",
169             (isASCII)
170             ?              "\xf2\x9f\xbf\xbf"
171             : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
172             0x9FFFF,
173         ],
174         [ "non-character code point U+AFFFE",
175             (isASCII)
176             ?              "\xf2\xaf\xbf\xbe"
177             : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
178             0xAFFFE,
179         ],
180         [ "non-character code point U+AFFFF",
181             (isASCII)
182             ?              "\xf2\xaf\xbf\xbf"
183             : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
184             0xAFFFF,
185         ],
186         [ "non-character code point U+BFFFE",
187             (isASCII)
188             ?              "\xf2\xbf\xbf\xbe"
189             : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
190             0xBFFFE,
191         ],
192         [ "non-character code point U+BFFFF",
193             (isASCII)
194             ?              "\xf2\xbf\xbf\xbf"
195             : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
196             0xBFFFF,
197         ],
198         [ "non-character code point U+CFFFE",
199             (isASCII)
200             ?              "\xf3\x8f\xbf\xbe"
201             : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
202             0xCFFFE,
203         ],
204         [ "non-character code point U+CFFFF",
205             (isASCII)
206             ?              "\xf3\x8f\xbf\xbf"
207             : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
208             0xCFFFF,
209         ],
210         [ "non-character code point U+DFFFE",
211             (isASCII)
212             ?              "\xf3\x9f\xbf\xbe"
213             : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
214             0xDFFFE,
215         ],
216         [ "non-character code point U+DFFFF",
217             (isASCII)
218             ?              "\xf3\x9f\xbf\xbf"
219             : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
220             0xDFFFF,
221         ],
222         [ "non-character code point U+EFFFE",
223             (isASCII)
224             ?              "\xf3\xaf\xbf\xbe"
225             : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
226             0xEFFFE,
227         ],
228         [ "non-character code point U+EFFFF",
229             (isASCII)
230             ?              "\xf3\xaf\xbf\xbf"
231             : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
232             0xEFFFF,
233         ],
234         [ "non-character code point U+FFFFE",
235             (isASCII)
236             ?              "\xf3\xbf\xbf\xbe"
237             : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
238             0xFFFFE,
239         ],
240         [ "non-character code point U+FFFFF",
241             (isASCII)
242             ?              "\xf3\xbf\xbf\xbf"
243             : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
244             0xFFFFF,
245         ],
246         [ "non-character code point U+10FFFE",
247             (isASCII)
248             ?              "\xf4\x8f\xbf\xbe"
249             : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
250             0x10FFFE,
251         ],
252         [ "non-character code point U+10FFFF",
253             (isASCII)
254             ?              "\xf4\x8f\xbf\xbf"
255             : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
256             0x10FFFF,
257         ],
258         [ "first non_unicode",
259             (isASCII)
260             ?              "\xf4\x90\x80\x80"
261             : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
262             0x110000,
263             2,
264         ],
265         [ "non_unicode whose first byte tells that",
266             (isASCII)
267             ?              "\xf5\x80\x80\x80"
268             : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
269             (isASCII) ? 0x140000 : 0x200000,
270             1,
271         ],
272         [ "lowest 32 bit code point",
273             (isASCII)
274             ?  "\xfe\x82\x80\x80\x80\x80\x80"
275             : I8_to_native(
276                 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
277             0x80000000,
278             (isASCII) ? 1 : 8,
279         ],
280         [ "highest 32 bit code point",
281             (isASCII)
282             ?  "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
283             : I8_to_native(
284                "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
285             0xFFFFFFFF,
286             (isASCII) ? 1 : 8,
287         ],
288         [ "Lowest 33 bit code point",
289             (isASCII)
290             ?  "\xfe\x84\x80\x80\x80\x80\x80"
291             : I8_to_native(
292                 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"),
293             ($::is64bit) ? 0x100000000 : -1,   # Overflows on 32-bit systems
294             (isASCII && ! $::is64bit) ? 2 : 1,
295         ],
296     );
297
298     if (! $::is64bit) {
299         if (isASCII) {
300             push @tests,
301                 [ "overflow that old algorithm failed to detect",
302                     "\xfe\x86\x80\x80\x80\x80\x80",
303                     -1,
304                     2,
305                 ];
306         }
307     }
308
309     if ($::is64bit) {
310         push @tests,
311             [ "highest 64 bit code point",
312               (isASCII)
313               ? "\xff\x80\x8f\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"
314               : I8_to_native(
315                 "\xff\xaf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"),
316               0xFFFFFFFFFFFFFFFF,
317               (isASCII) ? 1 : 2,
318             ],
319             [ "first 65 bit code point",
320               (isASCII)
321               ? "\xff\x80\x9f\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
322               : I8_to_native(
323                 "\xff\xb0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
324               -1,
325               (isASCII) ? 3 : 2,
326             ];
327         if (isASCII) {
328             push @tests,
329                 [ "Lowest code point requiring 13 bytes to represent",
330                     "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80",
331                     0x1000000000,
332                     1,
333                 ],
334                 [ "overflow that old algorithm failed to detect",
335                     "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
336                     -1,
337                     3,
338                 ];
339         }
340         else {
341             push @tests,    # These could falsely show wrongly in a naive
342                             # implementation
343                 [ "requires at least 32 bits",
344                     I8_to_native(
345                     "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
346                     0x800000000,
347                     7,
348                 ],
349                 [ "requires at least 32 bits",
350                     I8_to_native(
351                     "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
352                     0x10000000000,
353                     6,
354                 ],
355                 [ "requires at least 32 bits",
356                     I8_to_native(
357                     "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
358                     0x200000000000,
359                     5,
360                 ],
361                 [ "requires at least 32 bits",
362                     I8_to_native(
363                     "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
364                     0x4000000000000,
365                     4,
366                 ],
367                 [ "requires at least 32 bits",
368                     I8_to_native(
369                     "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
370                     0x80000000000000,
371                     3,
372                 ],
373                 [ "requires at least 32 bits",
374                     I8_to_native(
375                     "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
376                     0x1000000000000000,
377                     2,
378                 ];
379         }
380     }
381 }
382
383 sub flags_to_text($$)
384 {
385     my ($flags, $flags_to_text_ref) = @_;
386
387     # Returns a string containing a mnemonic representation of the bits that
388     # are set in the $flags.  These are assumed to be flag bits.  The return
389     # looks like "FOO|BAR|BAZ".  The second parameter is a reference to an
390     # array that gives the textual representation of all the possible flags.
391     # Element 0 is the text for the bit 0 flag; element 1 for bit 1; ....  If
392     # no bits at all are set the string "0" is returned;
393
394     my @flag_text;
395     my $shift = 0;
396
397     return "0" if $flags == 0;
398
399     while ($flags) {
400         #diag sprintf "%x", $flags;
401         if ($flags & 1) {
402             push @flag_text, $flags_to_text_ref->[$shift];
403         }
404         $shift++;
405         $flags >>= 1;
406     }
407
408     return join "|", @flag_text;
409 }
410
411 # Possible flag returns from utf8n_to_uvchr_error().  These should have G_,
412 # instead of A_, D_, but the prefixes will be used in a a later commit, so
413 # minimize churn by having them here.
414 my @utf8n_flags_to_text =  ( qw(
415         A_EMPTY
416         A_CONTINUATION
417         A_NON_CONTINUATION
418         A_SHORT
419         A_LONG
420         A_LONG_AND_ITS_VALUE
421         PLACEHOLDER
422         A_OVERFLOW
423         D_SURROGATE
424         W_SURROGATE
425         D_NONCHAR
426         W_NONCHAR
427         D_SUPER
428         W_SUPER
429         D_ABOVE_31_BIT
430         W_ABOVE_31_BIT
431         CHECK_ONLY
432         NO_CONFIDENCE_IN_CURLEN_
433     ) );
434
435 sub utf8n_display_call($)
436 {
437     # Converts an eval string that calls test_utf8n_to_uvchr into a more human
438     # readable form, and returns it.  Doesn't work if the byte string contains
439     # an apostrophe.  The return will look something like:
440     #   test_utf8n_to_uvchr_error('$bytes', $length, $flags)
441     #diag $_[0];
442
443     $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
444     my $text1 = $1;     # Everything before the byte string
445     my $bytes = $2;
446     my $text2 = $3;     # Includes the length
447     my $flags = $4;
448
449     return $text1
450          . display_bytes($bytes)
451          . $text2
452          . flags_to_text($flags, \@utf8n_flags_to_text)
453          . ')';
454 }
455
456 sub uvchr_display_call($)
457 {
458     # Converts an eval string that calls test_uvchr_to_utf8 into a more human
459     # readable form, and returns it.  The return will look something like:
460     #   test_uvchr_to_utf8n_flags($uv, $flags)
461     #diag $_[0];
462
463     my @flags_to_text =  ( qw(
464             W_SURROGATE
465             W_NONCHAR
466             W_SUPER
467             W_ABOVE_31_BIT
468             D_SURROGATE
469             D_NONCHAR
470             D_SUPER
471             D_ABOVE_31_BIT
472        ) );
473
474     $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
475     my $text = $1;
476     my $cp = sprintf "%X", $2;
477     my $flags = $3;
478
479     return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
480 }
481
482 sub do_warnings_test(@)
483 {
484     my @expected_warnings = @_;
485
486     # Compares the input expected warnings array with @warnings_gotten,
487     # generating a pass for each found, removing it from @warnings_gotten.
488     # Any discrepancies generate test failures.  Returns TRUE if no
489     # discrepcancies; otherwise FALSE.
490
491     my $succeeded = 1;
492
493     if (@expected_warnings == 0) {
494         if (! is(@warnings_gotten, 0, "    Expected and got no warnings")) {
495             output_warnings(@warnings_gotten);
496             $succeeded = 0;
497         }
498         return $succeeded;
499     }
500
501     # Check that we got all the expected warnings,
502     # removing each one found
503   WARNING:
504     foreach my $expected (@expected_warnings) {
505         foreach (my $i = 0; $i < @warnings_gotten; $i++) {
506             if ($warnings_gotten[$i] =~ $expected) {
507                 pass("    Expected and got warning: "
508                     . " $warnings_gotten[$i]");
509                 splice @warnings_gotten, $i, 1;
510                 next WARNING;
511             }
512         }
513         fail("    Expected a warning that matches "
514             . $expected . " but didn't get it");
515         $succeeded = 0;
516     }
517
518     if (! is(@warnings_gotten, 0, "    Got no unexpected warnings")) {
519         output_warnings(@warnings_gotten);
520         $succeeded = 0;
521     }
522
523     return $succeeded;
524 }
525
526 # This test is split into this number of files.
527 my $num_test_files = $ENV{TEST_JOBS} || 1;
528 $num_test_files = 10 if $num_test_files > 10;
529
530 my $test_count = -1;
531 foreach my $test (@tests) {
532     $test_count++;
533     next if $test_count % $num_test_files != $::TEST_CHUNK;
534
535     my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
536
537     my $length = length $bytes;
538     my $will_overflow = $allowed_uv < 0;
539
540     my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
541     my $display_bytes = display_bytes($bytes);
542
543     my $controlling_warning_category;
544     my $utf8n_flag_to_warn;
545     my $utf8n_flag_to_disallow;
546     my $uvchr_flag_to_warn;
547     my $uvchr_flag_to_disallow;
548
549     # We want to test that the independent flags are actually independent.
550     # For example, that a surrogate doesn't trigger a non-character warning,
551     # and conversely, turning off an above-Unicode flag doesn't suppress a
552     # surrogate warning.  Earlier versions of this file used nested loops to
553     # test all possible combinations.  But that creates lots of tests, making
554     # this run too long.  What is now done instead is to use the complement of
555     # the category we are testing to greatly reduce the combinatorial
556     # explosion.  For example, if we have a surrogate and we aren't expecting
557     # a warning about it, we set all the flags for non-surrogates to raise
558     # warnings.  If one shows up, it indicates the flags aren't independent.
559     my $utf8n_flag_to_warn_complement;
560     my $utf8n_flag_to_disallow_complement;
561     my $uvchr_flag_to_warn_complement;
562     my $uvchr_flag_to_disallow_complement;
563
564     # Many of the code points being tested are middling in that if code point
565     # edge cases work, these are very likely to as well.  Because this test
566     # file takes a while to execute, we skip testing the edge effects of code
567     # points deemed middling, while testing their basics and continuing to
568     # fully test the non-middling code points.
569     my $skip_most_tests = 0;
570
571     my $cp_message_qr;      # Pattern that matches the message raised when
572                             # that message contains the problematic code
573                             # point.  The message is the same (currently) both
574                             # when going from/to utf8.
575     my $non_cp_trailing_text;   # The suffix text when the message doesn't
576                                 # contain a code point.  (This is a result of
577                                 # some sort of malformation that means we
578                                 # can't get an exact code poin
579
580     # Is this test malformed from the beginning?  If so, we know to generally
581     # expect that the tests will show it isn't valid.
582     my $initially_malformed = 0;
583
584     if ($will_overflow || $allowed_uv > 0x10FFFF) {
585
586         # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
587         $utf8n_flag_to_warn     = $::UTF8_WARN_SUPER;
588         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
589         $uvchr_flag_to_warn     = $::UNICODE_WARN_SUPER;
590         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
591
592         # Below, we add the flags for non-above-31 bit to the code points that
593         # don't fit that category.  Special tests are done for this category
594         # in the inner loop.
595         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
596                                             |$::UTF8_WARN_SURROGATE;
597         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
598                                             |$::UTF8_DISALLOW_SURROGATE;
599         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
600                                             |$::UNICODE_WARN_SURROGATE;
601         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
602                                             |$::UNICODE_DISALLOW_SURROGATE;
603         $controlling_warning_category = 'non_unicode';
604
605         if ($will_overflow) {  # This is realy a malformation
606             $non_cp_trailing_text = "if you see this, there is an error";
607             $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
608             $initially_malformed = 1;
609         }
610         elsif ($allowed_uv > 0x7FFFFFFF) {
611             $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
612                                 \Q and not portable\E/x;
613             $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
614         }
615         else {
616             $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
617                                 \Q may not be portable\E/x;
618             $non_cp_trailing_text = "is for a non-Unicode code point, may not"
619                                 . " be portable";
620             $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_ABOVE_31_BIT;
621             $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
622             $uvchr_flag_to_warn_complement     |= $::UNICODE_WARN_ABOVE_31_BIT;
623             $uvchr_flag_to_disallow_complement
624                                             |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
625         }
626     }
627     elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
628         $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
629         $non_cp_trailing_text = "is for a surrogate";
630         $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
631         $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
632
633         $utf8n_flag_to_warn     = $::UTF8_WARN_SURROGATE;
634         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
635         $uvchr_flag_to_warn     = $::UNICODE_WARN_SURROGATE;
636         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
637
638         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
639                                             |$::UTF8_WARN_SUPER
640                                             |$::UTF8_WARN_ABOVE_31_BIT;
641         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
642                                             |$::UTF8_DISALLOW_SUPER
643                                             |$::UTF8_DISALLOW_ABOVE_31_BIT;
644         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
645                                             |$::UNICODE_WARN_SUPER
646                                             |$::UNICODE_WARN_ABOVE_31_BIT;
647         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
648                                             |$::UNICODE_DISALLOW_SUPER
649                                             |$::UNICODE_DISALLOW_ABOVE_31_BIT;
650         $controlling_warning_category = 'surrogate';
651     }
652     elsif (   ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
653            || ($allowed_uv & 0xFFFE) == 0xFFFE)
654     {
655         $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
656                             \Q is not recommended for open interchange\E/x;
657         $non_cp_trailing_text = "if you see this, there is an error";
658         $needed_to_discern_len = $length unless defined $needed_to_discern_len;
659         if (   ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
660             || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
661         {
662             $skip_most_tests = 1;
663         }
664
665         $utf8n_flag_to_warn     = $::UTF8_WARN_NONCHAR;
666         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
667         $uvchr_flag_to_warn     = $::UNICODE_WARN_NONCHAR;
668         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
669
670         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_SURROGATE
671                                             |$::UTF8_WARN_SUPER
672                                             |$::UTF8_WARN_ABOVE_31_BIT;
673         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
674                                             |$::UTF8_DISALLOW_SUPER
675                                             |$::UTF8_DISALLOW_ABOVE_31_BIT;
676         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_SURROGATE
677                                             |$::UNICODE_WARN_SUPER
678                                             |$::UNICODE_WARN_ABOVE_31_BIT;
679         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
680                                             |$::UNICODE_DISALLOW_SUPER
681                                             |$::UNICODE_DISALLOW_ABOVE_31_BIT;
682
683         $controlling_warning_category = 'nonchar';
684     }
685     else {
686         die "Can't figure out what type of warning to test for $testname"
687     }
688
689     die 'Didn\'t set $needed_to_discern_len for ' . $testname
690                                         unless defined $needed_to_discern_len;
691
692     {   # First test the isFOO calls
693         use warnings; no warnings 'deprecated';   # Make sure these don't raise warnings
694         undef @warnings_gotten;
695
696         my $ret = test_isUTF8_CHAR($bytes, $length);
697         my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
698         if ($initially_malformed) {
699             is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
700             is($ret_flags, 0, "    And isUTF8_CHAR_flags() returns 0");
701         }
702         else {
703             is($ret, $length,
704                "For $testname: isUTF8_CHAR() returns expected length: $length");
705             is($ret_flags, $length, "    And isUTF8_CHAR_flags(...,0)"
706                                   . " returns expected length: $length");
707         }
708         is(scalar @warnings_gotten, 0,
709                 "    And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
710               . " any warnings")
711           or output_warnings(@warnings_gotten);
712
713         undef @warnings_gotten;
714         $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
715         if ($initially_malformed) {
716             is($ret, 0, "    And isSTRICT_UTF8_CHAR() returns 0");
717         }
718         else {
719             my $expected_ret = (   $testname =~ /surrogate|non-character/
720                                 || $allowed_uv > 0x10FFFF)
721                                ? 0
722                                : $length;
723             is($ret, $expected_ret, "    And isSTRICT_UTF8_CHAR() returns"
724                                   . " expected length: $expected_ret");
725             $ret = test_isUTF8_CHAR_flags($bytes, $length,
726                                           $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
727             is($ret, $expected_ret,
728                     "    And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
729                     . " acts like isSTRICT_UTF8_CHAR");
730         }
731         is(scalar @warnings_gotten, 0,
732                 "    And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
733               . " generated any warnings")
734           or output_warnings(@warnings_gotten);
735
736         undef @warnings_gotten;
737         $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
738         if ($initially_malformed) {
739             is($ret, 0, "    And isC9_STRICT_UTF8_CHAR() returns 0");
740         }
741         else {
742             my $expected_ret = (   $testname =~ /surrogate/
743                                 || $allowed_uv > 0x10FFFF)
744                                ? 0
745                                : $length;
746             is($ret, $expected_ret, "    And isC9_STRICT_UTF8_CHAR()"
747                                    ." returns expected length: $expected_ret");
748             $ret = test_isUTF8_CHAR_flags($bytes, $length,
749                                         $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
750             is($ret, $expected_ret,
751                   "    And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
752                 . " acts like isC9_STRICT_UTF8_CHAR");
753         }
754         is(scalar @warnings_gotten, 0,
755                 "    And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
756               . " generated any warnings")
757           or output_warnings(@warnings_gotten);
758
759         foreach my $disallow_type (0..2) {
760             # 0 is don't disallow this type of code point
761             # 1 is do disallow
762             # 2 is do disallow, but only for above 31 bit
763
764             my $disallow_flags;
765             my $expected_ret;
766
767             if ($initially_malformed) {
768
769                 # Malformations are by default disallowed, so testing with
770                 # $disallow_type equal to 0 is sufficicient.
771                 next if $disallow_type;
772
773                 $disallow_flags = 0;
774                 $expected_ret = 0;
775             }
776             elsif ($disallow_type == 1) {
777                 $disallow_flags = $utf8n_flag_to_disallow;
778                 $expected_ret = 0;
779             }
780             elsif ($disallow_type == 2) {
781                 next if ! $will_overflow && $allowed_uv < 0x80000000;
782                 $disallow_flags = $::UTF8_DISALLOW_ABOVE_31_BIT;
783                 $expected_ret = 0;
784             }
785             else {  # type is 0
786                 $disallow_flags = $utf8n_flag_to_disallow_complement;
787                 $expected_ret = $length;
788             }
789
790             $ret = test_isUTF8_CHAR_flags($bytes, $length, $disallow_flags);
791             is($ret, $expected_ret, "    And isUTF8_CHAR_flags("
792                                   . "$display_bytes, $disallow_flags) returns "
793                                   . $expected_ret)
794              or diag "The flags mean "
795               . flags_to_text($disallow_flags, \@utf8n_flags_to_text);
796
797             is(scalar @warnings_gotten, 0,
798                     "    And isUTF8_CHAR_flags(...) generated no warnings")
799             or output_warnings(@warnings_gotten);
800
801             # Test partial character handling, for each byte not a full character
802             for (my $j = 1; $j < $length - 1; $j++) {
803                 my $partial = substr($bytes, 0, $j);
804                 my $ret_should_be;
805                 my $comment;
806                 if ($disallow_type || $initially_malformed) {
807                     $ret_should_be = 0;
808                     $comment = "disallowed";
809                     if ($j < $needed_to_discern_len) {
810                         $ret_should_be = 1;
811                         $comment .= ", but need $needed_to_discern_len"
812                                  . " bytes to discern:";
813                     }
814                 }
815                 else {
816                     $ret_should_be = 1;
817                     $comment = "allowed";
818                 }
819
820                 undef @warnings_gotten;
821
822                 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
823                                                              $disallow_flags);
824                 is($ret, $ret_should_be,
825                     "    And is_utf8_valid_partial_char_flags("
826                     . display_bytes($partial)
827                     . ", $disallow_flags), $comment: returns $ret_should_be")
828                  or diag "The flags mean "
829                   . flags_to_text($disallow_flags, \@utf8n_flags_to_text);
830                 is(scalar @warnings_gotten, 0,
831                         "    And is_utf8_valid_partial_char_flags()"
832                       . " generated no warnings")
833                   or output_warnings(@warnings_gotten);
834             }
835         }
836     }
837
838     # Now test the to/from UTF-8 calls
839     # This is more complicated than the malformations tested in other files in
840     # this directory, as there are several orthogonal variables involved.  We
841     # test most possible combinations
842     foreach my $do_disallow (0, 1) {
843       next if $skip_most_tests && ! $do_disallow;
844
845       # We try various combinations of malformations that can occur
846       foreach my $short ("", "short") {
847         next if $skip_most_tests && $short;
848         foreach my $unexpected_noncont ("", "unexpected non-continuation") {
849           next if $skip_most_tests && $unexpected_noncont;
850           foreach my $overlong ("", "overlong") {
851             next if $overlong && $skip_most_tests;
852
853             # If we're creating an overlong, it can't be longer than the
854             # maximum length, so skip if we're already at that length.
855             next if $overlong && $length >= $::max_bytes;
856
857               # We classify the warnings into certain "interesting" types,
858               # described later
859               foreach my $warning_type (0..4) {
860                 next if $skip_most_tests && $warning_type != 1;
861                 foreach my $use_warn_flag (0, 1) {
862                     next if $skip_most_tests && ! $use_warn_flag;
863
864                     # Finally, here is the inner loop
865
866                     my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
867                     my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
868                     my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
869                     my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
870
871                     my $eval_warn;
872                     my $expect_regular_warnings;
873                     my $expect_warnings_for_malformed;
874                     my $expect_warnings_for_overflow;
875
876                     if ($warning_type == 0) {
877                         $eval_warn = "use warnings; no warnings 'deprecated'";
878                         $expect_regular_warnings = $use_warn_flag;
879                         $expect_warnings_for_overflow = 1;
880                         $expect_warnings_for_malformed = 1;
881                     }
882                     elsif ($warning_type == 1) {
883                         $eval_warn = "no warnings";
884                         $expect_regular_warnings = 0;
885                         $expect_warnings_for_overflow = 0;
886                         $expect_warnings_for_malformed = 0;
887                     }
888                     elsif ($warning_type == 2) {
889                         $eval_warn = "no warnings; use warnings 'utf8'";
890                         $expect_regular_warnings = $use_warn_flag;
891                         $expect_warnings_for_overflow = 1;
892                         $expect_warnings_for_malformed = 1;
893                     }
894                     elsif ($warning_type == 3) {
895                         $eval_warn = "no warnings; use warnings"
896                                    . " '$controlling_warning_category'";
897                         $expect_regular_warnings = $use_warn_flag;
898                         $expect_warnings_for_overflow
899                             = $controlling_warning_category eq 'non_unicode';
900                         $expect_warnings_for_malformed = 0;
901                     }
902                     elsif ($warning_type == 4) {  # Like type 3, but uses the
903                                                   # above-31-bit flags
904                         # The complement flags were set up so that the
905                         # above-31-bit flags have been tested that they don't
906                         # trigger wrongly for too small code points.  And the
907                         # flags have been set up so that those small code
908                         # points are tested for being above Unicode.  What's
909                         # left to test is that the large code points do
910                         # trigger the above-31-bit flags.
911                         next if ! $will_overflow && $allowed_uv < 0x80000000;
912                         next if $controlling_warning_category ne 'non_unicode';
913                         $eval_warn = "no warnings; use warnings 'non_unicode'";
914                         $expect_regular_warnings = 1;
915                         $expect_warnings_for_overflow = 1;
916                         $expect_warnings_for_malformed = 0;
917                         $this_utf8n_flag_to_warn   = $::UTF8_WARN_ABOVE_31_BIT;
918                         $this_utf8n_flag_to_disallow
919                                                 = $::UTF8_DISALLOW_ABOVE_31_BIT;
920                         $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
921                         $this_uvchr_flag_to_disallow
922                                              = $::UNICODE_DISALLOW_ABOVE_31_BIT;
923                     }
924                     else {
925                        die "Unexpected warning type '$warning_type'";
926                     }
927
928                     # We only need to test the case where all warnings are
929                     # enabled (type 0) to see if turning off the warning flag
930                     # causes things to not be output.  If those pass, then
931                     # turning on some sub-category of warnings, or turning off
932                     # warnings altogether are extremely likely to not output
933                     # warnings either, given how the warnings subsystem is
934                     # supposed to work, and this file assumes it does work.
935                     next if $warning_type != 0 && ! $use_warn_flag;
936
937                     # The convention is that the 'got' flag is the same value
938                     # as the disallow one.  If this were violated, the tests
939                     # here should start failing.
940                     my $return_flag = $this_utf8n_flag_to_disallow;
941
942                     # If we aren't expecting warnings/disallow for this, turn
943                     # on all the other flags.  That makes sure that they all
944                     # are independent of this flag, and so we don't need to
945                     # test them individually.
946                     my $this_warning_flags  = ($use_warn_flag)
947                                               ? $this_utf8n_flag_to_warn
948                                               : $utf8n_flag_to_warn_complement;
949                     my $this_disallow_flags = ($do_disallow)
950                                           ? $this_utf8n_flag_to_disallow
951                                           : $utf8n_flag_to_disallow_complement;
952                     my $this_bytes = $bytes;
953                     my $this_length = $length;
954                     my $expected_uv = $allowed_uv;
955                     my $this_expected_len = $length;
956                     my $this_needed_to_discern_len = $needed_to_discern_len;
957
958                     my @malformation_names;
959                     my @expected_warnings;
960                     my @expected_return_flags;
961
962                     # Now go through the possible malformations wanted,  and
963                     # change the input accordingly.  We also can set up
964                     # certain other things now, like whether we expect a
965                     # return flag from this malformation and which flag.
966                     if ($overlong) {
967
968                         # To force this malformation, we convert the original
969                         # start byte into a continuation byte with the same
970                         # data bits as originally. ...
971                         substr($this_bytes, 0, 1)
972                             = start_byte_to_cont(substr($this_bytes,
973                                                         0, 1));
974
975                         # ... Then we prepend it with a known overlong
976                         # sequence.  This should evaluate to the exact same
977                         # code point as the original.
978                         $this_bytes
979                         =  I8_to_native("\xff")
980                         . (I8_to_native(chr $::lowest_continuation)
981                         x ( $::max_bytes - 1 - length($this_bytes)))
982                         . $this_bytes;
983                         $this_length = length($this_bytes);
984                         $this_needed_to_discern_len
985                                 = $::max_bytes - ($this_expected_len
986                                                - $this_needed_to_discern_len);
987                         $this_expected_len = $::max_bytes;
988                         push @expected_return_flags, $::UTF8_GOT_LONG;
989                         push @malformation_names, $overlong;
990                         if ($expect_warnings_for_malformed) {
991                             if (   ! $short
992                                 && ! $unexpected_noncont
993                                 && ! $will_overflow)
994                             {
995                                 my $overlong_bytes
996                                         = display_bytes_no_quotes($this_bytes);
997                                 my $correct_bytes
998                                              = display_bytes_no_quotes($bytes);
999                                 my $prefix = ($allowed_uv > 0x10FFFF)
1000                                              ? "0x"
1001                                              : "U+";
1002                                 push @expected_warnings,
1003                                      qr/\QMalformed UTF-8 character:\E
1004                                         \Q $overlong_bytes (overlong;\E
1005                                         \Q instead use $correct_bytes to\E
1006                                         \Q represent $prefix$uv_string)/x;
1007                             }
1008                             else {
1009                                 push @expected_warnings, qr/overlong/;
1010                             }
1011                         }
1012                     }
1013
1014                     if ($short) {
1015                         push @malformation_names, $short;
1016                         push @expected_warnings, qr/short/
1017                                             if $expect_warnings_for_malformed;
1018
1019                         # To force this malformation, just tell the test to
1020                         # not look as far as it should into the input.
1021                         $this_length--;
1022                         $this_expected_len--;
1023                         push @expected_return_flags, $::UTF8_GOT_SHORT;
1024                     }
1025
1026                     if ($unexpected_noncont) {
1027                         push @malformation_names, $unexpected_noncont;
1028                         push @expected_warnings, qr/$unexpected_noncont/
1029                                             if $expect_warnings_for_malformed;
1030
1031                         # To force this malformation, change the final
1032                         # continuation byte into a non continuation.
1033                         my $pos = ($short) ? -2 : -1;
1034                         substr($this_bytes, $pos, 1) = '?';
1035                         $this_expected_len--;
1036                         push @expected_return_flags,
1037                                         $::UTF8_GOT_NON_CONTINUATION;
1038                     }
1039
1040                     # The overflow malformation is done differently than other
1041                     # malformations.  It comes from manually typed tests in
1042                     # the test array, but it also is above Unicode and uses
1043                     # Perl extended UTF-8, so affects some of the flags being
1044                     # tested.  We now make it be treated like one of the other
1045                     # generated malformations.
1046                     if ($will_overflow) {
1047
1048                         # An overflow is (way) above Unicode, and overrides
1049                         # everything else.
1050                         $expect_regular_warnings = 0;
1051
1052                         push @malformation_names, 'overflow';
1053                         if ($expect_warnings_for_overflow) {
1054                             my $qr = display_bytes_no_quotes(
1055                                    substr($this_bytes, 0, $this_expected_len));
1056                             $qr = qr/\QMalformed UTF-8 character: \E
1057                                      \Q$qr (overflows)\E/x;
1058                             push @expected_warnings, $qr;
1059                         }
1060                         push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
1061                     }
1062
1063                     # Here, we've set things up based on the malformations.
1064                     # Now generate the text for them for the test name.
1065                     my $malformations_name = "";
1066                     if (@malformation_names) {
1067                         $malformations_name .= "malformation";
1068                         $malformations_name .= "s" if @malformation_names > 1;
1069                         $malformations_name .= ": ";
1070                         $malformations_name .=  join "/", @malformation_names;
1071                         $malformations_name =  " ($malformations_name)";
1072                     }
1073
1074                     # It may be that the malformations have shortened the
1075                     # amount of input we look at so much that we can't tell
1076                     # what the category the code point was in.  Otherwise, set
1077                     # up the expected return flags based on the warnings and
1078                     # disallowments.
1079                     if ($this_expected_len < $this_needed_to_discern_len) {
1080                         $expect_regular_warnings = 0;
1081                     }
1082                     elsif (   ($this_warning_flags & $this_utf8n_flag_to_warn)
1083                            || (  $this_disallow_flags
1084                                & $this_utf8n_flag_to_disallow))
1085                     {
1086                         push @expected_return_flags, $return_flag;
1087                     }
1088
1089                     # Finish setting up the expected warning.
1090                     if ($expect_regular_warnings) {
1091
1092                         # So far the array contains warnings generated by
1093                         # malformations.  Add the expected regular one.
1094                         unshift @expected_warnings, $cp_message_qr;
1095
1096                         # But it may need to be modified, because either of
1097                         # these malformations means we can't determine the
1098                         # expected code point.
1099                         if ($short || $unexpected_noncont) {
1100                             my $first_byte = substr($this_bytes, 0, 1);
1101                             $expected_warnings[0] = display_bytes(
1102                                     substr($this_bytes, 0, $this_expected_len));
1103                             $expected_warnings[0]
1104                                 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
1105                                      \Q $expected_warnings[0]\E
1106                                      \Q $non_cp_trailing_text\E/x;
1107                         }
1108                     }
1109
1110                     # Is effectively disallowed if we've set up a
1111                     # malformation, even if the flag indicates it is
1112                     # allowed.  Fix up test name to indicate this as
1113                     # well
1114                     my $disallowed = (  $this_disallow_flags
1115                                       & $this_utf8n_flag_to_disallow)
1116                                   || $malformations_name;
1117                     my $this_name = "utf8n_to_uvchr_error() $testname: "
1118                                                 . (($disallowed)
1119                                                    ? 'disallowed'
1120                                                    : 'allowed');
1121                     $this_name .= ", $eval_warn";
1122                     $this_name .= ", " . ((  $this_warning_flags
1123                                             & $this_utf8n_flag_to_warn)
1124                                           ? 'with flag for raising warnings'
1125                                           : 'no flag for raising warnings');
1126                     $this_name .= $malformations_name;
1127
1128                     # Do the actual test using an eval
1129                     undef @warnings_gotten;
1130                     my $ret_ref;
1131                     my $this_flags = $this_warning_flags|$this_disallow_flags;
1132                     my $eval_text =      "$eval_warn; \$ret_ref"
1133                             . " = test_utf8n_to_uvchr_error("
1134                             . "'$this_bytes', $this_length, $this_flags)";
1135                     eval "$eval_text";
1136                     if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1137                     {
1138                         diag "\$@='$@'; call was: "
1139                            . utf8n_display_call($eval_text);
1140                         next;
1141                     }
1142                     if ($disallowed) {
1143                         is($ret_ref->[0], 0, "    And returns 0")
1144                           or diag "Call was: " . utf8n_display_call($eval_text);
1145                     }
1146                     else {
1147                         is($ret_ref->[0], $expected_uv,
1148                                 "    And returns expected uv: "
1149                               . $uv_string)
1150                           or diag "Call was: " . utf8n_display_call($eval_text);
1151                     }
1152                     is($ret_ref->[1], $this_expected_len,
1153                                         "    And returns expected length:"
1154                                       . " $this_expected_len")
1155                       or diag "Call was: " . utf8n_display_call($eval_text);
1156
1157                     my $returned_flags = $ret_ref->[2];
1158
1159                     for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
1160                         if ($expected_return_flags[$i] & $returned_flags) {
1161                             if ($expected_return_flags[$i]
1162                                             == $::UTF8_DISALLOW_ABOVE_31_BIT)
1163                             {
1164                                 pass("    Expected and got return flag for"
1165                                    . " above_31_bit");
1166                             }
1167                                    # The first entries in this are
1168                                    # malformations
1169                             elsif ($i > @malformation_names - 1)  {
1170                                 pass("    Expected and got return flag"
1171                                    . " for " . $controlling_warning_category);
1172                             }
1173                             else {
1174                                 pass("    Expected and got return flag for "
1175                                    . $malformation_names[$i]
1176                                    . " malformation");
1177                             }
1178                             $returned_flags &= ~$expected_return_flags[$i];
1179                             splice @expected_return_flags, $i, 1;
1180                         }
1181                     }
1182
1183                     is($returned_flags, 0,
1184                        "    Got no unexpected return flags")
1185                       or diag "The unexpected flags gotten were: "
1186                            . (flags_to_text($returned_flags,
1187                                             \@utf8n_flags_to_text)
1188                                 # We strip off any prefixes from the flag
1189                                 # names
1190                              =~ s/ \b [A-Z] _ //xgr);
1191                     is (scalar @expected_return_flags, 0,
1192                         "    Got all expected return flags")
1193                         or diag "The expected flags not gotten were: "
1194                            . (flags_to_text(eval join("|",
1195                                                         @expected_return_flags),
1196                                             \@utf8n_flags_to_text)
1197                                 # We strip off any prefixes from the flag
1198                                 # names
1199                              =~ s/ \b [A-Z] _ //xgr);
1200
1201                     do_warnings_test(@expected_warnings)
1202                       or diag "Call was: " . utf8n_display_call($eval_text);
1203                     undef @warnings_gotten;
1204
1205                     # Check CHECK_ONLY results when the input is
1206                     # disallowed.  Do this when actually disallowed,
1207                     # not just when the $this_disallow_flags is set
1208                     if ($disallowed) {
1209                         my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1210                         my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1211                                       . " test_utf8n_to_uvchr_error('"
1212                                       . "$this_bytes', $this_length,"
1213                                       . " $this_flags)";
1214                         eval $eval_text;
1215                         if (! ok ("$@ eq ''",
1216                             "    And eval succeeded with CHECK_ONLY"))
1217                         {
1218                             diag "\$@='$@'; Call was: "
1219                                . utf8n_display_call($eval_text);
1220                             next;
1221                         }
1222                         is($ret_ref->[0], 0, "    CHECK_ONLY: Returns 0")
1223                           or diag "Call was: " . utf8n_display_call($eval_text);
1224                         is($ret_ref->[1], -1,
1225                                        "    CHECK_ONLY: returns -1 for length")
1226                           or diag "Call was: " . utf8n_display_call($eval_text);
1227                         if (! is(scalar @warnings_gotten, 0,
1228                                       "    CHECK_ONLY: no warnings generated"))
1229                         {
1230                             diag "Call was: " . utf8n_display_call($eval_text);
1231                             output_warnings(@warnings_gotten);
1232                         }
1233                     }
1234
1235                     # Now repeat some of the above, but for
1236                     # uvchr_to_utf8_flags().  Since this comes from an
1237                     # existing code point, it hasn't overflowed, and isn't
1238                     # malformed.
1239                     next if @malformation_names;
1240
1241                     $this_warning_flags = ($use_warn_flag)
1242                                           ? $this_uvchr_flag_to_warn
1243                                           : 0;
1244                     $this_disallow_flags = ($do_disallow)
1245                                            ? $this_uvchr_flag_to_disallow
1246                                            : 0;
1247
1248                     $disallowed = $this_disallow_flags
1249                                 & $this_uvchr_flag_to_disallow;
1250                     $this_name .= ", " . ((  $this_warning_flags
1251                                            & $this_utf8n_flag_to_warn)
1252                                           ? 'with flag for raising warnings'
1253                                           : 'no flag for raising warnings');
1254
1255                     $this_name = "uvchr_to_utf8_flags() $testname: "
1256                                             . (($disallowed)
1257                                                 ? 'disallowed'
1258                                                 : 'allowed');
1259                     $this_name .= ", $eval_warn";
1260                     $this_name .= ", " . ((  $this_warning_flags
1261                                            & $this_uvchr_flag_to_warn)
1262                                         ? 'with warning flag'
1263                                         : 'no warning flag');
1264
1265                     undef @warnings_gotten;
1266                     my $ret;
1267                     $this_flags = $this_warning_flags|$this_disallow_flags;
1268                     $eval_text = "$eval_warn; \$ret ="
1269                             . " test_uvchr_to_utf8_flags("
1270                             . "$allowed_uv, $this_flags)";
1271                     eval "$eval_text";
1272                     if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1273                     {
1274                         diag "\$@='$@'; call was: "
1275                            . uvchr_display_call($eval_text);
1276                         next;
1277                     }
1278                     if ($disallowed) {
1279                         is($ret, undef, "    And returns undef")
1280                           or diag "Call was: " . uvchr_display_call($eval_text);
1281                     }
1282                     else {
1283                         is($ret, $this_bytes, "    And returns expected string")
1284                           or diag "Call was: " . uvchr_display_call($eval_text);
1285                     }
1286
1287                     do_warnings_test(@expected_warnings)
1288                       or diag "Call was: " . uvchr_display_call($eval_text);
1289                 }
1290               }
1291           }
1292         }
1293       }
1294     }
1295 }
1296
1297 done_testing;