APItest/t/utf8_warn_base.pl: Split test into 64 vs 32 bit versions
[perl.git] / ext / XS-APItest / t / utf8_warn_base.pl
1 #!perl -w
2
3 # This is a base file to be used by various .t's in its directory
4 # It tests various code points that are "problematic", and verifies that the
5 # correct warnings/flags etc are generated when using them.  It also takes the
6 # UTF-8 for some of them and perturbs it to be malformed in various ways, and
7 # tests that this gets appropriately detected.
8
9 use strict;
10 use Test::More;
11
12 BEGIN {
13     use_ok('XS::APItest');
14     require 'charset_tools.pl';
15     require './t/utf8_setup.pl';
16 };
17
18 $|=1;
19
20 no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
21                           # machines, and that is tested elsewhere
22
23 use XS::APItest;
24 use Data::Dumper;
25
26 my @warnings_gotten;
27
28 use warnings 'utf8';
29 local $SIG{__WARN__} = sub { my @copy = @_;
30                              push @warnings_gotten, map { chomp; $_ } @copy;
31                            };
32 my @tests;
33 {
34     no warnings qw(portable overflow);
35     @tests = (
36         # $testname,
37         # $bytes,                  UTF-8 string
38         # $allowed_uv,             code point $bytes evaluates to; -1 if
39         #                          overflows
40         # $needed_to_discern_len   optional, how long an initial substring do
41         #                          we need to tell that the string must be for
42         #                          a code point in the category it falls in,
43         #                          like being a surrogate; 0 indicates we need
44         #                          the whole string.  Some categories have a
45         #                          default that is used if this is omitted.
46         [ "lowest surrogate",
47             (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
48             0xD800,
49         ],
50         [ "a middle surrogate",
51             (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
52             0xD90D,
53         ],
54         [ "highest surrogate",
55             (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
56             0xDFFF,
57         ],
58         [ "first non_unicode",
59             (isASCII)
60             ?              "\xf4\x90\x80\x80"
61             : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
62             0x110000,
63             2,
64         ],
65         [ "non_unicode whose first byte tells that",
66             (isASCII)
67             ?              "\xf5\x80\x80\x80"
68             : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
69             (isASCII) ? 0x140000 : 0x200000,
70             1,
71         ],
72         [ "first of 32 consecutive non-character code points",
73             (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
74             0xFDD0,
75         ],
76         [ "a mid non-character code point of the 32 consecutive ones",
77             (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
78             0xFDE0,
79         ],
80         [ "final of 32 consecutive non-character code points",
81             (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
82             0xFDEF,
83         ],
84         [ "non-character code point U+FFFE",
85             (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
86             0xFFFE,
87         ],
88         [ "non-character code point U+FFFF",
89             (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
90             0xFFFF,
91         ],
92         [ "non-character code point U+1FFFE",
93             (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
94             0x1FFFE,
95         ],
96         [ "non-character code point U+1FFFF",
97             (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
98             0x1FFFF,
99         ],
100         [ "non-character code point U+2FFFE",
101             (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
102             0x2FFFE,
103         ],
104         [ "non-character code point U+2FFFF",
105             (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
106             0x2FFFF,
107         ],
108         [ "non-character code point U+3FFFE",
109             (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
110             0x3FFFE,
111         ],
112         [ "non-character code point U+3FFFF",
113             (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
114             0x3FFFF,
115         ],
116         [ "non-character code point U+4FFFE",
117             (isASCII)
118             ?               "\xf1\x8f\xbf\xbe"
119             : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
120             0x4FFFE,
121         ],
122         [ "non-character code point U+4FFFF",
123             (isASCII)
124             ?               "\xf1\x8f\xbf\xbf"
125             : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
126             0x4FFFF,
127         ],
128         [ "non-character code point U+5FFFE",
129             (isASCII)
130             ?              "\xf1\x9f\xbf\xbe"
131             : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
132             0x5FFFE,
133         ],
134         [ "non-character code point U+5FFFF",
135             (isASCII)
136             ?              "\xf1\x9f\xbf\xbf"
137             : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
138             0x5FFFF,
139         ],
140         [ "non-character code point U+6FFFE",
141             (isASCII)
142             ?              "\xf1\xaf\xbf\xbe"
143             : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
144             0x6FFFE,
145         ],
146         [ "non-character code point U+6FFFF",
147             (isASCII)
148             ?              "\xf1\xaf\xbf\xbf"
149             : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
150             0x6FFFF,
151         ],
152         [ "non-character code point U+7FFFE",
153             (isASCII)
154             ?              "\xf1\xbf\xbf\xbe"
155             : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
156             0x7FFFE,
157         ],
158         [ "non-character code point U+7FFFF",
159             (isASCII)
160             ?              "\xf1\xbf\xbf\xbf"
161             : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
162             0x7FFFF,
163         ],
164         [ "non-character code point U+8FFFE",
165             (isASCII)
166             ?              "\xf2\x8f\xbf\xbe"
167             : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
168             0x8FFFE,
169         ],
170         [ "non-character code point U+8FFFF",
171             (isASCII)
172             ?              "\xf2\x8f\xbf\xbf"
173             : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
174             0x8FFFF,
175         ],
176         [ "non-character code point U+9FFFE",
177             (isASCII)
178             ?              "\xf2\x9f\xbf\xbe"
179             : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
180             0x9FFFE,
181         ],
182         [ "non-character code point U+9FFFF",
183             (isASCII)
184             ?              "\xf2\x9f\xbf\xbf"
185             : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
186             0x9FFFF,
187         ],
188         [ "non-character code point U+AFFFE",
189             (isASCII)
190             ?              "\xf2\xaf\xbf\xbe"
191             : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
192             0xAFFFE,
193         ],
194         [ "non-character code point U+AFFFF",
195             (isASCII)
196             ?              "\xf2\xaf\xbf\xbf"
197             : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
198             0xAFFFF,
199         ],
200         [ "non-character code point U+BFFFE",
201             (isASCII)
202             ?              "\xf2\xbf\xbf\xbe"
203             : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
204             0xBFFFE,
205         ],
206         [ "non-character code point U+BFFFF",
207             (isASCII)
208             ?              "\xf2\xbf\xbf\xbf"
209             : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
210             0xBFFFF,
211         ],
212         [ "non-character code point U+CFFFE",
213             (isASCII)
214             ?              "\xf3\x8f\xbf\xbe"
215             : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
216             0xCFFFE,
217         ],
218         [ "non-character code point U+CFFFF",
219             (isASCII)
220             ?              "\xf3\x8f\xbf\xbf"
221             : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
222             0xCFFFF,
223         ],
224         [ "non-character code point U+DFFFE",
225             (isASCII)
226             ?              "\xf3\x9f\xbf\xbe"
227             : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
228             0xDFFFE,
229         ],
230         [ "non-character code point U+DFFFF",
231             (isASCII)
232             ?              "\xf3\x9f\xbf\xbf"
233             : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
234             0xDFFFF,
235         ],
236         [ "non-character code point U+EFFFE",
237             (isASCII)
238             ?              "\xf3\xaf\xbf\xbe"
239             : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
240             0xEFFFE,
241         ],
242         [ "non-character code point U+EFFFF",
243             (isASCII)
244             ?              "\xf3\xaf\xbf\xbf"
245             : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
246             0xEFFFF,
247         ],
248         [ "non-character code point U+FFFFE",
249             (isASCII)
250             ?              "\xf3\xbf\xbf\xbe"
251             : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
252             0xFFFFE,
253         ],
254         [ "non-character code point U+FFFFF",
255             (isASCII)
256             ?              "\xf3\xbf\xbf\xbf"
257             : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
258             0xFFFFF,
259         ],
260         [ "non-character code point U+10FFFE",
261             (isASCII)
262             ?              "\xf4\x8f\xbf\xbe"
263             : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
264             0x10FFFE,
265         ],
266         [ "non-character code point U+10FFFF",
267             (isASCII)
268             ?              "\xf4\x8f\xbf\xbf"
269             : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
270             0x10FFFF,
271         ],
272         [ "requires at least 32 bits",
273             (isASCII)
274             ?  "\xfe\x82\x80\x80\x80\x80\x80"
275             : I8_to_native(
276                 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
277             # This code point is chosen so that it is representable in a UV on
278             # 32-bit machines
279             0x80000000,
280             (isASCII) ? 1 : 8,
281         ],
282         [ "highest 32 bit code point",
283             (isASCII)
284             ?  "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
285             : I8_to_native(
286                "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
287             0xFFFFFFFF,
288             (isASCII) ? 1 : 8,
289         ],
290         [ "requires at least 32 bits, and use SUPER-type flags, instead of"
291         . " ABOVE_31_BIT",
292             (isASCII)
293             ? "\xfe\x82\x80\x80\x80\x80\x80"
294             : I8_to_native(
295             "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
296             0x80000000,
297             1,
298         ],
299     );
300
301     if (! $::is64bit) {
302         if (isASCII) {
303             push @tests,
304                 [ "Lowest 33 bit code point: overflow",
305                     "\xFE\x84\x80\x80\x80\x80\x80",
306                     -1,
307                     1,
308                 ],
309                 [ "overflow that old algorithm failed to detect",
310                     "\xfe\x86\x80\x80\x80\x80\x80",
311                     -1,
312                     2,
313                 ];
314         }
315     }
316
317     if ($::is64bit) {
318         push @tests,
319             [ "More than 32 bits",
320                 (isASCII)
321                 ?       "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80"
322                 : I8_to_native(
323                         "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
324                 0x1000000000,
325                 (isASCII) ? 1 : 7,
326             ];
327         if (isASCII) {
328             push @tests,
329                 [ "overflow that old algorithm failed to detect",
330                     "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
331                     -1,
332                     3,
333                 ];
334         }
335         else {
336             push @tests,    # These could falsely show wrongly in a naive
337                             # implementation
338                 [ "requires at least 32 bits",
339                     I8_to_native(
340                     "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
341                     0x800000000,
342                     7,
343                 ],
344                 [ "requires at least 32 bits",
345                     I8_to_native(
346                     "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
347                     0x10000000000,
348                     6,
349                 ],
350                 [ "requires at least 32 bits",
351                     I8_to_native(
352                     "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
353                     0x200000000000,
354                     5,
355                 ],
356                 [ "requires at least 32 bits",
357                     I8_to_native(
358                     "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
359                     0x4000000000000,
360                     4,
361                 ],
362                 [ "requires at least 32 bits",
363                     I8_to_native(
364                     "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
365                     0x80000000000000,
366                     3,
367                 ],
368                 [ "requires at least 32 bits",
369                     I8_to_native(
370                     "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
371                     0x1000000000000000,
372                     2,
373                 ];
374         }
375     }
376 }
377
378 sub flags_to_text($$)
379 {
380     my ($flags, $flags_to_text_ref) = @_;
381
382     # Returns a string containing a mnemonic representation of the bits that
383     # are set in the $flags.  These are assumed to be flag bits.  The return
384     # looks like "FOO|BAR|BAZ".  The second parameter is a reference to an
385     # array that gives the textual representation of all the possible flags.
386     # Element 0 is the text for the bit 0 flag; element 1 for bit 1; ....  If
387     # no bits at all are set the string "0" is returned;
388
389     my @flag_text;
390     my $shift = 0;
391
392     return "0" if $flags == 0;
393
394     while ($flags) {
395         #diag sprintf "%x", $flags;
396         if ($flags & 1) {
397             push @flag_text, $flags_to_text_ref->[$shift];
398         }
399         $shift++;
400         $flags >>= 1;
401     }
402
403     return join "|", @flag_text;
404 }
405
406 # Possible flag returns from utf8n_to_uvchr_error().  These should have G_,
407 # instead of A_, D_, but the prefixes will be used in a a later commit, so
408 # minimize churn by having them here.
409 my @utf8n_flags_to_text =  ( qw(
410         A_EMPTY
411         A_CONTINUATION
412         A_NON_CONTINUATION
413         A_SHORT
414         A_LONG
415         A_LONG_AND_ITS_VALUE
416         PLACEHOLDER
417         A_OVERFLOW
418         D_SURROGATE
419         W_SURROGATE
420         D_NONCHAR
421         W_NONCHAR
422         D_SUPER
423         W_SUPER
424         D_ABOVE_31_BIT
425         W_ABOVE_31_BIT
426         CHECK_ONLY
427         NO_CONFIDENCE_IN_CURLEN_
428     ) );
429
430 sub utf8n_display_call($)
431 {
432     # Converts an eval string that calls test_utf8n_to_uvchr into a more human
433     # readable form, and returns it.  Doesn't work if the byte string contains
434     # an apostrophe.  The return will look something like:
435     #   test_utf8n_to_uvchr_error('$bytes', $length, $flags)
436     #diag $_[0];
437
438     $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
439     my $text1 = $1;     # Everything before the byte string
440     my $bytes = $2;
441     my $text2 = $3;     # Includes the length
442     my $flags = $4;
443
444     return $text1
445          . display_bytes($bytes)
446          . $text2
447          . flags_to_text($flags, \@utf8n_flags_to_text)
448          . ')';
449 }
450
451 sub uvchr_display_call($)
452 {
453     # Converts an eval string that calls test_uvchr_to_utf8 into a more human
454     # readable form, and returns it.  The return will look something like:
455     #   test_uvchr_to_utf8n_flags($uv, $flags)
456     #diag $_[0];
457
458     my @flags_to_text =  ( qw(
459             W_SURROGATE
460             W_NONCHAR
461             W_SUPER
462             W_ABOVE_31_BIT
463             D_SURROGATE
464             D_NONCHAR
465             D_SUPER
466             D_ABOVE_31_BIT
467        ) );
468
469     $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
470     my $text = $1;
471     my $cp = sprintf "%X", $2;
472     my $flags = $3;
473
474     return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
475 }
476
477 sub do_warnings_test(@)
478 {
479     my @expected_warnings = @_;
480
481     # Compares the input expected warnings array with @warnings_gotten,
482     # generating a pass for each found, removing it from @warnings_gotten.
483     # Any discrepancies generate test failures.  Returns TRUE if no
484     # discrepcancies; otherwise FALSE.
485
486     my $succeeded = 1;
487
488     if (@expected_warnings == 0) {
489         if (! is(@warnings_gotten, 0, "    Expected and got no warnings")) {
490             output_warnings(@warnings_gotten);
491             $succeeded = 0;
492         }
493         return $succeeded;
494     }
495
496     # Check that we got all the expected warnings,
497     # removing each one found
498   WARNING:
499     foreach my $expected (@expected_warnings) {
500         foreach (my $i = 0; $i < @warnings_gotten; $i++) {
501             if ($warnings_gotten[$i] =~ $expected) {
502                 pass("    Expected and got warning: "
503                     . " $warnings_gotten[$i]");
504                 splice @warnings_gotten, $i, 1;
505                 next WARNING;
506             }
507         }
508         fail("    Expected a warning that matches "
509             . $expected . " but didn't get it");
510         $succeeded = 0;
511     }
512
513     if (! is(@warnings_gotten, 0, "    Got no unexpected warnings")) {
514         output_warnings(@warnings_gotten);
515         $succeeded = 0;
516     }
517
518     return $succeeded;
519 }
520
521 # This test is split into this number of files.
522 my $num_test_files = $ENV{TEST_JOBS} || 1;
523 $num_test_files = 10 if $num_test_files > 10;
524
525 my $test_count = -1;
526 foreach my $test (@tests) {
527     $test_count++;
528     next if $test_count % $num_test_files != $::TEST_CHUNK;
529
530     my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
531
532     my $length = length $bytes;
533     my $will_overflow = $allowed_uv < 0;
534
535     my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
536
537     my $controlling_warning_category;
538     my $utf8n_flag_to_warn;
539     my $utf8n_flag_to_disallow;
540     my $uvchr_flag_to_warn;
541     my $uvchr_flag_to_disallow;
542
543     # We want to test that the independent flags are actually independent.
544     # For example, that a surrogate doesn't trigger a non-character warning,
545     # and conversely, turning off an above-Unicode flag doesn't suppress a
546     # surrogate warning.  Earlier versions of this file used nested loops to
547     # test all possible combinations.  But that creates lots of tests, making
548     # this run too long.  What is now done instead is to use the complement of
549     # the category we are testing to greatly reduce the combinatorial
550     # explosion.  For example, if we have a surrogate and we aren't expecting
551     # a warning about it, we set all the flags for non-surrogates to raise
552     # warnings.  If one shows up, it indicates the flags aren't independent.
553     my $utf8n_flag_to_warn_complement;
554     my $utf8n_flag_to_disallow_complement;
555     my $uvchr_flag_to_warn_complement;
556     my $uvchr_flag_to_disallow_complement;
557
558     # Many of the code points being tested are middling in that if code point
559     # edge cases work, these are very likely to as well.  Because this test
560     # file takes a while to execute, we skip testing the edge effects of code
561     # points deemed middling, while testing their basics and continuing to
562     # fully test the non-middling code points.
563     my $skip_most_tests = 0;
564
565     my $cp_message_qr;      # Pattern that matches the message raised when
566                             # that message contains the problematic code
567                             # point.  The message is the same (currently) both
568                             # when going from/to utf8.
569     my $non_cp_trailing_text;   # The suffix text when the message doesn't
570                                 # contain a code point.  (This is a result of
571                                 # some sort of malformation that means we
572                                 # can't get an exact code poin
573
574     if ($will_overflow || $allowed_uv > 0x10FFFF) {
575
576         # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
577         $utf8n_flag_to_warn     = $::UTF8_WARN_SUPER;
578         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
579         $uvchr_flag_to_warn     = $::UNICODE_WARN_SUPER;
580         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
581
582         # Below, we add the flags for non-above-31 bit to the code points that
583         # don't fit that category.  Special tests are done for this category
584         # in the inner loop.
585         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
586                                             |$::UTF8_WARN_SURROGATE;
587         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
588                                             |$::UTF8_DISALLOW_SURROGATE;
589         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
590                                             |$::UNICODE_WARN_SURROGATE;
591         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
592                                             |$::UNICODE_DISALLOW_SURROGATE;
593         $controlling_warning_category = 'non_unicode';
594
595         if ($will_overflow) {  # This is realy a malformation
596             $non_cp_trailing_text = "if you see this, there is an error";
597             $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
598         }
599         elsif ($allowed_uv > 0x7FFFFFFF) {
600             $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
601                                 \Q and not portable\E/x;
602             $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
603         }
604         else {
605             $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
606                                 \Q may not be portable\E/x;
607             $non_cp_trailing_text = "is for a non-Unicode code point, may not"
608                                 . " be portable";
609             $utf8n_flag_to_warn_complement     |= $::UTF8_WARN_ABOVE_31_BIT;
610             $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
611             $uvchr_flag_to_warn_complement     |= $::UNICODE_WARN_ABOVE_31_BIT;
612             $uvchr_flag_to_disallow_complement
613                                             |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
614         }
615     }
616     elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
617         $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
618         $non_cp_trailing_text = "is for a surrogate";
619         $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
620         $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
621
622         $utf8n_flag_to_warn     = $::UTF8_WARN_SURROGATE;
623         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
624         $uvchr_flag_to_warn     = $::UNICODE_WARN_SURROGATE;
625         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
626
627         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_NONCHAR
628                                             |$::UTF8_WARN_SUPER
629                                             |$::UTF8_WARN_ABOVE_31_BIT;
630         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
631                                             |$::UTF8_DISALLOW_SUPER
632                                             |$::UTF8_DISALLOW_ABOVE_31_BIT;
633         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_NONCHAR
634                                             |$::UNICODE_WARN_SUPER
635                                             |$::UNICODE_WARN_ABOVE_31_BIT;
636         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
637                                             |$::UNICODE_DISALLOW_SUPER
638                                             |$::UNICODE_DISALLOW_ABOVE_31_BIT;
639         $controlling_warning_category = 'surrogate';
640     }
641     elsif (   ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
642            || ($allowed_uv & 0xFFFE) == 0xFFFE)
643     {
644         $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
645                             \Q is not recommended for open interchange\E/x;
646         $non_cp_trailing_text = "if you see this, there is an error";
647         $needed_to_discern_len = $length unless defined $needed_to_discern_len;
648         if (   ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
649             || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
650         {
651             $skip_most_tests = 1;
652         }
653
654         $utf8n_flag_to_warn     = $::UTF8_WARN_NONCHAR;
655         $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
656         $uvchr_flag_to_warn     = $::UNICODE_WARN_NONCHAR;
657         $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
658
659         $utf8n_flag_to_warn_complement     = $::UTF8_WARN_SURROGATE
660                                             |$::UTF8_WARN_SUPER
661                                             |$::UTF8_WARN_ABOVE_31_BIT;
662         $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
663                                             |$::UTF8_DISALLOW_SUPER
664                                             |$::UTF8_DISALLOW_ABOVE_31_BIT;
665         $uvchr_flag_to_warn_complement     = $::UNICODE_WARN_SURROGATE
666                                             |$::UNICODE_WARN_SUPER
667                                             |$::UNICODE_WARN_ABOVE_31_BIT;
668         $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
669                                             |$::UNICODE_DISALLOW_SUPER
670                                             |$::UNICODE_DISALLOW_ABOVE_31_BIT;
671
672         $controlling_warning_category = 'nonchar';
673     }
674     else {
675         die "Can't figure out what type of warning to test for $testname"
676     }
677
678     die 'Didn\'t set $needed_to_discern_len for ' . $testname
679                                         unless defined $needed_to_discern_len;
680     {   # First test the isFOO calls
681         use warnings;   # Make sure these don't raise warnings
682         undef @warnings_gotten;
683
684         my $ret = test_isUTF8_CHAR($bytes, $length);
685         my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
686         if ($will_overflow) {
687             is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
688             is($ret_flags, 0, "    And isUTF8_CHAR_flags() returns 0");
689         }
690         else {
691             is($ret, $length,
692                "For $testname: isUTF8_CHAR() returns expected length: $length");
693             is($ret_flags, $length, "    And isUTF8_CHAR_flags(...,0)"
694                                   . " returns expected length: $length");
695         }
696         is(scalar @warnings_gotten, 0,
697                 "    And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
698               . " any warnings")
699           or output_warnings(@warnings_gotten);
700
701         undef @warnings_gotten;
702         $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
703         if ($will_overflow) {
704             is($ret, 0, "    And isSTRICT_UTF8_CHAR() returns 0");
705         }
706         else {
707             my $expected_ret = (   $testname =~ /surrogate|non-character/
708                                 || $allowed_uv > 0x10FFFF)
709                                ? 0
710                                : $length;
711             is($ret, $expected_ret, "    And isSTRICT_UTF8_CHAR() returns"
712                                   . " expected length: $expected_ret");
713             $ret = test_isUTF8_CHAR_flags($bytes, $length,
714                                           $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
715             is($ret, $expected_ret,
716                     "    And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
717                     . " acts like isSTRICT_UTF8_CHAR");
718         }
719         is(scalar @warnings_gotten, 0,
720                 "    And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
721               . " generated any warnings")
722           or output_warnings(@warnings_gotten);
723
724         undef @warnings_gotten;
725         $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
726         if ($will_overflow) {
727             is($ret, 0, "    And isC9_STRICT_UTF8_CHAR() returns 0");
728         }
729         else {
730             my $expected_ret = (   $testname =~ /surrogate/
731                                 || $allowed_uv > 0x10FFFF)
732                                ? 0
733                                : $length;
734             is($ret, $expected_ret, "    And isC9_STRICT_UTF8_CHAR()"
735                                    ." returns expected length: $expected_ret");
736             $ret = test_isUTF8_CHAR_flags($bytes, $length,
737                                         $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
738             is($ret, $expected_ret,
739                   "    And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
740                 . " acts like isC9_STRICT_UTF8_CHAR");
741         }
742         is(scalar @warnings_gotten, 0,
743                 "    And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
744               . " generated any warnings")
745           or output_warnings(@warnings_gotten);
746
747         # Test partial character handling, for each byte not a full character
748         for my $j (1.. $length - 1) {
749
750             # Skip the test for the interaction between overflow and above-31
751             # bit.  It is really testing other things than the partial
752             # character tests, for which other tests in this file are
753             # sufficient
754             last if $will_overflow;
755
756             foreach my $disallow_flag (0, $utf8n_flag_to_disallow) {
757                 my $partial = substr($bytes, 0, $j);
758                 my $ret_should_be;
759                 my $comment;
760                 if ($disallow_flag) {
761                     $ret_should_be = 0;
762                     $comment = "disallowed";
763                     if ($j < $needed_to_discern_len) {
764                         $ret_should_be = 1;
765                         $comment .= ", but need $needed_to_discern_len bytes"
766                                  .  " to discern:";
767                     }
768                 }
769                 else {
770                     $ret_should_be = 1;
771                     $comment = "allowed";
772                 }
773
774                 undef @warnings_gotten;
775
776                 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
777                                                              $disallow_flag);
778                 is($ret, $ret_should_be,
779                                 "    And is_utf8_valid_partial_char_flags("
780                               . display_bytes($partial)
781                               . "), $comment: returns $ret_should_be");
782                 is(scalar @warnings_gotten, 0,
783                         "    And is_utf8_valid_partial_char_flags()"
784                       . " generated no warnings")
785                   or output_warnings(@warnings_gotten);
786             }
787         }
788     }
789
790     # Now test the to/from UTF-8 calls
791     # This is more complicated than the malformations tested in other files in
792     # this directory, as there are several orthogonal variables involved.  We
793     # test most possible combinations
794     foreach my $do_disallow (0, 1) {
795       next if $skip_most_tests && ! $do_disallow;
796
797       # We try various combinations of malformations that can occur
798       foreach my $short ("", "short") {
799         next if $skip_most_tests && $short;
800         foreach my $unexpected_noncont ("", "unexpected non-continuation") {
801           next if $skip_most_tests && $unexpected_noncont;
802           foreach my $overlong ("", "overlong") {
803             next if $overlong && $skip_most_tests;
804
805             # If we're creating an overlong, it can't be longer than the
806             # maximum length, so skip if we're already at that length.
807             next if $overlong && $length >= $::max_bytes;
808
809               # We classify the warnings into certain "interesting" types,
810               # described later
811               foreach my $warning_type (0..4) {
812                 next if $skip_most_tests && $warning_type != 1;
813                 foreach my $use_warn_flag (0, 1) {
814                     next if $skip_most_tests && ! $use_warn_flag;
815
816                     # Finally, here is the inner loop
817
818                     my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
819                     my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
820                     my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
821                     my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
822
823                     my $eval_warn;
824                     my $expect_regular_warnings;
825                     my $expect_warnings_for_malformed;
826                     my $expect_warnings_for_overflow;
827
828                     if ($warning_type == 0) {
829                         $eval_warn = "use warnings; no warnings 'deprecated'";
830                         $expect_regular_warnings = $use_warn_flag;
831                         $expect_warnings_for_overflow = 1;
832                         $expect_warnings_for_malformed = 1;
833                     }
834                     elsif ($warning_type == 1) {
835                         $eval_warn = "no warnings";
836                         $expect_regular_warnings = 0;
837                         $expect_warnings_for_overflow = 0;
838                         $expect_warnings_for_malformed = 0;
839                     }
840                     elsif ($warning_type == 2) {
841                         $eval_warn = "no warnings; use warnings 'utf8'";
842                         $expect_regular_warnings = $use_warn_flag;
843                         $expect_warnings_for_overflow = 1;
844                         $expect_warnings_for_malformed = 1;
845                     }
846                     elsif ($warning_type == 3) {
847                         $eval_warn = "no warnings; use warnings"
848                                    . " '$controlling_warning_category'";
849                         $expect_regular_warnings = $use_warn_flag;
850                         $expect_warnings_for_overflow
851                             = $controlling_warning_category eq 'non_unicode';
852                         $expect_warnings_for_malformed = 0;
853                     }
854                     elsif ($warning_type == 4) {  # Like type 3, but uses the
855                                                   # above-31-bit flags
856                         # The complement flags were set up so that the
857                         # above-31-bit flags have been tested that they don't
858                         # trigger wrongly for too small code points.  And the
859                         # flags have been set up so that those small code
860                         # points are tested for being above Unicode.  What's
861                         # left to test is that the large code points do
862                         # trigger the above-31-bit flags.
863                         next if ! $will_overflow && $allowed_uv < 0x80000000;
864                         next if $controlling_warning_category ne 'non_unicode';
865                         $eval_warn = "no warnings; use warnings 'non_unicode'";
866                         $expect_regular_warnings = 1;
867                         $expect_warnings_for_overflow = 1;
868                         $expect_warnings_for_malformed = 0;
869                         $this_utf8n_flag_to_warn   = $::UTF8_WARN_ABOVE_31_BIT;
870                         $this_utf8n_flag_to_disallow
871                                                 = $::UTF8_DISALLOW_ABOVE_31_BIT;
872                         $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
873                         $this_uvchr_flag_to_disallow
874                                              = $::UNICODE_DISALLOW_ABOVE_31_BIT;
875                     }
876                     else {
877                        die "Unexpected warning type '$warning_type'";
878                     }
879
880                     # We only need to test the case where all warnings are
881                     # enabled (type 0) to see if turning off the warning flag
882                     # causes things to not be output.  If those pass, then
883                     # turning on some sub-category of warnings, or turning off
884                     # warnings altogether are extremely likely to not output
885                     # warnings either, given how the warnings subsystem is
886                     # supposed to work, and this file assumes it does work.
887                     next if $warning_type != 0 && ! $use_warn_flag;
888
889                     # The convention is that the 'got' flag is the same value
890                     # as the disallow one.  If this were violated, the tests
891                     # here should start failing.
892                     my $return_flag = $this_utf8n_flag_to_disallow;
893
894                     # If we aren't expecting warnings/disallow for this, turn
895                     # on all the other flags.  That makes sure that they all
896                     # are independent of this flag, and so we don't need to
897                     # test them individually.
898                     my $this_warning_flags  = ($use_warn_flag)
899                                               ? $this_utf8n_flag_to_warn
900                                               : $utf8n_flag_to_warn_complement;
901                     my $this_disallow_flags = ($do_disallow)
902                                           ? $this_utf8n_flag_to_disallow
903                                           : $utf8n_flag_to_disallow_complement;
904                     my $this_bytes = $bytes;
905                     my $this_length = $length;
906                     my $expected_uv = $allowed_uv;
907                     my $this_expected_len = $length;
908                     my $this_needed_to_discern_len = $needed_to_discern_len;
909
910                     my @malformation_names;
911                     my @expected_warnings;
912                     my @expected_return_flags;
913
914                     # Now go through the possible malformations wanted,  and
915                     # change the input accordingly.  We also can set up
916                     # certain other things now, like whether we expect a
917                     # return flag from this malformation and which flag.
918                     if ($overlong) {
919
920                         # To force this malformation, we convert the original
921                         # start byte into a continuation byte with the same
922                         # data bits as originally. ...
923                         substr($this_bytes, 0, 1)
924                             = start_byte_to_cont(substr($this_bytes,
925                                                         0, 1));
926
927                         # ... Then we prepend it with a known overlong
928                         # sequence.  This should evaluate to the exact same
929                         # code point as the original.
930                         $this_bytes
931                         =  I8_to_native("\xff")
932                         . (I8_to_native(chr $::lowest_continuation)
933                         x ( $::max_bytes - 1 - length($this_bytes)))
934                         . $this_bytes;
935                         $this_length = length($this_bytes);
936                         $this_needed_to_discern_len
937                                 = $::max_bytes - ($this_expected_len
938                                                - $this_needed_to_discern_len);
939                         $this_expected_len = $::max_bytes;
940                         push @expected_return_flags, $::UTF8_GOT_LONG;
941                         push @malformation_names, $overlong;
942                         if ($expect_warnings_for_malformed) {
943                             if (   ! $short
944                                 && ! $unexpected_noncont
945                                 && ! $will_overflow)
946                             {
947                                 my $overlong_bytes
948                                         = display_bytes_no_quotes($this_bytes);
949                                 my $correct_bytes
950                                              = display_bytes_no_quotes($bytes);
951                                 push @expected_warnings,
952                                      qr/\QMalformed UTF-8 character:\E
953                                         \Q $overlong_bytes (overlong;\E
954                                         \Q instead use $correct_bytes to\E
955                                         \Q represent U+$uv_string)/x;
956                             }
957                             else {
958                                 push @expected_warnings, qr/overlong/;
959                             }
960                         }
961                     }
962
963                     if ($short) {
964                         push @malformation_names, $short;
965                         push @expected_warnings, qr/short/
966                                             if $expect_warnings_for_malformed;
967
968                         # To force this malformation, just tell the test to
969                         # not look as far as it should into the input.
970                         $this_length--;
971                         $this_expected_len--;
972                         push @expected_return_flags, $::UTF8_GOT_SHORT;
973                     }
974
975                     if ($unexpected_noncont) {
976                         push @malformation_names, $unexpected_noncont;
977                         push @expected_warnings, qr/$unexpected_noncont/
978                                             if $expect_warnings_for_malformed;
979
980                         # To force this malformation, change the final
981                         # continuation byte into a non continuation.
982                         my $pos = ($short) ? -2 : -1;
983                         substr($this_bytes, $pos, 1) = '?';
984                         $this_expected_len--;
985                         push @expected_return_flags,
986                                         $::UTF8_GOT_NON_CONTINUATION;
987                     }
988
989                     # The overflow malformation is done differently than other
990                     # malformations.  It comes from manually typed tests in
991                     # the test array, but it also is above Unicode and uses
992                     # Perl extended UTF-8, so affects some of the flags being
993                     # tested.  We now make it be treated like one of the other
994                     # generated malformations.
995                     if ($will_overflow) {
996
997                         # An overflow is (way) above Unicode, and overrides
998                         # everything else.
999                         $expect_regular_warnings = 0;
1000
1001                         push @malformation_names, 'overflow';
1002                         if ($expect_warnings_for_overflow) {
1003                             my $qr = display_bytes_no_quotes(
1004                                    substr($this_bytes, 0, $this_expected_len));
1005                             $qr = qr/\QMalformed UTF-8 character: \E
1006                                      \Q$qr (overflows)\E/x;
1007                             push @expected_warnings, $qr;
1008                         }
1009                         push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
1010                     }
1011
1012                     # Here, we've set things up based on the malformations.
1013                     # Now generate the text for them for the test name.
1014                     my $malformations_name = "";
1015                     if (@malformation_names) {
1016                         $malformations_name .= "malformation";
1017                         $malformations_name .= "s" if @malformation_names > 1;
1018                         $malformations_name .= ": ";
1019                         $malformations_name .=  join "/", @malformation_names;
1020                         $malformations_name =  " ($malformations_name)";
1021                     }
1022
1023                     # It may be that the malformations have shortened the
1024                     # amount of input we look at so much that we can't tell
1025                     # what the category the code point was in.  Otherwise, set
1026                     # up the expected return flags based on the warnings and
1027                     # disallowments.
1028                     if ($this_expected_len < $this_needed_to_discern_len) {
1029                         $expect_regular_warnings = 0;
1030                     }
1031                     elsif (   ($this_warning_flags & $this_utf8n_flag_to_warn)
1032                            || (  $this_disallow_flags
1033                                & $this_utf8n_flag_to_disallow))
1034                     {
1035                         push @expected_return_flags, $return_flag;
1036                     }
1037
1038                     # Finish setting up the expected warning.
1039                     if ($expect_regular_warnings) {
1040
1041                         # So far the array contains warnings generated by
1042                         # malformations.  Add the expected regular one.
1043                         unshift @expected_warnings, $cp_message_qr;
1044
1045                         # But it may need to be modified, because either of
1046                         # these malformations means we can't determine the
1047                         # expected code point.
1048                         if ($short || $unexpected_noncont) {
1049                             my $first_byte = substr($this_bytes, 0, 1);
1050                             $expected_warnings[0] = display_bytes(
1051                                     substr($this_bytes, 0, $this_expected_len));
1052                             $expected_warnings[0]
1053                                 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
1054                                      \Q $expected_warnings[0]\E
1055                                      \Q $non_cp_trailing_text\E/x;
1056                         }
1057                     }
1058
1059                     # Is effectively disallowed if we've set up a
1060                     # malformation, even if the flag indicates it is
1061                     # allowed.  Fix up test name to indicate this as
1062                     # well
1063                     my $disallowed = (  $this_disallow_flags
1064                                       & $this_utf8n_flag_to_disallow)
1065                                   || $malformations_name;
1066                     my $this_name = "utf8n_to_uvchr_error() $testname: "
1067                                                 . (($disallowed)
1068                                                    ? 'disallowed'
1069                                                    : 'allowed');
1070                     $this_name .= ", $eval_warn";
1071                     $this_name .= ", " . ((  $this_warning_flags
1072                                             & $this_utf8n_flag_to_warn)
1073                                           ? 'with flag for raising warnings'
1074                                           : 'no flag for raising warnings');
1075                     $this_name .= $malformations_name;
1076
1077                     # Do the actual test using an eval
1078                     undef @warnings_gotten;
1079                     my $ret_ref;
1080                     my $this_flags = $this_warning_flags|$this_disallow_flags;
1081                     my $eval_text =      "$eval_warn; \$ret_ref"
1082                             . " = test_utf8n_to_uvchr_error("
1083                             . "'$this_bytes', $this_length, $this_flags)";
1084                     eval "$eval_text";
1085                     if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1086                     {
1087                         diag "\$@='$@'; call was: "
1088                            . utf8n_display_call($eval_text);
1089                         next;
1090                     }
1091                     if ($disallowed) {
1092                         is($ret_ref->[0], 0, "    And returns 0")
1093                           or diag "Call was: " . utf8n_display_call($eval_text);
1094                     }
1095                     else {
1096                         is($ret_ref->[0], $expected_uv,
1097                                 "    And returns expected uv: "
1098                               . $uv_string)
1099                           or diag "Call was: " . utf8n_display_call($eval_text);
1100                     }
1101                     is($ret_ref->[1], $this_expected_len,
1102                                         "    And returns expected length:"
1103                                       . " $this_expected_len")
1104                       or diag "Call was: " . utf8n_display_call($eval_text);
1105
1106                     my $returned_flags = $ret_ref->[2];
1107
1108                     for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
1109                         if ($expected_return_flags[$i] & $returned_flags) {
1110                             if ($expected_return_flags[$i]
1111                                             == $::UTF8_DISALLOW_ABOVE_31_BIT)
1112                             {
1113                                 pass("    Expected and got return flag for"
1114                                    . " above_31_bit");
1115                             }
1116                                    # The first entries in this are
1117                                    # malformations
1118                             elsif ($i > @malformation_names - 1)  {
1119                                 pass("    Expected and got return flag"
1120                                    . " for " . $controlling_warning_category);
1121                             }
1122                             else {
1123                                 pass("    Expected and got return flag for "
1124                                    . $malformation_names[$i]
1125                                    . " malformation");
1126                             }
1127                             $returned_flags &= ~$expected_return_flags[$i];
1128                             splice @expected_return_flags, $i, 1;
1129                         }
1130                     }
1131
1132                     is($returned_flags, 0,
1133                        "    Got no unexpected return flags")
1134                       or diag "The unexpected flags gotten were: "
1135                            . (flags_to_text($returned_flags,
1136                                             \@utf8n_flags_to_text)
1137                                 # We strip off any prefixes from the flag
1138                                 # names
1139                              =~ s/ \b [A-Z] _ //xgr);
1140                     is (scalar @expected_return_flags, 0,
1141                         "    Got all expected return flags")
1142                         or diag "The expected flags not gotten were: "
1143                            . (flags_to_text(eval join("|",
1144                                                         @expected_return_flags),
1145                                             \@utf8n_flags_to_text)
1146                                 # We strip off any prefixes from the flag
1147                                 # names
1148                              =~ s/ \b [A-Z] _ //xgr);
1149
1150                     do_warnings_test(@expected_warnings)
1151                       or diag "Call was: " . utf8n_display_call($eval_text);
1152                     undef @warnings_gotten;
1153
1154                     # Check CHECK_ONLY results when the input is
1155                     # disallowed.  Do this when actually disallowed,
1156                     # not just when the $this_disallow_flags is set
1157                     if ($disallowed) {
1158                         my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1159                         my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1160                                       . " test_utf8n_to_uvchr_error('"
1161                                       . "$this_bytes', $this_length,"
1162                                       . " $this_flags)";
1163                         eval $eval_text;
1164                         if (! ok ("$@ eq ''",
1165                             "    And eval succeeded with CHECK_ONLY"))
1166                         {
1167                             diag "\$@='$@'; Call was: "
1168                                . utf8n_display_call($eval_text);
1169                             next;
1170                         }
1171                         is($ret_ref->[0], 0, "    CHECK_ONLY: Returns 0")
1172                           or diag "Call was: " . utf8n_display_call($eval_text);
1173                         is($ret_ref->[1], -1,
1174                                        "    CHECK_ONLY: returns -1 for length")
1175                           or diag "Call was: " . utf8n_display_call($eval_text);
1176                         if (! is(scalar @warnings_gotten, 0,
1177                                       "    CHECK_ONLY: no warnings generated"))
1178                         {
1179                             diag "Call was: " . utf8n_display_call($eval_text);
1180                             output_warnings(@warnings_gotten);
1181                         }
1182                     }
1183
1184                     # Now repeat some of the above, but for
1185                     # uvchr_to_utf8_flags().  Since this comes from an
1186                     # existing code point, it hasn't overflowed, and isn't
1187                     # malformed.
1188                     next if @malformation_names;
1189
1190                     $this_warning_flags = ($use_warn_flag)
1191                                           ? $this_uvchr_flag_to_warn
1192                                           : 0;
1193                     $this_disallow_flags = ($do_disallow)
1194                                            ? $this_uvchr_flag_to_disallow
1195                                            : 0;
1196
1197                     $disallowed = $this_disallow_flags
1198                                 & $this_uvchr_flag_to_disallow;
1199                     $this_name .= ", " . ((  $this_warning_flags
1200                                            & $this_utf8n_flag_to_warn)
1201                                           ? 'with flag for raising warnings'
1202                                           : 'no flag for raising warnings');
1203
1204                     $this_name = "uvchr_to_utf8_flags() $testname: "
1205                                             . (($disallowed)
1206                                                 ? 'disallowed'
1207                                                 : 'allowed');
1208                     $this_name .= ", $eval_warn";
1209                     $this_name .= ", " . ((  $this_warning_flags
1210                                            & $this_uvchr_flag_to_warn)
1211                                         ? 'with warning flag'
1212                                         : 'no warning flag');
1213
1214                     undef @warnings_gotten;
1215                     my $ret;
1216                     $this_flags = $this_warning_flags|$this_disallow_flags;
1217                     $eval_text = "$eval_warn; \$ret ="
1218                             . " test_uvchr_to_utf8_flags("
1219                             . "$allowed_uv, $this_flags)";
1220                     eval "$eval_text";
1221                     if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1222                     {
1223                         diag "\$@='$@'; call was: "
1224                            . uvchr_display_call($eval_text);
1225                         next;
1226                     }
1227                     if ($disallowed) {
1228                         is($ret, undef, "    And returns undef")
1229                           or diag "Call was: " . uvchr_display_call($eval_text);
1230                     }
1231                     else {
1232                         is($ret, $this_bytes, "    And returns expected string")
1233                           or diag "Call was: " . uvchr_display_call($eval_text);
1234                     }
1235
1236                     do_warnings_test(@expected_warnings)
1237                       or diag "Call was: " . uvchr_display_call($eval_text);
1238                 }
1239               }
1240           }
1241         }
1242       }
1243     }
1244 }
1245
1246 done_testing;