This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t/utf8_warn_base.pl: Rename a test
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
CommitLineData
6aa905cf
KW
1#!perl -w
2
3# This is a base file to be used by various .t's in its directory
8f79178b
KW
4# It tests various code points that are "problematic", and verifies that the
5# correct warnings/flags etc are generated when using them. It also takes the
6# UTF-8 for some of them and perturbs it to be malformed in various ways, and
7# tests that this gets appropriately detected.
6aa905cf
KW
8
9use strict;
10use Test::More;
11
12BEGIN {
13 use_ok('XS::APItest');
14 require 'charset_tools.pl';
15 require './t/utf8_setup.pl';
16};
17
18$|=1;
19
20no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
21 # machines, and that is tested elsewhere
22
23use XS::APItest;
24use Data::Dumper;
25
9cdc3054 26my @warnings_gotten;
6aa905cf
KW
27
28use warnings 'utf8';
8d6f1506
KW
29local $SIG{__WARN__} = sub { my @copy = @_;
30 push @warnings_gotten, map { chomp; $_ } @copy;
31 };
c7f9e4bb
KW
32my @tests;
33{
34 no warnings qw(portable overflow);
35 @tests = (
8f79178b
KW
36 # $testname,
37 # $bytes, UTF-8 string
38 # $allowed_uv, code point $bytes evaluates to; -1 if
39 # overflows
40 # $needed_to_discern_len optional, how long an initial substring do
41 # we need to tell that the string must be for
42 # a code point in the category it falls in,
43 # like being a surrogate; 0 indicates we need
44 # the whole string. Some categories have a
45 # default that is used if this is omitted.
46 [ "lowest surrogate",
47 (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
48 0xD800,
49 ],
50 [ "a middle surrogate",
51 (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
52 0xD90D,
53 ],
54 [ "highest surrogate",
55 (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
56 0xDFFF,
57 ],
8f79178b
KW
58 [ "first of 32 consecutive non-character code points",
59 (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
60 0xFDD0,
61 ],
62 [ "a mid non-character code point of the 32 consecutive ones",
63 (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
64 0xFDE0,
65 ],
66 [ "final of 32 consecutive non-character code points",
67 (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
68 0xFDEF,
69 ],
70 [ "non-character code point U+FFFE",
71 (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
72 0xFFFE,
73 ],
74 [ "non-character code point U+FFFF",
75 (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
76 0xFFFF,
77 ],
78 [ "non-character code point U+1FFFE",
79 (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
80 0x1FFFE,
81 ],
82 [ "non-character code point U+1FFFF",
83 (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
84 0x1FFFF,
85 ],
86 [ "non-character code point U+2FFFE",
87 (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
88 0x2FFFE,
89 ],
90 [ "non-character code point U+2FFFF",
91 (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
92 0x2FFFF,
93 ],
94 [ "non-character code point U+3FFFE",
95 (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
96 0x3FFFE,
97 ],
98 [ "non-character code point U+3FFFF",
99 (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
100 0x3FFFF,
101 ],
102 [ "non-character code point U+4FFFE",
103 (isASCII)
104 ? "\xf1\x8f\xbf\xbe"
105 : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
106 0x4FFFE,
107 ],
108 [ "non-character code point U+4FFFF",
109 (isASCII)
110 ? "\xf1\x8f\xbf\xbf"
111 : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
112 0x4FFFF,
113 ],
114 [ "non-character code point U+5FFFE",
115 (isASCII)
116 ? "\xf1\x9f\xbf\xbe"
117 : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
118 0x5FFFE,
119 ],
120 [ "non-character code point U+5FFFF",
121 (isASCII)
122 ? "\xf1\x9f\xbf\xbf"
123 : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
124 0x5FFFF,
125 ],
126 [ "non-character code point U+6FFFE",
127 (isASCII)
128 ? "\xf1\xaf\xbf\xbe"
129 : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
130 0x6FFFE,
131 ],
132 [ "non-character code point U+6FFFF",
133 (isASCII)
134 ? "\xf1\xaf\xbf\xbf"
135 : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
136 0x6FFFF,
137 ],
138 [ "non-character code point U+7FFFE",
139 (isASCII)
140 ? "\xf1\xbf\xbf\xbe"
141 : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
142 0x7FFFE,
143 ],
144 [ "non-character code point U+7FFFF",
145 (isASCII)
146 ? "\xf1\xbf\xbf\xbf"
147 : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
148 0x7FFFF,
149 ],
150 [ "non-character code point U+8FFFE",
151 (isASCII)
152 ? "\xf2\x8f\xbf\xbe"
153 : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
154 0x8FFFE,
155 ],
156 [ "non-character code point U+8FFFF",
157 (isASCII)
158 ? "\xf2\x8f\xbf\xbf"
159 : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
160 0x8FFFF,
161 ],
162 [ "non-character code point U+9FFFE",
163 (isASCII)
164 ? "\xf2\x9f\xbf\xbe"
165 : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
166 0x9FFFE,
167 ],
168 [ "non-character code point U+9FFFF",
169 (isASCII)
170 ? "\xf2\x9f\xbf\xbf"
171 : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
172 0x9FFFF,
173 ],
174 [ "non-character code point U+AFFFE",
175 (isASCII)
176 ? "\xf2\xaf\xbf\xbe"
177 : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
178 0xAFFFE,
179 ],
180 [ "non-character code point U+AFFFF",
181 (isASCII)
182 ? "\xf2\xaf\xbf\xbf"
183 : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
184 0xAFFFF,
185 ],
186 [ "non-character code point U+BFFFE",
187 (isASCII)
188 ? "\xf2\xbf\xbf\xbe"
189 : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
190 0xBFFFE,
191 ],
192 [ "non-character code point U+BFFFF",
193 (isASCII)
194 ? "\xf2\xbf\xbf\xbf"
195 : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
196 0xBFFFF,
197 ],
198 [ "non-character code point U+CFFFE",
199 (isASCII)
200 ? "\xf3\x8f\xbf\xbe"
201 : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
202 0xCFFFE,
203 ],
204 [ "non-character code point U+CFFFF",
205 (isASCII)
206 ? "\xf3\x8f\xbf\xbf"
207 : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
208 0xCFFFF,
209 ],
210 [ "non-character code point U+DFFFE",
211 (isASCII)
212 ? "\xf3\x9f\xbf\xbe"
213 : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
214 0xDFFFE,
215 ],
216 [ "non-character code point U+DFFFF",
217 (isASCII)
218 ? "\xf3\x9f\xbf\xbf"
219 : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
220 0xDFFFF,
221 ],
222 [ "non-character code point U+EFFFE",
223 (isASCII)
224 ? "\xf3\xaf\xbf\xbe"
225 : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
226 0xEFFFE,
227 ],
228 [ "non-character code point U+EFFFF",
229 (isASCII)
230 ? "\xf3\xaf\xbf\xbf"
231 : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
232 0xEFFFF,
233 ],
234 [ "non-character code point U+FFFFE",
235 (isASCII)
236 ? "\xf3\xbf\xbf\xbe"
237 : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
238 0xFFFFE,
239 ],
240 [ "non-character code point U+FFFFF",
241 (isASCII)
242 ? "\xf3\xbf\xbf\xbf"
243 : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
244 0xFFFFF,
245 ],
246 [ "non-character code point U+10FFFE",
247 (isASCII)
248 ? "\xf4\x8f\xbf\xbe"
249 : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
250 0x10FFFE,
251 ],
252 [ "non-character code point U+10FFFF",
253 (isASCII)
254 ? "\xf4\x8f\xbf\xbf"
255 : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
256 0x10FFFF,
257 ],
7c9f4ec6
KW
258 [ "first non_unicode",
259 (isASCII)
260 ? "\xf4\x90\x80\x80"
261 : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
262 0x110000,
263 2,
264 ],
265 [ "non_unicode whose first byte tells that",
266 (isASCII)
267 ? "\xf5\x80\x80\x80"
268 : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
269 (isASCII) ? 0x140000 : 0x200000,
270 1,
271 ],
48ceb60a 272 [ "lowest 32 bit code point",
8f79178b
KW
273 (isASCII)
274 ? "\xfe\x82\x80\x80\x80\x80\x80"
275 : I8_to_native(
276 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
277 0x80000000,
278 (isASCII) ? 1 : 8,
279 ],
280 [ "highest 32 bit code point",
281 (isASCII)
282 ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
283 : I8_to_native(
284 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
285 0xFFFFFFFF,
286 (isASCII) ? 1 : 8,
287 ],
288 [ "requires at least 32 bits, and use SUPER-type flags, instead of"
289 . " ABOVE_31_BIT",
290 (isASCII)
291 ? "\xfe\x82\x80\x80\x80\x80\x80"
292 : I8_to_native(
76513bdc 293 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
294 0x80000000,
295 1,
296 ],
8f79178b
KW
297 );
298
299 if (! $::is64bit) {
300 if (isASCII) {
8f79178b
KW
301 push @tests,
302 [ "Lowest 33 bit code point: overflow",
303 "\xFE\x84\x80\x80\x80\x80\x80",
304 -1,
305 1,
c81d4d83
KW
306 ],
307 [ "overflow that old algorithm failed to detect",
308 "\xfe\x86\x80\x80\x80\x80\x80",
309 -1,
310 2,
8f79178b
KW
311 ];
312 }
313 }
c81d4d83
KW
314
315 if ($::is64bit) {
6aa905cf 316 push @tests,
8f79178b
KW
317 [ "More than 32 bits",
318 (isASCII)
319 ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80"
320 : I8_to_native(
321 "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
322 0x1000000000,
323 (isASCII) ? 1 : 7,
6aa905cf 324 ];
c81d4d83
KW
325 if (isASCII) {
326 push @tests,
327 [ "overflow that old algorithm failed to detect",
328 "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf",
329 -1,
330 3,
331 ];
332 }
333 else {
8f79178b
KW
334 push @tests, # These could falsely show wrongly in a naive
335 # implementation
336 [ "requires at least 32 bits",
337 I8_to_native(
6aa905cf 338 "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
339 0x800000000,
340 7,
341 ],
342 [ "requires at least 32 bits",
343 I8_to_native(
6aa905cf 344 "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
345 0x10000000000,
346 6,
347 ],
348 [ "requires at least 32 bits",
349 I8_to_native(
6aa905cf 350 "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
351 0x200000000000,
352 5,
353 ],
354 [ "requires at least 32 bits",
355 I8_to_native(
6aa905cf 356 "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
357 0x4000000000000,
358 4,
359 ],
360 [ "requires at least 32 bits",
361 I8_to_native(
6aa905cf 362 "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
363 0x80000000000000,
364 3,
365 ],
366 [ "requires at least 32 bits",
367 I8_to_native(
6aa905cf 368 "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
8f79178b
KW
369 0x1000000000000000,
370 2,
371 ];
372 }
6aa905cf 373 }
c7f9e4bb 374}
6aa905cf 375
5722c46d
KW
376sub flags_to_text($$)
377{
378 my ($flags, $flags_to_text_ref) = @_;
379
380 # Returns a string containing a mnemonic representation of the bits that
381 # are set in the $flags. These are assumed to be flag bits. The return
382 # looks like "FOO|BAR|BAZ". The second parameter is a reference to an
383 # array that gives the textual representation of all the possible flags.
384 # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If
385 # no bits at all are set the string "0" is returned;
386
387 my @flag_text;
388 my $shift = 0;
389
390 return "0" if $flags == 0;
391
392 while ($flags) {
393 #diag sprintf "%x", $flags;
394 if ($flags & 1) {
395 push @flag_text, $flags_to_text_ref->[$shift];
396 }
397 $shift++;
398 $flags >>= 1;
399 }
400
401 return join "|", @flag_text;
402}
403
404# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
405# instead of A_, D_, but the prefixes will be used in a a later commit, so
406# minimize churn by having them here.
407my @utf8n_flags_to_text = ( qw(
408 A_EMPTY
409 A_CONTINUATION
410 A_NON_CONTINUATION
411 A_SHORT
412 A_LONG
413 A_LONG_AND_ITS_VALUE
414 PLACEHOLDER
415 A_OVERFLOW
416 D_SURROGATE
417 W_SURROGATE
418 D_NONCHAR
419 W_NONCHAR
420 D_SUPER
421 W_SUPER
422 D_ABOVE_31_BIT
423 W_ABOVE_31_BIT
424 CHECK_ONLY
425 NO_CONFIDENCE_IN_CURLEN_
426 ) );
427
a8ee5133
KW
428sub utf8n_display_call($)
429{
430 # Converts an eval string that calls test_utf8n_to_uvchr into a more human
431 # readable form, and returns it. Doesn't work if the byte string contains
432 # an apostrophe. The return will look something like:
433 # test_utf8n_to_uvchr_error('$bytes', $length, $flags)
434 #diag $_[0];
435
436 $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
437 my $text1 = $1; # Everything before the byte string
438 my $bytes = $2;
439 my $text2 = $3; # Includes the length
440 my $flags = $4;
441
442 return $text1
443 . display_bytes($bytes)
444 . $text2
445 . flags_to_text($flags, \@utf8n_flags_to_text)
446 . ')';
447}
448
d884ea32
KW
449sub uvchr_display_call($)
450{
451 # Converts an eval string that calls test_uvchr_to_utf8 into a more human
452 # readable form, and returns it. The return will look something like:
453 # test_uvchr_to_utf8n_flags($uv, $flags)
454 #diag $_[0];
455
456 my @flags_to_text = ( qw(
457 W_SURROGATE
458 W_NONCHAR
459 W_SUPER
460 W_ABOVE_31_BIT
461 D_SURROGATE
462 D_NONCHAR
463 D_SUPER
464 D_ABOVE_31_BIT
465 ) );
466
467 $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
468 my $text = $1;
469 my $cp = sprintf "%X", $2;
470 my $flags = $3;
471
472 return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
473}
474
69485e19
KW
475sub do_warnings_test(@)
476{
477 my @expected_warnings = @_;
478
479 # Compares the input expected warnings array with @warnings_gotten,
480 # generating a pass for each found, removing it from @warnings_gotten.
481 # Any discrepancies generate test failures. Returns TRUE if no
482 # discrepcancies; otherwise FALSE.
483
484 my $succeeded = 1;
485
486 if (@expected_warnings == 0) {
487 if (! is(@warnings_gotten, 0, " Expected and got no warnings")) {
488 output_warnings(@warnings_gotten);
489 $succeeded = 0;
490 }
491 return $succeeded;
492 }
493
494 # Check that we got all the expected warnings,
495 # removing each one found
496 WARNING:
497 foreach my $expected (@expected_warnings) {
498 foreach (my $i = 0; $i < @warnings_gotten; $i++) {
499 if ($warnings_gotten[$i] =~ $expected) {
500 pass(" Expected and got warning: "
501 . " $warnings_gotten[$i]");
502 splice @warnings_gotten, $i, 1;
503 next WARNING;
504 }
505 }
506 fail(" Expected a warning that matches "
507 . $expected . " but didn't get it");
508 $succeeded = 0;
509 }
510
511 if (! is(@warnings_gotten, 0, " Got no unexpected warnings")) {
512 output_warnings(@warnings_gotten);
513 $succeeded = 0;
514 }
515
516 return $succeeded;
517}
518
6aa905cf
KW
519# This test is split into this number of files.
520my $num_test_files = $ENV{TEST_JOBS} || 1;
521$num_test_files = 10 if $num_test_files > 10;
522
523my $test_count = -1;
524foreach my $test (@tests) {
525 $test_count++;
526 next if $test_count % $num_test_files != $::TEST_CHUNK;
527
d42d7565 528 my ($testname, $bytes, $allowed_uv, $needed_to_discern_len) = @$test;
6aa905cf
KW
529
530 my $length = length $bytes;
04f42bf6 531 my $will_overflow = $allowed_uv < 0;
6aa905cf 532
2c511c58
KW
533 my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
534
d42d7565 535 my $controlling_warning_category;
af816908
KW
536 my $utf8n_flag_to_warn;
537 my $utf8n_flag_to_disallow;
538 my $uvchr_flag_to_warn;
539 my $uvchr_flag_to_disallow;
b7e1f4b2 540
e4e140b4
KW
541 # We want to test that the independent flags are actually independent.
542 # For example, that a surrogate doesn't trigger a non-character warning,
543 # and conversely, turning off an above-Unicode flag doesn't suppress a
544 # surrogate warning. Earlier versions of this file used nested loops to
545 # test all possible combinations. But that creates lots of tests, making
546 # this run too long. What is now done instead is to use the complement of
547 # the category we are testing to greatly reduce the combinatorial
548 # explosion. For example, if we have a surrogate and we aren't expecting
549 # a warning about it, we set all the flags for non-surrogates to raise
550 # warnings. If one shows up, it indicates the flags aren't independent.
551 my $utf8n_flag_to_warn_complement;
552 my $utf8n_flag_to_disallow_complement;
553 my $uvchr_flag_to_warn_complement;
554 my $uvchr_flag_to_disallow_complement;
555
bf422d6a
KW
556 # Many of the code points being tested are middling in that if code point
557 # edge cases work, these are very likely to as well. Because this test
558 # file takes a while to execute, we skip testing the edge effects of code
559 # points deemed middling, while testing their basics and continuing to
560 # fully test the non-middling code points.
561 my $skip_most_tests = 0;
562
601e92f1
KW
563 my $cp_message_qr; # Pattern that matches the message raised when
564 # that message contains the problematic code
565 # point. The message is the same (currently) both
566 # when going from/to utf8.
567 my $non_cp_trailing_text; # The suffix text when the message doesn't
568 # contain a code point. (This is a result of
569 # some sort of malformation that means we
570 # can't get an exact code poin
571
af816908
KW
572 if ($will_overflow || $allowed_uv > 0x10FFFF) {
573
e4e140b4 574 # Set the SUPER flags; later, we test for ABOVE_31_BIT as well.
af816908
KW
575 $utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
576 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
577 $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
578 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
579
e4e140b4
KW
580 # Below, we add the flags for non-above-31 bit to the code points that
581 # don't fit that category. Special tests are done for this category
582 # in the inner loop.
583 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
584 |$::UTF8_WARN_SURROGATE;
585 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
586 |$::UTF8_DISALLOW_SURROGATE;
587 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
588 |$::UNICODE_WARN_SURROGATE;
589 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
590 |$::UNICODE_DISALLOW_SURROGATE;
591 $controlling_warning_category = 'non_unicode';
592
8f79178b 593 if ($will_overflow) { # This is realy a malformation
69485e19 594 $non_cp_trailing_text = "if you see this, there is an error";
601e92f1 595 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
af816908
KW
596 }
597 elsif ($allowed_uv > 0x7FFFFFFF) {
601e92f1
KW
598 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
599 \Q and not portable\E/x;
600 $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
af816908 601 }
601e92f1
KW
602 else {
603 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
604 \Q may not be portable\E/x;
605 $non_cp_trailing_text = "is for a non-Unicode code point, may not"
606 . " be portable";
e4e140b4
KW
607 $utf8n_flag_to_warn_complement |= $::UTF8_WARN_ABOVE_31_BIT;
608 $utf8n_flag_to_disallow_complement |= $::UTF8_DISALLOW_ABOVE_31_BIT;
609 $uvchr_flag_to_warn_complement |= $::UNICODE_WARN_ABOVE_31_BIT;
610 $uvchr_flag_to_disallow_complement
611 |= $::UNICODE_DISALLOW_ABOVE_31_BIT;
af816908 612 }
3022ad00
KW
613 }
614 elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
601e92f1
KW
615 $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
616 $non_cp_trailing_text = "is for a surrogate";
67e45424 617 $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
bf422d6a 618 $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
af816908
KW
619
620 $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE;
621 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
622 $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE;
623 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
e4e140b4
KW
624
625 $utf8n_flag_to_warn_complement = $::UTF8_WARN_NONCHAR
626 |$::UTF8_WARN_SUPER
627 |$::UTF8_WARN_ABOVE_31_BIT;
628 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_NONCHAR
629 |$::UTF8_DISALLOW_SUPER
630 |$::UTF8_DISALLOW_ABOVE_31_BIT;
631 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_NONCHAR
632 |$::UNICODE_WARN_SUPER
633 |$::UNICODE_WARN_ABOVE_31_BIT;
634 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_NONCHAR
635 |$::UNICODE_DISALLOW_SUPER
636 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
d42d7565 637 $controlling_warning_category = 'surrogate';
3022ad00
KW
638 }
639 elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
640 || ($allowed_uv & 0xFFFE) == 0xFFFE)
641 {
601e92f1
KW
642 $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
643 \Q is not recommended for open interchange\E/x;
644 $non_cp_trailing_text = "if you see this, there is an error";
67e45424 645 $needed_to_discern_len = $length unless defined $needed_to_discern_len;
bf422d6a
KW
646 if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
647 || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
648 {
649 $skip_most_tests = 1;
650 }
af816908
KW
651
652 $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR;
653 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
654 $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR;
655 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
e4e140b4
KW
656
657 $utf8n_flag_to_warn_complement = $::UTF8_WARN_SURROGATE
658 |$::UTF8_WARN_SUPER
659 |$::UTF8_WARN_ABOVE_31_BIT;
660 $utf8n_flag_to_disallow_complement = $::UTF8_DISALLOW_SURROGATE
661 |$::UTF8_DISALLOW_SUPER
662 |$::UTF8_DISALLOW_ABOVE_31_BIT;
663 $uvchr_flag_to_warn_complement = $::UNICODE_WARN_SURROGATE
664 |$::UNICODE_WARN_SUPER
665 |$::UNICODE_WARN_ABOVE_31_BIT;
666 $uvchr_flag_to_disallow_complement = $::UNICODE_DISALLOW_SURROGATE
667 |$::UNICODE_DISALLOW_SUPER
668 |$::UNICODE_DISALLOW_ABOVE_31_BIT;
669
d42d7565 670 $controlling_warning_category = 'nonchar';
3022ad00
KW
671 }
672 else {
673 die "Can't figure out what type of warning to test for $testname"
674 }
675
67e45424
KW
676 die 'Didn\'t set $needed_to_discern_len for ' . $testname
677 unless defined $needed_to_discern_len;
8f79178b
KW
678 { # First test the isFOO calls
679 use warnings; # Make sure these don't raise warnings
9cdc3054 680 undef @warnings_gotten;
8f79178b 681
6aa905cf
KW
682 my $ret = test_isUTF8_CHAR($bytes, $length);
683 my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
684 if ($will_overflow) {
d402d77f
KW
685 is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
686 is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0");
6aa905cf
KW
687 }
688 else {
689 is($ret, $length,
d402d77f
KW
690 "For $testname: isUTF8_CHAR() returns expected length: $length");
691 is($ret_flags, $length, " And isUTF8_CHAR_flags(...,0)"
6aa905cf
KW
692 . " returns expected length: $length");
693 }
9cdc3054 694 is(scalar @warnings_gotten, 0,
d402d77f
KW
695 " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
696 . " any warnings")
9cdc3054 697 or output_warnings(@warnings_gotten);
6aa905cf 698
9cdc3054 699 undef @warnings_gotten;
6aa905cf
KW
700 $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
701 if ($will_overflow) {
d402d77f 702 is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
703 }
704 else {
705 my $expected_ret = ( $testname =~ /surrogate|non-character/
706 || $allowed_uv > 0x10FFFF)
707 ? 0
708 : $length;
d402d77f 709 is($ret, $expected_ret, " And isSTRICT_UTF8_CHAR() returns"
6aa905cf
KW
710 . " expected length: $expected_ret");
711 $ret = test_isUTF8_CHAR_flags($bytes, $length,
712 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
713 is($ret, $expected_ret,
d402d77f
KW
714 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
715 . " acts like isSTRICT_UTF8_CHAR");
6aa905cf 716 }
9cdc3054 717 is(scalar @warnings_gotten, 0,
d402d77f
KW
718 " And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
719 . " generated any warnings")
9cdc3054 720 or output_warnings(@warnings_gotten);
6aa905cf 721
9cdc3054 722 undef @warnings_gotten;
6aa905cf
KW
723 $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
724 if ($will_overflow) {
d402d77f 725 is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
726 }
727 else {
728 my $expected_ret = ( $testname =~ /surrogate/
729 || $allowed_uv > 0x10FFFF)
730 ? 0
731 : $length;
d402d77f 732 is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()"
6aa905cf
KW
733 ." returns expected length: $expected_ret");
734 $ret = test_isUTF8_CHAR_flags($bytes, $length,
8f79178b 735 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
6aa905cf 736 is($ret, $expected_ret,
d402d77f
KW
737 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
738 . " acts like isC9_STRICT_UTF8_CHAR");
6aa905cf 739 }
9cdc3054 740 is(scalar @warnings_gotten, 0,
d402d77f
KW
741 " And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
742 . " generated any warnings")
9cdc3054 743 or output_warnings(@warnings_gotten);
6aa905cf
KW
744
745 # Test partial character handling, for each byte not a full character
746 for my $j (1.. $length - 1) {
747
748 # Skip the test for the interaction between overflow and above-31
749 # bit. It is really testing other things than the partial
750 # character tests, for which other tests in this file are
751 # sufficient
04f42bf6 752 last if $will_overflow;
6aa905cf 753
69485e19 754 foreach my $disallow_flag (0, $utf8n_flag_to_disallow) {
6aa905cf
KW
755 my $partial = substr($bytes, 0, $j);
756 my $ret_should_be;
757 my $comment;
758 if ($disallow_flag) {
759 $ret_should_be = 0;
760 $comment = "disallowed";
761 if ($j < $needed_to_discern_len) {
762 $ret_should_be = 1;
763 $comment .= ", but need $needed_to_discern_len bytes"
764 . " to discern:";
765 }
766 }
767 else {
768 $ret_should_be = 1;
769 $comment = "allowed";
770 }
771
9cdc3054 772 undef @warnings_gotten;
6aa905cf
KW
773
774 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
775 $disallow_flag);
776 is($ret, $ret_should_be,
d402d77f
KW
777 " And is_utf8_valid_partial_char_flags("
778 . display_bytes($partial)
779 . "), $comment: returns $ret_should_be");
9cdc3054 780 is(scalar @warnings_gotten, 0,
d402d77f 781 " And is_utf8_valid_partial_char_flags()"
6aa905cf 782 . " generated no warnings")
9cdc3054 783 or output_warnings(@warnings_gotten);
6aa905cf
KW
784 }
785 }
786 }
787
8f79178b 788 # Now test the to/from UTF-8 calls
69485e19
KW
789 # This is more complicated than the malformations tested in other files in
790 # this directory, as there are several orthogonal variables involved. We
791 # test most possible combinations
8f79178b
KW
792 foreach my $do_disallow (0, 1) {
793 next if $skip_most_tests && ! $do_disallow;
794
795 # We try various combinations of malformations that can occur
796 foreach my $short ("", "short") {
797 next if $skip_most_tests && $short;
798 foreach my $unexpected_noncont ("", "unexpected non-continuation") {
799 next if $skip_most_tests && $unexpected_noncont;
800 foreach my $overlong ("", "overlong") {
801 next if $overlong && $skip_most_tests;
802
803 # If we're creating an overlong, it can't be longer than the
804 # maximum length, so skip if we're already at that length.
805 next if $overlong && $length >= $::max_bytes;
6aa905cf 806
69485e19
KW
807 # We classify the warnings into certain "interesting" types,
808 # described later
809 foreach my $warning_type (0..4) {
810 next if $skip_most_tests && $warning_type != 1;
811 foreach my $use_warn_flag (0, 1) {
812 next if $skip_most_tests && ! $use_warn_flag;
813
8f79178b
KW
814 # Finally, here is the inner loop
815
69485e19
KW
816 my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
817 my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
818 my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
819 my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
820
821 my $eval_warn;
822 my $expect_regular_warnings;
823 my $expect_warnings_for_malformed;
824 my $expect_warnings_for_overflow;
825
826 if ($warning_type == 0) {
827 $eval_warn = "use warnings; no warnings 'deprecated'";
828 $expect_regular_warnings = $use_warn_flag;
829 $expect_warnings_for_overflow = 1;
830 $expect_warnings_for_malformed = 1;
6aa905cf 831 }
69485e19
KW
832 elsif ($warning_type == 1) {
833 $eval_warn = "no warnings";
834 $expect_regular_warnings = 0;
835 $expect_warnings_for_overflow = 0;
836 $expect_warnings_for_malformed = 0;
837 }
838 elsif ($warning_type == 2) {
839 $eval_warn = "no warnings; use warnings 'utf8'";
840 $expect_regular_warnings = $use_warn_flag;
841 $expect_warnings_for_overflow = 1;
842 $expect_warnings_for_malformed = 1;
843 }
844 elsif ($warning_type == 3) {
845 $eval_warn = "no warnings; use warnings"
e4e140b4
KW
846 . " '$controlling_warning_category'";
847 $expect_regular_warnings = $use_warn_flag;
69485e19 848 $expect_warnings_for_overflow
e4e140b4 849 = $controlling_warning_category eq 'non_unicode';
69485e19
KW
850 $expect_warnings_for_malformed = 0;
851 }
852 elsif ($warning_type == 4) { # Like type 3, but uses the
853 # above-31-bit flags
854 # The complement flags were set up so that the
855 # above-31-bit flags have been tested that they don't
856 # trigger wrongly for too small code points. And the
857 # flags have been set up so that those small code
858 # points are tested for being above Unicode. What's
859 # left to test is that the large code points do
860 # trigger the above-31-bit flags.
861 next if ! $will_overflow && $allowed_uv < 0x80000000;
e4e140b4 862 next if $controlling_warning_category ne 'non_unicode';
69485e19
KW
863 $eval_warn = "no warnings; use warnings 'non_unicode'";
864 $expect_regular_warnings = 1;
865 $expect_warnings_for_overflow = 1;
866 $expect_warnings_for_malformed = 0;
867 $this_utf8n_flag_to_warn = $::UTF8_WARN_ABOVE_31_BIT;
868 $this_utf8n_flag_to_disallow
869 = $::UTF8_DISALLOW_ABOVE_31_BIT;
870 $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
871 $this_uvchr_flag_to_disallow
872 = $::UNICODE_DISALLOW_ABOVE_31_BIT;
601e92f1
KW
873 }
874 else {
69485e19 875 die "Unexpected warning type '$warning_type'";
601e92f1
KW
876 }
877
69485e19
KW
878 # We only need to test the case where all warnings are
879 # enabled (type 0) to see if turning off the warning flag
880 # causes things to not be output. If those pass, then
881 # turning on some sub-category of warnings, or turning off
882 # warnings altogether are extremely likely to not output
883 # warnings either, given how the warnings subsystem is
884 # supposed to work, and this file assumes it does work.
885 next if $warning_type != 0 && ! $use_warn_flag;
886
887 # The convention is that the 'got' flag is the same value
888 # as the disallow one. If this were violated, the tests
889 # here should start failing.
890 my $return_flag = $this_utf8n_flag_to_disallow;
891
e4e140b4
KW
892 # If we aren't expecting warnings/disallow for this, turn
893 # on all the other flags. That makes sure that they all
894 # are independent of this flag, and so we don't need to
895 # test them individually.
896 my $this_warning_flags = ($use_warn_flag)
897 ? $this_utf8n_flag_to_warn
898 : $utf8n_flag_to_warn_complement;
69485e19 899 my $this_disallow_flags = ($do_disallow)
e4e140b4
KW
900 ? $this_utf8n_flag_to_disallow
901 : $utf8n_flag_to_disallow_complement;
6aa905cf
KW
902 my $this_bytes = $bytes;
903 my $this_length = $length;
904 my $expected_uv = $allowed_uv;
b3169593 905 my $this_expected_len = $length;
6aa905cf 906 my $this_needed_to_discern_len = $needed_to_discern_len;
6aa905cf 907
69485e19
KW
908 my @malformation_names;
909 my @expected_warnings;
910 my @expected_return_flags;
6aa905cf 911
69485e19
KW
912 # Now go through the possible malformations wanted, and
913 # change the input accordingly. We also can set up
914 # certain other things now, like whether we expect a
915 # return flag from this malformation and which flag.
916 if ($overlong) {
8f79178b
KW
917
918 # To force this malformation, we convert the original
919 # start byte into a continuation byte with the same
920 # data bits as originally. ...
921 substr($this_bytes, 0, 1)
922 = start_byte_to_cont(substr($this_bytes,
923 0, 1));
924
925 # ... Then we prepend it with a known overlong
926 # sequence. This should evaluate to the exact same
927 # code point as the original.
928 $this_bytes
929 = I8_to_native("\xff")
930 . (I8_to_native(chr $::lowest_continuation)
931 x ( $::max_bytes - 1 - length($this_bytes)))
932 . $this_bytes;
933 $this_length = length($this_bytes);
934 $this_needed_to_discern_len
935 = $::max_bytes - ($this_expected_len
6aa905cf 936 - $this_needed_to_discern_len);
8f79178b
KW
937 $this_expected_len = $::max_bytes;
938 push @expected_return_flags, $::UTF8_GOT_LONG;
69485e19
KW
939 push @malformation_names, $overlong;
940 if ($expect_warnings_for_malformed) {
941 if ( ! $short
942 && ! $unexpected_noncont
943 && ! $will_overflow)
944 {
945 my $overlong_bytes
946 = display_bytes_no_quotes($this_bytes);
947 my $correct_bytes
948 = display_bytes_no_quotes($bytes);
949 push @expected_warnings,
950 qr/\QMalformed UTF-8 character:\E
951 \Q $overlong_bytes (overlong;\E
952 \Q instead use $correct_bytes to\E
953 \Q represent U+$uv_string)/x;
954 }
955 else {
956 push @expected_warnings, qr/overlong/;
957 }
6aa905cf 958 }
69485e19 959 }
6aa905cf 960
69485e19
KW
961 if ($short) {
962 push @malformation_names, $short;
963 push @expected_warnings, qr/short/
964 if $expect_warnings_for_malformed;
965
966 # To force this malformation, just tell the test to
967 # not look as far as it should into the input.
968 $this_length--;
969 $this_expected_len--;
970 push @expected_return_flags, $::UTF8_GOT_SHORT;
971 }
972
973 if ($unexpected_noncont) {
974 push @malformation_names, $unexpected_noncont;
975 push @expected_warnings, qr/$unexpected_noncont/
976 if $expect_warnings_for_malformed;
977
978 # To force this malformation, change the final
979 # continuation byte into a non continuation.
980 my $pos = ($short) ? -2 : -1;
981 substr($this_bytes, $pos, 1) = '?';
982 $this_expected_len--;
983 push @expected_return_flags,
984 $::UTF8_GOT_NON_CONTINUATION;
985 }
986
987 # The overflow malformation is done differently than other
988 # malformations. It comes from manually typed tests in
989 # the test array, but it also is above Unicode and uses
990 # Perl extended UTF-8, so affects some of the flags being
991 # tested. We now make it be treated like one of the other
992 # generated malformations.
993 if ($will_overflow) {
994
995 # An overflow is (way) above Unicode, and overrides
996 # everything else.
997 $expect_regular_warnings = 0;
998
999 push @malformation_names, 'overflow';
1000 if ($expect_warnings_for_overflow) {
1001 my $qr = display_bytes_no_quotes(
1002 substr($this_bytes, 0, $this_expected_len));
1003 $qr = qr/\QMalformed UTF-8 character: \E
1004 \Q$qr (overflows)\E/x;
1005 push @expected_warnings, $qr;
6aa905cf 1006 }
69485e19 1007 push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
6aa905cf
KW
1008 }
1009
69485e19
KW
1010 # Here, we've set things up based on the malformations.
1011 # Now generate the text for them for the test name.
1012 my $malformations_name = "";
1013 if (@malformation_names) {
1014 $malformations_name .= "malformation";
1015 $malformations_name .= "s" if @malformation_names > 1;
1016 $malformations_name .= ": ";
1017 $malformations_name .= join "/", @malformation_names;
1018 $malformations_name = " ($malformations_name)";
1019 }
1020
1021 # It may be that the malformations have shortened the
1022 # amount of input we look at so much that we can't tell
1023 # what the category the code point was in. Otherwise, set
1024 # up the expected return flags based on the warnings and
1025 # disallowments.
1026 if ($this_expected_len < $this_needed_to_discern_len) {
1027 $expect_regular_warnings = 0;
1028 }
1029 elsif ( ($this_warning_flags & $this_utf8n_flag_to_warn)
1030 || ( $this_disallow_flags
1031 & $this_utf8n_flag_to_disallow))
1032 {
1033 push @expected_return_flags, $return_flag;
1034 }
1035
1036 # Finish setting up the expected warning.
1037 if ($expect_regular_warnings) {
1038
1039 # So far the array contains warnings generated by
1040 # malformations. Add the expected regular one.
1041 unshift @expected_warnings, $cp_message_qr;
1042
1043 # But it may need to be modified, because either of
1044 # these malformations means we can't determine the
1045 # expected code point.
1046 if ($short || $unexpected_noncont) {
1047 my $first_byte = substr($this_bytes, 0, 1);
1048 $expected_warnings[0] = display_bytes(
1049 substr($this_bytes, 0, $this_expected_len));
1050 $expected_warnings[0]
1051 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
1052 \Q $expected_warnings[0]\E
1053 \Q $non_cp_trailing_text\E/x;
1054 }
1055 }
6aa905cf
KW
1056
1057 # Is effectively disallowed if we've set up a
1058 # malformation, even if the flag indicates it is
1059 # allowed. Fix up test name to indicate this as
1060 # well
69485e19
KW
1061 my $disallowed = ( $this_disallow_flags
1062 & $this_utf8n_flag_to_disallow)
1063 || $malformations_name;
6aa905cf 1064 my $this_name = "utf8n_to_uvchr_error() $testname: "
69485e19
KW
1065 . (($disallowed)
1066 ? 'disallowed'
1067 : 'allowed');
6aa905cf 1068 $this_name .= ", $eval_warn";
69485e19
KW
1069 $this_name .= ", " . (( $this_warning_flags
1070 & $this_utf8n_flag_to_warn)
1071 ? 'with flag for raising warnings'
1072 : 'no flag for raising warnings');
1073 $this_name .= $malformations_name;
db0f09e6 1074
8f79178b 1075 # Do the actual test using an eval
9cdc3054 1076 undef @warnings_gotten;
6aa905cf 1077 my $ret_ref;
69485e19 1078 my $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1079 my $eval_text = "$eval_warn; \$ret_ref"
1080 . " = test_utf8n_to_uvchr_error("
69485e19 1081 . "'$this_bytes', $this_length, $this_flags)";
6aa905cf 1082 eval "$eval_text";
69485e19 1083 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
6aa905cf 1084 {
a8ee5133
KW
1085 diag "\$@='$@'; call was: "
1086 . utf8n_display_call($eval_text);
6aa905cf
KW
1087 next;
1088 }
1089 if ($disallowed) {
d402d77f 1090 is($ret_ref->[0], 0, " And returns 0")
a8ee5133 1091 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1092 }
1093 else {
1094 is($ret_ref->[0], $expected_uv,
d402d77f
KW
1095 " And returns expected uv: "
1096 . $uv_string)
a8ee5133 1097 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
1098 }
1099 is($ret_ref->[1], $this_expected_len,
d402d77f 1100 " And returns expected length:"
6aa905cf 1101 . " $this_expected_len")
a8ee5133 1102 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1103
9cdc3054 1104 my $returned_flags = $ret_ref->[2];
6aa905cf 1105
9cdc3054 1106 for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
69485e19
KW
1107 if ($expected_return_flags[$i] & $returned_flags) {
1108 if ($expected_return_flags[$i]
1109 == $::UTF8_DISALLOW_ABOVE_31_BIT)
1110 {
1111 pass(" Expected and got return flag for"
1112 . " above_31_bit");
1113 }
1114 # The first entries in this are
1115 # malformations
1116 elsif ($i > @malformation_names - 1) {
1117 pass(" Expected and got return flag"
e4e140b4 1118 . " for " . $controlling_warning_category);
69485e19
KW
1119 }
1120 else {
1121 pass(" Expected and got return flag for "
1122 . $malformation_names[$i]
1123 . " malformation");
1124 }
9cdc3054 1125 $returned_flags &= ~$expected_return_flags[$i];
69485e19 1126 splice @expected_return_flags, $i, 1;
6aa905cf 1127 }
6aa905cf 1128 }
6aa905cf 1129
69485e19
KW
1130 is($returned_flags, 0,
1131 " Got no unexpected return flags")
1132 or diag "The unexpected flags gotten were: "
5722c46d
KW
1133 . (flags_to_text($returned_flags,
1134 \@utf8n_flags_to_text)
69485e19
KW
1135 # We strip off any prefixes from the flag
1136 # names
1137 =~ s/ \b [A-Z] _ //xgr);
1138 is (scalar @expected_return_flags, 0,
1139 " Got all expected return flags")
1140 or diag "The expected flags not gotten were: "
1141 . (flags_to_text(eval join("|",
1142 @expected_return_flags),
1143 \@utf8n_flags_to_text)
1144 # We strip off any prefixes from the flag
1145 # names
5722c46d 1146 =~ s/ \b [A-Z] _ //xgr);
9cdc3054 1147
69485e19
KW
1148 do_warnings_test(@expected_warnings)
1149 or diag "Call was: " . utf8n_display_call($eval_text);
1150 undef @warnings_gotten;
6aa905cf
KW
1151
1152 # Check CHECK_ONLY results when the input is
1153 # disallowed. Do this when actually disallowed,
69485e19 1154 # not just when the $this_disallow_flags is set
6aa905cf 1155 if ($disallowed) {
69485e19
KW
1156 my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1157 my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1158 . " test_utf8n_to_uvchr_error('"
1159 . "$this_bytes', $this_length,"
1160 . " $this_flags)";
1161 eval $eval_text;
a8ee5133
KW
1162 if (! ok ("$@ eq ''",
1163 " And eval succeeded with CHECK_ONLY"))
1164 {
1165 diag "\$@='$@'; Call was: "
1166 . utf8n_display_call($eval_text);
1167 next;
1168 }
d402d77f 1169 is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0")
a8ee5133 1170 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1171 is($ret_ref->[1], -1,
d402d77f 1172 " CHECK_ONLY: returns -1 for length")
a8ee5133 1173 or diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1174 if (! is(scalar @warnings_gotten, 0,
d402d77f 1175 " CHECK_ONLY: no warnings generated"))
6aa905cf 1176 {
a8ee5133 1177 diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1178 output_warnings(@warnings_gotten);
6aa905cf
KW
1179 }
1180 }
1181
1182 # Now repeat some of the above, but for
1183 # uvchr_to_utf8_flags(). Since this comes from an
8f79178b
KW
1184 # existing code point, it hasn't overflowed, and isn't
1185 # malformed.
69485e19
KW
1186 next if @malformation_names;
1187
1188 $this_warning_flags = ($use_warn_flag)
1189 ? $this_uvchr_flag_to_warn
1190 : 0;
1191 $this_disallow_flags = ($do_disallow)
1192 ? $this_uvchr_flag_to_disallow
1193 : 0;
1194
1195 $disallowed = $this_disallow_flags
1196 & $this_uvchr_flag_to_disallow;
1197 $this_name .= ", " . (( $this_warning_flags
1198 & $this_utf8n_flag_to_warn)
1199 ? 'with flag for raising warnings'
1200 : 'no flag for raising warnings');
6aa905cf
KW
1201
1202 $this_name = "uvchr_to_utf8_flags() $testname: "
69485e19 1203 . (($disallowed)
6aa905cf 1204 ? 'disallowed'
6aa905cf
KW
1205 : 'allowed');
1206 $this_name .= ", $eval_warn";
69485e19
KW
1207 $this_name .= ", " . (( $this_warning_flags
1208 & $this_uvchr_flag_to_warn)
6aa905cf
KW
1209 ? 'with warning flag'
1210 : 'no warning flag');
1211
9cdc3054 1212 undef @warnings_gotten;
6aa905cf 1213 my $ret;
69485e19 1214 $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1215 $eval_text = "$eval_warn; \$ret ="
1216 . " test_uvchr_to_utf8_flags("
d884ea32 1217 . "$allowed_uv, $this_flags)";
6aa905cf
KW
1218 eval "$eval_text";
1219 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1220 {
d884ea32
KW
1221 diag "\$@='$@'; call was: "
1222 . uvchr_display_call($eval_text);
6aa905cf
KW
1223 next;
1224 }
1225 if ($disallowed) {
d402d77f 1226 is($ret, undef, " And returns undef")
d884ea32 1227 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1228 }
1229 else {
d402d77f 1230 is($ret, $this_bytes, " And returns expected string")
d884ea32 1231 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf 1232 }
69485e19
KW
1233
1234 do_warnings_test(@expected_warnings)
1235 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1236 }
1237 }
6aa905cf
KW
1238 }
1239 }
8f79178b 1240 }
6aa905cf
KW
1241 }
1242}
1243
1244done_testing;