This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8n_to_uvchr() Use correct warnings category
[perl5.git] / ext / XS-APItest / t / utf8_warn_base.pl
CommitLineData
6aa905cf
KW
1#!perl -w
2
3# This is a base file to be used by various .t's in its directory
4
5use strict;
6use Test::More;
7
8BEGIN {
9 use_ok('XS::APItest');
10 require 'charset_tools.pl';
11 require './t/utf8_setup.pl';
12};
13
14$|=1;
15
16no warnings 'deprecated'; # Some of the below are above IV_MAX on 32 bit
17 # machines, and that is tested elsewhere
18
19use XS::APItest;
20use Data::Dumper;
21
9cdc3054 22my @warnings_gotten;
6aa905cf
KW
23
24use warnings 'utf8';
8d6f1506
KW
25local $SIG{__WARN__} = sub { my @copy = @_;
26 push @warnings_gotten, map { chomp; $_ } @copy;
27 };
6aa905cf 28
6aa905cf
KW
29
30# Now test the cases where a legal code point is generated, but may or may not
31# be allowed/warned on.
32my @tests = (
af816908 33 # ($testname, $bytes, $controlling_warning_category,
b7e1f4b2 34 # $allowed_uv, $needed_to_discern_len )
6aa905cf
KW
35 [ "lowest surrogate",
36 (isASCII) ? "\xed\xa0\x80" : I8_to_native("\xf1\xb6\xa0\xa0"),
6aa905cf 37 'surrogate', 0xD800,
6aa905cf
KW
38 ],
39 [ "a middle surrogate",
40 (isASCII) ? "\xed\xa4\x8d" : I8_to_native("\xf1\xb6\xa8\xad"),
6aa905cf 41 'surrogate', 0xD90D,
6aa905cf
KW
42 ],
43 [ "highest surrogate",
44 (isASCII) ? "\xed\xbf\xbf" : I8_to_native("\xf1\xb7\xbf\xbf"),
6aa905cf 45 'surrogate', 0xDFFF,
6aa905cf
KW
46 ],
47 [ "first non_unicode",
48 (isASCII) ? "\xf4\x90\x80\x80" : I8_to_native("\xf9\xa2\xa0\xa0\xa0"),
6aa905cf 49 'non_unicode', 0x110000,
6aa905cf 50 2,
6aa905cf
KW
51 ],
52 [ "non_unicode whose first byte tells that",
53 (isASCII) ? "\xf5\x80\x80\x80" : I8_to_native("\xfa\xa0\xa0\xa0\xa0"),
6aa905cf
KW
54 'non_unicode',
55 (isASCII) ? 0x140000 : 0x200000,
6aa905cf 56 1,
6aa905cf
KW
57 ],
58 [ "first of 32 consecutive non-character code points",
59 (isASCII) ? "\xef\xb7\x90" : I8_to_native("\xf1\xbf\xae\xb0"),
6aa905cf 60 'nonchar', 0xFDD0,
6aa905cf
KW
61 ],
62 [ "a mid non-character code point of the 32 consecutive ones",
63 (isASCII) ? "\xef\xb7\xa0" : I8_to_native("\xf1\xbf\xaf\xa0"),
6aa905cf 64 'nonchar', 0xFDE0,
6aa905cf
KW
65 ],
66 [ "final of 32 consecutive non-character code points",
67 (isASCII) ? "\xef\xb7\xaf" : I8_to_native("\xf1\xbf\xaf\xaf"),
6aa905cf 68 'nonchar', 0xFDEF,
6aa905cf
KW
69 ],
70 [ "non-character code point U+FFFE",
71 (isASCII) ? "\xef\xbf\xbe" : I8_to_native("\xf1\xbf\xbf\xbe"),
6aa905cf 72 'nonchar', 0xFFFE,
6aa905cf
KW
73 ],
74 [ "non-character code point U+FFFF",
75 (isASCII) ? "\xef\xbf\xbf" : I8_to_native("\xf1\xbf\xbf\xbf"),
6aa905cf 76 'nonchar', 0xFFFF,
6aa905cf
KW
77 ],
78 [ "non-character code point U+1FFFE",
79 (isASCII) ? "\xf0\x9f\xbf\xbe" : I8_to_native("\xf3\xbf\xbf\xbe"),
6aa905cf 80 'nonchar', 0x1FFFE,
6aa905cf
KW
81 ],
82 [ "non-character code point U+1FFFF",
83 (isASCII) ? "\xf0\x9f\xbf\xbf" : I8_to_native("\xf3\xbf\xbf\xbf"),
6aa905cf 84 'nonchar', 0x1FFFF,
6aa905cf
KW
85 ],
86 [ "non-character code point U+2FFFE",
87 (isASCII) ? "\xf0\xaf\xbf\xbe" : I8_to_native("\xf5\xbf\xbf\xbe"),
6aa905cf 88 'nonchar', 0x2FFFE,
6aa905cf
KW
89 ],
90 [ "non-character code point U+2FFFF",
91 (isASCII) ? "\xf0\xaf\xbf\xbf" : I8_to_native("\xf5\xbf\xbf\xbf"),
6aa905cf 92 'nonchar', 0x2FFFF,
6aa905cf
KW
93 ],
94 [ "non-character code point U+3FFFE",
95 (isASCII) ? "\xf0\xbf\xbf\xbe" : I8_to_native("\xf7\xbf\xbf\xbe"),
6aa905cf 96 'nonchar', 0x3FFFE,
6aa905cf
KW
97 ],
98 [ "non-character code point U+3FFFF",
99 (isASCII) ? "\xf0\xbf\xbf\xbf" : I8_to_native("\xf7\xbf\xbf\xbf"),
6aa905cf 100 'nonchar', 0x3FFFF,
6aa905cf
KW
101 ],
102 [ "non-character code point U+4FFFE",
103 (isASCII) ? "\xf1\x8f\xbf\xbe" : I8_to_native("\xf8\xa9\xbf\xbf\xbe"),
6aa905cf 104 'nonchar', 0x4FFFE,
6aa905cf
KW
105 ],
106 [ "non-character code point U+4FFFF",
107 (isASCII) ? "\xf1\x8f\xbf\xbf" : I8_to_native("\xf8\xa9\xbf\xbf\xbf"),
6aa905cf 108 'nonchar', 0x4FFFF,
6aa905cf
KW
109 ],
110 [ "non-character code point U+5FFFE",
111 (isASCII) ? "\xf1\x9f\xbf\xbe" : I8_to_native("\xf8\xab\xbf\xbf\xbe"),
6aa905cf 112 'nonchar', 0x5FFFE,
6aa905cf
KW
113 ],
114 [ "non-character code point U+5FFFF",
115 (isASCII) ? "\xf1\x9f\xbf\xbf" : I8_to_native("\xf8\xab\xbf\xbf\xbf"),
6aa905cf 116 'nonchar', 0x5FFFF,
6aa905cf
KW
117 ],
118 [ "non-character code point U+6FFFE",
119 (isASCII) ? "\xf1\xaf\xbf\xbe" : I8_to_native("\xf8\xad\xbf\xbf\xbe"),
6aa905cf 120 'nonchar', 0x6FFFE,
6aa905cf
KW
121 ],
122 [ "non-character code point U+6FFFF",
123 (isASCII) ? "\xf1\xaf\xbf\xbf" : I8_to_native("\xf8\xad\xbf\xbf\xbf"),
6aa905cf 124 'nonchar', 0x6FFFF,
6aa905cf
KW
125 ],
126 [ "non-character code point U+7FFFE",
127 (isASCII) ? "\xf1\xbf\xbf\xbe" : I8_to_native("\xf8\xaf\xbf\xbf\xbe"),
6aa905cf 128 'nonchar', 0x7FFFE,
6aa905cf
KW
129 ],
130 [ "non-character code point U+7FFFF",
131 (isASCII) ? "\xf1\xbf\xbf\xbf" : I8_to_native("\xf8\xaf\xbf\xbf\xbf"),
6aa905cf 132 'nonchar', 0x7FFFF,
6aa905cf
KW
133 ],
134 [ "non-character code point U+8FFFE",
135 (isASCII) ? "\xf2\x8f\xbf\xbe" : I8_to_native("\xf8\xb1\xbf\xbf\xbe"),
6aa905cf 136 'nonchar', 0x8FFFE,
6aa905cf
KW
137 ],
138 [ "non-character code point U+8FFFF",
139 (isASCII) ? "\xf2\x8f\xbf\xbf" : I8_to_native("\xf8\xb1\xbf\xbf\xbf"),
6aa905cf 140 'nonchar', 0x8FFFF,
6aa905cf
KW
141 ],
142 [ "non-character code point U+9FFFE",
143 (isASCII) ? "\xf2\x9f\xbf\xbe" : I8_to_native("\xf8\xb3\xbf\xbf\xbe"),
6aa905cf 144 'nonchar', 0x9FFFE,
6aa905cf
KW
145 ],
146 [ "non-character code point U+9FFFF",
147 (isASCII) ? "\xf2\x9f\xbf\xbf" : I8_to_native("\xf8\xb3\xbf\xbf\xbf"),
6aa905cf 148 'nonchar', 0x9FFFF,
6aa905cf
KW
149 ],
150 [ "non-character code point U+AFFFE",
151 (isASCII) ? "\xf2\xaf\xbf\xbe" : I8_to_native("\xf8\xb5\xbf\xbf\xbe"),
6aa905cf 152 'nonchar', 0xAFFFE,
6aa905cf
KW
153 ],
154 [ "non-character code point U+AFFFF",
155 (isASCII) ? "\xf2\xaf\xbf\xbf" : I8_to_native("\xf8\xb5\xbf\xbf\xbf"),
6aa905cf 156 'nonchar', 0xAFFFF,
6aa905cf
KW
157 ],
158 [ "non-character code point U+BFFFE",
159 (isASCII) ? "\xf2\xbf\xbf\xbe" : I8_to_native("\xf8\xb7\xbf\xbf\xbe"),
6aa905cf 160 'nonchar', 0xBFFFE,
6aa905cf
KW
161 ],
162 [ "non-character code point U+BFFFF",
163 (isASCII) ? "\xf2\xbf\xbf\xbf" : I8_to_native("\xf8\xb7\xbf\xbf\xbf"),
6aa905cf 164 'nonchar', 0xBFFFF,
6aa905cf
KW
165 ],
166 [ "non-character code point U+CFFFE",
167 (isASCII) ? "\xf3\x8f\xbf\xbe" : I8_to_native("\xf8\xb9\xbf\xbf\xbe"),
6aa905cf 168 'nonchar', 0xCFFFE,
6aa905cf
KW
169 ],
170 [ "non-character code point U+CFFFF",
171 (isASCII) ? "\xf3\x8f\xbf\xbf" : I8_to_native("\xf8\xb9\xbf\xbf\xbf"),
6aa905cf 172 'nonchar', 0xCFFFF,
6aa905cf
KW
173 ],
174 [ "non-character code point U+DFFFE",
175 (isASCII) ? "\xf3\x9f\xbf\xbe" : I8_to_native("\xf8\xbb\xbf\xbf\xbe"),
6aa905cf 176 'nonchar', 0xDFFFE,
6aa905cf
KW
177 ],
178 [ "non-character code point U+DFFFF",
179 (isASCII) ? "\xf3\x9f\xbf\xbf" : I8_to_native("\xf8\xbb\xbf\xbf\xbf"),
6aa905cf 180 'nonchar', 0xDFFFF,
6aa905cf
KW
181 ],
182 [ "non-character code point U+EFFFE",
183 (isASCII) ? "\xf3\xaf\xbf\xbe" : I8_to_native("\xf8\xbd\xbf\xbf\xbe"),
6aa905cf 184 'nonchar', 0xEFFFE,
6aa905cf
KW
185 ],
186 [ "non-character code point U+EFFFF",
187 (isASCII) ? "\xf3\xaf\xbf\xbf" : I8_to_native("\xf8\xbd\xbf\xbf\xbf"),
6aa905cf 188 'nonchar', 0xEFFFF,
6aa905cf
KW
189 ],
190 [ "non-character code point U+FFFFE",
191 (isASCII) ? "\xf3\xbf\xbf\xbe" : I8_to_native("\xf8\xbf\xbf\xbf\xbe"),
6aa905cf 192 'nonchar', 0xFFFFE,
6aa905cf
KW
193 ],
194 [ "non-character code point U+FFFFF",
195 (isASCII) ? "\xf3\xbf\xbf\xbf" : I8_to_native("\xf8\xbf\xbf\xbf\xbf"),
6aa905cf 196 'nonchar', 0xFFFFF,
6aa905cf
KW
197 ],
198 [ "non-character code point U+10FFFE",
199 (isASCII) ? "\xf4\x8f\xbf\xbe" : I8_to_native("\xf9\xa1\xbf\xbf\xbe"),
6aa905cf 200 'nonchar', 0x10FFFE,
6aa905cf
KW
201 ],
202 [ "non-character code point U+10FFFF",
203 (isASCII) ? "\xf4\x8f\xbf\xbf" : I8_to_native("\xf9\xa1\xbf\xbf\xbf"),
6aa905cf 204 'nonchar', 0x10FFFF,
6aa905cf 205 ],
76513bdc
KW
206 [ "requires at least 32 bits",
207 (isASCII)
208 ? "\xfe\x82\x80\x80\x80\x80\x80"
209 : I8_to_native(
210 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
211 # This code point is chosen so that it is representable in a UV on
212 # 32-bit machines
69485e19 213 'non_unicode', 0x80000000,
76513bdc 214 (isASCII) ? 1 : 8,
76513bdc
KW
215 ],
216 [ "highest 32 bit code point",
217 (isASCII)
218 ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
219 : I8_to_native(
220 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
69485e19 221 'non_unicode', 0xFFFFFFFF,
76513bdc 222 (isASCII) ? 1 : 8,
76513bdc
KW
223 ],
224 [ "requires at least 32 bits, and use SUPER-type flags, instead of"
225 . " ABOVE_31_BIT",
226 (isASCII)
227 ? "\xfe\x82\x80\x80\x80\x80\x80"
228 : I8_to_native(
229 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 230 'non_unicode', 0x80000000,
76513bdc 231 1,
76513bdc
KW
232 ],
233 [ "overflow with warnings/disallow for more than 31 bits",
234 # This tests the interaction of WARN_ABOVE_31_BIT/DISALLOW_ABOVE_31_BIT
235 # with overflow. The overflow malformation is never allowed, so
236 # preventing it takes precedence if the ABOVE_31_BIT options would
237 # otherwise allow in an overflowing value. The ASCII code points (1
238 # for 32-bits; 1 for 64) were chosen because the old overflow
239 # detection algorithm did not catch them; this means this test also
240 # checks for that fix. The EBCDIC are arbitrary overflowing ones
241 # since we have no reports of failures with it.
242 (($::is64bit)
243 ? ((isASCII)
244 ? "\xff\x80\x90\x90\x90\xbf\xbf\xbf\xbf\xbf\xbf\xbf\xbf"
245 : I8_to_native(
246 "\xff\xB0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"))
247 : ((isASCII)
248 ? "\xfe\x86\x80\x80\x80\x80\x80"
249 : I8_to_native(
250 "\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa4\xa0\xa0\xa0\xa0\xa0\xa0"))),
69485e19 251 'non_unicode', -1,
76513bdc 252 (isASCII || $::is64bit) ? 2 : 8,
76513bdc 253 ],
6aa905cf
KW
254);
255
256if (! $::is64bit) {
257 if (isASCII) {
258 no warnings qw{portable overflow};
259 push @tests,
260 [ "Lowest 33 bit code point: overflow",
261 "\xFE\x84\x80\x80\x80\x80\x80",
69485e19 262 'non_unicode', -1,
b3169593 263 1,
6aa905cf
KW
264 ];
265 }
266}
267else {
268 no warnings qw{portable overflow};
269 push @tests,
270 [ "More than 32 bits",
271 (isASCII)
272 ? "\xff\x80\x80\x80\x80\x80\x81\x80\x80\x80\x80\x80\x80"
273 : I8_to_native(
274 "\xff\xa0\xa0\xa0\xa0\xa0\xa2\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 275 'non_unicode', 0x1000000000,
b3169593 276 (isASCII) ? 1 : 7,
6aa905cf
KW
277 ];
278 if (! isASCII) {
279 push @tests, # These could falsely show wrongly in a naive
280 # implementation
281 [ "requires at least 32 bits",
282 I8_to_native(
283 "\xff\xa0\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 284 'non_unicode', 0x800000000,
b3169593 285 7,
6aa905cf
KW
286 ],
287 [ "requires at least 32 bits",
288 I8_to_native(
289 "\xff\xa0\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 290 'non_unicode', 0x10000000000,
b3169593 291 6,
6aa905cf
KW
292 ],
293 [ "requires at least 32 bits",
294 I8_to_native(
295 "\xff\xa0\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 296 'non_unicode', 0x200000000000,
b3169593 297 5,
6aa905cf
KW
298 ],
299 [ "requires at least 32 bits",
300 I8_to_native(
301 "\xff\xa0\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 302 'non_unicode', 0x4000000000000,
b3169593 303 4,
6aa905cf
KW
304 ],
305 [ "requires at least 32 bits",
306 I8_to_native(
307 "\xff\xa0\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 308 'non_unicode', 0x80000000000000,
b3169593 309 3,
6aa905cf
KW
310 ],
311 [ "requires at least 32 bits",
312 I8_to_native(
313 "\xff\xa1\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0"),
69485e19 314 'non_unicode', 0x1000000000000000,
b3169593 315 2,
6aa905cf
KW
316 ];
317 }
318}
319
5722c46d
KW
320sub flags_to_text($$)
321{
322 my ($flags, $flags_to_text_ref) = @_;
323
324 # Returns a string containing a mnemonic representation of the bits that
325 # are set in the $flags. These are assumed to be flag bits. The return
326 # looks like "FOO|BAR|BAZ". The second parameter is a reference to an
327 # array that gives the textual representation of all the possible flags.
328 # Element 0 is the text for the bit 0 flag; element 1 for bit 1; .... If
329 # no bits at all are set the string "0" is returned;
330
331 my @flag_text;
332 my $shift = 0;
333
334 return "0" if $flags == 0;
335
336 while ($flags) {
337 #diag sprintf "%x", $flags;
338 if ($flags & 1) {
339 push @flag_text, $flags_to_text_ref->[$shift];
340 }
341 $shift++;
342 $flags >>= 1;
343 }
344
345 return join "|", @flag_text;
346}
347
348# Possible flag returns from utf8n_to_uvchr_error(). These should have G_,
349# instead of A_, D_, but the prefixes will be used in a a later commit, so
350# minimize churn by having them here.
351my @utf8n_flags_to_text = ( qw(
352 A_EMPTY
353 A_CONTINUATION
354 A_NON_CONTINUATION
355 A_SHORT
356 A_LONG
357 A_LONG_AND_ITS_VALUE
358 PLACEHOLDER
359 A_OVERFLOW
360 D_SURROGATE
361 W_SURROGATE
362 D_NONCHAR
363 W_NONCHAR
364 D_SUPER
365 W_SUPER
366 D_ABOVE_31_BIT
367 W_ABOVE_31_BIT
368 CHECK_ONLY
369 NO_CONFIDENCE_IN_CURLEN_
370 ) );
371
a8ee5133
KW
372sub utf8n_display_call($)
373{
374 # Converts an eval string that calls test_utf8n_to_uvchr into a more human
375 # readable form, and returns it. Doesn't work if the byte string contains
376 # an apostrophe. The return will look something like:
377 # test_utf8n_to_uvchr_error('$bytes', $length, $flags)
378 #diag $_[0];
379
380 $_[0] =~ / ^ ( [^(]* \( ) ' ( [^']*? ) ' ( .+ , \D* ) ( \d+ ) \) $ /x;
381 my $text1 = $1; # Everything before the byte string
382 my $bytes = $2;
383 my $text2 = $3; # Includes the length
384 my $flags = $4;
385
386 return $text1
387 . display_bytes($bytes)
388 . $text2
389 . flags_to_text($flags, \@utf8n_flags_to_text)
390 . ')';
391}
392
d884ea32
KW
393sub uvchr_display_call($)
394{
395 # Converts an eval string that calls test_uvchr_to_utf8 into a more human
396 # readable form, and returns it. The return will look something like:
397 # test_uvchr_to_utf8n_flags($uv, $flags)
398 #diag $_[0];
399
400 my @flags_to_text = ( qw(
401 W_SURROGATE
402 W_NONCHAR
403 W_SUPER
404 W_ABOVE_31_BIT
405 D_SURROGATE
406 D_NONCHAR
407 D_SUPER
408 D_ABOVE_31_BIT
409 ) );
410
411 $_[0] =~ / ^ ( [^(]* \( ) ( \d+ ) , \s* ( \d+ ) \) $ /x;
412 my $text = $1;
413 my $cp = sprintf "%X", $2;
414 my $flags = $3;
415
416 return "${text}0x$cp, " . flags_to_text($flags, \@flags_to_text) . ')';
417}
418
69485e19
KW
419sub do_warnings_test(@)
420{
421 my @expected_warnings = @_;
422
423 # Compares the input expected warnings array with @warnings_gotten,
424 # generating a pass for each found, removing it from @warnings_gotten.
425 # Any discrepancies generate test failures. Returns TRUE if no
426 # discrepcancies; otherwise FALSE.
427
428 my $succeeded = 1;
429
430 if (@expected_warnings == 0) {
431 if (! is(@warnings_gotten, 0, " Expected and got no warnings")) {
432 output_warnings(@warnings_gotten);
433 $succeeded = 0;
434 }
435 return $succeeded;
436 }
437
438 # Check that we got all the expected warnings,
439 # removing each one found
440 WARNING:
441 foreach my $expected (@expected_warnings) {
442 foreach (my $i = 0; $i < @warnings_gotten; $i++) {
443 if ($warnings_gotten[$i] =~ $expected) {
444 pass(" Expected and got warning: "
445 . " $warnings_gotten[$i]");
446 splice @warnings_gotten, $i, 1;
447 next WARNING;
448 }
449 }
450 fail(" Expected a warning that matches "
451 . $expected . " but didn't get it");
452 $succeeded = 0;
453 }
454
455 if (! is(@warnings_gotten, 0, " Got no unexpected warnings")) {
456 output_warnings(@warnings_gotten);
457 $succeeded = 0;
458 }
459
460 return $succeeded;
461}
462
6aa905cf
KW
463# This test is split into this number of files.
464my $num_test_files = $ENV{TEST_JOBS} || 1;
465$num_test_files = 10 if $num_test_files > 10;
466
467my $test_count = -1;
468foreach my $test (@tests) {
469 $test_count++;
470 next if $test_count % $num_test_files != $::TEST_CHUNK;
471
af816908 472 my ($testname, $bytes,
9cdc3054 473 $controlling_warning_category, $allowed_uv, $needed_to_discern_len
6aa905cf
KW
474 ) = @$test;
475
476 my $length = length $bytes;
04f42bf6 477 my $will_overflow = $allowed_uv < 0;
6aa905cf 478
2c511c58
KW
479 my $uv_string = sprintf(($allowed_uv < 0x100) ? "%02X" : "%04X", $allowed_uv);
480
af816908
KW
481 my $utf8n_flag_to_warn;
482 my $utf8n_flag_to_disallow;
483 my $uvchr_flag_to_warn;
484 my $uvchr_flag_to_disallow;
b7e1f4b2 485
bf422d6a
KW
486 # Many of the code points being tested are middling in that if code point
487 # edge cases work, these are very likely to as well. Because this test
488 # file takes a while to execute, we skip testing the edge effects of code
489 # points deemed middling, while testing their basics and continuing to
490 # fully test the non-middling code points.
491 my $skip_most_tests = 0;
492
601e92f1
KW
493 my $cp_message_qr; # Pattern that matches the message raised when
494 # that message contains the problematic code
495 # point. The message is the same (currently) both
496 # when going from/to utf8.
497 my $non_cp_trailing_text; # The suffix text when the message doesn't
498 # contain a code point. (This is a result of
499 # some sort of malformation that means we
500 # can't get an exact code poin
501
af816908
KW
502 if ($will_overflow || $allowed_uv > 0x10FFFF) {
503
504 $utf8n_flag_to_warn = $::UTF8_WARN_SUPER;
505 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SUPER;
506 $uvchr_flag_to_warn = $::UNICODE_WARN_SUPER;
507 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SUPER;;
508
509 if ($will_overflow) {
69485e19 510 $non_cp_trailing_text = "if you see this, there is an error";
601e92f1 511 $cp_message_qr = qr/\Q$non_cp_trailing_text\E/;
af816908
KW
512 }
513 elsif ($allowed_uv > 0x7FFFFFFF) {
601e92f1
KW
514 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
515 \Q and not portable\E/x;
516 $non_cp_trailing_text = "is for a non-Unicode code point, and is not portable";
af816908 517 }
601e92f1
KW
518 else {
519 $cp_message_qr = qr/\QCode point 0x$uv_string is not Unicode,\E
520 \Q may not be portable\E/x;
521 $non_cp_trailing_text = "is for a non-Unicode code point, may not"
522 . " be portable";
af816908 523 }
3022ad00
KW
524 }
525 elsif ($allowed_uv >= 0xD800 && $allowed_uv <= 0xDFFF) {
601e92f1
KW
526 $cp_message_qr = qr/UTF-16 surrogate U\+$uv_string/;
527 $non_cp_trailing_text = "is for a surrogate";
67e45424 528 $needed_to_discern_len = 2 unless defined $needed_to_discern_len;
bf422d6a 529 $skip_most_tests = 1 if $allowed_uv > 0xD800 && $allowed_uv < 0xDFFF;
af816908
KW
530
531 $utf8n_flag_to_warn = $::UTF8_WARN_SURROGATE;
532 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_SURROGATE;
533 $uvchr_flag_to_warn = $::UNICODE_WARN_SURROGATE;
534 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_SURROGATE;;
3022ad00
KW
535 }
536 elsif ( ($allowed_uv >= 0xFDD0 && $allowed_uv <= 0xFDEF)
537 || ($allowed_uv & 0xFFFE) == 0xFFFE)
538 {
601e92f1
KW
539 $cp_message_qr = qr/\QUnicode non-character U+$uv_string\E
540 \Q is not recommended for open interchange\E/x;
541 $non_cp_trailing_text = "if you see this, there is an error";
67e45424 542 $needed_to_discern_len = $length unless defined $needed_to_discern_len;
bf422d6a
KW
543 if ( ($allowed_uv > 0xFDD0 && $allowed_uv < 0xFDEF)
544 || ($allowed_uv > 0xFFFF && $allowed_uv < 0x10FFFE))
545 {
546 $skip_most_tests = 1;
547 }
af816908
KW
548
549 $utf8n_flag_to_warn = $::UTF8_WARN_NONCHAR;
550 $utf8n_flag_to_disallow = $::UTF8_DISALLOW_NONCHAR;
551 $uvchr_flag_to_warn = $::UNICODE_WARN_NONCHAR;
552 $uvchr_flag_to_disallow = $::UNICODE_DISALLOW_NONCHAR;;
3022ad00
KW
553 }
554 else {
555 die "Can't figure out what type of warning to test for $testname"
556 }
557
67e45424
KW
558 die 'Didn\'t set $needed_to_discern_len for ' . $testname
559 unless defined $needed_to_discern_len;
6aa905cf
KW
560 {
561 use warnings;
9cdc3054 562 undef @warnings_gotten;
6aa905cf
KW
563 my $ret = test_isUTF8_CHAR($bytes, $length);
564 my $ret_flags = test_isUTF8_CHAR_flags($bytes, $length, 0);
565 if ($will_overflow) {
d402d77f
KW
566 is($ret, 0, "For $testname: isUTF8_CHAR() returns 0");
567 is($ret_flags, 0, " And isUTF8_CHAR_flags() returns 0");
6aa905cf
KW
568 }
569 else {
570 is($ret, $length,
d402d77f
KW
571 "For $testname: isUTF8_CHAR() returns expected length: $length");
572 is($ret_flags, $length, " And isUTF8_CHAR_flags(...,0)"
6aa905cf
KW
573 . " returns expected length: $length");
574 }
9cdc3054 575 is(scalar @warnings_gotten, 0,
d402d77f
KW
576 " And neither isUTF8_CHAR() nor isUTF8_CHAR()_flags generated"
577 . " any warnings")
9cdc3054 578 or output_warnings(@warnings_gotten);
6aa905cf 579
9cdc3054 580 undef @warnings_gotten;
6aa905cf
KW
581 $ret = test_isSTRICT_UTF8_CHAR($bytes, $length);
582 if ($will_overflow) {
d402d77f 583 is($ret, 0, " And isSTRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
584 }
585 else {
586 my $expected_ret = ( $testname =~ /surrogate|non-character/
587 || $allowed_uv > 0x10FFFF)
588 ? 0
589 : $length;
d402d77f 590 is($ret, $expected_ret, " And isSTRICT_UTF8_CHAR() returns"
6aa905cf
KW
591 . " expected length: $expected_ret");
592 $ret = test_isUTF8_CHAR_flags($bytes, $length,
593 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE);
594 is($ret, $expected_ret,
d402d77f
KW
595 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_INTERCHANGE')"
596 . " acts like isSTRICT_UTF8_CHAR");
6aa905cf 597 }
9cdc3054 598 is(scalar @warnings_gotten, 0,
d402d77f
KW
599 " And neither isSTRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
600 . " generated any warnings")
9cdc3054 601 or output_warnings(@warnings_gotten);
6aa905cf 602
9cdc3054 603 undef @warnings_gotten;
6aa905cf
KW
604 $ret = test_isC9_STRICT_UTF8_CHAR($bytes, $length);
605 if ($will_overflow) {
d402d77f 606 is($ret, 0, " And isC9_STRICT_UTF8_CHAR() returns 0");
6aa905cf
KW
607 }
608 else {
609 my $expected_ret = ( $testname =~ /surrogate/
610 || $allowed_uv > 0x10FFFF)
611 ? 0
612 : $length;
d402d77f 613 is($ret, $expected_ret, " And isC9_STRICT_UTF8_CHAR()"
6aa905cf
KW
614 ." returns expected length: $expected_ret");
615 $ret = test_isUTF8_CHAR_flags($bytes, $length,
616 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
617 is($ret, $expected_ret,
d402d77f
KW
618 " And isUTF8_CHAR_flags('DISALLOW_ILLEGAL_C9_INTERCHANGE')"
619 . " acts like isC9_STRICT_UTF8_CHAR");
6aa905cf 620 }
9cdc3054 621 is(scalar @warnings_gotten, 0,
d402d77f
KW
622 " And neither isC9_STRICT_UTF8_CHAR() nor isUTF8_CHAR_flags"
623 . " generated any warnings")
9cdc3054 624 or output_warnings(@warnings_gotten);
6aa905cf
KW
625
626 # Test partial character handling, for each byte not a full character
627 for my $j (1.. $length - 1) {
628
629 # Skip the test for the interaction between overflow and above-31
630 # bit. It is really testing other things than the partial
631 # character tests, for which other tests in this file are
632 # sufficient
04f42bf6 633 last if $will_overflow;
6aa905cf 634
69485e19 635 foreach my $disallow_flag (0, $utf8n_flag_to_disallow) {
6aa905cf
KW
636 my $partial = substr($bytes, 0, $j);
637 my $ret_should_be;
638 my $comment;
639 if ($disallow_flag) {
640 $ret_should_be = 0;
641 $comment = "disallowed";
642 if ($j < $needed_to_discern_len) {
643 $ret_should_be = 1;
644 $comment .= ", but need $needed_to_discern_len bytes"
645 . " to discern:";
646 }
647 }
648 else {
649 $ret_should_be = 1;
650 $comment = "allowed";
651 }
652
9cdc3054 653 undef @warnings_gotten;
6aa905cf
KW
654
655 $ret = test_is_utf8_valid_partial_char_flags($partial, $j,
656 $disallow_flag);
657 is($ret, $ret_should_be,
d402d77f
KW
658 " And is_utf8_valid_partial_char_flags("
659 . display_bytes($partial)
660 . "), $comment: returns $ret_should_be");
9cdc3054 661 is(scalar @warnings_gotten, 0,
d402d77f 662 " And is_utf8_valid_partial_char_flags()"
6aa905cf 663 . " generated no warnings")
9cdc3054 664 or output_warnings(@warnings_gotten);
6aa905cf
KW
665 }
666 }
667 }
668
69485e19
KW
669 # This is more complicated than the malformations tested in other files in
670 # this directory, as there are several orthogonal variables involved. We
671 # test most possible combinations
672 foreach my $trial_warning_category ('surrogate', 'nonchar', 'non_unicode') {
bf422d6a 673 next if $skip_most_tests && $trial_warning_category ne $controlling_warning_category;
69485e19
KW
674 foreach my $do_disallow (0, 1) {
675 next if $skip_most_tests && ! $do_disallow;
6aa905cf
KW
676
677 # We try each of the above with various combinations of
678 # malformations that can occur on the same input sequence.
679 foreach my $short ("", "short") {
bf422d6a 680 next if $skip_most_tests && $short;
6aa905cf
KW
681 foreach my $unexpected_noncont ("",
682 "unexpected non-continuation")
683 {
bf422d6a 684 next if $skip_most_tests && $unexpected_noncont;
6aa905cf 685 foreach my $overlong ("", "overlong") {
bf422d6a 686 next if $overlong && $skip_most_tests;
6aa905cf 687
b3169593
KW
688 # If we're creating an overlong, it can't be longer than
689 # the maximum length, so skip if we're already at that
690 # length.
691 next if $overlong && $length >= $::max_bytes;
6aa905cf 692
69485e19
KW
693 # We classify the warnings into certain "interesting" types,
694 # described later
695 foreach my $warning_type (0..4) {
696 next if $skip_most_tests && $warning_type != 1;
697 foreach my $use_warn_flag (0, 1) {
698 next if $skip_most_tests && ! $use_warn_flag;
699
700 my $this_utf8n_flag_to_warn = $utf8n_flag_to_warn;
701 my $this_utf8n_flag_to_disallow = $utf8n_flag_to_disallow;
702 my $this_uvchr_flag_to_warn = $uvchr_flag_to_warn;
703 my $this_uvchr_flag_to_disallow = $uvchr_flag_to_disallow;
704
705 my $eval_warn;
706 my $expect_regular_warnings;
707 my $expect_warnings_for_malformed;
708 my $expect_warnings_for_overflow;
709
710 if ($warning_type == 0) {
711 $eval_warn = "use warnings; no warnings 'deprecated'";
712 $expect_regular_warnings = $use_warn_flag;
713 $expect_warnings_for_overflow = 1;
714 $expect_warnings_for_malformed = 1;
6aa905cf 715 }
69485e19
KW
716 elsif ($warning_type == 1) {
717 $eval_warn = "no warnings";
718 $expect_regular_warnings = 0;
719 $expect_warnings_for_overflow = 0;
720 $expect_warnings_for_malformed = 0;
721 }
722 elsif ($warning_type == 2) {
723 $eval_warn = "no warnings; use warnings 'utf8'";
724 $expect_regular_warnings = $use_warn_flag;
725 $expect_warnings_for_overflow = 1;
726 $expect_warnings_for_malformed = 1;
727 }
728 elsif ($warning_type == 3) {
729 $eval_warn = "no warnings; use warnings"
730 . " '$trial_warning_category'";
731 $expect_regular_warnings
732 = ( $trial_warning_category eq $controlling_warning_category
733 && $use_warn_flag);
734 $expect_warnings_for_overflow
735 = $trial_warning_category eq 'non_unicode';
736 $expect_warnings_for_malformed = 0;
737 }
738 elsif ($warning_type == 4) { # Like type 3, but uses the
739 # above-31-bit flags
740 # The complement flags were set up so that the
741 # above-31-bit flags have been tested that they don't
742 # trigger wrongly for too small code points. And the
743 # flags have been set up so that those small code
744 # points are tested for being above Unicode. What's
745 # left to test is that the large code points do
746 # trigger the above-31-bit flags.
747 next if ! $will_overflow && $allowed_uv < 0x80000000;
748 next if $trial_warning_category ne 'non_unicode';
749 $eval_warn = "no warnings; use warnings 'non_unicode'";
750 $expect_regular_warnings = 1;
751 $expect_warnings_for_overflow = 1;
752 $expect_warnings_for_malformed = 0;
753 $this_utf8n_flag_to_warn = $::UTF8_WARN_ABOVE_31_BIT;
754 $this_utf8n_flag_to_disallow
755 = $::UTF8_DISALLOW_ABOVE_31_BIT;
756 $this_uvchr_flag_to_warn = $::UNICODE_WARN_ABOVE_31_BIT;
757 $this_uvchr_flag_to_disallow
758 = $::UNICODE_DISALLOW_ABOVE_31_BIT;
601e92f1
KW
759 }
760 else {
69485e19 761 die "Unexpected warning type '$warning_type'";
601e92f1
KW
762 }
763
69485e19
KW
764 # We only need to test the case where all warnings are
765 # enabled (type 0) to see if turning off the warning flag
766 # causes things to not be output. If those pass, then
767 # turning on some sub-category of warnings, or turning off
768 # warnings altogether are extremely likely to not output
769 # warnings either, given how the warnings subsystem is
770 # supposed to work, and this file assumes it does work.
771 next if $warning_type != 0 && ! $use_warn_flag;
772
773 # The convention is that the 'got' flag is the same value
774 # as the disallow one. If this were violated, the tests
775 # here should start failing.
776 my $return_flag = $this_utf8n_flag_to_disallow;
777
778 my $this_warning_flags = ($use_warn_flag)
779 ? $this_utf8n_flag_to_warn
780 : 0;
781 my $this_disallow_flags = ($do_disallow)
782 ? $this_utf8n_flag_to_disallow
783 : 0;
6aa905cf
KW
784 my $this_bytes = $bytes;
785 my $this_length = $length;
786 my $expected_uv = $allowed_uv;
b3169593 787 my $this_expected_len = $length;
6aa905cf 788 my $this_needed_to_discern_len = $needed_to_discern_len;
6aa905cf 789
69485e19
KW
790 my @malformation_names;
791 my @expected_warnings;
792 my @expected_return_flags;
6aa905cf 793
69485e19
KW
794 # Now go through the possible malformations wanted, and
795 # change the input accordingly. We also can set up
796 # certain other things now, like whether we expect a
797 # return flag from this malformation and which flag.
798 if ($overlong) {
6aa905cf
KW
799 # For an overlong, we convert the original
800 # start byte into a continuation byte with
801 # the same data bits as originally. ...
802 substr($this_bytes, 0, 1)
803 = start_byte_to_cont(substr($this_bytes,
804 0, 1));
805
806 # ... Then we prepend it with a known
807 # overlong sequence. This should evaluate
808 # to the exact same code point as the
809 # original.
810 $this_bytes
811 = I8_to_native("\xff")
dbb8d798 812 . (I8_to_native(chr $::lowest_continuation)
6aa905cf
KW
813 x ( $::max_bytes - 1 - length($this_bytes)))
814 . $this_bytes;
815 $this_length = length($this_bytes);
816 $this_needed_to_discern_len
817 = $::max_bytes - ($this_expected_len
818 - $this_needed_to_discern_len);
819 $this_expected_len = $::max_bytes;
9cdc3054 820 push @expected_return_flags, $::UTF8_GOT_LONG;
69485e19
KW
821 push @malformation_names, $overlong;
822 if ($expect_warnings_for_malformed) {
823 if ( ! $short
824 && ! $unexpected_noncont
825 && ! $will_overflow)
826 {
827 my $overlong_bytes
828 = display_bytes_no_quotes($this_bytes);
829 my $correct_bytes
830 = display_bytes_no_quotes($bytes);
831 push @expected_warnings,
832 qr/\QMalformed UTF-8 character:\E
833 \Q $overlong_bytes (overlong;\E
834 \Q instead use $correct_bytes to\E
835 \Q represent U+$uv_string)/x;
836 }
837 else {
838 push @expected_warnings, qr/overlong/;
839 }
6aa905cf 840 }
69485e19 841 }
6aa905cf 842
69485e19
KW
843 if ($short) {
844 push @malformation_names, $short;
845 push @expected_warnings, qr/short/
846 if $expect_warnings_for_malformed;
847
848 # To force this malformation, just tell the test to
849 # not look as far as it should into the input.
850 $this_length--;
851 $this_expected_len--;
852 push @expected_return_flags, $::UTF8_GOT_SHORT;
853 }
854
855 if ($unexpected_noncont) {
856 push @malformation_names, $unexpected_noncont;
857 push @expected_warnings, qr/$unexpected_noncont/
858 if $expect_warnings_for_malformed;
859
860 # To force this malformation, change the final
861 # continuation byte into a non continuation.
862 my $pos = ($short) ? -2 : -1;
863 substr($this_bytes, $pos, 1) = '?';
864 $this_expected_len--;
865 push @expected_return_flags,
866 $::UTF8_GOT_NON_CONTINUATION;
867 }
868
869 # The overflow malformation is done differently than other
870 # malformations. It comes from manually typed tests in
871 # the test array, but it also is above Unicode and uses
872 # Perl extended UTF-8, so affects some of the flags being
873 # tested. We now make it be treated like one of the other
874 # generated malformations.
875 if ($will_overflow) {
876
877 # An overflow is (way) above Unicode, and overrides
878 # everything else.
879 $expect_regular_warnings = 0;
880
881 push @malformation_names, 'overflow';
882 if ($expect_warnings_for_overflow) {
883 my $qr = display_bytes_no_quotes(
884 substr($this_bytes, 0, $this_expected_len));
885 $qr = qr/\QMalformed UTF-8 character: \E
886 \Q$qr (overflows)\E/x;
887 push @expected_warnings, $qr;
6aa905cf 888 }
69485e19 889 push @expected_return_flags, $::UTF8_GOT_OVERFLOW;
6aa905cf
KW
890 }
891
69485e19
KW
892 # Here, we've set things up based on the malformations.
893 # Now generate the text for them for the test name.
894 my $malformations_name = "";
895 if (@malformation_names) {
896 $malformations_name .= "malformation";
897 $malformations_name .= "s" if @malformation_names > 1;
898 $malformations_name .= ": ";
899 $malformations_name .= join "/", @malformation_names;
900 $malformations_name = " ($malformations_name)";
901 }
902
903 # It may be that the malformations have shortened the
904 # amount of input we look at so much that we can't tell
905 # what the category the code point was in. Otherwise, set
906 # up the expected return flags based on the warnings and
907 # disallowments.
908 if ($this_expected_len < $this_needed_to_discern_len) {
909 $expect_regular_warnings = 0;
910 }
911 elsif ( ($this_warning_flags & $this_utf8n_flag_to_warn)
912 || ( $this_disallow_flags
913 & $this_utf8n_flag_to_disallow))
914 {
915 push @expected_return_flags, $return_flag;
916 }
917
918 # Finish setting up the expected warning.
919 if ($expect_regular_warnings) {
920
921 # So far the array contains warnings generated by
922 # malformations. Add the expected regular one.
923 unshift @expected_warnings, $cp_message_qr;
924
925 # But it may need to be modified, because either of
926 # these malformations means we can't determine the
927 # expected code point.
928 if ($short || $unexpected_noncont) {
929 my $first_byte = substr($this_bytes, 0, 1);
930 $expected_warnings[0] = display_bytes(
931 substr($this_bytes, 0, $this_expected_len));
932 $expected_warnings[0]
933 = qr/[Aa]\Qny UTF-8 sequence that starts with\E
934 \Q $expected_warnings[0]\E
935 \Q $non_cp_trailing_text\E/x;
936 }
937 }
6aa905cf
KW
938
939 # Is effectively disallowed if we've set up a
940 # malformation, even if the flag indicates it is
941 # allowed. Fix up test name to indicate this as
942 # well
69485e19
KW
943 my $disallowed = ( $this_disallow_flags
944 & $this_utf8n_flag_to_disallow)
945 || $malformations_name;
6aa905cf 946 my $this_name = "utf8n_to_uvchr_error() $testname: "
69485e19
KW
947 . (($disallowed)
948 ? 'disallowed'
949 : 'allowed');
6aa905cf 950 $this_name .= ", $eval_warn";
69485e19
KW
951 $this_name .= ", " . (( $this_warning_flags
952 & $this_utf8n_flag_to_warn)
953 ? 'with flag for raising warnings'
954 : 'no flag for raising warnings');
955 $this_name .= $malformations_name;
db0f09e6 956
9cdc3054 957 undef @warnings_gotten;
6aa905cf 958 my $ret_ref;
69485e19 959 my $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
960 my $eval_text = "$eval_warn; \$ret_ref"
961 . " = test_utf8n_to_uvchr_error("
69485e19 962 . "'$this_bytes', $this_length, $this_flags)";
6aa905cf 963 eval "$eval_text";
69485e19 964 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
6aa905cf 965 {
a8ee5133
KW
966 diag "\$@='$@'; call was: "
967 . utf8n_display_call($eval_text);
6aa905cf
KW
968 next;
969 }
970 if ($disallowed) {
d402d77f 971 is($ret_ref->[0], 0, " And returns 0")
a8ee5133 972 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
973 }
974 else {
975 is($ret_ref->[0], $expected_uv,
d402d77f
KW
976 " And returns expected uv: "
977 . $uv_string)
a8ee5133 978 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf
KW
979 }
980 is($ret_ref->[1], $this_expected_len,
d402d77f 981 " And returns expected length:"
6aa905cf 982 . " $this_expected_len")
a8ee5133 983 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 984
9cdc3054 985 my $returned_flags = $ret_ref->[2];
6aa905cf 986
9cdc3054 987 for (my $i = @expected_return_flags - 1; $i >= 0; $i--) {
69485e19
KW
988 if ($expected_return_flags[$i] & $returned_flags) {
989 if ($expected_return_flags[$i]
990 == $::UTF8_DISALLOW_ABOVE_31_BIT)
991 {
992 pass(" Expected and got return flag for"
993 . " above_31_bit");
994 }
995 # The first entries in this are
996 # malformations
997 elsif ($i > @malformation_names - 1) {
998 pass(" Expected and got return flag"
999 . " for " . $trial_warning_category);
1000 }
1001 else {
1002 pass(" Expected and got return flag for "
1003 . $malformation_names[$i]
1004 . " malformation");
1005 }
9cdc3054 1006 $returned_flags &= ~$expected_return_flags[$i];
69485e19 1007 splice @expected_return_flags, $i, 1;
6aa905cf 1008 }
6aa905cf 1009 }
6aa905cf 1010
69485e19
KW
1011 is($returned_flags, 0,
1012 " Got no unexpected return flags")
1013 or diag "The unexpected flags gotten were: "
5722c46d
KW
1014 . (flags_to_text($returned_flags,
1015 \@utf8n_flags_to_text)
69485e19
KW
1016 # We strip off any prefixes from the flag
1017 # names
1018 =~ s/ \b [A-Z] _ //xgr);
1019 is (scalar @expected_return_flags, 0,
1020 " Got all expected return flags")
1021 or diag "The expected flags not gotten were: "
1022 . (flags_to_text(eval join("|",
1023 @expected_return_flags),
1024 \@utf8n_flags_to_text)
1025 # We strip off any prefixes from the flag
1026 # names
5722c46d 1027 =~ s/ \b [A-Z] _ //xgr);
9cdc3054 1028
69485e19
KW
1029 do_warnings_test(@expected_warnings)
1030 or diag "Call was: " . utf8n_display_call($eval_text);
1031 undef @warnings_gotten;
6aa905cf
KW
1032
1033 # Check CHECK_ONLY results when the input is
1034 # disallowed. Do this when actually disallowed,
69485e19 1035 # not just when the $this_disallow_flags is set
6aa905cf 1036 if ($disallowed) {
69485e19
KW
1037 my $this_flags = $this_disallow_flags|$::UTF8_CHECK_ONLY;
1038 my $eval_text = "use warnings; no warnings 'deprecated'; \$ret_ref ="
1039 . " test_utf8n_to_uvchr_error('"
1040 . "$this_bytes', $this_length,"
1041 . " $this_flags)";
1042 eval $eval_text;
a8ee5133
KW
1043 if (! ok ("$@ eq ''",
1044 " And eval succeeded with CHECK_ONLY"))
1045 {
1046 diag "\$@='$@'; Call was: "
1047 . utf8n_display_call($eval_text);
1048 next;
1049 }
d402d77f 1050 is($ret_ref->[0], 0, " CHECK_ONLY: Returns 0")
a8ee5133 1051 or diag "Call was: " . utf8n_display_call($eval_text);
6aa905cf 1052 is($ret_ref->[1], -1,
d402d77f 1053 " CHECK_ONLY: returns -1 for length")
a8ee5133 1054 or diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1055 if (! is(scalar @warnings_gotten, 0,
d402d77f 1056 " CHECK_ONLY: no warnings generated"))
6aa905cf 1057 {
a8ee5133 1058 diag "Call was: " . utf8n_display_call($eval_text);
9cdc3054 1059 output_warnings(@warnings_gotten);
6aa905cf
KW
1060 }
1061 }
1062
1063 # Now repeat some of the above, but for
1064 # uvchr_to_utf8_flags(). Since this comes from an
1065 # existing code point, it hasn't overflowed, and
1066 # isn't malformed.
69485e19
KW
1067 next if @malformation_names;
1068
1069 $this_warning_flags = ($use_warn_flag)
1070 ? $this_uvchr_flag_to_warn
1071 : 0;
1072 $this_disallow_flags = ($do_disallow)
1073 ? $this_uvchr_flag_to_disallow
1074 : 0;
1075
1076 $disallowed = $this_disallow_flags
1077 & $this_uvchr_flag_to_disallow;
1078 $this_name .= ", " . (( $this_warning_flags
1079 & $this_utf8n_flag_to_warn)
1080 ? 'with flag for raising warnings'
1081 : 'no flag for raising warnings');
6aa905cf
KW
1082
1083 $this_name = "uvchr_to_utf8_flags() $testname: "
69485e19 1084 . (($disallowed)
6aa905cf 1085 ? 'disallowed'
6aa905cf
KW
1086 : 'allowed');
1087 $this_name .= ", $eval_warn";
69485e19
KW
1088 $this_name .= ", " . (( $this_warning_flags
1089 & $this_uvchr_flag_to_warn)
6aa905cf
KW
1090 ? 'with warning flag'
1091 : 'no warning flag');
1092
9cdc3054 1093 undef @warnings_gotten;
6aa905cf 1094 my $ret;
69485e19 1095 $this_flags = $this_warning_flags|$this_disallow_flags;
6aa905cf
KW
1096 $eval_text = "$eval_warn; \$ret ="
1097 . " test_uvchr_to_utf8_flags("
d884ea32 1098 . "$allowed_uv, $this_flags)";
6aa905cf
KW
1099 eval "$eval_text";
1100 if (! ok ("$@ eq ''", "$this_name: eval succeeded"))
1101 {
d884ea32
KW
1102 diag "\$@='$@'; call was: "
1103 . uvchr_display_call($eval_text);
6aa905cf
KW
1104 next;
1105 }
1106 if ($disallowed) {
d402d77f 1107 is($ret, undef, " And returns undef")
d884ea32 1108 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1109 }
1110 else {
d402d77f 1111 is($ret, $this_bytes, " And returns expected string")
d884ea32 1112 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf 1113 }
69485e19
KW
1114
1115 do_warnings_test(@expected_warnings)
1116 or diag "Call was: " . uvchr_display_call($eval_text);
6aa905cf
KW
1117 }
1118 }
1119 }
1120 }
1121 }
1122 }
1123 }
1124}
1125
1126done_testing;