This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest:t/utf8_setup.pl: Display printables as themselves
[perl5.git] / ext / XS-APItest / t / utf8_setup.pl
1 # Common subroutines and constants, called by .t files in this directory that
2 # deal with UTF-8
3
4 # The  test files can't use byte_utf8a_to_utf8n() from t/charset_tools.pl
5 # because that uses the same functions we are testing here.  So UTF-EBCDIC
6 # strings are hard-coded as I8 strings in this file instead, and we use the
7 # translation functions to/from I8 from that file instead.
8
9 sub isASCII { ord "A" == 65 }
10
11 sub display_bytes_no_quotes {
12     use bytes;
13     my $string = shift;
14     return join("", map {
15                           ($_ =~ /[[:print:]]/)
16                           ? $_
17                           : sprintf("\\x%02x", ord $_)
18                         } split "", $string)
19 }
20
21 sub display_bytes {
22     return   '"' . display_bytes_no_quotes(shift) . '"';
23 }
24
25 sub output_warnings(@) {
26     my @list = @_;
27     if (@list) {
28         diag "The warnings were:\n" . join "\n", map { chomp; $_ } @list;
29     }
30     else {
31         diag "No warnings were raised";
32     }
33 }
34
35 sub start_byte_to_cont($) {
36
37     # Extract the code point information from the input UTF-8 start byte, and
38     # return a continuation byte containing the same information.  This is
39     # used in constructing an overlong malformation from valid input.
40
41     my $byte = shift;
42     my $len = test_UTF8_SKIP($byte);
43     if ($len < 2) {
44         die "start_byte_to_cont() is expecting a UTF-8 variant";
45     }
46
47     $byte = ord native_to_I8($byte);
48
49     # Copied from utf8.h.  This gets rid of the leading 1 bits.
50     $byte &= ((($len) >= 7) ? 0x00 : (0x1F >> (($len)-2)));
51
52     $byte |= (isASCII) ? 0x80 : 0xA0;
53     return I8_to_native(chr $byte);
54 }
55
56 $::is64bit = length sprintf("%x", ~0) > 8;
57
58 $::lowest_continuation = (isASCII) ? 0x80 : 0xA0;
59
60 $::I8c = (isASCII) ? "\x80" : "\xa0";    # A continuation byte
61
62
63 $::max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence
64                                     # representing a single code point
65
66 # Copied from utf8.h
67 $::UTF8_ALLOW_EMPTY            = 0x0001;
68 $::UTF8_GOT_EMPTY              = $UTF8_ALLOW_EMPTY;
69 $::UTF8_ALLOW_CONTINUATION     = 0x0002;
70 $::UTF8_GOT_CONTINUATION       = $UTF8_ALLOW_CONTINUATION;
71 $::UTF8_ALLOW_NON_CONTINUATION = 0x0004;
72 $::UTF8_GOT_NON_CONTINUATION   = $UTF8_ALLOW_NON_CONTINUATION;
73 $::UTF8_ALLOW_SHORT            = 0x0008;
74 $::UTF8_GOT_SHORT              = $UTF8_ALLOW_SHORT;
75 $::UTF8_ALLOW_LONG             = 0x0010;
76 $::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020;
77 $::UTF8_GOT_LONG               = $UTF8_ALLOW_LONG;
78 $::UTF8_ALLOW_OVERFLOW         = 0x0080;
79 $::UTF8_GOT_OVERFLOW           = $UTF8_ALLOW_OVERFLOW;
80 $::UTF8_DISALLOW_SURROGATE     = 0x0100;
81 $::UTF8_GOT_SURROGATE          = $UTF8_DISALLOW_SURROGATE;
82 $::UTF8_WARN_SURROGATE         = 0x0200;
83 $::UTF8_DISALLOW_NONCHAR       = 0x0400;
84 $::UTF8_GOT_NONCHAR            = $UTF8_DISALLOW_NONCHAR;
85 $::UTF8_WARN_NONCHAR           = 0x0800;
86 $::UTF8_DISALLOW_SUPER         = 0x1000;
87 $::UTF8_GOT_SUPER              = $UTF8_DISALLOW_SUPER;
88 $::UTF8_WARN_SUPER             = 0x2000;
89 $::UTF8_DISALLOW_PERL_EXTENDED  = 0x4000;
90 $::UTF8_GOT_PERL_EXTENDED       = $UTF8_DISALLOW_PERL_EXTENDED;
91 $::UTF8_WARN_PERL_EXTENDED      = 0x8000;
92 $::UTF8_CHECK_ONLY             = 0x10000;
93 $::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
94
95 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE
96                              = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE;
97 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE
98               = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR;
99 $::UTF8_WARN_ILLEGAL_C9_INTERCHANGE
100                              = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE;
101 $::UTF8_WARN_ILLEGAL_INTERCHANGE
102               = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR;
103
104 # Test uvchr_to_utf8().
105 $::UNICODE_WARN_SURROGATE        = 0x0001;
106 $::UNICODE_WARN_NONCHAR          = 0x0002;
107 $::UNICODE_WARN_SUPER            = 0x0004;
108 $::UNICODE_WARN_PERL_EXTENDED     = 0x0008;
109 $::UNICODE_DISALLOW_SURROGATE    = 0x0010;
110 $::UNICODE_DISALLOW_NONCHAR      = 0x0020;
111 $::UNICODE_DISALLOW_SUPER        = 0x0040;
112 $::UNICODE_DISALLOW_PERL_EXTENDED = 0x0080;