This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
APItest/t: Change some variable names
[perl5.git] / ext / XS-APItest / t / utf8_setup.pl
1 # Common subroutines and constants, called by .t files in this directory that
2 # deal with UTF-8
3
4 # The  test files can't use byte_utf8a_to_utf8n() from t/charset_tools.pl
5 # because that uses the same functions we are testing here.  So UTF-EBCDIC
6 # strings are hard-coded as I8 strings in this file instead, and we use the
7 # translation functions to/from I8 from that file instead.
8
9 sub isASCII { ord "A" == 65 }
10
11 sub display_bytes_no_quotes {
12     use bytes;
13     my $string = shift;
14     return join("", map { sprintf("\\x%02x", ord $_) } split "", $string)
15 }
16
17 sub display_bytes {
18     return   '"' . display_bytes_no_quotes(shift) . '"';
19 }
20
21 sub output_warnings(@) {
22     my @list = @_;
23     if (@list) {
24         diag "The warnings were:\n" . join "\n", map { chomp; $_ } @list;
25     }
26     else {
27         diag "No warnings were raised";
28     }
29 }
30
31 sub start_byte_to_cont($) {
32
33     # Extract the code point information from the input UTF-8 start byte, and
34     # return a continuation byte containing the same information.  This is
35     # used in constructing an overlong malformation from valid input.
36
37     my $byte = shift;
38     my $len = test_UTF8_SKIP($byte);
39     if ($len < 2) {
40         die "start_byte_to_cont() is expecting a UTF-8 variant";
41     }
42
43     $byte = ord native_to_I8($byte);
44
45     # Copied from utf8.h.  This gets rid of the leading 1 bits.
46     $byte &= ((($len) >= 7) ? 0x00 : (0x1F >> (($len)-2)));
47
48     $byte |= (isASCII) ? 0x80 : 0xA0;
49     return I8_to_native(chr $byte);
50 }
51
52 $::is64bit = length sprintf("%x", ~0) > 8;
53
54 $::lowest_continuation = (isASCII) ? 0x80 : 0xA0;
55
56 $::I8c = (isASCII) ? "\x80" : "\xa0";    # A continuation byte
57
58
59 $::max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence
60                                     # representing a single code point
61
62 # Copied from utf8.h
63 $::UTF8_ALLOW_EMPTY            = 0x0001;
64 $::UTF8_GOT_EMPTY              = $UTF8_ALLOW_EMPTY;
65 $::UTF8_ALLOW_CONTINUATION     = 0x0002;
66 $::UTF8_GOT_CONTINUATION       = $UTF8_ALLOW_CONTINUATION;
67 $::UTF8_ALLOW_NON_CONTINUATION = 0x0004;
68 $::UTF8_GOT_NON_CONTINUATION   = $UTF8_ALLOW_NON_CONTINUATION;
69 $::UTF8_ALLOW_SHORT            = 0x0008;
70 $::UTF8_GOT_SHORT              = $UTF8_ALLOW_SHORT;
71 $::UTF8_ALLOW_LONG             = 0x0010;
72 $::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020;
73 $::UTF8_GOT_LONG               = $UTF8_ALLOW_LONG;
74 $::UTF8_ALLOW_OVERFLOW         = 0x0080;
75 $::UTF8_GOT_OVERFLOW           = $UTF8_ALLOW_OVERFLOW;
76 $::UTF8_DISALLOW_SURROGATE     = 0x0100;
77 $::UTF8_GOT_SURROGATE          = $UTF8_DISALLOW_SURROGATE;
78 $::UTF8_WARN_SURROGATE         = 0x0200;
79 $::UTF8_DISALLOW_NONCHAR       = 0x0400;
80 $::UTF8_GOT_NONCHAR            = $UTF8_DISALLOW_NONCHAR;
81 $::UTF8_WARN_NONCHAR           = 0x0800;
82 $::UTF8_DISALLOW_SUPER         = 0x1000;
83 $::UTF8_GOT_SUPER              = $UTF8_DISALLOW_SUPER;
84 $::UTF8_WARN_SUPER             = 0x2000;
85 $::UTF8_DISALLOW_ABOVE_31_BIT  = 0x4000;
86 $::UTF8_GOT_ABOVE_31_BIT       = $UTF8_DISALLOW_ABOVE_31_BIT;
87 $::UTF8_WARN_ABOVE_31_BIT      = 0x8000;
88 $::UTF8_CHECK_ONLY             = 0x10000;
89 $::UTF8_NO_CONFIDENCE_IN_CURLEN_ = 0x20000;
90
91 $::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE
92                              = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE;
93 $::UTF8_DISALLOW_ILLEGAL_INTERCHANGE
94               = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR;
95 $::UTF8_WARN_ILLEGAL_C9_INTERCHANGE
96                              = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE;
97 $::UTF8_WARN_ILLEGAL_INTERCHANGE
98               = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR;
99
100 # Test uvchr_to_utf8().
101 $::UNICODE_WARN_SURROGATE        = 0x0001;
102 $::UNICODE_WARN_NONCHAR          = 0x0002;
103 $::UNICODE_WARN_SUPER            = 0x0004;
104 $::UNICODE_WARN_ABOVE_31_BIT     = 0x0008;
105 $::UNICODE_DISALLOW_SURROGATE    = 0x0010;
106 $::UNICODE_DISALLOW_NONCHAR      = 0x0020;
107 $::UNICODE_DISALLOW_SUPER        = 0x0040;
108 $::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080;