This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Split XS-APItest/t/utf8.t
[perl5.git] / ext / XS-APItest / t / utf8_setup.pl
CommitLineData
6aa905cf
KW
1# Common subroutines and constants, called by .t files in this directory that
2# deal with UTF-8
3
4# The test files can't use byte_utf8a_to_utf8n() from t/charset_tools.pl
5# because that uses the same functions we are testing here. So UTF-EBCDIC
6# strings are hard-coded as I8 strings in this file instead, and we use the
7# translation functions to/from I8 from that file instead.
8
9sub isASCII { ord "A" == 65 }
10
11sub display_bytes {
12 use bytes;
13 my $string = shift;
14 return '"'
15 . join("", map { sprintf("\\x%02x", ord $_) } split "", $string)
16 . '"';
17}
18
19sub output_warnings(@) {
20 diag "The warnings were:\n" . join("", @_);
21}
22
23sub start_byte_to_cont($) {
24
25 # Extract the code point information from the input UTF-8 start byte, and
26 # return a continuation byte containing the same information. This is
27 # used in constructing an overlong malformation from valid input.
28
29 my $byte = shift;
30 my $len = test_UTF8_SKIP($byte);
31 if ($len < 2) {
32 die "start_byte_to_cont() is expecting a UTF-8 variant";
33 }
34
35 $byte = ord native_to_I8($byte);
36
37 # Copied from utf8.h. This gets rid of the leading 1 bits.
38 $byte &= ((($len) >= 7) ? 0x00 : (0x1F >> (($len)-2)));
39
40 $byte |= (isASCII) ? 0x80 : 0xA0;
41 return I8_to_native(chr $byte);
42}
43
44$::is64bit = length sprintf("%x", ~0) > 8;
45
46$::first_continuation = (isASCII) ? 0x80 : 0xA0;
47
48$::I8c = (isASCII) ? "\x80" : "\xa0"; # A continuation byte
49
50
51$::max_bytes = (isASCII) ? 13 : 14; # Max number of bytes in a UTF-8 sequence
52 # representing a single code point
53
54# Copied from utf8.h
55$::UTF8_ALLOW_EMPTY = 0x0001;
56$::UTF8_GOT_EMPTY = $UTF8_ALLOW_EMPTY;
57$::UTF8_ALLOW_CONTINUATION = 0x0002;
58$::UTF8_GOT_CONTINUATION = $UTF8_ALLOW_CONTINUATION;
59$::UTF8_ALLOW_NON_CONTINUATION = 0x0004;
60$::UTF8_GOT_NON_CONTINUATION = $UTF8_ALLOW_NON_CONTINUATION;
61$::UTF8_ALLOW_SHORT = 0x0008;
62$::UTF8_GOT_SHORT = $UTF8_ALLOW_SHORT;
63$::UTF8_ALLOW_LONG = 0x0010;
64$::UTF8_ALLOW_LONG_AND_ITS_VALUE = $UTF8_ALLOW_LONG|0x0020;
65$::UTF8_GOT_LONG = $UTF8_ALLOW_LONG;
66$::UTF8_ALLOW_OVERFLOW = 0x0080;
67$::UTF8_GOT_OVERFLOW = $UTF8_ALLOW_OVERFLOW;
68$::UTF8_DISALLOW_SURROGATE = 0x0100;
69$::UTF8_GOT_SURROGATE = $UTF8_DISALLOW_SURROGATE;
70$::UTF8_WARN_SURROGATE = 0x0200;
71$::UTF8_DISALLOW_NONCHAR = 0x0400;
72$::UTF8_GOT_NONCHAR = $UTF8_DISALLOW_NONCHAR;
73$::UTF8_WARN_NONCHAR = 0x0800;
74$::UTF8_DISALLOW_SUPER = 0x1000;
75$::UTF8_GOT_SUPER = $UTF8_DISALLOW_SUPER;
76$::UTF8_WARN_SUPER = 0x2000;
77$::UTF8_DISALLOW_ABOVE_31_BIT = 0x4000;
78$::UTF8_GOT_ABOVE_31_BIT = $UTF8_DISALLOW_ABOVE_31_BIT;
79$::UTF8_WARN_ABOVE_31_BIT = 0x8000;
80$::UTF8_CHECK_ONLY = 0x10000;
81$::UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE
82 = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE;
83$::UTF8_DISALLOW_ILLEGAL_INTERCHANGE
84 = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR;
85$::UTF8_WARN_ILLEGAL_C9_INTERCHANGE
86 = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE;
87$::UTF8_WARN_ILLEGAL_INTERCHANGE
88 = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR;
89
90# Test uvchr_to_utf8().
91$::UNICODE_WARN_SURROGATE = 0x0001;
92$::UNICODE_WARN_NONCHAR = 0x0002;
93$::UNICODE_WARN_SUPER = 0x0004;
94$::UNICODE_WARN_ABOVE_31_BIT = 0x0008;
95$::UNICODE_DISALLOW_SURROGATE = 0x0010;
96$::UNICODE_DISALLOW_NONCHAR = 0x0020;
97$::UNICODE_DISALLOW_SUPER = 0x0040;
98$::UNICODE_DISALLOW_ABOVE_31_BIT = 0x0080;