This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utfebcdic.h: Comment changes only
[perl5.git] / utfebcdic.h
CommitLineData
1d72bdf6
NIS
1/* utfebcdic.h
2 *
2eee27d7
SS
3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
4 * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
1d72bdf6
NIS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
97237291 10 * Adapted from version 7.1 of Unicode Technical Report #16:
1d72bdf6 11 * http://www.unicode.org/unicode/reports/tr16
fe749c9a
KW
12 *
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
d06134e5 15 * 1) convert to Unicode. The table in this file that does this for
97237291
KW
16 * EBCDIC bytes is PL_e2a (with inverse PL_a2e). The 'a' stands for
17 * ASCII platform, meaning latin1.
18 * 2) convert that to a utf8-like string called I8 ('I' stands for
d06134e5
KW
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
fe749c9a
KW
23 * essentially here are the differences:
24 * UTF8 I8
25 * invariant byte starts with 0 starts with 0 or 100
26 * continuation byte starts with 10 starts with 101
27 * start byte same in both: if the code point requires N bytes,
28 * then the leading N bits are 1, followed by a 0. (No
29 * trailing 0 for the very largest possible allocation
30 * in I8, far beyond the current Unicode standard's
31 * max, as shown in the comment later in this file.)
97237291
KW
32 * 3) Use the algorithm in tr16 to convert each byte from step 2 into
33 * final UTF-EBCDIC. This is done by table lookup from a table
34 * constructed from the algorithm, reproduced in this file as
35 * PL_utf2e, with its inverse being PL_e2utf. They are constructed so that
36 * all EBCDIC invariants remain invariant, but no others do, and the first
37 * byte of a variant will always have its upper bit set. But note that
38 * the upper bit of some invariants is also 1.
39 *
40 * For example, the ordinal value of 'A' is 193 in EBCDIC, and also is 193 in
41 * UTF-EBCDIC. Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3
42 * converts it back to 193. As an example of how a variant character works,
43 * take LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
44 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2 converts
45 * that to two bytes = 11000111 10111111 = C7 BF, and Step 3 converts those to
46 * 0x8B 0x73.
45f80db9 47 *
fe749c9a
KW
48 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
49 * EBCDIC, reverse the steps.
50 *
51 * The EBCDIC invariants have been chosen to be those characters whose Unicode
52 * equivalents have ordinal numbers less than 160, that is the same characters
53 * that are expressible in ASCII, plus the C1 controls. So there are 160
54 * invariants instead of the 128 in UTF-8. (My guess is that this is because
45f80db9 55 * the C1 control NEL (and maybe others) is important in IBM.)
fe749c9a
KW
56 *
57 * The purpose of Step 3 is to make the encoding be invariant for the chosen
58 * characters. This messes up the convenient patterns found in step 2, so
59 * generally, one has to undo step 3 into a temporary to use them. However,
97237291
KW
60 * one "shadow", or parallel table, PL_utf8skip, has been constructed that
61 * doesn't require undoing things. It is such that for each byte, it says
62 * how long the sequence is if that (UTF-EBCDIC) byte were to begin it.
63 *
64 * There are actually 3 slightly different UTF-EBCDIC encodings in
65 * this file, one for each of the code pages recognized by Perl. That
66 * means that there are actually three different sets of tables, one for each
67 * code page. (If Perl is compiled on platforms using another EBCDIC code
68 * page, it may not compile, or Perl may silently mistake it for one of the
69 * three.)
fe749c9a 70 *
97237291
KW
71 * Note that tr16 actually only specifies one version of UTF-EBCDIC, based on
72 * the 1047 encoding, and which is supposed to be used for all code pages.
73 * But this doesn't work. To illustrate the problem, consider the '^' character.
74 * On a 037 code page it is the single byte 176, whereas under 1047 UTF-EBCDIC
75 * it is the single byte 95. If Perl implemented tr16 exactly, it would mean
76 * that changing a string containing '^' to UTF-EBCDIC would change that '^'
77 * from 176 to 95 (and vice-versa), violating the rule that ASCII-range
78 * characters have the same representation whether or not the string is
79 * UTF-8 encoded. Much code in Perl assumes this rule. See for example
80 * http://grokbase.com/t/perl/mvs/025xf0yhmn/utf-ebcdic-for-posix-bc-malformed-utf-8-character
81 * What Perl does is create a version of UTF-EBCDIC suited to each code page;
82 * the one for the 1047 code page is identical to what's specified in tr16.
83 * This complicates interchanging files between computers using different code
84 * pages. Best is to convert to I8 before sending them, as the I8
85 * representation is the same no matter what the underlying code page is.
fe749c9a
KW
86 *
87 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
88 * implementation of all 3 encodings, so for those Step 1 is trivial.
89 *
90 * (Note that the entries for invariant characters are necessarily the same in
97237291 91 * PL_e2a and PL_e2utf; likewise for their inverses.)
fe749c9a
KW
92 *
93 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
94 * of the same string. The maximum code point representable as 2 bytes in
95 * UTF-EBCDIC is 0x3FF (10 payload bits), instead of 0x7FF (11 bits) in UTF-8.
1d72bdf6
NIS
96 */
97
98START_EXTERN_C
99
100#ifdef DOINIT
101/* Indexed by encoded byte this table gives the length of the sequence.
102 Adapted from the shadow flags table in tr16.
9df205e2 103 The entries marked 9 in tr16 are continuation bytes and are marked
c4d5f83a 104 as length 1 here so that we can recover.
1d72bdf6 105*/
f5e1abaf 106#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
f466f02a 107EXTCONST U8 PL_utf8skip[] = {
1d72bdf6
NIS
1081,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1091,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1101,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1111,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
c4d5f83a
NIS
1121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1131,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1141,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1151,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1d72bdf6
NIS
1162,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1172,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1182,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
1192,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
1201,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1211,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1221,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1231,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
124};
f5e1abaf
JH
125#endif
126
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
/* PL_utf8skip for the POSIX-BC code page.
 *
 * Indexed by a UTF-EBCDIC encoded byte; each entry is the length, in bytes,
 * of the sequence that byte would begin.  Continuation bytes are given
 * length 1 so that a scan can recover after landing on one.
 *
 * Declared EXTCONST, matching the IBM-1047 definition of this table and the
 * other POSIX-BC tables in this file; the data is read-only.
 */
EXTCONST U8 PL_utf8skip[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
4,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
7,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
};
#endif
147
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/* PL_utf8skip for the IBM-037 code page.
 *
 * Indexed by a UTF-EBCDIC encoded byte; each entry is the length, in bytes,
 * of the sequence that byte would begin.  Continuation bytes are given
 * length 1 so that a scan can recover after landing on one.
 *
 * Declared EXTCONST, matching the IBM-1047 definition of this table; the
 * data is read-only.
 */
EXTCONST U8 PL_utf8skip[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
};
#endif
1d72bdf6 168
fe749c9a
KW
169/* Transform tables from tr16 applied after encoding to render encoding EBCDIC
170 * like, meaning that all the invariants are actually invariant, eg, that 'A'
171 * remains 'A' */
1d72bdf6 172
f5e1abaf 173#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
f466f02a 174EXTCONST U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-1047) */
1d72bdf6
NIS
175 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
178 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
179 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
180 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
181 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
182 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
183 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
184 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
185 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
186 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
187 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
188 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
189 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 190 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
1d72bdf6
NIS
191};
192
f466f02a 193EXTCONST U8 PL_e2utf[] = { /* UTFEBCDIC (IBM-1047) to I8 */
1d72bdf6
NIS
194 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
195 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
196 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
197 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
198 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
199 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
200 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
201 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
202 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
203 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
204 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
205 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
206 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
207 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
208 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 209 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
1d72bdf6 210};
f5e1abaf
JH
211#endif /* 1047 */
212
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
/* Byte-for-byte transform tables for the POSIX-BC code page.
 *
 * PL_utf2e maps each I8 (intermediate, UTF8-Mod) byte to its final
 * UTF-EBCDIC byte; PL_e2utf is the inverse mapping.  Both are indexed by
 * the full 0..255 byte value.
 *
 * Both are declared EXTCONST, matching the IBM-1047 definitions of these
 * tables and the other POSIX-BC tables in this file; the data is read-only.
 */
EXTCONST U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (POSIX-BC) */
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
    0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
    0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
    0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
    0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
    0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
};

EXTCONST U8 PL_e2utf[] = { /* UTFEBCDIC (POSIX-BC) to I8 */
    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
    0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
    0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
    0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
    0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
    0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
    0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
    0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
    0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
};
#endif /* POSIX-BC */
252
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/* Byte-for-byte transform tables for the IBM-037 code page.
 *
 * PL_utf2e maps each I8 (intermediate, UTF8-Mod) byte to its final
 * UTF-EBCDIC byte; PL_e2utf is the inverse mapping.  Both are indexed by
 * the full 0..255 byte value.
 *
 * Both are declared EXTCONST, matching the IBM-1047 definitions of these
 * tables; the data is read-only.
 */
EXTCONST U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-037) */
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
    0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
    0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
    0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
    0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
    0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
    0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
    0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
    0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
    0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
};

EXTCONST U8 PL_e2utf[] = { /* UTFEBCDIC (IBM-037) to I8 */
    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
    0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
    0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
    0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
    0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
    0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
    0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
};
#endif /* 037 */
1d72bdf6
NIS
292
293/* These tables moved from perl.h and converted to hex.
486ec47a 294 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
1d72bdf6
NIS
295*/
296
297#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
f466f02a 298EXTCONST U8 PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
1d72bdf6
NIS
299 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
300 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
301 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
302 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
303 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
304 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
305 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
306 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
307 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
308 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
309 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
310 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
311 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
312 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
313 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
314 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
315};
316
f466f02a 317EXTCONST U8 PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
1d72bdf6
NIS
318 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
319 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
320 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
321 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
322 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
323 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
324 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
325 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
326 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
327 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
328 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
329 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
330 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
331 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
332 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
333 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
334};
d02f4dad 335
f466f02a 336EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
1248eb9d
KW
337 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
338 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
339 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
340 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
341 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
342 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
343 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
344 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
345 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
346 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
347 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0xAD, 0x8E, 0xAF,
348 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0x8D, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
349 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
350 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
351 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
352 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
353};
354
f466f02a 355EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
1248eb9d
KW
356 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
357 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
358 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
359 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
360 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
361 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
362 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
363 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
364 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xBA, 0xAE, 0x8F,
365 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
366 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
367 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
368 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
369 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
370 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
371 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
372};
373
f466f02a 374EXTCONST U8 PL_fold_latin1[] = { /* fast EBCDIC case folding table,
1248eb9d
KW
375 'A' => 'a'; 'a' => 'A'; full
376 0-255 range */
d02f4dad
KW
377 0, 1, 2, 3, 4, 5, 6, 7,
378 8, 9, 10, 11, 12, 13, 14, 15,
379 16, 17, 18, 19, 20, 21, 22, 23,
380 24, 25, 26, 27, 28, 29, 30, 31,
381 32, 33, 34, 35, 36, 37, 38, 39,
382 40, 41, 42, 43, 44, 45, 46, 47,
383 48, 49, 50, 51, 52, 53, 54, 55,
384 56, 57, 58, 59, 60, 61, 62, 63,
385 64, 65, 98, 99, 100, 101, 102, 103,
386 104, 105, 74, 75, 76, 77, 78, 79,
387 80, 113, 114, 115, 116, 117, 118, 119,
388 120, 89, 90, 91, 92, 93, 94, 95,
389 96, 97, 66, 67, 68, 69, 70, 71,
390 72, 73, 106, 107, 108, 109, 110, 111,
391 128, 81, 82, 83, 84, 85, 86, 87,
392 88, 121, 122, 123, 124, 125, 126, 127,
393 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
394 'H', 'I', 138, 139, 172, 186, 174, 143,
395 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
396 'Q', 'R', 154, 155, 158, 157, 156, 159,
397 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
398 'Y', 'Z', 170, 171, 140, 173, 142, 175,
399 176, 177, 178, 179, 180, 181, 182, 183,
400 184, 185, 141, 187, 188, 189, 190, 191,
401 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
402 'h', 'i', 202, 235, 236, 237, 238, 239,
403 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
404 'q', 'r', 218, 251, 252, 253, 254, 223,
405 224, 225, 's', 't', 'u', 'v', 'w', 'x',
406 'y', 'z', 234, 203, 204, 205, 206, 207,
407 240, 241, 242, 243, 244, 245, 246, 247,
408 248, 249, 250, 219, 220, 221, 222, 255
409};
1d72bdf6
NIS
410#endif /* 1047 */
411
412#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
f466f02a 413EXTCONST U8 PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
1d72bdf6
NIS
414 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
415 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
416 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
417 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
418 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
419 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
420 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
421 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
422 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
423 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
424 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
425 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
426 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
427 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
428 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
429 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
430};
431
f466f02a 432EXTCONST U8 PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
1d72bdf6
NIS
433 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
434 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
435 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
436 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
437 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
438 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
439 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
440 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
441 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
442 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
443 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
444 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
445 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
446 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
447 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
448 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
449};
d02f4dad 450
/*
 * Lowercase map for the POSIX-BC code page (this table sits in the section
 * closed by the "#endif POSIX-BC" further down).  It is indexed by a native
 * EBCDIC byte; each entry is the native byte of that character's lowercase
 * form, and an entry equal to its own index means the byte is left unchanged.
 * Examples visible in the data: 0xC1..0xC9 -> 0x81..0x89 and
 * 0x62..0x69 -> 0x42..0x49.
 */
f466f02a 451EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
1248eb9d
KW
452 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
453 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
454 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
455 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
456 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
457 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
458 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
459 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
460 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
461 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
462 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
463 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
464 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
465 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDB, 0xDE, 0xDF,
466 0xC0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
467 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xFD, 0xDE, 0xFF
468};
469
/*
 * Uppercase map for the POSIX-BC code page, indexed by a native EBCDIC byte.
 * Most entries are the native byte of the uppercase form (for example
 * 0x81..0x89 -> 0xC1..0xC9); an entry equal to its index leaves the byte
 * unchanged.  Three entries -- 0x59, 0xA0 and 0xDF -- all hold 0xDF.
 * NOTE(review): presumably that shared value is what the "mod" in the name
 * refers to (a placeholder for characters whose uppercase is not a single
 * Latin-1 byte) -- confirm against the users of this table.
 */
f466f02a 470EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
1248eb9d
KW
471 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
472 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
473 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
474 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
475 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
476 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
477 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
478 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
479 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
480 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
481 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
482 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
483 0xE0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
484 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDD, 0xFC, 0xDD, 0xFE, 0xDF,
485 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
486 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
487};
488
f466f02a 489EXTCONST U8 PL_fold_latin1[] = { /* Fast EBCDIC case-folding table for the
   POSIX-BC code page, covering the full 0-255 range: 'A' => 'a';
   'a' => 'A'.  Each entry is the other-case counterpart of its index;
   bytes with no counterpart hold their own index.  Unlike PL_fold, the
   accented letters are swapped as well (e.g. 66 <=> 98, 112 <=> 128).
   The A-Z/a-z entries are written as character constants, so their
   numeric values come from the compiler's execution character set. */
d02f4dad
KW
492 0, 1, 2, 3, 4, 5, 6, 7,
493 8, 9, 10, 11, 12, 13, 14, 15,
494 16, 17, 18, 19, 20, 21, 22, 23,
495 24, 25, 26, 27, 28, 29, 30, 31,
496 32, 33, 34, 35, 36, 37, 38, 39,
497 40, 41, 42, 43, 44, 45, 46, 47,
498 48, 49, 50, 51, 52, 53, 54, 55,
499 56, 57, 58, 59, 60, 61, 62, 63,
500 64, 65, 98, 99, 100, 101, 102, 103,
501 104, 105, 74, 75, 76, 77, 78, 79,
502 80, 113, 114, 115, 116, 117, 118, 119,
503 120, 89, 90, 91, 92, 93, 94, 95,
504 96, 97, 66, 67, 68, 69, 70, 71,
505 72, 73, 106, 107, 108, 109, 110, 111,
506 128, 81, 82, 83, 84, 85, 86, 87,
507 88, 121, 122, 123, 124, 125, 126, 127,
508 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
509 'H', 'I', 138, 139, 172, 173, 174, 143,
510 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
511 'Q', 'R', 154, 155, 158, 157, 156, 159,
512 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
513 'Y', 'Z', 170, 171, 140, 141, 142, 175,
514 176, 177, 178, 179, 180, 181, 182, 183,
515 184, 185, 186, 187, 188, 189, 190, 191,
516 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
517 'h', 'i', 202, 235, 236, 237, 238, 239,
518 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
519 'q', 'r', 218, 221, 252, 219, 254, 223,
520 192, 225, 's', 't', 'u', 'v', 'w', 'x',
521 'y', 'z', 234, 203, 204, 205, 206, 207,
522 240, 241, 242, 243, 244, 245, 246, 247,
523 248, 249, 250, 251, 220, 253, 222, 255
524};
1d72bdf6 525#endif /* POSIX-BC */
f5e1abaf 526
1d72bdf6 527#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/*
 * Latin-1 to native translation for code page 037: indexed by an ISO 8859-1
 * code point, each entry is the corresponding IBM-037 EBCDIC byte.  The table
 * is a permutation of 0..255 and is the exact inverse of the 037 PL_e2a table
 * that follows it (PL_e2a[PL_a2e[i]] == i for every index i).
 */
f466f02a 528EXTCONST U8 PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
1d72bdf6
NIS
529 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
530 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
531 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
532 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
533 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
534 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
535 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
536 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
537 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
538 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
539 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
540 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
541 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
542 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
543 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
544 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
545};
546
/*
 * Native to Latin-1 translation for code page 037: indexed by an IBM-037
 * EBCDIC byte, each entry is the corresponding ISO 8859-1 code point.  The
 * table is a permutation of 0..255 and is the exact inverse of the 037
 * PL_a2e table that precedes it (PL_a2e[PL_e2a[i]] == i for every index i).
 */
f466f02a 547EXTCONST U8 PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
1d72bdf6
NIS
548 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
549 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
550 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
551 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
552 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
553 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
554 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
555 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
556 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
557 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
558 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
559 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
560 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
561 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
562 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
563 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
564};
d02f4dad 565
/*
 * Lowercase map for code page 037, indexed by a native EBCDIC byte.  Each
 * entry is the native byte of that character's lowercase form; an entry equal
 * to its own index means the byte is left unchanged.  Examples visible in the
 * data: 0xC1..0xC9 -> 0x81..0x89, 0x62..0x69 -> 0x42..0x49 and
 * 0xFB..0xFE -> 0xDB..0xDE.
 */
f466f02a 566EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
1248eb9d
KW
567 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
568 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
569 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
570 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
571 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
572 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
573 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
574 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
575 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
576 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
577 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
578 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
579 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
580 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
581 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
582 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
583};
584
/*
 * Uppercase map for code page 037, indexed by a native EBCDIC byte.  Most
 * entries are the native byte of the uppercase form (for example
 * 0x81..0x89 -> 0xC1..0xC9); an entry equal to its index leaves the byte
 * unchanged.  Three entries -- 0x59, 0xA0 and 0xDF -- all hold 0xDF.
 * NOTE(review): presumably that shared value is what the "mod" in the name
 * refers to (a placeholder for characters whose uppercase is not a single
 * Latin-1 byte) -- confirm against the users of this table.
 */
f466f02a 585EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
1248eb9d
KW
586 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
587 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
588 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
589 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
590 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
591 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
592 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
593 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
594 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
595 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
596 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
597 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
598 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
599 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
600 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
601 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
602};
603
f466f02a 604EXTCONST U8 PL_fold_latin1[] = { /* Fast EBCDIC case-folding table for code
   page 037, covering the full 0-255 range: 'A' => 'a'; 'a' => 'A'.
   Each entry is the other-case counterpart of its index; bytes with no
   counterpart hold their own index.  Unlike PL_fold, the accented
   letters are swapped as well (e.g. 66 <=> 98, 112 <=> 128,
   219 <=> 251).  The A-Z/a-z entries are written as character
   constants, so their numeric values come from the compiler's execution
   character set. */
d02f4dad
KW
607 0, 1, 2, 3, 4, 5, 6, 7,
608 8, 9, 10, 11, 12, 13, 14, 15,
609 16, 17, 18, 19, 20, 21, 22, 23,
610 24, 25, 26, 27, 28, 29, 30, 31,
611 32, 33, 34, 35, 36, 37, 38, 39,
612 40, 41, 42, 43, 44, 45, 46, 47,
613 48, 49, 50, 51, 52, 53, 54, 55,
614 56, 57, 58, 59, 60, 61, 62, 63,
615 64, 65, 98, 99, 100, 101, 102, 103,
616 104, 105, 74, 75, 76, 77, 78, 79,
617 80, 113, 114, 115, 116, 117, 118, 119,
618 120, 89, 90, 91, 92, 93, 94, 95,
619 96, 97, 66, 67, 68, 69, 70, 71,
620 72, 73, 106, 107, 108, 109, 110, 111,
621 128, 81, 82, 83, 84, 85, 86, 87,
622 88, 121, 122, 123, 124, 125, 126, 127,
623 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
624 'H', 'I', 138, 139, 172, 173, 174, 143,
625 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
626 'Q', 'R', 154, 155, 158, 157, 156, 159,
627 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
628 'Y', 'Z', 170, 171, 140, 141, 142, 175,
629 176, 177, 178, 179, 180, 181, 182, 183,
630 184, 185, 186, 187, 188, 189, 190, 191,
631 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
632 'h', 'i', 202, 235, 236, 237, 238, 239,
633 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
634 'q', 'r', 218, 251, 252, 253, 254, 223,
635 224, 225, 's', 't', 'u', 'v', 'w', 'x',
636 'y', 'z', 234, 203, 204, 205, 206, 207,
637 240, 241, 242, 243, 244, 245, 246, 247,
638 248, 249, 250, 219, 220, 221, 222, 255
639};
1d72bdf6
NIS
640#endif /* 037 */
641
1248eb9d 642/* This is the same in all code pages, as only A-Z, a-z are affected;
   every other position holds its own index, so those bytes fold to
   themselves. */
f466f02a 643EXTCONST U8 PL_fold[] = { /* Fast EBCDIC case-folding table:
   'A' => 'a'; 'a' => 'A'.  The letters are written as character
   constants, so their numeric values come from the compiler's execution
   character set; the three runs of nine, nine and eight letters match
   the non-contiguous EBCDIC alphabet. */
645 0, 1, 2, 3, 4, 5, 6, 7,
646 8, 9, 10, 11, 12, 13, 14, 15,
647 16, 17, 18, 19, 20, 21, 22, 23,
648 24, 25, 26, 27, 28, 29, 30, 31,
649 32, 33, 34, 35, 36, 37, 38, 39,
650 40, 41, 42, 43, 44, 45, 46, 47,
651 48, 49, 50, 51, 52, 53, 54, 55,
652 56, 57, 58, 59, 60, 61, 62, 63,
653 64, 65, 66, 67, 68, 69, 70, 71,
654 72, 73, 74, 75, 76, 77, 78, 79,
655 80, 81, 82, 83, 84, 85, 86, 87,
656 88, 89, 90, 91, 92, 93, 94, 95,
657 96, 97, 98, 99, 100, 101, 102, 103,
658 104, 105, 106, 107, 108, 109, 110, 111,
659 112, 113, 114, 115, 116, 117, 118, 119,
660 120, 121, 122, 123, 124, 125, 126, 127,
661 128, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
662 'H', 'I', 138, 139, 140, 141, 142, 143,
663 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
664 'Q', 'R', 154, 155, 156, 157, 158, 159,
665 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
666 'Y', 'Z', 170, 171, 172, 173, 174, 175,
667 176, 177, 178, 179, 180, 181, 182, 183,
668 184, 185, 186, 187, 188, 189, 190, 191,
669 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
670 'h', 'i', 202, 203, 204, 205, 206, 207,
671 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
672 'q', 'r', 218, 219, 220, 221, 222, 223,
673 224, 225, 's', 't', 'u', 'v', 'w', 'x',
674 'y', 'z', 234, 235, 236, 237, 238, 239,
675 240, 241, 242, 243, 244, 245, 246, 247,
676 248, 249, 250, 251, 252, 253, 254, 255
677};
44f2fc15 678
1d72bdf6 679#else
f466f02a
KW
680EXTCONST U8 PL_utf8skip[];
681EXTCONST U8 PL_e2utf[];
682EXTCONST U8 PL_utf2e[];
683EXTCONST U8 PL_e2a[];
684EXTCONST U8 PL_a2e[];
685EXTCONST U8 PL_fold[];
686EXTCONST U8 PL_fold_latin1[];
687EXTCONST U8 PL_latin1_lc[];
688EXTCONST U8 PL_mod_latin1_uc[];
1d72bdf6
NIS
689#endif
690
691END_EXTERN_C
692
/* EBCDIC-happy ways of converting native code to UTF-8 */

/* Single-byte translation between the native EBCDIC code page and Latin-1
 * (ISO 8859-1) by table lookup.  The argument is cast to U8 before it is used
 * as the index, so any higher bits it carries are discarded. */
#define NATIVE_TO_LATIN1(ch)     PL_e2a[(U8)(ch)]
#define LATIN1_TO_NATIVE(ch)     PL_a2e[(U8)(ch)]

/* Per-byte translation between native UTF-EBCDIC bytes and the intermediate
 * I8 form, again by table lookup on the argument cast to U8. */
#define NATIVE_UTF8_TO_I8(ch)    PL_e2utf[(U8)(ch)]
#define I8_TO_NATIVE_UTF8(ch)    PL_utf2e[(U8)(ch)]

/* Transforms in wide UV chars: values above 255 pass through unchanged and
 * everything else goes through the single-byte tables.  'ch' is expanded
 * twice, so it must not have side effects. */
#define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))
#define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : LATIN1_TO_NATIVE(ch))
704
/*
  The following table is adapted from tr16; it shows the I8 encoding of
  Unicode code points.

        Unicode                             Bit pattern      1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th Byte
    U+0000..U+007F                     000000000xxxxxxx      0xxxxxxx
    U+0080..U+009F                     00000000100xxxxx      100xxxxx
    U+00A0..U+03FF                     000000yyyyyxxxxx      110yyyyy 101xxxxx
    U+0400..U+3FFF                     00zzzzyyyyyxxxxx      1110zzzz 101yyyyy 101xxxxx
    U+4000..U+3FFFF                 0wwwzzzzzyyyyyxxxxx      11110www 101zzzzz 101yyyyy 101xxxxx
   U+40000..U+3FFFFF            0vvwwwwwzzzzzyyyyyxxxxx      111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
  U+400000..U+3FFFFFF       0uvvvvvwwwwwzzzzzyyyyyxxxxx      1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
 U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx      1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx

  Note: The I8 transformation is valid for UCS-4 values X'0' to
  X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).

 */
722
5aaebcb3
KW
/* Number of I8 bytes needed to encode 'uv'.
 * Input is a true Unicode (not-native) code point.
 * Values below 0xA0 take a single byte; each later threshold is the first
 * code point that needs one more byte, and everything from 0x4000000 upward
 * is reported as 7.  'uv' is expanded in every comparison (up to six times),
 * so it must not have side effects. */
#define OFFUNISKIP(uv) ( (uv) < 0xA0      ? 1 : \
                         (uv) < 0x400     ? 2 : \
                         (uv) < 0x4000    ? 3 : \
                         (uv) < 0x40000   ? 4 : \
                         (uv) < 0x400000  ? 5 : \
                         (uv) < 0x4000000 ? 6 : 7 )
730
/* True when the Unicode code point 'c' lies below 0xA0, i.e. in the range the
 * I8 encoding represents with a single byte.  'c' is cast to UV before the
 * comparison. */
#define UNI_IS_INVARIANT(c)      (((UV)(c)) < 0xA0)
530495eb 732
15824458
KW
/* UTF-EBCDIC semantic macros - transform back into I8 and then compare.
 * Comments as to the meaning of each are given at their corresponding utf8.h
 * definitions.  Each takes one native UTF-EBCDIC byte 'c'; the macros that
 * mention 'c' twice also evaluate it twice. */

/* I8 start bytes are 0xC5 and above, except 0xE0.  Going by the I8 chart
 * earlier in this file, 0xC0..0xC4 and 0xE0 have the bit shape of a start
 * byte but could only introduce code points that the chart encodes in fewer
 * bytes. */
#define UTF8_IS_START(c)                (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
                                         && NATIVE_UTF8_TO_I8(c) != 0xE0)
/* I8 continuation bytes have the form 101xxxxx (0xA0..0xBF). */
#define UTF8_IS_CONTINUATION(c)         ((NATIVE_UTF8_TO_I8(c) & 0xE0) == 0xA0)
/* Any I8 byte from 0xA0 upward, i.e. every byte that is not in the
 * single-byte range 0x00..0x9F. */
#define UTF8_IS_CONTINUED(c)            (NATIVE_UTF8_TO_I8(c) >= 0xA0)

/* Start bytes 0xC5..0xC7 are 110yyyyy with yyyyy = 5..7, which per the chart
 * introduce the two-byte forms of U+00A0..U+00FF. */
#define UTF8_IS_DOWNGRADEABLE_START(c)  (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
                                         && NATIVE_UTF8_TO_I8(c) <= 0xC7)
/* Saying it this way adds a runtime test, but removes 2 run-time lookups */
/*#define UTF8_IS_DOWNGRADEABLE_START(c) ((c) == I8_TO_NATIVE_UTF8(0xC5) \
                                       || (c) == I8_TO_NATIVE_UTF8(0xC6) \
                                       || (c) == I8_TO_NATIVE_UTF8(0xC7))
*/
/* Start bytes from 0xC8 upward introduce code points of U+0100 and above. */
#define UTF8_IS_ABOVE_LATIN1(c)         (NATIVE_UTF8_TO_I8(c) >= 0xC8)
1d72bdf6 750
ee372ee9
KW
/* Marker bits of the start byte of a 'len'-byte sequence: the top 'len' bits
 * set (0xC0 for 2 ... 0xFE for 7).
 * 'len' can't exceed 7 on EBCDIC platforms; a larger value would make the
 * shift count negative. */
#define UTF_START_MARK(len)     (0xFF & (0xFE << (7-(len))))

/* Mask of the payload bits carried in the start byte of a 'len'-byte
 * sequence: 0x1F, 0x0F, 0x07, 0x03 for 2..5 bytes, and 0x01 for 6 or more.
 * 'len' must be at least 2, otherwise the shift count is negative. */
#define UTF_START_MASK(len)     (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))

/* Continuation bytes have the form 101xxxxx: 0xA0 is the fixed marker, the
 * low five bits are payload, so each continuation byte shifts the accumulated
 * value by 5. */
#define UTF_CONTINUATION_MARK   0xA0
#define UTF_CONTINUATION_MASK   ((U8)0x1f)
#define UTF_ACCUMULATION_SHIFT  5
758
03c76984
KW
/* How wide can a single UTF-8 encoded character become in bytes. */
/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
 * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
 * expressed with 5 bytes.  However, Perl thinks of UTF-8 as a way to encode
 * non-negative integers in a binary format, even those above Unicode */
#define UTF8_MAXBYTES 7

/* The maximum number of UTF-8 bytes a single Unicode character can
 * uppercase/lowercase/fold into.  Unicode guarantees that the maximum
 * expansion is 3 characters.  On EBCDIC platforms, the highest Unicode
 * character occupies 5 bytes, therefore this number is 15 (3 * 5) */
#define UTF8_MAXBYTES_CASE 15

/* Largest code point that still fits in a two-byte sequence */
#define MAX_UTF8_TWO_BYTE 0x3FF
773
e9a8c099
MHM
774/*
775 * Local variables:
776 * c-indentation-style: bsd
777 * c-basic-offset: 4
14d04a33 778 * indent-tabs-mode: nil
e9a8c099
MHM
779 * End:
780 *
14d04a33 781 * ex: set ts=8 sts=4 sw=4 et:
e9a8c099 782 */