This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make casing tables native
[perl5.git] / utfebcdic.h
CommitLineData
1d72bdf6
NIS
1/* utfebcdic.h
2 *
2eee27d7
SS
3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
4 * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
1d72bdf6
NIS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
486ec47a 10 * Taken from version 7.1 of Unicode Technical Report #16:
1d72bdf6 11 * http://www.unicode.org/unicode/reports/tr16
fe749c9a
KW
12 *
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
d06134e5 15 * 1) convert to Unicode. The table in this file that does this for
fe749c9a
KW
16 * EBCDIC bytes is PL_e2a (with inverse PLa2e). The 'a' stands for
17 * ASCIIish, meaning latin1.
d06134e5
KW
18 * 2) convert that to a utf8-like string called I8 (I stands for
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
fe749c9a
KW
23 * essentially here are the differences:
24 * UTF8 I8
25 * invariant byte starts with 0 starts with 0 or 100
26 * continuation byte starts with 10 starts with 101
27 * start byte same in both: if the code point requires N bytes,
28 * then the leading N bits are 1, followed by a 0. (No
29 * trailing 0 for the very largest possible allocation
30 * in I8, far beyond the current Unicode standard's
31 * max, as shown in the comment later in this file.)
32 * 3) Use the table published in tr16 to convert each byte from step 2 into
d06134e5
KW
33 * final UTF-EBCDIC. That table is reproduced in this file as PL_utf2e,
34 * and its inverse is PL_e2utf. They are constructed so that all EBCDIC
35 * invariants remain invariant, but no others do. For example, the
36 * ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
37 * Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
38 * it back to 193. As an example of how a variant character works, take
486ec47a 39 * LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
d06134e5
KW
40 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2
41 * converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
42 * converts those to 0x8B 0x73. The table is constructed so that the
43 * first byte of the final form of a variant will always have its upper
44 * bit set (at least in the encodings that Perl recognizes, and probably
45 * all). But note that the upper bit of some invariants is also 1.
45f80db9 46 *
fe749c9a
KW
47 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
48 * EBCDIC, reverse the steps.
49 *
50 * The EBCDIC invariants have been chosen to be those characters whose Unicode
51 * equivalents have ordinal numbers less than 160, that is the same characters
52 * that are expressible in ASCII, plus the C1 controls. So there are 160
53 * invariants instead of the 128 in UTF-8. (My guess is that this is because
45f80db9 54 * the C1 control NEL (and maybe others) is important in IBM.)
fe749c9a
KW
55 *
56 * The purpose of Step 3 is to make the encoding be invariant for the chosen
57 * characters. This messes up the convenient patterns found in step 2, so
58 * generally, one has to undo step 3 into a temporary to use them. However,
59 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
45f80db9 60 * each byte, it says how long the sequence is if that byte were to begin it
fe749c9a
KW
61 *
62 * There are actually 3 slightly different UTF-EBCDIC encodings in this file,
63 * one for each of the code pages recognized by Perl. That means that there
64 * are actually three different sets of tables, one for each code page. (If
d06134e5 65 * Perl is compiled on platforms using another EBCDIC code page, it may not
45f80db9 66 * compile, or Perl may silently mistake it for one of the three.)
fe749c9a
KW
67 *
68 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
69 * implementation of all 3 encodings, so for those Step 1 is trivial.
70 *
71 * (Note that the entries for invariant characters are necessarily the same in
72 * PL_e2a and PLe2f, and the same for their inverses.)
73 *
74 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
75 * of the same string. The maximum code point representable as 2 bytes in
76 * UTF-EBCDIC is 0x3FFF, instead of 0x7FFF in UTF-8.
1d72bdf6
NIS
77 */
78
79START_EXTERN_C
80
81#ifdef DOINIT
82/* Indexed by encoded byte this table gives the length of the sequence.
83 Adapted from the shadow flags table in tr16.
9df205e2 84 The entries marked 9 in tr16 are continuation bytes and are marked
c4d5f83a 85 as length 1 here so that we can recover.
1d72bdf6 86*/
f5e1abaf 87#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
1d72bdf6
NIS
88EXTCONST unsigned char PL_utf8skip[] = {
891,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
901,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
911,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
921,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
c4d5f83a
NIS
931,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
941,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
961,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1d72bdf6
NIS
972,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
982,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
992,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
1002,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
1011,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1021,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1031,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1041,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
105};
f5e1abaf
JH
106#endif
107
108#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
109unsigned char PL_utf8skip[] = {
1101,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1111,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1131,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1141,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1151,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1161,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1171,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
1182,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1192,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1202,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1211,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
1224,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1231,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
1247,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1251,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
126};
127#endif
128
129#if '^' == 176 /* if defined(??) (OS/400?) 037 */
130unsigned char PL_utf8skip[] = {
1311,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1321,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1331,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1341,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1351,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1361,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
1371,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1381,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1392,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1402,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1412,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1421,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
1431,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1441,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1451,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1461,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
147};
148#endif
1d72bdf6 149
fe749c9a
KW
150/* Transform tables from tr16 applied after encoding to render encoding EBCDIC
151 * like, meaning that all the invariants are actually invariant, eg, that 'A'
152 * remains 'A' */
1d72bdf6 153
f5e1abaf 154#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
537124e4 155EXTCONST unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-1047) */
1d72bdf6
NIS
156 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
157 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
158 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
159 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
160 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
161 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
162 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
163 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
164 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
165 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
166 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
167 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
168 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
169 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
170 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 171 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
1d72bdf6
NIS
172};
173
537124e4 174EXTCONST unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-1047) to I8 */
1d72bdf6
NIS
175 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
178 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
179 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
180 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
181 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
182 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
183 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
184 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
185 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
186 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
187 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
188 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
189 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 190 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
1d72bdf6 191};
f5e1abaf
JH
192#endif /* 1047 */
193
194#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
537124e4 195unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (POSIX-BC) */
f5e1abaf
JH
196 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
197 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
198 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
199 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
200 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
201 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
202 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
203 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
204 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
205 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
206 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
207 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
208 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
209 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
210 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 211 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
f5e1abaf
JH
212};
213
537124e4 214unsigned char PL_e2utf[] = { /* UTFEBCDIC (POSIX-BC) to I8 */
f5e1abaf
JH
215 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
216 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
217 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
218 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
219 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
220 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
221 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
222 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
223 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
224 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
225 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
226 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
227 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
228 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
229 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
f5e1abaf
JH
231};
232#endif /* POSIX-BC */
233
234#if '^' == 176 /* if defined(??) (OS/400?) 037 */
537124e4 235unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-037) */
f5e1abaf
JH
236 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
237 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
238 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
239 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
240 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
241 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
242 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
243 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
244 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
245 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
246 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
247 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
248 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
249 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
250 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 251 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
f5e1abaf
JH
252};
253
537124e4 254unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-037) to I8 */
f5e1abaf
JH
255 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
256 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
257 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
258 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
259 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
260 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
261 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
262 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
263 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
264 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
265 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
266 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
267 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
268 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
269 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 270 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
f5e1abaf
JH
271};
272#endif /* 037 */
1d72bdf6
NIS
273
274/* These tables moved from perl.h and converted to hex.
486ec47a 275 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
1d72bdf6
NIS
276*/
277
278#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
279EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
280 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
281 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
282 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
284 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
285 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
286 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
287 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
288 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
289 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
290 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
291 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
292 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
293 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
294 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
295 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
296};
297
431fc773 298EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
1d72bdf6
NIS
299 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
300 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
301 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
302 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
303 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
304 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
305 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
306 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
307 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
308 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
309 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
310 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
311 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
312 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
313 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
314 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
315};
d02f4dad 316
1248eb9d
KW
317EXTCONST unsigned char PL_latin1_lc[] = { /* lowercasing */
318 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
319 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
320 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
321 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
322 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
323 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
324 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
325 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
326 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
327 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
328 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0xAD, 0x8E, 0xAF,
329 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0x8D, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
330 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
331 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
332 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
333 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
334};
335
336EXTCONST unsigned char PL_mod_latin1_uc[] = { /* uppercasing */
337 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
338 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
339 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
340 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
341 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
342 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
343 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
344 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
345 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xBA, 0xAE, 0x8F,
346 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
347 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
348 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
349 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
350 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
351 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
352 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
353};
354
355EXTCONST unsigned char PL_fold_latin1[] = { /* fast EBCDIC case folding table,
356 'A' => 'a'; 'a' => 'A'; full
357 0-255 range */
d02f4dad
KW
358 0, 1, 2, 3, 4, 5, 6, 7,
359 8, 9, 10, 11, 12, 13, 14, 15,
360 16, 17, 18, 19, 20, 21, 22, 23,
361 24, 25, 26, 27, 28, 29, 30, 31,
362 32, 33, 34, 35, 36, 37, 38, 39,
363 40, 41, 42, 43, 44, 45, 46, 47,
364 48, 49, 50, 51, 52, 53, 54, 55,
365 56, 57, 58, 59, 60, 61, 62, 63,
366 64, 65, 98, 99, 100, 101, 102, 103,
367 104, 105, 74, 75, 76, 77, 78, 79,
368 80, 113, 114, 115, 116, 117, 118, 119,
369 120, 89, 90, 91, 92, 93, 94, 95,
370 96, 97, 66, 67, 68, 69, 70, 71,
371 72, 73, 106, 107, 108, 109, 110, 111,
372 128, 81, 82, 83, 84, 85, 86, 87,
373 88, 121, 122, 123, 124, 125, 126, 127,
374 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
375 'H', 'I', 138, 139, 172, 186, 174, 143,
376 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
377 'Q', 'R', 154, 155, 158, 157, 156, 159,
378 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
379 'Y', 'Z', 170, 171, 140, 173, 142, 175,
380 176, 177, 178, 179, 180, 181, 182, 183,
381 184, 185, 141, 187, 188, 189, 190, 191,
382 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
383 'h', 'i', 202, 235, 236, 237, 238, 239,
384 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
385 'q', 'r', 218, 251, 252, 253, 254, 223,
386 224, 225, 's', 't', 'u', 'v', 'w', 'x',
387 'y', 'z', 234, 203, 204, 205, 206, 207,
388 240, 241, 242, 243, 244, 245, 246, 247,
389 248, 249, 250, 219, 220, 221, 222, 255
390};
1d72bdf6
NIS
391#endif /* 1047 */
392
393#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
394EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
395 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
396 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
397 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
398 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
399 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
400 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
401 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
402 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
403 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
404 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
405 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
406 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
407 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
408 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
409 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
410 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
411};
412
413EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
414 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
415 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
416 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
417 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
418 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
419 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
420 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
421 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
422 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
423 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
424 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
425 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
426 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
427 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
428 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
429 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
430};
d02f4dad 431
1248eb9d
KW
432EXTCONST unsigned char PL_latin1_lc[] = { /* lowercasing */
433 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
434 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
435 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
436 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
437 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
438 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
439 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
440 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
441 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
442 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
443 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
444 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
445 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
446 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDB, 0xDE, 0xDF,
447 0xC0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
448 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xFD, 0xDE, 0xFF
449};
450
451EXTCONST unsigned char PL_mod_latin1_uc[] = { /* uppercasing */
452 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
453 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
454 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
455 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
456 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
457 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
458 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
459 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
460 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
461 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
462 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
463 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
464 0xE0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
465 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDD, 0xFC, 0xDD, 0xFE, 0xDF,
466 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
467 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
468};
469
470EXTCONST unsigned char PL_fold_latin1[] = { /* fast EBCDIC case folding table,
471 'A' => 'a'; 'a' => 'A'; full
472 0-255 range */
d02f4dad
KW
473 0, 1, 2, 3, 4, 5, 6, 7,
474 8, 9, 10, 11, 12, 13, 14, 15,
475 16, 17, 18, 19, 20, 21, 22, 23,
476 24, 25, 26, 27, 28, 29, 30, 31,
477 32, 33, 34, 35, 36, 37, 38, 39,
478 40, 41, 42, 43, 44, 45, 46, 47,
479 48, 49, 50, 51, 52, 53, 54, 55,
480 56, 57, 58, 59, 60, 61, 62, 63,
481 64, 65, 98, 99, 100, 101, 102, 103,
482 104, 105, 74, 75, 76, 77, 78, 79,
483 80, 113, 114, 115, 116, 117, 118, 119,
484 120, 89, 90, 91, 92, 93, 94, 95,
485 96, 97, 66, 67, 68, 69, 70, 71,
486 72, 73, 106, 107, 108, 109, 110, 111,
487 128, 81, 82, 83, 84, 85, 86, 87,
488 88, 121, 122, 123, 124, 125, 126, 127,
489 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
490 'H', 'I', 138, 139, 172, 173, 174, 143,
491 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
492 'Q', 'R', 154, 155, 158, 157, 156, 159,
493 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
494 'Y', 'Z', 170, 171, 140, 141, 142, 175,
495 176, 177, 178, 179, 180, 181, 182, 183,
496 184, 185, 186, 187, 188, 189, 190, 191,
497 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
498 'h', 'i', 202, 235, 236, 237, 238, 239,
499 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
500 'q', 'r', 218, 221, 252, 219, 254, 223,
501 192, 225, 's', 't', 'u', 'v', 'w', 'x',
502 'y', 'z', 234, 203, 204, 205, 206, 207,
503 240, 241, 242, 243, 244, 245, 246, 247,
504 248, 249, 250, 251, 220, 253, 222, 255
505};
1d72bdf6 506#endif /* POSIX-BC */
f5e1abaf 507
1d72bdf6
NIS
508#if '^' == 176 /* if defined(??) (OS/400?) 037 */
509EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
510 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
511 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
512 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
513 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
514 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
515 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
516 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
517 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
518 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
519 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
520 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
521 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
522 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
523 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
524 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
525 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
526};
527
528EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
529 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
530 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
531 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
532 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
533 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
534 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
535 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
536 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
537 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
538 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
539 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
540 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
541 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
542 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
543 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
544 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
545};
d02f4dad 546
1248eb9d
KW
547EXTCONST unsigned char PL_latin1_lc[] = { /* lowercasing */
548 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
549 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
550 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
551 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
552 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
553 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
554 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
555 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
556 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
557 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
558 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
559 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
560 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
561 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
562 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
563 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
564};
565
566EXTCONST unsigned char PL_mod_latin1_uc[] = { /* uppercasing */
567 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
568 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
569 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
570 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
571 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
572 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
573 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
574 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
575 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
576 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
577 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
578 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
579 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
580 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
581 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
582 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
583};
584
585EXTCONST unsigned char PL_fold_latin1[] = { /* fast EBCDIC case folding table,
586 'A' => 'a'; 'a' => 'A'; full
587 0-255 range */
d02f4dad
KW
588 0, 1, 2, 3, 4, 5, 6, 7,
589 8, 9, 10, 11, 12, 13, 14, 15,
590 16, 17, 18, 19, 20, 21, 22, 23,
591 24, 25, 26, 27, 28, 29, 30, 31,
592 32, 33, 34, 35, 36, 37, 38, 39,
593 40, 41, 42, 43, 44, 45, 46, 47,
594 48, 49, 50, 51, 52, 53, 54, 55,
595 56, 57, 58, 59, 60, 61, 62, 63,
596 64, 65, 98, 99, 100, 101, 102, 103,
597 104, 105, 74, 75, 76, 77, 78, 79,
598 80, 113, 114, 115, 116, 117, 118, 119,
599 120, 89, 90, 91, 92, 93, 94, 95,
600 96, 97, 66, 67, 68, 69, 70, 71,
601 72, 73, 106, 107, 108, 109, 110, 111,
602 128, 81, 82, 83, 84, 85, 86, 87,
603 88, 121, 122, 123, 124, 125, 126, 127,
604 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
605 'H', 'I', 138, 139, 172, 173, 174, 143,
606 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
607 'Q', 'R', 154, 155, 158, 157, 156, 159,
608 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
609 'Y', 'Z', 170, 171, 140, 141, 142, 175,
610 176, 177, 178, 179, 180, 181, 182, 183,
611 184, 185, 186, 187, 188, 189, 190, 191,
612 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
613 'h', 'i', 202, 235, 236, 237, 238, 239,
614 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
615 'q', 'r', 218, 251, 252, 253, 254, 223,
616 224, 225, 's', 't', 'u', 'v', 'w', 'x',
617 'y', 'z', 234, 203, 204, 205, 206, 207,
618 240, 241, 242, 243, 244, 245, 246, 247,
619 248, 249, 250, 219, 220, 221, 222, 255
620};
1d72bdf6
NIS
621#endif /* 037 */
622
1248eb9d
KW
623/* This is the same in all code pages, as only A-Z, a-z are affected */
624EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
625 'a'; 'a' => 'A' */
626 0, 1, 2, 3, 4, 5, 6, 7,
627 8, 9, 10, 11, 12, 13, 14, 15,
628 16, 17, 18, 19, 20, 21, 22, 23,
629 24, 25, 26, 27, 28, 29, 30, 31,
630 32, 33, 34, 35, 36, 37, 38, 39,
631 40, 41, 42, 43, 44, 45, 46, 47,
632 48, 49, 50, 51, 52, 53, 54, 55,
633 56, 57, 58, 59, 60, 61, 62, 63,
634 64, 65, 66, 67, 68, 69, 70, 71,
635 72, 73, 74, 75, 76, 77, 78, 79,
636 80, 81, 82, 83, 84, 85, 86, 87,
637 88, 89, 90, 91, 92, 93, 94, 95,
638 96, 97, 98, 99, 100, 101, 102, 103,
639 104, 105, 106, 107, 108, 109, 110, 111,
640 112, 113, 114, 115, 116, 117, 118, 119,
641 120, 121, 122, 123, 124, 125, 126, 127,
642 128, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
643 'H', 'I', 138, 139, 140, 141, 142, 143,
644 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
645 'Q', 'R', 154, 155, 156, 157, 158, 159,
646 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
647 'Y', 'Z', 170, 171, 172, 173, 174, 175,
648 176, 177, 178, 179, 180, 181, 182, 183,
649 184, 185, 186, 187, 188, 189, 190, 191,
650 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
651 'h', 'i', 202, 203, 204, 205, 206, 207,
652 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
653 'q', 'r', 218, 219, 220, 221, 222, 223,
654 224, 225, 's', 't', 'u', 'v', 'w', 'x',
655 'y', 'z', 234, 235, 236, 237, 238, 239,
656 240, 241, 242, 243, 244, 245, 246, 247,
657 248, 249, 250, 251, 252, 253, 254, 255
658};
44f2fc15 659
1d72bdf6
NIS
660#else
661EXTCONST unsigned char PL_utf8skip[];
662EXTCONST unsigned char PL_e2utf[];
663EXTCONST unsigned char PL_utf2e[];
664EXTCONST unsigned char PL_e2a[];
665EXTCONST unsigned char PL_a2e[];
d02f4dad 666EXTCONST unsigned char PL_fold[];
1248eb9d
KW
667EXTCONST unsigned char PL_fold_latin1[];
668EXTCONST unsigned char PL_latin1_lc[];
669EXTCONST unsigned char PL_mod_latin1_uc[];
1d72bdf6
NIS
670#endif
671
672END_EXTERN_C
673
1e54db1a 674/* EBCDIC-happy ways of converting native code to UTF-8 */
1d72bdf6 675
59a449d5
KW
676#define NATIVE_TO_LATIN1(ch) PL_e2a[(U8)(ch)]
677#define LATIN1_TO_NATIVE(ch) PL_a2e[(U8)(ch)]
678
679#define NATIVE_UTF8_TO_I8(ch) (ch) PL_e2utf[(U8)(ch)]
680#define I8_TO_NATIVE_UTF8(ch) (ch) PL_utf2e[(U8)(ch)]
681
bc3632a8
KW
682/* Transforms in wide UV chars */
683#define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))
684#define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : LATIN1_TO_NATIVE(ch))
685
1d72bdf6 686/*
d06134e5 687 The following table is adapted from tr16, it shows I8 encoding of Unicode code points.
1d72bdf6
NIS
688
689 Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
690 U+0000..U+007F 000000000xxxxxxx 0xxxxxxx
691 U+0080..U+009F 00000000100xxxxx 100xxxxx
1d72bdf6
NIS
692 U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx
693 U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx
694 U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx
695 U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
696 U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
697 U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
698
d06134e5 699 Note: The I8 transformation is valid for UCS-4 values X'0' to
1d72bdf6
NIS
700 X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).
701
702 */
703
704#define UNISKIP(uv) ( (uv) < 0xA0 ? 1 : \
705 (uv) < 0x400 ? 2 : \
706 (uv) < 0x4000 ? 3 : \
707 (uv) < 0x40000 ? 4 : \
708 (uv) < 0x400000 ? 5 : \
709 (uv) < 0x4000000 ? 6 : 7 )
710
c4d5f83a 711#define UNI_IS_INVARIANT(c) ((c) < 0xA0)
530495eb 712
15824458
KW
713/* UTF-EBCDIC semantic macros - transform back into I8 and then compare
714 * Comments as to the meaning of each are given at their corresponding utf8.h
715 * definitions */
0447e8df 716
bc3632a8
KW
717#define UTF8_IS_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
718 && NATIVE_UTF8_TO_I8(c) != 0xE0)
719#define UTF8_IS_CONTINUATION(c) ((NATIVE_UTF8_TO_I8(c) & 0xE0) == 0xA0)
720#define UTF8_IS_CONTINUED(c) (NATIVE_UTF8_TO_I8(c) >= 0xA0)
721#define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
722 && NATIVE_UTF8_TO_I8(c) <= 0xC7)
723#define UTF8_IS_ABOVE_LATIN1(c) (NATIVE_UTF8_TO_I8(c) >= 0xC8)
1d72bdf6 724
22901f30
RGS
725#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
726#define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))
1d72bdf6
NIS
727#define UTF_CONTINUATION_MARK 0xA0
728#define UTF_CONTINUATION_MASK ((U8)0x1f)
729#define UTF_ACCUMULATION_SHIFT 5
730
03c76984
KW
731/* How wide can a single UTF-8 encoded character become in bytes. */
732/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
733 * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
734 * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode
735 * non-negative integers in a binary format, even those above Unicode */
736#define UTF8_MAXBYTES 7
737
738/* The maximum number of UTF-8 bytes a single Unicode character can
739 * uppercase/lowercase/fold into. Unicode guarantees that the maximum
740 * expansion is 3 characters. On EBCDIC platforms, the highest Unicode
741 * character occupies 5 bytes, therefore this number is 15 */
742#define UTF8_MAXBYTES_CASE 15
743
e9a8c099
MHM
744/*
745 * Local variables:
746 * c-indentation-style: bsd
747 * c-basic-offset: 4
14d04a33 748 * indent-tabs-mode: nil
e9a8c099
MHM
749 * End:
750 *
14d04a33 751 * ex: set ts=8 sts=4 sw=4 et:
e9a8c099 752 */