This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Weak refs to pad hvs should go stale
[perl5.git] / utfebcdic.h
CommitLineData
1d72bdf6
NIS
1/* utfebcdic.h
2 *
83706693
RGS
3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 by Larry Wall,
4 * Nick Ing-Simmons, and others
1d72bdf6
NIS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
486ec47a 10 * Taken from version 7.1 of Unicode Technical Report #16:
1d72bdf6 11 * http://www.unicode.org/unicode/reports/tr16
fe749c9a
KW
12 *
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
d06134e5 15 * 1) convert to Unicode. The table in this file that does this for
fe749c9a
KW
16 * EBCDIC bytes is PL_e2a (with inverse PL_a2e). The 'a' stands for
17 * ASCIIish, meaning latin1.
d06134e5
KW
18 * 2) convert that to a utf8-like string called I8 (I stands for
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
fe749c9a
KW
23 * essentially here are the differences:
24 *                        UTF8                I8
25 *   invariant byte       starts with 0       starts with 0 or 100
26 *   continuation byte    starts with 10      starts with 101
27 *   start byte           same in both: if the code point requires N bytes,
28 *                        then the leading N bits are 1, followed by a 0. (No
29 *                        trailing 0 for the very largest possible allocation
30 *                        in I8, far beyond the current Unicode standard's
31 *                        max, as shown in the comment later in this file.)
32 * 3) Use the table published in tr16 to convert each byte from step 2 into
d06134e5
KW
33 * final UTF-EBCDIC. That table is reproduced in this file as PL_utf2e,
34 * and its inverse is PL_e2utf. They are constructed so that all EBCDIC
35 * invariants remain invariant, but no others do. For example, the
36 * ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
37 * Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
38 * it back to 193. As an example of how a variant character works, take
486ec47a 39 * LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
d06134e5
KW
40 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2
41 * converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
42 * converts those to 0x8B 0x73. The table is constructed so that the
43 * first byte of the final form of a variant will always have its upper
44 * bit set (at least in the encodings that Perl recognizes, and probably
45 * all). But note that the upper bit of some invariants is also 1.
fe749c9a
KW
46 *
47 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
48 * EBCDIC, reverse the steps.
49 *
50 * The EBCDIC invariants have been chosen to be those characters whose Unicode
51 * equivalents have ordinal numbers less than 160, that is the same characters
52 * that are expressible in ASCII, plus the C1 controls. So there are 160
53 * invariants instead of the 128 in UTF-8. (My guess is that this is because
54 * the C1 control NEL (and maybe others) is important in IBM.)
55 *
56 * The purpose of Step 3 is to make the encoding be invariant for the chosen
57 * characters. This messes up the convenient patterns found in step 2, so
58 * generally, one has to undo step 3 into a temporary to use them. However,
59 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
60 * each byte, it says how long the sequence is if that byte were to begin it.
61 *
62 * There are actually 3 slightly different UTF-EBCDIC encodings in this file,
63 * one for each of the code pages recognized by Perl. That means that there
64 * are actually three different sets of tables, one for each code page. (If
d06134e5
KW
65 * Perl is compiled on platforms using another EBCDIC code page, it may not
66 * compile, or Perl may silently mistake it for one of the three.)
fe749c9a
KW
67 *
68 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
69 * implementation of all 3 encodings, so for those Step 1 is trivial.
70 *
71 * (Note that the entries for invariant characters are necessarily the same in
72 * PL_e2a and PL_e2utf, and the same for their inverses.)
73 *
74 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
75 * of the same string. The maximum code point representable as 2 bytes in
76 * UTF-EBCDIC is 0x3FF, instead of 0x7FF in UTF-8.
1d72bdf6
NIS
77 */
78
79START_EXTERN_C
80
81#ifdef DOINIT
82/* Indexed by encoded byte this table gives the length of the sequence.
83 Adapted from the shadow flags table in tr16.
9df205e2 84 The entries marked 9 in tr16 are continuation bytes and are marked
c4d5f83a 85 as length 1 here so that we can recover.
1d72bdf6 86*/
f5e1abaf 87#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
1d72bdf6
NIS
88EXTCONST unsigned char PL_utf8skip[] = {
891,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
901,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
911,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
921,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
c4d5f83a
NIS
931,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
941,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
961,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1d72bdf6
NIS
972,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
982,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
992,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
1002,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
1011,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1021,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1031,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1041,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
105};
f5e1abaf
JH
106#endif
107
108#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
109unsigned char PL_utf8skip[] = {
1101,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1111,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1131,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1141,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1151,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1161,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1171,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
1182,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1192,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1202,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1211,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
1224,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1231,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
1247,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1251,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
126};
127#endif
128
129#if '^' == 176 /* if defined(??) (OS/400?) 037 */
130unsigned char PL_utf8skip[] = {
1311,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1321,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1331,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1341,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1351,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1361,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
1371,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1381,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1392,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1402,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1412,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1421,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
1431,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1441,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1451,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1461,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
147};
148#endif
1d72bdf6 149
fe749c9a
KW
150/* Transform tables from tr16 applied after encoding to render encoding EBCDIC
151 * like, meaning that all the invariants are actually invariant, eg, that 'A'
152 * remains 'A' */
1d72bdf6 153
f5e1abaf 154#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
d06134e5 155EXTCONST unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-1047) */
1d72bdf6
NIS
156 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
157 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
158 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
159 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
160 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
161 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
162 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
163 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
164 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
165 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
166 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
167 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
168 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
169 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
170 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 171 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
1d72bdf6
NIS
172};
173
d06134e5 174EXTCONST unsigned char PL_e2utf[] = { /* EBCDIC (IBM-1047) to I8 */
1d72bdf6
NIS
175 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
178 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
179 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
180 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
181 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
182 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
183 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
184 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
185 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
186 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
187 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
188 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
189 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 190 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
1d72bdf6 191};
f5e1abaf
JH
192#endif /* 1047 */
193
194#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
d06134e5 195unsigned char PL_utf2e[] = { /* I8 to EBCDIC (POSIX-BC) */
f5e1abaf
JH
196 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
197 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
198 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
199 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
200 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
201 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
202 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
203 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
204 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
205 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
206 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
207 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
208 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
209 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
210 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 211 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
f5e1abaf
JH
212};
213
d06134e5 214unsigned char PL_e2utf[] = { /* EBCDIC (POSIX-BC) to I8 */
f5e1abaf
JH
215 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
216 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
217 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
218 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
219 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
220 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
221 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
222 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
223 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
224 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
225 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
226 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
227 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
228 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
229 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
f5e1abaf
JH
231};
232#endif /* POSIX-BC */
233
234#if '^' == 176 /* if defined(??) (OS/400?) 037 */
d06134e5 235unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-037) */
f5e1abaf
JH
236 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
237 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
238 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
239 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
240 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
241 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
242 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
243 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
244 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
245 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
246 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
247 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
248 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
249 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
250 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 251 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
f5e1abaf
JH
252};
253
d06134e5 254unsigned char PL_e2utf[] = { /* EBCDIC (IBM-037) to I8 */
f5e1abaf
JH
255 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
256 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
257 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
258 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
259 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
260 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
261 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
262 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
263 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
264 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
265 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
266 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
267 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
268 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
269 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 270 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
f5e1abaf
JH
271};
272#endif /* 037 */
1d72bdf6
NIS
273
274/* These tables moved from perl.h and converted to hex.
486ec47a 275 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
1d72bdf6
NIS
276*/
277
278#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
279EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
280 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
281 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
282 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
284 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
285 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
286 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
287 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
288 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
289 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
290 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
291 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
292 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
293 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
294 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
295 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
296};
297
5cd46e1f
KW
298#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
299#define LATIN_SMALL_LETTER_SHARP_S 0x59
300#define MICRO_SIGN 0xA0
f508a607
KW
301#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
302#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
5cd46e1f 303
431fc773 304EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
1d72bdf6
NIS
305 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
306 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
307 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
308 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
309 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
310 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
311 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
312 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
313 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
314 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
315 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
316 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
317 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
318 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
319 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
320 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
321};
d02f4dad
KW
322
323EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
324 'a'; 'a' => 'A' */
325 0, 1, 2, 3, 4, 5, 6, 7,
326 8, 9, 10, 11, 12, 13, 14, 15,
327 16, 17, 18, 19, 20, 21, 22, 23,
328 24, 25, 26, 27, 28, 29, 30, 31,
329 32, 33, 34, 35, 36, 37, 38, 39,
330 40, 41, 42, 43, 44, 45, 46, 47,
331 48, 49, 50, 51, 52, 53, 54, 55,
332 56, 57, 58, 59, 60, 61, 62, 63,
333 64, 65, 98, 99, 100, 101, 102, 103,
334 104, 105, 74, 75, 76, 77, 78, 79,
335 80, 113, 114, 115, 116, 117, 118, 119,
336 120, 89, 90, 91, 92, 93, 94, 95,
337 96, 97, 66, 67, 68, 69, 70, 71,
338 72, 73, 106, 107, 108, 109, 110, 111,
339 128, 81, 82, 83, 84, 85, 86, 87,
340 88, 121, 122, 123, 124, 125, 126, 127,
341 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
342 'H', 'I', 138, 139, 172, 186, 174, 143,
343 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
344 'Q', 'R', 154, 155, 158, 157, 156, 159,
345 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
346 'Y', 'Z', 170, 171, 140, 173, 142, 175,
347 176, 177, 178, 179, 180, 181, 182, 183,
348 184, 185, 141, 187, 188, 189, 190, 191,
349 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
350 'h', 'i', 202, 235, 236, 237, 238, 239,
351 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
352 'q', 'r', 218, 251, 252, 253, 254, 223,
353 224, 225, 's', 't', 'u', 'v', 'w', 'x',
354 'y', 'z', 234, 203, 204, 205, 206, 207,
355 240, 241, 242, 243, 244, 245, 246, 247,
356 248, 249, 250, 219, 220, 221, 222, 255
357};
1d72bdf6
NIS
358#endif /* 1047 */
359
360#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
361EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
362 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
363 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
364 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
365 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
366 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
367 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
368 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
369 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
370 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
371 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
372 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
373 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
374 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
375 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
376 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
377 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
378};
379
5cd46e1f
KW
380#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
381#define LATIN_SMALL_LETTER_SHARP_S 0x59
382#define MICRO_SIGN 0xA0
f508a607
KW
383#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
384#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
5cd46e1f 385
1d72bdf6
NIS
386EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
387 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
388 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
389 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
390 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
391 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
392 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
393 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
394 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
395 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
396 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
397 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
398 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
399 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
400 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
401 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
402 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
403};
d02f4dad
KW
404
405EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
406 'a'; 'a' => 'A' */
407 0, 1, 2, 3, 4, 5, 6, 7,
408 8, 9, 10, 11, 12, 13, 14, 15,
409 16, 17, 18, 19, 20, 21, 22, 23,
410 24, 25, 26, 27, 28, 29, 30, 31,
411 32, 33, 34, 35, 36, 37, 38, 39,
412 40, 41, 42, 43, 44, 45, 46, 47,
413 48, 49, 50, 51, 52, 53, 54, 55,
414 56, 57, 58, 59, 60, 61, 62, 63,
415 64, 65, 98, 99, 100, 101, 102, 103,
416 104, 105, 74, 75, 76, 77, 78, 79,
417 80, 113, 114, 115, 116, 117, 118, 119,
418 120, 89, 90, 91, 92, 93, 94, 95,
419 96, 97, 66, 67, 68, 69, 70, 71,
420 72, 73, 106, 107, 108, 109, 110, 111,
421 128, 81, 82, 83, 84, 85, 86, 87,
422 88, 121, 122, 123, 124, 125, 126, 127,
423 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
424 'H', 'I', 138, 139, 172, 173, 174, 143,
425 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
426 'Q', 'R', 154, 155, 158, 157, 156, 159,
427 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
428 'Y', 'Z', 170, 171, 140, 141, 142, 175,
429 176, 177, 178, 179, 180, 181, 182, 183,
430 184, 185, 186, 187, 188, 189, 190, 191,
431 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
432 'h', 'i', 202, 235, 236, 237, 238, 239,
433 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
434 'q', 'r', 218, 221, 252, 219, 254, 223,
435 192, 225, 's', 't', 'u', 'v', 'w', 'x',
436 'y', 'z', 234, 203, 204, 205, 206, 207,
437 240, 241, 242, 243, 244, 245, 246, 247,
438 248, 249, 250, 251, 220, 253, 222, 255
439};
1d72bdf6 440#endif /* POSIX-BC */
f5e1abaf 441
1d72bdf6
NIS
442#if '^' == 176 /* if defined(??) (OS/400?) 037 */
443EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
444 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
445 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
446 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
447 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
448 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
449 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
450 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
451 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
452 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
453 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
454 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
455 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
456 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
457 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
458 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
459 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
460};
461
5cd46e1f
KW
462
463#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
464#define LATIN_SMALL_LETTER_SHARP_S 0x59
465#define MICRO_SIGN 0xA0
f508a607
KW
466#define LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE 0x0067
467#define LATIN_SMALL_LETTER_A_WITH_RING_ABOVE 0x0047
5cd46e1f 468
1d72bdf6
NIS
/* EBCDIC (IBM-037) to ASCII (ISO8859-1).
 * Indexed by the native byte (see NATIVE_TO_ASCII); each entry is the
 * ISO 8859-1 code point for that byte.  Rows hold 16 consecutive entries; the
 * comment at the end of each row gives the index of its first entry. */
EXTCONST unsigned char PL_e2a[] = {
    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, /* 0x00 */
    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, /* 0x10 */
    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, /* 0x20 */
    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, /* 0x30 */
    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, /* 0x40 */
    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC, /* 0x50 */
    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, /* 0x60 */
    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, /* 0x70 */
    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1, /* 0x80 */
    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4, /* 0x90 */
    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE, /* 0xA0 */
    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7, /* 0xB0 */
    0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5, /* 0xC0 */
    0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF, /* 0xD0 */
    0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5, /* 0xE0 */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F  /* 0xF0 */
};
d02f4dad
KW
487
/* Fast EBCDIC case folding table: 'A' => 'a'; 'a' => 'A'.
 * Indexed by the native byte; the entry is the same character in the other
 * case, or the index itself when the byte has no case partner in this table.
 * The A-Z/a-z slots are written as character literals, so their numeric
 * values come from the compiler's character set (presumably EBCDIC on the
 * platforms this section is built for -- confirm).  For example slot 0x81,
 * which PL_e2a maps to 'a', holds 'A'.
 * Rows hold 8 consecutive entries; the comment at the end of each row gives
 * the index of its first entry. */
EXTCONST unsigned char PL_fold[] = {
    0, 1, 2, 3, 4, 5, 6, 7,                     /* 0x00 */
    8, 9, 10, 11, 12, 13, 14, 15,               /* 0x08 */
    16, 17, 18, 19, 20, 21, 22, 23,             /* 0x10 */
    24, 25, 26, 27, 28, 29, 30, 31,             /* 0x18 */
    32, 33, 34, 35, 36, 37, 38, 39,             /* 0x20 */
    40, 41, 42, 43, 44, 45, 46, 47,             /* 0x28 */
    48, 49, 50, 51, 52, 53, 54, 55,             /* 0x30 */
    56, 57, 58, 59, 60, 61, 62, 63,             /* 0x38 */
    64, 65, 98, 99, 100, 101, 102, 103,         /* 0x40 */
    104, 105, 74, 75, 76, 77, 78, 79,           /* 0x48 */
    80, 113, 114, 115, 116, 117, 118, 119,      /* 0x50 */
    120, 89, 90, 91, 92, 93, 94, 95,            /* 0x58 */
    96, 97, 66, 67, 68, 69, 70, 71,             /* 0x60 */
    72, 73, 106, 107, 108, 109, 110, 111,       /* 0x68 */
    128, 81, 82, 83, 84, 85, 86, 87,            /* 0x70 */
    88, 121, 122, 123, 124, 125, 126, 127,      /* 0x78 */
    112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',     /* 0x80 */
    'H', 'I', 138, 139, 172, 173, 174, 143,     /* 0x88 */
    144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',     /* 0x90 */
    'Q', 'R', 154, 155, 158, 157, 156, 159,     /* 0x98 */
    160, 161, 'S', 'T', 'U', 'V', 'W', 'X',     /* 0xA0 */
    'Y', 'Z', 170, 171, 140, 141, 142, 175,     /* 0xA8 */
    176, 177, 178, 179, 180, 181, 182, 183,     /* 0xB0 */
    184, 185, 186, 187, 188, 189, 190, 191,     /* 0xB8 */
    192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',     /* 0xC0 */
    'h', 'i', 202, 235, 236, 237, 238, 239,     /* 0xC8 */
    208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',     /* 0xD0 */
    'q', 'r', 218, 251, 252, 253, 254, 223,     /* 0xD8 */
    224, 225, 's', 't', 'u', 'v', 'w', 'x',     /* 0xE0 */
    'y', 'z', 234, 203, 204, 205, 206, 207,     /* 0xE8 */
    240, 241, 242, 243, 244, 245, 246, 247,     /* 0xF0 */
    248, 249, 250, 219, 220, 221, 222, 255      /* 0xF8 */
};
1d72bdf6
NIS
523#endif /* 037 */
524
525#else
/* Declaration-only forms (no initializers) of the translation and folding
 * tables, for use when the initialized definitions in the other branch of
 * this conditional are not compiled in. */
EXTCONST unsigned char PL_utf8skip[];
EXTCONST unsigned char PL_e2utf[];
EXTCONST unsigned char PL_utf2e[];
EXTCONST unsigned char PL_e2a[];
EXTCONST unsigned char PL_a2e[];
EXTCONST unsigned char PL_fold[];
1d72bdf6
NIS
532#endif
533
d02f4dad
KW
534/* Since the EBCDIC code pages are isomorphic to Latin1, that table is merely a
535 * duplicate */
536EXTCONST unsigned char * PL_fold_latin1 = PL_fold;
537
1d72bdf6
NIS
538END_EXTERN_C
539
1e54db1a 540/* EBCDIC-happy ways of converting native code to UTF-8 */
1d72bdf6
NIS
541
/* Single-byte table lookups.  The argument is cast to U8 before it is used
 * as the table index. */

/* Native to iso-8859-1, and back */
#define NATIVE_TO_ASCII(ch)     (PL_e2a[(U8)(ch)])
#define ASCII_TO_NATIVE(ch)     (PL_a2e[(U8)(ch)])

/* Transform after encoding, essentially converts to/from I8 */
#define NATIVE_TO_UTF(ch)       (PL_e2utf[(U8)(ch)])    /* to I8 */
#define UTF_TO_NATIVE(ch)       (PL_utf2e[(U8)(ch)])    /* from I8 */
1d72bdf6
NIS
/* Transform in wide UV char space: values that fit in a byte go through the
 * single-byte tables; anything above 255 is returned unchanged.  Note that
 * the argument is expanded more than once. */
#define NATIVE_TO_UNI(ch)       (((ch) <= 255) ? NATIVE_TO_ASCII(ch) : (ch))
#define UNI_TO_NATIVE(ch)       (((ch) <= 255) ? ASCII_TO_NATIVE(ch) : (ch))
/* Transform in invariant..byte space: a false 'enc' selects the plain native
 * form, a true 'enc' additionally passes the byte through UTF_TO_NATIVE. */
#define NATIVE_TO_NEED(enc,ch)  (!(enc) ? (ch) : UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)))
#define ASCII_TO_NEED(enc,ch)   (!(enc) ? ASCII_TO_NATIVE(ch) : UTF_TO_NATIVE(ch))
554
555/*
 The following table is adapted from tr16; it shows the I8 encoding of Unicode code points.
1d72bdf6
NIS
557
558 Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
559 U+0000..U+007F 000000000xxxxxxx 0xxxxxxx
560 U+0080..U+009F 00000000100xxxxx 100xxxxx
561 U+00A0..U+00FF 00000000yyyxxxxx 11000yyy 101xxxxx
562
563 U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx
564 U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx
565 U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx
566 U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
567 U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
568 U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
569
d06134e5 570 Note: The I8 transformation is valid for UCS-4 values X'0' to
1d72bdf6
NIS
571 X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).
572
573 */
574
/* Number of bytes (1 to 7) that the I8 encoding of code point uv occupies.
 * The thresholds are checked from the largest downwards; the first one that
 * uv reaches decides the length.  The argument is expanded several times, so
 * it must be free of side effects. */
#define UNISKIP(uv) ( (uv) >= 0x4000000 ? 7 : \
                      (uv) >= 0x400000  ? 6 : \
                      (uv) >= 0x40000   ? 5 : \
                      (uv) >= 0x4000    ? 4 : \
                      (uv) >= 0x400     ? 3 : \
                      (uv) >= 0xA0      ? 2 : 1 )
581
c4d5f83a
NIS
582
/* True when code point c is below 0xA0, the range for which UNISKIP yields a
 * one-byte encoding. */
#define UNI_IS_INVARIANT(c)             ((c) < 0xA0)

/* UTF-EBCDIC semantic macros - transform back into I8 and then compare.
 *
 * NATIVE_TO_UTF() yields an unsigned char, so in I8 terms:
 *   below 0xA0           a byte that stands for itself
 *   0xA0..0xBF (101xxxxx) a continuation byte
 *   0xC0 and above       a start byte
 *   0xC0..0xC7 (11000yyy) a start byte whose code point is below 0x100
 *
 * Each macro expands its argument exactly once, so an argument with side
 * effects (such as *s++) is evaluated a single time.  UTF8_IS_START and
 * UTF8_IS_DOWNGRADEABLE_START used to test ">= 0xA0" separately before a
 * second, stricter test; for byte values the stricter test alone gives the
 * same answer. */
#define UTF8_IS_START(c)                (NATIVE_TO_UTF(c) >= 0xC0)
#define UTF8_IS_CONTINUATION(c)         ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
#define UTF8_IS_CONTINUED(c)            (NATIVE_TO_UTF(c) >= 0xA0)
#define UTF8_IS_DOWNGRADEABLE_START(c)  ((NATIVE_TO_UTF(c) & 0xF8) == 0xC0)
589
22901f30
RGS
/* Marker bits of the first byte of a len-byte sequence: 'len' one bits
 * followed by a zero bit for lengths up to 7, all ones beyond that. */
#define UTF_START_MARK(len)     (((len) <= 7) ? ((U8)(0xFE << (7-(len)))) : 0xFF)
/* Mask for the payload bits that share the first byte with the marker; from
 * length 6 upwards a single bit is left. */
#define UTF_START_MASK(len)     (((len) < 6) ? (0x1F >> ((len)-2)) : 0x01)
/* Continuation bytes have the form 101xxxxx: a fixed three-bit marker and
 * five payload bits, hence the shift of 5 per byte when accumulating. */
#define UTF_CONTINUATION_MARK   0xA0
#define UTF_CONTINUATION_MASK   ((U8)0x1f)
#define UTF_ACCUMULATION_SHIFT  5
595
e9a8c099
MHM
596/*
597 * Local variables:
598 * c-indentation-style: bsd
599 * c-basic-offset: 4
600 * indent-tabs-mode: t
601 * End:
602 *
603 * ex: set ts=8 sts=4 sw=4 noet:
604 */