This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utfebcdic.h: comment additions, fix typo
[perl5.git] / utfebcdic.h
CommitLineData
1d72bdf6
NIS
1/* utfebcdic.h
2 *
83706693
RGS
3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 by Larry Wall,
4 * Nick Ing-Simmons, and others
1d72bdf6
NIS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
10 * Taken from version 7.1 of Unicode Technical Report #16:
11 * http://www.unicode.org/unicode/reports/tr16
fe749c9a
KW
12 *
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
d06134e5 15 * 1) convert to Unicode. The table in this file that does this for
fe749c9a
KW
16 * EBCDIC bytes is PL_e2a (with inverse PL_a2e). The 'a' stands for
17 * ASCIIish, meaning latin1.
d06134e5
KW
18 * 2) convert that to a utf8-like string called I8 (I stands for
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
fe749c9a
KW
23 * essentially here are the differences:
24 * UTF8 I8
25 * invariant byte starts with 0 starts with 0 or 100
26 * continuation byte starts with 10 starts with 101
27 * start byte same in both: if the code point requires N bytes,
28 * then the leading N bits are 1, followed by a 0. (No
29 * trailing 0 for the very largest possible allocation
30 * in I8, far beyond the current Unicode standard's
31 * max, as shown in the comment later in this file.)
32 * 3) Use the table published in tr16 to convert each byte from step 2 into
d06134e5
KW
33 * final UTF-EBCDIC. That table is reproduced in this file as PL_utf2e,
34 * and its inverse is PL_e2utf. They are constructed so that all EBCDIC
35 * invariants remain invariant, but no others do. For example, the
36 * ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
37 * Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
38 * it back to 193. As an example of how a variant character works, take
39 * LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
40 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2
41 * converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
42 * converts those to 0x8B 0x73. The table is constructed so that the
43 * first byte of the final form of a variant will always have its upper
44 * bit set (at least in the encodings that Perl recognizes, and probably
45 * all). But note that the upper bit of some invariants is also 1.
fe749c9a
KW
46 *
47 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
48 * EBCDIC, reverse the steps.
49 *
50 * The EBCDIC invariants have been chosen to be those characters whose Unicode
51 * equivalents have ordinal numbers less than 160, that is the same characters
52 * that are expressible in ASCII, plus the C1 controls. So there are 160
53 * invariants instead of the 128 in UTF-8. (My guess is that this is because
54 * the C1 control NEL (and maybe others) is important in IBM.)
55 *
56 * The purpose of Step 3 is to make the encoding be invariant for the chosen
57 * characters. This messes up the convenient patterns found in step 2, so
58 * generally, one has to undo step 3 into a temporary to use them. However,
59 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
60 * each byte, it says how long the sequence is if that byte were to begin it.
61 *
62 * There are actually 3 slightly different UTF-EBCDIC encodings in this file,
63 * one for each of the code pages recognized by Perl. That means that there
64 * are actually three different sets of tables, one for each code page. (If
d06134e5
KW
65 * Perl is compiled on platforms using another EBCDIC code page, it may not
66 * compile, or Perl may silently mistake it for one of the three.)
fe749c9a
KW
67 *
68 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
69 * implementation of all 3 encodings, so for those Step 1 is trivial.
70 *
71 * (Note that the entries for invariant characters are necessarily the same in
72 * PL_e2a and PL_e2utf, and the same for their inverses.)
73 *
74 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
75 * of the same string. The maximum code point representable as 2 bytes in
76 * UTF-EBCDIC is 0x3FF, instead of 0x7FF in UTF-8.
1d72bdf6
NIS
77 */
78
79START_EXTERN_C
80
81#ifdef DOINIT
82/* Indexed by encoded byte this table gives the length of the sequence.
83 Adapted from the shadow flags table in tr16.
9df205e2 84 The entries marked 9 in tr16 are continuation bytes and are marked
c4d5f83a 85 as length 1 here so that we can recover.
1d72bdf6 86*/
f5e1abaf 87#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
1d72bdf6
NIS
88EXTCONST unsigned char PL_utf8skip[] = {
891,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
901,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
911,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
921,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
c4d5f83a
NIS
931,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
941,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
961,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1d72bdf6
NIS
972,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
982,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
992,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
1002,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
1011,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1021,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1031,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1041,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
105};
f5e1abaf
JH
106#endif
107
108#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
109unsigned char PL_utf8skip[] = {
1101,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1111,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1121,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1131,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1141,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1151,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1161,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1171,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
1182,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1192,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1202,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1211,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
1224,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1231,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
1247,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1251,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
126};
127#endif
128
129#if '^' == 176 /* if defined(??) (OS/400?) 037 */
130unsigned char PL_utf8skip[] = {
1311,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1321,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1331,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1341,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1351,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1361,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
1371,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1381,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
1392,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1402,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
1412,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1421,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
1431,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1441,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1451,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1461,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
147};
148#endif
1d72bdf6 149
fe749c9a
KW
150/* Transform tables from tr16 applied after encoding to render encoding EBCDIC
151 * like, meaning that all the invariants are actually invariant, eg, that 'A'
152 * remains 'A' */
1d72bdf6 153
f5e1abaf 154#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
d06134e5 155EXTCONST unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-1047) */
1d72bdf6
NIS
156 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
157 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
158 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
159 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
160 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
161 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
162 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
163 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
164 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
165 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
166 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
167 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
168 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
169 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
170 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 171 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
1d72bdf6
NIS
172};
173
d06134e5 174EXTCONST unsigned char PL_e2utf[] = { /* EBCDIC (IBM-1047) to I8 */
1d72bdf6
NIS
175 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
178 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
179 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
180 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
181 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
182 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
183 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
184 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
185 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
186 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
187 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
188 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
189 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 190 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
1d72bdf6 191};
f5e1abaf
JH
192#endif /* 1047 */
193
194#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
d06134e5 195unsigned char PL_utf2e[] = { /* I8 to EBCDIC (POSIX-BC) */
f5e1abaf
JH
196 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
197 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
198 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
199 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
200 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
201 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
202 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
203 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
204 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
205 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
206 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
207 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
208 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
209 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
210 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 211 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
f5e1abaf
JH
212};
213
d06134e5 214unsigned char PL_e2utf[] = { /* EBCDIC (POSIX-BC) to I8 */
f5e1abaf
JH
215 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
216 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
217 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
218 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
219 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
220 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
221 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
222 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
223 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
224 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
225 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
226 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
227 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
228 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
229 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
f5e1abaf
JH
231};
232#endif /* POSIX-BC */
233
234#if '^' == 176 /* if defined(??) (OS/400?) 037 */
d06134e5 235unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-037) */
f5e1abaf
JH
236 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
237 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
238 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
239 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
240 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
241 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
242 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
243 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
244 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
245 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
246 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
247 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
248 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
249 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
250 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
cbbb00c6 251 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
f5e1abaf
JH
252};
253
d06134e5 254unsigned char PL_e2utf[] = { /* EBCDIC (IBM-037) to I8 */
f5e1abaf
JH
255 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
256 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
257 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
258 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
259 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
260 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
261 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
262 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
263 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
264 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
265 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
266 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
267 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
268 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
269 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
cbbb00c6 270 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
f5e1abaf
JH
271};
272#endif /* 037 */
1d72bdf6
NIS
273
274/* These tables moved from perl.h and converted to hex.
275 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
276*/
277
278#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
279EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
280 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
281 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
282 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
284 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
285 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
286 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
287 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
288 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
289 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
290 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
291 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
292 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
293 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
294 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
295 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
296};
297
5cd46e1f
KW
298#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
299#define LATIN_SMALL_LETTER_SHARP_S 0x59
300#define MICRO_SIGN 0xA0
301
431fc773 302EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
1d72bdf6
NIS
303 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
304 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
305 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
306 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
307 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
308 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
309 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
310 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
311 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
312 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
313 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
314 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
315 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
316 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
317 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
318 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
319};
d02f4dad
KW
320
321EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
322 'a'; 'a' => 'A' */
323 0, 1, 2, 3, 4, 5, 6, 7,
324 8, 9, 10, 11, 12, 13, 14, 15,
325 16, 17, 18, 19, 20, 21, 22, 23,
326 24, 25, 26, 27, 28, 29, 30, 31,
327 32, 33, 34, 35, 36, 37, 38, 39,
328 40, 41, 42, 43, 44, 45, 46, 47,
329 48, 49, 50, 51, 52, 53, 54, 55,
330 56, 57, 58, 59, 60, 61, 62, 63,
331 64, 65, 98, 99, 100, 101, 102, 103,
332 104, 105, 74, 75, 76, 77, 78, 79,
333 80, 113, 114, 115, 116, 117, 118, 119,
334 120, 89, 90, 91, 92, 93, 94, 95,
335 96, 97, 66, 67, 68, 69, 70, 71,
336 72, 73, 106, 107, 108, 109, 110, 111,
337 128, 81, 82, 83, 84, 85, 86, 87,
338 88, 121, 122, 123, 124, 125, 126, 127,
339 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
340 'H', 'I', 138, 139, 172, 186, 174, 143,
341 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
342 'Q', 'R', 154, 155, 158, 157, 156, 159,
343 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
344 'Y', 'Z', 170, 171, 140, 173, 142, 175,
345 176, 177, 178, 179, 180, 181, 182, 183,
346 184, 185, 141, 187, 188, 189, 190, 191,
347 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
348 'h', 'i', 202, 235, 236, 237, 238, 239,
349 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
350 'q', 'r', 218, 251, 252, 253, 254, 223,
351 224, 225, 's', 't', 'u', 'v', 'w', 'x',
352 'y', 'z', 234, 203, 204, 205, 206, 207,
353 240, 241, 242, 243, 244, 245, 246, 247,
354 248, 249, 250, 219, 220, 221, 222, 255
355};
1d72bdf6
NIS
356#endif /* 1047 */
357
358#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
359EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
360 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
361 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
362 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
363 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
364 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
365 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
366 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
367 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
368 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
369 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
370 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
371 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
372 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
373 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
374 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
375 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
376};
377
5cd46e1f
KW
378#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
379#define LATIN_SMALL_LETTER_SHARP_S 0x59
380#define MICRO_SIGN 0xA0
381
1d72bdf6
NIS
382EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
383 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
384 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
385 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
386 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
387 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
388 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
389 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
390 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
391 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
392 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
393 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
394 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
395 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
396 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
397 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
398 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
399};
d02f4dad
KW
400
401EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
402 'a'; 'a' => 'A' */
403 0, 1, 2, 3, 4, 5, 6, 7,
404 8, 9, 10, 11, 12, 13, 14, 15,
405 16, 17, 18, 19, 20, 21, 22, 23,
406 24, 25, 26, 27, 28, 29, 30, 31,
407 32, 33, 34, 35, 36, 37, 38, 39,
408 40, 41, 42, 43, 44, 45, 46, 47,
409 48, 49, 50, 51, 52, 53, 54, 55,
410 56, 57, 58, 59, 60, 61, 62, 63,
411 64, 65, 98, 99, 100, 101, 102, 103,
412 104, 105, 74, 75, 76, 77, 78, 79,
413 80, 113, 114, 115, 116, 117, 118, 119,
414 120, 89, 90, 91, 92, 93, 94, 95,
415 96, 97, 66, 67, 68, 69, 70, 71,
416 72, 73, 106, 107, 108, 109, 110, 111,
417 128, 81, 82, 83, 84, 85, 86, 87,
418 88, 121, 122, 123, 124, 125, 126, 127,
419 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
420 'H', 'I', 138, 139, 172, 173, 174, 143,
421 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
422 'Q', 'R', 154, 155, 158, 157, 156, 159,
423 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
424 'Y', 'Z', 170, 171, 140, 141, 142, 175,
425 176, 177, 178, 179, 180, 181, 182, 183,
426 184, 185, 186, 187, 188, 189, 190, 191,
427 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
428 'h', 'i', 202, 235, 236, 237, 238, 239,
429 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
430 'q', 'r', 218, 221, 252, 219, 254, 223,
431 192, 225, 's', 't', 'u', 'v', 'w', 'x',
432 'y', 'z', 234, 203, 204, 205, 206, 207,
433 240, 241, 242, 243, 244, 245, 246, 247,
434 248, 249, 250, 251, 220, 253, 222, 255
435};
1d72bdf6 436#endif /* POSIX-BC */
f5e1abaf 437
1d72bdf6
NIS
438#if '^' == 176 /* if defined(??) (OS/400?) 037 */
439EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
440 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
441 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
442 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
443 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
444 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
445 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
446 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
447 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
448 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
449 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
450 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
451 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
452 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
453 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
454 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
455 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
456};
457
5cd46e1f
KW
458
459#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
460#define LATIN_SMALL_LETTER_SHARP_S 0x59
461#define MICRO_SIGN 0xA0
462
1d72bdf6
NIS
463EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
464 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
465 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
466 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
467 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
468 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
469 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
470 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
471 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
472 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
473 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
474 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
475 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
476 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
477 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
478 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
479 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
480};
d02f4dad
KW
481
482EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
483 'a'; 'a' => 'A' */
484 0, 1, 2, 3, 4, 5, 6, 7,
485 8, 9, 10, 11, 12, 13, 14, 15,
486 16, 17, 18, 19, 20, 21, 22, 23,
487 24, 25, 26, 27, 28, 29, 30, 31,
488 32, 33, 34, 35, 36, 37, 38, 39,
489 40, 41, 42, 43, 44, 45, 46, 47,
490 48, 49, 50, 51, 52, 53, 54, 55,
491 56, 57, 58, 59, 60, 61, 62, 63,
492 64, 65, 98, 99, 100, 101, 102, 103,
493 104, 105, 74, 75, 76, 77, 78, 79,
494 80, 113, 114, 115, 116, 117, 118, 119,
495 120, 89, 90, 91, 92, 93, 94, 95,
496 96, 97, 66, 67, 68, 69, 70, 71,
497 72, 73, 106, 107, 108, 109, 110, 111,
498 128, 81, 82, 83, 84, 85, 86, 87,
499 88, 121, 122, 123, 124, 125, 126, 127,
500 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
501 'H', 'I', 138, 139, 172, 173, 174, 143,
502 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
503 'Q', 'R', 154, 155, 158, 157, 156, 159,
504 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
505 'Y', 'Z', 170, 171, 140, 141, 142, 175,
506 176, 177, 178, 179, 180, 181, 182, 183,
507 184, 185, 186, 187, 188, 189, 190, 191,
508 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
509 'h', 'i', 202, 235, 236, 237, 238, 239,
510 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
511 'q', 'r', 218, 251, 252, 253, 254, 223,
512 224, 225, 's', 't', 'u', 'v', 'w', 'x',
513 'y', 'z', 234, 203, 204, 205, 206, 207,
514 240, 241, 242, 243, 244, 245, 246, 247,
515 248, 249, 250, 219, 220, 221, 222, 255
516};
1d72bdf6
NIS
517#endif /* 037 */
518
519#else
520EXTCONST unsigned char PL_utf8skip[];
521EXTCONST unsigned char PL_e2utf[];
522EXTCONST unsigned char PL_utf2e[];
523EXTCONST unsigned char PL_e2a[];
524EXTCONST unsigned char PL_a2e[];
d02f4dad 525EXTCONST unsigned char PL_fold[];
1d72bdf6
NIS
526#endif
527
d02f4dad
KW
528/* Since the EBCDIC code pages are isomorphic to Latin1, that table is merely a
529 * duplicate */
530EXTCONST unsigned char * PL_fold_latin1 = PL_fold;
531
1d72bdf6
NIS
532END_EXTERN_C
533
/* EBCDIC-happy ways of converting native code to UTF-8 */

/* Native to iso-8859-1 and back.  The argument is truncated to a U8 and used
 * as an index into PL_e2a / PL_a2e. */
#define NATIVE_TO_ASCII(ch)      PL_e2a[(U8)(ch)]
#define ASCII_TO_NATIVE(ch)      PL_a2e[(U8)(ch)]

/* Transform after encoding, essentially converts to/from I8.  The argument is
 * truncated to a U8 and used as an index into PL_e2utf / PL_utf2e. */
#define NATIVE_TO_UTF(ch)        PL_e2utf[(U8)(ch)]   /* to I8 */
#define UTF_TO_NATIVE(ch)        PL_utf2e[(U8)(ch)]   /* from I8 */

/* Transform in wide UV char space: values above 255 pass through unchanged,
 * anything else goes through the byte tables.  'ch' may be evaluated twice. */
#define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch))
#define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))

/* Transform in invariant..byte space.  When 'enc' is true the byte is run
 * through UTF_TO_NATIVE (after NATIVE_TO_ASCII in the first macro); when it
 * is false NATIVE_TO_NEED returns 'ch' untouched and ASCII_TO_NEED applies
 * ASCII_TO_NATIVE.  ('enc' presumably says whether the surrounding text is
 * UTF-8 encoded -- confirm with the callers.) */
#define NATIVE_TO_NEED(enc,ch)   ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch))
#define ASCII_TO_NEED(enc,ch)    ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch))
548
/*
  The following table, adapted from tr16, shows the I8 encoding of Unicode
  code points.

  Unicode                                   Bit pattern  1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th Byte
  U+0000..U+007F                       000000000xxxxxxx  0xxxxxxx
  U+0080..U+009F                       00000000100xxxxx  100xxxxx
  U+00A0..U+00FF                       00000000yyyxxxxx  11000yyy 101xxxxx

  U+00A0..U+03FF                       000000yyyyyxxxxx  110yyyyy 101xxxxx
  U+0400..U+3FFF                       00zzzzyyyyyxxxxx  1110zzzz 101yyyyy 101xxxxx
  U+4000..U+3FFFF                   0wwwzzzzzyyyyyxxxxx  11110www 101zzzzz 101yyyyy 101xxxxx
  U+40000..U+3FFFFF             0vvwwwwwzzzzzyyyyyxxxxx  111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
  U+400000..U+3FFFFFF       0uvvvvvwwwwwzzzzzyyyyyxxxxx  1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
  U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx

  (The U+00A0..U+00FF row is not a separate encoding form: it is the part of
  the two-byte U+00A0..U+03FF range whose code points still fit in a single
  byte, which is why its start byte always has the form 11000yyy.)

  Note: The I8 transformation is valid for UCS-4 values X'0' to
  X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).

 */
568
/* Number of bytes (1..7) that the I8 / UTF-EBCDIC encoding of code point 'uv'
 * occupies.  The thresholds are the range boundaries of the I8 table: each
 * continuation byte carries 5 payload bits.  Anything at or above 0x4000000
 * yields 7.  'uv' is evaluated up to six times, so it must be free of side
 * effects. */
#define UNISKIP(uv) ( (uv) < 0xA0       ? 1 : \
                      (uv) < 0x400      ? 2 : \
                      (uv) < 0x4000     ? 3 : \
                      (uv) < 0x40000    ? 4 : \
                      (uv) < 0x400000   ? 5 : \
                      (uv) < 0x4000000  ? 6 : 7 )
575
c4d5f83a
NIS
576
/* True for code points below 0xA0, i.e. exactly those for which UNISKIP()
 * gives 1 (a single byte). */
#define UNI_IS_INVARIANT(c)		((c) < 0xA0)
/* UTF-EBCDIC semantic macros - transform back into I8 and then compare.
 *
 * Each macro takes a native byte 'c', maps it to I8 with NATIVE_TO_UTF() and
 * classifies the result:
 *   UTF8_IS_START               0xA0 or above and not of the form 101xxxxx
 *   UTF8_IS_CONTINUATION        of the form 101xxxxx (0xA0..0xBF)
 *   UTF8_IS_CONTINUED           0xA0 or above, i.e. a start or continuation
 *   UTF8_IS_DOWNGRADEABLE_START of the form 11000yyy (0xC0..0xC7), the start
 *                               byte shown for U+00A0..U+00FF in the I8 table
 * 'c' may be evaluated more than once. */
#define UTF8_IS_START(c)		(NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xE0) != 0xA0)
#define UTF8_IS_CONTINUATION(c)		((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
#define UTF8_IS_CONTINUED(c) 		(NATIVE_TO_UTF(c) >= 0xA0)
#define UTF8_IS_DOWNGRADEABLE_START(c)	(NATIVE_TO_UTF(c) >= 0xA0 && (NATIVE_TO_UTF(c) & 0xF8) == 0xC0)
583
22901f30
RGS
/* Building blocks for assembling and taking apart I8 sequences.
 *
 * UTF_START_MARK(len)    fixed high bits of the start byte of a len-byte
 *                        sequence: 'len' one-bits followed by a zero, cut
 *                        down to a U8 (0xC0 for 2, 0xE0 for 3, ... 0xFE
 *                        for 7); 0xFF for any len above 7.
 * UTF_START_MASK(len)    payload bits left in that start byte: 0x1F for 2,
 *                        one bit fewer per extra byte, 0x01 for 6 or more.
 *                        Only meaningful for len >= 2 -- a smaller len makes
 *                        the shift count negative.
 * UTF_CONTINUATION_MARK  fixed high bits (101) of a continuation byte.
 * UTF_CONTINUATION_MASK  the five payload bits of a continuation byte.
 * UTF_ACCUMULATION_SHIFT number of payload bits per continuation byte. */
#define UTF_START_MARK(len)	(((len) >  7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
#define UTF_START_MASK(len)	(((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))
#define UTF_CONTINUATION_MARK		0xA0
#define UTF_CONTINUATION_MASK		((U8)0x1f)
#define UTF_ACCUMULATION_SHIFT		5
589
e9a8c099
MHM
590/*
591 * Local variables:
592 * c-indentation-style: bsd
593 * c-basic-offset: 4
594 * indent-tabs-mode: t
595 * End:
596 *
597 * ex: set ts=8 sts=4 sw=4 noet:
598 */