3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
4 * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
10 * Taken from version 7.1 of Unicode Technical Report #16:
11 * http://www.unicode.org/unicode/reports/tr16
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
15 * 1) convert to Unicode. The table in this file that does this for
16 * EBCDIC bytes is PL_e2a (with inverse PL_a2e). The 'a' stands for
17 * ASCIIish, meaning latin1.
18 * 2) convert that to a utf8-like string called I8 (I stands for
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
23 * essentially here are the differences:
25 * invariant byte: UTF-8 starts with 0; I8 starts with 0 or 100
26 * continuation byte: UTF-8 starts with 10; I8 starts with 101
27 * start byte same in both: if the code point requires N bytes,
28 * then the leading N bits are 1, followed by a 0. (No
29 * trailing 0 for the very largest possible allocation
30 * in I8, far beyond the current Unicode standard's
31 * max, as shown in the comment later in this file.)
32 * 3) Use the table published in tr16 to convert each byte from step 2 into
33 * final UTF-EBCDIC. That table is reproduced in this file as PL_utf2e,
34 * and its inverse is PL_e2utf. They are constructed so that all EBCDIC
35 * invariants remain invariant, but no others do. For example, the
36 * ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
37 * Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
38 * it back to 193. As an example of how a variant character works, take
39 * LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
40 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2
41 * converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
42 * converts those to 0x8B 0x73. The table is constructed so that the
43 * first byte of the final form of a variant will always have its upper
44 * bit set (at least in the encodings that Perl recognizes, and probably
45 * all). But note that the upper bit of some invariants is also 1.
47 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
48 * EBCDIC, reverse the steps.
50 * The EBCDIC invariants have been chosen to be those characters whose Unicode
51 * equivalents have ordinal numbers less than 160, that is the same characters
52 * that are expressible in ASCII, plus the C1 controls. So there are 160
53 * invariants instead of the 128 in UTF-8. (My guess is that this is because
54 * the C1 control NEL (and maybe others) is important in IBM.)
56 * The purpose of Step 3 is to make the encoding be invariant for the chosen
57 * characters. This messes up the convenient patterns found in step 2, so
58 * generally, one has to undo step 3 into a temporary to use them. However,
59 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
60 * each byte, it says how long the sequence is if that byte were to begin it
62 * There are actually 3 slightly different UTF-EBCDIC encodings in this file,
63 * one for each of the code pages recognized by Perl. That means that there
64 * are actually three different sets of tables, one for each code page. (If
65 * Perl is compiled on platforms using another EBCDIC code page, it may not
66 * compile, or Perl may silently mistake it for one of the three.)
68 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
69 * implementation of all 3 encodings, so for those Step 1 is trivial.
71 * (Note that the entries for invariant characters are necessarily the same in
72 * PL_e2a and PL_e2utf, and the same for their inverses.)
74 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
75 * of the same string. The maximum code point representable as 2 bytes in
76 * UTF-EBCDIC is 0x3FF, instead of 0x7FF in UTF-8.
82 /* Indexed by encoded byte this table gives the length of the sequence.
83 Adapted from the shadow flags table in tr16.
84 The entries marked 9 in tr16 are continuation bytes and are marked
85 as length 1 here so that we can recover.
87 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
88 EXTCONST unsigned char PL_utf8skip[] = {
89 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
90 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
91 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
92 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
93 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
94 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
95 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
96 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
97 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
98 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
99 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
100 2,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
101 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
102 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
103 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
104 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
108 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
109 unsigned char PL_utf8skip[] = {
110 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
111 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
112 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
113 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
114 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
115 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
116 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
117 1,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
118 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
119 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
120 2,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
121 1,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
122 4,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
123 1,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
124 7,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
125 1,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
129 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
130 unsigned char PL_utf8skip[] = {
131 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
132 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
135 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
136 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
137 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
138 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
139 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
140 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
141 2,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
142 1,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
143 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
144 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
145 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
146 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
150 /* Transform tables from tr16 applied after encoding to render encoding EBCDIC
151 * like, meaning that all the invariants are actually invariant, eg, that 'A'
154 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
155 EXTCONST unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-1047) */
156 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
157 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
158 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
159 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
160 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
161 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
162 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
163 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
164 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
165 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
166 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
167 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
168 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
169 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
170 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
171 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
174 EXTCONST unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-1047) to I8 */
175 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
178 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
179 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
180 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
181 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
182 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
183 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
184 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
185 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
186 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
187 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
188 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
189 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
190 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
194 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
195 unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (POSIX-BC) */
196 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
197 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
198 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
199 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
200 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
201 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
202 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
203 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
204 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
205 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
206 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
207 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
208 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
209 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
210 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
211 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
214 unsigned char PL_e2utf[] = { /* UTFEBCDIC (POSIX-BC) to I8 */
215 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
216 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
217 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
218 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
219 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
220 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
221 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
222 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
223 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
224 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
225 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
226 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
227 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
228 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
229 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
232 #endif /* POSIX-BC */
234 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
235 unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-037) */
236 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
237 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
238 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
239 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
240 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
241 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
242 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
243 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
244 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
245 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
246 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
247 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
248 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
249 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
250 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
251 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
254 unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-037) to I8 */
255 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
256 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
257 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
258 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
259 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
260 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
261 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
262 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
263 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
264 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
265 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
266 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
267 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
268 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
269 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
270 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
274 /* These tables moved from perl.h and converted to hex.
275 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
278 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
279 EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
280 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
281 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
282 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
284 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
285 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
286 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
287 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
288 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
289 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
290 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
291 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
292 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
293 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
294 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
295 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
298 EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
299 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
300 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
301 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
302 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
303 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
304 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
305 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
306 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
307 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
308 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
309 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
310 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
311 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
312 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
313 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
314 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
317 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
319 0, 1, 2, 3, 4, 5, 6, 7,
320 8, 9, 10, 11, 12, 13, 14, 15,
321 16, 17, 18, 19, 20, 21, 22, 23,
322 24, 25, 26, 27, 28, 29, 30, 31,
323 32, 33, 34, 35, 36, 37, 38, 39,
324 40, 41, 42, 43, 44, 45, 46, 47,
325 48, 49, 50, 51, 52, 53, 54, 55,
326 56, 57, 58, 59, 60, 61, 62, 63,
327 64, 65, 98, 99, 100, 101, 102, 103,
328 104, 105, 74, 75, 76, 77, 78, 79,
329 80, 113, 114, 115, 116, 117, 118, 119,
330 120, 89, 90, 91, 92, 93, 94, 95,
331 96, 97, 66, 67, 68, 69, 70, 71,
332 72, 73, 106, 107, 108, 109, 110, 111,
333 128, 81, 82, 83, 84, 85, 86, 87,
334 88, 121, 122, 123, 124, 125, 126, 127,
335 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
336 'H', 'I', 138, 139, 172, 186, 174, 143,
337 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
338 'Q', 'R', 154, 155, 158, 157, 156, 159,
339 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
340 'Y', 'Z', 170, 171, 140, 173, 142, 175,
341 176, 177, 178, 179, 180, 181, 182, 183,
342 184, 185, 141, 187, 188, 189, 190, 191,
343 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
344 'h', 'i', 202, 235, 236, 237, 238, 239,
345 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
346 'q', 'r', 218, 251, 252, 253, 254, 223,
347 224, 225, 's', 't', 'u', 'v', 'w', 'x',
348 'y', 'z', 234, 203, 204, 205, 206, 207,
349 240, 241, 242, 243, 244, 245, 246, 247,
350 248, 249, 250, 219, 220, 221, 222, 255
354 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
355 EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
356 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
357 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
358 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
359 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
360 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
361 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
362 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
363 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
364 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
365 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
366 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
367 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
368 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
369 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
370 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
371 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
374 EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
375 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
376 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
377 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
378 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
379 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
380 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
381 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
382 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
383 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
384 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
385 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
386 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
387 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
388 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
389 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
390 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
393 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
395 0, 1, 2, 3, 4, 5, 6, 7,
396 8, 9, 10, 11, 12, 13, 14, 15,
397 16, 17, 18, 19, 20, 21, 22, 23,
398 24, 25, 26, 27, 28, 29, 30, 31,
399 32, 33, 34, 35, 36, 37, 38, 39,
400 40, 41, 42, 43, 44, 45, 46, 47,
401 48, 49, 50, 51, 52, 53, 54, 55,
402 56, 57, 58, 59, 60, 61, 62, 63,
403 64, 65, 98, 99, 100, 101, 102, 103,
404 104, 105, 74, 75, 76, 77, 78, 79,
405 80, 113, 114, 115, 116, 117, 118, 119,
406 120, 89, 90, 91, 92, 93, 94, 95,
407 96, 97, 66, 67, 68, 69, 70, 71,
408 72, 73, 106, 107, 108, 109, 110, 111,
409 128, 81, 82, 83, 84, 85, 86, 87,
410 88, 121, 122, 123, 124, 125, 126, 127,
411 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
412 'H', 'I', 138, 139, 172, 173, 174, 143,
413 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
414 'Q', 'R', 154, 155, 158, 157, 156, 159,
415 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
416 'Y', 'Z', 170, 171, 140, 141, 142, 175,
417 176, 177, 178, 179, 180, 181, 182, 183,
418 184, 185, 186, 187, 188, 189, 190, 191,
419 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
420 'h', 'i', 202, 235, 236, 237, 238, 239,
421 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
422 'q', 'r', 218, 221, 252, 219, 254, 223,
423 192, 225, 's', 't', 'u', 'v', 'w', 'x',
424 'y', 'z', 234, 203, 204, 205, 206, 207,
425 240, 241, 242, 243, 244, 245, 246, 247,
426 248, 249, 250, 251, 220, 253, 222, 255
428 #endif /* POSIX-BC */
430 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
431 EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
432 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
433 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
434 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
435 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
436 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
437 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
438 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
439 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
440 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
441 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
442 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
443 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
444 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
445 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
446 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
447 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
450 EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
451 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
452 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
453 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
454 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
455 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
456 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
457 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
458 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
459 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
460 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
461 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
462 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
463 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
464 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
465 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
466 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
469 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
471 0, 1, 2, 3, 4, 5, 6, 7,
472 8, 9, 10, 11, 12, 13, 14, 15,
473 16, 17, 18, 19, 20, 21, 22, 23,
474 24, 25, 26, 27, 28, 29, 30, 31,
475 32, 33, 34, 35, 36, 37, 38, 39,
476 40, 41, 42, 43, 44, 45, 46, 47,
477 48, 49, 50, 51, 52, 53, 54, 55,
478 56, 57, 58, 59, 60, 61, 62, 63,
479 64, 65, 98, 99, 100, 101, 102, 103,
480 104, 105, 74, 75, 76, 77, 78, 79,
481 80, 113, 114, 115, 116, 117, 118, 119,
482 120, 89, 90, 91, 92, 93, 94, 95,
483 96, 97, 66, 67, 68, 69, 70, 71,
484 72, 73, 106, 107, 108, 109, 110, 111,
485 128, 81, 82, 83, 84, 85, 86, 87,
486 88, 121, 122, 123, 124, 125, 126, 127,
487 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
488 'H', 'I', 138, 139, 172, 173, 174, 143,
489 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
490 'Q', 'R', 154, 155, 158, 157, 156, 159,
491 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
492 'Y', 'Z', 170, 171, 140, 141, 142, 175,
493 176, 177, 178, 179, 180, 181, 182, 183,
494 184, 185, 186, 187, 188, 189, 190, 191,
495 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
496 'h', 'i', 202, 235, 236, 237, 238, 239,
497 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
498 'q', 'r', 218, 251, 252, 253, 254, 223,
499 224, 225, 's', 't', 'u', 'v', 'w', 'x',
500 'y', 'z', 234, 203, 204, 205, 206, 207,
501 240, 241, 242, 243, 244, 245, 246, 247,
502 248, 249, 250, 219, 220, 221, 222, 255
506 /* Since the EBCDIC code pages are isomorphic to Latin1, that table is merely a
508 EXTCONST unsigned char * PL_fold_latin1 = PL_fold;
511 EXTCONST unsigned char PL_utf8skip[];
512 EXTCONST unsigned char PL_e2utf[];
513 EXTCONST unsigned char PL_utf2e[];
514 EXTCONST unsigned char PL_e2a[];
515 EXTCONST unsigned char PL_a2e[];
516 EXTCONST unsigned char PL_fold[];
517 EXTCONST unsigned char * PL_fold_latin1;
522 /* EBCDIC-happy ways of converting native code to UTF-8 */
/* Single-byte table lookups.  Each argument is cast to U8 before it is used
 * as the table index. */

/* Native code point <-> Latin-1, through PL_e2a / PL_a2e. */
#define NATIVE_TO_LATIN1(ch)      PL_e2a[(U8)(ch)]
#define LATIN1_TO_NATIVE(ch)      PL_a2e[(U8)(ch)]

/* One byte of native UTF-EBCDIC <-> the corresponding I8 byte, through
 * PL_e2utf / PL_utf2e.  The expansion has to be the bare table lookup: a
 * leading "(ch)" in front of it turns every use into a syntax error. */
#define NATIVE_UTF8_TO_I8(ch)     PL_e2utf[(U8)(ch)]
#define I8_TO_NATIVE_UTF8(ch)     PL_utf2e[(U8)(ch)]
/* Wide (UV) conversions.  A value above 255 is returned unchanged; anything
 * else is mapped through the single-byte Latin-1 macros.  The argument may be
 * evaluated more than once. */
#define NATIVE_TO_UNI(ch)     (((ch) <= 255) ? NATIVE_TO_LATIN1(ch) : (ch))
#define UNI_TO_NATIVE(ch)     (((ch) <= 255) ? LATIN1_TO_NATIVE(ch) : (ch))
535 The following table is adapted from tr16; it shows the I8 encoding of Unicode code points.
537 Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
538 U+0000..U+007F 000000000xxxxxxx 0xxxxxxx
539 U+0080..U+009F 00000000100xxxxx 100xxxxx
540 U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx
541 U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx
542 U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx
543 U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
544 U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
545 U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
547 Note: The I8 transformation is valid for UCS-4 values X'0' to
548 X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).
/* Number of I8 (and therefore UTF-EBCDIC) bytes needed to encode code point
 * uv.  Each threshold is the first code point of the next row of the I8 table
 * above: below 0xA0 one byte, below 0x400 two, below 0x4000 three, and so on
 * up to seven bytes.  uv is evaluated more than once. */
#define UNISKIP(uv) ( (uv) < 0xA0        ? 1 : \
                      (uv) < 0x400       ? 2 : \
                      (uv) < 0x4000      ? 3 : \
                      (uv) < 0x40000     ? 4 : \
                      (uv) < 0x400000    ? 5 : \
                      (uv) < 0x4000000   ? 6 : 7 )

/* True when c is below 0xA0, the range that UNISKIP() reports as one byte. */
#define UNI_IS_INVARIANT(c)   ((c) < 0xA0)
561 /* UTF-EBCDIC semantic macros - transform back into I8 and then compare
562 * Comments as to the meaning of each are given at their corresponding utf8.h
/* Byte classification for UTF-EBCDIC.  Every test maps the native byte to I8
 * with NATIVE_UTF8_TO_I8() and inspects the I8 value; c may be evaluated more
 * than once. */

/* I8 value is 0xC5 or above, and is not 0xE0. */
#define UTF8_IS_START(c)                (0xC5 <= NATIVE_UTF8_TO_I8(c) \
                                         && 0xE0 != NATIVE_UTF8_TO_I8(c))

/* Top three bits of the I8 value are 101. */
#define UTF8_IS_CONTINUATION(c)         (0xA0 == (NATIVE_UTF8_TO_I8(c) & 0xE0))

/* I8 value is 0xA0 or above. */
#define UTF8_IS_CONTINUED(c)            (0xA0 <= NATIVE_UTF8_TO_I8(c))

/* I8 value lies in 0xC5..0xC7 inclusive. */
#define UTF8_IS_DOWNGRADEABLE_START(c)  (0xC5 <= NATIVE_UTF8_TO_I8(c) \
                                         && 0xC7 >= NATIVE_UTF8_TO_I8(c))

/* I8 value is 0xC8 or above. */
#define UTF8_IS_ABOVE_LATIN1(c)         (0xC8 <= NATIVE_UTF8_TO_I8(c))
/* Lead-byte marker bits for a sequence of len bytes: 0xFE shifted left by
 * (7 - len) and cast to U8, or 0xFF once len exceeds 7. */
#define UTF_START_MARK(len)     (((len) <= 7) ? ((U8)(0xFE << (7 - (len)))) : 0xFF)

/* Mask for the payload bits of the lead byte: 0x1F shifted right by
 * (len - 2), or 0x01 when len is 6 or more. */
#define UTF_START_MASK(len)     (((len) < 6) ? (0x1F >> ((len) - 2)) : 0x01)

/* Continuation bytes: marker value, payload mask, and the number of payload
 * bits each one contributes. */
#define UTF_CONTINUATION_MARK   0xA0
#define UTF_CONTINUATION_MASK   ((U8)0x1f)
#define UTF_ACCUMULATION_SHIFT  5
/* Upper bound, in bytes, on the width of one encoded character.
 * NOTE: Perl uses this encoding to store any non-negative integer, not only
 * Unicode code points, so the name "UTF-8" is not strictly accurate.  The
 * Unicode maximum, 0x10FFFF, fits in 5 bytes; the longest form described in
 * this file is 7 bytes. */
#define UTF8_MAXBYTES 7
/* Upper bound, in bytes, on what a single Unicode character can become when
 * it is uppercased, lowercased or folded.  Unicode guarantees an expansion of
 * at most 3 characters, and on EBCDIC platforms the highest Unicode character
 * occupies 5 bytes, hence 3 * 5 = 15. */
#define UTF8_MAXBYTES_CASE 15
594 * c-indentation-style: bsd
596 * indent-tabs-mode: nil
599 * ex: set ts=8 sts=4 sw=4 et: