3 * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
4 * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
9 * Macros to implement UTF-EBCDIC as perl's internal encoding
10 * Adapted from version 7.1 of Unicode Technical Report #16:
11 * http://www.unicode.org/unicode/reports/tr16
13 * To summarize, the way it works is:
14 * To convert an EBCDIC character to UTF-EBCDIC:
15 * 1) convert to Unicode. The table in this file that does this for
16 * EBCDIC bytes is PL_e2a (with inverse PL_a2e). The 'a' stands for
17 * ASCII platform, meaning latin1.
18 * 2) convert that to a utf8-like string called I8 ('I' stands for
19 * intermediate) with variant characters occupying multiple bytes. This
20 * step is similar to the utf8-creating step from Unicode, but the details
21 * are different. This transformation is called UTF8-Mod. There is a
22 * chart about the bit patterns in a comment later in this file. But
23 * essentially here are the differences:
25 * invariant byte starts with 0 starts with 0 or 100
26 * continuation byte starts with 10 starts with 101
27 * start byte same in both: if the code point requires N bytes,
28 * then the leading N bits are 1, followed by a 0. (No
29 * trailing 0 for the very largest possible allocation
30 * in I8, far beyond the current Unicode standard's
31 * max, as shown in the comment later in this file.)
32 * 3) Use the algorithm in tr16 to convert each byte from step 2 into
33 * final UTF-EBCDIC. This is done by table lookup from a table
34 * constructed from the algorithm, reproduced in this file as
35 * PL_utf2e, with its inverse being PL_e2utf. They are constructed so that
36 * all EBCDIC invariants remain invariant, but no others do, and the first
37 * byte of a variant will always have its upper bit set. But note that
38 * the upper bit of some invariants is also 1.
40 * For example, the ordinal value of 'A' is 193 in EBCDIC, and also is 193 in
41 * UTF-EBCDIC. Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3
42 * converts it back to 193. As an example of how a variant character works,
43 * take LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
44 * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2 converts
45 * that to two bytes = 11000111 10111111 = C7 BF, and Step 3 converts those to
48 * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight
49 * EBCDIC, reverse the steps.
51 * The EBCDIC invariants have been chosen to be those characters whose Unicode
52 * equivalents have ordinal numbers less than 160, that is the same characters
53 * that are expressible in ASCII, plus the C1 controls. So there are 160
54 * invariants instead of the 128 in UTF-8. (My guess is that this is because
55 * the C1 control NEL (and maybe others) is important in IBM.)
57 * The purpose of Step 3 is to make the encoding be invariant for the chosen
58 * characters. This messes up the convenient patterns found in step 2, so
59 * generally, one has to undo step 3 into a temporary to use them. However,
60 * one "shadow", or parallel table, PL_utf8skip, has been constructed that
61 * doesn't require undoing things. It is such that for each byte, it says
62 * how long the sequence is if that (UTF-EBCDIC) byte were to begin it
64 * There are actually 3 slightly different UTF-EBCDIC encodings in
65 * this file, one for each of the code pages recognized by Perl. That
66 * means that there are actually three different sets of tables, one for each
67 * code page. (If Perl is compiled on platforms using another EBCDIC code
68 * page, it may not compile, or Perl may silently mistake it for one of the
71 * Note that tr16 actually only specifies one version of UTF-EBCDIC, based on
72 * the 1047 encoding, and which is supposed to be used for all code pages.
73 * But this doesn't work. To illustrate the problem, consider the '^' character.
74 * On a 037 code page it is the single byte 176, whereas under 1047 UTF-EBCDIC
75 * it is the single byte 95. If Perl implemented tr16 exactly, it would mean
76 * that changing a string containing '^' to UTF-EBCDIC would change that '^'
77 * from 176 to 95 (and vice-versa), violating the rule that ASCII-range
78 * characters are the same in UTF-8 or not. Much code in Perl assumes this
79 * rule. See for example
80 * http://grokbase.com/t/perl/mvs/025xf0yhmn/utf-ebcdic-for-posix-bc-malformed-utf-8-character
81 * What Perl does is create a version of UTF-EBCDIC suited to each code page;
82 * the one for the 1047 code page is identical to what's specified in tr16.
83 * This complicates interchanging files between computers using different code
84 * pages. Best is to convert to I8 before sending them, as the I8
85 * representation is the same no matter what the underlying code page is.
87 * EBCDIC characters above 0xFF are the same as Unicode in Perl's
88 * implementation of all 3 encodings, so for those Step 1 is trivial.
90 * (Note that the entries for invariant characters are necessarily the same in
91 * PL_e2a and PL_e2utf; likewise for their inverses.)
93 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations
94 * of the same string. The maximum code point representable as 2 bytes in
95 * UTF-EBCDIC is 0x3FFF, instead of 0x7FFF in UTF-8.
101 /* Indexed by encoded byte this table gives the length of the sequence.
102 Adapted from the shadow flags table in tr16.
103 The entries marked 9 in tr16 are continuation bytes and are marked
104 as length 1 here so that we can recover.
106 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
107 EXTCONST U8 PL_utf8skip[] = {
108 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
109 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
110 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
111 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
112 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
113 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
114 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
115 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
116 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
117 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
118 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
119 2,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
120 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
121 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
122 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
123 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
127 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
129 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
130 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
131 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
132 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
135 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
136 1,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
137 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
138 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
139 2,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
140 1,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
141 4,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
142 1,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
143 7,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
144 1,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
148 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
150 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
151 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
152 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
153 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
154 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
155 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
156 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
157 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
158 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
159 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
160 2,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
161 1,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
162 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
163 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
164 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
165 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
169 /* Transform tables from tr16 applied after encoding to render encoding EBCDIC
170 * like, meaning that all the invariants are actually invariant, eg, that 'A'
173 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
174 EXTCONST U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-1047) */
175 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
176 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
177 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
178 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
179 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
180 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
181 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
182 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
183 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
184 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
185 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
186 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
187 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
188 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
189 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
190 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
193 EXTCONST U8 PL_e2utf[] = { /* UTFEBCDIC (IBM-1047) to I8 */
194 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
195 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
196 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
197 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
198 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
199 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
200 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
201 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
202 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
203 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
204 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
205 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
206 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
207 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
208 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
209 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
213 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
214 U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (POSIX-BC) */
215 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
216 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
217 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
218 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
219 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
220 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
221 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
222 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
223 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
224 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
225 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
226 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
227 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
228 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
229 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
230 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
233 U8 PL_e2utf[] = { /* UTFEBCDIC (POSIX-BC) to I8 */
234 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
235 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
236 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
237 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
238 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
239 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
240 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
241 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
242 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
243 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
244 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
245 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
246 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
247 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
248 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
249 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
251 #endif /* POSIX-BC */
253 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
254 U8 PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-037) */
255 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
256 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
257 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
258 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
259 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
260 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
261 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
262 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
263 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
264 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
265 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
266 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
267 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
268 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
269 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
270 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
273 U8 PL_e2utf[] = { /* UTFEBCDIC (IBM-037) to I8 */
274 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
275 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
276 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
277 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
278 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
279 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
280 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
281 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
282 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
283 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
284 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
285 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
286 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
287 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
288 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
289 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
293 /* These tables moved from perl.h and converted to hex.
294 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
297 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
298 EXTCONST U8 PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
299 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
300 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
301 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
302 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
303 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
304 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
305 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
306 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
307 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
308 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
309 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
310 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
311 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
312 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
313 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
314 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
317 EXTCONST U8 PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
318 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
319 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
320 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
321 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
322 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
323 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
324 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
325 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
326 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
327 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
328 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
329 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
330 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
331 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
332 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
333 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
336 EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
337 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
338 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
339 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
340 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
341 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
342 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
343 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
344 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
345 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
346 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
347 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0xAD, 0x8E, 0xAF,
348 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0x8D, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
349 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
350 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
351 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
352 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
355 EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
356 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
357 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
358 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
359 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
360 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
361 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
362 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
363 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
364 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xBA, 0xAE, 0x8F,
365 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
366 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
367 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
368 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
369 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
370 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
371 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
374 EXTCONST U8 PL_fold_latin1[] = { /* fast EBCDIC case folding table,
375 'A' => 'a'; 'a' => 'A'; full
377 0, 1, 2, 3, 4, 5, 6, 7,
378 8, 9, 10, 11, 12, 13, 14, 15,
379 16, 17, 18, 19, 20, 21, 22, 23,
380 24, 25, 26, 27, 28, 29, 30, 31,
381 32, 33, 34, 35, 36, 37, 38, 39,
382 40, 41, 42, 43, 44, 45, 46, 47,
383 48, 49, 50, 51, 52, 53, 54, 55,
384 56, 57, 58, 59, 60, 61, 62, 63,
385 64, 65, 98, 99, 100, 101, 102, 103,
386 104, 105, 74, 75, 76, 77, 78, 79,
387 80, 113, 114, 115, 116, 117, 118, 119,
388 120, 89, 90, 91, 92, 93, 94, 95,
389 96, 97, 66, 67, 68, 69, 70, 71,
390 72, 73, 106, 107, 108, 109, 110, 111,
391 128, 81, 82, 83, 84, 85, 86, 87,
392 88, 121, 122, 123, 124, 125, 126, 127,
393 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
394 'H', 'I', 138, 139, 172, 186, 174, 143,
395 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
396 'Q', 'R', 154, 155, 158, 157, 156, 159,
397 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
398 'Y', 'Z', 170, 171, 140, 173, 142, 175,
399 176, 177, 178, 179, 180, 181, 182, 183,
400 184, 185, 141, 187, 188, 189, 190, 191,
401 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
402 'h', 'i', 202, 235, 236, 237, 238, 239,
403 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
404 'q', 'r', 218, 251, 252, 253, 254, 223,
405 224, 225, 's', 't', 'u', 'v', 'w', 'x',
406 'y', 'z', 234, 203, 204, 205, 206, 207,
407 240, 241, 242, 243, 244, 245, 246, 247,
408 248, 249, 250, 219, 220, 221, 222, 255
412 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
413 EXTCONST U8 PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
414 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
415 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
416 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
417 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
418 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
419 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
420 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
421 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
422 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
423 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
424 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
425 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
426 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
427 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
428 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
429 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
432 EXTCONST U8 PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
433 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
434 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
435 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
436 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
437 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
438 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
439 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
440 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
441 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
442 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
443 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
444 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
445 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
446 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
447 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
448 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
451 EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
452 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
453 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
454 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
455 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
456 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
457 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
458 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
459 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
460 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
461 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
462 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
463 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
464 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
465 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDB, 0xDE, 0xDF,
466 0xC0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
467 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xFD, 0xDE, 0xFF
470 EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
471 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
472 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
473 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
474 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
475 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
476 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
477 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
478 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
479 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
480 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
481 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
482 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
483 0xE0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
484 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDD, 0xFC, 0xDD, 0xFE, 0xDF,
485 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
486 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
489 EXTCONST U8 PL_fold_latin1[] = { /* fast EBCDIC case folding table,
490 'A' => 'a'; 'a' => 'A'; full
492 0, 1, 2, 3, 4, 5, 6, 7,
493 8, 9, 10, 11, 12, 13, 14, 15,
494 16, 17, 18, 19, 20, 21, 22, 23,
495 24, 25, 26, 27, 28, 29, 30, 31,
496 32, 33, 34, 35, 36, 37, 38, 39,
497 40, 41, 42, 43, 44, 45, 46, 47,
498 48, 49, 50, 51, 52, 53, 54, 55,
499 56, 57, 58, 59, 60, 61, 62, 63,
500 64, 65, 98, 99, 100, 101, 102, 103,
501 104, 105, 74, 75, 76, 77, 78, 79,
502 80, 113, 114, 115, 116, 117, 118, 119,
503 120, 89, 90, 91, 92, 93, 94, 95,
504 96, 97, 66, 67, 68, 69, 70, 71,
505 72, 73, 106, 107, 108, 109, 110, 111,
506 128, 81, 82, 83, 84, 85, 86, 87,
507 88, 121, 122, 123, 124, 125, 126, 127,
508 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
509 'H', 'I', 138, 139, 172, 173, 174, 143,
510 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
511 'Q', 'R', 154, 155, 158, 157, 156, 159,
512 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
513 'Y', 'Z', 170, 171, 140, 141, 142, 175,
514 176, 177, 178, 179, 180, 181, 182, 183,
515 184, 185, 186, 187, 188, 189, 190, 191,
516 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
517 'h', 'i', 202, 235, 236, 237, 238, 239,
518 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
519 'q', 'r', 218, 221, 252, 219, 254, 223,
520 192, 225, 's', 't', 'u', 'v', 'w', 'x',
521 'y', 'z', 234, 203, 204, 205, 206, 207,
522 240, 241, 242, 243, 244, 245, 246, 247,
523 248, 249, 250, 251, 220, 253, 222, 255
525 #endif /* POSIX-BC */
527 #if '^' == 176 /* if defined(??) (OS/400?) 037 */
528 EXTCONST U8 PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
529 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
530 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
531 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
532 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
533 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
534 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
535 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
536 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
537 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
538 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
539 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
540 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
541 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
542 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
543 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
544 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
547 EXTCONST U8 PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
548 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
549 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
550 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
551 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
552 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
553 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
554 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
555 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
556 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
557 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
558 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
559 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
560 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
561 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
562 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
563 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
566 EXTCONST U8 PL_latin1_lc[] = { /* lowercasing */
567 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
568 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
569 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
570 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
571 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
572 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
573 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
574 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
575 0x70, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
576 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
577 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
578 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
579 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
580 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
581 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
582 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
585 EXTCONST U8 PL_mod_latin1_uc[] = { /* uppercasing */
586 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
587 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
588 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
589 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
590 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
591 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0xDF, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
592 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
593 0x80, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
594 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
595 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
596 0xDF, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
597 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
598 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
599 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
600 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
601 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
604 EXTCONST U8 PL_fold_latin1[] = { /* fast EBCDIC case folding table,
605 'A' => 'a'; 'a' => 'A'; full
607 0, 1, 2, 3, 4, 5, 6, 7,
608 8, 9, 10, 11, 12, 13, 14, 15,
609 16, 17, 18, 19, 20, 21, 22, 23,
610 24, 25, 26, 27, 28, 29, 30, 31,
611 32, 33, 34, 35, 36, 37, 38, 39,
612 40, 41, 42, 43, 44, 45, 46, 47,
613 48, 49, 50, 51, 52, 53, 54, 55,
614 56, 57, 58, 59, 60, 61, 62, 63,
615 64, 65, 98, 99, 100, 101, 102, 103,
616 104, 105, 74, 75, 76, 77, 78, 79,
617 80, 113, 114, 115, 116, 117, 118, 119,
618 120, 89, 90, 91, 92, 93, 94, 95,
619 96, 97, 66, 67, 68, 69, 70, 71,
620 72, 73, 106, 107, 108, 109, 110, 111,
621 128, 81, 82, 83, 84, 85, 86, 87,
622 88, 121, 122, 123, 124, 125, 126, 127,
623 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
624 'H', 'I', 138, 139, 172, 173, 174, 143,
625 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
626 'Q', 'R', 154, 155, 158, 157, 156, 159,
627 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
628 'Y', 'Z', 170, 171, 140, 141, 142, 175,
629 176, 177, 178, 179, 180, 181, 182, 183,
630 184, 185, 186, 187, 188, 189, 190, 191,
631 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
632 'h', 'i', 202, 235, 236, 237, 238, 239,
633 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
634 'q', 'r', 218, 251, 252, 253, 254, 223,
635 224, 225, 's', 't', 'u', 'v', 'w', 'x',
636 'y', 'z', 234, 203, 204, 205, 206, 207,
637 240, 241, 242, 243, 244, 245, 246, 247,
638 248, 249, 250, 219, 220, 221, 222, 255
642 /* This is the same in all code pages, as only A-Z, a-z are affected */
643 EXTCONST U8 PL_fold[] = { /* fast EBCDIC case folding table, 'A' =>
645 0, 1, 2, 3, 4, 5, 6, 7,
646 8, 9, 10, 11, 12, 13, 14, 15,
647 16, 17, 18, 19, 20, 21, 22, 23,
648 24, 25, 26, 27, 28, 29, 30, 31,
649 32, 33, 34, 35, 36, 37, 38, 39,
650 40, 41, 42, 43, 44, 45, 46, 47,
651 48, 49, 50, 51, 52, 53, 54, 55,
652 56, 57, 58, 59, 60, 61, 62, 63,
653 64, 65, 66, 67, 68, 69, 70, 71,
654 72, 73, 74, 75, 76, 77, 78, 79,
655 80, 81, 82, 83, 84, 85, 86, 87,
656 88, 89, 90, 91, 92, 93, 94, 95,
657 96, 97, 98, 99, 100, 101, 102, 103,
658 104, 105, 106, 107, 108, 109, 110, 111,
659 112, 113, 114, 115, 116, 117, 118, 119,
660 120, 121, 122, 123, 124, 125, 126, 127,
661 128, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
662 'H', 'I', 138, 139, 140, 141, 142, 143,
663 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P',
664 'Q', 'R', 154, 155, 156, 157, 158, 159,
665 160, 161, 'S', 'T', 'U', 'V', 'W', 'X',
666 'Y', 'Z', 170, 171, 172, 173, 174, 175,
667 176, 177, 178, 179, 180, 181, 182, 183,
668 184, 185, 186, 187, 188, 189, 190, 191,
669 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
670 'h', 'i', 202, 203, 204, 205, 206, 207,
671 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
672 'q', 'r', 218, 219, 220, 221, 222, 223,
673 224, 225, 's', 't', 'u', 'v', 'w', 'x',
674 'y', 'z', 234, 235, 236, 237, 238, 239,
675 240, 241, 242, 243, 244, 245, 246, 247,
676 248, 249, 250, 251, 252, 253, 254, 255
680 EXTCONST U8 PL_utf8skip[];
681 EXTCONST U8 PL_e2utf[];
682 EXTCONST U8 PL_utf2e[];
683 EXTCONST U8 PL_e2a[];
684 EXTCONST U8 PL_a2e[];
685 EXTCONST U8 PL_fold[];
686 EXTCONST U8 PL_fold_latin1[];
687 EXTCONST U8 PL_latin1_lc[];
688 EXTCONST U8 PL_mod_latin1_uc[];
693 /* EBCDIC-happy ways of converting native code to UTF-8 */
695 #define NATIVE_TO_LATIN1(ch) PL_e2a[(U8)(ch)]
696 #define LATIN1_TO_NATIVE(ch) PL_a2e[(U8)(ch)]
698 #define NATIVE_UTF8_TO_I8(ch) PL_e2utf[(U8)(ch)]
699 #define I8_TO_NATIVE_UTF8(ch) PL_utf2e[(U8)(ch)]
701 /* Transforms in wide UV chars */
702 #define NATIVE_TO_UNI(ch) (((ch) > 255) ? (ch) : NATIVE_TO_LATIN1(ch))
703 #define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : LATIN1_TO_NATIVE(ch))
706 The following table is adapted from tr16, it shows I8 encoding of Unicode code points.
708 Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
709 U+0000..U+007F 000000000xxxxxxx 0xxxxxxx
710 U+0080..U+009F 00000000100xxxxx 100xxxxx
711 U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx
712 U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx
713 U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx
714 U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
715 U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
716 U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
718 Note: The I8 transformation is valid for UCS-4 values X'0' to
719 X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).
723 /* Input is a true Unicode (not-native) code point */
724 #define OFFUNISKIP(uv) ( (uv) < 0xA0 ? 1 : \
726 (uv) < 0x4000 ? 3 : \
727 (uv) < 0x40000 ? 4 : \
728 (uv) < 0x400000 ? 5 : \
729 (uv) < 0x4000000 ? 6 : 7 )
731 #define UNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0)
733 /* UTF-EBCDIC semantic macros - transform back into I8 and then compare
734 * Comments as to the meaning of each are given at their corresponding utf8.h
737 #define UTF8_IS_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
738 && NATIVE_UTF8_TO_I8(c) != 0xE0)
739 #define UTF8_IS_CONTINUATION(c) ((NATIVE_UTF8_TO_I8(c) & 0xE0) == 0xA0)
740 #define UTF8_IS_CONTINUED(c) (NATIVE_UTF8_TO_I8(c) >= 0xA0)
742 #define UTF8_IS_DOWNGRADEABLE_START(c) (NATIVE_UTF8_TO_I8(c) >= 0xC5 \
743 && NATIVE_UTF8_TO_I8(c) <= 0xC7)
744 /* Saying it this way adds a runtime test, but removes 2 run-time lookups */
745 /*#define UTF8_IS_DOWNGRADEABLE_START(c) ((c) == I8_TO_NATIVE_UTF8(0xC5) \
746 || (c) == I8_TO_NATIVE_UTF8(0xC6) \
747 || (c) == I8_TO_NATIVE_UTF8(0xC7))
749 #define UTF8_IS_ABOVE_LATIN1(c) (NATIVE_UTF8_TO_I8(c) >= 0xC8)
751 /* Can't exceed 7 on EBCDIC platforms */
752 #define UTF_START_MARK(len) (0xFF & (0xFE << (7-(len))))
754 #define UTF_START_MASK(len) (((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))
755 #define UTF_CONTINUATION_MARK 0xA0
756 #define UTF_CONTINUATION_MASK ((U8)0x1f)
757 #define UTF_ACCUMULATION_SHIFT 5
759 /* How wide can a single UTF-8 encoded character become in bytes. */
760 /* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
761 * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
762 * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode
763 * non-negative integers in a binary format, even those above Unicode */
764 #define UTF8_MAXBYTES 7
766 /* The maximum number of UTF-8 bytes a single Unicode character can
767 * uppercase/lowercase/fold into. Unicode guarantees that the maximum
768 * expansion is 3 characters. On EBCDIC platforms, the highest Unicode
769 * character occupies 5 bytes, therefore this number is 15 */
770 #define UTF8_MAXBYTES_CASE 15
772 #define MAX_UTF8_TWO_BYTE 0x3FF
776 * c-indentation-style: bsd
778 * indent-tabs-mode: nil
781 * ex: set ts=8 sts=4 sw=4 et: