| 1 | /* utfebcdic.h |
| 2 | * |
| 3 | * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 by Larry Wall, |
| 4 | * Nick Ing-Simmons, and others |
| 5 | * |
| 6 | * You may distribute under the terms of either the GNU General Public |
| 7 | * License or the Artistic License, as specified in the README file. |
| 8 | * |
| 9 | * Macros to implement UTF-EBCDIC as perl's internal encoding |
 * Taken from version 7.1 of Unicode Technical Report #16:
| 11 | * http://www.unicode.org/unicode/reports/tr16 |
| 12 | * |
| 13 | * To summarize, the way it works is: |
| 14 | * To convert an EBCDIC character to UTF-EBCDIC: |
| 15 | * 1) convert to Unicode. The table in this file that does this for |
 *     EBCDIC bytes is PL_e2a (with inverse PL_a2e).  The 'a' stands for
| 17 | * ASCIIish, meaning latin1. |
| 18 | * 2) convert that to a utf8-like string called I8 (I stands for |
| 19 | * intermediate) with variant characters occupying multiple bytes. This |
| 20 | * step is similar to the utf8-creating step from Unicode, but the details |
| 21 | * are different. This transformation is called UTF8-Mod. There is a |
| 22 | * chart about the bit patterns in a comment later in this file. But |
| 23 | * essentially here are the differences: |
 *                          UTF8             I8
 *     invariant byte       starts with 0    starts with 0 or 100
 *     continuation byte    starts with 10   starts with 101
 *     start byte           same in both: if the code point requires N bytes,
 *                          then the leading N bits are 1, followed by a 0.  (No
 *                          trailing 0 for the very largest possible allocation
 *                          in I8, far beyond the current Unicode standard's
 *                          max, as shown in the comment later in this file.)
| 32 | * 3) Use the table published in tr16 to convert each byte from step 2 into |
| 33 | * final UTF-EBCDIC. That table is reproduced in this file as PL_utf2e, |
| 34 | * and its inverse is PL_e2utf. They are constructed so that all EBCDIC |
| 35 | * invariants remain invariant, but no others do. For example, the |
| 36 | * ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC. |
| 37 | * Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts |
| 38 | * it back to 193. As an example of how a variant character works, take |
 *     LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
| 40 | * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2 |
| 41 | * converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3 |
| 42 | * converts those to 0x8B 0x73. The table is constructed so that the |
| 43 | * first byte of the final form of a variant will always have its upper |
| 44 | * bit set (at least in the encodings that Perl recognizes, and probably |
| 45 | * all). But note that the upper bit of some invariants is also 1. |
| 46 | * |
| 47 | * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight |
| 48 | * EBCDIC, reverse the steps. |
| 49 | * |
| 50 | * The EBCDIC invariants have been chosen to be those characters whose Unicode |
| 51 | * equivalents have ordinal numbers less than 160, that is the same characters |
| 52 | * that are expressible in ASCII, plus the C1 controls. So there are 160 |
| 53 | * invariants instead of the 128 in UTF-8. (My guess is that this is because |
| 54 | * the C1 control NEL (and maybe others) is important in IBM.) |
| 55 | * |
| 56 | * The purpose of Step 3 is to make the encoding be invariant for the chosen |
| 57 | * characters. This messes up the convenient patterns found in step 2, so |
| 58 | * generally, one has to undo step 3 into a temporary to use them. However, |
 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
 * each byte, it says how long the sequence is if that byte were to begin it.
| 61 | * |
| 62 | * There are actually 3 slightly different UTF-EBCDIC encodings in this file, |
| 63 | * one for each of the code pages recognized by Perl. That means that there |
| 64 | * are actually three different sets of tables, one for each code page. (If |
| 65 | * Perl is compiled on platforms using another EBCDIC code page, it may not |
| 66 | * compile, or Perl may silently mistake it for one of the three.) |
| 67 | * |
| 68 | * EBCDIC characters above 0xFF are the same as Unicode in Perl's |
| 69 | * implementation of all 3 encodings, so for those Step 1 is trivial. |
| 70 | * |
| 71 | * (Note that the entries for invariant characters are necessarily the same in |
 * PL_e2a and PL_e2utf, and the same for their inverses.)
| 73 | * |
| 74 | * UTF-EBCDIC strings are the same length or longer than UTF-8 representations |
 * of the same string.  The maximum code point representable as 2 bytes in
 * UTF-EBCDIC is 0x3FF, instead of 0x7FF in UTF-8.
| 77 | */ |
| 78 | |
| 79 | START_EXTERN_C |
| 80 | |
| 81 | #ifdef DOINIT |
| 82 | /* Indexed by encoded byte this table gives the length of the sequence. |
| 83 | Adapted from the shadow flags table in tr16. |
   The entries marked 9 in tr16 are continuation bytes and are marked
| 85 | as length 1 here so that we can recover. |
| 86 | */ |
#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
/* Sequence-length ("skip") table for the IBM-1047 flavour of UTF-EBCDIC.
 *
 * Index: a UTF-EBCDIC byte (0x00..0xFF), 16 entries per row.
 * Value: the number of bytes in the sequence that would start with that
 *        byte.  Invariant bytes give 1; continuation bytes also give 1 so
 *        that a scanner that lands on one can still advance.
 *
 * The entries line up with this code page's PL_e2utf table: a byte whose I8
 * image is 0xC0..0xDF gives 2, 0xE0..0xEF gives 3, 0xF0..0xF7 gives 4,
 * 0xF8..0xFB gives 5, 0xFC..0xFD gives 6 and 0xFE..0xFF gives 7. */
EXTCONST unsigned char PL_utf8skip[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2,
2,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
};
#endif
| 107 | |
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
/* Sequence-length ("skip") table for the POSIX-BC flavour of UTF-EBCDIC.
 *
 * Index: a UTF-EBCDIC byte (0x00..0xFF), 16 entries per row.
 * Value: the number of bytes in the sequence that would start with that
 *        byte.  Invariant bytes give 1; continuation bytes also give 1 so
 *        that a scanner that lands on one can still advance.
 *
 * Defined with EXTCONST, like the IBM-1047 table and like the declaration
 * used when DOINIT is not set, so that the definition and every declaration
 * of PL_utf8skip agree on the object's qualifiers. */
EXTCONST unsigned char PL_utf8skip[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3,
4,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4,
7,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1
};
#endif
| 128 | |
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/* Sequence-length ("skip") table for the IBM-037 flavour of UTF-EBCDIC.
 *
 * Index: a UTF-EBCDIC byte (0x00..0xFF), 16 entries per row.
 * Value: the number of bytes in the sequence that would start with that
 *        byte.  Invariant bytes give 1; continuation bytes also give 1 so
 *        that a scanner that lands on one can still advance.
 *
 * Defined with EXTCONST, like the IBM-1047 table and like the declaration
 * used when DOINIT is not set, so that the definition and every declaration
 * of PL_utf8skip agree on the object's qualifiers. */
EXTCONST unsigned char PL_utf8skip[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
2,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2,
1,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,
1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4,
1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5,
1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1
};
#endif
| 149 | |
| 150 | /* Transform tables from tr16 applied after encoding to render encoding EBCDIC |
| 151 | * like, meaning that all the invariants are actually invariant, eg, that 'A' |
| 152 | * remains 'A' */ |
| 153 | |
#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
/* Step-3 byte permutation for IBM-1047: maps each I8 byte (the index) to the
 * UTF-EBCDIC byte that represents it.  16 entries per row, so row r covers
 * indices r*16 .. r*16+15.  The entries for the invariants (indices below
 * 0xA0) are the same as in PL_a2e; the remaining entries place the I8
 * continuation and start bytes on the remaining byte values. */
EXTCONST unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-1047) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
};

/* Inverse of PL_utf2e for IBM-1047: maps each UTF-EBCDIC byte (the index)
 * back to its I8 byte, so the I8 bit patterns can be tested.  16 entries per
 * row. */
EXTCONST unsigned char PL_e2utf[] = { /* EBCDIC (IBM-1047) to I8 */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8,
0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
};
#endif /* 1047 */
| 193 | |
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
/* Step-3 byte permutation for POSIX-BC: maps each I8 byte (the index) to the
 * UTF-EBCDIC byte that represents it.  16 entries per row, so row r covers
 * indices r*16 .. r*16+15.
 *
 * Defined with EXTCONST, like the IBM-1047 table and like the declaration
 * used when DOINIT is not set, so that the definition and every declaration
 * of PL_utf2e agree on the object's qualifiers. */
EXTCONST unsigned char PL_utf2e[] = { /* I8 to EBCDIC (POSIX-BC) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE
};

/* Inverse of PL_utf2e for POSIX-BC: maps each UTF-EBCDIC byte (the index)
 * back to its I8 byte, so the I8 bit patterns can be tested.  16 entries per
 * row.  Defined with EXTCONST for the same reason as PL_utf2e. */
EXTCONST unsigned char PL_e2utf[] = { /* EBCDIC (POSIX-BC) to I8 */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7,
0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3,
0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E
};
#endif /* POSIX-BC */
| 233 | |
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/* Step-3 byte permutation for IBM-037: maps each I8 byte (the index) to the
 * UTF-EBCDIC byte that represents it.  16 entries per row, so row r covers
 * indices r*16 .. r*16+15.
 *
 * Defined with EXTCONST, like the IBM-1047 table and like the declaration
 * used when DOINIT is not set, so that the definition and every declaration
 * of PL_utf2e agree on the object's qualifiers. */
EXTCONST unsigned char PL_utf2e[] = { /* I8 to EBCDIC (IBM-037) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56,
0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C,
0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6,
0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB,
0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE
};

/* Inverse of PL_utf2e for IBM-037: maps each UTF-EBCDIC byte (the index)
 * back to its I8 byte, so the I8 bit patterns can be tested.  16 entries per
 * row.  Defined with EXTCONST for the same reason as PL_utf2e. */
EXTCONST unsigned char PL_e2utf[] = { /* EBCDIC (IBM-037) to I8 */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9,
0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2,
0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8,
0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3,
0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F
};
#endif /* 037 */
| 273 | |
| 274 | /* These tables moved from perl.h and converted to hex. |
   They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1).
| 276 | */ |
| 277 | |
#if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */
/* Step-1 table, reverse direction, for IBM-1047: indexed by an ISO-8859-1
 * (low 256 Unicode) code point, gives the IBM-1047 byte for that character.
 * 16 entries per row, so row r covers indices r*16 .. r*16+15. */
EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};

/* Native (IBM-1047) byte values of three Latin-1 characters; PL_e2a below maps
 * 0xDF to 0xFF, 0x59 to 0xDF and 0xA0 to 0xB5.
 * NOTE(review): these #defines sit inside the #ifdef DOINIT region, so as
 * written they are only visible where DOINIT is defined -- confirm that is
 * intended. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0

/* Step-1 table for IBM-1047, the inverse of PL_a2e: indexed by an IBM-1047
 * byte, gives the ISO-8859-1 (low 256 Unicode) code point.  16 entries per
 * row. */
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE,
0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
};
#endif /* 1047 */
| 321 | |
#if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */
/* Step-1 table, reverse direction, for POSIX-BC: indexed by an ISO-8859-1
 * (low 256 Unicode) code point, gives the POSIX-BC byte for that character.
 * 16 entries per row, so row r covers indices r*16 .. r*16+15. */
EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D,
0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F,
0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1,
0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59,
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};

/* Native (POSIX-BC) byte values of three Latin-1 characters; PL_e2a below maps
 * 0xDF to 0xFF, 0x59 to 0xDF and 0xA0 to 0xB5.
 * NOTE(review): these #defines sit inside the #ifdef DOINIT region, so as
 * written they are only visible where DOINIT is defined -- confirm that is
 * intended. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0

/* Step-1 table for POSIX-BC, the inverse of PL_a2e: indexed by a POSIX-BC
 * byte, gives the ISO-8859-1 (low 256 Unicode) code point.  16 entries per
 * row. */
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F,
0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7,
0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF,
0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E
};
#endif /* POSIX-BC */
| 365 | |
#if '^' == 176 /* if defined(??) (OS/400?) 037 */
/* Step-1 table, reverse direction, for IBM-037: indexed by an ISO-8859-1
 * (low 256 Unicode) code point, gives the IBM-037 byte for that character.
 * 16 entries per row, so row r covers indices r*16 .. r*16+15. */
EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */
0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC,
0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59,
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF
};


/* Native (IBM-037) byte values of three Latin-1 characters; PL_e2a below maps
 * 0xDF to 0xFF, 0x59 to 0xDF and 0xA0 to 0xB5.
 * NOTE(review): these #defines sit inside the #ifdef DOINIT region, so as
 * written they are only visible where DOINIT is defined -- confirm that is
 * intended. */
#define LATIN_SMALL_LETTER_Y_WITH_DIAERESIS 0xDF
#define LATIN_SMALL_LETTER_SHARP_S 0x59
#define MICRO_SIGN 0xA0

/* Step-1 table for IBM-037, the inverse of PL_a2e: indexed by an IBM-037
 * byte, gives the ISO-8859-1 (low 256 Unicode) code point.  16 entries per
 * row. */
EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */
0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
};
#endif /* 037 */
| 410 | |
| 411 | #else |
| 412 | EXTCONST unsigned char PL_utf8skip[]; |
| 413 | EXTCONST unsigned char PL_e2utf[]; |
| 414 | EXTCONST unsigned char PL_utf2e[]; |
| 415 | EXTCONST unsigned char PL_e2a[]; |
| 416 | EXTCONST unsigned char PL_a2e[]; |
| 417 | #endif |
| 418 | |
| 419 | END_EXTERN_C |
| 420 | |
| 421 | /* EBCDIC-happy ways of converting native code to UTF-8 */ |
| 422 | |
/* Single-byte conversions between the native code page and ISO-8859-1.
 * The argument is truncated to a U8 and used as a table index. */
#define NATIVE_TO_ASCII(ch)      (PL_e2a[(U8)(ch)])
#define ASCII_TO_NATIVE(ch)      (PL_a2e[(U8)(ch)])

/* Per-byte permutation applied to already-encoded bytes:
 * UTF-EBCDIC byte -> I8 byte, and I8 byte -> UTF-EBCDIC byte. */
#define NATIVE_TO_UTF(ch)        (PL_e2utf[(U8)(ch)])
#define UTF_TO_NATIVE(ch)        (PL_utf2e[(U8)(ch)])

/* Code point conversions for wide (UV) values: only the low 256 code points
 * go through the tables; anything above 255 is returned unchanged. */
#define NATIVE_TO_UNI(ch)        (((ch) <= 255) ? NATIVE_TO_ASCII(ch) : (ch))
#define UNI_TO_NATIVE(ch)        (((ch) <= 255) ? ASCII_TO_NATIVE(ch) : (ch))

/* Produce the byte needed for the target string: when enc is true the result
 * is passed through UTF_TO_NATIVE, otherwise it stays a plain native byte. */
#define NATIVE_TO_NEED(enc,ch)   (!(enc) ? (ch) : UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)))
#define ASCII_TO_NEED(enc,ch)    (!(enc) ? ASCII_TO_NATIVE(ch) : UTF_TO_NATIVE(ch))
| 435 | |
| 436 | /* |
| 437 | The following table is adapted from tr16, it shows I8 encoding of Unicode code points. |
| 438 | |
| 439 | Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte |
| 440 | U+0000..U+007F 000000000xxxxxxx 0xxxxxxx |
| 441 | U+0080..U+009F 00000000100xxxxx 100xxxxx |
| 442 | U+00A0..U+00FF 00000000yyyxxxxx 11000yyy 101xxxxx |
| 443 | |
| 444 | U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx |
| 445 | U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx |
| 446 | U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx |
| 447 | U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 448 | U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 449 | U+4000000..U+7FFFFFFF 0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 450 | |
| 451 | Note: The I8 transformation is valid for UCS-4 values X'0' to |
| 452 | X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space). |
| 453 | |
| 454 | */ |
| 455 | |
/* Number of bytes the code point uv occupies once encoded.  Each line names
 * the largest code point that still fits in that many bytes; anything above
 * 0x3FFFFFF takes the maximum of 7.  Note that uv is expanded several times. */
#define UNISKIP(uv) ( (uv) <= 0x9F      ? 1 : \
                      (uv) <= 0x3FF     ? 2 : \
                      (uv) <= 0x3FFF    ? 3 : \
                      (uv) <= 0x3FFFF   ? 4 : \
                      (uv) <= 0x3FFFFF  ? 5 : \
                      (uv) <= 0x3FFFFFF ? 6 : 7 )
| 462 | |
| 463 | |
/* True if the code point c is represented by a single, unchanged byte. */
#define UNI_IS_INVARIANT(c)             ((c) < 0xA0)

/* UTF-EBCDIC semantic macros.  Each one maps the native byte c back to its I8
 * form with NATIVE_TO_UTF and then tests the I8 bit pattern.  NATIVE_TO_UTF
 * yields a table byte in the range 0x00..0xFF, and in that range:
 *    0x00..0x9F  invariant
 *    0xA0..0xBF  continuation byte (top three bits 101)
 *    0xC0..0xFF  start byte
 * so every test below needs only ONE lookup.  That keeps the macros safe to
 * use with an argument that has side effects, such as *s++. */

/* c begins a multi-byte sequence: I8 is at or above 0xA0 and is not a
 * continuation byte, i.e. it is 0xC0 or more. */
#define UTF8_IS_START(c)                (NATIVE_TO_UTF(c) >= 0xC0)
/* c is a continuation byte: its I8 form has the top three bits 101. */
#define UTF8_IS_CONTINUATION(c)         ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
/* c is any part of a multi-byte sequence (start or continuation). */
#define UTF8_IS_CONTINUED(c)            (NATIVE_TO_UTF(c) >= 0xA0)
/* c starts a two-byte sequence whose I8 start byte has the form 11000yyy,
 * the form used for code points up to 0xFF.  Any byte matching that pattern
 * is already at or above 0xA0, so no separate range test is required. */
#define UTF8_IS_DOWNGRADEABLE_START(c)  ((NATIVE_TO_UTF(c) & 0xF8) == 0xC0)
| 470 | |
/* Marker bits of the I8 start byte of a len-byte sequence: len leading 1 bits
 * followed by a 0 (0xC0 for 2 bytes ... 0xFE for 7).  Any len above 7 gives
 * 0xFF. */
#define UTF_START_MARK(len) \
    (((len) <= 7) ? ((U8)(0xFE << (7-(len)))) : 0xFF)

/* Mask for the payload bits carried in the start byte of a len-byte sequence:
 * 0x1F for 2 bytes, one bit fewer for each extra byte, and a single bit for
 * lengths of 6 and more. */
#define UTF_START_MASK(len) \
    (((len) < 6) ? (0x1F >> ((len)-2)) : 0x01)

/* Continuation bytes are 101xxxxx in I8: fixed top bits, 5-bit payload mask,
 * and the shift applied per continuation byte when accumulating a value. */
#define UTF_CONTINUATION_MARK   0xA0
#define UTF_CONTINUATION_MASK   ((U8)0x1F)
#define UTF_ACCUMULATION_SHIFT  5
| 476 | |
| 477 | /* |
| 478 | * Local variables: |
| 479 | * c-indentation-style: bsd |
| 480 | * c-basic-offset: 4 |
| 481 | * indent-tabs-mode: t |
| 482 | * End: |
| 483 | * |
| 484 | * ex: set ts=8 sts=4 sw=4 noet: |
| 485 | */ |