Commit | Line | Data |
---|---|---|
4bc3dcfa KW |
1 | /* -*- buffer-read-only: t -*- |
2 | * !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | |
3 | * This file is built by regen/ebcdic.pl. | |
4 | * Any changes made here will be lost! | |
5 | */ | |
6 | ||
7 | ||
6a5bc5ac KW |
8 | #ifndef PERL_EBCDIC_TABLES_H_ /* Guard against nested #includes */ |
9 | #define PERL_EBCDIC_TABLES_H_ 1 | |
4bc3dcfa KW |
10 | |
11 | /* This file contains definitions for various tables used in EBCDIC handling. | |
3de6d141 KW |
12 | * More info is in utfebcdic.h |
13 | * | |
14 | * Some of the tables are adapted from | |
f6521f7c | 15 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
3de6d141 KW |
16 | * which requires this copyright notice: |
17 | ||
18 | Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> | |
19 | ||
20 | Permission is hereby granted, free of charge, to any person obtaining a copy of | |
21 | this software and associated documentation files (the "Software"), to deal in | |
22 | the Software without restriction, including without limitation the rights to | |
23 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |
24 | of the Software, and to permit persons to whom the Software is furnished to do | |
25 | so, subject to the following conditions: | |
26 | ||
27 | The above copyright notice and this permission notice shall be included in all | |
28 | copies or substantial portions of the Software. | |
29 | ||
30 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
31 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
32 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
33 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
34 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
35 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
36 | SOFTWARE. | |
37 | ||
38 | */ | |
4bc3dcfa KW |
39 | |
40 | #if 'A' == 193 /* EBCDIC 1047 */ \ | |
41 | && '\\' == 224 && '[' == 173 && ']' == 189 && '{' == 192 && '}' == 208 \ | |
42 | && '^' == 95 && '~' == 161 && '!' == 90 && '#' == 123 && '|' == 79 \ | |
c11f6329 | 43 | && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 21 |
4bc3dcfa KW |
44 | |
45 | /* Index is ASCII platform code point; value is EBCDIC 1047 equivalent. In the rows below the row label is the index's high nibble and the column label its low nibble. */ | |
c05125c5 | 46 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 47 | EXTCONST U8 PL_a2e[];
c05125c5 | 48 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 49 | EXTCONST U8 PL_a2e[] = {
94e72741 KW |
50 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
51 | /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, | |
52 | /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, | |
53 | /*2_*/0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, | |
54 | /*3_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, | |
55 | /*4_*/0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, | |
56 | /*5_*/0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, | |
57 | /*6_*/0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, | |
58 | /*7_*/0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, | |
59 | /*8_*/0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, | |
60 | /*9_*/0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, | |
61 | /*A_*/0x41,0xAA,0x4A,0xB1,0x9F,0xB2,0x6A,0xB5,0xBB,0xB4,0x9A,0x8A,0xB0,0xCA,0xAF,0xBC, | |
62 | /*B_*/0x90,0x8F,0xEA,0xFA,0xBE,0xA0,0xB6,0xB3,0x9D,0xDA,0x9B,0x8B,0xB7,0xB8,0xB9,0xAB, | |
63 | /*C_*/0x64,0x65,0x62,0x66,0x63,0x67,0x9E,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, | |
64 | /*D_*/0xAC,0x69,0xED,0xEE,0xEB,0xEF,0xEC,0xBF,0x80,0xFD,0xFE,0xFB,0xFC,0xBA,0xAE,0x59, | |
65 | /*E_*/0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, | |
66 | /*F_*/0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF | |
67 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 68 | };
c05125c5 | 69 | # endif /* DOINIT */
4bc3dcfa KW |
70 | |
71 | /* Index is EBCDIC 1047 code point; value is ASCII platform equivalent. This is the inverse permutation of the EBCDIC 1047 PL_a2e table; row label is the index's high nibble, column label its low nibble. */ | |
c05125c5 | 72 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 73 | EXTCONST U8 PL_e2a[];
c05125c5 | 74 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 75 | EXTCONST U8 PL_e2a[] = {
94e72741 KW |
76 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
77 | /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, | |
78 | /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, | |
79 | /*2_*/0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, | |
80 | /*3_*/0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, | |
81 | /*4_*/0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, | |
82 | /*5_*/0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, | |
83 | /*6_*/0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, | |
84 | /*7_*/0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, | |
85 | /*8_*/0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, | |
86 | /*9_*/0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, | |
87 | /*A_*/0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, | |
88 | /*B_*/0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, | |
89 | /*C_*/0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, | |
90 | /*D_*/0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, | |
91 | /*E_*/0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, | |
92 | /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F | |
93 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 94 | };
c05125c5 | 95 | # endif /* DOINIT */
4bc3dcfa KW |
96 | |
97 | /* (Confusingly named) Index is EBCDIC 1047 I8 byte; value is | |
98 | * EBCDIC 1047 UTF-EBCDIC equivalent. Entries 0x00-0x9F are the same as in the EBCDIC 1047 PL_a2e table; only 0xA0-0xFF differ. */ | |
c05125c5 | 99 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 100 | EXTCONST U8 PL_utf2e[];
c05125c5 | 101 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 102 | EXTCONST U8 PL_utf2e[] = {
94e72741 KW |
103 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
104 | /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x15,0x0B,0x0C,0x0D,0x0E,0x0F, | |
105 | /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, | |
106 | /*2_*/0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, | |
107 | /*3_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, | |
108 | /*4_*/0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, | |
109 | /*5_*/0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAD,0xE0,0xBD,0x5F,0x6D, | |
110 | /*6_*/0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, | |
111 | /*7_*/0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, | |
112 | /*8_*/0x20,0x21,0x22,0x23,0x24,0x25,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, | |
113 | /*9_*/0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, | |
114 | /*A_*/0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, | |
115 | /*B_*/0x57,0x58,0x59,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72,0x73, | |
116 | /*C_*/0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B,0x9C, | |
117 | /*D_*/0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAE,0xAF,0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6, | |
118 | /*E_*/0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, | |
119 | /*F_*/0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE | |
120 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 121 | };
c05125c5 | 122 | # endif /* DOINIT */
4bc3dcfa KW |
123 | |
124 | /* (Confusingly named) Index is EBCDIC 1047 UTF-EBCDIC byte; value is | |
125 | * EBCDIC 1047 I8 equivalent. This is the inverse permutation of the EBCDIC 1047 PL_utf2e table. */ | |
c05125c5 | 126 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 127 | EXTCONST U8 PL_e2utf[];
c05125c5 | 128 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 129 | EXTCONST U8 PL_e2utf[] = {
94e72741 KW |
130 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
131 | /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, | |
132 | /*1_*/0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, | |
133 | /*2_*/0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, | |
134 | /*3_*/0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, | |
135 | /*4_*/0x20,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0x2E,0x3C,0x28,0x2B,0x7C, | |
136 | /*5_*/0x26,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0x21,0x24,0x2A,0x29,0x3B,0x5E, | |
137 | /*6_*/0x2D,0x2F,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0x2C,0x25,0x5F,0x3E,0x3F, | |
138 | /*7_*/0xBC,0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, | |
139 | /*8_*/0xC5,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB, | |
140 | /*9_*/0xCC,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xCD,0xCE,0xCF,0xD0,0xD1,0xD2, | |
141 | /*A_*/0xD3,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xD4,0xD5,0xD6,0x5B,0xD7,0xD8, | |
142 | /*B_*/0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0x5D,0xE6,0xE7, | |
143 | /*C_*/0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, | |
144 | /*D_*/0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3, | |
145 | /*E_*/0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, | |
146 | /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F | |
147 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 148 | };
c05125c5 | 149 | # endif /* DOINIT */
4bc3dcfa | 150 | |
4719093e KW |
151 | /* Index is EBCDIC 1047 UTF-EBCDIC byte; value is UTF8SKIP for start bytes |
152 | * (including for overlongs); 1 for continuation. Adapted from the shadow | |
153 | * flags table in tr16. The entries marked 9 in tr16 are continuation bytes | |
154 | * and are marked as length 1 here so that we can recover. Every entry is at least 1; the largest is 14, at index 0xFE. */ | |
c05125c5 | 155 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 156 | EXTCONST U8 PL_utf8skip[];
c05125c5 | 157 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 158 | EXTCONST U8 PL_utf8skip[] = {
94e72741 KW |
159 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
160 | /*0_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
161 | /*1_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
162 | /*2_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
163 | /*3_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
164 | /*4_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
165 | /*5_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
166 | /*6_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
167 | /*7_*/ 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | |
168 | /*8_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, | |
169 | /*9_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, | |
170 | /*A_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, | |
171 | /*B_*/ 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3, | |
172 | /*C_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, | |
173 | /*D_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, | |
174 | /*E_*/ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, | |
175 | /*F_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 | |
176 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 177 | };
c05125c5 | 178 | # endif /* DOINIT */
4bc3dcfa KW |
179 | |
180 | /* Index is EBCDIC 1047 code point; value is its lowercase equivalent. Code points that the table does not lowercase map to themselves. */ | |
c05125c5 | 181 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 182 | EXTCONST U8 PL_latin1_lc[];
c05125c5 | 183 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 184 | EXTCONST U8 PL_latin1_lc[] = {
94e72741 KW |
185 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
186 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
187 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
188 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
189 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
190 | /*4_*/0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
191 | /*5_*/0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
192 | /*6_*/0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
193 | /*7_*/0x70,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
194 | /*8_*/0x70,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, | |
195 | /*9_*/0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9C,0x9F, | |
196 | /*A_*/0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0x8C,0xAD,0x8E,0xAF, | |
197 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0x8D,0xBB,0xBC,0xBD,0xBE,0xBF, | |
198 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
199 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, | |
200 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
201 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF | |
202 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 203 | };
c05125c5 | 204 | # endif /* DOINIT */
4bc3dcfa KW |
205 | |
206 | /* Index is EBCDIC 1047 code point; value is its uppercase equivalent. | |
207 | * The 'mod' in the name means that codepoints whose uppercase is above 255 or | |
208 | * longer than 1 character map to LATIN SMALL LETTER Y WITH DIAERESIS (0xDF in this code page; see indices 0x59, 0xA0 and 0xDF below) */ | |
c05125c5 | 209 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 210 | EXTCONST U8 PL_mod_latin1_uc[];
c05125c5 | 211 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 212 | EXTCONST U8 PL_mod_latin1_uc[] = {
94e72741 KW |
213 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
214 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
215 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
216 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
217 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
218 | /*4_*/0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
219 | /*5_*/0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0xDF,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
220 | /*6_*/0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
221 | /*7_*/0x80,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
222 | /*8_*/0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xBA,0xAE,0x8F, | |
223 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9E,0x9F, | |
224 | /*A_*/0xDF,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, | |
225 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
226 | /*C_*/0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, | |
227 | /*D_*/0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, | |
228 | /*E_*/0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, | |
229 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF | |
230 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 231 | };
c05125c5 | 232 | # endif /* DOINIT */
4bc3dcfa KW |
233 | |
234 | /* Index is EBCDIC 1047 code point; For A-Z, value is a-z; for a-z, value | |
235 | * is A-Z; all other code points map to themselves. In this code page the letters occupy 0x81-0x89, 0x91-0x99, 0xA2-0xA9 (lowercase) and 0xC1-0xC9, 0xD1-0xD9, 0xE2-0xE9 (uppercase). */ | |
c05125c5 | 236 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 237 | EXTCONST U8 PL_fold[];
c05125c5 | 238 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 239 | EXTCONST U8 PL_fold[] = {
94e72741 KW |
240 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
241 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
242 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
243 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
244 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
245 | /*4_*/0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
246 | /*5_*/0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
247 | /*6_*/0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
248 | /*7_*/0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
249 | /*8_*/0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, | |
250 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, | |
251 | /*A_*/0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, | |
252 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
253 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
254 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, | |
255 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, | |
256 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF | |
257 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 258 | };
c05125c5 | 259 | # endif /* DOINIT */
4bc3dcfa KW |
260 | |
261 | /* Index is EBCDIC 1047 code point; value is its other fold-pair equivalent | |
262 | * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is | |
263 | * the code point itself. Unlike PL_fold, this table also pairs the non-ASCII letters (for example 0x42 <=> 0x62 and 0x70 <=> 0x80). */ | |
c05125c5 | 264 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 265 | EXTCONST U8 PL_fold_latin1[];
c05125c5 | 266 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 267 | EXTCONST U8 PL_fold_latin1[] = {
94e72741 KW |
268 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
269 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
270 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
271 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
272 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
273 | /*4_*/0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
274 | /*5_*/0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
275 | /*6_*/0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
276 | /*7_*/0x80,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
277 | /*8_*/0x70,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xBA,0xAE,0x8F, | |
278 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9C,0x9F, | |
279 | /*A_*/0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0x8C,0xAD,0x8E,0xAF, | |
280 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0x8D,0xBB,0xBC,0xBD,0xBE,0xBF, | |
281 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, | |
282 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, | |
283 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
284 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF | |
285 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 286 | };
c05125c5 | 287 | # endif /* DOINIT */
4bc3dcfa | 288 | |
3de6d141 KW |
289 | |
290 | /* The table below is adapted from | |
f6521f7c | 291 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
3de6d141 KW |
292 | * See copyright notice at the beginning of this file. |
293 | * Layout: rows 0_-F_ hold one entry per byte value (values 0-15 here); rows N0-N9 are 16 entries wide, and the number after '=' in each N label is that row's offset past the 256 byte entries. NOTE(review): in Hoehrmann's design the byte entry is a character class selecting a column of the current state's N row, and the value found there is the next state's offset -- confirm against regen/ebcdic.pl. */ | |
294 | ||
295 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */ | |
0a142f46 | 296 | EXTCONST U8 PL_extended_utf8_dfa_tab[];
3de6d141 | 297 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 298 | EXTCONST U8 PL_extended_utf8_dfa_tab[] = {
3de6d141 KW |
299 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
300 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
301 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
302 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
303 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
304 | /*4_ */ 0, 7, 7, 8, 8, 9, 9, 9, 9, 10, 10, 0, 0, 0, 0, 0, | |
305 | /*5_ */ 0, 10, 10, 10, 10, 10, 10, 11, 11, 11, 0, 0, 0, 0, 0, 0, | |
306 | /*6_ */ 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 0, | |
307 | /*7_ */ 11, 11, 11, 11, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, | |
308 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
309 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
310 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 2, | |
311 | /*B_ */ 2, 2, 2, 2, 2, 2, 2, 1, 3, 3, 3, 3, 3, 0, 3, 3, | |
312 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
313 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 12, 4, 4, 4, | |
314 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 13, 5, 5, | |
315 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 14, 6, 15, 1, 0, | |
316 | /*N0= 0*/ 0, 1, 16, 32, 48, 64, 80, 1, 1, 1, 1, 1, 96,112,128,144, | |
317 | /*N1= 16*/ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, | |
318 | /*N2= 32*/ 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 1, 1, 1, 1, | |
319 | /*N3= 48*/ 1, 1, 1, 1, 1, 1, 1, 32, 32, 32, 32, 32, 1, 1, 1, 1, | |
320 | /*N4= 64*/ 1, 1, 1, 1, 1, 1, 1, 48, 48, 48, 48, 48, 1, 1, 1, 1, | |
321 | /*N5= 80*/ 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 64, 64, 1, 1, 1, 1, | |
322 | /*N6= 96*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 1, 1, 1, 1, | |
323 | /*N7=112*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, 48, 1, 1, 1, 1, | |
324 | /*N8=128*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 1, 1, 1, 1, | |
325 | /*N9=144*/ 1, 1, 1, 1, 1, 1, 1, 1, 80, 80, 80, 80, 1, 1, 1, 1 | |
326 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15*/ | |
327 | }; | |
328 | # endif /* DOINIT */ | |
329 | ||
123deead KW |
330 | |
331 | /* The table below is adapted from | |
f6521f7c | 332 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
123deead KW |
333 | * See copyright notice at the beginning of this file. |
334 | * Layout: rows 0_-F_ hold one entry per byte value (values 0-22 here); rows N0-N14 are 23 entries wide, and the number after '=' in each N label is that row's offset past the 256 byte entries. Elements are U16 because some entries (e.g. 276, 299, 322) exceed 255. NOTE(review): in Hoehrmann's design the byte entry is a character class selecting a column of the current state's N row -- confirm against regen/ebcdic.pl. */ | |
335 | ||
336 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */ | |
0a142f46 | 337 | EXTCONST U16 PL_strict_utf8_dfa_tab[];
123deead | 338 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 339 | EXTCONST U16 PL_strict_utf8_dfa_tab[] = {
123deead KW |
340 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
341 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
342 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
343 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
344 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
345 | /*4_ */ 0, 10, 11, 12, 12, 12, 12, 12, 12, 13, 14, 0, 0, 0, 0, 0, | |
346 | /*5_ */ 0, 13, 14, 13, 14, 15, 16, 17, 18, 17, 0, 0, 0, 0, 0, 0, | |
347 | /*6_ */ 0, 0, 18, 17, 18, 19, 20, 17, 18, 17, 18, 0, 0, 0, 0, 0, | |
348 | /*7_ */ 17, 18, 21, 22, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, | |
349 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
350 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
351 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 2, | |
352 | /*B_ */ 2, 2, 2, 2, 2, 2, 2, 1, 3, 3, 3, 3, 3, 0, 3, 3, | |
353 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
354 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 8, 6, 4, 5, | |
355 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 5, 9, 7, 1, | |
356 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, | |
357 | /*N0 = 0*/ 0, 1, 23, 46, 69,138,115,184, 92,161, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
358 | /*N1 = 23*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
359 | /*N2 = 46*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, | |
360 | /*N3 = 69*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, | |
361 | /*N4 = 92*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, | |
362 | /*N5 =115*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 1, 1, 46,207, | |
363 | /*N6 =138*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,276, | |
364 | /*N7 =161*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 69,322, 69,322, 69,322, 69,322, 69,322, | |
365 | /*N8 =184*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 69,322, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
366 | /*N9 =207*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23,230,253, 23, 23, 23, 23, 23,299, | |
367 | /*N10=230*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, | |
368 | /*N11=253*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
369 | /*N12=276*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,299, | |
370 | /*N13=299*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, | |
371 | /*N14=322*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,299 | |
372 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22*/ | |
373 | }; | |
374 | # endif /* DOINIT */ | |
375 | ||
c5bfbb64 KW |
376 | |
377 | /* The table below is adapted from | |
f6521f7c | 378 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
c5bfbb64 KW |
379 | * See copyright notice at the beginning of this file. |
380 | * Layout: rows 0_-F_ hold one entry per byte value (values 0-13 here); rows N0-N7 are 14 entries wide, and the number after '=' in each N label is that row's offset past the 256 byte entries. NOTE(review): in Hoehrmann's design the byte entry is a character class selecting a column of the current state's N row -- confirm against regen/ebcdic.pl. */ | |
381 | ||
382 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */ | |
0a142f46 | 383 | EXTCONST U8 PL_c9_utf8_dfa_tab[];
c5bfbb64 | 384 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 385 | EXTCONST U8 PL_c9_utf8_dfa_tab[] = {
c5bfbb64 KW |
386 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
387 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
388 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
389 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
390 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
391 | /*4_ */ 0, 9, 9, 10, 10, 10, 10, 10, 10, 11, 11, 0, 0, 0, 0, 0, | |
392 | /*5_ */ 0, 11, 11, 11, 11, 11, 11, 12, 12, 12, 0, 0, 0, 0, 0, 0, | |
393 | /*6_ */ 0, 0, 12, 12, 12, 13, 13, 12, 12, 12, 12, 0, 0, 0, 0, 0, | |
394 | /*7_ */ 12, 12, 12, 12, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, | |
395 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
396 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
397 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 2, | |
398 | /*B_ */ 2, 2, 2, 2, 2, 2, 2, 1, 3, 3, 3, 3, 3, 0, 3, 3, | |
399 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
400 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 6, 5, 4, 4, | |
401 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 8, 7, 1, | |
402 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, | |
403 | /*N0= 0*/ 0, 1, 14, 28, 42, 70, 56, 98, 84, 1, 1, 1, 1, 1, | |
404 | /*N1=14*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
405 | /*N2=28*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, 14, | |
406 | /*N3=42*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, | |
407 | /*N4=56*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, | |
408 | /*N5=70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 1, | |
409 | /*N6=84*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 42, 42, | |
410 | /*N7=98*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 1, 1, 1, 1 | |
411 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13*/ | |
412 | }; | |
413 | # endif /* DOINIT */ | |
414 | ||
b37fc6e8 KW |
415 | /* This table partitions all the code points of the platform into ranges which |
416 | * have the property that all the code points in each range have the same | |
417 | * number of bytes in their UTF-EBCDIC representations, and the adjacent | |
418 | * ranges have a different number of bytes. | |
419 | * | |
420 | * Each number in the table begins such a range, which extends up to just | |
421 | * before the following table entry, except the final entry is understood to | |
422 | * extend to the platform's infinity. Entries are listed in strictly ascending order. | |
423 | */ | |
424 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */ | |
425 | EXTCONST UV PL_partition_by_byte_length[]; | |
426 | # else /* DOINIT set: emit the initialized table */ | |
427 | EXTCONST UV PL_partition_by_byte_length[] = { | |
428 | 0x00, | |
429 | 0x41, | |
430 | 0x4b, | |
431 | 0x51, | |
432 | 0x5a, | |
433 | 0x62, | |
434 | 0x6b, | |
435 | 0x70, | |
436 | 0x79, | |
437 | 0x80, | |
438 | 0x81, | |
439 | 0x8a, | |
440 | 0x91, | |
441 | 0x9a, | |
442 | 0xa1, | |
443 | 0xaa, | |
444 | 0xad, | |
445 | 0xae, | |
446 | 0xbd, | |
447 | 0xbe, | |
448 | 0xc0, | |
449 | 0xca, | |
450 | 0xd0, | |
451 | 0xda, | |
452 | 0xe0, | |
453 | 0xe1, | |
454 | 0xe2, | |
455 | 0xea, | |
456 | 0xf0, | |
457 | 0xfa, | |
458 | 0xff, | |
459 | 0x100, | |
460 | 0x400, | |
461 | 0x4000, | |
462 | 0x40000, | |
463 | 0x400000, | |
464 | 0x4000000, | |
465 | 0x40000000 | |
466 | }; | |
467 | # endif /* DOINIT */ | |
468 | ||
4bc3dcfa KW |
469 | #endif /* EBCDIC 1047 */ |
470 | ||
471 | #if 'A' == 193 /* EBCDIC 037 */ \ | |
472 | && '\\' == 224 && '[' == 186 && ']' == 187 && '{' == 192 && '}' == 208 \ | |
473 | && '^' == 176 && '~' == 161 && '!' == 90 && '#' == 123 && '|' == 79 \ | |
c11f6329 | 474 | && '$' == 91 && '@' == 124 && '`' == 121 && '\n' == 37 |
4bc3dcfa KW |
475 | |
476 | /* Index is ASCII platform code point; value is EBCDIC 037 equivalent. In the rows below the row label is the index's high nibble and the column label its low nibble. */ | |
c05125c5 | 477 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 478 | EXTCONST U8 PL_a2e[];
c05125c5 | 479 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 480 | EXTCONST U8 PL_a2e[] = {
94e72741 KW |
481 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
482 | /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, | |
483 | /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, | |
484 | /*2_*/0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, | |
485 | /*3_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, | |
486 | /*4_*/0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, | |
487 | /*5_*/0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBA,0xE0,0xBB,0xB0,0x6D, | |
488 | /*6_*/0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, | |
489 | /*7_*/0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, | |
490 | /*8_*/0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, | |
491 | /*9_*/0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, | |
492 | /*A_*/0x41,0xAA,0x4A,0xB1,0x9F,0xB2,0x6A,0xB5,0xBD,0xB4,0x9A,0x8A,0x5F,0xCA,0xAF,0xBC, | |
493 | /*B_*/0x90,0x8F,0xEA,0xFA,0xBE,0xA0,0xB6,0xB3,0x9D,0xDA,0x9B,0x8B,0xB7,0xB8,0xB9,0xAB, | |
494 | /*C_*/0x64,0x65,0x62,0x66,0x63,0x67,0x9E,0x68,0x74,0x71,0x72,0x73,0x78,0x75,0x76,0x77, | |
495 | /*D_*/0xAC,0x69,0xED,0xEE,0xEB,0xEF,0xEC,0xBF,0x80,0xFD,0xFE,0xFB,0xFC,0xAD,0xAE,0x59, | |
496 | /*E_*/0x44,0x45,0x42,0x46,0x43,0x47,0x9C,0x48,0x54,0x51,0x52,0x53,0x58,0x55,0x56,0x57, | |
497 | /*F_*/0x8C,0x49,0xCD,0xCE,0xCB,0xCF,0xCC,0xE1,0x70,0xDD,0xDE,0xDB,0xDC,0x8D,0x8E,0xDF | |
498 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 499 | };
c05125c5 | 500 | # endif /* DOINIT */
4bc3dcfa KW |
501 | |
502 | /* Index is EBCDIC 037 code point; value is ASCII platform equivalent. This is the inverse permutation of the EBCDIC 037 PL_a2e table; row label is the index's high nibble, column label its low nibble. */ | |
c05125c5 | 503 | # ifndef DOINIT /* DOINIT unset: name the table, no initializer */
0a142f46 | 504 | EXTCONST U8 PL_e2a[];
c05125c5 | 505 | # else /* DOINIT set: emit the initialized table */
0a142f46 | 506 | EXTCONST U8 PL_e2a[] = {
94e72741 KW |
507 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
508 | /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, | |
509 | /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, | |
510 | /*2_*/0x80,0x81,0x82,0x83,0x84,0x0A,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, | |
511 | /*3_*/0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, | |
512 | /*4_*/0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, | |
513 | /*5_*/0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0xAC, | |
514 | /*6_*/0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, | |
515 | /*7_*/0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, | |
516 | /*8_*/0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, | |
517 | /*9_*/0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, | |
518 | /*A_*/0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0xDD,0xDE,0xAE, | |
519 | /*B_*/0x5E,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0x5B,0x5D,0xAF,0xA8,0xB4,0xD7, | |
520 | /*C_*/0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, | |
521 | /*D_*/0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, | |
522 | /*E_*/0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, | |
523 | /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F | |
524 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 525 | };
c05125c5 | 526 | # endif /* DOINIT */
4bc3dcfa KW |
527 | |
528 | /* (Confusingly named) Index is EBCDIC 037 I8 byte; value is | |
529 | * EBCDIC 037 UTF-EBCDIC equivalent */ | |
c05125c5 | 530 | # ifndef DOINIT |
0a142f46 | 531 | EXTCONST U8 PL_utf2e[]; |
c05125c5 | 532 | # else |
0a142f46 | 533 | EXTCONST U8 PL_utf2e[] = { |
94e72741 KW |
534 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
535 | /*0_*/0x00,0x01,0x02,0x03,0x37,0x2D,0x2E,0x2F,0x16,0x05,0x25,0x0B,0x0C,0x0D,0x0E,0x0F, | |
536 | /*1_*/0x10,0x11,0x12,0x13,0x3C,0x3D,0x32,0x26,0x18,0x19,0x3F,0x27,0x1C,0x1D,0x1E,0x1F, | |
537 | /*2_*/0x40,0x5A,0x7F,0x7B,0x5B,0x6C,0x50,0x7D,0x4D,0x5D,0x5C,0x4E,0x6B,0x60,0x4B,0x61, | |
538 | /*3_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0x7A,0x5E,0x4C,0x7E,0x6E,0x6F, | |
539 | /*4_*/0x7C,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6, | |
540 | /*5_*/0xD7,0xD8,0xD9,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xBA,0xE0,0xBB,0xB0,0x6D, | |
541 | /*6_*/0x79,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x91,0x92,0x93,0x94,0x95,0x96, | |
542 | /*7_*/0x97,0x98,0x99,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xC0,0x4F,0xD0,0xA1,0x07, | |
543 | /*8_*/0x20,0x21,0x22,0x23,0x24,0x15,0x06,0x17,0x28,0x29,0x2A,0x2B,0x2C,0x09,0x0A,0x1B, | |
544 | /*9_*/0x30,0x31,0x1A,0x33,0x34,0x35,0x36,0x08,0x38,0x39,0x3A,0x3B,0x04,0x14,0x3E,0xFF, | |
545 | /*A_*/0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x51,0x52,0x53,0x54,0x55,0x56, | |
546 | /*B_*/0x57,0x58,0x59,0x5F,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x70,0x71,0x72, | |
547 | /*C_*/0x73,0x74,0x75,0x76,0x77,0x78,0x80,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,0x90,0x9A,0x9B, | |
548 | /*D_*/0x9C,0x9D,0x9E,0x9F,0xA0,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB1,0xB2,0xB3,0xB4,0xB5, | |
549 | /*E_*/0xB6,0xB7,0xB8,0xB9,0xBC,0xBD,0xBE,0xBF,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,0xDA,0xDB, | |
550 | /*F_*/0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE | |
551 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 552 | }; |
c05125c5 | 553 | # endif |
4bc3dcfa KW |
554 | |
555 | /* (Confusingly named) Index is EBCDIC 037 UTF-EBCDIC byte; value is | |
556 | * EBCDIC 037 I8 equivalent */ | |
c05125c5 | 557 | # ifndef DOINIT |
0a142f46 | 558 | EXTCONST U8 PL_e2utf[]; |
c05125c5 | 559 | # else |
0a142f46 | 560 | EXTCONST U8 PL_e2utf[] = { |
94e72741 KW |
561 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
562 | /*0_*/0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, | |
563 | /*1_*/0x10,0x11,0x12,0x13,0x9D,0x85,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, | |
564 | /*2_*/0x80,0x81,0x82,0x83,0x84,0x0A,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, | |
565 | /*3_*/0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, | |
566 | /*4_*/0x20,0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0x2E,0x3C,0x28,0x2B,0x7C, | |
567 | /*5_*/0x26,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,0xB0,0xB1,0xB2,0x21,0x24,0x2A,0x29,0x3B,0xB3, | |
568 | /*6_*/0x2D,0x2F,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0x2C,0x25,0x5F,0x3E,0x3F, | |
569 | /*7_*/0xBD,0xBE,0xBF,0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, | |
570 | /*8_*/0xC6,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC, | |
571 | /*9_*/0xCD,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xCE,0xCF,0xD0,0xD1,0xD2,0xD3, | |
572 | /*A_*/0xD4,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA, | |
573 | /*B_*/0x5E,0xDB,0xDC,0xDD,0xDE,0xDF,0xE0,0xE1,0xE2,0xE3,0x5B,0x5D,0xE4,0xE5,0xE6,0xE7, | |
574 | /*C_*/0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, | |
575 | /*D_*/0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xEE,0xEF,0xF0,0xF1,0xF2,0xF3, | |
576 | /*E_*/0x5C,0xF4,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA, | |
577 | /*F_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xFB,0xFC,0xFD,0xFE,0xFF,0x9F | |
578 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 579 | }; |
c05125c5 | 580 | # endif |
4bc3dcfa | 581 | |
4719093e KW |
582 | /* Index is EBCDIC 037 UTF-EBCDIC byte; value is UTF8SKIP for start bytes |
583 | * (including for overlongs); 1 for continuation. Adapted from the shadow | |
584 | * flags table in tr16. The entries marked 9 in tr16 are continuation bytes | |
585 | * and are marked as length 1 here so that we can recover. */ | |
c05125c5 | 586 | # ifndef DOINIT |
0a142f46 | 587 | EXTCONST U8 PL_utf8skip[]; |
c05125c5 | 588 | # else |
0a142f46 | 589 | EXTCONST U8 PL_utf8skip[] = { |
94e72741 KW |
590 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
591 | /*0_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
592 | /*1_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
593 | /*2_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
594 | /*3_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
595 | /*4_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
596 | /*5_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
597 | /*6_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
598 | /*7_*/ 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | |
599 | /*8_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, | |
600 | /*9_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, | |
601 | /*A_*/ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, | |
602 | /*B_*/ 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 3, | |
603 | /*C_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, | |
604 | /*D_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, | |
605 | /*E_*/ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, | |
606 | /*F_*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 14, 1 | |
607 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 608 | }; |
c05125c5 | 609 | # endif |
4bc3dcfa KW |
610 | |
611 | /* Index is EBCDIC 037 code point; value is its lowercase equivalent */ | |
c05125c5 | 612 | # ifndef DOINIT |
0a142f46 | 613 | EXTCONST U8 PL_latin1_lc[]; |
c05125c5 | 614 | # else |
0a142f46 | 615 | EXTCONST U8 PL_latin1_lc[] = { |
94e72741 KW |
616 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
617 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
618 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
619 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
620 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
621 | /*4_*/0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
622 | /*5_*/0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
623 | /*6_*/0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
624 | /*7_*/0x70,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
625 | /*8_*/0x70,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, | |
626 | /*9_*/0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9C,0x9F, | |
627 | /*A_*/0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, | |
628 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
629 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
630 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, | |
631 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
632 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF | |
633 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 634 | }; |
c05125c5 | 635 | # endif |
4bc3dcfa KW |
636 | |
637 | /* Index is EBCDIC 037 code point; value is its uppercase equivalent. | |
638 | * The 'mod' in the name means that codepoints whose uppercase is above 255 or | |
639 | * longer than 1 character map to LATIN SMALL LETTER Y WITH DIARESIS */ | |
c05125c5 | 640 | # ifndef DOINIT |
0a142f46 | 641 | EXTCONST U8 PL_mod_latin1_uc[]; |
c05125c5 | 642 | # else |
0a142f46 | 643 | EXTCONST U8 PL_mod_latin1_uc[] = { |
94e72741 KW |
644 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
645 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
646 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
647 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
648 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
649 | /*4_*/0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
650 | /*5_*/0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0xDF,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
651 | /*6_*/0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
652 | /*7_*/0x80,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
653 | /*8_*/0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, | |
654 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9E,0x9F, | |
655 | /*A_*/0xDF,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, | |
656 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
657 | /*C_*/0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, | |
658 | /*D_*/0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, | |
659 | /*E_*/0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, | |
660 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF | |
661 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 662 | }; |
c05125c5 | 663 | # endif |
4bc3dcfa KW |
664 | |
665 | /* Index is EBCDIC 037 code point; For A-Z, value is a-z; for a-z, value | |
666 | * is A-Z; all other code points map to themselves */ | |
c05125c5 | 667 | # ifndef DOINIT |
0a142f46 | 668 | EXTCONST U8 PL_fold[]; |
c05125c5 | 669 | # else |
0a142f46 | 670 | EXTCONST U8 PL_fold[] = { |
94e72741 KW |
671 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
672 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
673 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
674 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
675 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
676 | /*4_*/0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
677 | /*5_*/0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
678 | /*6_*/0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
679 | /*7_*/0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
680 | /*8_*/0x80,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, | |
681 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, | |
682 | /*A_*/0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF, | |
683 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
684 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
685 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF, | |
686 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF, | |
687 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF | |
688 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 689 | }; |
c05125c5 | 690 | # endif |
4bc3dcfa KW |
691 | |
692 | /* Index is EBCDIC 037 code point; value is its other fold-pair equivalent | |
693 | * (A => a; a => A, etc) in the 0-255 range. If no such equivalent, value is | |
694 | * the code point itself */ | |
c05125c5 | 695 | # ifndef DOINIT |
0a142f46 | 696 | EXTCONST U8 PL_fold_latin1[]; |
c05125c5 | 697 | # else |
0a142f46 | 698 | EXTCONST U8 PL_fold_latin1[] = { |
94e72741 KW |
699 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
700 | /*0_*/0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, | |
701 | /*1_*/0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, | |
702 | /*2_*/0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, | |
703 | /*3_*/0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F, | |
704 | /*4_*/0x40,0x41,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F, | |
705 | /*5_*/0x50,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F, | |
706 | /*6_*/0x60,0x61,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F, | |
707 | /*7_*/0x80,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F, | |
708 | /*8_*/0x70,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0x8A,0x8B,0xAC,0xAD,0xAE,0x8F, | |
709 | /*9_*/0x90,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0x9A,0x9B,0x9E,0x9D,0x9C,0x9F, | |
710 | /*A_*/0xA0,0xA1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xAA,0xAB,0x8C,0x8D,0x8E,0xAF, | |
711 | /*B_*/0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF, | |
712 | /*C_*/0xC0,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0xCA,0xEB,0xEC,0xED,0xEE,0xEF, | |
713 | /*D_*/0xD0,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0xDA,0xFB,0xFC,0xFD,0xFE,0xDF, | |
714 | /*E_*/0xE0,0xE1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xEA,0xCB,0xCC,0xCD,0xCE,0xCF, | |
715 | /*F_*/0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xDB,0xDC,0xDD,0xDE,0xFF | |
716 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ | |
4bc3dcfa | 717 | }; |
c05125c5 | 718 | # endif |
4bc3dcfa | 719 | |
3de6d141 KW |
720 | |
721 | /* The table below is adapted from | |
f6521f7c | 722 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
3de6d141 KW |
723 | * See copyright notice at the beginning of this file. |
724 | */ | |
725 | ||
726 | # ifndef DOINIT | |
0a142f46 | 727 | EXTCONST U8 PL_extended_utf8_dfa_tab[]; |
3de6d141 | 728 | # else |
0a142f46 | 729 | EXTCONST U8 PL_extended_utf8_dfa_tab[] = { |
3de6d141 KW |
730 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
731 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
732 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
733 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
734 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
735 | /*4_ */ 0, 7, 7, 8, 8, 9, 9, 9, 9, 10, 10, 0, 0, 0, 0, 0, | |
736 | /*5_ */ 0, 10, 10, 10, 10, 10, 10, 11, 11, 11, 0, 0, 0, 0, 0, 11, | |
737 | /*6_ */ 0, 0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, 0, 0, 0, 0, | |
738 | /*7_ */ 11, 11, 11, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, | |
739 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
740 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
741 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
742 | /*B_ */ 0, 2, 2, 2, 2, 2, 1, 3, 3, 3, 0, 0, 3, 3, 3, 3, | |
743 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
744 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 12, 4, 4, 4, | |
745 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 13, 5, 5, | |
746 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 14, 6, 15, 1, 0, | |
747 | /*N0= 0*/ 0, 1, 16, 32, 48, 64, 80, 1, 1, 1, 1, 1, 96,112,128,144, | |
748 | /*N1= 16*/ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, | |
749 | /*N2= 32*/ 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 1, 1, 1, 1, | |
750 | /*N3= 48*/ 1, 1, 1, 1, 1, 1, 1, 32, 32, 32, 32, 32, 1, 1, 1, 1, | |
751 | /*N4= 64*/ 1, 1, 1, 1, 1, 1, 1, 48, 48, 48, 48, 48, 1, 1, 1, 1, | |
752 | /*N5= 80*/ 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 64, 64, 1, 1, 1, 1, | |
753 | /*N6= 96*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 32, 1, 1, 1, 1, | |
754 | /*N7=112*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48, 48, 1, 1, 1, 1, | |
755 | /*N8=128*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 64, 64, 64, 1, 1, 1, 1, | |
756 | /*N9=144*/ 1, 1, 1, 1, 1, 1, 1, 1, 80, 80, 80, 80, 1, 1, 1, 1 | |
757 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15*/ | |
758 | }; | |
759 | # endif | |
760 | ||
123deead KW |
761 | |
762 | /* The table below is adapted from | |
f6521f7c | 763 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
123deead KW |
764 | * See copyright notice at the beginning of this file. |
765 | */ | |
766 | ||
767 | # ifndef DOINIT | |
0a142f46 | 768 | EXTCONST U16 PL_strict_utf8_dfa_tab[]; |
123deead | 769 | # else |
0a142f46 | 770 | EXTCONST U16 PL_strict_utf8_dfa_tab[] = { |
123deead KW |
771 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
772 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
773 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
774 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
775 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
776 | /*4_ */ 0, 10, 11, 12, 12, 12, 12, 12, 12, 13, 14, 0, 0, 0, 0, 0, | |
777 | /*5_ */ 0, 13, 14, 13, 14, 15, 16, 17, 18, 17, 0, 0, 0, 0, 0, 18, | |
778 | /*6_ */ 0, 0, 17, 18, 19, 20, 17, 18, 17, 18, 17, 0, 0, 0, 0, 0, | |
779 | /*7_ */ 18, 21, 22, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, | |
780 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
781 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
782 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
783 | /*B_ */ 0, 2, 2, 2, 2, 2, 1, 3, 3, 3, 0, 0, 3, 3, 3, 3, | |
784 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
785 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 8, 6, 4, 5, | |
786 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 5, 9, 7, 1, | |
787 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, | |
788 | /*N0 = 0*/ 0, 1, 23, 46, 69,138,115,184, 92,161, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
789 | /*N1 = 23*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
790 | /*N2 = 46*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, | |
791 | /*N3 = 69*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, | |
792 | /*N4 = 92*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, | |
793 | /*N5 =115*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 1, 1, 46,207, | |
794 | /*N6 =138*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,276, | |
795 | /*N7 =161*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 69,322, 69,322, 69,322, 69,322, 69,322, | |
796 | /*N8 =184*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 69,322, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
797 | /*N9 =207*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23,230,253, 23, 23, 23, 23, 23,299, | |
798 | /*N10=230*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, | |
799 | /*N11=253*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, | |
800 | /*N12=276*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,299, | |
801 | /*N13=299*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, | |
802 | /*N14=322*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,299 | |
803 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22*/ | |
804 | }; | |
805 | # endif | |
806 | ||
c5bfbb64 KW |
807 | |
808 | /* The table below is adapted from | |
f6521f7c | 809 | * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ |
c5bfbb64 KW |
810 | * See copyright notice at the beginning of this file. |
811 | */ | |
812 | ||
813 | # ifndef DOINIT | |
0a142f46 | 814 | EXTCONST U8 PL_c9_utf8_dfa_tab[]; |
c5bfbb64 | 815 | # else |
0a142f46 | 816 | EXTCONST U8 PL_c9_utf8_dfa_tab[] = { |
c5bfbb64 KW |
817 | /* _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F*/ |
818 | /*0_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
819 | /*1_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
820 | /*2_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
821 | /*3_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
822 | /*4_ */ 0, 9, 9, 10, 10, 10, 10, 10, 10, 11, 11, 0, 0, 0, 0, 0, | |
823 | /*5_ */ 0, 11, 11, 11, 11, 11, 11, 12, 12, 12, 0, 0, 0, 0, 0, 12, | |
824 | /*6_ */ 0, 0, 12, 12, 13, 13, 12, 12, 12, 12, 12, 0, 0, 0, 0, 0, | |
825 | /*7_ */ 12, 12, 12, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, | |
826 | /*8_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
827 | /*9_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
828 | /*A_ */ 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, | |
829 | /*B_ */ 0, 2, 2, 2, 2, 2, 1, 3, 3, 3, 0, 0, 3, 3, 3, 3, | |
830 | /*C_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, | |
831 | /*D_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 6, 5, 4, 4, | |
832 | /*E_ */ 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 8, 7, 1, | |
833 | /*F_ */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, | |
834 | /*N0= 0*/ 0, 1, 14, 28, 42, 70, 56, 98, 84, 1, 1, 1, 1, 1, | |
835 | /*N1=14*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, | |
836 | /*N2=28*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 14, 14, 14, 14, 14, | |
837 | /*N3=42*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 28, | |
838 | /*N4=56*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, | |
839 | /*N5=70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 28, 28, 28, 1, | |
840 | /*N6=84*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 42, 42, | |
841 | /*N7=98*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 42, 1, 1, 1, 1 | |
842 | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13*/ | |
843 | }; | |
844 | # endif | |
845 | ||
b37fc6e8 KW |
846 | /* This table partitions all the code points of the platform into ranges which |
847 | * have the property that all the code points in each range have the same | |
848 | * number of bytes in their UTF-EBCDIC representations, and the adjacent | |
849 | * ranges have a different number of bytes. | |
850 | * | |
851 | * Each number in the table begins such a range, which extends up to just | |
852 | * before the following table entry, except the final entry is understood to | |
853 | * extend to the platform's infinity | |
854 | */ | |
855 | # ifndef DOINIT | |
856 | EXTCONST UV PL_partition_by_byte_length[]; | |
857 | # else | |
858 | EXTCONST UV PL_partition_by_byte_length[] = { | |
859 | 0x00, | |
860 | 0x41, | |
861 | 0x4b, | |
862 | 0x51, | |
863 | 0x5a, | |
864 | 0x5f, | |
865 | 0x60, | |
866 | 0x62, | |
867 | 0x6b, | |
868 | 0x70, | |
869 | 0x79, | |
870 | 0x80, | |
871 | 0x81, | |
872 | 0x8a, | |
873 | 0x91, | |
874 | 0x9a, | |
875 | 0xa1, | |
876 | 0xaa, | |
877 | 0xb0, | |
878 | 0xb1, | |
879 | 0xba, | |
880 | 0xbc, | |
881 | 0xc0, | |
882 | 0xca, | |
883 | 0xd0, | |
884 | 0xda, | |
885 | 0xe0, | |
886 | 0xe1, | |
887 | 0xe2, | |
888 | 0xea, | |
889 | 0xf0, | |
890 | 0xfa, | |
891 | 0xff, | |
892 | 0x100, | |
893 | 0x400, | |
894 | 0x4000, | |
895 | 0x40000, | |
896 | 0x400000, | |
897 | 0x4000000, | |
898 | 0x40000000 | |
899 | }; | |
900 | # endif | |
901 | ||
4bc3dcfa KW |
902 | #endif /* EBCDIC 037 */ |
903 | ||
6a5bc5ac | 904 | #endif /* PERL_EBCDIC_TABLES_H_ */ |
4bc3dcfa KW |
905 | |
906 | /* ex: set ro: */ |