| 1 | /* utfebcdic.h |
| 2 | * |
| 3 | * Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009, |
| 4 | * 2010, 2011 by Larry Wall, Nick Ing-Simmons, and others |
| 5 | * |
| 6 | * You may distribute under the terms of either the GNU General Public |
| 7 | * License or the Artistic License, as specified in the README file. |
| 8 | * |
| 9 | * Macros to implement UTF-EBCDIC as perl's internal encoding |
| 10 | * Adapted from version 7.1 of Unicode Technical Report #16: |
| 11 | * http://www.unicode.org/unicode/reports/tr16 |
| 12 | * |
| 13 | * To summarize, the way it works is: |
| 14 | * To convert an EBCDIC code point to UTF-EBCDIC: |
| 15 | * 1) convert to Unicode. No conversion is necessary for code points above |
| 16 | * 255, as Unicode and EBCDIC are identical in this range. For smaller |
| 17 | * code points, the conversion is done by lookup in the PL_e2a table (with |
| 18 | * inverse PL_a2e) in the generated file 'ebcdic_tables.h'. The 'a' |
| 19 | * stands for ASCII platform, meaning 0-255 Unicode. |
| 20 | * 2) convert that to a utf8-like string called I8 ('I' stands for |
| 21 | * intermediate) with variant characters occupying multiple bytes. This |
| 22 | * step is similar to the utf8-creating step from Unicode, but the details |
| 23 | * are different. This transformation is called UTF8-Mod. There is a |
| 24 | * chart about the bit patterns in a comment later in this file. But |
| 25 | * essentially here are the differences: |
| 26 | * UTF8 I8 |
| 27 | * invariant byte starts with 0 starts with 0 or 100 |
| 28 | * continuation byte starts with 10 starts with 101 |
| 29 | * start byte same in both: if the code point requires N bytes, |
| 30 | * then the leading N bits are 1, followed by a 0. If |
| 31 | * all 8 bits in the first byte are 1, the code point |
| 32 | * will occupy 14 bytes (compared to 13 in Perl's |
| 33 | * extended UTF-8). This is incompatible with what |
| 34 | * tr16 implies should be the representation of code |
| 35 | * points 2**30 and above, but allows Perl to be able |
| 36 | * to represent all code points that fit in a 64-bit |
| 37 | * word in either our extended UTF-EBCDIC or UTF-8. |
| 38 | * 3) Use the algorithm in tr16 to convert each byte from step 2 into |
| 39 | * final UTF-EBCDIC. This is done by table lookup from a table |
| 40 | * constructed from the algorithm, reproduced in ebcdic_tables.h as |
| 41 | * PL_utf2e, with its inverse being PL_e2utf. They are constructed so that |
| 42 | * all EBCDIC invariants remain invariant, but no others do, and the first |
| 43 | * byte of a variant will always have its upper bit set. But note that |
| 44 | * the upper bit of some invariants is also 1. The table also is designed |
| 45 | * so that lexically comparing two UTF-EBCDIC-variant characters yields |
| 46 | * the Unicode code point order. (To get native code point order, one has |
| 47 | * to convert the latin1-range characters to their native code point |
| 48 | * value.) |
| 49 | * |
| 50 | * For example, the ordinal value of 'A' is 193 in EBCDIC, and also is 193 in |
| 51 | * UTF-EBCDIC. Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 |
| 52 | * converts it back to 193. As an example of how a variant character works, |
| 53 | * take LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in |
| 54 | * EBCDIC. Step 1 converts it to the Unicode value, 0xFF. Step 2 converts |
| 55 | * that to two bytes = 11000111 10111111 = C7 BF, and Step 3 converts those to |
| 56 | * 0x8B 0x73. |
| 57 | * |
| 58 | * If you're starting from Unicode, skip step 1. For UTF-EBCDIC to straight |
| 59 | * EBCDIC, reverse the steps. |
| 60 | * |
| 61 | * The EBCDIC invariants have been chosen to be those characters whose Unicode |
| 62 | * equivalents have ordinal numbers less than 160, that is the same characters |
| 63 | * that are expressible in ASCII, plus the C1 controls. So there are 160 |
| 64 | * invariants instead of the 128 in UTF-8. |
| 65 | * |
| 66 | * The purpose of Step 3 is to make the encoding be invariant for the chosen |
| 67 | * characters. This messes up the convenient patterns found in step 2, so |
| 68 | * generally, one has to undo step 3 into a temporary to use them. However, |
| 69 | * one "shadow", or parallel table, PL_utf8skip, has been constructed that |
| 70 | * doesn't require undoing things. It is such that for each byte, it says |
| 71 | * how long the sequence is if that (UTF-EBCDIC) byte were to begin it. |
| 72 | * |
| 73 | * There are actually 3 slightly different UTF-EBCDIC encodings in |
| 74 | * ebcdic_tables.h, one for each of the code pages recognized by Perl. That |
| 75 | * means that there are actually three different sets of tables, one for each |
| 76 | * code page. (If Perl is compiled on platforms using another EBCDIC code |
| 77 | * page, it may not compile, or Perl may silently mistake it for one of the |
| 78 | * three.) |
| 79 | * |
| 80 | * Note that tr16 actually only specifies one version of UTF-EBCDIC, based on |
| 81 | * the 1047 encoding, and which is supposed to be used for all code pages. |
| 82 | * But this doesn't work. To illustrate the problem, consider the '^' character. |
| 83 | * On a 037 code page it is the single byte 176, whereas under 1047 UTF-EBCDIC |
| 84 | * it is the single byte 95. If Perl implemented tr16 exactly, it would mean |
| 85 | * that changing a string containing '^' to UTF-EBCDIC would change that '^' |
| 86 | * from 176 to 95 (and vice-versa), violating the rule that ASCII-range |
| 87 | * characters are the same in UTF-8 or not. Much code in Perl assumes this |
| 88 | * rule. See for example |
| 89 | * http://grokbase.com/t/perl/mvs/025xf0yhmn/utf-ebcdic-for-posix-bc-malformed-utf-8-character |
| 90 | * What Perl does is create a version of UTF-EBCDIC suited to each code page; |
| 91 | * the one for the 1047 code page is identical to what's specified in tr16. |
| 92 | * This complicates interchanging files between computers using different code |
| 93 | * pages. Best is to convert to I8 before sending them, as the I8 |
| 94 | * representation is the same no matter what the underlying code page is. |
| 95 | * |
| 96 | * Because of the way UTF-EBCDIC is constructed, the lowest 32 code points that |
| 97 | * aren't equivalent to ASCII characters nor C1 controls form the set of |
| 98 | * continuation bytes; the remaining 64 non-ASCII, non-control code points form |
| 99 | * the potential start bytes, in order. (However, the first 5 of these lead to |
| 100 | * malformed overlongs, so there really are only 59 start bytes, and the first |
| 101 | * three of the 59 are the start bytes for the Latin1 range.) Hence the |
| 102 | * UTF-EBCDIC for the smallest variant code point, 0xA0 (160), will likely have 0x41 |
| 103 | * as its continuation byte, provided 0x41 isn't an ASCII or C1 equivalent. |
| 104 | * And its start byte will be the code point that is 37 (32+5) non-ASCII, |
| 105 | * non-control code points past it. (0 - 3F are controls, and 40 is SPACE, |
| 106 | * leaving 41 as the first potentially available one.) In contrast, on ASCII |
| 107 | * platforms, the first 64 (not 32) non-ASCII code points are the continuation |
| 108 | * bytes. And the first 2 (not 5) potential start bytes form overlong |
| 109 | * malformed sequences. |
| 110 | * |
| 111 | * EBCDIC characters above 0xFF are the same as Unicode in Perl's |
| 112 | * implementation of all 3 encodings, so for those Step 1 is trivial. |
| 113 | * |
| 114 | * (Note that the entries for invariant characters are necessarily the same in |
| 115 | * PL_e2a and PL_e2utf; likewise for their inverses.) |
| 116 | * |
| 117 | * UTF-EBCDIC strings are the same length or longer than UTF-8 representations |
| 118 | * of the same string. The maximum code point representable as 2 bytes in |
| 119 | * UTF-EBCDIC is 0x3FF, instead of 0x7FF in UTF-8. |
| 120 | */ |
| 121 | |
| 122 | START_EXTERN_C |
| 123 | |
| 124 | #include "ebcdic_tables.h" |
| 125 | |
| 126 | END_EXTERN_C |
| 127 | |
| 128 | /* EBCDIC-happy ways of converting native code to UTF-8 */ |
| 129 | |
| 130 | /* Use these when ch is known to be < 256. |
| | * |
| | * Each one is a single table lookup: NATIVE_TO_LATIN1 indexes PL_e2a (native |
| | * EBCDIC code point to its 0-255 Unicode equivalent) and LATIN1_TO_NATIVE |
| | * indexes the inverse table PL_a2e. ch is passed to |
| | * __ASSERT_(FITS_IN_8_BITS(ch)) and then cast to U8 to form the index, so it |
| | * appears twice in the expansion; avoid arguments with side effects. */ |
| 131 | #define NATIVE_TO_LATIN1(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) PL_e2a[(U8)(ch)]) |
| 132 | #define LATIN1_TO_NATIVE(ch) (__ASSERT_(FITS_IN_8_BITS(ch)) PL_a2e[(U8)(ch)]) |
| 133 | |
| 134 | /* Use these on bytes. |
| | * |
| | * They translate one byte of an encoded string at a time, not whole code |
| | * points: NATIVE_UTF8_TO_I8 looks a UTF-EBCDIC byte up in PL_e2utf to get the |
| | * corresponding I8 byte, and I8_TO_NATIVE_UTF8 looks an I8 byte up in the |
| | * inverse table PL_utf2e. b is passed to __ASSERT_(FITS_IN_8_BITS(b)) and |
| | * then cast to U8 for the index, so it appears twice in the expansion. */ |
| 135 | #define NATIVE_UTF8_TO_I8(b) (__ASSERT_(FITS_IN_8_BITS(b)) PL_e2utf[(U8)(b)]) |
| 136 | #define I8_TO_NATIVE_UTF8(b) (__ASSERT_(FITS_IN_8_BITS(b)) PL_utf2e[(U8)(b)]) |
| 137 | |
| 138 | /* Transforms in wide UV chars. |
| | * |
| | * A value that FITS_IN_8_BITS is translated through the Latin1 lookup macro |
| | * (NATIVE_TO_LATIN1 or LATIN1_TO_NATIVE); any larger value is returned |
| | * unchanged, cast to UV, because native and Unicode code points are identical |
| | * above 255. ch appears more than once in the expansion. */ |
| 139 | #define NATIVE_TO_UNI(ch) (FITS_IN_8_BITS(ch) ? NATIVE_TO_LATIN1(ch) : (UV) (ch)) |
| 140 | #define UNI_TO_NATIVE(ch) (FITS_IN_8_BITS(ch) ? LATIN1_TO_NATIVE(ch) : (UV) (ch)) |
| 141 | |
| 142 | /* How wide can a single UTF-8 encoded character become in bytes. */ |
| 143 | /* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8 |
| 144 | * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be |
| 145 | * expressed with 5 bytes. However, Perl thinks of UTF-8 as a way to encode |
| 146 | * non-negative integers in a binary format, even those above Unicode. 14 is |
| 147 | * the smallest number that covers 2**64: one start byte followed by 13 |
| | * continuation bytes of 5 payload bits each gives 65 bits, whereas 12 |
| | * continuation bytes would give only 60. |
| 148 | * |
| 149 | * WARNING: This number must be in sync with the value in |
| 150 | * regen/charset_translations.pl. */ |
| 151 | #define UTF8_MAXBYTES 14 |
| 152 | |
| 153 | /* |
| 154 | The following table is adapted from tr16, it shows the I8 encoding of Unicode code points. |
| 155 | |
| 156 | Unicode U32 Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th Byte |
| 157 | U+0000..U+007F 000000000xxxxxxx 0xxxxxxx |
| 158 | U+0080..U+009F 00000000100xxxxx 100xxxxx |
| 159 | U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx |
| 160 | U+0400..U+3FFF 00zzzzyyyyyxxxxx 1110zzzz 101yyyyy 101xxxxx |
| 161 | U+4000..U+3FFFF 0wwwzzzzzyyyyyxxxxx 11110www 101zzzzz 101yyyyy 101xxxxx |
| 162 | U+40000..U+3FFFFF 0vvwwwwwzzzzzyyyyyxxxxx 111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 163 | U+400000..U+3FFFFFF 0uvvvvvwwwwwzzzzzyyyyyxxxxx 1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 164 | U+4000000..U+3FFFFFFF 00uuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 11111110 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 165 | |
| 166 | Beyond this, Perl uses an incompatible extension, similar to the one used in |
| 167 | regular UTF-8. There are now 14 bytes. A full 32 bits of information thus looks like this: |
| 168 | 1st Byte 2nd-7th 8th Byte 9th Byte 10th B 11th B 12th B 13th B 14th B |
| 169 | U+40000000..U+FFFFFFFF ttuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx 11111111 10100000 101000tt 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx |
| 170 | |
| 171 | For 32-bit words, the 2nd through 7th bytes effectively function as leading |
| 172 | zeros. Above 32 bits, these fill up, with each byte yielding 5 bits of |
| 173 | information, so that with 13 continuation bytes, we can handle 65 bits, just |
| 174 | above what a 64-bit word can hold. |
| 175 | |
| 176 | The following table gives the I8 byte ranges for each range of code points: |
| 177 | |
| 178 | I8 Code Points 1st Byte 2nd Byte 3rd 4th 5th 6th 7th 8th 9th-14th |
| 179 | |
| 180 | 0x0000..0x009F 00..9F |
| 181 | 0x00A0..0x00FF * C5..C7 A0..BF |
| 182 | U+0100..U+03FF C8..DF A0..BF |
| 183 | U+0400..U+3FFF * E1..EF A0..BF A0..BF |
| 184 | U+4000..U+7FFF F0 * B0..BF A0..BF A0..BF |
| 185 | U+8000..U+D7FF F1 A0..B5 A0..BF A0..BF |
| 186 | U+D800..U+DFFF F1 B6..B7 A0..BF A0..BF (surrogates) |
| 187 | U+E000..U+FFFF F1 B8..BF A0..BF A0..BF |
| 188 | U+10000..U+3FFFF F2..F7 A0..BF A0..BF A0..BF |
| 189 | U+40000..U+FFFFF F8 * A8..BF A0..BF A0..BF A0..BF |
| 190 | U+100000..U+10FFFF F9 A0..A1 A0..BF A0..BF A0..BF |
| 191 | Below are above-Unicode code points |
| 192 | U+110000..U+1FFFFF F9 A2..BF A0..BF A0..BF A0..BF |
| 193 | U+200000..U+3FFFFF FA..FB A0..BF A0..BF A0..BF A0..BF |
| 194 | U+400000..U+1FFFFFF FC * A4..BF A0..BF A0..BF A0..BF A0..BF |
| 195 | U+2000000..U+3FFFFFF FD A0..BF A0..BF A0..BF A0..BF A0..BF |
| 196 | U+4000000..U+3FFFFFFF FE * A2..BF A0..BF A0..BF A0..BF A0..BF A0..BF |
| 197 | U+40000000.. FF A0..BF A0..BF A0..BF A0..BF A0..BF A0..BF * A1..BF A0..BF |
| 198 | |
| 199 | Note the gaps before several of the byte entries above marked by '*'. These are |
| 200 | caused by legal UTF-8 avoiding non-shortest encodings: it is technically |
| 201 | possible to UTF-8-encode a single code point in different ways, but that is |
| 202 | explicitly forbidden, and the shortest possible encoding should always be used |
| 203 | (and that is what Perl does). */ |
| 204 | |
| 205 | /* Comments as to the meaning of each are given at their corresponding utf8.h |
| 206 | * definitions. */ |
| 207 | |
| 208 | #define UTF_ACCUMULATION_SHIFT 5 /* each I8 continuation byte (101xxxxx) carries 5 payload bits; NOTE(review): presumably the per-byte shift used when accumulating a code point -- see utf8.h */ |
| 209 | |
| 210 | #define isUTF8_POSSIBLY_PROBLEMATIC(c) \ |
| 211 | _generic_isCC(c, _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE) /* character-class test of byte c; NOTE(review): going by the class name, presumably true when c starts a sequence for a surrogate or any higher code point -- confirm against the utf8.h definition */ |
| 212 | |
| 213 | /* ^? is defined to be APC on EBCDIC systems. See the definition of toCTRL() |
| 214 | * for more. |
| | * |
| | * QUESTION_MARK_CTRL expands to the native (EBCDIC) equivalent of Latin1 |
| | * 0x9F, obtained through the LATIN1_TO_NATIVE table lookup. */ |
| 215 | #define QUESTION_MARK_CTRL LATIN1_TO_NATIVE(0x9F) |
| 216 | |
| 217 | #define UNICODE_IS_PERL_EXTENDED(uv) UNLIKELY((UV) (uv) > 0x3FFFFFFF) /* true for code points above 0x3FFFFFFF, the top of the 7-byte I8 table adapted from tr16; such values need Perl's own 14-byte form whose start byte is FF */ |
| 218 | |
| 219 | /* Helper macros for isUTF8_CHAR_foo, so use those instead of this. These were |
| 220 | * generated by regen/regcharclass.pl, and then moved here. Then they were |
| 221 | * hand-edited to add some LIKELY() calls, presuming that malformations are |
| 222 | * unlikely. The lines that generated it were then commented out. This was |
| 223 | * done because it takes on the order of 10 minutes to generate, and is never |
| 224 | * going to change, unless the generated code is improved, and figuring out the |
| 225 | * LIKELYs there would be hard. |
| 226 | * |
| 227 | */ |
| 228 | |
| 229 | #if '^' == 95 /* CP 1047 */ |
| 230 | /* UTF8_CHAR: Matches legal UTF-EBCDIC variant code points up through 0x1FFFFF |
| 231 | |
| 232 | 0xA0 - 0x1FFFFF |
| | |
| | is_UTF8_CHAR_utf8_no_length_checks(s) evaluates to the length in bytes (2, 3, |
| | 4 or 5) of the variant character that starts at s, or to 0 when the bytes |
| | there do not form one (this includes any first byte that is not one of the |
| | start bytes tested below). The first byte selects the expected length; each |
| | following byte must then be one of the CP 1047 continuation bytes |
| | (0x41-0x4A, 0x51-0x59, 0x62-0x6A, 0x70-0x73). After start bytes 0xDC and 0xED |
| | only a subset of those is accepted as the second byte; this lines up with |
| | the shortest-form gaps marked '*' for I8 F0 and F8 in the table earlier in |
| | this file. |
| | |
| | As the name says, no length checks are made: s[1] through s[4] are indexed |
| | directly. s is pasted unparenthesized after a (const U8*) cast and appears |
| | many times in the expansion, so pass a plain pointer expression without side |
| | effects. U8 and LIKELY() must be in scope where the macro is used. |
| 233 | */ |
| 234 | |
| 235 | /*** GENERATED CODE ***/ |
| 236 | #define is_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 237 | ( ( 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB6 ) ) ?\ |
| 238 | ( LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) ? 2 : 0 )\ |
| 239 | : ( ( ( ((const U8*)s)[0] & 0xFC ) == 0xB8 ) || ((const U8*)s)[0] == 0xBC || ( ( ((const U8*)s)[0] & 0xFE ) == 0xBE ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 240 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) ? 3 : 0 )\ |
| 241 | : ( 0xDC == ((const U8*)s)[0] ) ? \ |
| 242 | ( LIKELY( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 243 | : ( ( 0xDD <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xDF ) || 0xE1 == ((const U8*)s)[0] || ( 0xEA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xEC ) ) ?\ |
| 244 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 245 | : ( 0xED == ((const U8*)s)[0] ) ? \ |
| 246 | ( LIKELY( ( ( ( ( 0x49 == ((const U8*)s)[1] || 0x4A == ((const U8*)s)[1] ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 247 | : ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 ) |
| 248 | |
| 249 | /* UTF8_CHAR_STRICT: Matches legal Unicode UTF-8 variant code points, no |
| 250 | surrogates nor non-character code points. |
| | |
| | The matcher is split into the entry macro |
| | is_STRICT_UTF8_CHAR_utf8_no_length_checks() and two helper parts. |
| | |
| | part0 is what the entry macro expands to for the 4-byte start bytes 0xDF, |
| | 0xEA and 0xEC. It never looks at s[0]: it validates s[1], s[2] and s[3] and |
| | evaluates to 4 on success, 0 otherwise. Any three CP 1047 continuation bytes |
| | (0x41-0x4A, 0x51-0x59, 0x62-0x6A, 0x70-0x73) are accepted, with one |
| | exception: when s[1] and s[2] are both 0x73, s[3] may not be 0x72 or 0x73 |
| | (NOTE(review): presumably the two non-characters that end a plane). |
| | |
| | No length checks are made, and s is expanded many times, unparenthesized, |
| | after a (const U8*) cast. */ |
| 251 | /*** GENERATED CODE ***/ |
| 252 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) \ |
| 253 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ?\ |
| 254 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 255 | : ( 0x73 == ((const U8*)s)[1] ) ? \ |
| 256 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ?\ |
| 257 | ( LIKELY( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ |
| 258 | : LIKELY( ( 0x73 == ((const U8*)s)[2] ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFE ) == 0x70 ) ) ? 4 : 0 )\ |
| 259 | : 0 ) |
| 260 | |
| 261 | |
| 262 | /*** GENERATED CODE ***/ /* Helper for is_STRICT_UTF8_CHAR_utf8_no_length_checks(): handles the two |
| | 5-byte start bytes, 0xED and 0xEE, and evaluates to 5 on a match or to 0 |
| | otherwise (0 is also the result for any other s[0]). |
| | |
| | The second byte decides how the remaining bytes are checked. After 0xED it |
| | must fall in one of two listed sets; after 0xEE it must be 0x41 or 0x42. For |
| | the first set (and for 0xEE 0x41) any three CP 1047 continuation bytes may |
| | follow. For the second set (and for 0xEE 0x42) the same holds except that |
| | when s[2] and s[3] are both 0x73, s[4] may not be 0x72 or 0x73 |
| | (NOTE(review): presumably the two non-characters that end a plane). |
| | |
| | No length checks are made, and s is expanded many times, unparenthesized, |
| | after a (const U8*) cast. */ |
| 263 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) \ |
| 264 | ( ( 0xED == ((const U8*)s)[0] ) ? \ |
| 265 | ( ( ( ( ((const U8*)s)[1] & 0xEF ) == 0x49 ) || ( ( ((const U8*)s)[1] & 0xF9 ) == 0x51 ) || ((const U8*)s)[1] == 0x63 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) || ((const U8*)s)[1] == 0x69 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x70 ) ) ?\ |
| 266 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 267 | : ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x62 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x68 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x71 ) ) ?\ |
| 268 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ?\ |
| 269 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 270 | : ( 0x73 == ((const U8*)s)[2] ) ? \ |
| 271 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ?\ |
| 272 | ( LIKELY( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ? 5 : 0 )\ |
| 273 | : LIKELY( ( 0x73 == ((const U8*)s)[3] ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFE ) == 0x70 ) ) ? 5 : 0 )\ |
| 274 | : 0 ) \ |
| 275 | : 0 ) \ |
| 276 | : ( 0xEE == ((const U8*)s)[0] ) ? \ |
| 277 | ( ( 0x41 == ((const U8*)s)[1] ) ? \ |
| 278 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 279 | : ( 0x42 == ((const U8*)s)[1] ) ? \ |
| 280 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ?\ |
| 281 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 282 | : ( 0x73 == ((const U8*)s)[2] ) ? \ |
| 283 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ?\ |
| 284 | ( LIKELY( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ? 5 : 0 )\ |
| 285 | : LIKELY( ( 0x73 == ((const U8*)s)[3] ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFE ) == 0x70 ) ) ? 5 : 0 )\ |
| 286 | : 0 ) \ |
| 287 | : 0 ) \ |
| 288 | : 0 ) |
| 289 | |
| 290 | |
| 291 | /*** GENERATED CODE ***/ /* Entry point of the strict matcher: evaluates to the length in bytes (2, 3, |
| | 4 or 5) of the variant character starting at s, or to 0 when the bytes there |
| | are not accepted. |
| | |
| | Dispatch is on s[0]. The 2-byte and 3-byte start bytes, and the 4-byte start |
| | bytes 0xDE, 0xE1 and 0xEB, only require that every following byte be a CP |
| | 1047 continuation byte (0x41-0x4A, 0x51-0x59, 0x62-0x6A, 0x70-0x73). 0xDC |
| | additionally limits s[1] to 0x57-0x59, 0x62-0x6A or 0x70-0x73. 0xDD is |
| | checked inline with extra exclusions: s[1] may not be 0x65 or 0x66, and when |
| | s[1] is 0x73 some combinations of s[2] and s[3] are rejected |
| | (NOTE(review): these appear to be the surrogate and non-character |
| | exclusions of the strict definition). 0xDF, 0xEA and 0xEC are handed to |
| | ..._part0(s); every remaining value of s[0] is handed to ..._part1(s), which |
| | yields 0 unless s[0] is 0xED or 0xEE. |
| | |
| | No length checks are made: up to s[4] is indexed directly. s is expanded |
| | many times, unparenthesized, after a (const U8*) cast, so pass a plain |
| | pointer expression without side effects. */ |
| 292 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 293 | ( ( 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB6 ) ) ?\ |
| 294 | ( LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) ? 2 : 0 )\ |
| 295 | : ( ( ( ((const U8*)s)[0] & 0xFC ) == 0xB8 ) || ((const U8*)s)[0] == 0xBC || ( ( ((const U8*)s)[0] & 0xFE ) == 0xBE ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 296 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) ? 3 : 0 )\ |
| 297 | : ( 0xDC == ((const U8*)s)[0] ) ? \ |
| 298 | ( LIKELY( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 299 | : ( 0xDD == ((const U8*)s)[0] ) ? \ |
| 300 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x64 ) || ( 0x67 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ?\ |
| 301 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 302 | : ( 0x73 == ((const U8*)s)[1] ) ? \ |
| 303 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x54 ) || ( 0x57 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ?\ |
| 304 | ( LIKELY( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ |
| 305 | : ( 0x55 == ((const U8*)s)[2] ) ? \ |
| 306 | ( LIKELY( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x56 ) ) ? 4 : 0 )\ |
| 307 | : ( 0x56 == ((const U8*)s)[2] ) ? \ |
| 308 | ( LIKELY( ( 0x57 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ |
| 309 | : LIKELY( ( 0x73 == ((const U8*)s)[2] ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFE ) == 0x70 ) ) ? 4 : 0 )\ |
| 310 | : 0 ) \ |
| 311 | : ( 0xDE == ((const U8*)s)[0] || 0xE1 == ((const U8*)s)[0] || 0xEB == ((const U8*)s)[0] ) ? \ |
| 312 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 313 | : ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) ? is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) : is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) ) |
| 314 | |
| 315 | /* C9_STRICT_UTF8_CHAR: Matches legal Unicode UTF-8 variant code points |
| 316 | including non-character code points, no surrogates |
| 317 | 0x00A0 - 0xD7FF |
| 318 | 0xE000 - 0x10FFFF |
| 319 | */ |
| 320 | /*** GENERATED CODE ***/ /* Dispatches on s[0] and evaluates to 2, 3, 4 or 5 (the number of bytes tested, start byte included) when every byte tested is in an accepted range, otherwise 0. No bounds are checked; bytes up to s[4] may be read. 's' is expanded many times and is not parenthesized in the expansion. After start byte 0xDD a second byte of 0x65 or 0x66 is never accepted -- presumably the surrogate exclusion; confirm against the generator. NOTE(review): the #if that selects this definition is opened earlier in the file; confirm which code page it serves. */ |
| 321 | #define is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 322 | ( ( 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB6 ) ) ?\ |
| 323 | ( LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) ? 2 : 0 )\ |
| 324 | : ( ( ( ((const U8*)s)[0] & 0xFC ) == 0xB8 ) || ((const U8*)s)[0] == 0xBC || ( ( ((const U8*)s)[0] & 0xFE ) == 0xBE ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 325 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) ? 3 : 0 )\ |
| 326 | : ( 0xDC == ((const U8*)s)[0] ) ? \ |
| 327 | ( LIKELY( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 328 | : ( 0xDD == ((const U8*)s)[0] ) ? \ |
| 329 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x64 ) || ( 0x67 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 330 | : ( ( ((const U8*)s)[0] & 0xFE ) == 0xDE || 0xE1 == ((const U8*)s)[0] || ( 0xEA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xEC ) ) ?\ |
| 331 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ |
| 332 | : ( 0xED == ((const U8*)s)[0] ) ? \ |
| 333 | ( LIKELY( ( ( ( ( 0x49 == ((const U8*)s)[1] || 0x4A == ((const U8*)s)[1] ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ |
| 334 | : LIKELY( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] || 0x42 == ((const U8*)s)[1] ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( ((const U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 ) |
| 335 | |
| 336 | #endif |
| 337 | |
| 338 | #if '^' == 176 /* CP 037 */ |
| 339 | |
| 340 | /*** GENERATED CODE ***/ /* CP 037 variant (selected by the #if just above). Dispatches on s[0] and evaluates to 2, 3, 4 or 5 (the number of bytes tested, start byte included) when every following byte is in the set 0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A, 0x70-0x72, otherwise 0. The second byte is further restricted after start byte 0xDC (0x57 and above within that set) and after 0xED (0x49 and above within that set). 0xEE is the highest start byte accepted. No bounds are checked; bytes up to s[4] may be read. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 341 | #define is_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 342 | ( ( 0x78 == ((const U8*)s)[0] || 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB5 ) ) ?\ |
| 343 | ( LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ? 2 : 0 )\ |
| 344 | : ( ((const U8*)s)[0] == 0xB7 || ( ( ((const U8*)s)[0] & 0xFE ) == 0xB8 ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xBC ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 345 | ( LIKELY( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) ? 3 : 0 )\ |
| 346 | : ( 0xDC == ((const U8*)s)[0] ) ? \ |
| 347 | ( LIKELY( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 348 | : ( ( 0xDD <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xDF ) || 0xE1 == ((const U8*)s)[0] || ( 0xEA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xEC ) ) ?\ |
| 349 | ( LIKELY( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 350 | : ( 0xED == ((const U8*)s)[0] ) ? \ |
| 351 | ( LIKELY( ( ( ( ( 0x49 == ((const U8*)s)[1] || 0x4A == ((const U8*)s)[1] ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 352 | : ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && LIKELY( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 ) |
| 353 | |
| 354 | /* XXX The macro definitions below do not have LIKELY() added */ |
| 355 | /*** GENERATED CODE ***/ /* Helper that is_STRICT_UTF8_CHAR_utf8_no_length_checks_part3() selects when s[0] is 0xED; s[0] itself is not tested here. Examines s[1] through s[4] and evaluates to 5 on a match, otherwise 0. If s[1] is in the first set tested, s[2]..s[4] only need to be in the general following-byte set (0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A, 0x70-0x72). If s[1] is in the second set, the same applies except that s[2] == 0x72 together with s[3] == 0x72 and s[4] of 0x71 or 0x72 is rejected -- presumably the non-character code points; confirm against the generator. Any other s[1] gives 0. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 356 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) \ |
| 357 | ( ( ( ( ((const U8*)s)[1] & 0xEF ) == 0x49 ) || ( ( ((const U8*)s)[1] & 0xF9 ) == 0x51 ) || ((const U8*)s)[1] == 0x62 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x68 ) || ((const U8*)s)[1] == 0x71 ) ?\ |
| 358 | ( ( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 359 | : ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x5F || ((const U8*)s)[1] == 0x63 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) || ((const U8*)s)[1] == 0x69 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x70 ) ) ?\ |
| 360 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFE ) == 0x70 ) ?\ |
| 361 | ( ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 362 | : ( 0x72 == ((const U8*)s)[2] ) ? \ |
| 363 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFE ) == 0x70 ) ?\ |
| 364 | ( ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ? 5 : 0 )\ |
| 365 | : ( ( 0x72 == ((const U8*)s)[3] ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || 0x70 == ((const U8*)s)[4] ) ) ? 5 : 0 )\ |
| 366 | : 0 ) \ |
| 367 | : 0 ) |
| 368 | |
| 369 | |
| 370 | /*** GENERATED CODE ***/ /* Helper that is_STRICT_UTF8_CHAR_utf8_no_length_checks_part3() falls through to when no other start byte matched. Requires s[0] == 0xEE and s[1] of 0x41 or 0x42, then evaluates to 5 on a match, otherwise 0. With s[1] == 0x41, s[2]..s[4] only need to be in the general following-byte set (0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A, 0x70-0x72). With s[1] == 0x42 the same applies except that s[2] == 0x72 together with s[3] == 0x72 and s[4] of 0x71 or 0x72 is rejected -- presumably the non-character code points; confirm against the generator. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 371 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) \ |
| 372 | ( ( 0xEE == ((const U8*)s)[0] ) ? \ |
| 373 | ( ( 0x41 == ((const U8*)s)[1] ) ? \ |
| 374 | ( ( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 375 | : ( 0x42 == ((const U8*)s)[1] ) ? \ |
| 376 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFE ) == 0x70 ) ?\ |
| 377 | ( ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 378 | : ( 0x72 == ((const U8*)s)[2] ) ? \ |
| 379 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( ((const U8*)s)[3] & 0xFE ) == 0x70 ) ?\ |
| 380 | ( ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ? 5 : 0 )\ |
| 381 | : ( ( 0x72 == ((const U8*)s)[3] ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || 0x70 == ((const U8*)s)[4] ) ) ? 5 : 0 )\ |
| 382 | : 0 ) \ |
| 383 | : 0 ) \ |
| 384 | : 0 ) |
| 385 | |
| 386 | |
| 387 | /*** GENERATED CODE ***/ /* Helper that is_STRICT_UTF8_CHAR_utf8_no_length_checks() selects when s[0] is 0xDC; s[0] itself is not tested here. Evaluates to 4 when s[1] is in 0x57-0x59, 0x5F, 0x62-0x6A or 0x70-0x72 and both s[2] and s[3] are in 0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A or 0x70-0x72; otherwise 0. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 388 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part2(s) \ |
| 389 | ( ( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 ) |
| 390 | |
| 391 | |
| 392 | /*** GENERATED CODE ***/ /* Helper that is_STRICT_UTF8_CHAR_utf8_no_length_checks() selects for every start byte it does not handle itself. Start bytes 0xDD, 0xDE/0xE1/0xEB and 0xDF/0xEA/0xEC are decided here and give 4 on a match or 0; 0xED is passed to ..._part0(s) and anything else to ..._part1(s). For 0xDD: s[1] of 0x64 or 0x65 is never accepted, and when s[1] == 0x72 the pair (s[2], s[3]) is restricted further -- s[2] == 0x55 accepts only s[3] in 0x41-0x4A or 0x51-0x56, s[2] == 0x56 accepts only s[3] of 0x57 and above, and s[2] == 0x72 rejects s[3] of 0x71 or 0x72. For 0xDF/0xEA/0xEC only the s[1] == 0x72, s[2] == 0x72, s[3] of 0x71 or 0x72 combination is rejected. Presumably these holes are the surrogates and non-characters; confirm against the generator. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 393 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part3(s) \ |
| 394 | ( ( 0xDD == ((const U8*)s)[0] ) ? \ |
| 395 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( ((const U8*)s)[1] & 0xFE ) == 0x62 || ( 0x66 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFE ) == 0x70 ) ?\ |
| 396 | ( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 397 | : ( 0x72 == ((const U8*)s)[1] ) ? \ |
| 398 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x54 ) || ( 0x57 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFE ) == 0x70 ) ?\ |
| 399 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ |
| 400 | : ( 0x55 == ((const U8*)s)[2] ) ? \ |
| 401 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x56 ) ) ? 4 : 0 )\ |
| 402 | : ( 0x56 == ((const U8*)s)[2] ) ? \ |
| 403 | ( ( ( 0x57 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ |
| 404 | : ( ( 0x72 == ((const U8*)s)[2] ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || 0x70 == ((const U8*)s)[3] ) ) ? 4 : 0 )\ |
| 405 | : 0 ) \ |
| 406 | : ( 0xDE == ((const U8*)s)[0] || 0xE1 == ((const U8*)s)[0] || 0xEB == ((const U8*)s)[0] ) ?\ |
| 407 | ( ( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 408 | : ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) ?\ |
| 409 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( ((const U8*)s)[1] & 0xFE ) == 0x70 ) ?\ |
| 410 | ( ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 411 | : ( 0x72 == ((const U8*)s)[1] ) ? \ |
| 412 | ( ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( ((const U8*)s)[2] & 0xFE ) == 0x70 ) ?\ |
| 413 | ( ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ |
| 414 | : ( ( 0x72 == ((const U8*)s)[2] ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || 0x70 == ((const U8*)s)[3] ) ) ? 4 : 0 )\ |
| 415 | : 0 ) \ |
| 416 | : ( 0xED == ((const U8*)s)[0] ) ? is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) : is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) ) |
| 417 | |
| 418 | |
| 419 | /*** GENERATED CODE ***/ /* CP 037 entry point of the STRICT class. Dispatches on s[0]: the first group of start bytes gives 2 when s[1] is in 0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A or 0x70-0x72; the second group gives 3 when s[1] and s[2] are both in that set; start byte 0xDC is decided by ..._part2(s) and every other start byte by ..._part3(s). The overall result is 2, 3, 4 or 5 on a match and 0 otherwise. No bounds are checked. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 420 | #define is_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 421 | ( ( 0x78 == ((const U8*)s)[0] || 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB5 ) ) ?\ |
| 422 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ? 2 : 0 )\ |
| 423 | : ( ((const U8*)s)[0] == 0xB7 || ( ( ((const U8*)s)[0] & 0xFE ) == 0xB8 ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xBC ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 424 | ( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) ? 3 : 0 )\ |
| 425 | : ( 0xDC == ((const U8*)s)[0] ) ? is_STRICT_UTF8_CHAR_utf8_no_length_checks_part2(s) : is_STRICT_UTF8_CHAR_utf8_no_length_checks_part3(s) ) |
| 426 | |
| 427 | /* C9_STRICT_UTF8_CHAR: Matches legal Unicode UTF-8 variant code points |
| 428 | including non-character code points, no surrogates |
| 429 | 0x00A0 - 0xD7FF |
| 430 | 0xE000 - 0x10FFFF |
| 431 | */ |
| 432 | /*** GENERATED CODE ***/ /* Helper that is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks() selects when s[0] is one of its two-byte start bytes; s[0] itself is not tested here. Evaluates to 2 when s[1] is in 0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A or 0x70-0x72, otherwise 0. 's' is expanded several times and is not parenthesized in the expansion. */ |
| 433 | #define is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) \ |
| 434 | ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) ? 2 : 0 ) |
| 435 | |
| 436 | |
| 437 | /*** GENERATED CODE ***/ /* Helper that is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks() selects for every start byte outside its two-byte group. Dispatches on s[0] and evaluates to 3, 4 or 5 (the number of bytes tested, start byte included) on a match, otherwise 0. Following bytes must be in 0x41-0x4A, 0x51-0x59, 0x5F, 0x62-0x6A or 0x70-0x72, with the second byte narrowed for some start bytes: 0xDC needs 0x57 and above within that set, 0xDD never accepts 0x64 or 0x65 (presumably the surrogate exclusion; confirm against the generator), 0xED needs 0x49 and above within that set, and 0xEE needs 0x41 or 0x42. No bounds are checked; bytes up to s[4] may be read. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 438 | #define is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) \ |
| 439 | ( ( ((const U8*)s)[0] == 0xB7 || ( ( ((const U8*)s)[0] & 0xFE ) == 0xB8 ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xBC ) || ( ( ((const U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((const U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ |
| 440 | ( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) ? 3 : 0 )\ |
| 441 | : ( 0xDC == ((const U8*)s)[0] ) ? \ |
| 442 | ( ( ( ( ( 0x57 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 443 | : ( 0xDD == ((const U8*)s)[0] ) ? \ |
| 444 | ( ( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( ((const U8*)s)[1] & 0xFE ) == 0x62 || ( 0x66 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 445 | : ( ( ((const U8*)s)[0] & 0xFE ) == 0xDE || 0xE1 == ((const U8*)s)[0] || ( 0xEA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xEC ) ) ?\ |
| 446 | ( ( ( ( ( 0x41 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ |
| 447 | : ( 0xED == ((const U8*)s)[0] ) ? \ |
| 448 | ( ( ( ( ( ( 0x49 == ((const U8*)s)[1] || 0x4A == ((const U8*)s)[1] ) || ( 0x51 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x59 ) || 0x5F == ((const U8*)s)[1] || ( 0x62 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[1] && ((const U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ |
| 449 | : ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x41 == ((const U8*)s)[1] || 0x42 == ((const U8*)s)[1] ) ) && ( ( 0x41 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x59 ) || 0x5F == ((const U8*)s)[2] || ( 0x62 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[2] && ((const U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x59 ) || 0x5F == ((const U8*)s)[3] || ( 0x62 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[3] && ((const U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x59 ) || 0x5F == ((const U8*)s)[4] || ( 0x62 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((const U8*)s)[4] && ((const U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 ) |
| 450 | |
| 451 | |
| 452 | /*** GENERATED CODE ***/ /* CP 037 entry point of the C9_STRICT class. Tests only s[0] itself: when it is one of the start bytes listed here the result comes from ..._part0(s) (2 or 0), otherwise from ..._part1(s) (3, 4, 5 or 0). No bounds are checked. 's' is expanded many times and is not parenthesized in the expansion. */ |
| 453 | #define is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ |
| 454 | ( ( 0x78 == ((const U8*)s)[0] || 0x80 == ((const U8*)s)[0] || ( 0x8A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((const U8*)s)[0] && ((const U8*)s)[0] <= 0xB5 ) ) ? is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) : is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) ) |
| 455 | |
| 456 | #endif |
| 457 | |
| 458 | /* is_UTF8_CHAR_utf8_no_length_checks() in both code pages handles UTF-8 that |
| 459 | * has this start byte (expressed in I8) as the maximum. NOTE(review): the last branch of the CP 037 definition tests native start byte 0xEE; presumably 0xF9 is the I8 form of that byte -- confirm against the code page tables. */ |
| 460 | #define _IS_UTF8_CHAR_HIGHEST_START_BYTE 0xF9 |
| 461 | |
| 462 | /* |
| 463 | * ex: set ts=8 sts=4 sw=4 et: |
| 464 | */ |