1 /* -*- buffer-read-only: t -*-
5 * Copyright (C) 2007, 2011 by Larry Wall and others
7 * You may distribute under the terms of either the GNU General Public
8 * License or the Artistic License, as specified in the README file.
10 * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
11 * This file is built by regen/regcharclass.pl.
12 * Any changes made here will be lost!
16 #ifndef H_REGCHARCLASS /* Guard against nested #includes */
17 #define H_REGCHARCLASS 1
20 LNBREAK: Line Break: \R
22 "\x0D\x0A" # CRLF - Network (Windows) line ending
24 0x0B # VT | VERTICAL TAB
26 0x0D # CR | CARRIAGE RETURN
27 0x85 # NEL | NEXT LINE
28 0x2028 # LINE SEPARATOR
29 0x2029 # PARAGRAPH SEPARATOR
31 /*** GENERATED CODE ***/
32 #define is_LNBREAK(s,is_utf8) \
33 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
34 : ( 0x0D == ((U8*)s)[0] ) ? \
35 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
37 ( ( 0xC2 == ((U8*)s)[0] ) ? \
38 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
39 : ( 0xE2 == ((U8*)s)[0] ) ? \
40 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
42 : ( 0x85 == ((U8*)s)[0] ) )
44 /*** GENERATED CODE ***/
45 #define is_LNBREAK_safe(s,e,is_utf8) \
47 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
48 : ( 0x0D == ((U8*)s)[0] ) ? \
49 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
51 ( ( 0xC2 == ((U8*)s)[0] ) ? \
52 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
53 : ( 0xE2 == ((U8*)s)[0] ) ? \
54 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
56 : ( 0x85 == ((U8*)s)[0] ) ) \
58 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
59 : ( 0x0D == ((U8*)s)[0] ) ? \
60 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
62 ( ( ( 0xC2 == ((U8*)s)[0] ) && ( 0x85 == ((U8*)s)[1] ) ) ? 2 : 0 ) \
63 : ( 0x85 == ((U8*)s)[0] ) ) \
65 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
66 : ( !( is_utf8 ) ) ? \
67 ( 0x85 == ((U8*)s)[0] ) \
71 /*** GENERATED CODE ***/
72 #define is_LNBREAK_utf8(s) \
73 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
74 : ( 0x0D == ((U8*)s)[0] ) ? \
75 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
76 : ( 0xC2 == ((U8*)s)[0] ) ? \
77 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
78 : ( 0xE2 == ((U8*)s)[0] ) ? \
79 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
82 /*** GENERATED CODE ***/
83 #define is_LNBREAK_utf8_safe(s,e) \
85 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
86 : ( 0x0D == ((U8*)s)[0] ) ? \
87 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
88 : ( 0xC2 == ((U8*)s)[0] ) ? \
89 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
90 : ( 0xE2 == ((U8*)s)[0] ) ? \
91 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
94 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
95 : ( 0x0D == ((U8*)s)[0] ) ? \
96 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
97 : ( 0xC2 == ((U8*)s)[0] ) ? \
98 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
101 ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) \
104 /*** GENERATED CODE ***/
/* is_LNBREAK_latin1(s): match a line break at the start of the single-byte
 * (non-UTF-8) string s.
 *
 * Evaluates to the number of bytes matched, or 0 when nothing matches:
 *   2  CR (0x0D) immediately followed by LF (0x0A)
 *   1  a lone CR, any byte in 0x0A..0x0C, or NEL (0x85)
 *   0  anything else
 *
 * s is expanded several times, so it must be free of side effects.  It is
 * parenthesized inside the cast so that arguments such as `p + 1` or
 * `c ? a : b` are converted to a byte pointer as a whole.  s[1] is read
 * whenever s[0] is CR; this variant takes no end pointer, so the caller must
 * guarantee that byte is readable.
 *
 * NOTE(review): the file header says this file is built by
 * regen/regcharclass.pl; mirror this change there or it will be lost.
 */
#define is_LNBREAK_latin1(s)                                                \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0C ) ? 1                    \
: ( 0x0D == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x0A == ((U8*)(s))[1] ) ? 2 : 1 )                                   \
: ( 0x85 == ((U8*)(s))[0] ) )
111 /*** GENERATED CODE ***/
112 #define is_LNBREAK_latin1_safe(s,e) \
114 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \
115 : ( 0x0D == ((U8*)s)[0] ) ? \
116 ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \
117 : ( 0x85 == ((U8*)s)[0] ) ) \
119 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x85 == ((U8*)s)[0] )\
123 HORIZWS: Horizontal Whitespace: \h \H
128 0x1680 # OGHAM SPACE MARK
129 0x180e # MONGOLIAN VOWEL SEPARATOR
134 0x2004 # THREE-PER-EM SPACE
135 0x2005 # FOUR-PER-EM SPACE
136 0x2006 # SIX-PER-EM SPACE
137 0x2007 # FIGURE SPACE
138 0x2008 # PUNCTUATION SPACE
141 0x202f # NARROW NO-BREAK SPACE
142 0x205f # MEDIUM MATHEMATICAL SPACE
143 0x3000 # IDEOGRAPHIC SPACE
145 /*** GENERATED CODE ***/
146 #define is_HORIZWS(s,is_utf8) \
147 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
149 ( ( 0xC2 == ((U8*)s)[0] ) ? \
150 ( ( 0xA0 == ((U8*)s)[1] ) ? 2 : 0 ) \
151 : ( 0xE1 == ((U8*)s)[0] ) ? \
152 ( ( 0x9A == ((U8*)s)[1] ) ? \
153 ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
154 : ( 0xA0 == ((U8*)s)[1] ) ? \
155 ( ( 0x8E == ((U8*)s)[2] ) ? 3 : 0 ) \
157 : ( 0xE2 == ((U8*)s)[0] ) ? \
158 ( ( 0x80 == ((U8*)s)[1] ) ? \
159 ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
160 : ( 0x81 == ((U8*)s)[1] ) ? \
161 ( ( 0x9F == ((U8*)s)[2] ) ? 3 : 0 ) \
163 : ( 0xE3 == ((U8*)s)[0] ) ? \
164 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
166 : ( 0xA0 == ((U8*)s)[0] ) )
168 /*** GENERATED CODE ***/
169 #define is_HORIZWS_safe(s,e,is_utf8) \
171 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
173 ( ( 0xC2 == ((U8*)s)[0] ) ? \
174 ( ( 0xA0 == ((U8*)s)[1] ) ? 2 : 0 ) \
175 : ( 0xE1 == ((U8*)s)[0] ) ? \
176 ( ( 0x9A == ((U8*)s)[1] ) ? \
177 ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
178 : ( 0xA0 == ((U8*)s)[1] ) ? \
179 ( ( 0x8E == ((U8*)s)[2] ) ? 3 : 0 ) \
181 : ( 0xE2 == ((U8*)s)[0] ) ? \
182 ( ( 0x80 == ((U8*)s)[1] ) ? \
183 ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
184 : ( 0x81 == ((U8*)s)[1] ) ? \
185 ( ( 0x9F == ((U8*)s)[2] ) ? 3 : 0 ) \
187 : ( 0xE3 == ((U8*)s)[0] ) ? \
188 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )\
190 : ( 0xA0 == ((U8*)s)[0] ) ) \
192 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
194 ( ( ( 0xC2 == ((U8*)s)[0] ) && ( 0xA0 == ((U8*)s)[1] ) ) ? 2 : 0 ) \
195 : ( 0xA0 == ((U8*)s)[0] ) ) \
197 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
198 : ( !( is_utf8 ) ) ? \
199 ( 0xA0 == ((U8*)s)[0] ) \
203 /*** GENERATED CODE ***/
204 #define is_HORIZWS_utf8(s) \
205 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
206 : ( 0xC2 == ((U8*)s)[0] ) ? \
207 ( ( 0xA0 == ((U8*)s)[1] ) ? 2 : 0 ) \
208 : ( 0xE1 == ((U8*)s)[0] ) ? \
209 ( ( 0x9A == ((U8*)s)[1] ) ? \
210 ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
211 : ( 0xA0 == ((U8*)s)[1] ) ? \
212 ( ( 0x8E == ((U8*)s)[2] ) ? 3 : 0 ) \
214 : ( 0xE2 == ((U8*)s)[0] ) ? \
215 ( ( 0x80 == ((U8*)s)[1] ) ? \
216 ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
217 : ( 0x81 == ((U8*)s)[1] ) ? \
218 ( ( 0x9F == ((U8*)s)[2] ) ? 3 : 0 ) \
220 : ( 0xE3 == ((U8*)s)[0] ) ? \
221 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
224 /*** GENERATED CODE ***/
225 #define is_HORIZWS_utf8_safe(s,e) \
227 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
228 : ( 0xC2 == ((U8*)s)[0] ) ? \
229 ( ( 0xA0 == ((U8*)s)[1] ) ? 2 : 0 ) \
230 : ( 0xE1 == ((U8*)s)[0] ) ? \
231 ( ( 0x9A == ((U8*)s)[1] ) ? \
232 ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
233 : ( 0xA0 == ((U8*)s)[1] ) ? \
234 ( ( 0x8E == ((U8*)s)[2] ) ? 3 : 0 ) \
236 : ( 0xE2 == ((U8*)s)[0] ) ? \
237 ( ( 0x80 == ((U8*)s)[1] ) ? \
238 ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
239 : ( 0x81 == ((U8*)s)[1] ) ? \
240 ( ( 0x9F == ((U8*)s)[2] ) ? 3 : 0 ) \
242 : ( 0xE3 == ((U8*)s)[0] ) ? \
243 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
246 ( ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) ? 1 \
247 : ( 0xC2 == ((U8*)s)[0] ) ? \
248 ( ( 0xA0 == ((U8*)s)[1] ) ? 2 : 0 ) \
251 ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] ) \
254 /*** GENERATED CODE ***/
/* is_HORIZWS_latin1(s): true (1) when the first byte of the single-byte
 * (non-UTF-8) string s is horizontal whitespace, i.e. one of 0x09 (TAB),
 * 0x20 (SPACE) or 0xA0 (NO-BREAK SPACE); otherwise 0.
 *
 * Only s[0] is read.  s is expanded several times, so it must be free of
 * side effects.  It is parenthesized inside the cast so that arguments such
 * as `p + 1` or `c ? a : b` are converted to a byte pointer as a whole.
 *
 * NOTE(review): the file header says this file is built by
 * regen/regcharclass.pl; mirror this change there or it will be lost.
 */
#define is_HORIZWS_latin1(s)                                                \
( 0x09 == ((U8*)(s))[0] || 0x20 == ((U8*)(s))[0] || 0xA0 == ((U8*)(s))[0] )
258 /*** GENERATED CODE ***/
259 #define is_HORIZWS_latin1_safe(s,e) \
261 ( 0x09 == ((U8*)s)[0] || 0x20 == ((U8*)s)[0] || 0xA0 == ((U8*)s)[0] ) \
264 /*** GENERATED CODE ***/
/* is_HORIZWS_cp(cp): true (non-zero) when the code point cp is horizontal
 * whitespace: 0x09, 0x20, 0xA0, 0x1680, 0x180E, 0x2000..0x200A, 0x202F,
 * 0x205F or 0x3000; otherwise 0.
 *
 * The tests are nested in ascending order: each level first checks for an
 * exact hit and only descends when cp is strictly greater, so evaluation
 * stops at the first value that is not below cp.
 *
 * cp is expanded many times, so it must be free of side effects.  Every use
 * is parenthesized so that an expression argument (e.g. `a | b`) is compared
 * as a whole rather than being split by operator precedence.
 *
 * NOTE(review): the file header says this file is built by
 * regen/regcharclass.pl; mirror this change there or it will be lost.
 */
#define is_HORIZWS_cp(cp)                                                   \
( 0x09 == (cp) || ( 0x09 < (cp) &&                                          \
( 0x20 == (cp) || ( 0x20 < (cp) &&                                          \
( 0xA0 == (cp) || ( 0xA0 < (cp) &&                                          \
( 0x1680 == (cp) || ( 0x1680 < (cp) &&                                      \
( 0x180E == (cp) || ( 0x180E < (cp) &&                                      \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) &&                \
( 0x202F == (cp) || ( 0x202F < (cp) &&                                      \
( 0x205F == (cp) || ( 0x205F < (cp) &&                                      \
0x3000 == (cp) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
277 VERTWS: Vertical Whitespace: \v \V
284 0x2028 # LINE SEPARATOR
285 0x2029 # PARAGRAPH SEPARATOR
287 /*** GENERATED CODE ***/
288 #define is_VERTWS(s,is_utf8) \
289 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
291 ( ( 0xC2 == ((U8*)s)[0] ) ? \
292 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
293 : ( 0xE2 == ((U8*)s)[0] ) ? \
294 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
296 : ( 0x85 == ((U8*)s)[0] ) )
298 /*** GENERATED CODE ***/
299 #define is_VERTWS_safe(s,e,is_utf8) \
301 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
303 ( ( 0xC2 == ((U8*)s)[0] ) ? \
304 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
305 : ( 0xE2 == ((U8*)s)[0] ) ? \
306 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
308 : ( 0x85 == ((U8*)s)[0] ) ) \
310 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
312 ( ( ( 0xC2 == ((U8*)s)[0] ) && ( 0x85 == ((U8*)s)[1] ) ) ? 2 : 0 ) \
313 : ( 0x85 == ((U8*)s)[0] ) ) \
315 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
316 : ( !( is_utf8 ) ) ? \
317 ( 0x85 == ((U8*)s)[0] ) \
321 /*** GENERATED CODE ***/
322 #define is_VERTWS_utf8(s) \
323 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
324 : ( 0xC2 == ((U8*)s)[0] ) ? \
325 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
326 : ( 0xE2 == ((U8*)s)[0] ) ? \
327 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
330 /*** GENERATED CODE ***/
331 #define is_VERTWS_utf8_safe(s,e) \
333 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
334 : ( 0xC2 == ((U8*)s)[0] ) ? \
335 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
336 : ( 0xE2 == ((U8*)s)[0] ) ? \
337 ( ( ( 0x80 == ((U8*)s)[1] ) && ( 0xA8 == ((U8*)s)[2] || 0xA9 == ((U8*)s)[2] ) ) ? 3 : 0 )\
340 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) ? 1 \
341 : ( 0xC2 == ((U8*)s)[0] ) ? \
342 ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \
345 ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) \
348 /*** GENERATED CODE ***/
/* is_VERTWS_latin1(s): true (1) when the first byte of the single-byte
 * (non-UTF-8) string s is vertical whitespace, i.e. a byte in 0x0A..0x0D or
 * NEL (0x85); otherwise 0.
 *
 * Only s[0] is read.  s is expanded several times, so it must be free of
 * side effects.  It is parenthesized inside the cast so that arguments such
 * as `p + 1` or `c ? a : b` are converted to a byte pointer as a whole.
 *
 * NOTE(review): the file header says this file is built by
 * regen/regcharclass.pl; mirror this change there or it will be lost.
 */
#define is_VERTWS_latin1(s)                                                 \
( ( 0x0A <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x85 == ((U8*)(s))[0] )
352 /*** GENERATED CODE ***/
353 #define is_VERTWS_latin1_safe(s,e) \
355 ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x85 == ((U8*)s)[0] )\
358 /*** GENERATED CODE ***/
/* is_VERTWS_cp(cp): true (non-zero) when the code point cp is vertical
 * whitespace: 0x0A..0x0D, 0x85 (NEL), 0x2028 (LINE SEPARATOR) or
 * 0x2029 (PARAGRAPH SEPARATOR); otherwise 0.
 *
 * The tests are nested in ascending order: each level first checks for an
 * exact hit and only descends when cp is strictly greater.
 *
 * cp is expanded many times, so it must be free of side effects.  Every use
 * is parenthesized so that an expression argument (e.g. `a | b`) is compared
 * as a whole rather than being split by operator precedence.
 *
 * NOTE(review): the file header says this file is built by
 * regen/regcharclass.pl; mirror this change there or it will be lost.
 */
#define is_VERTWS_cp(cp)                                                    \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || ( 0x0D < (cp) &&                      \
( 0x85 == (cp) || ( 0x85 < (cp) &&                                          \
( 0x2028 == (cp) || ( 0x2028 < (cp) &&                                      \
0x2029 == (cp) ) ) ) ) ) )
366 #endif /* H_REGCHARCLASS */