This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unicode/UCD.pm: Clarify pod
[perl5.git] / regcharclass.h
CommitLineData
/* -*- buffer-read-only: t -*-
 *
 * regcharclass.h
 *
 * Copyright (C) 2007, 2011 by Larry Wall and others
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Artistic License, as specified in the README file.
 *
 * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
 * This file is built by regen/regcharclass.pl.
 * Any changes made here will be lost!
 */
12b72891
RGS
14
/*
	LNBREAK: Line Break: \R

	"\x0D\x0A"      # CRLF - Network (Windows) line ending
	0x0A            # LF  | LINE FEED
	0x0B            # VT  | VERTICAL TAB
	0x0C            # FF  | FORM FEED
	0x0D            # CR  | CARRIAGE RETURN
	0x85            # NEL | NEXT LINE
	0x2028          # LINE SEPARATOR
	0x2029          # PARAGRAPH SEPARATOR
*/
/*** GENERATED CODE ***/
/*
 * is_LNBREAK(s, is_utf8)
 *
 * Yields the length in bytes (1, 2 or 3) of the line break sequence that
 * starts at s, or 0 when s does not start with one.  CR followed by LF is
 * reported as a single 2-byte match.  When is_utf8 is true, NEL, LINE
 * SEPARATOR and PARAGRAPH SEPARATOR are recognised by their UTF-8 byte
 * sequences; otherwise only the single byte 0x85 is accepted for NEL.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_LNBREAK(s,is_utf8) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((const U8*)(s))[0] ) ? \
    ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0x85 == ((const U8*)(s))[0] ) )
e1d1eefb 39
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_LNBREAK_safe(s, e, is_utf8)
 *
 * Bounds-checked form of the line break test: yields the length in bytes
 * (1, 2 or 3) of the line break sequence starting at s, or 0 when there is
 * none.  e marks the end of the readable data; the number of bytes
 * examined never exceeds (e)-(s), and the result is 0 when (e)-(s) <= 0.
 * A sequence that would need more bytes than are available does not match
 * (a lone CR at the end still matches with length 1).
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_LNBREAK_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((const U8*)(s))[0] ) ? \
        ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
            ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE2 == ((const U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0x85 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((const U8*)(s))[0] ) ? \
        ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((const U8*)(s))[0] ) && ( 0x85 == ((const U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0x85 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0x85 == ((const U8*)(s))[0] ) \
    : 0 ) \
: 0 )
e1d1eefb 66
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8(s)
 *
 * Line break test for UTF-8 encoded data: yields the length in bytes
 * (1, 2 or 3) of the line break sequence starting at s, or 0 when there is
 * none.  CR followed by LF counts as one 2-byte match.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_LNBREAK_utf8(s) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((const U8*)(s))[0] ) ? \
    ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
: ( 0xC2 == ((const U8*)(s))[0] ) ? \
    ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE2 == ((const U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
e1d1eefb 77
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_LNBREAK_utf8_safe(s, e)
 *
 * Bounds-checked line break test for UTF-8 encoded data: yields the length
 * in bytes (1, 2 or 3) of the line break sequence starting at s, or 0 when
 * there is none.  e marks the end of the readable data; the number of
 * bytes examined never exceeds (e)-(s), and the result is 0 when
 * (e)-(s) <= 0.  Multi-byte sequences that do not fit do not match.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_LNBREAK_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((const U8*)(s))[0] ) ? \
        ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((const U8*)(s))[0] ) ? \
        ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) \
: 0 )
e1d1eefb 99
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1(s)
 *
 * Line break test for one-byte-per-character data: yields 2 for CR
 * followed by LF, 1 for a lone LF, VT, FF, CR or the byte 0x85 (NEL),
 * and 0 otherwise.
 *
 * No bounds checking: s[1] is read when s[0] is CR.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_LNBREAK_latin1(s) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
: ( 0x0D == ((const U8*)(s))[0] ) ? \
    ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
: ( 0x85 == ((const U8*)(s))[0] ) )
e1d1eefb 106
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_LNBREAK_latin1_safe(s, e)
 *
 * Bounds-checked line break test for one-byte-per-character data: yields
 * 2 for CR followed by LF (only when both bytes lie before e), 1 for a
 * lone LF, VT, FF, CR or the byte 0x85 (NEL), and 0 otherwise.  The result
 * is 0 when (e)-(s) <= 0.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_LNBREAK_latin1_safe(s,e) \
( ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0C ) ? 1 \
    : ( 0x0D == ((const U8*)(s))[0] ) ? \
        ( ( 0x0A == ((const U8*)(s))[1] ) ? 2 : 1 ) \
    : ( 0x85 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) || 0x85 == ((const U8*)(s))[0] ) \
: 0 )
12b72891
RGS
117
/*
	HORIZWS: Horizontal Whitespace: \h \H

	0x09            # HT
	0x20            # SPACE
	0xa0            # NBSP
	0x1680          # OGHAM SPACE MARK
	0x180e          # MONGOLIAN VOWEL SEPARATOR
	0x2000          # EN QUAD
	0x2001          # EM QUAD
	0x2002          # EN SPACE
	0x2003          # EM SPACE
	0x2004          # THREE-PER-EM SPACE
	0x2005          # FOUR-PER-EM SPACE
	0x2006          # SIX-PER-EM SPACE
	0x2007          # FIGURE SPACE
	0x2008          # PUNCTUATION SPACE
	0x2009          # THIN SPACE
	0x200A          # HAIR SPACE
	0x202f          # NARROW NO-BREAK SPACE
	0x205f          # MEDIUM MATHEMATICAL SPACE
	0x3000          # IDEOGRAPHIC SPACE
*/
/*** GENERATED CODE ***/
/*
 * is_HORIZWS(s, is_utf8)
 *
 * Yields the length in bytes (1, 2 or 3) of the horizontal whitespace
 * character that starts at s, or 0 when s does not start with one.  When
 * is_utf8 is true the characters above 0x7F are recognised by their UTF-8
 * byte sequences; otherwise only the single byte 0xA0 (NBSP) is accepted
 * in addition to HT and SPACE.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_HORIZWS(s,is_utf8) \
( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE1 == ((const U8*)(s))[0] ) ? \
        ( ( 0x9A == ((const U8*)(s))[1] ) ? \
            ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0xA0 == ((const U8*)(s))[1] ) ? \
            ( ( 0x8E == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x80 == ((const U8*)(s))[1] ) ? \
            ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0x81 == ((const U8*)(s))[1] ) ? \
            ( ( 0x9F == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE3 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xA0 == ((const U8*)(s))[0] ) )
e1d1eefb 163
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_safe(s, e, is_utf8)
 *
 * Bounds-checked form of the horizontal whitespace test: yields the length
 * in bytes (1, 2 or 3) of the horizontal whitespace character starting at
 * s, or 0 when there is none.  e marks the end of the readable data; the
 * number of bytes examined never exceeds (e)-(s), and the result is 0 when
 * (e)-(s) <= 0.  UTF-8 sequences that do not fit before e do not match.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_HORIZWS_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
            ( ( 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE1 == ((const U8*)(s))[0] ) ? \
            ( ( 0x9A == ((const U8*)(s))[1] ) ? \
                ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
            : ( 0xA0 == ((const U8*)(s))[1] ) ? \
                ( ( 0x8E == ((const U8*)(s))[2] ) ? 3 : 0 ) \
            : 0 ) \
        : ( 0xE2 == ((const U8*)(s))[0] ) ? \
            ( ( 0x80 == ((const U8*)(s))[1] ) ? \
                ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 ) \
            : ( 0x81 == ((const U8*)(s))[1] ) ? \
                ( ( 0x9F == ((const U8*)(s))[2] ) ? 3 : 0 ) \
            : 0 ) \
        : ( 0xE3 == ((const U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xA0 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((const U8*)(s))[0] ) && ( 0xA0 == ((const U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0xA0 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0xA0 == ((const U8*)(s))[0] ) \
    : 0 ) \
: 0 )
e1d1eefb 198
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8(s)
 *
 * Horizontal whitespace test for UTF-8 encoded data: yields the length in
 * bytes (1, 2 or 3) of the horizontal whitespace character starting at s,
 * or 0 when there is none.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_HORIZWS_utf8(s) \
( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
: ( 0xC2 == ((const U8*)(s))[0] ) ? \
    ( ( 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE1 == ((const U8*)(s))[0] ) ? \
    ( ( 0x9A == ((const U8*)(s))[1] ) ? \
        ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
    : ( 0xA0 == ((const U8*)(s))[1] ) ? \
        ( ( 0x8E == ((const U8*)(s))[2] ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xE2 == ((const U8*)(s))[0] ) ? \
    ( ( 0x80 == ((const U8*)(s))[1] ) ? \
        ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 ) \
    : ( 0x81 == ((const U8*)(s))[1] ) ? \
        ( ( 0x9F == ((const U8*)(s))[2] ) ? 3 : 0 ) \
    : 0 ) \
: ( 0xE3 == ((const U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
e1d1eefb 219
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_utf8_safe(s, e)
 *
 * Bounds-checked horizontal whitespace test for UTF-8 encoded data: yields
 * the length in bytes (1, 2 or 3) of the horizontal whitespace character
 * starting at s, or 0 when there is none.  e marks the end of the readable
 * data; the number of bytes examined never exceeds (e)-(s), and the
 * result is 0 when (e)-(s) <= 0.  Sequences that do not fit do not match.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_HORIZWS_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE1 == ((const U8*)(s))[0] ) ? \
        ( ( 0x9A == ((const U8*)(s))[1] ) ? \
            ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0xA0 == ((const U8*)(s))[1] ) ? \
            ( ( 0x8E == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x80 == ((const U8*)(s))[1] ) ? \
            ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : ( 0x81 == ((const U8*)(s))[1] ) ? \
            ( ( 0x9F == ((const U8*)(s))[2] ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0xE3 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) ? 1 \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] ) \
: 0 )
e1d1eefb 249
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1(s)
 *
 * Horizontal whitespace test for one-byte-per-character data: yields 1
 * when the byte at s is HT (0x09), SPACE (0x20) or NBSP (0xA0), else 0.
 *
 * Reads s[0] without any bounds check.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_HORIZWS_latin1(s) \
( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] || 0xA0 == ((const U8*)(s))[0] )
e1d1eefb 253
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_latin1_safe(s, e)
 *
 * Bounds-checked horizontal whitespace test for one-byte-per-character
 * data: yields 1 when at least one byte lies before e and the byte at s is
 * HT (0x09), SPACE (0x20) or NBSP (0xA0); yields 0 otherwise, including
 * when (e)-(s) <= 0.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_HORIZWS_latin1_safe(s,e) \
( ((e)-(s) > 0) ? \
    ( 0x09 == ((const U8*)(s))[0] || 0x20 == ((const U8*)(s))[0] || 0xA0 == ((const U8*)(s))[0] ) \
: 0 )
e1d1eefb 259
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_HORIZWS_cp(cp)
 *
 * Yields nonzero when the code point cp is one of the horizontal
 * whitespace characters (0x09, 0x20, 0xA0, 0x1680, 0x180E, 0x2000-0x200A,
 * 0x202F, 0x205F, 0x3000) and 0 otherwise.  The comparisons are ordered so
 * that evaluation stops as soon as cp is below the next candidate.
 *
 * cp is parenthesised at every use so that arguments built from low
 * precedence operators (|, ?:, ...) are compared as a whole.
 * cp is evaluated more than once, so it must be free of side effects.
 */
#define is_HORIZWS_cp(cp) \
( 0x09 == (cp) || ( 0x09 < (cp) && \
( 0x20 == (cp) || ( 0x20 < (cp) && \
( 0xA0 == (cp) || ( 0xA0 < (cp) && \
( 0x1680 == (cp) || ( 0x1680 < (cp) && \
( 0x180E == (cp) || ( 0x180E < (cp) && \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) && \
( 0x202F == (cp) || ( 0x202F < (cp) && \
( 0x205F == (cp) || ( 0x205F < (cp) && \
0x3000 == (cp) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
e1d1eefb 271
12b72891
RGS
/*
	VERTWS: Vertical Whitespace: \v \V

	0x0A            # LF
	0x0B            # VT
	0x0C            # FF
	0x0D            # CR
	0x85            # NEL
	0x2028          # LINE SEPARATOR
	0x2029          # PARAGRAPH SEPARATOR
*/
/*** GENERATED CODE ***/
/*
 * is_VERTWS(s, is_utf8)
 *
 * Yields the length in bytes (1, 2 or 3) of the vertical whitespace
 * character that starts at s, or 0 when s does not start with one.  Each
 * of LF, VT, FF and CR is a 1-byte match on its own (CR LF is not
 * combined).  When is_utf8 is true, NEL, LINE SEPARATOR and PARAGRAPH
 * SEPARATOR are recognised by their UTF-8 byte sequences; otherwise only
 * the single byte 0x85 is accepted for NEL.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_VERTWS(s,is_utf8) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
: ( is_utf8 ) ? \
    ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ( 0x85 == ((const U8*)(s))[0] ) )
e1d1eefb 293
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_safe(s, e, is_utf8)
 *
 * Bounds-checked form of the vertical whitespace test: yields the length
 * in bytes (1, 2 or 3) of the vertical whitespace character starting at s,
 * or 0 when there is none.  e marks the end of the readable data; the
 * number of bytes examined never exceeds (e)-(s), and the result is 0 when
 * (e)-(s) <= 0.  UTF-8 sequences that do not fit before e do not match.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_VERTWS_safe(s,e,is_utf8) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
            ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
        : ( 0xE2 == ((const U8*)(s))[0] ) ? \
            ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
        : 0 ) \
    : ( 0x85 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( is_utf8 ) ? \
        ( ( ( 0xC2 == ((const U8*)(s))[0] ) && ( 0x85 == ((const U8*)(s))[1] ) ) ? 2 : 0 ) \
    : ( 0x85 == ((const U8*)(s))[0] ) ) \
: ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( !( is_utf8 ) ) ? \
        ( 0x85 == ((const U8*)(s))[0] ) \
    : 0 ) \
: 0 )
e1d1eefb 316
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8(s)
 *
 * Vertical whitespace test for UTF-8 encoded data: yields the length in
 * bytes (1, 2 or 3) of the vertical whitespace character starting at s, or
 * 0 when there is none.
 *
 * No bounds checking: up to three bytes starting at s may be read.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_VERTWS_utf8(s) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
: ( 0xC2 == ((const U8*)(s))[0] ) ? \
    ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
: ( 0xE2 == ((const U8*)(s))[0] ) ? \
    ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
: 0 )
e1d1eefb 325
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_utf8_safe(s, e)
 *
 * Bounds-checked vertical whitespace test for UTF-8 encoded data: yields
 * the length in bytes (1, 2 or 3) of the vertical whitespace character
 * starting at s, or 0 when there is none.  e marks the end of the readable
 * data; the number of bytes examined never exceeds (e)-(s), and the
 * result is 0 when (e)-(s) <= 0.  Sequences that do not fit do not match.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_VERTWS_utf8_safe(s,e) \
( ((e)-(s) > 2) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : ( 0xE2 == ((const U8*)(s))[0] ) ? \
        ( ( ( 0x80 == ((const U8*)(s))[1] ) && ( 0xA8 == ((const U8*)(s))[2] || 0xA9 == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
    : 0 ) \
: ((e)-(s) > 1) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) ? 1 \
    : ( 0xC2 == ((const U8*)(s))[0] ) ? \
        ( ( 0x85 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
    : 0 ) \
: ((e)-(s) > 0) ? \
    ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) \
: 0 )
e1d1eefb 343
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1(s)
 *
 * Vertical whitespace test for one-byte-per-character data: yields 1 when
 * the byte at s is in 0x0A..0x0D (LF, VT, FF, CR) or is 0x85 (NEL), else 0.
 *
 * Reads s[0] without any bounds check.
 * s is evaluated more than once, so it must be free of side effects.
 */
#define is_VERTWS_latin1(s) \
( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) || 0x85 == ((const U8*)(s))[0] )
e1d1eefb 347
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_latin1_safe(s, e)
 *
 * Bounds-checked vertical whitespace test for one-byte-per-character
 * data: yields 1 when at least one byte lies before e and the byte at s is
 * in 0x0A..0x0D (LF, VT, FF, CR) or is 0x85 (NEL); yields 0 otherwise,
 * including when (e)-(s) <= 0.
 *
 * s and e are evaluated more than once, so they must be free of side
 * effects.
 */
#define is_VERTWS_latin1_safe(s,e) \
( ((e)-(s) > 0) ? \
    ( ( 0x0A <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) || 0x85 == ((const U8*)(s))[0] ) \
: 0 )
e1d1eefb 353
12b72891
RGS
/*** GENERATED CODE ***/
/*
 * is_VERTWS_cp(cp)
 *
 * Yields nonzero when the code point cp is one of the vertical whitespace
 * characters (0x0A-0x0D, 0x85, 0x2028, 0x2029) and 0 otherwise.  The
 * comparisons are ordered so that evaluation stops as soon as cp is below
 * the next candidate.
 *
 * cp is parenthesised at every use so that arguments built from low
 * precedence operators (|, ?:, ...) are compared as a whole.
 * cp is evaluated more than once, so it must be free of side effects.
 */
#define is_VERTWS_cp(cp) \
( ( 0x0A <= (cp) && (cp) <= 0x0D ) || ( 0x0D < (cp) && \
( 0x85 == (cp) || ( 0x85 < (cp) && \
( 0x2028 == (cp) || ( 0x2028 < (cp) && \
0x2029 == (cp) ) ) ) ) ) )
e1d1eefb 360
8770da0e 361
/* ex: set ro: */