This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.sym: Add comments
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
/* -*- buffer-read-only: t -*-
   !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
   This file is built by regen/regcomp.pl from regcomp.sym.
   Any changes made here will be lost!
 */
d09b2d29 6
6bda09f9
YO
/* Regops and State definitions
 *
 * Regops (regex opcodes) are numbered 0 .. REGNODE_MAX.  Regmatch states
 * continue the same numbering at REGNODE_MAX + 1 and run up to
 * REGMATCH_STATE_MAX, so one index space covers both.  The tables in this
 * file are laid out in exactly this order.
 */

#define REGNODE_MAX             111
#define REGMATCH_STATE_MAX      151

#define END             0       /* 0x00 End of program. */
#define SUCCEED         1       /* 0x01 Return from a subroutine, basically. */
#define BOL             2       /* 0x02 Match "" at beginning of line. */
#define MBOL            3       /* 0x03 Same, assuming multiline. */
#define SBOL            4       /* 0x04 Same, assuming singleline. */
#define EOS             5       /* 0x05 Match "" at end of string. */
#define EOL             6       /* 0x06 Match "" at end of line. */
#define MEOL            7       /* 0x07 Same, assuming multiline. */
#define SEOL            8       /* 0x08 Same, assuming singleline. */
#define BOUND           9       /* 0x09 Match "" at any word boundary using native charset semantics for non-utf8 */
#define BOUNDL          10      /* 0x0a Match "" at any locale word boundary */
#define BOUNDU          11      /* 0x0b Match "" at any word boundary using Unicode semantics */
#define BOUNDA          12      /* 0x0c Match "" at any word boundary using ASCII semantics */
#define NBOUND          13      /* 0x0d Match "" at any word non-boundary using native charset semantics for non-utf8 */
#define NBOUNDL         14      /* 0x0e Match "" at any locale word non-boundary */
#define NBOUNDU         15      /* 0x0f Match "" at any word non-boundary using Unicode semantics */
#define NBOUNDA         16      /* 0x10 Match "" at any word non-boundary using ASCII semantics */
#define GPOS            17      /* 0x11 Matches where last m//g left off. */
#define REG_ANY         18      /* 0x12 Match any one character (except newline). */
#define SANY            19      /* 0x13 Match any one character. */
#define CANY            20      /* 0x14 Match any one byte. */
#define ANYOF           21      /* 0x15 Match character in (or not in) this class, single char match only */
#define ANYOFV          22      /* 0x16 Match character in (or not in) this class, can match-multiple chars */
#define ALNUM           23      /* 0x17 Match any alphanumeric character using native charset semantics for non-utf8 */
#define ALNUML          24      /* 0x18 Match any alphanumeric char in locale */
#define ALNUMU          25      /* 0x19 Match any alphanumeric char using Unicode semantics */
#define ALNUMA          26      /* 0x1a Match [A-Za-z_0-9] */
#define NALNUM          27      /* 0x1b Match any non-alphanumeric character using native charset semantics for non-utf8 */
#define NALNUML         28      /* 0x1c Match any non-alphanumeric char in locale */
#define NALNUMU         29      /* 0x1d Match any non-alphanumeric char using Unicode semantics */
#define NALNUMA         30      /* 0x1e Match [^A-Za-z_0-9] */
#define SPACE           31      /* 0x1f Match any whitespace character using native charset semantics for non-utf8 */
#define SPACEL          32      /* 0x20 Match any whitespace char in locale */
#define SPACEU          33      /* 0x21 Match any whitespace char using Unicode semantics */
#define SPACEA          34      /* 0x22 Match [ \t\n\f\r] */
#define NSPACE          35      /* 0x23 Match any non-whitespace character using native charset semantics for non-utf8 */
#define NSPACEL         36      /* 0x24 Match any non-whitespace char in locale */
#define NSPACEU         37      /* 0x25 Match any non-whitespace char using Unicode semantics */
#define NSPACEA         38      /* 0x26 Match [^ \t\n\f\r] */
#define DIGIT           39      /* 0x27 Match any numeric character using native charset semantics for non-utf8 */
#define DIGITL          40      /* 0x28 Match any numeric character in locale */
#define DIGITA          41      /* 0x29 Match [0-9] */
#define NDIGIT          42      /* 0x2a Match any non-numeric character using native charset semantics for non-utf8 */
#define NDIGITL         43      /* 0x2b Match any non-numeric character in locale */
#define NDIGITA         44      /* 0x2c Match [^0-9] */
#define CLUMP           45      /* 0x2d Match any extended grapheme cluster sequence */
#define BRANCH          46      /* 0x2e Match this alternative, or the next... */
#define BACK            47      /* 0x2f Match "", "next" ptr points backward. */
#define EXACT           48      /* 0x30 Match this string (preceded by length). */
#define EXACTF          49      /* 0x31 Match this (folded if in UTF-8) string, folded, native charset semantics for non-utf8 (prec. by length). */
#define EXACTFL         50      /* 0x32 Match this (not guaranteed to be folded) string, folded in locale (w/len). */
#define EXACTFU         51      /* 0x33 Match this (folded if in UTF-8) string, folded, Unicode semantics for non-utf8 (prec. by length). */
#define EXACTFA         52      /* 0x34 Match this (not guaranteed to be folded) string, folded, Unicode semantics for non-utf8, but no ASCII-range character matches outside ASCII (prec. by length). */
#define NOTHING         53      /* 0x35 Match empty string. */
#define TAIL            54      /* 0x36 Match empty string. Can jump here from outside. */
#define STAR            55      /* 0x37 Match this (simple) thing 0 or more times. */
#define PLUS            56      /* 0x38 Match this (simple) thing 1 or more times. */
#define CURLY           57      /* 0x39 Match this simple thing {n,m} times. */
#define CURLYN          58      /* 0x3a Capture next-after-this simple thing */
#define CURLYM          59      /* 0x3b Capture this medium-complex thing {n,m} times. */
#define CURLYX          60      /* 0x3c Match this complex thing {n,m} times. */
#define WHILEM          61      /* 0x3d Do curly processing and see if rest matches. */
#define OPEN            62      /* 0x3e Mark this point in input as start of #n. */
#define CLOSE           63      /* 0x3f Analogous to OPEN. */
#define REF             64      /* 0x40 Match some already matched string */
#define REFF            65      /* 0x41 Match already matched string, folded using native charset semantics for non-utf8 */
#define REFFL           66      /* 0x42 Match already matched string, folded in loc. */
#define REFFU           67      /* 0x43 Match already matched string, folded using unicode semantics for non-utf8 */
#define REFFA           68      /* 0x44 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
#define NREF            69      /* 0x45 Match some already matched string */
#define NREFF           70      /* 0x46 Match already matched string, folded using native charset semantics for non-utf8 */
#define NREFFL          71      /* 0x47 Match already matched string, folded in loc. */
#define NREFFU          72      /* 0x48 Match already matched string, folded using unicode semantics for non-utf8 */
#define NREFFA          73      /* 0x49 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
#define IFMATCH         74      /* 0x4a Succeeds if the following matches. */
#define UNLESSM         75      /* 0x4b Fails if the following matches. */
#define SUSPEND         76      /* 0x4c "Independent" sub-RE. */
#define IFTHEN          77      /* 0x4d Switch, should be preceded by switcher. */
#define GROUPP          78      /* 0x4e Whether the group matched. */
#define LONGJMP         79      /* 0x4f Jump far away. */
#define BRANCHJ         80      /* 0x50 BRANCH with long offset. */
#define EVAL            81      /* 0x51 Execute some Perl code. */
#define MINMOD          82      /* 0x52 Next operator is not greedy. */
#define LOGICAL         83      /* 0x53 Next opcode should set the flag only. */
#define RENUM           84      /* 0x54 Group with independently numbered parens. */
#define TRIE            85      /* 0x55 Match many EXACT(F[ALU]?)? at once. flags==type */
#define TRIEC           86      /* 0x56 Same as TRIE, but with embedded charclass data */
#define AHOCORASICK     87      /* 0x57 Aho Corasick stclass. flags==type */
#define AHOCORASICKC    88      /* 0x58 Same as AHOCORASICK, but with embedded charclass data */
#define GOSUB           89      /* 0x59 recurse to paren arg1 at (signed) ofs arg2 */
#define GOSTART         90      /* 0x5a recurse to start of pattern */
#define NGROUPP         91      /* 0x5b Whether the group matched. */
#define INSUBP          92      /* 0x5c Whether we are in a specific recurse. */
#define DEFINEP         93      /* 0x5d Never execute directly. */
#define ENDLIKE         94      /* 0x5e Used only for the type field of verbs */
#define OPFAIL          95      /* 0x5f Same as (?!) */
#define ACCEPT          96      /* 0x60 Accepts the current matched string. */
#define VERB            97      /* 0x61 Used only for the type field of verbs */
#define PRUNE           98      /* 0x62 Pattern fails at this startpoint if no-backtracking through this */
#define MARKPOINT       99      /* 0x63 Push the current location for rollback by cut. */
#define SKIP            100     /* 0x64 On failure skip forward (to the mark) before retrying */
#define COMMIT          101     /* 0x65 Pattern fails outright if backtracking through this */
#define CUTGROUP        102     /* 0x66 On failure go to the next alternation in the group */
#define KEEPS           103     /* 0x67 $& begins here. */
#define LNBREAK         104     /* 0x68 generic newline pattern */
#define VERTWS          105     /* 0x69 vertical whitespace (Perl 6) */
#define NVERTWS         106     /* 0x6a not vertical whitespace (Perl 6) */
#define HORIZWS         107     /* 0x6b horizontal whitespace (Perl 6) */
#define NHORIZWS        108     /* 0x6c not horizontal whitespace (Perl 6) */
#define FOLDCHAR        109     /* 0x6d codepoint with tricky case folding properties. */
#define OPTIMIZED       110     /* 0x6e Placeholder for dump. */
#define PSEUDO          111     /* 0x6f Pseudo opcode for internal use. */
        /* ------------ States ------------- */
#define TRIE_next               (REGNODE_MAX + 1)       /* state for TRIE */
#define TRIE_next_fail          (REGNODE_MAX + 2)       /* state for TRIE */
#define EVAL_AB                 (REGNODE_MAX + 3)       /* state for EVAL */
#define EVAL_AB_fail            (REGNODE_MAX + 4)       /* state for EVAL */
#define CURLYX_end              (REGNODE_MAX + 5)       /* state for CURLYX */
#define CURLYX_end_fail         (REGNODE_MAX + 6)       /* state for CURLYX */
#define WHILEM_A_pre            (REGNODE_MAX + 7)       /* state for WHILEM */
#define WHILEM_A_pre_fail       (REGNODE_MAX + 8)       /* state for WHILEM */
#define WHILEM_A_min            (REGNODE_MAX + 9)       /* state for WHILEM */
#define WHILEM_A_min_fail       (REGNODE_MAX + 10)      /* state for WHILEM */
#define WHILEM_A_max            (REGNODE_MAX + 11)      /* state for WHILEM */
#define WHILEM_A_max_fail       (REGNODE_MAX + 12)      /* state for WHILEM */
#define WHILEM_B_min            (REGNODE_MAX + 13)      /* state for WHILEM */
#define WHILEM_B_min_fail       (REGNODE_MAX + 14)      /* state for WHILEM */
#define WHILEM_B_max            (REGNODE_MAX + 15)      /* state for WHILEM */
#define WHILEM_B_max_fail       (REGNODE_MAX + 16)      /* state for WHILEM */
#define BRANCH_next             (REGNODE_MAX + 17)      /* state for BRANCH */
#define BRANCH_next_fail        (REGNODE_MAX + 18)      /* state for BRANCH */
#define CURLYM_A                (REGNODE_MAX + 19)      /* state for CURLYM */
#define CURLYM_A_fail           (REGNODE_MAX + 20)      /* state for CURLYM */
#define CURLYM_B                (REGNODE_MAX + 21)      /* state for CURLYM */
#define CURLYM_B_fail           (REGNODE_MAX + 22)      /* state for CURLYM */
#define IFMATCH_A               (REGNODE_MAX + 23)      /* state for IFMATCH */
#define IFMATCH_A_fail          (REGNODE_MAX + 24)      /* state for IFMATCH */
#define CURLY_B_min_known       (REGNODE_MAX + 25)      /* state for CURLY */
#define CURLY_B_min_known_fail  (REGNODE_MAX + 26)      /* state for CURLY */
#define CURLY_B_min             (REGNODE_MAX + 27)      /* state for CURLY */
#define CURLY_B_min_fail        (REGNODE_MAX + 28)      /* state for CURLY */
#define CURLY_B_max             (REGNODE_MAX + 29)      /* state for CURLY */
#define CURLY_B_max_fail        (REGNODE_MAX + 30)      /* state for CURLY */
#define COMMIT_next             (REGNODE_MAX + 31)      /* state for COMMIT */
#define COMMIT_next_fail        (REGNODE_MAX + 32)      /* state for COMMIT */
#define MARKPOINT_next          (REGNODE_MAX + 33)      /* state for MARKPOINT */
#define MARKPOINT_next_fail     (REGNODE_MAX + 34)      /* state for MARKPOINT */
#define SKIP_next               (REGNODE_MAX + 35)      /* state for SKIP */
#define SKIP_next_fail          (REGNODE_MAX + 36)      /* state for SKIP */
#define CUTGROUP_next           (REGNODE_MAX + 37)      /* state for CUTGROUP */
#define CUTGROUP_next_fail      (REGNODE_MAX + 38)      /* state for CUTGROUP */
#define KEEPS_next              (REGNODE_MAX + 39)      /* state for KEEPS */
#define KEEPS_next_fail         (REGNODE_MAX + 40)      /* state for KEEPS */
03363afd 165
6bda09f9 166/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
167
168#ifndef DOINIT
22c35a8c 169EXTCONST U8 PL_regkind[];
d09b2d29 170#else
22c35a8c 171EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
172 END, /* END */
173 END, /* SUCCEED */
174 BOL, /* BOL */
175 BOL, /* MBOL */
176 BOL, /* SBOL */
177 EOL, /* EOS */
178 EOL, /* EOL */
179 EOL, /* MEOL */
180 EOL, /* SEOL */
181 BOUND, /* BOUND */
182 BOUND, /* BOUNDL */
1e355c70 183 BOUND, /* BOUNDU */
0c6e81eb 184 BOUND, /* BOUNDA */
e2e6a0f1
YO
185 NBOUND, /* NBOUND */
186 NBOUND, /* NBOUNDL */
1e355c70 187 NBOUND, /* NBOUNDU */
0c6e81eb 188 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
189 GPOS, /* GPOS */
190 REG_ANY, /* REG_ANY */
191 REG_ANY, /* SANY */
192 REG_ANY, /* CANY */
193 ANYOF, /* ANYOF */
0e019ad6 194 ANYOF, /* ANYOFV */
e2e6a0f1
YO
195 ALNUM, /* ALNUM */
196 ALNUM, /* ALNUML */
fdf48794 197 ALNUM, /* ALNUMU */
0c6e81eb 198 ALNUM, /* ALNUMA */
e2e6a0f1
YO
199 NALNUM, /* NALNUM */
200 NALNUM, /* NALNUML */
fdf48794 201 NALNUM, /* NALNUMU */
0c6e81eb 202 NALNUM, /* NALNUMA */
e2e6a0f1
YO
203 SPACE, /* SPACE */
204 SPACE, /* SPACEL */
fdf48794 205 SPACE, /* SPACEU */
0c6e81eb 206 SPACE, /* SPACEA */
e2e6a0f1
YO
207 NSPACE, /* NSPACE */
208 NSPACE, /* NSPACEL */
fdf48794 209 NSPACE, /* NSPACEU */
0c6e81eb 210 NSPACE, /* NSPACEA */
e2e6a0f1
YO
211 DIGIT, /* DIGIT */
212 DIGIT, /* DIGITL */
0c6e81eb 213 DIGIT, /* DIGITA */
e2e6a0f1
YO
214 NDIGIT, /* NDIGIT */
215 NDIGIT, /* NDIGITL */
0c6e81eb 216 NDIGIT, /* NDIGITA */
e2e6a0f1
YO
217 CLUMP, /* CLUMP */
218 BRANCH, /* BRANCH */
219 BACK, /* BACK */
220 EXACT, /* EXACT */
221 EXACT, /* EXACTF */
222 EXACT, /* EXACTFL */
01f98ec2 223 EXACT, /* EXACTFU */
7986cb47 224 EXACT, /* EXACTFA */
e2e6a0f1
YO
225 NOTHING, /* NOTHING */
226 NOTHING, /* TAIL */
227 STAR, /* STAR */
228 PLUS, /* PLUS */
229 CURLY, /* CURLY */
230 CURLY, /* CURLYN */
231 CURLY, /* CURLYM */
232 CURLY, /* CURLYX */
233 WHILEM, /* WHILEM */
234 OPEN, /* OPEN */
235 CLOSE, /* CLOSE */
236 REF, /* REF */
237 REF, /* REFF */
238 REF, /* REFFL */
01f98ec2 239 REF, /* REFFU */
781aab5c 240 REF, /* REFFA */
01f98ec2
KW
241 REF, /* NREF */
242 REF, /* NREFF */
243 REF, /* NREFFL */
244 REF, /* NREFFU */
781aab5c 245 REF, /* NREFFA */
e2e6a0f1
YO
246 BRANCHJ, /* IFMATCH */
247 BRANCHJ, /* UNLESSM */
248 BRANCHJ, /* SUSPEND */
249 BRANCHJ, /* IFTHEN */
250 GROUPP, /* GROUPP */
251 LONGJMP, /* LONGJMP */
252 BRANCHJ, /* BRANCHJ */
253 EVAL, /* EVAL */
254 MINMOD, /* MINMOD */
255 LOGICAL, /* LOGICAL */
256 BRANCHJ, /* RENUM */
257 TRIE, /* TRIE */
258 TRIE, /* TRIEC */
259 TRIE, /* AHOCORASICK */
260 TRIE, /* AHOCORASICKC */
261 GOSUB, /* GOSUB */
262 GOSTART, /* GOSTART */
e2e6a0f1
YO
263 NGROUPP, /* NGROUPP */
264 INSUBP, /* INSUBP */
265 DEFINEP, /* DEFINEP */
266 ENDLIKE, /* ENDLIKE */
267 ENDLIKE, /* OPFAIL */
268 ENDLIKE, /* ACCEPT */
269 VERB, /* VERB */
5d458dd8 270 VERB, /* PRUNE */
e2e6a0f1 271 VERB, /* MARKPOINT */
5d458dd8 272 VERB, /* SKIP */
e2e6a0f1 273 VERB, /* COMMIT */
5d458dd8 274 VERB, /* CUTGROUP */
ee9b8eae 275 KEEPS, /* KEEPS */
e1d1eefb
YO
276 LNBREAK, /* LNBREAK */
277 VERTWS, /* VERTWS */
278 NVERTWS, /* NVERTWS */
279 HORIZWS, /* HORIZWS */
280 NHORIZWS, /* NHORIZWS */
32e6a07c 281 FOLDCHAR, /* FOLDCHAR */
e2e6a0f1
YO
282 NOTHING, /* OPTIMIZED */
283 PSEUDO, /* PSEUDO */
03363afd 284 /* ------------ States ------------- */
e2e6a0f1
YO
285 TRIE, /* TRIE_next */
286 TRIE, /* TRIE_next_fail */
287 EVAL, /* EVAL_AB */
288 EVAL, /* EVAL_AB_fail */
289 CURLYX, /* CURLYX_end */
290 CURLYX, /* CURLYX_end_fail */
291 WHILEM, /* WHILEM_A_pre */
292 WHILEM, /* WHILEM_A_pre_fail */
293 WHILEM, /* WHILEM_A_min */
294 WHILEM, /* WHILEM_A_min_fail */
295 WHILEM, /* WHILEM_A_max */
296 WHILEM, /* WHILEM_A_max_fail */
297 WHILEM, /* WHILEM_B_min */
298 WHILEM, /* WHILEM_B_min_fail */
299 WHILEM, /* WHILEM_B_max */
300 WHILEM, /* WHILEM_B_max_fail */
301 BRANCH, /* BRANCH_next */
302 BRANCH, /* BRANCH_next_fail */
303 CURLYM, /* CURLYM_A */
304 CURLYM, /* CURLYM_A_fail */
305 CURLYM, /* CURLYM_B */
306 CURLYM, /* CURLYM_B_fail */
307 IFMATCH, /* IFMATCH_A */
308 IFMATCH, /* IFMATCH_A_fail */
309 CURLY, /* CURLY_B_min_known */
310 CURLY, /* CURLY_B_min_known_fail */
311 CURLY, /* CURLY_B_min */
312 CURLY, /* CURLY_B_min_fail */
313 CURLY, /* CURLY_B_max */
314 CURLY, /* CURLY_B_max_fail */
315 COMMIT, /* COMMIT_next */
316 COMMIT, /* COMMIT_next_fail */
317 MARKPOINT, /* MARKPOINT_next */
318 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
319 SKIP, /* SKIP_next */
320 SKIP, /* SKIP_next_fail */
321 CUTGROUP, /* CUTGROUP_next */
322 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
323 KEEPS, /* KEEPS_next */
324 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
325};
326#endif
327
/* regarglen[] - How large is the argument part of the node (in regnodes)
 *
 * Indexed by regop number (0 .. REGNODE_MAX); there are no rows for states.
 * Both tables in this region are file-local and only compiled when
 * REG_COMP_C is defined.
 */

#ifdef REG_COMP_C
static const U8 regarglen[] = {
        0,                                      /* END          */
        0,                                      /* SUCCEED      */
        0,                                      /* BOL          */
        0,                                      /* MBOL         */
        0,                                      /* SBOL         */
        0,                                      /* EOS          */
        0,                                      /* EOL          */
        0,                                      /* MEOL         */
        0,                                      /* SEOL         */
        0,                                      /* BOUND        */
        0,                                      /* BOUNDL       */
        0,                                      /* BOUNDU       */
        0,                                      /* BOUNDA       */
        0,                                      /* NBOUND       */
        0,                                      /* NBOUNDL      */
        0,                                      /* NBOUNDU      */
        0,                                      /* NBOUNDA      */
        0,                                      /* GPOS         */
        0,                                      /* REG_ANY      */
        0,                                      /* SANY         */
        0,                                      /* CANY         */
        0,                                      /* ANYOF        */
        0,                                      /* ANYOFV       */
        0,                                      /* ALNUM        */
        0,                                      /* ALNUML       */
        0,                                      /* ALNUMU       */
        0,                                      /* ALNUMA       */
        0,                                      /* NALNUM       */
        0,                                      /* NALNUML      */
        0,                                      /* NALNUMU      */
        0,                                      /* NALNUMA      */
        0,                                      /* SPACE        */
        0,                                      /* SPACEL       */
        0,                                      /* SPACEU       */
        0,                                      /* SPACEA       */
        0,                                      /* NSPACE       */
        0,                                      /* NSPACEL      */
        0,                                      /* NSPACEU      */
        0,                                      /* NSPACEA      */
        0,                                      /* DIGIT        */
        0,                                      /* DIGITL       */
        0,                                      /* DIGITA       */
        0,                                      /* NDIGIT       */
        0,                                      /* NDIGITL      */
        0,                                      /* NDIGITA      */
        0,                                      /* CLUMP        */
        0,                                      /* BRANCH       */
        0,                                      /* BACK         */
        0,                                      /* EXACT        */
        0,                                      /* EXACTF       */
        0,                                      /* EXACTFL      */
        0,                                      /* EXACTFU      */
        0,                                      /* EXACTFA      */
        0,                                      /* NOTHING      */
        0,                                      /* TAIL         */
        0,                                      /* STAR         */
        0,                                      /* PLUS         */
        EXTRA_SIZE(struct regnode_2),           /* CURLY        */
        EXTRA_SIZE(struct regnode_2),           /* CURLYN       */
        EXTRA_SIZE(struct regnode_2),           /* CURLYM       */
        EXTRA_SIZE(struct regnode_2),           /* CURLYX       */
        0,                                      /* WHILEM       */
        EXTRA_SIZE(struct regnode_1),           /* OPEN         */
        EXTRA_SIZE(struct regnode_1),           /* CLOSE        */
        EXTRA_SIZE(struct regnode_1),           /* REF          */
        EXTRA_SIZE(struct regnode_1),           /* REFF         */
        EXTRA_SIZE(struct regnode_1),           /* REFFL        */
        EXTRA_SIZE(struct regnode_1),           /* REFFU        */
        EXTRA_SIZE(struct regnode_1),           /* REFFA        */
        EXTRA_SIZE(struct regnode_1),           /* NREF         */
        EXTRA_SIZE(struct regnode_1),           /* NREFF        */
        EXTRA_SIZE(struct regnode_1),           /* NREFFL       */
        EXTRA_SIZE(struct regnode_1),           /* NREFFU       */
        EXTRA_SIZE(struct regnode_1),           /* NREFFA       */
        EXTRA_SIZE(struct regnode_1),           /* IFMATCH      */
        EXTRA_SIZE(struct regnode_1),           /* UNLESSM      */
        EXTRA_SIZE(struct regnode_1),           /* SUSPEND      */
        EXTRA_SIZE(struct regnode_1),           /* IFTHEN       */
        EXTRA_SIZE(struct regnode_1),           /* GROUPP       */
        EXTRA_SIZE(struct regnode_1),           /* LONGJMP      */
        EXTRA_SIZE(struct regnode_1),           /* BRANCHJ      */
        EXTRA_SIZE(struct regnode_1),           /* EVAL         */
        0,                                      /* MINMOD       */
        0,                                      /* LOGICAL      */
        EXTRA_SIZE(struct regnode_1),           /* RENUM        */
        EXTRA_SIZE(struct regnode_1),           /* TRIE         */
        EXTRA_SIZE(struct regnode_charclass),   /* TRIEC        */
        EXTRA_SIZE(struct regnode_1),           /* AHOCORASICK  */
        EXTRA_SIZE(struct regnode_charclass),   /* AHOCORASICKC */
        EXTRA_SIZE(struct regnode_2L),          /* GOSUB        */
        0,                                      /* GOSTART      */
        EXTRA_SIZE(struct regnode_1),           /* NGROUPP      */
        EXTRA_SIZE(struct regnode_1),           /* INSUBP       */
        EXTRA_SIZE(struct regnode_1),           /* DEFINEP      */
        0,                                      /* ENDLIKE      */
        0,                                      /* OPFAIL       */
        EXTRA_SIZE(struct regnode_1),           /* ACCEPT       */
        EXTRA_SIZE(struct regnode_1),           /* VERB         */
        EXTRA_SIZE(struct regnode_1),           /* PRUNE        */
        EXTRA_SIZE(struct regnode_1),           /* MARKPOINT    */
        EXTRA_SIZE(struct regnode_1),           /* SKIP         */
        EXTRA_SIZE(struct regnode_1),           /* COMMIT       */
        EXTRA_SIZE(struct regnode_1),           /* CUTGROUP     */
        0,                                      /* KEEPS        */
        0,                                      /* LNBREAK      */
        0,                                      /* VERTWS       */
        0,                                      /* NVERTWS      */
        0,                                      /* HORIZWS      */
        0,                                      /* NHORIZWS     */
        EXTRA_SIZE(struct regnode_1),           /* FOLDCHAR     */
        0,                                      /* OPTIMIZED    */
        0,                                      /* PSEUDO       */
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node
 *
 * Indexed by regop number (0 .. REGNODE_MAX).  0 is used for every regop
 * except the few below that carry 1 or 2.
 */

static const char reg_off_by_arg[] = {
        0,      /* END          */
        0,      /* SUCCEED      */
        0,      /* BOL          */
        0,      /* MBOL         */
        0,      /* SBOL         */
        0,      /* EOS          */
        0,      /* EOL          */
        0,      /* MEOL         */
        0,      /* SEOL         */
        0,      /* BOUND        */
        0,      /* BOUNDL       */
        0,      /* BOUNDU       */
        0,      /* BOUNDA       */
        0,      /* NBOUND       */
        0,      /* NBOUNDL      */
        0,      /* NBOUNDU      */
        0,      /* NBOUNDA      */
        0,      /* GPOS         */
        0,      /* REG_ANY      */
        0,      /* SANY         */
        0,      /* CANY         */
        0,      /* ANYOF        */
        0,      /* ANYOFV       */
        0,      /* ALNUM        */
        0,      /* ALNUML       */
        0,      /* ALNUMU       */
        0,      /* ALNUMA       */
        0,      /* NALNUM       */
        0,      /* NALNUML      */
        0,      /* NALNUMU      */
        0,      /* NALNUMA      */
        0,      /* SPACE        */
        0,      /* SPACEL       */
        0,      /* SPACEU       */
        0,      /* SPACEA       */
        0,      /* NSPACE       */
        0,      /* NSPACEL      */
        0,      /* NSPACEU      */
        0,      /* NSPACEA      */
        0,      /* DIGIT        */
        0,      /* DIGITL       */
        0,      /* DIGITA       */
        0,      /* NDIGIT       */
        0,      /* NDIGITL      */
        0,      /* NDIGITA      */
        0,      /* CLUMP        */
        0,      /* BRANCH       */
        0,      /* BACK         */
        0,      /* EXACT        */
        0,      /* EXACTF       */
        0,      /* EXACTFL      */
        0,      /* EXACTFU      */
        0,      /* EXACTFA      */
        0,      /* NOTHING      */
        0,      /* TAIL         */
        0,      /* STAR         */
        0,      /* PLUS         */
        0,      /* CURLY        */
        0,      /* CURLYN       */
        0,      /* CURLYM       */
        0,      /* CURLYX       */
        0,      /* WHILEM       */
        0,      /* OPEN         */
        0,      /* CLOSE        */
        0,      /* REF          */
        0,      /* REFF         */
        0,      /* REFFL        */
        0,      /* REFFU        */
        0,      /* REFFA        */
        0,      /* NREF         */
        0,      /* NREFF        */
        0,      /* NREFFL       */
        0,      /* NREFFU       */
        0,      /* NREFFA       */
        2,      /* IFMATCH      */
        2,      /* UNLESSM      */
        1,      /* SUSPEND      */
        1,      /* IFTHEN       */
        0,      /* GROUPP       */
        1,      /* LONGJMP      */
        1,      /* BRANCHJ      */
        0,      /* EVAL         */
        0,      /* MINMOD       */
        0,      /* LOGICAL      */
        1,      /* RENUM        */
        0,      /* TRIE         */
        0,      /* TRIEC        */
        0,      /* AHOCORASICK  */
        0,      /* AHOCORASICKC */
        0,      /* GOSUB        */
        0,      /* GOSTART      */
        0,      /* NGROUPP      */
        0,      /* INSUBP       */
        0,      /* DEFINEP      */
        0,      /* ENDLIKE      */
        0,      /* OPFAIL       */
        0,      /* ACCEPT       */
        0,      /* VERB         */
        0,      /* PRUNE        */
        0,      /* MARKPOINT    */
        0,      /* SKIP         */
        0,      /* COMMIT       */
        0,      /* CUTGROUP     */
        0,      /* KEEPS        */
        0,      /* LNBREAK      */
        0,      /* VERTWS       */
        0,      /* NVERTWS      */
        0,      /* HORIZWS      */
        0,      /* NHORIZWS     */
        0,      /* FOLDCHAR     */
        0,      /* OPTIMIZED    */
        0,      /* PSEUDO       */
};

#endif /* REG_COMP_C */
564
6bda09f9
YO
565/* reg_name[] - Opcode/state names in string form, for debugging */
566
22429478 567#ifndef DOINIT
13d6edb4 568EXTCONST char * PL_reg_name[];
22429478 569#else
4764e399 570EXTCONST char * const PL_reg_name[] = {
03363afd
YO
571 "END", /* 0000 */
572 "SUCCEED", /* 0x01 */
573 "BOL", /* 0x02 */
574 "MBOL", /* 0x03 */
575 "SBOL", /* 0x04 */
576 "EOS", /* 0x05 */
577 "EOL", /* 0x06 */
578 "MEOL", /* 0x07 */
579 "SEOL", /* 0x08 */
580 "BOUND", /* 0x09 */
581 "BOUNDL", /* 0x0a */
1e355c70 582 "BOUNDU", /* 0x0b */
0c6e81eb
KW
583 "BOUNDA", /* 0x0c */
584 "NBOUND", /* 0x0d */
585 "NBOUNDL", /* 0x0e */
586 "NBOUNDU", /* 0x0f */
587 "NBOUNDA", /* 0x10 */
588 "GPOS", /* 0x11 */
589 "REG_ANY", /* 0x12 */
590 "SANY", /* 0x13 */
591 "CANY", /* 0x14 */
592 "ANYOF", /* 0x15 */
593 "ANYOFV", /* 0x16 */
594 "ALNUM", /* 0x17 */
595 "ALNUML", /* 0x18 */
596 "ALNUMU", /* 0x19 */
597 "ALNUMA", /* 0x1a */
598 "NALNUM", /* 0x1b */
599 "NALNUML", /* 0x1c */
600 "NALNUMU", /* 0x1d */
601 "NALNUMA", /* 0x1e */
602 "SPACE", /* 0x1f */
603 "SPACEL", /* 0x20 */
604 "SPACEU", /* 0x21 */
605 "SPACEA", /* 0x22 */
606 "NSPACE", /* 0x23 */
607 "NSPACEL", /* 0x24 */
608 "NSPACEU", /* 0x25 */
609 "NSPACEA", /* 0x26 */
610 "DIGIT", /* 0x27 */
611 "DIGITL", /* 0x28 */
612 "DIGITA", /* 0x29 */
613 "NDIGIT", /* 0x2a */
614 "NDIGITL", /* 0x2b */
615 "NDIGITA", /* 0x2c */
616 "CLUMP", /* 0x2d */
617 "BRANCH", /* 0x2e */
618 "BACK", /* 0x2f */
619 "EXACT", /* 0x30 */
620 "EXACTF", /* 0x31 */
621 "EXACTFL", /* 0x32 */
622 "EXACTFU", /* 0x33 */
7986cb47
KW
623 "EXACTFA", /* 0x34 */
624 "NOTHING", /* 0x35 */
625 "TAIL", /* 0x36 */
626 "STAR", /* 0x37 */
627 "PLUS", /* 0x38 */
628 "CURLY", /* 0x39 */
629 "CURLYN", /* 0x3a */
630 "CURLYM", /* 0x3b */
631 "CURLYX", /* 0x3c */
632 "WHILEM", /* 0x3d */
633 "OPEN", /* 0x3e */
634 "CLOSE", /* 0x3f */
635 "REF", /* 0x40 */
636 "REFF", /* 0x41 */
637 "REFFL", /* 0x42 */
638 "REFFU", /* 0x43 */
781aab5c
KW
639 "REFFA", /* 0x44 */
640 "NREF", /* 0x45 */
641 "NREFF", /* 0x46 */
642 "NREFFL", /* 0x47 */
643 "NREFFU", /* 0x48 */
644 "NREFFA", /* 0x49 */
645 "IFMATCH", /* 0x4a */
646 "UNLESSM", /* 0x4b */
647 "SUSPEND", /* 0x4c */
648 "IFTHEN", /* 0x4d */
649 "GROUPP", /* 0x4e */
650 "LONGJMP", /* 0x4f */
651 "BRANCHJ", /* 0x50 */
652 "EVAL", /* 0x51 */
653 "MINMOD", /* 0x52 */
654 "LOGICAL", /* 0x53 */
655 "RENUM", /* 0x54 */
656 "TRIE", /* 0x55 */
657 "TRIEC", /* 0x56 */
658 "AHOCORASICK", /* 0x57 */
659 "AHOCORASICKC", /* 0x58 */
660 "GOSUB", /* 0x59 */
661 "GOSTART", /* 0x5a */
662 "NGROUPP", /* 0x5b */
663 "INSUBP", /* 0x5c */
664 "DEFINEP", /* 0x5d */
665 "ENDLIKE", /* 0x5e */
666 "OPFAIL", /* 0x5f */
667 "ACCEPT", /* 0x60 */
668 "VERB", /* 0x61 */
669 "PRUNE", /* 0x62 */
670 "MARKPOINT", /* 0x63 */
671 "SKIP", /* 0x64 */
672 "COMMIT", /* 0x65 */
673 "CUTGROUP", /* 0x66 */
674 "KEEPS", /* 0x67 */
675 "LNBREAK", /* 0x68 */
676 "VERTWS", /* 0x69 */
677 "NVERTWS", /* 0x6a */
678 "HORIZWS", /* 0x6b */
679 "NHORIZWS", /* 0x6c */
680 "FOLDCHAR", /* 0x6d */
681 "OPTIMIZED", /* 0x6e */
682 "PSEUDO", /* 0x6f */
03363afd 683 /* ------------ States ------------- */
24b23f37
YO
684 "TRIE_next", /* REGNODE_MAX +0x01 */
685 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
686 "EVAL_AB", /* REGNODE_MAX +0x03 */
687 "EVAL_AB_fail", /* REGNODE_MAX +0x04 */
688 "CURLYX_end", /* REGNODE_MAX +0x05 */
689 "CURLYX_end_fail", /* REGNODE_MAX +0x06 */
690 "WHILEM_A_pre", /* REGNODE_MAX +0x07 */
691 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */
692 "WHILEM_A_min", /* REGNODE_MAX +0x09 */
693 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */
694 "WHILEM_A_max", /* REGNODE_MAX +0x0b */
695 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */
696 "WHILEM_B_min", /* REGNODE_MAX +0x0d */
697 "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */
698 "WHILEM_B_max", /* REGNODE_MAX +0x0f */
699 "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */
700 "BRANCH_next", /* REGNODE_MAX +0x11 */
701 "BRANCH_next_fail", /* REGNODE_MAX +0x12 */
702 "CURLYM_A", /* REGNODE_MAX +0x13 */
703 "CURLYM_A_fail", /* REGNODE_MAX +0x14 */
704 "CURLYM_B", /* REGNODE_MAX +0x15 */
705 "CURLYM_B_fail", /* REGNODE_MAX +0x16 */
706 "IFMATCH_A", /* REGNODE_MAX +0x17 */
707 "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */
708 "CURLY_B_min_known", /* REGNODE_MAX +0x19 */
709 "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */
710 "CURLY_B_min", /* REGNODE_MAX +0x1b */
711 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
712 "CURLY_B_max", /* REGNODE_MAX +0x1d */
713 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
714 "COMMIT_next", /* REGNODE_MAX +0x1f */
715 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
e2e6a0f1
YO
716 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
717 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
5d458dd8
YO
718 "SKIP_next", /* REGNODE_MAX +0x23 */
719 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
720 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
721 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
ee9b8eae
YO
722 "KEEPS_next", /* REGNODE_MAX +0x27 */
723 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
885f9e59 724};
22429478 725#endif /* DOINIT */
d09b2d29 726
f7819f85
A
727/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
728
729#ifndef DOINIT
730EXTCONST char * PL_reg_extflags_name[];
731#else
732EXTCONST char * const PL_reg_extflags_name[] = {
df7a8460 733 /* Bits in extflags defined: 11111111111111111111111011111111 */
52d81aa8
NC
734 "MULTILINE", /* 0x00000001 */
735 "SINGLELINE", /* 0x00000002 */
736 "FOLD", /* 0x00000004 */
737 "EXTENDED", /* 0x00000008 */
738 "KEEPCOPY", /* 0x00000010 */
df7a8460
KW
739 "CHARSET", /* 0x000000e0 */
740 "CHARSET", /* 0x000000e0 */
741 "CHARSET", /* 0x000000e0 */
e795e964
KW
742 "UNUSED_BIT_8", /* 0x00000100 */
743 "ANCH_BOL", /* 0x00000200 */
744 "ANCH_MBOL", /* 0x00000400 */
745 "ANCH_SBOL", /* 0x00000800 */
746 "ANCH_GPOS", /* 0x00001000 */
747 "GPOS_SEEN", /* 0x00002000 */
748 "GPOS_FLOAT", /* 0x00004000 */
749 "LOOKBEHIND_SEEN", /* 0x00008000 */
750 "EVAL_SEEN", /* 0x00010000 */
751 "CANY_SEEN", /* 0x00020000 */
752 "NOSCAN", /* 0x00040000 */
753 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
754 "MATCH_UTF8", /* 0x00100000 */
755 "USE_INTUIT_NOML", /* 0x00200000 */
756 "USE_INTUIT_ML", /* 0x00400000 */
757 "INTUIT_TAIL", /* 0x00800000 */
758 "SPLIT", /* 0x01000000 */
759 "COPY_DONE", /* 0x02000000 */
760 "TAINTED_SEEN", /* 0x04000000 */
761 "TAINTED", /* 0x08000000 */
762 "START_ONLY", /* 0x10000000 */
763 "SKIPWHITE", /* 0x20000000 */
764 "WHITE", /* 0x40000000 */
765 "NULL", /* 0x80000000 */
f7819f85
A
766};
767#endif /* DOINIT */
768
/* The following have no fixed length. U8 so we can do strchr() on it. */
/* REGNODE_VARIES(node) is non-zero when the bit for regop number `node` is
 * set in PL_varies_bitmask: byte (node >> 3), bit (node & 7).  The result is
 * the masked bit itself, not a normalised 0/1.  `node` is evaluated twice. */
#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 771
f9ef50a7 772#ifndef DOINIT
ded4dd2a 773EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 774#else
ded4dd2a 775EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
0e019ad6 776 ANYOFV, CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX,
781aab5c
KW
777 WHILEM, REF, REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU,
778 NREFFA, SUSPEND, IFTHEN, BRANCHJ,
f9ef50a7
NC
779 0
780};
781#endif /* DOINIT */
782
ded4dd2a
NC
783#ifndef DOINIT
784EXTCONST U8 PL_varies_bitmask[];
785#else
786EXTCONST U8 PL_varies_bitmask[] = {
781aab5c 787 0x00, 0x00, 0x40, 0x00, 0x00, 0xE0, 0x80, 0x3F, 0xFF, 0x33, 0x01, 0x00, 0x00, 0x00
ded4dd2a
NC
788};
789#endif /* DOINIT */
790
f9ef50a7
NC
/* The following always have a length of 1. U8 so we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* REGNODE_SIMPLE(node) is non-zero when the bit for regop number `node` is
 * set in PL_simple_bitmask: byte (node >> 3), bit (node & 7).  The result is
 * the masked bit itself, not a normalised 0/1.  `node` is evaluated twice. */
#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 794
f9ef50a7 795#ifndef DOINIT
ded4dd2a 796EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 797#else
ded4dd2a 798EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
0c6e81eb
KW
799 REG_ANY, SANY, CANY, ANYOF, ALNUM, ALNUML, ALNUMU, ALNUMA, NALNUM,
800 NALNUML, NALNUMU, NALNUMA, SPACE, SPACEL, SPACEU, SPACEA, NSPACE,
801 NSPACEL, NSPACEU, NSPACEA, DIGIT, DIGITL, DIGITA, NDIGIT, NDIGITL,
802 NDIGITA, VERTWS, NVERTWS, HORIZWS, NHORIZWS,
f9ef50a7
NC
803 0
804};
805#endif /* DOINIT */
806
ded4dd2a
NC
807#ifndef DOINIT
808EXTCONST U8 PL_simple_bitmask[];
809#else
810EXTCONST U8 PL_simple_bitmask[] = {
781aab5c 811 0x00, 0x00, 0xBC, 0xFF, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E
ded4dd2a
NC
812};
813#endif /* DOINIT */
814
/* ex: set ro: */