This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Free up bit for regex ANYOF nodes
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
eb2624c9 3 This file is built by regen/regcomp.pl from regcomp.sym.
d09b2d29 4 Any changes made here will be lost!
78102347 5 */
d09b2d29 6
6bda09f9
YO
7/* Regops and State definitions */
8
34fdef84
KW
/* Highest regop opcode number; equal to PSEUDO, the last opcode defined
 * in the list that follows. */
9#define REGNODE_MAX 94
/* Highest regmatch state number: REGNODE_MAX + 40, i.e. KEEPS_next_fail,
 * the last entry of the state list further down. */
10#define REGMATCH_STATE_MAX 134
03363afd 11
f9f4320a
YO
/* Regop opcode numbers.  The values run consecutively from END (0) to
 * PSEUDO (94, which is REGNODE_MAX).  The per-opcode tables later in this
 * file (PL_regkind, regarglen, reg_off_by_arg, PL_reg_name) list one entry
 * per opcode in this same order.  Each trailing comment repeats the value
 * in hexadecimal followed by a short description of the opcode. */
12#define END 0 /* 0000 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
14#define BOL 2 /* 0x02 Match "" at beginning of line. */
15#define MBOL 3 /* 0x03 Same, assuming multiline. */
16#define SBOL 4 /* 0x04 Same, assuming singleline. */
17#define EOS 5 /* 0x05 Match "" at end of string. */
18#define EOL 6 /* 0x06 Match "" at end of line. */
19#define MEOL 7 /* 0x07 Same, assuming multiline. */
20#define SEOL 8 /* 0x08 Same, assuming singleline. */
1e355c70 21#define BOUND 9 /* 0x09 Match "" at any word boundary using native charset semantics for non-utf8 */
f2284805 22#define BOUNDL 10 /* 0x0a Match "" at any locale word boundary */
1e355c70 23#define BOUNDU 11 /* 0x0b Match "" at any word boundary using Unicode semantics */
0c6e81eb
KW
24#define BOUNDA 12 /* 0x0c Match "" at any word boundary using ASCII semantics */
25#define NBOUND 13 /* 0x0d Match "" at any word non-boundary using native charset semantics for non-utf8 */
26#define NBOUNDL 14 /* 0x0e Match "" at any locale word non-boundary */
27#define NBOUNDU 15 /* 0x0f Match "" at any word non-boundary using Unicode semantics */
28#define NBOUNDA 16 /* 0x10 Match "" at any word non-boundary using ASCII semantics */
29#define GPOS 17 /* 0x11 Matches where last m//g left off. */
30#define REG_ANY 18 /* 0x12 Match any one character (except newline). */
31#define SANY 19 /* 0x13 Match any one character. */
32#define CANY 20 /* 0x14 Match any one byte. */
33#define ANYOF 21 /* 0x15 Match character in (or not in) this class, single char match only */
34fdef84
KW
34#define ANYOF_NON_UTF8_NON_ASCII_ALL 22 /* 0x16 like ANYOF, also matches any U+80 - U+FF when not in UTF-8 */
35#define POSIXD 23 /* 0x17 Some [[:class:]] under /d; the FLAGS field gives which one */
36#define POSIXL 24 /* 0x18 Some [[:class:]] under /l; the FLAGS field gives which one */
37#define POSIXU 25 /* 0x19 Some [[:class:]] under /u; the FLAGS field gives which one */
38#define POSIXA 26 /* 0x1a Some [[:class:]] under /a; the FLAGS field gives which one */
39#define NPOSIXD 27 /* 0x1b complement of POSIXD, [[:^class:]] */
40#define NPOSIXL 28 /* 0x1c complement of POSIXL, [[:^class:]] */
41#define NPOSIXU 29 /* 0x1d complement of POSIXU, [[:^class:]] */
42#define NPOSIXA 30 /* 0x1e complement of POSIXA, [[:^class:]] */
43#define CLUMP 31 /* 0x1f Match any extended grapheme cluster sequence */
44#define BRANCH 32 /* 0x20 Match this alternative, or the next... */
45#define BACK 33 /* 0x21 Match "", "next" ptr points backward. */
46#define EXACT 34 /* 0x22 Match this string (preceded by length). */
47#define EXACTF 35 /* 0x23 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
48#define EXACTFL 36 /* 0x24 Match this string (not guaranteed to be folded) using /il rules (w/len). */
49#define EXACTFU 37 /* 0x25 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
50#define EXACTFA 38 /* 0x26 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
51#define EXACTFU_SS 39 /* 0x27 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
52#define EXACTFA_NO_TRIE 40 /* 0x28 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
53#define NOTHING 41 /* 0x29 Match empty string. */
54#define TAIL 42 /* 0x2a Match empty string. Can jump here from outside. */
55#define STAR 43 /* 0x2b Match this (simple) thing 0 or more times. */
56#define PLUS 44 /* 0x2c Match this (simple) thing 1 or more times. */
57#define CURLY 45 /* 0x2d Match this simple thing {n,m} times. */
58#define CURLYN 46 /* 0x2e Capture next-after-this simple thing */
59#define CURLYM 47 /* 0x2f Capture this medium-complex thing {n,m} times. */
60#define CURLYX 48 /* 0x30 Match this complex thing {n,m} times. */
61#define WHILEM 49 /* 0x31 Do curly processing and see if rest matches. */
62#define OPEN 50 /* 0x32 Mark this point in input as start of #n. */
63#define CLOSE 51 /* 0x33 Analogous to OPEN. */
64#define REF 52 /* 0x34 Match some already matched string */
65#define REFF 53 /* 0x35 Match already matched string, folded using native charset semantics for non-utf8 */
66#define REFFL 54 /* 0x36 Match already matched string, folded in loc. */
67#define REFFU 55 /* 0x37 Match already matched string, folded using unicode semantics for non-utf8 */
68#define REFFA 56 /* 0x38 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
69#define NREF 57 /* 0x39 Match some already matched string */
70#define NREFF 58 /* 0x3a Match already matched string, folded using native charset semantics for non-utf8 */
71#define NREFFL 59 /* 0x3b Match already matched string, folded in loc. */
72#define NREFFU 60 /* 0x3c Match already matched string, folded using unicode semantics for non-utf8 */
73#define NREFFA 61 /* 0x3d Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
74#define IFMATCH 62 /* 0x3e Succeeds if the following matches. */
75#define UNLESSM 63 /* 0x3f Fails if the following matches. */
76#define SUSPEND 64 /* 0x40 "Independent" sub-RE. */
77#define IFTHEN 65 /* 0x41 Switch, should be preceded by switcher. */
78#define GROUPP 66 /* 0x42 Whether the group matched. */
79#define LONGJMP 67 /* 0x43 Jump far away. */
80#define BRANCHJ 68 /* 0x44 BRANCH with long offset. */
81#define EVAL 69 /* 0x45 Execute some Perl code. */
82#define MINMOD 70 /* 0x46 Next operator is not greedy. */
83#define LOGICAL 71 /* 0x47 Next opcode should set the flag only. */
84#define RENUM 72 /* 0x48 Group with independently numbered parens. */
85#define TRIE 73 /* 0x49 Match many EXACT(F[ALU]?)? at once. flags==type */
86#define TRIEC 74 /* 0x4a Same as TRIE, but with embedded charclass data */
87#define AHOCORASICK 75 /* 0x4b Aho Corasick stclass. flags==type */
88#define AHOCORASICKC 76 /* 0x4c Same as AHOCORASICK, but with embedded charclass data */
89#define GOSUB 77 /* 0x4d recurse to paren arg1 at (signed) ofs arg2 */
90#define GOSTART 78 /* 0x4e recurse to start of pattern */
91#define NGROUPP 79 /* 0x4f Whether the group matched. */
92#define INSUBP 80 /* 0x50 Whether we are in a specific recurse. */
93#define DEFINEP 81 /* 0x51 Never execute directly. */
94#define ENDLIKE 82 /* 0x52 Used only for the type field of verbs */
95#define OPFAIL 83 /* 0x53 Same as (?!) */
96#define ACCEPT 84 /* 0x54 Accepts the current matched string. */
97#define VERB 85 /* 0x55 Used only for the type field of verbs */
98#define PRUNE 86 /* 0x56 Pattern fails at this startpoint if no-backtracking through this */
99#define MARKPOINT 87 /* 0x57 Push the current location for rollback by cut. */
100#define SKIP 88 /* 0x58 On failure skip forward (to the mark) before retrying */
101#define COMMIT 89 /* 0x59 Pattern fails outright if backtracking through this */
102#define CUTGROUP 90 /* 0x5a On failure go to the next alternation in the group */
103#define KEEPS 91 /* 0x5b $& begins here. */
104#define LNBREAK 92 /* 0x5c generic newline pattern */
105#define OPTIMIZED 93 /* 0x5d Placeholder for dump. */
106#define PSEUDO 94 /* 0x5e Pseudo opcode for internal use. */
03363afd 107 /* ------------ States ------------- */
24b23f37
YO
/* Regmatch state numbers.  They are numbered immediately after the regop
 * opcodes, from REGNODE_MAX + 1 up to REGNODE_MAX + 40 (which equals
 * REGMATCH_STATE_MAX).  Every state FOO is followed by a companion
 * FOO_fail whose number is one higher.  The trailing comment names the
 * regop each state belongs to. */
108#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
109#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
110#define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */
111#define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */
112#define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */
113#define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */
114#define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */
115#define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */
116#define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */
117#define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */
118#define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */
119#define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */
120#define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */
121#define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */
122#define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */
123#define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */
124#define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */
125#define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */
126#define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */
127#define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */
128#define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */
129#define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */
130#define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */
131#define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */
132#define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */
133#define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */
134#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
135#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
136#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
137#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
138#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
139#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
e2e6a0f1
YO
140#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
141#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
5d458dd8
YO
142#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
143#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
144#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
145#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
ee9b8eae
YO
146#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
147#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
03363afd 148
6bda09f9 149/* PL_regkind[] - What type (kind) of regop or state this is. */
d09b2d29
IZ
150
/* Table with one entry per regop (END .. PSEUDO) followed by one entry per
 * regmatch state (TRIE_next .. KEEPS_next_fail), in numeric order.  Each
 * entry is the opcode that names the family the regop or state belongs to;
 * for example MBOL and SBOL are both of kind BOL, and every WHILEM_* state
 * is of kind WHILEM.  The trailing comment on each entry names the regop or
 * state that the entry describes.
 * When DOINIT is not defined only the declaration is emitted; the
 * initialised definition is emitted when DOINIT is defined. */
151#ifndef DOINIT
22c35a8c 152EXTCONST U8 PL_regkind[];
d09b2d29 153#else
22c35a8c 154EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
155 END, /* END */
156 END, /* SUCCEED */
157 BOL, /* BOL */
158 BOL, /* MBOL */
159 BOL, /* SBOL */
160 EOL, /* EOS */
161 EOL, /* EOL */
162 EOL, /* MEOL */
163 EOL, /* SEOL */
164 BOUND, /* BOUND */
165 BOUND, /* BOUNDL */
1e355c70 166 BOUND, /* BOUNDU */
0c6e81eb 167 BOUND, /* BOUNDA */
e2e6a0f1
YO
168 NBOUND, /* NBOUND */
169 NBOUND, /* NBOUNDL */
1e355c70 170 NBOUND, /* NBOUNDU */
0c6e81eb 171 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
172 GPOS, /* GPOS */
173 REG_ANY, /* REG_ANY */
174 REG_ANY, /* SANY */
175 REG_ANY, /* CANY */
176 ANYOF, /* ANYOF */
34fdef84 177 ANYOF, /* ANYOF_NON_UTF8_NON_ASCII_ALL */
3615ea58
KW
178 POSIXD, /* POSIXD */
179 POSIXD, /* POSIXL */
180 POSIXD, /* POSIXU */
181 POSIXD, /* POSIXA */
9e84774b
KW
182 NPOSIXD, /* NPOSIXD */
183 NPOSIXD, /* NPOSIXL */
184 NPOSIXD, /* NPOSIXU */
185 NPOSIXD, /* NPOSIXA */
e2e6a0f1
YO
186 CLUMP, /* CLUMP */
187 BRANCH, /* BRANCH */
188 BACK, /* BACK */
189 EXACT, /* EXACT */
190 EXACT, /* EXACTF */
191 EXACT, /* EXACTFL */
01f98ec2 192 EXACT, /* EXACTFU */
8c1182fd 193 EXACT, /* EXACTFA */
3c760661 194 EXACT, /* EXACTFU_SS */
098b07d5 195 EXACT, /* EXACTFA_NO_TRIE */
e2e6a0f1
YO
196 NOTHING, /* NOTHING */
197 NOTHING, /* TAIL */
198 STAR, /* STAR */
199 PLUS, /* PLUS */
200 CURLY, /* CURLY */
201 CURLY, /* CURLYN */
202 CURLY, /* CURLYM */
203 CURLY, /* CURLYX */
204 WHILEM, /* WHILEM */
205 OPEN, /* OPEN */
206 CLOSE, /* CLOSE */
207 REF, /* REF */
208 REF, /* REFF */
209 REF, /* REFFL */
01f98ec2 210 REF, /* REFFU */
781aab5c 211 REF, /* REFFA */
01f98ec2
KW
212 REF, /* NREF */
213 REF, /* NREFF */
214 REF, /* NREFFL */
215 REF, /* NREFFU */
781aab5c 216 REF, /* NREFFA */
e2e6a0f1
YO
217 BRANCHJ, /* IFMATCH */
218 BRANCHJ, /* UNLESSM */
219 BRANCHJ, /* SUSPEND */
220 BRANCHJ, /* IFTHEN */
221 GROUPP, /* GROUPP */
222 LONGJMP, /* LONGJMP */
223 BRANCHJ, /* BRANCHJ */
224 EVAL, /* EVAL */
225 MINMOD, /* MINMOD */
226 LOGICAL, /* LOGICAL */
227 BRANCHJ, /* RENUM */
228 TRIE, /* TRIE */
229 TRIE, /* TRIEC */
230 TRIE, /* AHOCORASICK */
231 TRIE, /* AHOCORASICKC */
232 GOSUB, /* GOSUB */
233 GOSTART, /* GOSTART */
e2e6a0f1
YO
234 NGROUPP, /* NGROUPP */
235 INSUBP, /* INSUBP */
236 DEFINEP, /* DEFINEP */
237 ENDLIKE, /* ENDLIKE */
238 ENDLIKE, /* OPFAIL */
239 ENDLIKE, /* ACCEPT */
240 VERB, /* VERB */
5d458dd8 241 VERB, /* PRUNE */
e2e6a0f1 242 VERB, /* MARKPOINT */
5d458dd8 243 VERB, /* SKIP */
e2e6a0f1 244 VERB, /* COMMIT */
5d458dd8 245 VERB, /* CUTGROUP */
ee9b8eae 246 KEEPS, /* KEEPS */
e1d1eefb 247 LNBREAK, /* LNBREAK */
e2e6a0f1
YO
248 NOTHING, /* OPTIMIZED */
249 PSEUDO, /* PSEUDO */
03363afd 250 /* ------------ States ------------- */
e2e6a0f1
YO
251 TRIE, /* TRIE_next */
252 TRIE, /* TRIE_next_fail */
253 EVAL, /* EVAL_AB */
254 EVAL, /* EVAL_AB_fail */
255 CURLYX, /* CURLYX_end */
256 CURLYX, /* CURLYX_end_fail */
257 WHILEM, /* WHILEM_A_pre */
258 WHILEM, /* WHILEM_A_pre_fail */
259 WHILEM, /* WHILEM_A_min */
260 WHILEM, /* WHILEM_A_min_fail */
261 WHILEM, /* WHILEM_A_max */
262 WHILEM, /* WHILEM_A_max_fail */
263 WHILEM, /* WHILEM_B_min */
264 WHILEM, /* WHILEM_B_min_fail */
265 WHILEM, /* WHILEM_B_max */
266 WHILEM, /* WHILEM_B_max_fail */
267 BRANCH, /* BRANCH_next */
268 BRANCH, /* BRANCH_next_fail */
269 CURLYM, /* CURLYM_A */
270 CURLYM, /* CURLYM_A_fail */
271 CURLYM, /* CURLYM_B */
272 CURLYM, /* CURLYM_B_fail */
273 IFMATCH, /* IFMATCH_A */
274 IFMATCH, /* IFMATCH_A_fail */
275 CURLY, /* CURLY_B_min_known */
276 CURLY, /* CURLY_B_min_known_fail */
277 CURLY, /* CURLY_B_min */
278 CURLY, /* CURLY_B_min_fail */
279 CURLY, /* CURLY_B_max */
280 CURLY, /* CURLY_B_max_fail */
281 COMMIT, /* COMMIT_next */
282 COMMIT, /* COMMIT_next_fail */
283 MARKPOINT, /* MARKPOINT_next */
284 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
285 SKIP, /* SKIP_next */
286 SKIP, /* SKIP_next_fail */
287 CUTGROUP, /* CUTGROUP_next */
288 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
289 KEEPS, /* KEEPS_next */
290 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
291};
292#endif
293
6bda09f9 294/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29
IZ
295
296#ifdef REG_COMP_C
/* One entry per regop, END .. PSEUDO, in opcode order (the regmatch states
 * have no entries here).  An entry of 0 marks a regop with no argument
 * part; every other entry is EXTRA_SIZE() applied to the regnode struct
 * that regop uses (regnode_1, regnode_2, regnode_2L or regnode_charclass).
 * EXTRA_SIZE and those structs are defined outside this file.  The
 * trailing comment on each entry names the regop it describes. */
29de9391 297static const U8 regarglen[] = {
03363afd
YO
298 0, /* END */
299 0, /* SUCCEED */
300 0, /* BOL */
301 0, /* MBOL */
302 0, /* SBOL */
303 0, /* EOS */
304 0, /* EOL */
305 0, /* MEOL */
306 0, /* SEOL */
307 0, /* BOUND */
308 0, /* BOUNDL */
1e355c70 309 0, /* BOUNDU */
0c6e81eb 310 0, /* BOUNDA */
03363afd
YO
311 0, /* NBOUND */
312 0, /* NBOUNDL */
1e355c70 313 0, /* NBOUNDU */
0c6e81eb 314 0, /* NBOUNDA */
03363afd
YO
315 0, /* GPOS */
316 0, /* REG_ANY */
317 0, /* SANY */
318 0, /* CANY */
319 0, /* ANYOF */
34fdef84 320 0, /* ANYOF_NON_UTF8_NON_ASCII_ALL */
3615ea58
KW
321 0, /* POSIXD */
322 0, /* POSIXL */
323 0, /* POSIXU */
324 0, /* POSIXA */
325 0, /* NPOSIXD */
326 0, /* NPOSIXL */
327 0, /* NPOSIXU */
328 0, /* NPOSIXA */
03363afd
YO
329 0, /* CLUMP */
330 0, /* BRANCH */
331 0, /* BACK */
332 0, /* EXACT */
333 0, /* EXACTF */
334 0, /* EXACTFL */
01f98ec2 335 0, /* EXACTFU */
8c1182fd 336 0, /* EXACTFA */
3c760661 337 0, /* EXACTFU_SS */
098b07d5 338 0, /* EXACTFA_NO_TRIE */
03363afd
YO
339 0, /* NOTHING */
340 0, /* TAIL */
341 0, /* STAR */
342 0, /* PLUS */
343 EXTRA_SIZE(struct regnode_2), /* CURLY */
344 EXTRA_SIZE(struct regnode_2), /* CURLYN */
345 EXTRA_SIZE(struct regnode_2), /* CURLYM */
346 EXTRA_SIZE(struct regnode_2), /* CURLYX */
347 0, /* WHILEM */
348 EXTRA_SIZE(struct regnode_1), /* OPEN */
349 EXTRA_SIZE(struct regnode_1), /* CLOSE */
350 EXTRA_SIZE(struct regnode_1), /* REF */
351 EXTRA_SIZE(struct regnode_1), /* REFF */
352 EXTRA_SIZE(struct regnode_1), /* REFFL */
01f98ec2 353 EXTRA_SIZE(struct regnode_1), /* REFFU */
781aab5c 354 EXTRA_SIZE(struct regnode_1), /* REFFA */
01f98ec2
KW
355 EXTRA_SIZE(struct regnode_1), /* NREF */
356 EXTRA_SIZE(struct regnode_1), /* NREFF */
357 EXTRA_SIZE(struct regnode_1), /* NREFFL */
358 EXTRA_SIZE(struct regnode_1), /* NREFFU */
781aab5c 359 EXTRA_SIZE(struct regnode_1), /* NREFFA */
03363afd
YO
360 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
361 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
362 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
363 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
364 EXTRA_SIZE(struct regnode_1), /* GROUPP */
365 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
366 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
367 EXTRA_SIZE(struct regnode_1), /* EVAL */
368 0, /* MINMOD */
369 0, /* LOGICAL */
370 EXTRA_SIZE(struct regnode_1), /* RENUM */
371 EXTRA_SIZE(struct regnode_1), /* TRIE */
372 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
373 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
374 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
1a147d38
YO
375 EXTRA_SIZE(struct regnode_2L), /* GOSUB */
376 0, /* GOSTART */
0a4db386 377 EXTRA_SIZE(struct regnode_1), /* NGROUPP */
1a147d38 378 EXTRA_SIZE(struct regnode_1), /* INSUBP */
0a4db386 379 EXTRA_SIZE(struct regnode_1), /* DEFINEP */
e2e6a0f1 380 0, /* ENDLIKE */
7f69552c 381 0, /* OPFAIL */
e2e6a0f1 382 EXTRA_SIZE(struct regnode_1), /* ACCEPT */
20832bc5 383 EXTRA_SIZE(struct regnode_1), /* VERB */
5d458dd8 384 EXTRA_SIZE(struct regnode_1), /* PRUNE */
e2e6a0f1 385 EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
5d458dd8 386 EXTRA_SIZE(struct regnode_1), /* SKIP */
e2e6a0f1 387 EXTRA_SIZE(struct regnode_1), /* COMMIT */
5d458dd8 388 EXTRA_SIZE(struct regnode_1), /* CUTGROUP */
ee9b8eae 389 0, /* KEEPS */
e1d1eefb 390 0, /* LNBREAK */
03363afd
YO
391 0, /* OPTIMIZED */
392 0, /* PSEUDO */
d09b2d29
IZ
393};
394
6bda09f9
YO
395/* reg_off_by_arg[] - Which argument holds the offset to the next node */
396
/* One entry per regop, END .. PSEUDO, in opcode order (the regmatch states
 * have no entries here).  The only non-zero entries are 2 for IFMATCH and
 * UNLESSM, and 1 for SUSPEND, IFTHEN, LONGJMP, BRANCHJ and RENUM.
 * NOTE(review): presumably 0 means the offset to the next node is not
 * stored in an argument at all, and 1/2 select which argument holds it --
 * confirm against the users of this table in regcomp.c. */
29de9391 397static const char reg_off_by_arg[] = {
03363afd
YO
398 0, /* END */
399 0, /* SUCCEED */
400 0, /* BOL */
401 0, /* MBOL */
402 0, /* SBOL */
403 0, /* EOS */
404 0, /* EOL */
405 0, /* MEOL */
406 0, /* SEOL */
407 0, /* BOUND */
408 0, /* BOUNDL */
1e355c70 409 0, /* BOUNDU */
0c6e81eb 410 0, /* BOUNDA */
03363afd
YO
411 0, /* NBOUND */
412 0, /* NBOUNDL */
1e355c70 413 0, /* NBOUNDU */
0c6e81eb 414 0, /* NBOUNDA */
03363afd
YO
415 0, /* GPOS */
416 0, /* REG_ANY */
417 0, /* SANY */
418 0, /* CANY */
419 0, /* ANYOF */
34fdef84 420 0, /* ANYOF_NON_UTF8_NON_ASCII_ALL */
3615ea58
KW
421 0, /* POSIXD */
422 0, /* POSIXL */
423 0, /* POSIXU */
424 0, /* POSIXA */
425 0, /* NPOSIXD */
426 0, /* NPOSIXL */
427 0, /* NPOSIXU */
428 0, /* NPOSIXA */
03363afd
YO
429 0, /* CLUMP */
430 0, /* BRANCH */
431 0, /* BACK */
432 0, /* EXACT */
433 0, /* EXACTF */
434 0, /* EXACTFL */
01f98ec2 435 0, /* EXACTFU */
8c1182fd 436 0, /* EXACTFA */
3c760661 437 0, /* EXACTFU_SS */
098b07d5 438 0, /* EXACTFA_NO_TRIE */
03363afd
YO
439 0, /* NOTHING */
440 0, /* TAIL */
441 0, /* STAR */
442 0, /* PLUS */
443 0, /* CURLY */
444 0, /* CURLYN */
445 0, /* CURLYM */
446 0, /* CURLYX */
447 0, /* WHILEM */
448 0, /* OPEN */
449 0, /* CLOSE */
450 0, /* REF */
451 0, /* REFF */
452 0, /* REFFL */
01f98ec2 453 0, /* REFFU */
781aab5c 454 0, /* REFFA */
01f98ec2
KW
455 0, /* NREF */
456 0, /* NREFF */
457 0, /* NREFFL */
458 0, /* NREFFU */
781aab5c 459 0, /* NREFFA */
03363afd
YO
460 2, /* IFMATCH */
461 2, /* UNLESSM */
462 1, /* SUSPEND */
463 1, /* IFTHEN */
464 0, /* GROUPP */
465 1, /* LONGJMP */
466 1, /* BRANCHJ */
467 0, /* EVAL */
468 0, /* MINMOD */
469 0, /* LOGICAL */
470 1, /* RENUM */
471 0, /* TRIE */
472 0, /* TRIEC */
473 0, /* AHOCORASICK */
474 0, /* AHOCORASICKC */
1a147d38
YO
475 0, /* GOSUB */
476 0, /* GOSTART */
0a4db386 477 0, /* NGROUPP */
1a147d38 478 0, /* INSUBP */
0a4db386 479 0, /* DEFINEP */
e2e6a0f1 480 0, /* ENDLIKE */
7f69552c 481 0, /* OPFAIL */
e2e6a0f1
YO
482 0, /* ACCEPT */
483 0, /* VERB */
5d458dd8 484 0, /* PRUNE */
e2e6a0f1 485 0, /* MARKPOINT */
5d458dd8 486 0, /* SKIP */
e2e6a0f1 487 0, /* COMMIT */
5d458dd8 488 0, /* CUTGROUP */
ee9b8eae 489 0, /* KEEPS */
e1d1eefb 490 0, /* LNBREAK */
03363afd
YO
491 0, /* OPTIMIZED */
492 0, /* PSEUDO */
d09b2d29 493};
885f9e59 494
13d6edb4
NC
495#endif /* REG_COMP_C */
496
6bda09f9
YO
497/* PL_reg_name[] - Opcode/state names in string form, for debugging */
498
/* Names of the regops END .. PSEUDO followed by the names of the regmatch
 * states TRIE_next .. KEEPS_next_fail, in numeric order.  The trailing
 * comment on each entry gives its number: a plain hex value for regops,
 * and "REGNODE_MAX +0xNN" for states.
 * NOTE(review): the declaration used when DOINIT is not defined is
 * "char * PL_reg_name[]" whereas the definition is
 * "char * const PL_reg_name[]"; the two differ in the const qualifier. */
22429478 499#ifndef DOINIT
13d6edb4 500EXTCONST char * PL_reg_name[];
22429478 501#else
4764e399 502EXTCONST char * const PL_reg_name[] = {
03363afd
YO
503 "END", /* 0000 */
504 "SUCCEED", /* 0x01 */
505 "BOL", /* 0x02 */
506 "MBOL", /* 0x03 */
507 "SBOL", /* 0x04 */
508 "EOS", /* 0x05 */
509 "EOL", /* 0x06 */
510 "MEOL", /* 0x07 */
511 "SEOL", /* 0x08 */
512 "BOUND", /* 0x09 */
513 "BOUNDL", /* 0x0a */
1e355c70 514 "BOUNDU", /* 0x0b */
0c6e81eb
KW
515 "BOUNDA", /* 0x0c */
516 "NBOUND", /* 0x0d */
517 "NBOUNDL", /* 0x0e */
518 "NBOUNDU", /* 0x0f */
519 "NBOUNDA", /* 0x10 */
520 "GPOS", /* 0x11 */
521 "REG_ANY", /* 0x12 */
522 "SANY", /* 0x13 */
523 "CANY", /* 0x14 */
524 "ANYOF", /* 0x15 */
34fdef84
KW
525 "ANYOF_NON_UTF8_NON_ASCII_ALL", /* 0x16 */
526 "POSIXD", /* 0x17 */
527 "POSIXL", /* 0x18 */
528 "POSIXU", /* 0x19 */
529 "POSIXA", /* 0x1a */
530 "NPOSIXD", /* 0x1b */
531 "NPOSIXL", /* 0x1c */
532 "NPOSIXU", /* 0x1d */
533 "NPOSIXA", /* 0x1e */
534 "CLUMP", /* 0x1f */
535 "BRANCH", /* 0x20 */
536 "BACK", /* 0x21 */
537 "EXACT", /* 0x22 */
538 "EXACTF", /* 0x23 */
539 "EXACTFL", /* 0x24 */
540 "EXACTFU", /* 0x25 */
541 "EXACTFA", /* 0x26 */
542 "EXACTFU_SS", /* 0x27 */
543 "EXACTFA_NO_TRIE", /* 0x28 */
544 "NOTHING", /* 0x29 */
545 "TAIL", /* 0x2a */
546 "STAR", /* 0x2b */
547 "PLUS", /* 0x2c */
548 "CURLY", /* 0x2d */
549 "CURLYN", /* 0x2e */
550 "CURLYM", /* 0x2f */
551 "CURLYX", /* 0x30 */
552 "WHILEM", /* 0x31 */
553 "OPEN", /* 0x32 */
554 "CLOSE", /* 0x33 */
555 "REF", /* 0x34 */
556 "REFF", /* 0x35 */
557 "REFFL", /* 0x36 */
558 "REFFU", /* 0x37 */
559 "REFFA", /* 0x38 */
560 "NREF", /* 0x39 */
561 "NREFF", /* 0x3a */
562 "NREFFL", /* 0x3b */
563 "NREFFU", /* 0x3c */
564 "NREFFA", /* 0x3d */
565 "IFMATCH", /* 0x3e */
566 "UNLESSM", /* 0x3f */
567 "SUSPEND", /* 0x40 */
568 "IFTHEN", /* 0x41 */
569 "GROUPP", /* 0x42 */
570 "LONGJMP", /* 0x43 */
571 "BRANCHJ", /* 0x44 */
572 "EVAL", /* 0x45 */
573 "MINMOD", /* 0x46 */
574 "LOGICAL", /* 0x47 */
575 "RENUM", /* 0x48 */
576 "TRIE", /* 0x49 */
577 "TRIEC", /* 0x4a */
578 "AHOCORASICK", /* 0x4b */
579 "AHOCORASICKC", /* 0x4c */
580 "GOSUB", /* 0x4d */
581 "GOSTART", /* 0x4e */
582 "NGROUPP", /* 0x4f */
583 "INSUBP", /* 0x50 */
584 "DEFINEP", /* 0x51 */
585 "ENDLIKE", /* 0x52 */
586 "OPFAIL", /* 0x53 */
587 "ACCEPT", /* 0x54 */
588 "VERB", /* 0x55 */
589 "PRUNE", /* 0x56 */
590 "MARKPOINT", /* 0x57 */
591 "SKIP", /* 0x58 */
592 "COMMIT", /* 0x59 */
593 "CUTGROUP", /* 0x5a */
594 "KEEPS", /* 0x5b */
595 "LNBREAK", /* 0x5c */
596 "OPTIMIZED", /* 0x5d */
597 "PSEUDO", /* 0x5e */
03363afd 598 /* ------------ States ------------- */
24b23f37
YO
599 "TRIE_next", /* REGNODE_MAX +0x01 */
600 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
601 "EVAL_AB", /* REGNODE_MAX +0x03 */
602 "EVAL_AB_fail", /* REGNODE_MAX +0x04 */
603 "CURLYX_end", /* REGNODE_MAX +0x05 */
604 "CURLYX_end_fail", /* REGNODE_MAX +0x06 */
605 "WHILEM_A_pre", /* REGNODE_MAX +0x07 */
606 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */
607 "WHILEM_A_min", /* REGNODE_MAX +0x09 */
608 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */
609 "WHILEM_A_max", /* REGNODE_MAX +0x0b */
610 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */
611 "WHILEM_B_min", /* REGNODE_MAX +0x0d */
612 "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */
613 "WHILEM_B_max", /* REGNODE_MAX +0x0f */
614 "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */
615 "BRANCH_next", /* REGNODE_MAX +0x11 */
616 "BRANCH_next_fail", /* REGNODE_MAX +0x12 */
617 "CURLYM_A", /* REGNODE_MAX +0x13 */
618 "CURLYM_A_fail", /* REGNODE_MAX +0x14 */
619 "CURLYM_B", /* REGNODE_MAX +0x15 */
620 "CURLYM_B_fail", /* REGNODE_MAX +0x16 */
621 "IFMATCH_A", /* REGNODE_MAX +0x17 */
622 "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */
623 "CURLY_B_min_known", /* REGNODE_MAX +0x19 */
624 "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */
625 "CURLY_B_min", /* REGNODE_MAX +0x1b */
626 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
627 "CURLY_B_max", /* REGNODE_MAX +0x1d */
628 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
629 "COMMIT_next", /* REGNODE_MAX +0x1f */
630 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
e2e6a0f1
YO
631 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
632 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
5d458dd8
YO
633 "SKIP_next", /* REGNODE_MAX +0x23 */
634 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
635 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
636 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
ee9b8eae
YO
637 "KEEPS_next", /* REGNODE_MAX +0x27 */
638 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
885f9e59 639};
22429478 640#endif /* DOINIT */
d09b2d29 641
f7819f85
A
642/* PL_reg_extflags_name[] - Names of the regexp extflags bits in string form, for debugging */
643
/* 32 names, one per bit of the extflags word: entry i names bit (1 << i),
 * as the hex mask in each trailing comment shows.  CHARSET0, CHARSET1 and
 * CHARSET2 together make up the three-bit "CHARSET" field (mask
 * 0x000000e0).  In the "Bits in extflags defined" mask below, read most
 * significant bit first, the single 0 is bit 24 (named UNUSED_BIT_24).
 * NOTE(review): the non-DOINIT declaration omits the "const" that follows
 * the "*" in the definition. */
644#ifndef DOINIT
645EXTCONST char * PL_reg_extflags_name[];
646#else
647EXTCONST char * const PL_reg_extflags_name[] = {
dbc200c5 648 /* Bits in extflags defined: 11111110111111111111111111111111 */
52d81aa8
NC
649 "MULTILINE", /* 0x00000001 */
650 "SINGLELINE", /* 0x00000002 */
651 "FOLD", /* 0x00000004 */
652 "EXTENDED", /* 0x00000008 */
653 "KEEPCOPY", /* 0x00000010 */
6976c986
YO
654 "CHARSET0", /* 0x00000020 : "CHARSET" - 0x000000e0 */
655 "CHARSET1", /* 0x00000040 : "CHARSET" - 0x000000e0 */
656 "CHARSET2", /* 0x00000080 : "CHARSET" - 0x000000e0 */
dbc200c5 657 "SPLIT", /* 0x00000100 */
8e1490ee
YO
658 "IS_ANCHORED", /* 0x00000200 */
659 "UNUSED1", /* 0x00000400 */
660 "UNUSED2", /* 0x00000800 */
661 "UNUSED3", /* 0x00001000 */
662 "UNUSED4", /* 0x00002000 */
663 "UNUSED5", /* 0x00004000 */
dbc200c5 664 "NO_INPLACE_SUBST", /* 0x00008000 */
e795e964 665 "EVAL_SEEN", /* 0x00010000 */
8e1490ee 666 "UNUSED8", /* 0x00020000 */
ee273784 667 "UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
e795e964 668 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
669 "MATCH_UTF8", /* 0x00100000 */
670 "USE_INTUIT_NOML", /* 0x00200000 */
671 "USE_INTUIT_ML", /* 0x00400000 */
672 "INTUIT_TAIL", /* 0x00800000 */
dbc200c5 673 "UNUSED_BIT_24", /* 0x01000000 */
52d81aa8
NC
674 "COPY_DONE", /* 0x02000000 */
675 "TAINTED_SEEN", /* 0x04000000 */
676 "TAINTED", /* 0x08000000 */
677 "START_ONLY", /* 0x10000000 */
dbc200c5 678 "SKIPWHITE", /* 0x20000000 */
52d81aa8
NC
679 "WHITE", /* 0x40000000 */
680 "NULL", /* 0x80000000 */
f7819f85
A
681};
682#endif /* DOINIT */
683
337ff307
YO
684/* PL_reg_intflags_name[] - Names of the regexp intflags (PREGf_*) bits in string form, for debugging */
685
/* 14 names, one per intflags bit from 0x00000001 to 0x00002000: entry i
 * names bit (1 << i).  The trailing comment on each entry gives the mask
 * and the corresponding PREGf_* constant, plus a short note for some.
 * NOTE(review): the non-DOINIT declaration omits the "const" that follows
 * the "*" in the definition. */
686#ifndef DOINIT
687EXTCONST char * PL_reg_intflags_name[];
688#else
689EXTCONST char * const PL_reg_intflags_name[] = {
b8f6efdd
YO
690 "SKIP", /* 0x00000001 - PREGf_SKIP */
691 "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
692 "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
693 "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
694 "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
695 "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
58430ea8
YO
696 "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
697 "CANY_SEEN", /* 0x00000080 - PREGf_CANY_SEEN */
698 "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
699 "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
8e1490ee
YO
700 "ANCH_BOL", /* 0x00000400 - PREGf_ANCH_BOL */
701 "ANCH_MBOL", /* 0x00000800 - PREGf_ANCH_MBOL */
702 "ANCH_SBOL", /* 0x00001000 - PREGf_ANCH_SBOL */
703 "ANCH_GPOS", /* 0x00002000 - PREGf_ANCH_GPOS */
337ff307
YO
704};
705#endif /* DOINIT */
706
f9ef50a7 707/* The following have no fixed length. U8 so we can do strchr() on it. */
/* REGNODE_VARIES(node): non-zero when the bit for 'node' is set in
 * PL_varies_bitmask, i.e. bit (node & 7) of byte (node >> 3).  The result
 * is the masked bit itself, not necessarily 1.  'node' is expanded twice,
 * so do not pass an expression with side effects. */
ded4dd2a 708#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 709
/* List of the 23 regop opcodes CLUMP .. BRANCHJ shown below, terminated by
 * a 0 byte.  Both the declaration and the definition carry
 * __attribute__deprecated__; PL_varies_bitmask (used by REGNODE_VARIES)
 * encodes the same set of opcodes as a bitmap. */
f9ef50a7 710#ifndef DOINIT
ded4dd2a 711EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 712#else
ded4dd2a 713EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
e0193e47
KW
714 CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM,
715 REF, REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
716 SUSPEND, IFTHEN, BRANCHJ,
f9ef50a7
NC
717 0
718};
719#endif /* DOINIT */
720
ded4dd2a
NC
/* Bitmap form of the PL_varies opcode list: for each opcode op in that
 * list, bit (op & 7) of byte (op >> 3) is set.  The 12 bytes give 96 bits,
 * enough for opcodes 0 .. REGNODE_MAX (94).  For example byte 3 is 0x80
 * because CLUMP is opcode 31, and byte 4 is 0x03 for BRANCH (32) and
 * BACK (33). */
721#ifndef DOINIT
722EXTCONST U8 PL_varies_bitmask[];
723#else
724EXTCONST U8 PL_varies_bitmask[] = {
34fdef84 725 0x00, 0x00, 0x00, 0x80, 0x03, 0xF8, 0xF3, 0x3F, 0x13, 0x00, 0x00, 0x00
ded4dd2a
NC
726};
727#endif /* DOINIT */
728
f9ef50a7
NC
729/* The following always have a length of 1. U8 so we can do strchr() on it. */
730/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* REGNODE_SIMPLE(node): non-zero when the bit for 'node' is set in
 * PL_simple_bitmask, i.e. bit (node & 7) of byte (node >> 3).  The result
 * is the masked bit itself, not necessarily 1.  'node' is expanded twice,
 * so do not pass an expression with side effects. */
ded4dd2a 731#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 732
/* List of the 13 regop opcodes REG_ANY .. NPOSIXA shown below, terminated
 * by a 0 byte.  Both the declaration and the definition carry
 * __attribute__deprecated__; PL_simple_bitmask (used by REGNODE_SIMPLE)
 * encodes the same set of opcodes as a bitmap. */
f9ef50a7 733#ifndef DOINIT
ded4dd2a 734EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 735#else
ded4dd2a 736EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
34fdef84
KW
737 REG_ANY, SANY, CANY, ANYOF, ANYOF_NON_UTF8_NON_ASCII_ALL, POSIXD,
738 POSIXL, POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA,
f9ef50a7
NC
739 0
740};
741#endif /* DOINIT */
742
ded4dd2a
NC
/* Bitmap form of the PL_simple opcode list: for each opcode op in that
 * list, bit (op & 7) of byte (op >> 3) is set.  The listed opcodes are the
 * contiguous range REG_ANY (18) .. NPOSIXA (30), which gives 0xFC in
 * byte 2 (bits for 18..23) and 0x7F in byte 3 (bits for 24..30).  The 12
 * bytes give 96 bits, enough for opcodes 0 .. REGNODE_MAX (94). */
743#ifndef DOINIT
744EXTCONST U8 PL_simple_bitmask[];
745#else
746EXTCONST U8 PL_simple_bitmask[] = {
34fdef84 747 0x00, 0x00, 0xFC, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
ded4dd2a
NC
748};
749#endif /* DOINIT */
750
37442d52 751/* ex: set ro: */