This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Clarify pod for bytes to/from utf8()
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
eb2624c9 3 This file is built by regen/regcomp.pl from regcomp.sym.
d09b2d29 4 Any changes made here will be lost!
78102347 5 */
d09b2d29 6
6bda09f9
YO
7/* Regops and State definitions */
8
d5a00e4a 9#define REGNODE_MAX 92 /* Highest regop number; equals PSEUDO, the last opcode defined in this file. */
4ee16520 10#define REGMATCH_STATE_MAX 134 /* Highest state number; equals REGNODE_MAX + 42 (KEEPS_next_fail). */
03363afd 11
f9f4320a
YO
12#define END 0 /* 0000 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
d3d47aac
YO
14#define SBOL 2 /* 0x02 Match "" at beginning of line: /^/, /\A/ */
15#define BOL 2 /* 0x02 type alias */
16#define MBOL 3 /* 0x03 Same, assuming multiline: /^/m */
17#define SEOL 4 /* 0x04 Match "" at end of line: /$/ */
18#define EOL 4 /* 0x04 type alias */
19#define MEOL 5 /* 0x05 Same, assuming multiline: /$/m */
20#define EOS 6 /* 0x06 Match "" at end of string: /\z/ */
21#define GPOS 7 /* 0x07 Matches where last m//g left off. */
c440a570
KW
22#define BOUND 8 /* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
23#define BOUNDL 9 /* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */
64935bc6 24#define BOUNDU 10 /* 0x0a Match "" at any boundary of a given type using Unicode rules */
c440a570
KW
25#define BOUNDA 11 /* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
26#define NBOUND 12 /* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
27#define NBOUNDL 13 /* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */
28#define NBOUNDU 14 /* 0x0e Match "" at any non-boundary of a given type using Unicode rules */
29#define NBOUNDA 15 /* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
d3d47aac
YO
30#define REG_ANY 16 /* 0x10 Match any one character (except newline). */
31#define SANY 17 /* 0x11 Match any one character. */
33c28ab2 32#define ANYOF 18 /* 0x12 Match character in (or not in) this class, single char match only */
ac44c12e
KW
33#define ANYOFD 19 /* 0x13 Like ANYOF, but /d is in effect */
34#define ANYOFL 20 /* 0x14 Like ANYOF, but /l is in effect */
35#define POSIXD 21 /* 0x15 Some [[:class:]] under /d; the FLAGS field gives which one */
36#define POSIXL 22 /* 0x16 Some [[:class:]] under /l; the FLAGS field gives which one */
37#define POSIXU 23 /* 0x17 Some [[:class:]] under /u; the FLAGS field gives which one */
38#define POSIXA 24 /* 0x18 Some [[:class:]] under /a; the FLAGS field gives which one */
39#define NPOSIXD 25 /* 0x19 complement of POSIXD, [[:^class:]] */
40#define NPOSIXL 26 /* 0x1a complement of POSIXL, [[:^class:]] */
41#define NPOSIXU 27 /* 0x1b complement of POSIXU, [[:^class:]] */
42#define NPOSIXA 28 /* 0x1c complement of POSIXA, [[:^class:]] */
43#define CLUMP 29 /* 0x1d Match any extended grapheme cluster sequence */
44#define BRANCH 30 /* 0x1e Match this alternative, or the next... */
45#define EXACT 31 /* 0x1f Match this string (preceded by length). */
46#define EXACTL 32 /* 0x20 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
47#define EXACTF 33 /* 0x21 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
48#define EXACTFL 34 /* 0x22 Match this string (not guaranteed to be folded) using /il rules (w/len). */
49#define EXACTFU 35 /* 0x23 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
50#define EXACTFA 36 /* 0x24 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
51#define EXACTFU_SS 37 /* 0x25 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
52#define EXACTFLU8 38 /* 0x26 Rare circumstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */
53#define EXACTFA_NO_TRIE 39 /* 0x27 Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
54#define NOTHING 40 /* 0x28 Match empty string. */
55#define TAIL 41 /* 0x29 Match empty string. Can jump here from outside. */
56#define STAR 42 /* 0x2a Match this (simple) thing 0 or more times. */
57#define PLUS 43 /* 0x2b Match this (simple) thing 1 or more times. */
58#define CURLY 44 /* 0x2c Match this simple thing {n,m} times. */
59#define CURLYN 45 /* 0x2d Capture next-after-this simple thing */
60#define CURLYM 46 /* 0x2e Capture this medium-complex thing {n,m} times. */
61#define CURLYX 47 /* 0x2f Match this complex thing {n,m} times. */
62#define WHILEM 48 /* 0x30 Do curly processing and see if rest matches. */
63#define OPEN 49 /* 0x31 Mark this point in input as start of #n. */
64#define CLOSE 50 /* 0x32 Analogous to OPEN. */
65#define REF 51 /* 0x33 Match some already matched string */
66#define REFF 52 /* 0x34 Match already matched string, folded using native charset rules for non-utf8 */
67#define REFFL 53 /* 0x35 Match already matched string, folded in loc. */
68#define REFFU 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8 */
69#define REFFA 55 /* 0x37 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
70#define NREF 56 /* 0x38 Match some already matched string */
71#define NREFF 57 /* 0x39 Match already matched string, folded using native charset rules for non-utf8 */
72#define NREFFL 58 /* 0x3a Match already matched string, folded in loc. */
73#define NREFFU 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8 */
74#define NREFFA 60 /* 0x3c Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
75#define LONGJMP 61 /* 0x3d Jump far away. */
76#define BRANCHJ 62 /* 0x3e BRANCH with long offset. */
77#define IFMATCH 63 /* 0x3f Succeeds if the following matches. */
78#define UNLESSM 64 /* 0x40 Fails if the following matches. */
79#define SUSPEND 65 /* 0x41 "Independent" sub-RE. */
80#define IFTHEN 66 /* 0x42 Switch, should be preceded by switcher. */
81#define GROUPP 67 /* 0x43 Whether the group matched. */
82#define EVAL 68 /* 0x44 Execute some Perl code. */
83#define MINMOD 69 /* 0x45 Next operator is not greedy. */
84#define LOGICAL 70 /* 0x46 Next opcode should set the flag only. */
85#define RENUM 71 /* 0x47 Group with independently numbered parens. */
86#define TRIE 72 /* 0x48 Match many EXACT(F[ALU]?)? at once. flags==type */
87#define TRIEC 73 /* 0x49 Same as TRIE, but with embedded charclass data */
88#define AHOCORASICK 74 /* 0x4a Aho Corasick stclass. flags==type */
89#define AHOCORASICKC 75 /* 0x4b Same as AHOCORASICK, but with embedded charclass data */
90#define GOSUB 76 /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */
d5a00e4a
YO
91#define NGROUPP 77 /* 0x4d Whether the group matched. */
92#define INSUBP 78 /* 0x4e Whether we are in a specific recurse. */
93#define DEFINEP 79 /* 0x4f Never execute directly. */
94#define ENDLIKE 80 /* 0x50 Used only for the type field of verbs */
95#define OPFAIL 81 /* 0x51 Same as (?!), but with verb arg */
96#define ACCEPT 82 /* 0x52 Accepts the current matched string, with verb arg */
97#define VERB 83 /* 0x53 Used only for the type field of verbs */
98#define PRUNE 84 /* 0x54 Pattern fails at this startpoint if no-backtracking through this */
99#define MARKPOINT 85 /* 0x55 Push the current location for rollback by cut. */
100#define SKIP 86 /* 0x56 On failure skip forward (to the mark) before retrying */
101#define COMMIT 87 /* 0x57 Pattern fails outright if backtracking through this */
102#define CUTGROUP 88 /* 0x58 On failure go to the next alternation in the group */
103#define KEEPS 89 /* 0x59 $& begins here. */
104#define LNBREAK 90 /* 0x5a generic newline pattern */
105#define OPTIMIZED 91 /* 0x5b Placeholder for dump. */
106#define PSEUDO 92 /* 0x5c Pseudo opcode for internal use. */
03363afd 107 /* ------------ States ------------- */
24b23f37
YO
108#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
109#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
4ee16520
DM
110#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */
111#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */
112#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */
113#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */
114#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */
115#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */
116#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */
117#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */
118#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */
119#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */
120#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */
121#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */
122#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */
123#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */
124#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */
125#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */
126#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */
127#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */
128#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */
129#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */
130#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */
131#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */
132#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */
133#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */
134#define CURLY_B_min_known (REGNODE_MAX + 27) /* state for CURLY */
135#define CURLY_B_min_known_fail (REGNODE_MAX + 28) /* state for CURLY */
136#define CURLY_B_min (REGNODE_MAX + 29) /* state for CURLY */
137#define CURLY_B_min_fail (REGNODE_MAX + 30) /* state for CURLY */
138#define CURLY_B_max (REGNODE_MAX + 31) /* state for CURLY */
139#define CURLY_B_max_fail (REGNODE_MAX + 32) /* state for CURLY */
140#define COMMIT_next (REGNODE_MAX + 33) /* state for COMMIT */
141#define COMMIT_next_fail (REGNODE_MAX + 34) /* state for COMMIT */
142#define MARKPOINT_next (REGNODE_MAX + 35) /* state for MARKPOINT */
143#define MARKPOINT_next_fail (REGNODE_MAX + 36) /* state for MARKPOINT */
144#define SKIP_next (REGNODE_MAX + 37) /* state for SKIP */
145#define SKIP_next_fail (REGNODE_MAX + 38) /* state for SKIP */
146#define CUTGROUP_next (REGNODE_MAX + 39) /* state for CUTGROUP */
147#define CUTGROUP_next_fail (REGNODE_MAX + 40) /* state for CUTGROUP */
148#define KEEPS_next (REGNODE_MAX + 41) /* state for KEEPS */
149#define KEEPS_next_fail (REGNODE_MAX + 42) /* state for KEEPS */
03363afd 150
6bda09f9 151/* PL_regkind[] - The kind (base regop) that each regop or state belongs to. */
d09b2d29
IZ
152
153#ifndef DOINIT
22c35a8c 154EXTCONST U8 PL_regkind[];
d09b2d29 155#else
22c35a8c 156EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
157 END, /* END */
158 END, /* SUCCEED */
e2e6a0f1 159 BOL, /* SBOL */
d3d47aac 160 BOL, /* MBOL */
e2e6a0f1 161 EOL, /* SEOL */
d3d47aac
YO
162 EOL, /* MEOL */
163 EOL, /* EOS */
164 GPOS, /* GPOS */
e2e6a0f1
YO
165 BOUND, /* BOUND */
166 BOUND, /* BOUNDL */
1e355c70 167 BOUND, /* BOUNDU */
0c6e81eb 168 BOUND, /* BOUNDA */
e2e6a0f1
YO
169 NBOUND, /* NBOUND */
170 NBOUND, /* NBOUNDL */
1e355c70 171 NBOUND, /* NBOUNDU */
0c6e81eb 172 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
173 REG_ANY, /* REG_ANY */
174 REG_ANY, /* SANY */
e2e6a0f1 175 ANYOF, /* ANYOF */
ac44c12e 176 ANYOF, /* ANYOFD */
a4525e78 177 ANYOF, /* ANYOFL */
3615ea58
KW
178 POSIXD, /* POSIXD */
179 POSIXD, /* POSIXL */
180 POSIXD, /* POSIXU */
181 POSIXD, /* POSIXA */
9e84774b
KW
182 NPOSIXD, /* NPOSIXD */
183 NPOSIXD, /* NPOSIXL */
184 NPOSIXD, /* NPOSIXU */
185 NPOSIXD, /* NPOSIXA */
e2e6a0f1
YO
186 CLUMP, /* CLUMP */
187 BRANCH, /* BRANCH */
e2e6a0f1 188 EXACT, /* EXACT */
a4525e78 189 EXACT, /* EXACTL */
e2e6a0f1
YO
190 EXACT, /* EXACTF */
191 EXACT, /* EXACTFL */
01f98ec2 192 EXACT, /* EXACTFU */
8c1182fd 193 EXACT, /* EXACTFA */
3c760661 194 EXACT, /* EXACTFU_SS */
a4525e78 195 EXACT, /* EXACTFLU8 */
098b07d5 196 EXACT, /* EXACTFA_NO_TRIE */
e2e6a0f1
YO
197 NOTHING, /* NOTHING */
198 NOTHING, /* TAIL */
199 STAR, /* STAR */
200 PLUS, /* PLUS */
201 CURLY, /* CURLY */
202 CURLY, /* CURLYN */
203 CURLY, /* CURLYM */
204 CURLY, /* CURLYX */
205 WHILEM, /* WHILEM */
206 OPEN, /* OPEN */
207 CLOSE, /* CLOSE */
208 REF, /* REF */
209 REF, /* REFF */
210 REF, /* REFFL */
01f98ec2 211 REF, /* REFFU */
781aab5c 212 REF, /* REFFA */
01f98ec2
KW
213 REF, /* NREF */
214 REF, /* NREFF */
215 REF, /* NREFFL */
216 REF, /* NREFFU */
781aab5c 217 REF, /* NREFFA */
d3d47aac
YO
218 LONGJMP, /* LONGJMP */
219 BRANCHJ, /* BRANCHJ */
e2e6a0f1
YO
220 BRANCHJ, /* IFMATCH */
221 BRANCHJ, /* UNLESSM */
222 BRANCHJ, /* SUSPEND */
223 BRANCHJ, /* IFTHEN */
224 GROUPP, /* GROUPP */
e2e6a0f1
YO
225 EVAL, /* EVAL */
226 MINMOD, /* MINMOD */
227 LOGICAL, /* LOGICAL */
228 BRANCHJ, /* RENUM */
229 TRIE, /* TRIE */
230 TRIE, /* TRIEC */
231 TRIE, /* AHOCORASICK */
232 TRIE, /* AHOCORASICKC */
233 GOSUB, /* GOSUB */
e2e6a0f1
YO
234 NGROUPP, /* NGROUPP */
235 INSUBP, /* INSUBP */
236 DEFINEP, /* DEFINEP */
237 ENDLIKE, /* ENDLIKE */
238 ENDLIKE, /* OPFAIL */
239 ENDLIKE, /* ACCEPT */
240 VERB, /* VERB */
5d458dd8 241 VERB, /* PRUNE */
e2e6a0f1 242 VERB, /* MARKPOINT */
5d458dd8 243 VERB, /* SKIP */
e2e6a0f1 244 VERB, /* COMMIT */
5d458dd8 245 VERB, /* CUTGROUP */
ee9b8eae 246 KEEPS, /* KEEPS */
e1d1eefb 247 LNBREAK, /* LNBREAK */
e2e6a0f1
YO
248 NOTHING, /* OPTIMIZED */
249 PSEUDO, /* PSEUDO */
03363afd 250 /* ------------ States ------------- */
e2e6a0f1
YO
251 TRIE, /* TRIE_next */
252 TRIE, /* TRIE_next_fail */
4ee16520
DM
253 EVAL, /* EVAL_B */
254 EVAL, /* EVAL_B_fail */
255 EVAL, /* EVAL_postponed_AB */
256 EVAL, /* EVAL_postponed_AB_fail */
e2e6a0f1
YO
257 CURLYX, /* CURLYX_end */
258 CURLYX, /* CURLYX_end_fail */
259 WHILEM, /* WHILEM_A_pre */
260 WHILEM, /* WHILEM_A_pre_fail */
261 WHILEM, /* WHILEM_A_min */
262 WHILEM, /* WHILEM_A_min_fail */
263 WHILEM, /* WHILEM_A_max */
264 WHILEM, /* WHILEM_A_max_fail */
265 WHILEM, /* WHILEM_B_min */
266 WHILEM, /* WHILEM_B_min_fail */
267 WHILEM, /* WHILEM_B_max */
268 WHILEM, /* WHILEM_B_max_fail */
269 BRANCH, /* BRANCH_next */
270 BRANCH, /* BRANCH_next_fail */
271 CURLYM, /* CURLYM_A */
272 CURLYM, /* CURLYM_A_fail */
273 CURLYM, /* CURLYM_B */
274 CURLYM, /* CURLYM_B_fail */
275 IFMATCH, /* IFMATCH_A */
276 IFMATCH, /* IFMATCH_A_fail */
277 CURLY, /* CURLY_B_min_known */
278 CURLY, /* CURLY_B_min_known_fail */
279 CURLY, /* CURLY_B_min */
280 CURLY, /* CURLY_B_min_fail */
281 CURLY, /* CURLY_B_max */
282 CURLY, /* CURLY_B_max_fail */
283 COMMIT, /* COMMIT_next */
284 COMMIT, /* COMMIT_next_fail */
285 MARKPOINT, /* MARKPOINT_next */
286 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
287 SKIP, /* SKIP_next */
288 SKIP, /* SKIP_next_fail */
289 CUTGROUP, /* CUTGROUP_next */
290 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
291 KEEPS, /* KEEPS_next */
292 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
293};
294#endif
295
f83e001e
YO
296#ifdef REG_COMP_C
297
6bda09f9 298/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29 299
29de9391 300static const U8 regarglen[] = {
03363afd
YO
301 0, /* END */
302 0, /* SUCCEED */
03363afd 303 0, /* SBOL */
d3d47aac 304 0, /* MBOL */
03363afd 305 0, /* SEOL */
d3d47aac
YO
306 0, /* MEOL */
307 0, /* EOS */
308 0, /* GPOS */
03363afd
YO
309 0, /* BOUND */
310 0, /* BOUNDL */
1e355c70 311 0, /* BOUNDU */
0c6e81eb 312 0, /* BOUNDA */
03363afd
YO
313 0, /* NBOUND */
314 0, /* NBOUNDL */
1e355c70 315 0, /* NBOUNDU */
0c6e81eb 316 0, /* NBOUNDA */
03363afd
YO
317 0, /* REG_ANY */
318 0, /* SANY */
975a06f7 319 EXTRA_SIZE(struct regnode_1), /* ANYOF */
ac44c12e 320 EXTRA_SIZE(struct regnode_1), /* ANYOFD */
a4525e78 321 EXTRA_SIZE(struct regnode_1), /* ANYOFL */
3615ea58
KW
322 0, /* POSIXD */
323 0, /* POSIXL */
324 0, /* POSIXU */
325 0, /* POSIXA */
326 0, /* NPOSIXD */
327 0, /* NPOSIXL */
328 0, /* NPOSIXU */
329 0, /* NPOSIXA */
03363afd
YO
330 0, /* CLUMP */
331 0, /* BRANCH */
03363afd 332 0, /* EXACT */
a4525e78 333 0, /* EXACTL */
03363afd
YO
334 0, /* EXACTF */
335 0, /* EXACTFL */
01f98ec2 336 0, /* EXACTFU */
8c1182fd 337 0, /* EXACTFA */
3c760661 338 0, /* EXACTFU_SS */
a4525e78 339 0, /* EXACTFLU8 */
098b07d5 340 0, /* EXACTFA_NO_TRIE */
03363afd
YO
341 0, /* NOTHING */
342 0, /* TAIL */
343 0, /* STAR */
344 0, /* PLUS */
345 EXTRA_SIZE(struct regnode_2), /* CURLY */
346 EXTRA_SIZE(struct regnode_2), /* CURLYN */
347 EXTRA_SIZE(struct regnode_2), /* CURLYM */
348 EXTRA_SIZE(struct regnode_2), /* CURLYX */
349 0, /* WHILEM */
350 EXTRA_SIZE(struct regnode_1), /* OPEN */
351 EXTRA_SIZE(struct regnode_1), /* CLOSE */
352 EXTRA_SIZE(struct regnode_1), /* REF */
353 EXTRA_SIZE(struct regnode_1), /* REFF */
354 EXTRA_SIZE(struct regnode_1), /* REFFL */
01f98ec2 355 EXTRA_SIZE(struct regnode_1), /* REFFU */
781aab5c 356 EXTRA_SIZE(struct regnode_1), /* REFFA */
01f98ec2
KW
357 EXTRA_SIZE(struct regnode_1), /* NREF */
358 EXTRA_SIZE(struct regnode_1), /* NREFF */
359 EXTRA_SIZE(struct regnode_1), /* NREFFL */
360 EXTRA_SIZE(struct regnode_1), /* NREFFU */
781aab5c 361 EXTRA_SIZE(struct regnode_1), /* NREFFA */
d3d47aac
YO
362 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
363 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
03363afd
YO
364 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
365 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
366 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
367 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
368 EXTRA_SIZE(struct regnode_1), /* GROUPP */
13f27704 369 EXTRA_SIZE(struct regnode_2L), /* EVAL */
03363afd
YO
370 0, /* MINMOD */
371 0, /* LOGICAL */
372 EXTRA_SIZE(struct regnode_1), /* RENUM */
373 EXTRA_SIZE(struct regnode_1), /* TRIE */
374 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
375 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
376 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
1a147d38 377 EXTRA_SIZE(struct regnode_2L), /* GOSUB */
0a4db386 378 EXTRA_SIZE(struct regnode_1), /* NGROUPP */
1a147d38 379 EXTRA_SIZE(struct regnode_1), /* INSUBP */
0a4db386 380 EXTRA_SIZE(struct regnode_1), /* DEFINEP */
e2e6a0f1 381 0, /* ENDLIKE */
fee50582
YO
382 EXTRA_SIZE(struct regnode_1), /* OPFAIL */
383 EXTRA_SIZE(struct regnode_2L), /* ACCEPT */
20832bc5 384 EXTRA_SIZE(struct regnode_1), /* VERB */
5d458dd8 385 EXTRA_SIZE(struct regnode_1), /* PRUNE */
e2e6a0f1 386 EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
5d458dd8 387 EXTRA_SIZE(struct regnode_1), /* SKIP */
e2e6a0f1 388 EXTRA_SIZE(struct regnode_1), /* COMMIT */
5d458dd8 389 EXTRA_SIZE(struct regnode_1), /* CUTGROUP */
ee9b8eae 390 0, /* KEEPS */
e1d1eefb 391 0, /* LNBREAK */
03363afd
YO
392 0, /* OPTIMIZED */
393 0, /* PSEUDO */
d09b2d29
IZ
394};
395
6bda09f9
YO
396/* reg_off_by_arg[] - Which argument holds the offset to the next node */
397
29de9391 398static const char reg_off_by_arg[] = {
03363afd
YO
399 0, /* END */
400 0, /* SUCCEED */
03363afd 401 0, /* SBOL */
d3d47aac 402 0, /* MBOL */
03363afd 403 0, /* SEOL */
d3d47aac
YO
404 0, /* MEOL */
405 0, /* EOS */
406 0, /* GPOS */
03363afd
YO
407 0, /* BOUND */
408 0, /* BOUNDL */
1e355c70 409 0, /* BOUNDU */
0c6e81eb 410 0, /* BOUNDA */
03363afd
YO
411 0, /* NBOUND */
412 0, /* NBOUNDL */
1e355c70 413 0, /* NBOUNDU */
0c6e81eb 414 0, /* NBOUNDA */
03363afd
YO
415 0, /* REG_ANY */
416 0, /* SANY */
03363afd 417 0, /* ANYOF */
ac44c12e 418 0, /* ANYOFD */
a4525e78 419 0, /* ANYOFL */
3615ea58
KW
420 0, /* POSIXD */
421 0, /* POSIXL */
422 0, /* POSIXU */
423 0, /* POSIXA */
424 0, /* NPOSIXD */
425 0, /* NPOSIXL */
426 0, /* NPOSIXU */
427 0, /* NPOSIXA */
03363afd
YO
428 0, /* CLUMP */
429 0, /* BRANCH */
03363afd 430 0, /* EXACT */
a4525e78 431 0, /* EXACTL */
03363afd
YO
432 0, /* EXACTF */
433 0, /* EXACTFL */
01f98ec2 434 0, /* EXACTFU */
8c1182fd 435 0, /* EXACTFA */
3c760661 436 0, /* EXACTFU_SS */
a4525e78 437 0, /* EXACTFLU8 */
098b07d5 438 0, /* EXACTFA_NO_TRIE */
03363afd
YO
439 0, /* NOTHING */
440 0, /* TAIL */
441 0, /* STAR */
442 0, /* PLUS */
443 0, /* CURLY */
444 0, /* CURLYN */
445 0, /* CURLYM */
446 0, /* CURLYX */
447 0, /* WHILEM */
448 0, /* OPEN */
449 0, /* CLOSE */
450 0, /* REF */
451 0, /* REFF */
452 0, /* REFFL */
01f98ec2 453 0, /* REFFU */
781aab5c 454 0, /* REFFA */
01f98ec2
KW
455 0, /* NREF */
456 0, /* NREFF */
457 0, /* NREFFL */
458 0, /* NREFFU */
781aab5c 459 0, /* NREFFA */
d3d47aac
YO
460 1, /* LONGJMP */
461 1, /* BRANCHJ */
03363afd
YO
462 2, /* IFMATCH */
463 2, /* UNLESSM */
464 1, /* SUSPEND */
465 1, /* IFTHEN */
466 0, /* GROUPP */
03363afd
YO
467 0, /* EVAL */
468 0, /* MINMOD */
469 0, /* LOGICAL */
470 1, /* RENUM */
471 0, /* TRIE */
472 0, /* TRIEC */
473 0, /* AHOCORASICK */
474 0, /* AHOCORASICKC */
1a147d38 475 0, /* GOSUB */
0a4db386 476 0, /* NGROUPP */
1a147d38 477 0, /* INSUBP */
0a4db386 478 0, /* DEFINEP */
e2e6a0f1 479 0, /* ENDLIKE */
7f69552c 480 0, /* OPFAIL */
e2e6a0f1
YO
481 0, /* ACCEPT */
482 0, /* VERB */
5d458dd8 483 0, /* PRUNE */
e2e6a0f1 484 0, /* MARKPOINT */
5d458dd8 485 0, /* SKIP */
e2e6a0f1 486 0, /* COMMIT */
5d458dd8 487 0, /* CUTGROUP */
ee9b8eae 488 0, /* KEEPS */
e1d1eefb 489 0, /* LNBREAK */
03363afd
YO
490 0, /* OPTIMIZED */
491 0, /* PSEUDO */
d09b2d29 492};
885f9e59 493
13d6edb4
NC
494#endif /* REG_COMP_C */
495
f83e001e 496
6bda09f9
YO
497/* PL_reg_name[] - Opcode/state names in string form, for debugging */
498
22429478 499#ifndef DOINIT
13d6edb4 500EXTCONST char * PL_reg_name[];
22429478 501#else
4764e399 502EXTCONST char * const PL_reg_name[] = {
03363afd
YO
503 "END", /* 0000 */
504 "SUCCEED", /* 0x01 */
d3d47aac 505 "SBOL", /* 0x02 */
03363afd 506 "MBOL", /* 0x03 */
d3d47aac
YO
507 "SEOL", /* 0x04 */
508 "MEOL", /* 0x05 */
509 "EOS", /* 0x06 */
510 "GPOS", /* 0x07 */
511 "BOUND", /* 0x08 */
512 "BOUNDL", /* 0x09 */
513 "BOUNDU", /* 0x0a */
514 "BOUNDA", /* 0x0b */
515 "NBOUND", /* 0x0c */
516 "NBOUNDL", /* 0x0d */
517 "NBOUNDU", /* 0x0e */
518 "NBOUNDA", /* 0x0f */
519 "REG_ANY", /* 0x10 */
520 "SANY", /* 0x11 */
33c28ab2 521 "ANYOF", /* 0x12 */
ac44c12e
KW
522 "ANYOFD", /* 0x13 */
523 "ANYOFL", /* 0x14 */
524 "POSIXD", /* 0x15 */
525 "POSIXL", /* 0x16 */
526 "POSIXU", /* 0x17 */
527 "POSIXA", /* 0x18 */
528 "NPOSIXD", /* 0x19 */
529 "NPOSIXL", /* 0x1a */
530 "NPOSIXU", /* 0x1b */
531 "NPOSIXA", /* 0x1c */
532 "CLUMP", /* 0x1d */
533 "BRANCH", /* 0x1e */
534 "EXACT", /* 0x1f */
535 "EXACTL", /* 0x20 */
536 "EXACTF", /* 0x21 */
537 "EXACTFL", /* 0x22 */
538 "EXACTFU", /* 0x23 */
539 "EXACTFA", /* 0x24 */
540 "EXACTFU_SS", /* 0x25 */
541 "EXACTFLU8", /* 0x26 */
542 "EXACTFA_NO_TRIE", /* 0x27 */
543 "NOTHING", /* 0x28 */
544 "TAIL", /* 0x29 */
545 "STAR", /* 0x2a */
546 "PLUS", /* 0x2b */
547 "CURLY", /* 0x2c */
548 "CURLYN", /* 0x2d */
549 "CURLYM", /* 0x2e */
550 "CURLYX", /* 0x2f */
551 "WHILEM", /* 0x30 */
552 "OPEN", /* 0x31 */
553 "CLOSE", /* 0x32 */
554 "REF", /* 0x33 */
555 "REFF", /* 0x34 */
556 "REFFL", /* 0x35 */
557 "REFFU", /* 0x36 */
558 "REFFA", /* 0x37 */
559 "NREF", /* 0x38 */
560 "NREFF", /* 0x39 */
561 "NREFFL", /* 0x3a */
562 "NREFFU", /* 0x3b */
563 "NREFFA", /* 0x3c */
564 "LONGJMP", /* 0x3d */
565 "BRANCHJ", /* 0x3e */
566 "IFMATCH", /* 0x3f */
567 "UNLESSM", /* 0x40 */
568 "SUSPEND", /* 0x41 */
569 "IFTHEN", /* 0x42 */
570 "GROUPP", /* 0x43 */
571 "EVAL", /* 0x44 */
572 "MINMOD", /* 0x45 */
573 "LOGICAL", /* 0x46 */
574 "RENUM", /* 0x47 */
575 "TRIE", /* 0x48 */
576 "TRIEC", /* 0x49 */
577 "AHOCORASICK", /* 0x4a */
578 "AHOCORASICKC", /* 0x4b */
579 "GOSUB", /* 0x4c */
d5a00e4a
YO
580 "NGROUPP", /* 0x4d */
581 "INSUBP", /* 0x4e */
582 "DEFINEP", /* 0x4f */
583 "ENDLIKE", /* 0x50 */
584 "OPFAIL", /* 0x51 */
585 "ACCEPT", /* 0x52 */
586 "VERB", /* 0x53 */
587 "PRUNE", /* 0x54 */
588 "MARKPOINT", /* 0x55 */
589 "SKIP", /* 0x56 */
590 "COMMIT", /* 0x57 */
591 "CUTGROUP", /* 0x58 */
592 "KEEPS", /* 0x59 */
593 "LNBREAK", /* 0x5a */
594 "OPTIMIZED", /* 0x5b */
595 "PSEUDO", /* 0x5c */
03363afd 596 /* ------------ States ------------- */
24b23f37
YO
597 "TRIE_next", /* REGNODE_MAX +0x01 */
598 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
4ee16520
DM
599 "EVAL_B", /* REGNODE_MAX +0x03 */
600 "EVAL_B_fail", /* REGNODE_MAX +0x04 */
601 "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */
602 "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */
603 "CURLYX_end", /* REGNODE_MAX +0x07 */
604 "CURLYX_end_fail", /* REGNODE_MAX +0x08 */
605 "WHILEM_A_pre", /* REGNODE_MAX +0x09 */
606 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */
607 "WHILEM_A_min", /* REGNODE_MAX +0x0b */
608 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */
609 "WHILEM_A_max", /* REGNODE_MAX +0x0d */
610 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */
611 "WHILEM_B_min", /* REGNODE_MAX +0x0f */
612 "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */
613 "WHILEM_B_max", /* REGNODE_MAX +0x11 */
614 "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */
615 "BRANCH_next", /* REGNODE_MAX +0x13 */
616 "BRANCH_next_fail", /* REGNODE_MAX +0x14 */
617 "CURLYM_A", /* REGNODE_MAX +0x15 */
618 "CURLYM_A_fail", /* REGNODE_MAX +0x16 */
619 "CURLYM_B", /* REGNODE_MAX +0x17 */
620 "CURLYM_B_fail", /* REGNODE_MAX +0x18 */
621 "IFMATCH_A", /* REGNODE_MAX +0x19 */
622 "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */
623 "CURLY_B_min_known", /* REGNODE_MAX +0x1b */
624 "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1c */
625 "CURLY_B_min", /* REGNODE_MAX +0x1d */
626 "CURLY_B_min_fail", /* REGNODE_MAX +0x1e */
627 "CURLY_B_max", /* REGNODE_MAX +0x1f */
628 "CURLY_B_max_fail", /* REGNODE_MAX +0x20 */
629 "COMMIT_next", /* REGNODE_MAX +0x21 */
630 "COMMIT_next_fail", /* REGNODE_MAX +0x22 */
631 "MARKPOINT_next", /* REGNODE_MAX +0x23 */
632 "MARKPOINT_next_fail", /* REGNODE_MAX +0x24 */
633 "SKIP_next", /* REGNODE_MAX +0x25 */
634 "SKIP_next_fail", /* REGNODE_MAX +0x26 */
635 "CUTGROUP_next", /* REGNODE_MAX +0x27 */
636 "CUTGROUP_next_fail", /* REGNODE_MAX +0x28 */
637 "KEEPS_next", /* REGNODE_MAX +0x29 */
638 "KEEPS_next_fail", /* REGNODE_MAX +0x2a */
885f9e59 639};
22429478 640#endif /* DOINIT */
d09b2d29 641
f7819f85
A
642/* PL_reg_extflags_name[] - Names of the extflags bits in string form, for debugging */
643
644#ifndef DOINIT
645EXTCONST char * PL_reg_extflags_name[];
646#else
647EXTCONST char * const PL_reg_extflags_name[] = {
d262c0c7 648 /* Bits in extflags defined: 11111111111111110000111111111111 */
52d81aa8
NC
649 "MULTILINE", /* 0x00000001 */
650 "SINGLELINE", /* 0x00000002 */
651 "FOLD", /* 0x00000004 */
652 "EXTENDED", /* 0x00000008 */
334afb3e 653 "EXTENDED_MORE", /* 0x00000010 */
e3b64d84
KW
654 "NOCAPTURE", /* 0x00000020 */
655 "KEEPCOPY", /* 0x00000040 */
656 "CHARSET0", /* 0x00000080 : "CHARSET" - 0x00000380 */
657 "CHARSET1", /* 0x00000100 : "CHARSET" - 0x00000380 */
658 "CHARSET2", /* 0x00000200 : "CHARSET" - 0x00000380 */
d262c0c7
KW
659 "STRICT", /* 0x00000400 */
660 "SPLIT", /* 0x00000800 */
1d32d911
KW
661 "UNUSED_BIT_12", /* 0x00001000 */
662 "UNUSED_BIT_13", /* 0x00002000 */
663 "UNUSED_BIT_14", /* 0x00004000 */
a3b51d37
KW
664 "UNUSED_BIT_15", /* 0x00008000 */
665 "NO_INPLACE_SUBST", /* 0x00010000 */
666 "EVAL_SEEN", /* 0x00020000 */
ee273784 667 "UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
e795e964 668 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
669 "MATCH_UTF8", /* 0x00100000 */
670 "USE_INTUIT_NOML", /* 0x00200000 */
671 "USE_INTUIT_ML", /* 0x00400000 */
672 "INTUIT_TAIL", /* 0x00800000 */
a3b51d37 673 "IS_ANCHORED", /* 0x01000000 */
52d81aa8
NC
674 "COPY_DONE", /* 0x02000000 */
675 "TAINTED_SEEN", /* 0x04000000 */
676 "TAINTED", /* 0x08000000 */
677 "START_ONLY", /* 0x10000000 */
dbc200c5 678 "SKIPWHITE", /* 0x20000000 */
52d81aa8
NC
679 "WHITE", /* 0x40000000 */
680 "NULL", /* 0x80000000 */
f7819f85
A
681};
682#endif /* DOINIT */
683
adc2d0c9
JH
/* REG_EXTFLAGS_NAME_SIZE: number of entries in PL_reg_extflags_name[]
 * (32 names, one per bit from 0x00000001 to 0x80000000).  Only defined
 * when DEBUGGING is defined. */
684#ifdef DEBUGGING
685# define REG_EXTFLAGS_NAME_SIZE 32
686#endif
687
337ff307
YO
688/* PL_reg_intflags_name[] - Names of the intflags (PREGf_*) bits in string form, for debugging */
/* Each entry is the PREGf_ flag name without its prefix; the trailing
 * comment on each row gives the flag's bit value.  Without DOINIT only the
 * declaration is emitted; with DOINIT the table itself is defined.
 * NOTE(review): the declaration omits the `const` that the definition puts
 * on the element pointers -- confirm this mismatch is intentional. */
689
690#ifndef DOINIT
691EXTCONST char * PL_reg_intflags_name[];
692#else
693EXTCONST char * const PL_reg_intflags_name[] = {
b8f6efdd
YO
694 "SKIP", /* 0x00000001 - PREGf_SKIP */
695 "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
696 "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
697 "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
698 "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
699 "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
58430ea8 700 "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
/* NOTE(review): the bit values jump from 0x00000040 to 0x00000100, so no
 * entry stands for 0x00000080.  If this table is indexed by bit number (as
 * the UNUSED_BIT_n placeholders in PL_reg_extflags_name[] suggest for that
 * table), every name from here on sits one slot too early -- confirm
 * against the code that prints these flags. */
58430ea8
YO
701 "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
702 "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
d3d47aac
YO
703 "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
704 "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
705 "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
d5a00e4a 706 "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
337ff307
YO
707};
708#endif /* DOINIT */
709
/* REG_INTFLAGS_NAME_SIZE: number of entries in PL_reg_intflags_name[]
 * (13 names).  Only defined when DEBUGGING is defined. */
adc2d0c9 710#ifdef DEBUGGING
d5a00e4a 711# define REG_INTFLAGS_NAME_SIZE 13
adc2d0c9
JH
712#endif
713
f9ef50a7 714/* The following have no fixed length. U8 so we can do strchr() on it. */
/* REGNODE_VARIES(node): non-zero iff bit (node & 7) of byte (node >> 3) in
 * PL_varies_bitmask[] is set.  The result is the masked bit, not
 * necessarily 1.  `node` is expanded twice, so pass an expression without
 * side effects. */
ded4dd2a 715#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 716
/* PL_varies[]: list of regop numbers terminated by a 0 entry.  Both the
 * declaration and the definition carry __attribute__deprecated__; the same
 * set of regops is encoded bit-wise in PL_varies_bitmask[].  Without DOINIT
 * only the declaration is emitted. */
f9ef50a7 717#ifndef DOINIT
ded4dd2a 718EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 719#else
ded4dd2a 720EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
62e6ef33
AC
721 CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF,
722 REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
d3d47aac 723 BRANCHJ, SUSPEND, IFTHEN,
f9ef50a7
NC
724 0 /* list terminator */
725};
726#endif /* DOINIT */
727
ded4dd2a
NC
/* PL_varies_bitmask[]: one bit per regop number, 12 bytes = 96 bits.
 * Regop n lives in byte (n >> 3), bit (n & 7) -- the layout that
 * REGNODE_VARIES() reads.  The set bits are exactly the regops listed in
 * PL_varies[]; e.g. 0x60 in byte 3 is CLUMP (29) and BRANCH (30), and
 * 0x06 in byte 8 is SUSPEND (65) and IFTHEN (66). */
728#ifndef DOINIT
729EXTCONST U8 PL_varies_bitmask[];
730#else
731EXTCONST U8 PL_varies_bitmask[] = {
ac44c12e 732 0x00, 0x00, 0x00, 0x60, 0x00, 0xFC, 0xF9, 0x5F, 0x06, 0x00, 0x00, 0x00
ded4dd2a
NC
733};
734#endif /* DOINIT */
735
f9ef50a7
NC
736/* The following always have a length of 1. U8 so we can do strchr() on it. */
737/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* REGNODE_SIMPLE(node): non-zero iff bit (node & 7) of byte (node >> 3) in
 * PL_simple_bitmask[] is set.  The result is the masked bit, not
 * necessarily 1.  `node` is expanded twice, so pass an expression without
 * side effects. */
ded4dd2a 738#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 739
/* PL_simple[]: list of regop numbers terminated by a 0 entry.  Both the
 * declaration and the definition carry __attribute__deprecated__; the same
 * set of regops is encoded bit-wise in PL_simple_bitmask[].  Without DOINIT
 * only the declaration is emitted. */
f9ef50a7 740#ifndef DOINIT
ded4dd2a 741EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 742#else
ded4dd2a 743EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
ac44c12e
KW
744 REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, POSIXD, POSIXL, POSIXU, POSIXA,
745 NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA,
f9ef50a7
NC
746 0 /* list terminator */
747};
748#endif /* DOINIT */
749
ded4dd2a
NC
/* PL_simple_bitmask[]: one bit per regop number, 12 bytes = 96 bits.
 * Regop n lives in byte (n >> 3), bit (n & 7) -- the layout that
 * REGNODE_SIMPLE() reads.  The set bits are exactly the regops listed in
 * PL_simple[]: 0xFF in byte 2 is REG_ANY (16) through POSIXU (23), and
 * 0x1F in byte 3 is POSIXA (24) through NPOSIXA (28). */
750#ifndef DOINIT
751EXTCONST U8 PL_simple_bitmask[];
752#else
753EXTCONST U8 PL_simple_bitmask[] = {
ac44c12e 754 0x00, 0x00, 0xFF, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
ded4dd2a
NC
755};
756#endif /* DOINIT */
757
37442d52 758/* ex: set ro: */