This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Revamp qr/[...]/ optimizations
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
eb2624c9 3 This file is built by regen/regcomp.pl from regcomp.sym.
d09b2d29 4 Any changes made here will be lost!
78102347 5 */
d09b2d29 6
6bda09f9
YO
7/* Regops and State definitions */
8
3e6e81fa
KW
9#define REGNODE_MAX 100
10#define REGMATCH_STATE_MAX 140
03363afd 11
f9f4320a
YO
12#define END 0 /* 0000 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
d3d47aac
YO
14#define SBOL 2 /* 0x02 Match "" at beginning of line: /^/, /\A/ */
15#define BOL 2 /* 0x02 type alias */
16#define MBOL 3 /* 0x03 Same, assuming multiline: /^/m */
17#define SEOL 4 /* 0x04 Match "" at end of line: /$/ */
18#define EOL 4 /* 0x04 type alias */
19#define MEOL 5 /* 0x05 Same, assuming multiline: /$/m */
20#define EOS 6 /* 0x06 Match "" at end of string: /\z/ */
21#define GPOS 7 /* 0x07 Matches where last m//g left off. */
c440a570
KW
22#define BOUND 8 /* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
23#define BOUNDL 9 /* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */
64935bc6 24#define BOUNDU 10 /* 0x0a Match "" at any boundary of a given type using Unicode rules */
c440a570
KW
25#define BOUNDA 11 /* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
26#define NBOUND 12 /* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
27#define NBOUNDL 13 /* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */
28#define NBOUNDU 14 /* 0x0e Match "" at any non-boundary of a given type using Unicode rules */
29#define NBOUNDA 15 /* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
d3d47aac
YO
30#define REG_ANY 16 /* 0x10 Match any one character (except newline). */
31#define SANY 17 /* 0x11 Match any one character. */
33c28ab2 32#define ANYOF 18 /* 0x12 Match character in (or not in) this class, single char match only */
ac44c12e
KW
33#define ANYOFD 19 /* 0x13 Like ANYOF, but /d is in effect */
34#define ANYOFL 20 /* 0x14 Like ANYOF, but /l is in effect */
3edce4f5
KW
35#define ANYOFPOSIXL 21 /* 0x15 Like ANYOFL, but matches [[:posix:]] classes */
36#define ANYOFM 22 /* 0x16 Like ANYOF, but matches an invariant byte as determined by the mask and arg */
3db0bccc
KW
37#define NANYOFM 23 /* 0x17 complement of ANYOFM */
38#define POSIXD 24 /* 0x18 Some [[:class:]] under /d; the FLAGS field gives which one */
39#define POSIXL 25 /* 0x19 Some [[:class:]] under /l; the FLAGS field gives which one */
40#define POSIXU 26 /* 0x1a Some [[:class:]] under /u; the FLAGS field gives which one */
41#define POSIXA 27 /* 0x1b Some [[:class:]] under /a; the FLAGS field gives which one */
42#define NPOSIXD 28 /* 0x1c complement of POSIXD, [[:^class:]] */
43#define NPOSIXL 29 /* 0x1d complement of POSIXL, [[:^class:]] */
44#define NPOSIXU 30 /* 0x1e complement of POSIXU, [[:^class:]] */
45#define NPOSIXA 31 /* 0x1f complement of POSIXA, [[:^class:]] */
3e6e81fa
KW
46#define CLUMP 32 /* 0x20 Match any extended grapheme cluster sequence */
47#define BRANCH 33 /* 0x21 Match this alternative, or the next... */
48#define EXACT 34 /* 0x22 Match this string (preceded by length). */
49#define EXACTL 35 /* 0x23 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
50#define EXACTF 36 /* 0x24 Match this string using /id rules (w/len); (string not UTF-8, not guaranteed to be folded). */
51#define EXACTFL 37 /* 0x25 Match this string using /il rules (w/len); (string not guaranteed to be folded). */
52#define EXACTFU 38 /* 0x26 Match this string using /iu rules (w/len); (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
53#define EXACTFAA 39 /* 0x27 Match this string using /iaa rules (w/len) (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
54#define EXACTFUP 40 /* 0x28 Match this string using /iu rules (w/len); (string not UTF-8, not guaranteed to be folded; and it is Problematic). */
55#define EXACTFLU8 41 /* 0x29 Like EXACTFU, but use /il, UTF-8, folded, and everything in it is above 255. */
56#define EXACTFAA_NO_TRIE 42 /* 0x2a Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
57#define EXACT_ONLY8 43 /* 0x2b Like EXACT, but only UTF-8 encoded targets can match */
58#define EXACTFU_ONLY8 44 /* 0x2c Like EXACTFU, but only UTF-8 encoded targets can match */
59#define EXACTFU_S_EDGE 45 /* 0x2d /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only). */
60#define NOTHING 46 /* 0x2e Match empty string. */
61#define TAIL 47 /* 0x2f Match empty string. Can jump here from outside. */
62#define STAR 48 /* 0x30 Match this (simple) thing 0 or more times. */
63#define PLUS 49 /* 0x31 Match this (simple) thing 1 or more times. */
64#define CURLY 50 /* 0x32 Match this simple thing {n,m} times. */
65#define CURLYN 51 /* 0x33 Capture next-after-this simple thing */
66#define CURLYM 52 /* 0x34 Capture this medium-complex thing {n,m} times. */
67#define CURLYX 53 /* 0x35 Match this complex thing {n,m} times. */
68#define WHILEM 54 /* 0x36 Do curly processing and see if rest matches. */
69#define OPEN 55 /* 0x37 Mark this point in input as start of #n. */
70#define CLOSE 56 /* 0x38 Close corresponding OPEN of #n. */
71#define SROPEN 57 /* 0x39 Same as OPEN, but for script run */
72#define SRCLOSE 58 /* 0x3a Close preceding SROPEN */
73#define REF 59 /* 0x3b Match some already matched string */
74#define REFF 60 /* 0x3c Match already matched string, folded using native charset rules for non-utf8 */
75#define REFFL 61 /* 0x3d Match already matched string, folded in loc. */
76#define REFFU 62 /* 0x3e Match already matched string, folded using unicode rules for non-utf8 */
77#define REFFA 63 /* 0x3f Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
78#define NREF 64 /* 0x40 Match some already matched string */
79#define NREFF 65 /* 0x41 Match already matched string, folded using native charset rules for non-utf8 */
80#define NREFFL 66 /* 0x42 Match already matched string, folded in loc. */
81#define NREFFU 67 /* 0x43 Match already matched string, folded using unicode rules for non-utf8 */
82#define NREFFA 68 /* 0x44 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
83#define LONGJMP 69 /* 0x45 Jump far away. */
84#define BRANCHJ 70 /* 0x46 BRANCH with long offset. */
85#define IFMATCH 71 /* 0x47 Succeeds if the following matches. */
86#define UNLESSM 72 /* 0x48 Fails if the following matches. */
87#define SUSPEND 73 /* 0x49 "Independent" sub-RE. */
88#define IFTHEN 74 /* 0x4a Switch, should be preceded by switcher. */
89#define GROUPP 75 /* 0x4b Whether the group matched. */
90#define EVAL 76 /* 0x4c Execute some Perl code. */
91#define MINMOD 77 /* 0x4d Next operator is not greedy. */
92#define LOGICAL 78 /* 0x4e Next opcode should set the flag only. */
93#define RENUM 79 /* 0x4f Group with independently numbered parens. */
94#define TRIE 80 /* 0x50 Match many EXACT(F[ALU]?)? at once. flags==type */
95#define TRIEC 81 /* 0x51 Same as TRIE, but with embedded charclass data */
96#define AHOCORASICK 82 /* 0x52 Aho Corasick stclass. flags==type */
97#define AHOCORASICKC 83 /* 0x53 Same as AHOCORASICK, but with embedded charclass data */
98#define GOSUB 84 /* 0x54 recurse to paren arg1 at (signed) ofs arg2 */
99#define NGROUPP 85 /* 0x55 Whether the group matched. */
100#define INSUBP 86 /* 0x56 Whether we are in a specific recurse. */
101#define DEFINEP 87 /* 0x57 Never execute directly. */
102#define ENDLIKE 88 /* 0x58 Used only for the type field of verbs */
103#define OPFAIL 89 /* 0x59 Same as (?!), but with verb arg */
104#define ACCEPT 90 /* 0x5a Accepts the current matched string, with verb arg */
105#define VERB 91 /* 0x5b Used only for the type field of verbs */
106#define PRUNE 92 /* 0x5c Pattern fails at this startpoint if no-backtracking through this */
107#define MARKPOINT 93 /* 0x5d Push the current location for rollback by cut. */
108#define SKIP 94 /* 0x5e On failure skip forward (to the mark) before retrying */
109#define COMMIT 95 /* 0x5f Pattern fails outright if backtracking through this */
110#define CUTGROUP 96 /* 0x60 On failure go to the next alternation in the group */
111#define KEEPS 97 /* 0x61 $& begins here. */
112#define LNBREAK 98 /* 0x62 generic newline pattern */
113#define OPTIMIZED 99 /* 0x63 Placeholder for dump. */
114#define PSEUDO 100 /* 0x64 Pseudo opcode for internal use. */
03363afd 115 /* ------------ States ------------- */
24b23f37
YO
116#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
117#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
4ee16520
DM
118#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */
119#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */
120#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */
121#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */
122#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */
123#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */
124#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */
125#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */
126#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */
127#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */
128#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */
129#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */
130#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */
131#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */
132#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */
133#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */
134#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */
135#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */
136#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */
137#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */
138#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */
139#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */
140#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */
141#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */
21cbe009
DM
142#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
143#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
144#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
145#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
146#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
147#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
148#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
149#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
150#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
151#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
152#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
153#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
154#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
155#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
03363afd 156
6bda09f9 157/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
158
159#ifndef DOINIT
22c35a8c 160EXTCONST U8 PL_regkind[];
d09b2d29 161#else
22c35a8c 162EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
163 END, /* END */
164 END, /* SUCCEED */
e2e6a0f1 165 BOL, /* SBOL */
d3d47aac 166 BOL, /* MBOL */
e2e6a0f1 167 EOL, /* SEOL */
d3d47aac
YO
168 EOL, /* MEOL */
169 EOL, /* EOS */
170 GPOS, /* GPOS */
e2e6a0f1
YO
171 BOUND, /* BOUND */
172 BOUND, /* BOUNDL */
1e355c70 173 BOUND, /* BOUNDU */
0c6e81eb 174 BOUND, /* BOUNDA */
e2e6a0f1
YO
175 NBOUND, /* NBOUND */
176 NBOUND, /* NBOUNDL */
1e355c70 177 NBOUND, /* NBOUNDU */
0c6e81eb 178 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
179 REG_ANY, /* REG_ANY */
180 REG_ANY, /* SANY */
e2e6a0f1 181 ANYOF, /* ANYOF */
ac44c12e 182 ANYOF, /* ANYOFD */
a4525e78 183 ANYOF, /* ANYOFL */
3edce4f5 184 ANYOF, /* ANYOFPOSIXL */
67a1b5f9 185 ANYOFM, /* ANYOFM */
3db0bccc 186 ANYOFM, /* NANYOFM */
3615ea58
KW
187 POSIXD, /* POSIXD */
188 POSIXD, /* POSIXL */
189 POSIXD, /* POSIXU */
190 POSIXD, /* POSIXA */
9e84774b
KW
191 NPOSIXD, /* NPOSIXD */
192 NPOSIXD, /* NPOSIXL */
193 NPOSIXD, /* NPOSIXU */
194 NPOSIXD, /* NPOSIXA */
e2e6a0f1
YO
195 CLUMP, /* CLUMP */
196 BRANCH, /* BRANCH */
e2e6a0f1 197 EXACT, /* EXACT */
a4525e78 198 EXACT, /* EXACTL */
e2e6a0f1
YO
199 EXACT, /* EXACTF */
200 EXACT, /* EXACTFL */
01f98ec2 201 EXACT, /* EXACTFU */
89829bb5 202 EXACT, /* EXACTFAA */
627a7895 203 EXACT, /* EXACTFUP */
a4525e78 204 EXACT, /* EXACTFLU8 */
89829bb5 205 EXACT, /* EXACTFAA_NO_TRIE */
f6b4b99d 206 EXACT, /* EXACT_ONLY8 */
a9f8c7ac 207 EXACT, /* EXACTFU_ONLY8 */
95fb0a6e 208 EXACT, /* EXACTFU_S_EDGE */
e2e6a0f1
YO
209 NOTHING, /* NOTHING */
210 NOTHING, /* TAIL */
211 STAR, /* STAR */
212 PLUS, /* PLUS */
213 CURLY, /* CURLY */
214 CURLY, /* CURLYN */
215 CURLY, /* CURLYM */
216 CURLY, /* CURLYX */
217 WHILEM, /* WHILEM */
218 OPEN, /* OPEN */
219 CLOSE, /* CLOSE */
07093db4
KW
220 SROPEN, /* SROPEN */
221 SRCLOSE, /* SRCLOSE */
e2e6a0f1
YO
222 REF, /* REF */
223 REF, /* REFF */
224 REF, /* REFFL */
01f98ec2 225 REF, /* REFFU */
781aab5c 226 REF, /* REFFA */
01f98ec2
KW
227 REF, /* NREF */
228 REF, /* NREFF */
229 REF, /* NREFFL */
230 REF, /* NREFFU */
781aab5c 231 REF, /* NREFFA */
d3d47aac
YO
232 LONGJMP, /* LONGJMP */
233 BRANCHJ, /* BRANCHJ */
e2e6a0f1
YO
234 BRANCHJ, /* IFMATCH */
235 BRANCHJ, /* UNLESSM */
236 BRANCHJ, /* SUSPEND */
237 BRANCHJ, /* IFTHEN */
238 GROUPP, /* GROUPP */
e2e6a0f1
YO
239 EVAL, /* EVAL */
240 MINMOD, /* MINMOD */
241 LOGICAL, /* LOGICAL */
242 BRANCHJ, /* RENUM */
243 TRIE, /* TRIE */
244 TRIE, /* TRIEC */
245 TRIE, /* AHOCORASICK */
246 TRIE, /* AHOCORASICKC */
247 GOSUB, /* GOSUB */
e2e6a0f1
YO
248 NGROUPP, /* NGROUPP */
249 INSUBP, /* INSUBP */
250 DEFINEP, /* DEFINEP */
251 ENDLIKE, /* ENDLIKE */
252 ENDLIKE, /* OPFAIL */
253 ENDLIKE, /* ACCEPT */
254 VERB, /* VERB */
5d458dd8 255 VERB, /* PRUNE */
e2e6a0f1 256 VERB, /* MARKPOINT */
5d458dd8 257 VERB, /* SKIP */
e2e6a0f1 258 VERB, /* COMMIT */
5d458dd8 259 VERB, /* CUTGROUP */
ee9b8eae 260 KEEPS, /* KEEPS */
e1d1eefb 261 LNBREAK, /* LNBREAK */
e2e6a0f1
YO
262 NOTHING, /* OPTIMIZED */
263 PSEUDO, /* PSEUDO */
03363afd 264 /* ------------ States ------------- */
e2e6a0f1
YO
265 TRIE, /* TRIE_next */
266 TRIE, /* TRIE_next_fail */
4ee16520
DM
267 EVAL, /* EVAL_B */
268 EVAL, /* EVAL_B_fail */
269 EVAL, /* EVAL_postponed_AB */
270 EVAL, /* EVAL_postponed_AB_fail */
e2e6a0f1
YO
271 CURLYX, /* CURLYX_end */
272 CURLYX, /* CURLYX_end_fail */
273 WHILEM, /* WHILEM_A_pre */
274 WHILEM, /* WHILEM_A_pre_fail */
275 WHILEM, /* WHILEM_A_min */
276 WHILEM, /* WHILEM_A_min_fail */
277 WHILEM, /* WHILEM_A_max */
278 WHILEM, /* WHILEM_A_max_fail */
279 WHILEM, /* WHILEM_B_min */
280 WHILEM, /* WHILEM_B_min_fail */
281 WHILEM, /* WHILEM_B_max */
282 WHILEM, /* WHILEM_B_max_fail */
283 BRANCH, /* BRANCH_next */
284 BRANCH, /* BRANCH_next_fail */
285 CURLYM, /* CURLYM_A */
286 CURLYM, /* CURLYM_A_fail */
287 CURLYM, /* CURLYM_B */
288 CURLYM, /* CURLYM_B_fail */
289 IFMATCH, /* IFMATCH_A */
290 IFMATCH, /* IFMATCH_A_fail */
e2e6a0f1
YO
291 CURLY, /* CURLY_B_min */
292 CURLY, /* CURLY_B_min_fail */
293 CURLY, /* CURLY_B_max */
294 CURLY, /* CURLY_B_max_fail */
295 COMMIT, /* COMMIT_next */
296 COMMIT, /* COMMIT_next_fail */
297 MARKPOINT, /* MARKPOINT_next */
298 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
299 SKIP, /* SKIP_next */
300 SKIP, /* SKIP_next_fail */
301 CUTGROUP, /* CUTGROUP_next */
302 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
303 KEEPS, /* KEEPS_next */
304 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
305};
306#endif
307
f83e001e
YO
308#ifdef REG_COMP_C
309
6bda09f9 310/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29 311
29de9391 312static const U8 regarglen[] = {
03363afd
YO
313 0, /* END */
314 0, /* SUCCEED */
03363afd 315 0, /* SBOL */
d3d47aac 316 0, /* MBOL */
03363afd 317 0, /* SEOL */
d3d47aac
YO
318 0, /* MEOL */
319 0, /* EOS */
320 0, /* GPOS */
03363afd
YO
321 0, /* BOUND */
322 0, /* BOUNDL */
1e355c70 323 0, /* BOUNDU */
0c6e81eb 324 0, /* BOUNDA */
03363afd
YO
325 0, /* NBOUND */
326 0, /* NBOUNDL */
1e355c70 327 0, /* NBOUNDU */
0c6e81eb 328 0, /* NBOUNDA */
03363afd
YO
329 0, /* REG_ANY */
330 0, /* SANY */
46fc0c43
KW
331 EXTRA_SIZE(struct regnode_charclass), /* ANYOF */
332 EXTRA_SIZE(struct regnode_charclass), /* ANYOFD */
333 EXTRA_SIZE(struct regnode_charclass), /* ANYOFL */
334 EXTRA_SIZE(struct regnode_charclass_posixl), /* ANYOFPOSIXL */
67a1b5f9 335 EXTRA_SIZE(struct regnode_1), /* ANYOFM */
3db0bccc 336 EXTRA_SIZE(struct regnode_1), /* NANYOFM */
3615ea58
KW
337 0, /* POSIXD */
338 0, /* POSIXL */
339 0, /* POSIXU */
340 0, /* POSIXA */
341 0, /* NPOSIXD */
342 0, /* NPOSIXL */
343 0, /* NPOSIXU */
344 0, /* NPOSIXA */
03363afd
YO
345 0, /* CLUMP */
346 0, /* BRANCH */
03363afd 347 0, /* EXACT */
a4525e78 348 0, /* EXACTL */
03363afd
YO
349 0, /* EXACTF */
350 0, /* EXACTFL */
01f98ec2 351 0, /* EXACTFU */
89829bb5 352 0, /* EXACTFAA */
627a7895 353 0, /* EXACTFUP */
a4525e78 354 0, /* EXACTFLU8 */
89829bb5 355 0, /* EXACTFAA_NO_TRIE */
f6b4b99d 356 0, /* EXACT_ONLY8 */
a9f8c7ac 357 0, /* EXACTFU_ONLY8 */
95fb0a6e 358 0, /* EXACTFU_S_EDGE */
03363afd
YO
359 0, /* NOTHING */
360 0, /* TAIL */
361 0, /* STAR */
362 0, /* PLUS */
363 EXTRA_SIZE(struct regnode_2), /* CURLY */
364 EXTRA_SIZE(struct regnode_2), /* CURLYN */
365 EXTRA_SIZE(struct regnode_2), /* CURLYM */
366 EXTRA_SIZE(struct regnode_2), /* CURLYX */
367 0, /* WHILEM */
368 EXTRA_SIZE(struct regnode_1), /* OPEN */
369 EXTRA_SIZE(struct regnode_1), /* CLOSE */
07093db4
KW
370 0, /* SROPEN */
371 0, /* SRCLOSE */
03363afd
YO
372 EXTRA_SIZE(struct regnode_1), /* REF */
373 EXTRA_SIZE(struct regnode_1), /* REFF */
374 EXTRA_SIZE(struct regnode_1), /* REFFL */
01f98ec2 375 EXTRA_SIZE(struct regnode_1), /* REFFU */
781aab5c 376 EXTRA_SIZE(struct regnode_1), /* REFFA */
01f98ec2
KW
377 EXTRA_SIZE(struct regnode_1), /* NREF */
378 EXTRA_SIZE(struct regnode_1), /* NREFF */
379 EXTRA_SIZE(struct regnode_1), /* NREFFL */
380 EXTRA_SIZE(struct regnode_1), /* NREFFU */
781aab5c 381 EXTRA_SIZE(struct regnode_1), /* NREFFA */
d3d47aac
YO
382 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
383 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
03363afd
YO
384 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
385 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
386 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
387 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
388 EXTRA_SIZE(struct regnode_1), /* GROUPP */
13f27704 389 EXTRA_SIZE(struct regnode_2L), /* EVAL */
03363afd
YO
390 0, /* MINMOD */
391 0, /* LOGICAL */
392 EXTRA_SIZE(struct regnode_1), /* RENUM */
393 EXTRA_SIZE(struct regnode_1), /* TRIE */
394 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
395 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
396 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
1a147d38 397 EXTRA_SIZE(struct regnode_2L), /* GOSUB */
0a4db386 398 EXTRA_SIZE(struct regnode_1), /* NGROUPP */
1a147d38 399 EXTRA_SIZE(struct regnode_1), /* INSUBP */
0a4db386 400 EXTRA_SIZE(struct regnode_1), /* DEFINEP */
e2e6a0f1 401 0, /* ENDLIKE */
fee50582
YO
402 EXTRA_SIZE(struct regnode_1), /* OPFAIL */
403 EXTRA_SIZE(struct regnode_2L), /* ACCEPT */
20832bc5 404 EXTRA_SIZE(struct regnode_1), /* VERB */
5d458dd8 405 EXTRA_SIZE(struct regnode_1), /* PRUNE */
e2e6a0f1 406 EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
5d458dd8 407 EXTRA_SIZE(struct regnode_1), /* SKIP */
e2e6a0f1 408 EXTRA_SIZE(struct regnode_1), /* COMMIT */
5d458dd8 409 EXTRA_SIZE(struct regnode_1), /* CUTGROUP */
ee9b8eae 410 0, /* KEEPS */
e1d1eefb 411 0, /* LNBREAK */
03363afd
YO
412 0, /* OPTIMIZED */
413 0, /* PSEUDO */
d09b2d29
IZ
414};
415
6bda09f9
YO
416/* reg_off_by_arg[] - Which argument holds the offset to the next node */
417
29de9391 418static const char reg_off_by_arg[] = {
03363afd
YO
419 0, /* END */
420 0, /* SUCCEED */
03363afd 421 0, /* SBOL */
d3d47aac 422 0, /* MBOL */
03363afd 423 0, /* SEOL */
d3d47aac
YO
424 0, /* MEOL */
425 0, /* EOS */
426 0, /* GPOS */
03363afd
YO
427 0, /* BOUND */
428 0, /* BOUNDL */
1e355c70 429 0, /* BOUNDU */
0c6e81eb 430 0, /* BOUNDA */
03363afd
YO
431 0, /* NBOUND */
432 0, /* NBOUNDL */
1e355c70 433 0, /* NBOUNDU */
0c6e81eb 434 0, /* NBOUNDA */
03363afd
YO
435 0, /* REG_ANY */
436 0, /* SANY */
03363afd 437 0, /* ANYOF */
ac44c12e 438 0, /* ANYOFD */
a4525e78 439 0, /* ANYOFL */
3edce4f5 440 0, /* ANYOFPOSIXL */
67a1b5f9 441 0, /* ANYOFM */
3db0bccc 442 0, /* NANYOFM */
3615ea58
KW
443 0, /* POSIXD */
444 0, /* POSIXL */
445 0, /* POSIXU */
446 0, /* POSIXA */
447 0, /* NPOSIXD */
448 0, /* NPOSIXL */
449 0, /* NPOSIXU */
450 0, /* NPOSIXA */
03363afd
YO
451 0, /* CLUMP */
452 0, /* BRANCH */
03363afd 453 0, /* EXACT */
a4525e78 454 0, /* EXACTL */
03363afd
YO
455 0, /* EXACTF */
456 0, /* EXACTFL */
01f98ec2 457 0, /* EXACTFU */
89829bb5 458 0, /* EXACTFAA */
627a7895 459 0, /* EXACTFUP */
a4525e78 460 0, /* EXACTFLU8 */
89829bb5 461 0, /* EXACTFAA_NO_TRIE */
f6b4b99d 462 0, /* EXACT_ONLY8 */
a9f8c7ac 463 0, /* EXACTFU_ONLY8 */
95fb0a6e 464 0, /* EXACTFU_S_EDGE */
03363afd
YO
465 0, /* NOTHING */
466 0, /* TAIL */
467 0, /* STAR */
468 0, /* PLUS */
469 0, /* CURLY */
470 0, /* CURLYN */
471 0, /* CURLYM */
472 0, /* CURLYX */
473 0, /* WHILEM */
474 0, /* OPEN */
475 0, /* CLOSE */
07093db4
KW
476 0, /* SROPEN */
477 0, /* SRCLOSE */
03363afd
YO
478 0, /* REF */
479 0, /* REFF */
480 0, /* REFFL */
01f98ec2 481 0, /* REFFU */
781aab5c 482 0, /* REFFA */
01f98ec2
KW
483 0, /* NREF */
484 0, /* NREFF */
485 0, /* NREFFL */
486 0, /* NREFFU */
781aab5c 487 0, /* NREFFA */
d3d47aac
YO
488 1, /* LONGJMP */
489 1, /* BRANCHJ */
46167d76
KW
490 1, /* IFMATCH */
491 1, /* UNLESSM */
03363afd
YO
492 1, /* SUSPEND */
493 1, /* IFTHEN */
494 0, /* GROUPP */
03363afd
YO
495 0, /* EVAL */
496 0, /* MINMOD */
497 0, /* LOGICAL */
498 1, /* RENUM */
499 0, /* TRIE */
500 0, /* TRIEC */
501 0, /* AHOCORASICK */
502 0, /* AHOCORASICKC */
1a147d38 503 0, /* GOSUB */
0a4db386 504 0, /* NGROUPP */
1a147d38 505 0, /* INSUBP */
0a4db386 506 0, /* DEFINEP */
e2e6a0f1 507 0, /* ENDLIKE */
7f69552c 508 0, /* OPFAIL */
e2e6a0f1
YO
509 0, /* ACCEPT */
510 0, /* VERB */
5d458dd8 511 0, /* PRUNE */
e2e6a0f1 512 0, /* MARKPOINT */
5d458dd8 513 0, /* SKIP */
e2e6a0f1 514 0, /* COMMIT */
5d458dd8 515 0, /* CUTGROUP */
ee9b8eae 516 0, /* KEEPS */
e1d1eefb 517 0, /* LNBREAK */
03363afd
YO
518 0, /* OPTIMIZED */
519 0, /* PSEUDO */
d09b2d29 520};
885f9e59 521
13d6edb4
NC
522#endif /* REG_COMP_C */
523
f83e001e 524
6bda09f9
YO
525/* reg_name[] - Opcode/state names in string form, for debugging */
526
22429478 527#ifndef DOINIT
/* Declaration seen when DOINIT is not defined.  It must have the same type as
 * the definition in the #else branch, which is `char * const []`; without the
 * `const` the two translation-unit views of the array are incompatible.
 * NOTE(review): this header is generated, so mirror the change in
 * regen/regcomp.pl. */
13d6edb4 528EXTCONST char * const PL_reg_name[];
22429478 529#else
4764e399 530EXTCONST char * const PL_reg_name[] = {
03363afd
YO
531 "END", /* 0000 */
532 "SUCCEED", /* 0x01 */
d3d47aac 533 "SBOL", /* 0x02 */
03363afd 534 "MBOL", /* 0x03 */
d3d47aac
YO
535 "SEOL", /* 0x04 */
536 "MEOL", /* 0x05 */
537 "EOS", /* 0x06 */
538 "GPOS", /* 0x07 */
539 "BOUND", /* 0x08 */
540 "BOUNDL", /* 0x09 */
541 "BOUNDU", /* 0x0a */
542 "BOUNDA", /* 0x0b */
543 "NBOUND", /* 0x0c */
544 "NBOUNDL", /* 0x0d */
545 "NBOUNDU", /* 0x0e */
546 "NBOUNDA", /* 0x0f */
547 "REG_ANY", /* 0x10 */
548 "SANY", /* 0x11 */
33c28ab2 549 "ANYOF", /* 0x12 */
ac44c12e
KW
550 "ANYOFD", /* 0x13 */
551 "ANYOFL", /* 0x14 */
3edce4f5
KW
552 "ANYOFPOSIXL", /* 0x15 */
553 "ANYOFM", /* 0x16 */
3db0bccc
KW
554 "NANYOFM", /* 0x17 */
555 "POSIXD", /* 0x18 */
556 "POSIXL", /* 0x19 */
557 "POSIXU", /* 0x1a */
558 "POSIXA", /* 0x1b */
559 "NPOSIXD", /* 0x1c */
560 "NPOSIXL", /* 0x1d */
561 "NPOSIXU", /* 0x1e */
562 "NPOSIXA", /* 0x1f */
3e6e81fa
KW
563 "CLUMP", /* 0x20 */
564 "BRANCH", /* 0x21 */
565 "EXACT", /* 0x22 */
566 "EXACTL", /* 0x23 */
567 "EXACTF", /* 0x24 */
568 "EXACTFL", /* 0x25 */
569 "EXACTFU", /* 0x26 */
570 "EXACTFAA", /* 0x27 */
571 "EXACTFUP", /* 0x28 */
572 "EXACTFLU8", /* 0x29 */
573 "EXACTFAA_NO_TRIE", /* 0x2a */
574 "EXACT_ONLY8", /* 0x2b */
575 "EXACTFU_ONLY8", /* 0x2c */
576 "EXACTFU_S_EDGE", /* 0x2d */
577 "NOTHING", /* 0x2e */
578 "TAIL", /* 0x2f */
579 "STAR", /* 0x30 */
580 "PLUS", /* 0x31 */
581 "CURLY", /* 0x32 */
582 "CURLYN", /* 0x33 */
583 "CURLYM", /* 0x34 */
584 "CURLYX", /* 0x35 */
585 "WHILEM", /* 0x36 */
586 "OPEN", /* 0x37 */
587 "CLOSE", /* 0x38 */
588 "SROPEN", /* 0x39 */
589 "SRCLOSE", /* 0x3a */
590 "REF", /* 0x3b */
591 "REFF", /* 0x3c */
592 "REFFL", /* 0x3d */
593 "REFFU", /* 0x3e */
594 "REFFA", /* 0x3f */
595 "NREF", /* 0x40 */
596 "NREFF", /* 0x41 */
597 "NREFFL", /* 0x42 */
598 "NREFFU", /* 0x43 */
599 "NREFFA", /* 0x44 */
600 "LONGJMP", /* 0x45 */
601 "BRANCHJ", /* 0x46 */
602 "IFMATCH", /* 0x47 */
603 "UNLESSM", /* 0x48 */
604 "SUSPEND", /* 0x49 */
605 "IFTHEN", /* 0x4a */
606 "GROUPP", /* 0x4b */
607 "EVAL", /* 0x4c */
608 "MINMOD", /* 0x4d */
609 "LOGICAL", /* 0x4e */
610 "RENUM", /* 0x4f */
611 "TRIE", /* 0x50 */
612 "TRIEC", /* 0x51 */
613 "AHOCORASICK", /* 0x52 */
614 "AHOCORASICKC", /* 0x53 */
615 "GOSUB", /* 0x54 */
616 "NGROUPP", /* 0x55 */
617 "INSUBP", /* 0x56 */
618 "DEFINEP", /* 0x57 */
619 "ENDLIKE", /* 0x58 */
620 "OPFAIL", /* 0x59 */
621 "ACCEPT", /* 0x5a */
622 "VERB", /* 0x5b */
623 "PRUNE", /* 0x5c */
624 "MARKPOINT", /* 0x5d */
625 "SKIP", /* 0x5e */
626 "COMMIT", /* 0x5f */
627 "CUTGROUP", /* 0x60 */
628 "KEEPS", /* 0x61 */
629 "LNBREAK", /* 0x62 */
630 "OPTIMIZED", /* 0x63 */
631 "PSEUDO", /* 0x64 */
03363afd 632 /* ------------ States ------------- */
24b23f37
YO
633 "TRIE_next", /* REGNODE_MAX +0x01 */
634 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
4ee16520
DM
635 "EVAL_B", /* REGNODE_MAX +0x03 */
636 "EVAL_B_fail", /* REGNODE_MAX +0x04 */
637 "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */
638 "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */
639 "CURLYX_end", /* REGNODE_MAX +0x07 */
640 "CURLYX_end_fail", /* REGNODE_MAX +0x08 */
641 "WHILEM_A_pre", /* REGNODE_MAX +0x09 */
642 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */
643 "WHILEM_A_min", /* REGNODE_MAX +0x0b */
644 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */
645 "WHILEM_A_max", /* REGNODE_MAX +0x0d */
646 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */
647 "WHILEM_B_min", /* REGNODE_MAX +0x0f */
648 "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */
649 "WHILEM_B_max", /* REGNODE_MAX +0x11 */
650 "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */
651 "BRANCH_next", /* REGNODE_MAX +0x13 */
652 "BRANCH_next_fail", /* REGNODE_MAX +0x14 */
653 "CURLYM_A", /* REGNODE_MAX +0x15 */
654 "CURLYM_A_fail", /* REGNODE_MAX +0x16 */
655 "CURLYM_B", /* REGNODE_MAX +0x17 */
656 "CURLYM_B_fail", /* REGNODE_MAX +0x18 */
657 "IFMATCH_A", /* REGNODE_MAX +0x19 */
658 "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */
21cbe009
DM
659 "CURLY_B_min", /* REGNODE_MAX +0x1b */
660 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
661 "CURLY_B_max", /* REGNODE_MAX +0x1d */
662 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
663 "COMMIT_next", /* REGNODE_MAX +0x1f */
664 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
665 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
666 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
667 "SKIP_next", /* REGNODE_MAX +0x23 */
668 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
669 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
670 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
671 "KEEPS_next", /* REGNODE_MAX +0x27 */
672 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
885f9e59 673};
22429478 674#endif /* DOINIT */
d09b2d29 675
f7819f85
A
676/* PL_reg_extflags_name[] - extflags bit names in string form, for debugging */
677
678#ifndef DOINIT
/* Declaration seen when DOINIT is not defined.  It must have the same type as
 * the definition in the #else branch, which is `char * const []`; without the
 * `const` the two translation-unit views of the array are incompatible.
 * NOTE(review): this header is generated, so mirror the change in
 * regen/regcomp.pl. */
679EXTCONST char * const PL_reg_extflags_name[];
680#else
681EXTCONST char * const PL_reg_extflags_name[] = {
d262c0c7 682 /* Bits in extflags defined: 11111111111111110000111111111111 */
52d81aa8
NC
683 "MULTILINE", /* 0x00000001 */
684 "SINGLELINE", /* 0x00000002 */
685 "FOLD", /* 0x00000004 */
686 "EXTENDED", /* 0x00000008 */
334afb3e 687 "EXTENDED_MORE", /* 0x00000010 */
e3b64d84
KW
688 "NOCAPTURE", /* 0x00000020 */
689 "KEEPCOPY", /* 0x00000040 */
690 "CHARSET0", /* 0x00000080 : "CHARSET" - 0x00000380 */
691 "CHARSET1", /* 0x00000100 : "CHARSET" - 0x00000380 */
692 "CHARSET2", /* 0x00000200 : "CHARSET" - 0x00000380 */
d262c0c7
KW
693 "STRICT", /* 0x00000400 */
694 "SPLIT", /* 0x00000800 */
1d32d911
KW
695 "UNUSED_BIT_12", /* 0x00001000 */
696 "UNUSED_BIT_13", /* 0x00002000 */
697 "UNUSED_BIT_14", /* 0x00004000 */
a3b51d37
KW
698 "UNUSED_BIT_15", /* 0x00008000 */
699 "NO_INPLACE_SUBST", /* 0x00010000 */
700 "EVAL_SEEN", /* 0x00020000 */
ee273784 701 "UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
e795e964 702 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
703 "MATCH_UTF8", /* 0x00100000 */
704 "USE_INTUIT_NOML", /* 0x00200000 */
705 "USE_INTUIT_ML", /* 0x00400000 */
706 "INTUIT_TAIL", /* 0x00800000 */
a3b51d37 707 "IS_ANCHORED", /* 0x01000000 */
52d81aa8
NC
708 "COPY_DONE", /* 0x02000000 */
709 "TAINTED_SEEN", /* 0x04000000 */
710 "TAINTED", /* 0x08000000 */
711 "START_ONLY", /* 0x10000000 */
dbc200c5 712 "SKIPWHITE", /* 0x20000000 */
52d81aa8
NC
713 "WHITE", /* 0x40000000 */
714 "NULL", /* 0x80000000 */
f7819f85
A
715};
716#endif /* DOINIT */
717
adc2d0c9
JH
718#ifdef DEBUGGING
719# define REG_EXTFLAGS_NAME_SIZE 32
720#endif
721
337ff307
YO
722/* PL_reg_intflags_name[] - intflags (PREGf_*) bit names in string form, for debugging */
723
724#ifndef DOINIT
/* Declaration seen when DOINIT is not defined.  It must have the same type as
 * the definition in the #else branch, which is `char * const []`; without the
 * `const` the two translation-unit views of the array are incompatible.
 * NOTE(review): this header is generated, so mirror the change in
 * regen/regcomp.pl. */
725EXTCONST char * const PL_reg_intflags_name[];
726#else
727EXTCONST char * const PL_reg_intflags_name[] = {
b8f6efdd
YO
728 "SKIP", /* 0x00000001 - PREGf_SKIP */
729 "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
730 "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
731 "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
732 "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
733 "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
58430ea8 734 "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
58430ea8
YO
735 "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
736 "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
d3d47aac
YO
737 "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
738 "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
739 "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
d5a00e4a 740 "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
337ff307
YO
741};
742#endif /* DOINIT */
743
adc2d0c9 744#ifdef DEBUGGING
d5a00e4a 745# define REG_INTFLAGS_NAME_SIZE 13
adc2d0c9
JH
746#endif
747
f9ef50a7 748/* The following have no fixed length. U8 so we can do strchr() on it. */
/* Nonzero iff `node` has its bit set in PL_varies_bitmask: the byte is selected
 * by (node >> 3) and the bit within it by (node & 7).  The result is the masked
 * bit itself, not normalised to 0/1.  `node` is evaluated twice, so it must be
 * free of side effects. */
ded4dd2a 749#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 750
f9ef50a7 751#ifndef DOINIT
ded4dd2a 752EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 753#else
ded4dd2a 754EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
62e6ef33
AC
755 CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF,
756 REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
d3d47aac 757 BRANCHJ, SUSPEND, IFTHEN,
f9ef50a7
NC
758 0
759};
760#endif /* DOINIT */
761
ded4dd2a
NC
/* Bitmap form of PL_varies, indexed by regnode number: bit (node & 7) of byte
 * (node >> 3) is set for each opcode listed in PL_varies.  The set bits are
 * 32-33 (CLUMP, BRANCH), 48-54 (STAR..WHILEM), 59-68 (REF..NREFFA),
 * 70 (BRANCHJ) and 73-74 (SUSPEND, IFTHEN).  Thirteen bytes cover node numbers
 * 0..103, which spans REGNODE_MAX (100).  Read through REGNODE_VARIES(). */
762#ifndef DOINIT
763EXTCONST U8 PL_varies_bitmask[];
764#else
765EXTCONST U8 PL_varies_bitmask[] = {
3e6e81fa 766 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7F, 0xF8, 0x5F, 0x06, 0x00, 0x00, 0x00
ded4dd2a
NC
767};
768#endif /* DOINIT */
769
f9ef50a7
NC
770/* The following always have a length of 1. U8 so we can do strchr() on it. */
771/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* Nonzero iff `node` has its bit set in PL_simple_bitmask: the byte is selected
 * by (node >> 3) and the bit within it by (node & 7).  The result is the masked
 * bit itself, not normalised to 0/1.  `node` is evaluated twice, so it must be
 * free of side effects. */
ded4dd2a 772#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 773
f9ef50a7 774#ifndef DOINIT
ded4dd2a 775EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 776#else
ded4dd2a 777EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
3db0bccc
KW
778 REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFM, NANYOFM,
779 POSIXD, POSIXL, POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA,
f9ef50a7
NC
780 0
781};
782#endif /* DOINIT */
783
ded4dd2a
NC
/* Bitmap form of PL_simple, indexed by regnode number: bit (node & 7) of byte
 * (node >> 3) is set for each opcode listed in PL_simple.  Bytes 2 and 3 are
 * 0xFF, i.e. nodes 16..31 (REG_ANY through NPOSIXA); every other bit is clear.
 * Thirteen bytes cover node numbers 0..103, which spans REGNODE_MAX (100).
 * Read through REGNODE_SIMPLE(). */
784#ifndef DOINIT
785EXTCONST U8 PL_simple_bitmask[];
786#else
787EXTCONST U8 PL_simple_bitmask[] = {
3e6e81fa 788 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
ded4dd2a
NC
789};
790#endif /* DOINIT */
791
37442d52 792/* ex: set ro: */