This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.sym: Note specialized use of 'flags' in 2 OPs
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
eb2624c9 3 This file is built by regen/regcomp.pl from regcomp.sym.
d09b2d29 4 Any changes made here will be lost!
78102347 5 */
d09b2d29 6
6bda09f9
YO
7/* Regops and State definitions */
8
c316b824
KW
/* REGNODE_MAX is the highest regop number defined below (PSEUDO).
 * REGMATCH_STATE_MAX is the highest state number: the states are numbered
 * REGNODE_MAX + 1 .. REGNODE_MAX + 40, the last being KEEPS_next_fail. */
9#define REGNODE_MAX 101
10#define REGMATCH_STATE_MAX 141
03363afd 11
f9f4320a
YO
12#define END 0 /* 0000 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
d3d47aac
YO
14#define SBOL 2 /* 0x02 Match "" at beginning of line: /^/, /\A/ */
15#define BOL 2 /* 0x02 type alias */
16#define MBOL 3 /* 0x03 Same, assuming multiline: /^/m */
17#define SEOL 4 /* 0x04 Match "" at end of line: /$/ */
18#define EOL 4 /* 0x04 type alias */
19#define MEOL 5 /* 0x05 Same, assuming multiline: /$/m */
20#define EOS 6 /* 0x06 Match "" at end of string: /\z/ */
21#define GPOS 7 /* 0x07 Matches where last m//g left off. */
c440a570
KW
22#define BOUND 8 /* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
23#define BOUNDL 9 /* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */
64935bc6 24#define BOUNDU 10 /* 0x0a Match "" at any boundary of a given type using Unicode rules */
c440a570
KW
25#define BOUNDA 11 /* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
26#define NBOUND 12 /* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
27#define NBOUNDL 13 /* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */
28#define NBOUNDU 14 /* 0x0e Match "" at any non-boundary of a given type using Unicode rules */
29#define NBOUNDA 15 /* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
d3d47aac
YO
30#define REG_ANY 16 /* 0x10 Match any one character (except newline). */
31#define SANY 17 /* 0x11 Match any one character. */
33c28ab2 32#define ANYOF 18 /* 0x12 Match character in (or not in) this class, single char match only */
ac44c12e
KW
33#define ANYOFD 19 /* 0x13 Like ANYOF, but /d is in effect */
34#define ANYOFL 20 /* 0x14 Like ANYOF, but /l is in effect */
3edce4f5 35#define ANYOFPOSIXL 21 /* 0x15 Like ANYOFL, but matches [[:posix:]] classes */
c316b824
KW
36#define ANYOFH 22 /* 0x16 Like ANYOF, but only has "High" matches, none in the bitmap */
37#define ANYOFM 23 /* 0x17 Like ANYOF, but matches an invariant byte as determined by the mask and arg */
38#define NANYOFM 24 /* 0x18 complement of ANYOFM */
39#define POSIXD 25 /* 0x19 Some [[:class:]] under /d; the FLAGS field gives which one */
40#define POSIXL 26 /* 0x1a Some [[:class:]] under /l; the FLAGS field gives which one */
41#define POSIXU 27 /* 0x1b Some [[:class:]] under /u; the FLAGS field gives which one */
42#define POSIXA 28 /* 0x1c Some [[:class:]] under /a; the FLAGS field gives which one */
43#define NPOSIXD 29 /* 0x1d complement of POSIXD, [[:^class:]] */
44#define NPOSIXL 30 /* 0x1e complement of POSIXL, [[:^class:]] */
45#define NPOSIXU 31 /* 0x1f complement of POSIXU, [[:^class:]] */
46#define NPOSIXA 32 /* 0x20 complement of POSIXA, [[:^class:]] */
47#define CLUMP 33 /* 0x21 Match any extended grapheme cluster sequence */
48#define BRANCH 34 /* 0x22 Match this alternative, or the next... */
49#define EXACT 35 /* 0x23 Match this string (preceded by length). */
50#define EXACTL 36 /* 0x24 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
51#define EXACTF 37 /* 0x25 Match this string using /id rules (w/len); (string not UTF-8, not guaranteed to be folded). */
52#define EXACTFL 38 /* 0x26 Match this string using /il rules (w/len); (string not guaranteed to be folded). */
53#define EXACTFU 39 /* 0x27 Match this string using /iu rules (w/len); (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
54#define EXACTFAA 40 /* 0x28 Match this string using /iaa rules (w/len) (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
55#define EXACTFUP 41 /* 0x29 Match this string using /iu rules (w/len); (string not UTF-8, not guaranteed to be folded; and it's Problematic). */
56#define EXACTFLU8 42 /* 0x2a Like EXACTFU, but use /il, UTF-8, folded, and everything in it is above 255. */
57#define EXACTFAA_NO_TRIE 43 /* 0x2b Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
58#define EXACT_ONLY8 44 /* 0x2c Like EXACT, but only UTF-8 encoded targets can match */
59#define EXACTFU_ONLY8 45 /* 0x2d Like EXACTFU, but only UTF-8 encoded targets can match */
60#define EXACTFU_S_EDGE 46 /* 0x2e /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only). */
61#define NOTHING 47 /* 0x2f Match empty string. */
62#define TAIL 48 /* 0x30 Match empty string. Can jump here from outside. */
63#define STAR 49 /* 0x31 Match this (simple) thing 0 or more times. */
64#define PLUS 50 /* 0x32 Match this (simple) thing 1 or more times. */
65#define CURLY 51 /* 0x33 Match this simple thing {n,m} times. */
66#define CURLYN 52 /* 0x34 Capture next-after-this simple thing */
67#define CURLYM 53 /* 0x35 Capture this medium-complex thing {n,m} times. */
68#define CURLYX 54 /* 0x36 Match this complex thing {n,m} times. */
69#define WHILEM 55 /* 0x37 Do curly processing and see if rest matches. */
70#define OPEN 56 /* 0x38 Mark this point in input as start of #n. */
71#define CLOSE 57 /* 0x39 Close corresponding OPEN of #n. */
72#define SROPEN 58 /* 0x3a Same as OPEN, but for script run */
73#define SRCLOSE 59 /* 0x3b Close preceding SROPEN */
74#define REF 60 /* 0x3c Match some already matched string */
75#define REFF 61 /* 0x3d Match already matched string, folded using native charset rules for non-utf8 */
76#define REFFL 62 /* 0x3e Match already matched string, folded in loc. */
77#define REFFU 63 /* 0x3f Match already matched string, folded using unicode rules for non-utf8 */
78#define REFFA 64 /* 0x40 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
79#define NREF 65 /* 0x41 Match some already matched string */
80#define NREFF 66 /* 0x42 Match already matched string, folded using native charset rules for non-utf8 */
81#define NREFFL 67 /* 0x43 Match already matched string, folded in loc. */
82#define NREFFU 68 /* 0x44 Match already matched string, folded using unicode rules for non-utf8 */
83#define NREFFA 69 /* 0x45 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
84#define LONGJMP 70 /* 0x46 Jump far away. */
85#define BRANCHJ 71 /* 0x47 BRANCH with long offset. */
80101a2c
KW
86#define IFMATCH 72 /* 0x48 Succeeds if the following matches; non-zero flags "f" means lookbehind assertion starting "f" characters before current */
87#define UNLESSM 73 /* 0x49 Fails if the following matches; non-zero flags "f" means lookbehind assertion starting "f" characters before current */
c316b824
KW
88#define SUSPEND 74 /* 0x4a "Independent" sub-RE. */
89#define IFTHEN 75 /* 0x4b Switch, should be preceded by switcher. */
90#define GROUPP 76 /* 0x4c Whether the group matched. */
91#define EVAL 77 /* 0x4d Execute some Perl code. */
92#define MINMOD 78 /* 0x4e Next operator is not greedy. */
93#define LOGICAL 79 /* 0x4f Next opcode should set the flag only. */
94#define RENUM 80 /* 0x50 Group with independently numbered parens. */
95#define TRIE 81 /* 0x51 Match many EXACT(F[ALU]?)? at once. flags==type */
96#define TRIEC 82 /* 0x52 Same as TRIE, but with embedded charclass data */
97#define AHOCORASICK 83 /* 0x53 Aho Corasick stclass. flags==type */
98#define AHOCORASICKC 84 /* 0x54 Same as AHOCORASICK, but with embedded charclass data */
99#define GOSUB 85 /* 0x55 recurse to paren arg1 at (signed) ofs arg2 */
100#define NGROUPP 86 /* 0x56 Whether the group matched. */
101#define INSUBP 87 /* 0x57 Whether we are in a specific recurse. */
102#define DEFINEP 88 /* 0x58 Never execute directly. */
103#define ENDLIKE 89 /* 0x59 Used only for the type field of verbs */
104#define OPFAIL 90 /* 0x5a Same as (?!), but with verb arg */
105#define ACCEPT 91 /* 0x5b Accepts the current matched string, with verb arg */
106#define VERB 92 /* 0x5c Used only for the type field of verbs */
107#define PRUNE 93 /* 0x5d Pattern fails at this startpoint if no-backtracking through this */
108#define MARKPOINT 94 /* 0x5e Push the current location for rollback by cut. */
109#define SKIP 95 /* 0x5f On failure skip forward (to the mark) before retrying */
110#define COMMIT 96 /* 0x60 Pattern fails outright if backtracking through this */
111#define CUTGROUP 97 /* 0x61 On failure go to the next alternation in the group */
112#define KEEPS 98 /* 0x62 $& begins here. */
113#define LNBREAK 99 /* 0x63 generic newline pattern */
114#define OPTIMIZED 100 /* 0x64 Placeholder for dump. */
115#define PSEUDO 101 /* 0x65 Pseudo opcode for internal use. */
03363afd 116 /* ------------ States ------------- */
24b23f37
YO
117#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
118#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
4ee16520
DM
119#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */
120#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */
121#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */
122#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */
123#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */
124#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */
125#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */
126#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */
127#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */
128#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */
129#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */
130#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */
131#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */
132#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */
133#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */
134#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */
135#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */
136#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */
137#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */
138#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */
139#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */
140#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */
141#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */
142#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */
21cbe009
DM
143#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
144#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
145#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
146#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
147#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
148#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
149#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
150#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
151#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
152#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
153#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
154#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
155#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
156#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
03363afd 157
6bda09f9 158/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
159
160#ifndef DOINIT
22c35a8c 161EXTCONST U8 PL_regkind[];
d09b2d29 162#else
22c35a8c 163EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
164 END, /* END */
165 END, /* SUCCEED */
e2e6a0f1 166 BOL, /* SBOL */
d3d47aac 167 BOL, /* MBOL */
e2e6a0f1 168 EOL, /* SEOL */
d3d47aac
YO
169 EOL, /* MEOL */
170 EOL, /* EOS */
171 GPOS, /* GPOS */
e2e6a0f1
YO
172 BOUND, /* BOUND */
173 BOUND, /* BOUNDL */
1e355c70 174 BOUND, /* BOUNDU */
0c6e81eb 175 BOUND, /* BOUNDA */
e2e6a0f1
YO
176 NBOUND, /* NBOUND */
177 NBOUND, /* NBOUNDL */
1e355c70 178 NBOUND, /* NBOUNDU */
0c6e81eb 179 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
180 REG_ANY, /* REG_ANY */
181 REG_ANY, /* SANY */
e2e6a0f1 182 ANYOF, /* ANYOF */
ac44c12e 183 ANYOF, /* ANYOFD */
a4525e78 184 ANYOF, /* ANYOFL */
3edce4f5 185 ANYOF, /* ANYOFPOSIXL */
c316b824 186 ANYOF, /* ANYOFH */
67a1b5f9 187 ANYOFM, /* ANYOFM */
3db0bccc 188 ANYOFM, /* NANYOFM */
3615ea58
KW
189 POSIXD, /* POSIXD */
190 POSIXD, /* POSIXL */
191 POSIXD, /* POSIXU */
192 POSIXD, /* POSIXA */
9e84774b
KW
193 NPOSIXD, /* NPOSIXD */
194 NPOSIXD, /* NPOSIXL */
195 NPOSIXD, /* NPOSIXU */
196 NPOSIXD, /* NPOSIXA */
e2e6a0f1
YO
197 CLUMP, /* CLUMP */
198 BRANCH, /* BRANCH */
e2e6a0f1 199 EXACT, /* EXACT */
a4525e78 200 EXACT, /* EXACTL */
e2e6a0f1
YO
201 EXACT, /* EXACTF */
202 EXACT, /* EXACTFL */
01f98ec2 203 EXACT, /* EXACTFU */
89829bb5 204 EXACT, /* EXACTFAA */
627a7895 205 EXACT, /* EXACTFUP */
a4525e78 206 EXACT, /* EXACTFLU8 */
89829bb5 207 EXACT, /* EXACTFAA_NO_TRIE */
f6b4b99d 208 EXACT, /* EXACT_ONLY8 */
a9f8c7ac 209 EXACT, /* EXACTFU_ONLY8 */
95fb0a6e 210 EXACT, /* EXACTFU_S_EDGE */
e2e6a0f1
YO
211 NOTHING, /* NOTHING */
212 NOTHING, /* TAIL */
213 STAR, /* STAR */
214 PLUS, /* PLUS */
215 CURLY, /* CURLY */
216 CURLY, /* CURLYN */
217 CURLY, /* CURLYM */
218 CURLY, /* CURLYX */
219 WHILEM, /* WHILEM */
220 OPEN, /* OPEN */
221 CLOSE, /* CLOSE */
07093db4
KW
222 SROPEN, /* SROPEN */
223 SRCLOSE, /* SRCLOSE */
e2e6a0f1
YO
224 REF, /* REF */
225 REF, /* REFF */
226 REF, /* REFFL */
01f98ec2 227 REF, /* REFFU */
781aab5c 228 REF, /* REFFA */
01f98ec2
KW
229 REF, /* NREF */
230 REF, /* NREFF */
231 REF, /* NREFFL */
232 REF, /* NREFFU */
781aab5c 233 REF, /* NREFFA */
d3d47aac
YO
234 LONGJMP, /* LONGJMP */
235 BRANCHJ, /* BRANCHJ */
e2e6a0f1
YO
236 BRANCHJ, /* IFMATCH */
237 BRANCHJ, /* UNLESSM */
238 BRANCHJ, /* SUSPEND */
239 BRANCHJ, /* IFTHEN */
240 GROUPP, /* GROUPP */
e2e6a0f1
YO
241 EVAL, /* EVAL */
242 MINMOD, /* MINMOD */
243 LOGICAL, /* LOGICAL */
244 BRANCHJ, /* RENUM */
245 TRIE, /* TRIE */
246 TRIE, /* TRIEC */
247 TRIE, /* AHOCORASICK */
248 TRIE, /* AHOCORASICKC */
249 GOSUB, /* GOSUB */
e2e6a0f1
YO
250 NGROUPP, /* NGROUPP */
251 INSUBP, /* INSUBP */
252 DEFINEP, /* DEFINEP */
253 ENDLIKE, /* ENDLIKE */
254 ENDLIKE, /* OPFAIL */
255 ENDLIKE, /* ACCEPT */
256 VERB, /* VERB */
5d458dd8 257 VERB, /* PRUNE */
e2e6a0f1 258 VERB, /* MARKPOINT */
5d458dd8 259 VERB, /* SKIP */
e2e6a0f1 260 VERB, /* COMMIT */
5d458dd8 261 VERB, /* CUTGROUP */
ee9b8eae 262 KEEPS, /* KEEPS */
e1d1eefb 263 LNBREAK, /* LNBREAK */
e2e6a0f1
YO
264 NOTHING, /* OPTIMIZED */
265 PSEUDO, /* PSEUDO */
03363afd 266 /* ------------ States ------------- */
e2e6a0f1
YO
267 TRIE, /* TRIE_next */
268 TRIE, /* TRIE_next_fail */
4ee16520
DM
269 EVAL, /* EVAL_B */
270 EVAL, /* EVAL_B_fail */
271 EVAL, /* EVAL_postponed_AB */
272 EVAL, /* EVAL_postponed_AB_fail */
e2e6a0f1
YO
273 CURLYX, /* CURLYX_end */
274 CURLYX, /* CURLYX_end_fail */
275 WHILEM, /* WHILEM_A_pre */
276 WHILEM, /* WHILEM_A_pre_fail */
277 WHILEM, /* WHILEM_A_min */
278 WHILEM, /* WHILEM_A_min_fail */
279 WHILEM, /* WHILEM_A_max */
280 WHILEM, /* WHILEM_A_max_fail */
281 WHILEM, /* WHILEM_B_min */
282 WHILEM, /* WHILEM_B_min_fail */
283 WHILEM, /* WHILEM_B_max */
284 WHILEM, /* WHILEM_B_max_fail */
285 BRANCH, /* BRANCH_next */
286 BRANCH, /* BRANCH_next_fail */
287 CURLYM, /* CURLYM_A */
288 CURLYM, /* CURLYM_A_fail */
289 CURLYM, /* CURLYM_B */
290 CURLYM, /* CURLYM_B_fail */
291 IFMATCH, /* IFMATCH_A */
292 IFMATCH, /* IFMATCH_A_fail */
e2e6a0f1
YO
293 CURLY, /* CURLY_B_min */
294 CURLY, /* CURLY_B_min_fail */
295 CURLY, /* CURLY_B_max */
296 CURLY, /* CURLY_B_max_fail */
297 COMMIT, /* COMMIT_next */
298 COMMIT, /* COMMIT_next_fail */
299 MARKPOINT, /* MARKPOINT_next */
300 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
301 SKIP, /* SKIP_next */
302 SKIP, /* SKIP_next_fail */
303 CUTGROUP, /* CUTGROUP_next */
304 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
305 KEEPS, /* KEEPS_next */
306 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
307};
308#endif
309
f83e001e
YO
310#ifdef REG_COMP_C
311
6bda09f9 312/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29 313
29de9391 314static const U8 regarglen[] = {
03363afd
YO
315 0, /* END */
316 0, /* SUCCEED */
03363afd 317 0, /* SBOL */
d3d47aac 318 0, /* MBOL */
03363afd 319 0, /* SEOL */
d3d47aac
YO
320 0, /* MEOL */
321 0, /* EOS */
322 0, /* GPOS */
03363afd
YO
323 0, /* BOUND */
324 0, /* BOUNDL */
1e355c70 325 0, /* BOUNDU */
0c6e81eb 326 0, /* BOUNDA */
03363afd
YO
327 0, /* NBOUND */
328 0, /* NBOUNDL */
1e355c70 329 0, /* NBOUNDU */
0c6e81eb 330 0, /* NBOUNDA */
03363afd
YO
331 0, /* REG_ANY */
332 0, /* SANY */
46fc0c43
KW
333 EXTRA_SIZE(struct regnode_charclass), /* ANYOF */
334 EXTRA_SIZE(struct regnode_charclass), /* ANYOFD */
335 EXTRA_SIZE(struct regnode_charclass), /* ANYOFL */
336 EXTRA_SIZE(struct regnode_charclass_posixl), /* ANYOFPOSIXL */
c316b824 337 EXTRA_SIZE(struct regnode_1), /* ANYOFH */
67a1b5f9 338 EXTRA_SIZE(struct regnode_1), /* ANYOFM */
3db0bccc 339 EXTRA_SIZE(struct regnode_1), /* NANYOFM */
3615ea58
KW
340 0, /* POSIXD */
341 0, /* POSIXL */
342 0, /* POSIXU */
343 0, /* POSIXA */
344 0, /* NPOSIXD */
345 0, /* NPOSIXL */
346 0, /* NPOSIXU */
347 0, /* NPOSIXA */
03363afd
YO
348 0, /* CLUMP */
349 0, /* BRANCH */
03363afd 350 0, /* EXACT */
a4525e78 351 0, /* EXACTL */
03363afd
YO
352 0, /* EXACTF */
353 0, /* EXACTFL */
01f98ec2 354 0, /* EXACTFU */
89829bb5 355 0, /* EXACTFAA */
627a7895 356 0, /* EXACTFUP */
a4525e78 357 0, /* EXACTFLU8 */
89829bb5 358 0, /* EXACTFAA_NO_TRIE */
f6b4b99d 359 0, /* EXACT_ONLY8 */
a9f8c7ac 360 0, /* EXACTFU_ONLY8 */
95fb0a6e 361 0, /* EXACTFU_S_EDGE */
03363afd
YO
362 0, /* NOTHING */
363 0, /* TAIL */
364 0, /* STAR */
365 0, /* PLUS */
366 EXTRA_SIZE(struct regnode_2), /* CURLY */
367 EXTRA_SIZE(struct regnode_2), /* CURLYN */
368 EXTRA_SIZE(struct regnode_2), /* CURLYM */
369 EXTRA_SIZE(struct regnode_2), /* CURLYX */
370 0, /* WHILEM */
371 EXTRA_SIZE(struct regnode_1), /* OPEN */
372 EXTRA_SIZE(struct regnode_1), /* CLOSE */
07093db4
KW
373 0, /* SROPEN */
374 0, /* SRCLOSE */
03363afd
YO
375 EXTRA_SIZE(struct regnode_1), /* REF */
376 EXTRA_SIZE(struct regnode_1), /* REFF */
377 EXTRA_SIZE(struct regnode_1), /* REFFL */
01f98ec2 378 EXTRA_SIZE(struct regnode_1), /* REFFU */
781aab5c 379 EXTRA_SIZE(struct regnode_1), /* REFFA */
01f98ec2
KW
380 EXTRA_SIZE(struct regnode_1), /* NREF */
381 EXTRA_SIZE(struct regnode_1), /* NREFF */
382 EXTRA_SIZE(struct regnode_1), /* NREFFL */
383 EXTRA_SIZE(struct regnode_1), /* NREFFU */
781aab5c 384 EXTRA_SIZE(struct regnode_1), /* NREFFA */
d3d47aac
YO
385 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
386 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
03363afd
YO
387 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
388 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
389 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
390 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
391 EXTRA_SIZE(struct regnode_1), /* GROUPP */
13f27704 392 EXTRA_SIZE(struct regnode_2L), /* EVAL */
03363afd
YO
393 0, /* MINMOD */
394 0, /* LOGICAL */
395 EXTRA_SIZE(struct regnode_1), /* RENUM */
396 EXTRA_SIZE(struct regnode_1), /* TRIE */
397 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
398 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
399 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
1a147d38 400 EXTRA_SIZE(struct regnode_2L), /* GOSUB */
0a4db386 401 EXTRA_SIZE(struct regnode_1), /* NGROUPP */
1a147d38 402 EXTRA_SIZE(struct regnode_1), /* INSUBP */
0a4db386 403 EXTRA_SIZE(struct regnode_1), /* DEFINEP */
e2e6a0f1 404 0, /* ENDLIKE */
fee50582
YO
405 EXTRA_SIZE(struct regnode_1), /* OPFAIL */
406 EXTRA_SIZE(struct regnode_2L), /* ACCEPT */
20832bc5 407 EXTRA_SIZE(struct regnode_1), /* VERB */
5d458dd8 408 EXTRA_SIZE(struct regnode_1), /* PRUNE */
e2e6a0f1 409 EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
5d458dd8 410 EXTRA_SIZE(struct regnode_1), /* SKIP */
e2e6a0f1 411 EXTRA_SIZE(struct regnode_1), /* COMMIT */
5d458dd8 412 EXTRA_SIZE(struct regnode_1), /* CUTGROUP */
ee9b8eae 413 0, /* KEEPS */
e1d1eefb 414 0, /* LNBREAK */
03363afd
YO
415 0, /* OPTIMIZED */
416 0, /* PSEUDO */
d09b2d29
IZ
417};
418
6bda09f9
YO
419/* reg_off_by_arg[] - Which argument holds the offset to the next node */
420
29de9391 421static const char reg_off_by_arg[] = {
03363afd
YO
422 0, /* END */
423 0, /* SUCCEED */
03363afd 424 0, /* SBOL */
d3d47aac 425 0, /* MBOL */
03363afd 426 0, /* SEOL */
d3d47aac
YO
427 0, /* MEOL */
428 0, /* EOS */
429 0, /* GPOS */
03363afd
YO
430 0, /* BOUND */
431 0, /* BOUNDL */
1e355c70 432 0, /* BOUNDU */
0c6e81eb 433 0, /* BOUNDA */
03363afd
YO
434 0, /* NBOUND */
435 0, /* NBOUNDL */
1e355c70 436 0, /* NBOUNDU */
0c6e81eb 437 0, /* NBOUNDA */
03363afd
YO
438 0, /* REG_ANY */
439 0, /* SANY */
03363afd 440 0, /* ANYOF */
ac44c12e 441 0, /* ANYOFD */
a4525e78 442 0, /* ANYOFL */
3edce4f5 443 0, /* ANYOFPOSIXL */
c316b824 444 0, /* ANYOFH */
67a1b5f9 445 0, /* ANYOFM */
3db0bccc 446 0, /* NANYOFM */
3615ea58
KW
447 0, /* POSIXD */
448 0, /* POSIXL */
449 0, /* POSIXU */
450 0, /* POSIXA */
451 0, /* NPOSIXD */
452 0, /* NPOSIXL */
453 0, /* NPOSIXU */
454 0, /* NPOSIXA */
03363afd
YO
455 0, /* CLUMP */
456 0, /* BRANCH */
03363afd 457 0, /* EXACT */
a4525e78 458 0, /* EXACTL */
03363afd
YO
459 0, /* EXACTF */
460 0, /* EXACTFL */
01f98ec2 461 0, /* EXACTFU */
89829bb5 462 0, /* EXACTFAA */
627a7895 463 0, /* EXACTFUP */
a4525e78 464 0, /* EXACTFLU8 */
89829bb5 465 0, /* EXACTFAA_NO_TRIE */
f6b4b99d 466 0, /* EXACT_ONLY8 */
a9f8c7ac 467 0, /* EXACTFU_ONLY8 */
95fb0a6e 468 0, /* EXACTFU_S_EDGE */
03363afd
YO
469 0, /* NOTHING */
470 0, /* TAIL */
471 0, /* STAR */
472 0, /* PLUS */
473 0, /* CURLY */
474 0, /* CURLYN */
475 0, /* CURLYM */
476 0, /* CURLYX */
477 0, /* WHILEM */
478 0, /* OPEN */
479 0, /* CLOSE */
07093db4
KW
480 0, /* SROPEN */
481 0, /* SRCLOSE */
03363afd
YO
482 0, /* REF */
483 0, /* REFF */
484 0, /* REFFL */
01f98ec2 485 0, /* REFFU */
781aab5c 486 0, /* REFFA */
01f98ec2
KW
487 0, /* NREF */
488 0, /* NREFF */
489 0, /* NREFFL */
490 0, /* NREFFU */
781aab5c 491 0, /* NREFFA */
d3d47aac
YO
492 1, /* LONGJMP */
493 1, /* BRANCHJ */
46167d76
KW
494 1, /* IFMATCH */
495 1, /* UNLESSM */
03363afd
YO
496 1, /* SUSPEND */
497 1, /* IFTHEN */
498 0, /* GROUPP */
03363afd
YO
499 0, /* EVAL */
500 0, /* MINMOD */
501 0, /* LOGICAL */
502 1, /* RENUM */
503 0, /* TRIE */
504 0, /* TRIEC */
505 0, /* AHOCORASICK */
506 0, /* AHOCORASICKC */
1a147d38 507 0, /* GOSUB */
0a4db386 508 0, /* NGROUPP */
1a147d38 509 0, /* INSUBP */
0a4db386 510 0, /* DEFINEP */
e2e6a0f1 511 0, /* ENDLIKE */
7f69552c 512 0, /* OPFAIL */
e2e6a0f1
YO
513 0, /* ACCEPT */
514 0, /* VERB */
5d458dd8 515 0, /* PRUNE */
e2e6a0f1 516 0, /* MARKPOINT */
5d458dd8 517 0, /* SKIP */
e2e6a0f1 518 0, /* COMMIT */
5d458dd8 519 0, /* CUTGROUP */
ee9b8eae 520 0, /* KEEPS */
e1d1eefb 521 0, /* LNBREAK */
03363afd
YO
522 0, /* OPTIMIZED */
523 0, /* PSEUDO */
d09b2d29 524};
885f9e59 525
13d6edb4
NC
526#endif /* REG_COMP_C */
527
f83e001e 528
6bda09f9
YO
529/* reg_name[] - Opcode/state names in string form, for debugging */
530
22429478 531#ifndef DOINIT
/* The pointer elements are const here as well, so that this declaration has
 * the same type as the DOINIT definition of PL_reg_name[]. */
13d6edb4 532EXTCONST char * const PL_reg_name[];
22429478 533#else
4764e399 534EXTCONST char * const PL_reg_name[] = {
03363afd
YO
535 "END", /* 0000 */
536 "SUCCEED", /* 0x01 */
d3d47aac 537 "SBOL", /* 0x02 */
03363afd 538 "MBOL", /* 0x03 */
d3d47aac
YO
539 "SEOL", /* 0x04 */
540 "MEOL", /* 0x05 */
541 "EOS", /* 0x06 */
542 "GPOS", /* 0x07 */
543 "BOUND", /* 0x08 */
544 "BOUNDL", /* 0x09 */
545 "BOUNDU", /* 0x0a */
546 "BOUNDA", /* 0x0b */
547 "NBOUND", /* 0x0c */
548 "NBOUNDL", /* 0x0d */
549 "NBOUNDU", /* 0x0e */
550 "NBOUNDA", /* 0x0f */
551 "REG_ANY", /* 0x10 */
552 "SANY", /* 0x11 */
33c28ab2 553 "ANYOF", /* 0x12 */
ac44c12e
KW
554 "ANYOFD", /* 0x13 */
555 "ANYOFL", /* 0x14 */
3edce4f5 556 "ANYOFPOSIXL", /* 0x15 */
c316b824
KW
557 "ANYOFH", /* 0x16 */
558 "ANYOFM", /* 0x17 */
559 "NANYOFM", /* 0x18 */
560 "POSIXD", /* 0x19 */
561 "POSIXL", /* 0x1a */
562 "POSIXU", /* 0x1b */
563 "POSIXA", /* 0x1c */
564 "NPOSIXD", /* 0x1d */
565 "NPOSIXL", /* 0x1e */
566 "NPOSIXU", /* 0x1f */
567 "NPOSIXA", /* 0x20 */
568 "CLUMP", /* 0x21 */
569 "BRANCH", /* 0x22 */
570 "EXACT", /* 0x23 */
571 "EXACTL", /* 0x24 */
572 "EXACTF", /* 0x25 */
573 "EXACTFL", /* 0x26 */
574 "EXACTFU", /* 0x27 */
575 "EXACTFAA", /* 0x28 */
576 "EXACTFUP", /* 0x29 */
577 "EXACTFLU8", /* 0x2a */
578 "EXACTFAA_NO_TRIE", /* 0x2b */
579 "EXACT_ONLY8", /* 0x2c */
580 "EXACTFU_ONLY8", /* 0x2d */
581 "EXACTFU_S_EDGE", /* 0x2e */
582 "NOTHING", /* 0x2f */
583 "TAIL", /* 0x30 */
584 "STAR", /* 0x31 */
585 "PLUS", /* 0x32 */
586 "CURLY", /* 0x33 */
587 "CURLYN", /* 0x34 */
588 "CURLYM", /* 0x35 */
589 "CURLYX", /* 0x36 */
590 "WHILEM", /* 0x37 */
591 "OPEN", /* 0x38 */
592 "CLOSE", /* 0x39 */
593 "SROPEN", /* 0x3a */
594 "SRCLOSE", /* 0x3b */
595 "REF", /* 0x3c */
596 "REFF", /* 0x3d */
597 "REFFL", /* 0x3e */
598 "REFFU", /* 0x3f */
599 "REFFA", /* 0x40 */
600 "NREF", /* 0x41 */
601 "NREFF", /* 0x42 */
602 "NREFFL", /* 0x43 */
603 "NREFFU", /* 0x44 */
604 "NREFFA", /* 0x45 */
605 "LONGJMP", /* 0x46 */
606 "BRANCHJ", /* 0x47 */
607 "IFMATCH", /* 0x48 */
608 "UNLESSM", /* 0x49 */
609 "SUSPEND", /* 0x4a */
610 "IFTHEN", /* 0x4b */
611 "GROUPP", /* 0x4c */
612 "EVAL", /* 0x4d */
613 "MINMOD", /* 0x4e */
614 "LOGICAL", /* 0x4f */
615 "RENUM", /* 0x50 */
616 "TRIE", /* 0x51 */
617 "TRIEC", /* 0x52 */
618 "AHOCORASICK", /* 0x53 */
619 "AHOCORASICKC", /* 0x54 */
620 "GOSUB", /* 0x55 */
621 "NGROUPP", /* 0x56 */
622 "INSUBP", /* 0x57 */
623 "DEFINEP", /* 0x58 */
624 "ENDLIKE", /* 0x59 */
625 "OPFAIL", /* 0x5a */
626 "ACCEPT", /* 0x5b */
627 "VERB", /* 0x5c */
628 "PRUNE", /* 0x5d */
629 "MARKPOINT", /* 0x5e */
630 "SKIP", /* 0x5f */
631 "COMMIT", /* 0x60 */
632 "CUTGROUP", /* 0x61 */
633 "KEEPS", /* 0x62 */
634 "LNBREAK", /* 0x63 */
635 "OPTIMIZED", /* 0x64 */
636 "PSEUDO", /* 0x65 */
03363afd 637 /* ------------ States ------------- */
24b23f37
YO
638 "TRIE_next", /* REGNODE_MAX +0x01 */
639 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
4ee16520
DM
640 "EVAL_B", /* REGNODE_MAX +0x03 */
641 "EVAL_B_fail", /* REGNODE_MAX +0x04 */
642 "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */
643 "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */
644 "CURLYX_end", /* REGNODE_MAX +0x07 */
645 "CURLYX_end_fail", /* REGNODE_MAX +0x08 */
646 "WHILEM_A_pre", /* REGNODE_MAX +0x09 */
647 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */
648 "WHILEM_A_min", /* REGNODE_MAX +0x0b */
649 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */
650 "WHILEM_A_max", /* REGNODE_MAX +0x0d */
651 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */
652 "WHILEM_B_min", /* REGNODE_MAX +0x0f */
653 "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */
654 "WHILEM_B_max", /* REGNODE_MAX +0x11 */
655 "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */
656 "BRANCH_next", /* REGNODE_MAX +0x13 */
657 "BRANCH_next_fail", /* REGNODE_MAX +0x14 */
658 "CURLYM_A", /* REGNODE_MAX +0x15 */
659 "CURLYM_A_fail", /* REGNODE_MAX +0x16 */
660 "CURLYM_B", /* REGNODE_MAX +0x17 */
661 "CURLYM_B_fail", /* REGNODE_MAX +0x18 */
662 "IFMATCH_A", /* REGNODE_MAX +0x19 */
663 "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */
21cbe009
DM
664 "CURLY_B_min", /* REGNODE_MAX +0x1b */
665 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
666 "CURLY_B_max", /* REGNODE_MAX +0x1d */
667 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
668 "COMMIT_next", /* REGNODE_MAX +0x1f */
669 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
670 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
671 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
672 "SKIP_next", /* REGNODE_MAX +0x23 */
673 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
674 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
675 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
676 "KEEPS_next", /* REGNODE_MAX +0x27 */
677 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
885f9e59 678};
22429478 679#endif /* DOINIT */
d09b2d29 680
f7819f85
A
681/* PL_reg_extflags_name[] - Names of the extflags bits in string form, for debugging */
682
683#ifndef DOINIT
/* The pointer elements are const here as well, so that this declaration has
 * the same type as the DOINIT definition of PL_reg_extflags_name[]. */
684EXTCONST char * const PL_reg_extflags_name[];
685#else
686EXTCONST char * const PL_reg_extflags_name[] = {
d262c0c7 687 /* Bits in extflags defined: 11111111111111110000111111111111 */
52d81aa8
NC
688 "MULTILINE", /* 0x00000001 */
689 "SINGLELINE", /* 0x00000002 */
690 "FOLD", /* 0x00000004 */
691 "EXTENDED", /* 0x00000008 */
334afb3e 692 "EXTENDED_MORE", /* 0x00000010 */
e3b64d84
KW
693 "NOCAPTURE", /* 0x00000020 */
694 "KEEPCOPY", /* 0x00000040 */
695 "CHARSET0", /* 0x00000080 : "CHARSET" - 0x00000380 */
696 "CHARSET1", /* 0x00000100 : "CHARSET" - 0x00000380 */
697 "CHARSET2", /* 0x00000200 : "CHARSET" - 0x00000380 */
d262c0c7
KW
698 "STRICT", /* 0x00000400 */
699 "SPLIT", /* 0x00000800 */
1d32d911
KW
700 "UNUSED_BIT_12", /* 0x00001000 */
701 "UNUSED_BIT_13", /* 0x00002000 */
702 "UNUSED_BIT_14", /* 0x00004000 */
a3b51d37
KW
703 "UNUSED_BIT_15", /* 0x00008000 */
704 "NO_INPLACE_SUBST", /* 0x00010000 */
705 "EVAL_SEEN", /* 0x00020000 */
ee273784 706 "UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
e795e964 707 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
708 "MATCH_UTF8", /* 0x00100000 */
709 "USE_INTUIT_NOML", /* 0x00200000 */
710 "USE_INTUIT_ML", /* 0x00400000 */
711 "INTUIT_TAIL", /* 0x00800000 */
a3b51d37 712 "IS_ANCHORED", /* 0x01000000 */
52d81aa8
NC
713 "COPY_DONE", /* 0x02000000 */
714 "TAINTED_SEEN", /* 0x04000000 */
715 "TAINTED", /* 0x08000000 */
716 "START_ONLY", /* 0x10000000 */
dbc200c5 717 "SKIPWHITE", /* 0x20000000 */
52d81aa8
NC
718 "WHITE", /* 0x40000000 */
719 "NULL", /* 0x80000000 */
f7819f85
A
720};
721#endif /* DOINIT */
722
adc2d0c9
JH
/* Number of entries in PL_reg_extflags_name[]: one name for each of the 32
 * bit values 0x00000001 .. 0x80000000. Only defined under DEBUGGING. */
723#ifdef DEBUGGING
724# define REG_EXTFLAGS_NAME_SIZE 32
725#endif
726
337ff307
YO
727/* PL_reg_intflags_name[] - Names of the intflags (PREGf_*) bits in string form, for debugging */
728
/* Table of printable names for the PREGf_* flag bits, in ascending bit order.
 * The non-DOINIT declaration uses the same "char * const" element type as the
 * definition so that both declarations of the object have the same type.
 *
 * NOTE(review): the per-entry bit values jump from 0x00000040 to 0x00000100,
 * so there is no slot for 0x00000080; from "GPOS_SEEN" onward an entry's
 * index is one less than its bit number. Confirm that consumers do not index
 * this table directly by bit position. */
729#ifndef DOINIT
730EXTCONST char * const PL_reg_intflags_name[];
731#else
732EXTCONST char * const PL_reg_intflags_name[] = {
b8f6efdd
YO
733 "SKIP", /* 0x00000001 - PREGf_SKIP */
734 "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
735 "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
736 "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
737 "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
738 "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
58430ea8 739 "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
58430ea8
YO
740 "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
741 "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
d3d47aac
YO
742 "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
743 "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
744 "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
d5a00e4a 745 "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
337ff307
YO
746};
747#endif /* DOINIT */
748
/* Number of entries in PL_reg_intflags_name[] (13 names, "SKIP" through
 * "RECURSE_SEEN"). Only defined under DEBUGGING. */
adc2d0c9 749#ifdef DEBUGGING
d5a00e4a 750# define REG_INTFLAGS_NAME_SIZE 13
adc2d0c9
JH
751#endif
752
f9ef50a7 753/* The following have no fixed length. U8 so we can do strchr() on it. */
/* Nonzero iff bit (node & 7) of byte (node >> 3) in PL_varies_bitmask[] is
 * set. 'node' is expanded twice, so pass an expression with no side effects. */
ded4dd2a 754#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 755
/* List form of the "varies" opcodes, ending with a 0 entry. It is marked
 * deprecated; the REGNODE_VARIES() macro tests PL_varies_bitmask[] rather
 * than scanning this list. */
f9ef50a7 756#ifndef DOINIT
ded4dd2a 757EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 758#else
ded4dd2a 759EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
62e6ef33
AC
760 CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF,
761 REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
d3d47aac 762 BRANCHJ, SUSPEND, IFTHEN,
f9ef50a7
NC
763 0
764};
765#endif /* DOINIT */
766
ded4dd2a
NC
/* Bit-set form of PL_varies[]: opcode n is represented by bit (n & 7) of
 * byte (n >> 3), which is how REGNODE_VARIES() reads it. The 13 bytes hold
 * 104 bits, enough for opcodes 0 .. REGNODE_MAX (101). For example, 0x06 in
 * byte 4 marks opcodes 33 (CLUMP) and 34 (BRANCH). */
767#ifndef DOINIT
768EXTCONST U8 PL_varies_bitmask[];
769#else
770EXTCONST U8 PL_varies_bitmask[] = {
c316b824 771 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0xFE, 0xF0, 0xBF, 0x0C, 0x00, 0x00, 0x00
ded4dd2a
NC
772};
773#endif /* DOINIT */
774
f9ef50a7
NC
775/* The following always have a length of 1. U8 so we can do strchr() on it. */
776/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
/* Nonzero iff bit (node & 7) of byte (node >> 3) in PL_simple_bitmask[] is
 * set. 'node' is expanded twice, so pass an expression with no side effects. */
ded4dd2a 777#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 778
/* List form of the "simple" opcodes, ending with a 0 entry. It is marked
 * deprecated; the REGNODE_SIMPLE() macro tests PL_simple_bitmask[] rather
 * than scanning this list. */
f9ef50a7 779#ifndef DOINIT
ded4dd2a 780EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 781#else
ded4dd2a 782EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
c316b824
KW
783 REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFH, ANYOFM,
784 NANYOFM, POSIXD, POSIXL, POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU,
785 NPOSIXA,
f9ef50a7
NC
786 0
787};
788#endif /* DOINIT */
789
ded4dd2a
NC
/* Bit-set form of PL_simple[]: opcode n is represented by bit (n & 7) of
 * byte (n >> 3), which is how REGNODE_SIMPLE() reads it. Bytes 2 and 3
 * (0xFF, 0xFF) mark opcodes 16 (REG_ANY) .. 31 (NPOSIXU), and 0x01 in byte 4
 * marks opcode 32 (NPOSIXA). */
790#ifndef DOINIT
791EXTCONST U8 PL_simple_bitmask[];
792#else
793EXTCONST U8 PL_simple_bitmask[] = {
c316b824 794 0x00, 0x00, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
ded4dd2a
NC
795};
796#endif /* DOINIT */
797
37442d52 798/* ex: set ro: */