This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
t/re/reg_mesg.t: Add test
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
/* -*- buffer-read-only: t -*-
   !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
   This file is built by regen/regcomp.pl from regcomp.sym.
   Any changes made here will be lost!
 */
d09b2d29 6
6bda09f9
YO
/* Regops and State definitions */
8
3edce4f5
KW
/* REGNODE_MAX is the number of the highest regop (PSEUDO);
 * REGMATCH_STATE_MAX is the number of the highest matcher state
 * (REGNODE_MAX + 40, i.e. KEEPS_next_fail). */
#define REGNODE_MAX         98
#define REGMATCH_STATE_MAX  138
03363afd 11
f9f4320a
YO
/* Regop (regex opcode) numbers.  Each comment gives the value in hex followed
 * by a short description.  BOL and EOL are aliases sharing the values of SBOL
 * and SEOL respectively. */
#define END               0   /* 0000 End of program. */
#define SUCCEED           1   /* 0x01 Return from a subroutine, basically. */
#define SBOL              2   /* 0x02 Match "" at beginning of line: /^/, /\A/ */
#define BOL               2   /* 0x02 type alias */
#define MBOL              3   /* 0x03 Same, assuming multiline: /^/m */
#define SEOL              4   /* 0x04 Match "" at end of line: /$/ */
#define EOL               4   /* 0x04 type alias */
#define MEOL              5   /* 0x05 Same, assuming multiline: /$/m */
#define EOS               6   /* 0x06 Match "" at end of string: /\z/ */
#define GPOS              7   /* 0x07 Matches where last m//g left off. */
#define BOUND             8   /* 0x08 Like BOUNDA for non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
#define BOUNDL            9   /* 0x09 Like BOUND/BOUNDU, but \w and \W are defined by current locale */
#define BOUNDU            10  /* 0x0a Match "" at any boundary of a given type using Unicode rules */
#define BOUNDA            11  /* 0x0b Match "" at any boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
#define NBOUND            12  /* 0x0c Like NBOUNDA for non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
#define NBOUNDL           13  /* 0x0d Like NBOUND/NBOUNDU, but \w and \W are defined by current locale */
#define NBOUNDU           14  /* 0x0e Match "" at any non-boundary of a given type using Unicode rules */
#define NBOUNDA           15  /* 0x0f Match "" between any \w\w or \W\W, where \w is [_a-zA-Z0-9] */
#define REG_ANY           16  /* 0x10 Match any one character (except newline). */
#define SANY              17  /* 0x11 Match any one character. */
#define ANYOF             18  /* 0x12 Match character in (or not in) this class, single char match only */
#define ANYOFD            19  /* 0x13 Like ANYOF, but /d is in effect */
#define ANYOFL            20  /* 0x14 Like ANYOF, but /l is in effect */
#define ANYOFPOSIXL       21  /* 0x15 Like ANYOFL, but matches [[:posix:]] classes */
#define ANYOFM            22  /* 0x16 Like ANYOF, but matches an invariant byte as determined by the mask and arg */
#define POSIXD            23  /* 0x17 Some [[:class:]] under /d; the FLAGS field gives which one */
#define POSIXL            24  /* 0x18 Some [[:class:]] under /l; the FLAGS field gives which one */
#define POSIXU            25  /* 0x19 Some [[:class:]] under /u; the FLAGS field gives which one */
#define POSIXA            26  /* 0x1a Some [[:class:]] under /a; the FLAGS field gives which one */
#define NPOSIXD           27  /* 0x1b complement of POSIXD, [[:^class:]] */
#define NPOSIXL           28  /* 0x1c complement of POSIXL, [[:^class:]] */
#define NPOSIXU           29  /* 0x1d complement of POSIXU, [[:^class:]] */
#define NPOSIXA           30  /* 0x1e complement of POSIXA, [[:^class:]] */
#define ASCII             31  /* 0x1f [[:ascii:]] */
#define NASCII            32  /* 0x20 [[:^ascii:]] */
#define CLUMP             33  /* 0x21 Match any extended grapheme cluster sequence */
#define BRANCH            34  /* 0x22 Match this alternative, or the next... */
#define EXACT             35  /* 0x23 Match this string (preceded by length). */
#define EXACTL            36  /* 0x24 Like EXACT, but /l is in effect (used so locale-related warnings can be checked for). */
#define EXACTF            37  /* 0x25 Match this non-UTF-8 string (not guaranteed to be folded) using /id rules (w/len). */
#define EXACTFL           38  /* 0x26 Match this string (not guaranteed to be folded) using /il rules (w/len). */
#define EXACTFU           39  /* 0x27 Match this string (folded iff in UTF-8, length in folding doesn't change if not in UTF-8) using /iu rules (w/len). */
#define EXACTFAA          40  /* 0x28 Match this string (not guaranteed to be folded) using /iaa rules (w/len). */
#define EXACTFU_SS        41  /* 0x29 Match this string (folded iff in UTF-8, length in folding may change even if not in UTF-8) using /iu rules (w/len). */
#define EXACTFLU8         42  /* 0x2a Rare circumstances: like EXACTFU, but is under /l, UTF-8, folded, and everything in it is above 255. */
#define EXACTFAA_NO_TRIE  43  /* 0x2b Match this string (which is not trie-able; not guaranteed to be folded) using /iaa rules (w/len). */
#define NOTHING           44  /* 0x2c Match empty string. */
#define TAIL              45  /* 0x2d Match empty string. Can jump here from outside. */
#define STAR              46  /* 0x2e Match this (simple) thing 0 or more times. */
#define PLUS              47  /* 0x2f Match this (simple) thing 1 or more times. */
#define CURLY             48  /* 0x30 Match this simple thing {n,m} times. */
#define CURLYN            49  /* 0x31 Capture next-after-this simple thing */
#define CURLYM            50  /* 0x32 Capture this medium-complex thing {n,m} times. */
#define CURLYX            51  /* 0x33 Match this complex thing {n,m} times. */
#define WHILEM            52  /* 0x34 Do curly processing and see if rest matches. */
#define OPEN              53  /* 0x35 Mark this point in input as start of #n. */
#define CLOSE             54  /* 0x36 Close corresponding OPEN of #n. */
#define SROPEN            55  /* 0x37 Same as OPEN, but for script run */
#define SRCLOSE           56  /* 0x38 Close preceding SROPEN */
#define REF               57  /* 0x39 Match some already matched string */
#define REFF              58  /* 0x3a Match already matched string, folded using native charset rules for non-utf8 */
#define REFFL             59  /* 0x3b Match already matched string, folded in loc. */
#define REFFU             60  /* 0x3c Match already matched string, folded using unicode rules for non-utf8 */
#define REFFA             61  /* 0x3d Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
#define NREF              62  /* 0x3e Match some already matched string */
#define NREFF             63  /* 0x3f Match already matched string, folded using native charset rules for non-utf8 */
#define NREFFL            64  /* 0x40 Match already matched string, folded in loc. */
#define NREFFU            65  /* 0x41 Match already matched string, folded using unicode rules for non-utf8 */
#define NREFFA            66  /* 0x42 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
#define LONGJMP           67  /* 0x43 Jump far away. */
#define BRANCHJ           68  /* 0x44 BRANCH with long offset. */
#define IFMATCH           69  /* 0x45 Succeeds if the following matches. */
#define UNLESSM           70  /* 0x46 Fails if the following matches. */
#define SUSPEND           71  /* 0x47 "Independent" sub-RE. */
#define IFTHEN            72  /* 0x48 Switch, should be preceded by switcher. */
#define GROUPP            73  /* 0x49 Whether the group matched. */
#define EVAL              74  /* 0x4a Execute some Perl code. */
#define MINMOD            75  /* 0x4b Next operator is not greedy. */
#define LOGICAL           76  /* 0x4c Next opcode should set the flag only. */
#define RENUM             77  /* 0x4d Group with independently numbered parens. */
#define TRIE              78  /* 0x4e Match many EXACT(F[ALU]?)? at once. flags==type */
#define TRIEC             79  /* 0x4f Same as TRIE, but with embedded charclass data */
#define AHOCORASICK       80  /* 0x50 Aho Corasick stclass. flags==type */
#define AHOCORASICKC      81  /* 0x51 Same as AHOCORASICK, but with embedded charclass data */
#define GOSUB             82  /* 0x52 recurse to paren arg1 at (signed) ofs arg2 */
#define NGROUPP           83  /* 0x53 Whether the group matched. */
#define INSUBP            84  /* 0x54 Whether we are in a specific recurse. */
#define DEFINEP           85  /* 0x55 Never execute directly. */
#define ENDLIKE           86  /* 0x56 Used only for the type field of verbs */
#define OPFAIL            87  /* 0x57 Same as (?!), but with verb arg */
#define ACCEPT            88  /* 0x58 Accepts the current matched string, with verb arg */
#define VERB              89  /* 0x59 Used only for the type field of verbs */
#define PRUNE             90  /* 0x5a Pattern fails at this startpoint if no-backtracking through this */
#define MARKPOINT         91  /* 0x5b Push the current location for rollback by cut. */
#define SKIP              92  /* 0x5c On failure skip forward (to the mark) before retrying */
#define COMMIT            93  /* 0x5d Pattern fails outright if backtracking through this */
#define CUTGROUP          94  /* 0x5e On failure go to the next alternation in the group */
#define KEEPS             95  /* 0x5f $& begins here. */
#define LNBREAK           96  /* 0x60 generic newline pattern */
#define OPTIMIZED         97  /* 0x61 Placeholder for dump. */
#define PSEUDO            98  /* 0x62 Pseudo opcode for internal use. */
03363afd 113 /* ------------ States ------------- */
24b23f37
YO
114#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
115#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
4ee16520
DM
116#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */
117#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */
118#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */
119#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */
120#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */
121#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */
122#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */
123#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */
124#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */
125#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */
126#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */
127#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */
128#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */
129#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */
130#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */
131#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */
132#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */
133#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */
134#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */
135#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */
136#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */
137#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */
138#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */
139#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */
21cbe009
DM
140#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
141#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
142#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
143#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
144#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
145#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
146#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
147#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
148#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
149#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
150#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
151#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
152#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
153#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
03363afd 154
6bda09f9 155/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
156
157#ifndef DOINIT
22c35a8c 158EXTCONST U8 PL_regkind[];
d09b2d29 159#else
22c35a8c 160EXTCONST U8 PL_regkind[] = {
e2e6a0f1
YO
161 END, /* END */
162 END, /* SUCCEED */
e2e6a0f1 163 BOL, /* SBOL */
d3d47aac 164 BOL, /* MBOL */
e2e6a0f1 165 EOL, /* SEOL */
d3d47aac
YO
166 EOL, /* MEOL */
167 EOL, /* EOS */
168 GPOS, /* GPOS */
e2e6a0f1
YO
169 BOUND, /* BOUND */
170 BOUND, /* BOUNDL */
1e355c70 171 BOUND, /* BOUNDU */
0c6e81eb 172 BOUND, /* BOUNDA */
e2e6a0f1
YO
173 NBOUND, /* NBOUND */
174 NBOUND, /* NBOUNDL */
1e355c70 175 NBOUND, /* NBOUNDU */
0c6e81eb 176 NBOUND, /* NBOUNDA */
e2e6a0f1
YO
177 REG_ANY, /* REG_ANY */
178 REG_ANY, /* SANY */
e2e6a0f1 179 ANYOF, /* ANYOF */
ac44c12e 180 ANYOF, /* ANYOFD */
a4525e78 181 ANYOF, /* ANYOFL */
3edce4f5 182 ANYOF, /* ANYOFPOSIXL */
67a1b5f9 183 ANYOFM, /* ANYOFM */
3615ea58
KW
184 POSIXD, /* POSIXD */
185 POSIXD, /* POSIXL */
186 POSIXD, /* POSIXU */
187 POSIXD, /* POSIXA */
9e84774b
KW
188 NPOSIXD, /* NPOSIXD */
189 NPOSIXD, /* NPOSIXL */
190 NPOSIXD, /* NPOSIXU */
191 NPOSIXD, /* NPOSIXA */
39d24220
KW
192 ASCII, /* ASCII */
193 ASCII, /* NASCII */
e2e6a0f1
YO
194 CLUMP, /* CLUMP */
195 BRANCH, /* BRANCH */
e2e6a0f1 196 EXACT, /* EXACT */
a4525e78 197 EXACT, /* EXACTL */
e2e6a0f1
YO
198 EXACT, /* EXACTF */
199 EXACT, /* EXACTFL */
01f98ec2 200 EXACT, /* EXACTFU */
89829bb5 201 EXACT, /* EXACTFAA */
3c760661 202 EXACT, /* EXACTFU_SS */
a4525e78 203 EXACT, /* EXACTFLU8 */
89829bb5 204 EXACT, /* EXACTFAA_NO_TRIE */
e2e6a0f1
YO
205 NOTHING, /* NOTHING */
206 NOTHING, /* TAIL */
207 STAR, /* STAR */
208 PLUS, /* PLUS */
209 CURLY, /* CURLY */
210 CURLY, /* CURLYN */
211 CURLY, /* CURLYM */
212 CURLY, /* CURLYX */
213 WHILEM, /* WHILEM */
214 OPEN, /* OPEN */
215 CLOSE, /* CLOSE */
07093db4
KW
216 SROPEN, /* SROPEN */
217 SRCLOSE, /* SRCLOSE */
e2e6a0f1
YO
218 REF, /* REF */
219 REF, /* REFF */
220 REF, /* REFFL */
01f98ec2 221 REF, /* REFFU */
781aab5c 222 REF, /* REFFA */
01f98ec2
KW
223 REF, /* NREF */
224 REF, /* NREFF */
225 REF, /* NREFFL */
226 REF, /* NREFFU */
781aab5c 227 REF, /* NREFFA */
d3d47aac
YO
228 LONGJMP, /* LONGJMP */
229 BRANCHJ, /* BRANCHJ */
e2e6a0f1
YO
230 BRANCHJ, /* IFMATCH */
231 BRANCHJ, /* UNLESSM */
232 BRANCHJ, /* SUSPEND */
233 BRANCHJ, /* IFTHEN */
234 GROUPP, /* GROUPP */
e2e6a0f1
YO
235 EVAL, /* EVAL */
236 MINMOD, /* MINMOD */
237 LOGICAL, /* LOGICAL */
238 BRANCHJ, /* RENUM */
239 TRIE, /* TRIE */
240 TRIE, /* TRIEC */
241 TRIE, /* AHOCORASICK */
242 TRIE, /* AHOCORASICKC */
243 GOSUB, /* GOSUB */
e2e6a0f1
YO
244 NGROUPP, /* NGROUPP */
245 INSUBP, /* INSUBP */
246 DEFINEP, /* DEFINEP */
247 ENDLIKE, /* ENDLIKE */
248 ENDLIKE, /* OPFAIL */
249 ENDLIKE, /* ACCEPT */
250 VERB, /* VERB */
5d458dd8 251 VERB, /* PRUNE */
e2e6a0f1 252 VERB, /* MARKPOINT */
5d458dd8 253 VERB, /* SKIP */
e2e6a0f1 254 VERB, /* COMMIT */
5d458dd8 255 VERB, /* CUTGROUP */
ee9b8eae 256 KEEPS, /* KEEPS */
e1d1eefb 257 LNBREAK, /* LNBREAK */
e2e6a0f1
YO
258 NOTHING, /* OPTIMIZED */
259 PSEUDO, /* PSEUDO */
03363afd 260 /* ------------ States ------------- */
e2e6a0f1
YO
261 TRIE, /* TRIE_next */
262 TRIE, /* TRIE_next_fail */
4ee16520
DM
263 EVAL, /* EVAL_B */
264 EVAL, /* EVAL_B_fail */
265 EVAL, /* EVAL_postponed_AB */
266 EVAL, /* EVAL_postponed_AB_fail */
e2e6a0f1
YO
267 CURLYX, /* CURLYX_end */
268 CURLYX, /* CURLYX_end_fail */
269 WHILEM, /* WHILEM_A_pre */
270 WHILEM, /* WHILEM_A_pre_fail */
271 WHILEM, /* WHILEM_A_min */
272 WHILEM, /* WHILEM_A_min_fail */
273 WHILEM, /* WHILEM_A_max */
274 WHILEM, /* WHILEM_A_max_fail */
275 WHILEM, /* WHILEM_B_min */
276 WHILEM, /* WHILEM_B_min_fail */
277 WHILEM, /* WHILEM_B_max */
278 WHILEM, /* WHILEM_B_max_fail */
279 BRANCH, /* BRANCH_next */
280 BRANCH, /* BRANCH_next_fail */
281 CURLYM, /* CURLYM_A */
282 CURLYM, /* CURLYM_A_fail */
283 CURLYM, /* CURLYM_B */
284 CURLYM, /* CURLYM_B_fail */
285 IFMATCH, /* IFMATCH_A */
286 IFMATCH, /* IFMATCH_A_fail */
e2e6a0f1
YO
287 CURLY, /* CURLY_B_min */
288 CURLY, /* CURLY_B_min_fail */
289 CURLY, /* CURLY_B_max */
290 CURLY, /* CURLY_B_max_fail */
291 COMMIT, /* COMMIT_next */
292 COMMIT, /* COMMIT_next_fail */
293 MARKPOINT, /* MARKPOINT_next */
294 MARKPOINT, /* MARKPOINT_next_fail */
5d458dd8
YO
295 SKIP, /* SKIP_next */
296 SKIP, /* SKIP_next_fail */
297 CUTGROUP, /* CUTGROUP_next */
298 CUTGROUP, /* CUTGROUP_next_fail */
ee9b8eae
YO
299 KEEPS, /* KEEPS_next */
300 KEEPS, /* KEEPS_next_fail */
d09b2d29
IZ
301};
302#endif
303
f83e001e
YO
#ifdef REG_COMP_C

/* regarglen[] - How large is the argument part of the node (in regnodes).
 * One row per regop, in regop-number order (states have no entry). */

static const U8 regarglen[] = {
    0,                                              /* END              */
    0,                                              /* SUCCEED          */
    0,                                              /* SBOL             */
    0,                                              /* MBOL             */
    0,                                              /* SEOL             */
    0,                                              /* MEOL             */
    0,                                              /* EOS              */
    0,                                              /* GPOS             */
    0,                                              /* BOUND            */
    0,                                              /* BOUNDL           */
    0,                                              /* BOUNDU           */
    0,                                              /* BOUNDA           */
    0,                                              /* NBOUND           */
    0,                                              /* NBOUNDL          */
    0,                                              /* NBOUNDU          */
    0,                                              /* NBOUNDA          */
    0,                                              /* REG_ANY          */
    0,                                              /* SANY             */
    EXTRA_SIZE(struct regnode_charclass),           /* ANYOF            */
    EXTRA_SIZE(struct regnode_charclass),           /* ANYOFD           */
    EXTRA_SIZE(struct regnode_charclass),           /* ANYOFL           */
    EXTRA_SIZE(struct regnode_charclass_posixl),    /* ANYOFPOSIXL      */
    EXTRA_SIZE(struct regnode_1),                   /* ANYOFM           */
    0,                                              /* POSIXD           */
    0,                                              /* POSIXL           */
    0,                                              /* POSIXU           */
    0,                                              /* POSIXA           */
    0,                                              /* NPOSIXD          */
    0,                                              /* NPOSIXL          */
    0,                                              /* NPOSIXU          */
    0,                                              /* NPOSIXA          */
    0,                                              /* ASCII            */
    0,                                              /* NASCII           */
    0,                                              /* CLUMP            */
    0,                                              /* BRANCH           */
    0,                                              /* EXACT            */
    0,                                              /* EXACTL           */
    0,                                              /* EXACTF           */
    0,                                              /* EXACTFL          */
    0,                                              /* EXACTFU          */
    0,                                              /* EXACTFAA         */
    0,                                              /* EXACTFU_SS       */
    0,                                              /* EXACTFLU8        */
    0,                                              /* EXACTFAA_NO_TRIE */
    0,                                              /* NOTHING          */
    0,                                              /* TAIL             */
    0,                                              /* STAR             */
    0,                                              /* PLUS             */
    EXTRA_SIZE(struct regnode_2),                   /* CURLY            */
    EXTRA_SIZE(struct regnode_2),                   /* CURLYN           */
    EXTRA_SIZE(struct regnode_2),                   /* CURLYM           */
    EXTRA_SIZE(struct regnode_2),                   /* CURLYX           */
    0,                                              /* WHILEM           */
    EXTRA_SIZE(struct regnode_1),                   /* OPEN             */
    EXTRA_SIZE(struct regnode_1),                   /* CLOSE            */
    0,                                              /* SROPEN           */
    0,                                              /* SRCLOSE          */
    EXTRA_SIZE(struct regnode_1),                   /* REF              */
    EXTRA_SIZE(struct regnode_1),                   /* REFF             */
    EXTRA_SIZE(struct regnode_1),                   /* REFFL            */
    EXTRA_SIZE(struct regnode_1),                   /* REFFU            */
    EXTRA_SIZE(struct regnode_1),                   /* REFFA            */
    EXTRA_SIZE(struct regnode_1),                   /* NREF             */
    EXTRA_SIZE(struct regnode_1),                   /* NREFF            */
    EXTRA_SIZE(struct regnode_1),                   /* NREFFL           */
    EXTRA_SIZE(struct regnode_1),                   /* NREFFU           */
    EXTRA_SIZE(struct regnode_1),                   /* NREFFA           */
    EXTRA_SIZE(struct regnode_1),                   /* LONGJMP          */
    EXTRA_SIZE(struct regnode_1),                   /* BRANCHJ          */
    EXTRA_SIZE(struct regnode_1),                   /* IFMATCH          */
    EXTRA_SIZE(struct regnode_1),                   /* UNLESSM          */
    EXTRA_SIZE(struct regnode_1),                   /* SUSPEND          */
    EXTRA_SIZE(struct regnode_1),                   /* IFTHEN           */
    EXTRA_SIZE(struct regnode_1),                   /* GROUPP           */
    EXTRA_SIZE(struct regnode_2L),                  /* EVAL             */
    0,                                              /* MINMOD           */
    0,                                              /* LOGICAL          */
    EXTRA_SIZE(struct regnode_1),                   /* RENUM            */
    EXTRA_SIZE(struct regnode_1),                   /* TRIE             */
    EXTRA_SIZE(struct regnode_charclass),           /* TRIEC            */
    EXTRA_SIZE(struct regnode_1),                   /* AHOCORASICK      */
    EXTRA_SIZE(struct regnode_charclass),           /* AHOCORASICKC     */
    EXTRA_SIZE(struct regnode_2L),                  /* GOSUB            */
    EXTRA_SIZE(struct regnode_1),                   /* NGROUPP          */
    EXTRA_SIZE(struct regnode_1),                   /* INSUBP           */
    EXTRA_SIZE(struct regnode_1),                   /* DEFINEP          */
    0,                                              /* ENDLIKE          */
    EXTRA_SIZE(struct regnode_1),                   /* OPFAIL           */
    EXTRA_SIZE(struct regnode_2L),                  /* ACCEPT           */
    EXTRA_SIZE(struct regnode_1),                   /* VERB             */
    EXTRA_SIZE(struct regnode_1),                   /* PRUNE            */
    EXTRA_SIZE(struct regnode_1),                   /* MARKPOINT        */
    EXTRA_SIZE(struct regnode_1),                   /* SKIP             */
    EXTRA_SIZE(struct regnode_1),                   /* COMMIT           */
    EXTRA_SIZE(struct regnode_1),                   /* CUTGROUP         */
    0,                                              /* KEEPS            */
    0,                                              /* LNBREAK          */
    0,                                              /* OPTIMIZED        */
    0,                                              /* PSEUDO           */
};

/* reg_off_by_arg[] - Which argument holds the offset to the next node.
 * One row per regop, in regop-number order.
 * NOTE(review): 0 presumably means the offset is not stored in an argument;
 * confirm against the code that consumes this table. */

static const char reg_off_by_arg[] = {
    0,    /* END              */
    0,    /* SUCCEED          */
    0,    /* SBOL             */
    0,    /* MBOL             */
    0,    /* SEOL             */
    0,    /* MEOL             */
    0,    /* EOS              */
    0,    /* GPOS             */
    0,    /* BOUND            */
    0,    /* BOUNDL           */
    0,    /* BOUNDU           */
    0,    /* BOUNDA           */
    0,    /* NBOUND           */
    0,    /* NBOUNDL          */
    0,    /* NBOUNDU          */
    0,    /* NBOUNDA          */
    0,    /* REG_ANY          */
    0,    /* SANY             */
    0,    /* ANYOF            */
    0,    /* ANYOFD           */
    0,    /* ANYOFL           */
    0,    /* ANYOFPOSIXL      */
    0,    /* ANYOFM           */
    0,    /* POSIXD           */
    0,    /* POSIXL           */
    0,    /* POSIXU           */
    0,    /* POSIXA           */
    0,    /* NPOSIXD          */
    0,    /* NPOSIXL          */
    0,    /* NPOSIXU          */
    0,    /* NPOSIXA          */
    0,    /* ASCII            */
    0,    /* NASCII           */
    0,    /* CLUMP            */
    0,    /* BRANCH           */
    0,    /* EXACT            */
    0,    /* EXACTL           */
    0,    /* EXACTF           */
    0,    /* EXACTFL          */
    0,    /* EXACTFU          */
    0,    /* EXACTFAA         */
    0,    /* EXACTFU_SS       */
    0,    /* EXACTFLU8        */
    0,    /* EXACTFAA_NO_TRIE */
    0,    /* NOTHING          */
    0,    /* TAIL             */
    0,    /* STAR             */
    0,    /* PLUS             */
    0,    /* CURLY            */
    0,    /* CURLYN           */
    0,    /* CURLYM           */
    0,    /* CURLYX           */
    0,    /* WHILEM           */
    0,    /* OPEN             */
    0,    /* CLOSE            */
    0,    /* SROPEN           */
    0,    /* SRCLOSE          */
    0,    /* REF              */
    0,    /* REFF             */
    0,    /* REFFL            */
    0,    /* REFFU            */
    0,    /* REFFA            */
    0,    /* NREF             */
    0,    /* NREFF            */
    0,    /* NREFFL           */
    0,    /* NREFFU           */
    0,    /* NREFFA           */
    1,    /* LONGJMP          */
    1,    /* BRANCHJ          */
    2,    /* IFMATCH          */
    2,    /* UNLESSM          */
    1,    /* SUSPEND          */
    1,    /* IFTHEN           */
    0,    /* GROUPP           */
    0,    /* EVAL             */
    0,    /* MINMOD           */
    0,    /* LOGICAL          */
    1,    /* RENUM            */
    0,    /* TRIE             */
    0,    /* TRIEC            */
    0,    /* AHOCORASICK      */
    0,    /* AHOCORASICKC     */
    0,    /* GOSUB            */
    0,    /* NGROUPP          */
    0,    /* INSUBP           */
    0,    /* DEFINEP          */
    0,    /* ENDLIKE          */
    0,    /* OPFAIL           */
    0,    /* ACCEPT           */
    0,    /* VERB             */
    0,    /* PRUNE            */
    0,    /* MARKPOINT        */
    0,    /* SKIP             */
    0,    /* COMMIT           */
    0,    /* CUTGROUP         */
    0,    /* KEEPS            */
    0,    /* LNBREAK          */
    0,    /* OPTIMIZED        */
    0,    /* PSEUDO           */
};

#endif /* REG_COMP_C */
515
f83e001e 516
6bda09f9
YO
517/* reg_name[] - Opcode/state names in string form, for debugging */
518
22429478 519#ifndef DOINIT
13d6edb4 520EXTCONST char * PL_reg_name[];
22429478 521#else
4764e399 522EXTCONST char * const PL_reg_name[] = {
03363afd
YO
523 "END", /* 0000 */
524 "SUCCEED", /* 0x01 */
d3d47aac 525 "SBOL", /* 0x02 */
03363afd 526 "MBOL", /* 0x03 */
d3d47aac
YO
527 "SEOL", /* 0x04 */
528 "MEOL", /* 0x05 */
529 "EOS", /* 0x06 */
530 "GPOS", /* 0x07 */
531 "BOUND", /* 0x08 */
532 "BOUNDL", /* 0x09 */
533 "BOUNDU", /* 0x0a */
534 "BOUNDA", /* 0x0b */
535 "NBOUND", /* 0x0c */
536 "NBOUNDL", /* 0x0d */
537 "NBOUNDU", /* 0x0e */
538 "NBOUNDA", /* 0x0f */
539 "REG_ANY", /* 0x10 */
540 "SANY", /* 0x11 */
33c28ab2 541 "ANYOF", /* 0x12 */
ac44c12e
KW
542 "ANYOFD", /* 0x13 */
543 "ANYOFL", /* 0x14 */
3edce4f5
KW
544 "ANYOFPOSIXL", /* 0x15 */
545 "ANYOFM", /* 0x16 */
546 "POSIXD", /* 0x17 */
547 "POSIXL", /* 0x18 */
548 "POSIXU", /* 0x19 */
549 "POSIXA", /* 0x1a */
550 "NPOSIXD", /* 0x1b */
551 "NPOSIXL", /* 0x1c */
552 "NPOSIXU", /* 0x1d */
553 "NPOSIXA", /* 0x1e */
554 "ASCII", /* 0x1f */
555 "NASCII", /* 0x20 */
556 "CLUMP", /* 0x21 */
557 "BRANCH", /* 0x22 */
558 "EXACT", /* 0x23 */
559 "EXACTL", /* 0x24 */
560 "EXACTF", /* 0x25 */
561 "EXACTFL", /* 0x26 */
562 "EXACTFU", /* 0x27 */
563 "EXACTFAA", /* 0x28 */
564 "EXACTFU_SS", /* 0x29 */
565 "EXACTFLU8", /* 0x2a */
566 "EXACTFAA_NO_TRIE", /* 0x2b */
567 "NOTHING", /* 0x2c */
568 "TAIL", /* 0x2d */
569 "STAR", /* 0x2e */
570 "PLUS", /* 0x2f */
571 "CURLY", /* 0x30 */
572 "CURLYN", /* 0x31 */
573 "CURLYM", /* 0x32 */
574 "CURLYX", /* 0x33 */
575 "WHILEM", /* 0x34 */
576 "OPEN", /* 0x35 */
577 "CLOSE", /* 0x36 */
578 "SROPEN", /* 0x37 */
579 "SRCLOSE", /* 0x38 */
580 "REF", /* 0x39 */
581 "REFF", /* 0x3a */
582 "REFFL", /* 0x3b */
583 "REFFU", /* 0x3c */
584 "REFFA", /* 0x3d */
585 "NREF", /* 0x3e */
586 "NREFF", /* 0x3f */
587 "NREFFL", /* 0x40 */
588 "NREFFU", /* 0x41 */
589 "NREFFA", /* 0x42 */
590 "LONGJMP", /* 0x43 */
591 "BRANCHJ", /* 0x44 */
592 "IFMATCH", /* 0x45 */
593 "UNLESSM", /* 0x46 */
594 "SUSPEND", /* 0x47 */
595 "IFTHEN", /* 0x48 */
596 "GROUPP", /* 0x49 */
597 "EVAL", /* 0x4a */
598 "MINMOD", /* 0x4b */
599 "LOGICAL", /* 0x4c */
600 "RENUM", /* 0x4d */
601 "TRIE", /* 0x4e */
602 "TRIEC", /* 0x4f */
603 "AHOCORASICK", /* 0x50 */
604 "AHOCORASICKC", /* 0x51 */
605 "GOSUB", /* 0x52 */
606 "NGROUPP", /* 0x53 */
607 "INSUBP", /* 0x54 */
608 "DEFINEP", /* 0x55 */
609 "ENDLIKE", /* 0x56 */
610 "OPFAIL", /* 0x57 */
611 "ACCEPT", /* 0x58 */
612 "VERB", /* 0x59 */
613 "PRUNE", /* 0x5a */
614 "MARKPOINT", /* 0x5b */
615 "SKIP", /* 0x5c */
616 "COMMIT", /* 0x5d */
617 "CUTGROUP", /* 0x5e */
618 "KEEPS", /* 0x5f */
619 "LNBREAK", /* 0x60 */
620 "OPTIMIZED", /* 0x61 */
621 "PSEUDO", /* 0x62 */
03363afd 622 /* ------------ States ------------- */
24b23f37
YO
623 "TRIE_next", /* REGNODE_MAX +0x01 */
624 "TRIE_next_fail", /* REGNODE_MAX +0x02 */
4ee16520
DM
625 "EVAL_B", /* REGNODE_MAX +0x03 */
626 "EVAL_B_fail", /* REGNODE_MAX +0x04 */
627 "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */
628 "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */
629 "CURLYX_end", /* REGNODE_MAX +0x07 */
630 "CURLYX_end_fail", /* REGNODE_MAX +0x08 */
631 "WHILEM_A_pre", /* REGNODE_MAX +0x09 */
632 "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */
633 "WHILEM_A_min", /* REGNODE_MAX +0x0b */
634 "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */
635 "WHILEM_A_max", /* REGNODE_MAX +0x0d */
636 "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */
637 "WHILEM_B_min", /* REGNODE_MAX +0x0f */
638 "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */
639 "WHILEM_B_max", /* REGNODE_MAX +0x11 */
640 "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */
641 "BRANCH_next", /* REGNODE_MAX +0x13 */
642 "BRANCH_next_fail", /* REGNODE_MAX +0x14 */
643 "CURLYM_A", /* REGNODE_MAX +0x15 */
644 "CURLYM_A_fail", /* REGNODE_MAX +0x16 */
645 "CURLYM_B", /* REGNODE_MAX +0x17 */
646 "CURLYM_B_fail", /* REGNODE_MAX +0x18 */
647 "IFMATCH_A", /* REGNODE_MAX +0x19 */
648 "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */
21cbe009
DM
649 "CURLY_B_min", /* REGNODE_MAX +0x1b */
650 "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
651 "CURLY_B_max", /* REGNODE_MAX +0x1d */
652 "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
653 "COMMIT_next", /* REGNODE_MAX +0x1f */
654 "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
655 "MARKPOINT_next", /* REGNODE_MAX +0x21 */
656 "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
657 "SKIP_next", /* REGNODE_MAX +0x23 */
658 "SKIP_next_fail", /* REGNODE_MAX +0x24 */
659 "CUTGROUP_next", /* REGNODE_MAX +0x25 */
660 "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
661 "KEEPS_next", /* REGNODE_MAX +0x27 */
662 "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
885f9e59 663};
22429478 664#endif /* DOINIT */
d09b2d29 665
f7819f85
A
666/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
667
668#ifndef DOINIT
669EXTCONST char * PL_reg_extflags_name[];
670#else
671EXTCONST char * const PL_reg_extflags_name[] = {
d262c0c7 672 /* Bits in extflags defined: 11111111111111110000111111111111 */
52d81aa8
NC
673 "MULTILINE", /* 0x00000001 */
674 "SINGLELINE", /* 0x00000002 */
675 "FOLD", /* 0x00000004 */
676 "EXTENDED", /* 0x00000008 */
334afb3e 677 "EXTENDED_MORE", /* 0x00000010 */
e3b64d84
KW
678 "NOCAPTURE", /* 0x00000020 */
679 "KEEPCOPY", /* 0x00000040 */
680 "CHARSET0", /* 0x00000080 : "CHARSET" - 0x00000380 */
681 "CHARSET1", /* 0x00000100 : "CHARSET" - 0x00000380 */
682 "CHARSET2", /* 0x00000200 : "CHARSET" - 0x00000380 */
d262c0c7
KW
683 "STRICT", /* 0x00000400 */
684 "SPLIT", /* 0x00000800 */
1d32d911
KW
685 "UNUSED_BIT_12", /* 0x00001000 */
686 "UNUSED_BIT_13", /* 0x00002000 */
687 "UNUSED_BIT_14", /* 0x00004000 */
a3b51d37
KW
688 "UNUSED_BIT_15", /* 0x00008000 */
689 "NO_INPLACE_SUBST", /* 0x00010000 */
690 "EVAL_SEEN", /* 0x00020000 */
ee273784 691 "UNBOUNDED_QUANTIFIER_SEEN",/* 0x00040000 */
e795e964 692 "CHECK_ALL", /* 0x00080000 */
52d81aa8
NC
693 "MATCH_UTF8", /* 0x00100000 */
694 "USE_INTUIT_NOML", /* 0x00200000 */
695 "USE_INTUIT_ML", /* 0x00400000 */
696 "INTUIT_TAIL", /* 0x00800000 */
a3b51d37 697 "IS_ANCHORED", /* 0x01000000 */
52d81aa8
NC
698 "COPY_DONE", /* 0x02000000 */
699 "TAINTED_SEEN", /* 0x04000000 */
700 "TAINTED", /* 0x08000000 */
701 "START_ONLY", /* 0x10000000 */
dbc200c5 702 "SKIPWHITE", /* 0x20000000 */
52d81aa8
NC
703 "WHITE", /* 0x40000000 */
704 "NULL", /* 0x80000000 */
f7819f85
A
705};
706#endif /* DOINIT */
707
adc2d0c9
JH
708#ifdef DEBUGGING
709# define REG_EXTFLAGS_NAME_SIZE 32
710#endif
711
337ff307
YO
712/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */
713
714#ifndef DOINIT
715EXTCONST char * PL_reg_intflags_name[];
716#else
717EXTCONST char * const PL_reg_intflags_name[] = {
b8f6efdd
YO
718 "SKIP", /* 0x00000001 - PREGf_SKIP */
719 "IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
720 "NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
721 "VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
722 "CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
723 "USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
58430ea8 724 "NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
58430ea8
YO
725 "GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
726 "GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
d3d47aac
YO
727 "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
728 "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
729 "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
d5a00e4a 730 "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
337ff307
YO
731};
732#endif /* DOINIT */
733
adc2d0c9 734#ifdef DEBUGGING
d5a00e4a 735# define REG_INTFLAGS_NAME_SIZE 13
adc2d0c9
JH
736#endif
737
/* The following have no fixed length.
 *
 * REGNODE_VARIES(node) is non-zero when the bit for `node` is set in
 * PL_varies_bitmask[]: byte (node >> 3), bit (node & 7).  `node` is evaluated
 * twice, so it must not have side effects. */
#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 740
f9ef50a7 741#ifndef DOINIT
ded4dd2a 742EXTCONST U8 PL_varies[] __attribute__deprecated__;
f9ef50a7 743#else
ded4dd2a 744EXTCONST U8 PL_varies[] __attribute__deprecated__ = {
62e6ef33
AC
745 CLUMP, BRANCH, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, REF,
746 REFF, REFFL, REFFU, REFFA, NREF, NREFF, NREFFL, NREFFU, NREFFA,
d3d47aac 747 BRANCHJ, SUSPEND, IFTHEN,
f9ef50a7
NC
748 0
749};
750#endif /* DOINIT */
751
ded4dd2a
NC
752#ifndef DOINIT
753EXTCONST U8 PL_varies_bitmask[];
754#else
755EXTCONST U8 PL_varies_bitmask[] = {
3edce4f5 756 0x00, 0x00, 0x00, 0x00, 0x06, 0xC0, 0x1F, 0xFE, 0x97, 0x01, 0x00, 0x00, 0x00
ded4dd2a
NC
757};
758#endif /* DOINIT */
759
f9ef50a7
NC
/* The following always have a length of 1.
 * (Note that length 1 means "one character" under UTF8, not "one octet".)
 *
 * REGNODE_SIMPLE(node) is non-zero when the bit for `node` is set in
 * PL_simple_bitmask[]: byte (node >> 3), bit (node & 7).  `node` is evaluated
 * twice, so it must not have side effects. */
#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7)))
e52fc539 763
f9ef50a7 764#ifndef DOINIT
ded4dd2a 765EXTCONST U8 PL_simple[] __attribute__deprecated__;
f9ef50a7 766#else
ded4dd2a 767EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
3edce4f5
KW
768 REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFM, POSIXD,
769 POSIXL, POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, ASCII,
770 NASCII,
f9ef50a7
NC
771 0
772};
773#endif /* DOINIT */
774
ded4dd2a
NC
775#ifndef DOINIT
776EXTCONST U8 PL_simple_bitmask[];
777#else
778EXTCONST U8 PL_simple_bitmask[] = {
3edce4f5 779 0x00, 0x00, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
ded4dd2a
NC
780};
781#endif /* DOINIT */
782
/* ex: set ro: */