Commit | Line | Data |
---|---|---|
37442d52 RGS |
1 | /* -*- buffer-read-only: t -*- |
2 | !!!!!!! DO NOT EDIT THIS FILE !!!!!!! | |
eb2624c9 | 3 | This file is built by regen/regcomp.pl from regcomp.sym. |
d09b2d29 IZ |
4 | Any changes made here will be lost! |
5 | */ | |
6 | ||
6bda09f9 YO |
7 | /* Regops and State definitions */ |
8 | ||
7fcd3a28 KW |
9 | #define REGNODE_MAX 93 |
10 | #define REGMATCH_STATE_MAX 133 | |
03363afd | 11 | |
f9f4320a YO |
12 | #define END 0 /* 0000 End of program. */ |
13 | #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ | |
14 | #define BOL 2 /* 0x02 Match "" at beginning of line. */ | |
15 | #define MBOL 3 /* 0x03 Same, assuming multiline. */ | |
16 | #define SBOL 4 /* 0x04 Same, assuming singleline. */ | |
17 | #define EOS 5 /* 0x05 Match "" at end of string. */ | |
18 | #define EOL 6 /* 0x06 Match "" at end of line. */ | |
19 | #define MEOL 7 /* 0x07 Same, assuming multiline. */ | |
20 | #define SEOL 8 /* 0x08 Same, assuming singleline. */ | |
21 | #define BOUND 9 /* 0x09 Match "" at any word boundary */ | |
22 | #define BOUNDL 10 /* 0x0a Match "" at any locale word boundary */ | |
23 | #define NBOUND 11 /* 0x0b Match "" at any word non-boundary */ | |
24 | #define NBOUNDL 12 /* 0x0c Match "" at any locale word non-boundary */ | |
25 | #define GPOS 13 /* 0x0d Matches where last m//g left off. */ | |
26 | #define REG_ANY 14 /* 0x0e Match any one character (except newline). */ | |
27 | #define SANY 15 /* 0x0f Match any one character. */ | |
28 | #define CANY 16 /* 0x10 Match any one byte. */ | |
617b7ae7 | 29 | #define ANYOF 17 /* 0x11 Match character in (or not in) this class, folding is native charset for non-utf8. */ |
f9f4320a YO |
30 | #define ALNUM 18 /* 0x12 Match any alphanumeric character */ |
31 | #define ALNUML 19 /* 0x13 Match any alphanumeric char in locale */ | |
32 | #define NALNUM 20 /* 0x14 Match any non-alphanumeric character */ | |
33 | #define NALNUML 21 /* 0x15 Match any non-alphanumeric char in locale */ | |
34 | #define SPACE 22 /* 0x16 Match any whitespace character */ | |
35 | #define SPACEL 23 /* 0x17 Match any whitespace char in locale */ | |
36 | #define NSPACE 24 /* 0x18 Match any non-whitespace character */ | |
37 | #define NSPACEL 25 /* 0x19 Match any non-whitespace char in locale */ | |
38 | #define DIGIT 26 /* 0x1a Match any numeric character */ | |
39 | #define DIGITL 27 /* 0x1b Match any numeric character in locale */ | |
40 | #define NDIGIT 28 /* 0x1c Match any non-numeric character */ | |
41 | #define NDIGITL 29 /* 0x1d Match any non-numeric character in locale */ | |
2448cf39 | 42 | #define CLUMP 30 /* 0x1e Match any extended grapheme cluster sequence */ |
f9f4320a YO |
43 | #define BRANCH 31 /* 0x1f Match this alternative, or the next... */ |
44 | #define BACK 32 /* 0x20 Match "", "next" ptr points backward. */ | |
45 | #define EXACT 33 /* 0x21 Match this string (preceded by length). */ | |
2448cf39 | 46 | #define EXACTF 34 /* 0x22 Match this string, folded, native charset semantics for non-utf8 (prec. by length). */ |
f9f4320a | 47 | #define EXACTFL 35 /* 0x23 Match this string, folded in locale (w/len). */ |
01f98ec2 KW |
48 | #define EXACTFU 36 /* 0x24 Match this string, folded, Unicode semantics for non-utf8 (prec. by length). */ |
49 | #define NOTHING 37 /* 0x25 Match empty string. */ | |
50 | #define TAIL 38 /* 0x26 Match empty string. Can jump here from outside. */ | |
51 | #define STAR 39 /* 0x27 Match this (simple) thing 0 or more times. */ | |
52 | #define PLUS 40 /* 0x28 Match this (simple) thing 1 or more times. */ | |
53 | #define CURLY 41 /* 0x29 Match this simple thing {n,m} times. */ | |
54 | #define CURLYN 42 /* 0x2a Capture next-after-this simple thing */ | |
55 | #define CURLYM 43 /* 0x2b Capture this medium-complex thing {n,m} times. */ | |
56 | #define CURLYX 44 /* 0x2c Match this complex thing {n,m} times. */ | |
57 | #define WHILEM 45 /* 0x2d Do curly processing and see if rest matches. */ | |
58 | #define OPEN 46 /* 0x2e Mark this point in input as start of group #n. */ | |
59 | #define CLOSE 47 /* 0x2f Analogous to OPEN. */ | |
60 | #define REF 48 /* 0x30 Match some already matched string */ | |
61 | #define REFF 49 /* 0x31 Match already matched string, folded using native charset semantics for non-utf8 */ | |
62 | #define REFFL 50 /* 0x32 Match already matched string, folded in loc. */ | |
63 | #define REFFU 51 /* 0x33 Match already matched string, folded using unicode semantics for non-utf8 */ | |
64 | #define NREF 52 /* 0x34 Match some already matched string */ | |
65 | #define NREFF 53 /* 0x35 Match already matched string, folded using native charset semantics for non-utf8 */ | |
66 | #define NREFFL 54 /* 0x36 Match already matched string, folded in loc. */ | |
67 | #define NREFFU 55 /* 0x37 Match already matched string, folded using unicode semantics for non-utf8 */ | |
68 | #define IFMATCH 56 /* 0x38 Succeeds if the following matches. */ | |
69 | #define UNLESSM 57 /* 0x39 Fails if the following matches. */ | |
70 | #define SUSPEND 58 /* 0x3a "Independent" sub-RE. */ | |
eca57c3a | 71 | #define IFTHEN 59 /* 0x3b Switch, should be preceded by switcher. */ |
01f98ec2 KW |
72 | #define GROUPP 60 /* 0x3c Whether the group matched. */ |
73 | #define LONGJMP 61 /* 0x3d Jump far away. */ | |
74 | #define BRANCHJ 62 /* 0x3e BRANCH with long offset. */ | |
75 | #define EVAL 63 /* 0x3f Execute some Perl code. */ | |
76 | #define MINMOD 64 /* 0x40 Next operator is not greedy. */ | |
77 | #define LOGICAL 65 /* 0x41 Next opcode should set the flag only. */ | |
78 | #define RENUM 66 /* 0x42 Group with independently numbered parens. */ | |
79 | #define TRIE 67 /* 0x43 Match many EXACT(F[LU]?)? at once. flags==type */ | |
80 | #define TRIEC 68 /* 0x44 Same as TRIE, but with embedded charclass data */ | |
81 | #define AHOCORASICK 69 /* 0x45 Aho Corasick stclass. flags==type */ | |
82 | #define AHOCORASICKC 70 /* 0x46 Same as AHOCORASICK, but with embedded charclass data */ | |
83 | #define GOSUB 71 /* 0x47 recurse to paren arg1 at (signed) ofs arg2 */ | |
84 | #define GOSTART 72 /* 0x48 recurse to start of pattern */ | |
85 | #define NGROUPP 73 /* 0x49 Whether the group matched. */ | |
86 | #define INSUBP 74 /* 0x4a Whether we are in a specific recurse. */ | |
87 | #define DEFINEP 75 /* 0x4b Never execute directly. */ | |
88 | #define ENDLIKE 76 /* 0x4c Used only for the type field of verbs */ | |
89 | #define OPFAIL 77 /* 0x4d Same as (?!) */ | |
90 | #define ACCEPT 78 /* 0x4e Accepts the current matched string. */ | |
91 | #define VERB 79 /* 0x4f Used only for the type field of verbs */ | |
92 | #define PRUNE 80 /* 0x50 Pattern fails at this startpoint if no-backtracking through this */ | |
93 | #define MARKPOINT 81 /* 0x51 Push the current location for rollback by cut. */ | |
94 | #define SKIP 82 /* 0x52 On failure skip forward (to the mark) before retrying */ | |
95 | #define COMMIT 83 /* 0x53 Pattern fails outright if backtracking through this */ | |
96 | #define CUTGROUP 84 /* 0x54 On failure go to the next alternation in the group */ | |
97 | #define KEEPS 85 /* 0x55 $& begins here. */ | |
98 | #define LNBREAK 86 /* 0x56 generic newline pattern */ | |
99 | #define VERTWS 87 /* 0x57 vertical whitespace (Perl 6) */ | |
100 | #define NVERTWS 88 /* 0x58 not vertical whitespace (Perl 6) */ | |
101 | #define HORIZWS 89 /* 0x59 horizontal whitespace (Perl 6) */ | |
102 | #define NHORIZWS 90 /* 0x5a not horizontal whitespace (Perl 6) */ | |
103 | #define FOLDCHAR 91 /* 0x5b codepoint with tricky case folding properties. */ | |
7fcd3a28 KW |
104 | #define OPTIMIZED 92 /* 0x5c Placeholder for dump. */ |
105 | #define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. */ | |
03363afd | 106 | /* ------------ States ------------- */ |
24b23f37 YO |
107 | #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ |
108 | #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ | |
109 | #define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */ | |
110 | #define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */ | |
111 | #define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */ | |
112 | #define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */ | |
113 | #define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */ | |
114 | #define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */ | |
115 | #define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */ | |
116 | #define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */ | |
117 | #define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */ | |
118 | #define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */ | |
119 | #define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */ | |
120 | #define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */ | |
121 | #define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */ | |
122 | #define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */ | |
123 | #define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */ | |
124 | #define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */ | |
125 | #define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */ | |
126 | #define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */ | |
127 | #define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */ | |
128 | #define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */ | |
129 | #define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */ | |
130 | #define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */ | |
131 | #define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */ | |
132 | #define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */ | |
133 | #define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */ | |
134 | #define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */ | |
135 | #define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */ | |
136 | #define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */ | |
137 | #define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */ | |
138 | #define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */ | |
e2e6a0f1 YO |
139 | #define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */ |
140 | #define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */ | |
5d458dd8 YO |
141 | #define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */ |
142 | #define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */ | |
143 | #define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */ | |
144 | #define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */ | |
ee9b8eae YO |
145 | #define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */ |
146 | #define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */ | |
03363afd | 147 | |
6bda09f9 | 148 | /* PL_regkind[] - What type of regop or state is this. */ |
d09b2d29 IZ |
149 | |
150 | #ifndef DOINIT | |
22c35a8c | 151 | EXTCONST U8 PL_regkind[]; |
d09b2d29 | 152 | #else |
22c35a8c | 153 | EXTCONST U8 PL_regkind[] = { |
e2e6a0f1 YO |
154 | END, /* END */ |
155 | END, /* SUCCEED */ | |
156 | BOL, /* BOL */ | |
157 | BOL, /* MBOL */ | |
158 | BOL, /* SBOL */ | |
159 | EOL, /* EOS */ | |
160 | EOL, /* EOL */ | |
161 | EOL, /* MEOL */ | |
162 | EOL, /* SEOL */ | |
163 | BOUND, /* BOUND */ | |
164 | BOUND, /* BOUNDL */ | |
165 | NBOUND, /* NBOUND */ | |
166 | NBOUND, /* NBOUNDL */ | |
167 | GPOS, /* GPOS */ | |
168 | REG_ANY, /* REG_ANY */ | |
169 | REG_ANY, /* SANY */ | |
170 | REG_ANY, /* CANY */ | |
171 | ANYOF, /* ANYOF */ | |
172 | ALNUM, /* ALNUM */ | |
173 | ALNUM, /* ALNUML */ | |
174 | NALNUM, /* NALNUM */ | |
175 | NALNUM, /* NALNUML */ | |
176 | SPACE, /* SPACE */ | |
177 | SPACE, /* SPACEL */ | |
178 | NSPACE, /* NSPACE */ | |
179 | NSPACE, /* NSPACEL */ | |
180 | DIGIT, /* DIGIT */ | |
181 | DIGIT, /* DIGITL */ | |
182 | NDIGIT, /* NDIGIT */ | |
183 | NDIGIT, /* NDIGITL */ | |
184 | CLUMP, /* CLUMP */ | |
185 | BRANCH, /* BRANCH */ | |
186 | BACK, /* BACK */ | |
187 | EXACT, /* EXACT */ | |
188 | EXACT, /* EXACTF */ | |
189 | EXACT, /* EXACTFL */ | |
01f98ec2 | 190 | EXACT, /* EXACTFU */ |
e2e6a0f1 YO |
191 | NOTHING, /* NOTHING */ |
192 | NOTHING, /* TAIL */ | |
193 | STAR, /* STAR */ | |
194 | PLUS, /* PLUS */ | |
195 | CURLY, /* CURLY */ | |
196 | CURLY, /* CURLYN */ | |
197 | CURLY, /* CURLYM */ | |
198 | CURLY, /* CURLYX */ | |
199 | WHILEM, /* WHILEM */ | |
200 | OPEN, /* OPEN */ | |
201 | CLOSE, /* CLOSE */ | |
202 | REF, /* REF */ | |
203 | REF, /* REFF */ | |
204 | REF, /* REFFL */ | |
01f98ec2 KW |
205 | REF, /* REFFU */ |
206 | REF, /* NREF */ | |
207 | REF, /* NREFF */ | |
208 | REF, /* NREFFL */ | |
209 | REF, /* NREFFU */ | |
e2e6a0f1 YO |
210 | BRANCHJ, /* IFMATCH */ |
211 | BRANCHJ, /* UNLESSM */ | |
212 | BRANCHJ, /* SUSPEND */ | |
213 | BRANCHJ, /* IFTHEN */ | |
214 | GROUPP, /* GROUPP */ | |
215 | LONGJMP, /* LONGJMP */ | |
216 | BRANCHJ, /* BRANCHJ */ | |
217 | EVAL, /* EVAL */ | |
218 | MINMOD, /* MINMOD */ | |
219 | LOGICAL, /* LOGICAL */ | |
220 | BRANCHJ, /* RENUM */ | |
221 | TRIE, /* TRIE */ | |
222 | TRIE, /* TRIEC */ | |
223 | TRIE, /* AHOCORASICK */ | |
224 | TRIE, /* AHOCORASICKC */ | |
225 | GOSUB, /* GOSUB */ | |
226 | GOSTART, /* GOSTART */ | |
e2e6a0f1 YO |
227 | NGROUPP, /* NGROUPP */ |
228 | INSUBP, /* INSUBP */ | |
229 | DEFINEP, /* DEFINEP */ | |
230 | ENDLIKE, /* ENDLIKE */ | |
231 | ENDLIKE, /* OPFAIL */ | |
232 | ENDLIKE, /* ACCEPT */ | |
233 | VERB, /* VERB */ | |
5d458dd8 | 234 | VERB, /* PRUNE */ |
e2e6a0f1 | 235 | VERB, /* MARKPOINT */ |
5d458dd8 | 236 | VERB, /* SKIP */ |
e2e6a0f1 | 237 | VERB, /* COMMIT */ |
5d458dd8 | 238 | VERB, /* CUTGROUP */ |
ee9b8eae | 239 | KEEPS, /* KEEPS */ |
e1d1eefb YO |
240 | LNBREAK, /* LNBREAK */ |
241 | VERTWS, /* VERTWS */ | |
242 | NVERTWS, /* NVERTWS */ | |
243 | HORIZWS, /* HORIZWS */ | |
244 | NHORIZWS, /* NHORIZWS */ | |
32e6a07c | 245 | FOLDCHAR, /* FOLDCHAR */ |
e2e6a0f1 YO |
246 | NOTHING, /* OPTIMIZED */ |
247 | PSEUDO, /* PSEUDO */ | |
03363afd | 248 | /* ------------ States ------------- */ |
e2e6a0f1 YO |
249 | TRIE, /* TRIE_next */ |
250 | TRIE, /* TRIE_next_fail */ | |
251 | EVAL, /* EVAL_AB */ | |
252 | EVAL, /* EVAL_AB_fail */ | |
253 | CURLYX, /* CURLYX_end */ | |
254 | CURLYX, /* CURLYX_end_fail */ | |
255 | WHILEM, /* WHILEM_A_pre */ | |
256 | WHILEM, /* WHILEM_A_pre_fail */ | |
257 | WHILEM, /* WHILEM_A_min */ | |
258 | WHILEM, /* WHILEM_A_min_fail */ | |
259 | WHILEM, /* WHILEM_A_max */ | |
260 | WHILEM, /* WHILEM_A_max_fail */ | |
261 | WHILEM, /* WHILEM_B_min */ | |
262 | WHILEM, /* WHILEM_B_min_fail */ | |
263 | WHILEM, /* WHILEM_B_max */ | |
264 | WHILEM, /* WHILEM_B_max_fail */ | |
265 | BRANCH, /* BRANCH_next */ | |
266 | BRANCH, /* BRANCH_next_fail */ | |
267 | CURLYM, /* CURLYM_A */ | |
268 | CURLYM, /* CURLYM_A_fail */ | |
269 | CURLYM, /* CURLYM_B */ | |
270 | CURLYM, /* CURLYM_B_fail */ | |
271 | IFMATCH, /* IFMATCH_A */ | |
272 | IFMATCH, /* IFMATCH_A_fail */ | |
273 | CURLY, /* CURLY_B_min_known */ | |
274 | CURLY, /* CURLY_B_min_known_fail */ | |
275 | CURLY, /* CURLY_B_min */ | |
276 | CURLY, /* CURLY_B_min_fail */ | |
277 | CURLY, /* CURLY_B_max */ | |
278 | CURLY, /* CURLY_B_max_fail */ | |
279 | COMMIT, /* COMMIT_next */ | |
280 | COMMIT, /* COMMIT_next_fail */ | |
281 | MARKPOINT, /* MARKPOINT_next */ | |
282 | MARKPOINT, /* MARKPOINT_next_fail */ | |
5d458dd8 YO |
283 | SKIP, /* SKIP_next */ |
284 | SKIP, /* SKIP_next_fail */ | |
285 | CUTGROUP, /* CUTGROUP_next */ | |
286 | CUTGROUP, /* CUTGROUP_next_fail */ | |
ee9b8eae YO |
287 | KEEPS, /* KEEPS_next */ |
288 | KEEPS, /* KEEPS_next_fail */ | |
d09b2d29 IZ |
289 | }; |
290 | #endif | |
291 | ||
6bda09f9 | 292 | /* regarglen[] - How large is the argument part of the node (in regnodes) */ |
d09b2d29 IZ |
293 | |
294 | #ifdef REG_COMP_C | |
29de9391 | 295 | static const U8 regarglen[] = { |
03363afd YO |
296 | 0, /* END */ |
297 | 0, /* SUCCEED */ | |
298 | 0, /* BOL */ | |
299 | 0, /* MBOL */ | |
300 | 0, /* SBOL */ | |
301 | 0, /* EOS */ | |
302 | 0, /* EOL */ | |
303 | 0, /* MEOL */ | |
304 | 0, /* SEOL */ | |
305 | 0, /* BOUND */ | |
306 | 0, /* BOUNDL */ | |
307 | 0, /* NBOUND */ | |
308 | 0, /* NBOUNDL */ | |
309 | 0, /* GPOS */ | |
310 | 0, /* REG_ANY */ | |
311 | 0, /* SANY */ | |
312 | 0, /* CANY */ | |
313 | 0, /* ANYOF */ | |
314 | 0, /* ALNUM */ | |
315 | 0, /* ALNUML */ | |
316 | 0, /* NALNUM */ | |
317 | 0, /* NALNUML */ | |
318 | 0, /* SPACE */ | |
319 | 0, /* SPACEL */ | |
320 | 0, /* NSPACE */ | |
321 | 0, /* NSPACEL */ | |
322 | 0, /* DIGIT */ | |
323 | 0, /* DIGITL */ | |
324 | 0, /* NDIGIT */ | |
325 | 0, /* NDIGITL */ | |
326 | 0, /* CLUMP */ | |
327 | 0, /* BRANCH */ | |
328 | 0, /* BACK */ | |
329 | 0, /* EXACT */ | |
330 | 0, /* EXACTF */ | |
331 | 0, /* EXACTFL */ | |
01f98ec2 | 332 | 0, /* EXACTFU */ |
03363afd YO |
333 | 0, /* NOTHING */ |
334 | 0, /* TAIL */ | |
335 | 0, /* STAR */ | |
336 | 0, /* PLUS */ | |
337 | EXTRA_SIZE(struct regnode_2), /* CURLY */ | |
338 | EXTRA_SIZE(struct regnode_2), /* CURLYN */ | |
339 | EXTRA_SIZE(struct regnode_2), /* CURLYM */ | |
340 | EXTRA_SIZE(struct regnode_2), /* CURLYX */ | |
341 | 0, /* WHILEM */ | |
342 | EXTRA_SIZE(struct regnode_1), /* OPEN */ | |
343 | EXTRA_SIZE(struct regnode_1), /* CLOSE */ | |
344 | EXTRA_SIZE(struct regnode_1), /* REF */ | |
345 | EXTRA_SIZE(struct regnode_1), /* REFF */ | |
346 | EXTRA_SIZE(struct regnode_1), /* REFFL */ | |
01f98ec2 KW |
347 | EXTRA_SIZE(struct regnode_1), /* REFFU */ |
348 | EXTRA_SIZE(struct regnode_1), /* NREF */ | |
349 | EXTRA_SIZE(struct regnode_1), /* NREFF */ | |
350 | EXTRA_SIZE(struct regnode_1), /* NREFFL */ | |
351 | EXTRA_SIZE(struct regnode_1), /* NREFFU */ | |
03363afd YO |
352 | EXTRA_SIZE(struct regnode_1), /* IFMATCH */ |
353 | EXTRA_SIZE(struct regnode_1), /* UNLESSM */ | |
354 | EXTRA_SIZE(struct regnode_1), /* SUSPEND */ | |
355 | EXTRA_SIZE(struct regnode_1), /* IFTHEN */ | |
356 | EXTRA_SIZE(struct regnode_1), /* GROUPP */ | |
357 | EXTRA_SIZE(struct regnode_1), /* LONGJMP */ | |
358 | EXTRA_SIZE(struct regnode_1), /* BRANCHJ */ | |
359 | EXTRA_SIZE(struct regnode_1), /* EVAL */ | |
360 | 0, /* MINMOD */ | |
361 | 0, /* LOGICAL */ | |
362 | EXTRA_SIZE(struct regnode_1), /* RENUM */ | |
363 | EXTRA_SIZE(struct regnode_1), /* TRIE */ | |
364 | EXTRA_SIZE(struct regnode_charclass), /* TRIEC */ | |
365 | EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */ | |
366 | EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */ | |
1a147d38 YO |
367 | EXTRA_SIZE(struct regnode_2L), /* GOSUB */ |
368 | 0, /* GOSTART */ | |
0a4db386 | 369 | EXTRA_SIZE(struct regnode_1), /* NGROUPP */ |
1a147d38 | 370 | EXTRA_SIZE(struct regnode_1), /* INSUBP */ |
0a4db386 | 371 | EXTRA_SIZE(struct regnode_1), /* DEFINEP */ |
e2e6a0f1 | 372 | 0, /* ENDLIKE */ |
7f69552c | 373 | 0, /* OPFAIL */ |
e2e6a0f1 | 374 | EXTRA_SIZE(struct regnode_1), /* ACCEPT */ |
20832bc5 | 375 | EXTRA_SIZE(struct regnode_1), /* VERB */ |
5d458dd8 | 376 | EXTRA_SIZE(struct regnode_1), /* PRUNE */ |
e2e6a0f1 | 377 | EXTRA_SIZE(struct regnode_1), /* MARKPOINT */ |
5d458dd8 | 378 | EXTRA_SIZE(struct regnode_1), /* SKIP */ |
e2e6a0f1 | 379 | EXTRA_SIZE(struct regnode_1), /* COMMIT */ |
5d458dd8 | 380 | EXTRA_SIZE(struct regnode_1), /* CUTGROUP */ |
ee9b8eae | 381 | 0, /* KEEPS */ |
e1d1eefb YO |
382 | 0, /* LNBREAK */ |
383 | 0, /* VERTWS */ | |
384 | 0, /* NVERTWS */ | |
385 | 0, /* HORIZWS */ | |
386 | 0, /* NHORIZWS */ | |
32e6a07c | 387 | EXTRA_SIZE(struct regnode_1), /* FOLDCHAR */ |
03363afd YO |
388 | 0, /* OPTIMIZED */ |
389 | 0, /* PSEUDO */ | |
d09b2d29 IZ |
390 | }; |
391 | ||
6bda09f9 YO |
392 | /* reg_off_by_arg[] - Which argument holds the offset to the next node */ |
393 | ||
29de9391 | 394 | static const char reg_off_by_arg[] = { |
03363afd YO |
395 | 0, /* END */ |
396 | 0, /* SUCCEED */ | |
397 | 0, /* BOL */ | |
398 | 0, /* MBOL */ | |
399 | 0, /* SBOL */ | |
400 | 0, /* EOS */ | |
401 | 0, /* EOL */ | |
402 | 0, /* MEOL */ | |
403 | 0, /* SEOL */ | |
404 | 0, /* BOUND */ | |
405 | 0, /* BOUNDL */ | |
406 | 0, /* NBOUND */ | |
407 | 0, /* NBOUNDL */ | |
408 | 0, /* GPOS */ | |
409 | 0, /* REG_ANY */ | |
410 | 0, /* SANY */ | |
411 | 0, /* CANY */ | |
412 | 0, /* ANYOF */ | |
413 | 0, /* ALNUM */ | |
414 | 0, /* ALNUML */ | |
415 | 0, /* NALNUM */ | |
416 | 0, /* NALNUML */ | |
417 | 0, /* SPACE */ | |
418 | 0, /* SPACEL */ | |
419 | 0, /* NSPACE */ | |
420 | 0, /* NSPACEL */ | |
421 | 0, /* DIGIT */ | |
422 | 0, /* DIGITL */ | |
423 | 0, /* NDIGIT */ | |
424 | 0, /* NDIGITL */ | |
425 | 0, /* CLUMP */ | |
426 | 0, /* BRANCH */ | |
427 | 0, /* BACK */ | |
428 | 0, /* EXACT */ | |
429 | 0, /* EXACTF */ | |
430 | 0, /* EXACTFL */ | |
01f98ec2 | 431 | 0, /* EXACTFU */ |
03363afd YO |
432 | 0, /* NOTHING */ |
433 | 0, /* TAIL */ | |
434 | 0, /* STAR */ | |
435 | 0, /* PLUS */ | |
436 | 0, /* CURLY */ | |
437 | 0, /* CURLYN */ | |
438 | 0, /* CURLYM */ | |
439 | 0, /* CURLYX */ | |
440 | 0, /* WHILEM */ | |
441 | 0, /* OPEN */ | |
442 | 0, /* CLOSE */ | |
443 | 0, /* REF */ | |
444 | 0, /* REFF */ | |
445 | 0, /* REFFL */ | |
01f98ec2 KW |
446 | 0, /* REFFU */ |
447 | 0, /* NREF */ | |
448 | 0, /* NREFF */ | |
449 | 0, /* NREFFL */ | |
450 | 0, /* NREFFU */ | |
03363afd YO |
451 | 2, /* IFMATCH */ |
452 | 2, /* UNLESSM */ | |
453 | 1, /* SUSPEND */ | |
454 | 1, /* IFTHEN */ | |
455 | 0, /* GROUPP */ | |
456 | 1, /* LONGJMP */ | |
457 | 1, /* BRANCHJ */ | |
458 | 0, /* EVAL */ | |
459 | 0, /* MINMOD */ | |
460 | 0, /* LOGICAL */ | |
461 | 1, /* RENUM */ | |
462 | 0, /* TRIE */ | |
463 | 0, /* TRIEC */ | |
464 | 0, /* AHOCORASICK */ | |
465 | 0, /* AHOCORASICKC */ | |
1a147d38 YO |
466 | 0, /* GOSUB */ |
467 | 0, /* GOSTART */ | |
0a4db386 | 468 | 0, /* NGROUPP */ |
1a147d38 | 469 | 0, /* INSUBP */ |
0a4db386 | 470 | 0, /* DEFINEP */ |
e2e6a0f1 | 471 | 0, /* ENDLIKE */ |
7f69552c | 472 | 0, /* OPFAIL */ |
e2e6a0f1 YO |
473 | 0, /* ACCEPT */ |
474 | 0, /* VERB */ | |
5d458dd8 | 475 | 0, /* PRUNE */ |
e2e6a0f1 | 476 | 0, /* MARKPOINT */ |
5d458dd8 | 477 | 0, /* SKIP */ |
e2e6a0f1 | 478 | 0, /* COMMIT */ |
5d458dd8 | 479 | 0, /* CUTGROUP */ |
ee9b8eae | 480 | 0, /* KEEPS */ |
e1d1eefb YO |
481 | 0, /* LNBREAK */ |
482 | 0, /* VERTWS */ | |
483 | 0, /* NVERTWS */ | |
484 | 0, /* HORIZWS */ | |
485 | 0, /* NHORIZWS */ | |
32e6a07c | 486 | 0, /* FOLDCHAR */ |
03363afd YO |
487 | 0, /* OPTIMIZED */ |
488 | 0, /* PSEUDO */ | |
d09b2d29 | 489 | }; |
885f9e59 | 490 | |
13d6edb4 NC |
491 | #endif /* REG_COMP_C */ |
492 | ||
6bda09f9 YO |
493 | /* PL_reg_name[] - Opcode/state names in string form, for debugging */ |
494 | ||
22429478 | 495 | #ifndef DOINIT |
13d6edb4 | 496 | EXTCONST char * const PL_reg_name[]; /* const-qualified to match the DOINIT definition */ |
22429478 | 497 | #else |
4764e399 | 498 | EXTCONST char * const PL_reg_name[] = { |
03363afd YO |
499 | "END", /* 0000 */ |
500 | "SUCCEED", /* 0x01 */ | |
501 | "BOL", /* 0x02 */ | |
502 | "MBOL", /* 0x03 */ | |
503 | "SBOL", /* 0x04 */ | |
504 | "EOS", /* 0x05 */ | |
505 | "EOL", /* 0x06 */ | |
506 | "MEOL", /* 0x07 */ | |
507 | "SEOL", /* 0x08 */ | |
508 | "BOUND", /* 0x09 */ | |
509 | "BOUNDL", /* 0x0a */ | |
510 | "NBOUND", /* 0x0b */ | |
511 | "NBOUNDL", /* 0x0c */ | |
512 | "GPOS", /* 0x0d */ | |
513 | "REG_ANY", /* 0x0e */ | |
514 | "SANY", /* 0x0f */ | |
515 | "CANY", /* 0x10 */ | |
516 | "ANYOF", /* 0x11 */ | |
517 | "ALNUM", /* 0x12 */ | |
518 | "ALNUML", /* 0x13 */ | |
519 | "NALNUM", /* 0x14 */ | |
520 | "NALNUML", /* 0x15 */ | |
521 | "SPACE", /* 0x16 */ | |
522 | "SPACEL", /* 0x17 */ | |
523 | "NSPACE", /* 0x18 */ | |
524 | "NSPACEL", /* 0x19 */ | |
525 | "DIGIT", /* 0x1a */ | |
526 | "DIGITL", /* 0x1b */ | |
527 | "NDIGIT", /* 0x1c */ | |
528 | "NDIGITL", /* 0x1d */ | |
529 | "CLUMP", /* 0x1e */ | |
530 | "BRANCH", /* 0x1f */ | |
531 | "BACK", /* 0x20 */ | |
532 | "EXACT", /* 0x21 */ | |
533 | "EXACTF", /* 0x22 */ | |
534 | "EXACTFL", /* 0x23 */ | |
01f98ec2 KW |
535 | "EXACTFU", /* 0x24 */ |
536 | "NOTHING", /* 0x25 */ | |
537 | "TAIL", /* 0x26 */ | |
538 | "STAR", /* 0x27 */ | |
539 | "PLUS", /* 0x28 */ | |
540 | "CURLY", /* 0x29 */ | |
541 | "CURLYN", /* 0x2a */ | |
542 | "CURLYM", /* 0x2b */ | |
543 | "CURLYX", /* 0x2c */ | |
544 | "WHILEM", /* 0x2d */ | |
545 | "OPEN", /* 0x2e */ | |
546 | "CLOSE", /* 0x2f */ | |
547 | "REF", /* 0x30 */ | |
548 | "REFF", /* 0x31 */ | |
549 | "REFFL", /* 0x32 */ | |
550 | "REFFU", /* 0x33 */ | |
551 | "NREF", /* 0x34 */ | |
552 | "NREFF", /* 0x35 */ | |
553 | "NREFFL", /* 0x36 */ | |
554 | "NREFFU", /* 0x37 */ | |
555 | "IFMATCH", /* 0x38 */ | |
556 | "UNLESSM", /* 0x39 */ | |
557 | "SUSPEND", /* 0x3a */ | |
558 | "IFTHEN", /* 0x3b */ | |
559 | "GROUPP", /* 0x3c */ | |
560 | "LONGJMP", /* 0x3d */ | |
561 | "BRANCHJ", /* 0x3e */ | |
562 | "EVAL", /* 0x3f */ | |
563 | "MINMOD", /* 0x40 */ | |
564 | "LOGICAL", /* 0x41 */ | |
565 | "RENUM", /* 0x42 */ | |
566 | "TRIE", /* 0x43 */ | |
567 | "TRIEC", /* 0x44 */ | |
568 | "AHOCORASICK", /* 0x45 */ | |
569 | "AHOCORASICKC", /* 0x46 */ | |
570 | "GOSUB", /* 0x47 */ | |
571 | "GOSTART", /* 0x48 */ | |
572 | "NGROUPP", /* 0x49 */ | |
573 | "INSUBP", /* 0x4a */ | |
574 | "DEFINEP", /* 0x4b */ | |
575 | "ENDLIKE", /* 0x4c */ | |
576 | "OPFAIL", /* 0x4d */ | |
577 | "ACCEPT", /* 0x4e */ | |
578 | "VERB", /* 0x4f */ | |
579 | "PRUNE", /* 0x50 */ | |
580 | "MARKPOINT", /* 0x51 */ | |
581 | "SKIP", /* 0x52 */ | |
582 | "COMMIT", /* 0x53 */ | |
583 | "CUTGROUP", /* 0x54 */ | |
584 | "KEEPS", /* 0x55 */ | |
585 | "LNBREAK", /* 0x56 */ | |
586 | "VERTWS", /* 0x57 */ | |
587 | "NVERTWS", /* 0x58 */ | |
588 | "HORIZWS", /* 0x59 */ | |
589 | "NHORIZWS", /* 0x5a */ | |
590 | "FOLDCHAR", /* 0x5b */ | |
7fcd3a28 KW |
591 | "OPTIMIZED", /* 0x5c */ |
592 | "PSEUDO", /* 0x5d */ | |
03363afd | 593 | /* ------------ States ------------- */ |
24b23f37 YO |
594 | "TRIE_next", /* REGNODE_MAX +0x01 */ |
595 | "TRIE_next_fail", /* REGNODE_MAX +0x02 */ | |
596 | "EVAL_AB", /* REGNODE_MAX +0x03 */ | |
597 | "EVAL_AB_fail", /* REGNODE_MAX +0x04 */ | |
598 | "CURLYX_end", /* REGNODE_MAX +0x05 */ | |
599 | "CURLYX_end_fail", /* REGNODE_MAX +0x06 */ | |
600 | "WHILEM_A_pre", /* REGNODE_MAX +0x07 */ | |
601 | "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */ | |
602 | "WHILEM_A_min", /* REGNODE_MAX +0x09 */ | |
603 | "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */ | |
604 | "WHILEM_A_max", /* REGNODE_MAX +0x0b */ | |
605 | "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */ | |
606 | "WHILEM_B_min", /* REGNODE_MAX +0x0d */ | |
607 | "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */ | |
608 | "WHILEM_B_max", /* REGNODE_MAX +0x0f */ | |
609 | "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */ | |
610 | "BRANCH_next", /* REGNODE_MAX +0x11 */ | |
611 | "BRANCH_next_fail", /* REGNODE_MAX +0x12 */ | |
612 | "CURLYM_A", /* REGNODE_MAX +0x13 */ | |
613 | "CURLYM_A_fail", /* REGNODE_MAX +0x14 */ | |
614 | "CURLYM_B", /* REGNODE_MAX +0x15 */ | |
615 | "CURLYM_B_fail", /* REGNODE_MAX +0x16 */ | |
616 | "IFMATCH_A", /* REGNODE_MAX +0x17 */ | |
617 | "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */ | |
618 | "CURLY_B_min_known", /* REGNODE_MAX +0x19 */ | |
619 | "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */ | |
620 | "CURLY_B_min", /* REGNODE_MAX +0x1b */ | |
621 | "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */ | |
622 | "CURLY_B_max", /* REGNODE_MAX +0x1d */ | |
623 | "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */ | |
624 | "COMMIT_next", /* REGNODE_MAX +0x1f */ | |
625 | "COMMIT_next_fail", /* REGNODE_MAX +0x20 */ | |
e2e6a0f1 YO |
626 | "MARKPOINT_next", /* REGNODE_MAX +0x21 */ |
627 | "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */ | |
5d458dd8 YO |
628 | "SKIP_next", /* REGNODE_MAX +0x23 */ |
629 | "SKIP_next_fail", /* REGNODE_MAX +0x24 */ | |
630 | "CUTGROUP_next", /* REGNODE_MAX +0x25 */ | |
631 | "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */ | |
ee9b8eae YO |
632 | "KEEPS_next", /* REGNODE_MAX +0x27 */ |
633 | "KEEPS_next_fail", /* REGNODE_MAX +0x28 */ | |
885f9e59 | 634 | }; |
22429478 | 635 | #endif /* DOINIT */ |
d09b2d29 | 636 | |
f7819f85 A |
637 | /* PL_reg_extflags_name[] - Names of the extflags bits in string form, for debugging */ |
638 | ||
639 | #ifndef DOINIT | |
640 | EXTCONST char * const PL_reg_extflags_name[]; /* const-qualified to match the DOINIT definition */ | |
641 | #else | |
642 | EXTCONST char * const PL_reg_extflags_name[] = { | |
9de15fec | 643 | /* Bits in extflags defined: 11111111111111111111111001111111 */ |
52d81aa8 NC |
644 | "MULTILINE", /* 0x00000001 */ |
645 | "SINGLELINE", /* 0x00000002 */ | |
646 | "FOLD", /* 0x00000004 */ | |
647 | "EXTENDED", /* 0x00000008 */ | |
648 | "KEEPCOPY", /* 0x00000010 */ | |
649 | "LOCALE", /* 0x00000020 */ | |
9de15fec | 650 | "UNICODE", /* 0x00000040 */ |
f7819f85 | 651 | "UNUSED_BIT_7", /* 0x00000080 */ |
e795e964 KW |
652 | "UNUSED_BIT_8", /* 0x00000100 */ |
653 | "ANCH_BOL", /* 0x00000200 */ | |
654 | "ANCH_MBOL", /* 0x00000400 */ | |
655 | "ANCH_SBOL", /* 0x00000800 */ | |
656 | "ANCH_GPOS", /* 0x00001000 */ | |
657 | "GPOS_SEEN", /* 0x00002000 */ | |
658 | "GPOS_FLOAT", /* 0x00004000 */ | |
659 | "LOOKBEHIND_SEEN", /* 0x00008000 */ | |
660 | "EVAL_SEEN", /* 0x00010000 */ | |
661 | "CANY_SEEN", /* 0x00020000 */ | |
662 | "NOSCAN", /* 0x00040000 */ | |
663 | "CHECK_ALL", /* 0x00080000 */ | |
52d81aa8 NC |
664 | "MATCH_UTF8", /* 0x00100000 */ |
665 | "USE_INTUIT_NOML", /* 0x00200000 */ | |
666 | "USE_INTUIT_ML", /* 0x00400000 */ | |
667 | "INTUIT_TAIL", /* 0x00800000 */ | |
668 | "SPLIT", /* 0x01000000 */ | |
669 | "COPY_DONE", /* 0x02000000 */ | |
670 | "TAINTED_SEEN", /* 0x04000000 */ | |
671 | "TAINTED", /* 0x08000000 */ | |
672 | "START_ONLY", /* 0x10000000 */ | |
673 | "SKIPWHITE", /* 0x20000000 */ | |
674 | "WHITE", /* 0x40000000 */ | |
675 | "NULL", /* 0x80000000 */ | |
f7819f85 A |
676 | }; |
677 | #endif /* DOINIT */ | |
678 | ||
f9ef50a7 | 679 | /* The following have no fixed length. U8 so we can do strchr() on it. */ |
ded4dd2a | 680 | #define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7))) |
e52fc539 | 681 | |
f9ef50a7 | 682 | #ifndef DOINIT |
ded4dd2a | 683 | EXTCONST U8 PL_varies[] __attribute__deprecated__; |
f9ef50a7 | 684 | #else |
ded4dd2a | 685 | EXTCONST U8 PL_varies[] __attribute__deprecated__ = { |
f9ef50a7 | 686 | CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, |
01f98ec2 KW |
687 | REF, REFF, REFFL, REFFU, NREF, NREFF, NREFFL, NREFFU, SUSPEND, IFTHEN, |
688 | BRANCHJ, | |
f9ef50a7 NC |
689 | 0 |
690 | }; | |
691 | #endif /* DOINIT */ | |
692 | ||
ded4dd2a NC |
693 | #ifndef DOINIT |
694 | EXTCONST U8 PL_varies_bitmask[]; | |
695 | #else | |
696 | EXTCONST U8 PL_varies_bitmask[] = { | |
01f98ec2 | 697 | 0x00, 0x00, 0x00, 0xC0, 0x81, 0x3F, 0xFF, 0x4C, 0x00, 0x00, 0x00, 0x00 |
ded4dd2a NC |
698 | }; |
699 | #endif /* DOINIT */ | |
700 | ||
f9ef50a7 NC |
701 | /* The following always have a length of 1. U8 so we can do strchr() on it. */ |
702 | /* (Note that length 1 means "one character" under UTF8, not "one octet".) */ | |
ded4dd2a | 703 | #define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7))) |
e52fc539 | 704 | |
f9ef50a7 | 705 | #ifndef DOINIT |
ded4dd2a | 706 | EXTCONST U8 PL_simple[] __attribute__deprecated__; |
f9ef50a7 | 707 | #else |
ded4dd2a | 708 | EXTCONST U8 PL_simple[] __attribute__deprecated__ = { |
f9ef50a7 | 709 | REG_ANY, SANY, CANY, ANYOF, ALNUM, ALNUML, NALNUM, NALNUML, SPACE, |
28b5d7bf KW |
710 | SPACEL, NSPACE, NSPACEL, DIGIT, DIGITL, NDIGIT, NDIGITL, VERTWS, |
711 | NVERTWS, HORIZWS, NHORIZWS, | |
f9ef50a7 NC |
712 | 0 |
713 | }; | |
714 | #endif /* DOINIT */ | |
715 | ||
ded4dd2a NC |
716 | #ifndef DOINIT |
717 | EXTCONST U8 PL_simple_bitmask[]; | |
718 | #else | |
719 | EXTCONST U8 PL_simple_bitmask[] = { | |
28b5d7bf | 720 | 0x00, 0xC0, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x07 |
ded4dd2a NC |
721 | }; |
722 | #endif /* DOINIT */ | |
723 | ||
37442d52 | 724 | /* ex: set ro: */ |