This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
migrate CURLYX/WHILEM branch in regmatch() to new FSM-esque paradigm
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
885f9e59 3 This file is built by regcomp.pl from regcomp.sym.
d09b2d29
IZ
4 Any changes made here will be lost!
5*/
6
6bda09f9
YO
7/* Regops and State definitions */
8
9#define REGNODE_MAX 68 /* Highest regop number; equals PSEUDO below. */
c476f425 10#define REGMATCH_STATE_MAX 98 /* Highest regmatch state number; equals CURLY_B_max_fail below. */
03363afd 11
f9f4320a
YO
12#define END 0 /* 0x00 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
14#define BOL 2 /* 0x02 Match "" at beginning of line. */
15#define MBOL 3 /* 0x03 Same, assuming multiline. */
16#define SBOL 4 /* 0x04 Same, assuming singleline. */
17#define EOS 5 /* 0x05 Match "" at end of string. */
18#define EOL 6 /* 0x06 Match "" at end of line. */
19#define MEOL 7 /* 0x07 Same, assuming multiline. */
20#define SEOL 8 /* 0x08 Same, assuming singleline. */
21#define BOUND 9 /* 0x09 Match "" at any word boundary */
22#define BOUNDL 10 /* 0x0a Match "" at any word boundary */
23#define NBOUND 11 /* 0x0b Match "" at any word non-boundary */
24#define NBOUNDL 12 /* 0x0c Match "" at any word non-boundary */
25#define GPOS 13 /* 0x0d Matches where last m//g left off. */
26#define REG_ANY 14 /* 0x0e Match any one character (except newline). */
27#define SANY 15 /* 0x0f Match any one character. */
28#define CANY 16 /* 0x10 Match any one byte. */
29#define ANYOF 17 /* 0x11 Match character in (or not in) this class. */
30#define ALNUM 18 /* 0x12 Match any alphanumeric character */
31#define ALNUML 19 /* 0x13 Match any alphanumeric char in locale */
32#define NALNUM 20 /* 0x14 Match any non-alphanumeric character */
33#define NALNUML 21 /* 0x15 Match any non-alphanumeric char in locale */
34#define SPACE 22 /* 0x16 Match any whitespace character */
35#define SPACEL 23 /* 0x17 Match any whitespace char in locale */
36#define NSPACE 24 /* 0x18 Match any non-whitespace character */
37#define NSPACEL 25 /* 0x19 Match any non-whitespace char in locale */
38#define DIGIT 26 /* 0x1a Match any numeric character */
39#define DIGITL 27 /* 0x1b Match any numeric character in locale */
40#define NDIGIT 28 /* 0x1c Match any non-numeric character */
41#define NDIGITL 29 /* 0x1d Match any non-numeric character in locale */
42#define CLUMP 30 /* 0x1e Match any combining character sequence */
43#define BRANCH 31 /* 0x1f Match this alternative, or the next... */
44#define BACK 32 /* 0x20 Match "", "next" ptr points backward. */
45#define EXACT 33 /* 0x21 Match this string (preceded by length). */
46#define EXACTF 34 /* 0x22 Match this string, folded (prec. by length). */
47#define EXACTFL 35 /* 0x23 Match this string, folded in locale (w/len). */
48#define NOTHING 36 /* 0x24 Match empty string. */
49#define TAIL 37 /* 0x25 Match empty string. Can jump here from outside. */
50#define STAR 38 /* 0x26 Match this (simple) thing 0 or more times. */
51#define PLUS 39 /* 0x27 Match this (simple) thing 1 or more times. */
52#define CURLY 40 /* 0x28 Match this simple thing {n,m} times. */
53#define CURLYN 41 /* 0x29 Match next-after-this simple thing */
54#define CURLYM 42 /* 0x2a Match this medium-complex thing {n,m} times. */
55#define CURLYX 43 /* 0x2b Match this complex thing {n,m} times. */
56#define WHILEM 44 /* 0x2c Do curly processing and see if rest matches. */
57#define OPEN 45 /* 0x2d Mark this point in input as start of ... (description cut off in generated output; see regcomp.sym) */
58#define CLOSE 46 /* 0x2e Analogous to OPEN. */
59#define REF 47 /* 0x2f Match some already matched string */
60#define REFF 48 /* 0x30 Match already matched string, folded */
61#define REFFL 49 /* 0x31 Match already matched string, folded in loc. */
62#define IFMATCH 50 /* 0x32 Succeeds if the following matches. */
63#define UNLESSM 51 /* 0x33 Fails if the following matches. */
64#define SUSPEND 52 /* 0x34 "Independent" sub-RE. */
65#define IFTHEN 53 /* 0x35 Switch, should be preceded by switcher. */
66#define GROUPP 54 /* 0x36 Whether the group matched. */
67#define LONGJMP 55 /* 0x37 Jump far away. */
68#define BRANCHJ 56 /* 0x38 BRANCH with long offset. */
69#define EVAL 57 /* 0x39 Execute some Perl code. */
70#define MINMOD 58 /* 0x3a Next operator is not greedy. */
71#define LOGICAL 59 /* 0x3b Next opcode should set the flag only. */
72#define RENUM 60 /* 0x3c Group with independently numbered parens. */
73#define TRIE 61 /* 0x3d Match many EXACT(FL?)? at once. flags==type */
74#define TRIEC 62 /* 0x3e Same as TRIE, but with embedded charclass data */
75#define AHOCORASICK 63 /* 0x3f Aho Corasick stclass. flags==type */
76#define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
6bda09f9
YO
77#define RECURSE 65 /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
78#define SRECURSE 66 /* 0x42 recurse to start of pattern */
79#define OPTIMIZED 67 /* 0x43 Placeholder for dump. */
80#define PSEUDO 68 /* 0x44 Pseudo opcode for internal use. */
03363afd
YO
81
82 /* ------------ States ------------- */
/* State numbers continue directly after the last regop (PSEUDO == 68);
 * each state has a matching "_fail" state numbered one higher. */
83
6bda09f9
YO
84#define TRIE_next 69 /* 0x45 Regmatch state for TRIE */
85#define TRIE_next_fail 70 /* 0x46 Regmatch state for TRIE */
86#define EVAL_AB 71 /* 0x47 Regmatch state for EVAL */
87#define EVAL_AB_fail 72 /* 0x48 Regmatch state for EVAL */
c476f425
DM
88#define CURLYX_end 73 /* 0x49 Regmatch state for CURLYX */
89#define CURLYX_end_fail 74 /* 0x4a Regmatch state for CURLYX */
90#define WHILEM_A_pre 75 /* 0x4b Regmatch state for WHILEM */
91#define WHILEM_A_pre_fail 76 /* 0x4c Regmatch state for WHILEM */
92#define WHILEM_A_min 77 /* 0x4d Regmatch state for WHILEM */
93#define WHILEM_A_min_fail 78 /* 0x4e Regmatch state for WHILEM */
94#define WHILEM_A_max 79 /* 0x4f Regmatch state for WHILEM */
95#define WHILEM_A_max_fail 80 /* 0x50 Regmatch state for WHILEM */
96#define WHILEM_B_min 81 /* 0x51 Regmatch state for WHILEM */
97#define WHILEM_B_min_fail 82 /* 0x52 Regmatch state for WHILEM */
98#define WHILEM_B_max 83 /* 0x53 Regmatch state for WHILEM */
99#define WHILEM_B_max_fail 84 /* 0x54 Regmatch state for WHILEM */
100#define BRANCH_next 85 /* 0x55 Regmatch state for BRANCH */
101#define BRANCH_next_fail 86 /* 0x56 Regmatch state for BRANCH */
102#define CURLYM_A 87 /* 0x57 Regmatch state for CURLYM */
103#define CURLYM_A_fail 88 /* 0x58 Regmatch state for CURLYM */
104#define CURLYM_B 89 /* 0x59 Regmatch state for CURLYM */
105#define CURLYM_B_fail 90 /* 0x5a Regmatch state for CURLYM */
106#define IFMATCH_A 91 /* 0x5b Regmatch state for IFMATCH */
107#define IFMATCH_A_fail 92 /* 0x5c Regmatch state for IFMATCH */
108#define CURLY_B_min_known 93 /* 0x5d Regmatch state for CURLY */
109#define CURLY_B_min_known_fail 94 /* 0x5e Regmatch state for CURLY */
110#define CURLY_B_min 95 /* 0x5f Regmatch state for CURLY */
111#define CURLY_B_min_fail 96 /* 0x60 Regmatch state for CURLY */
112#define CURLY_B_max 97 /* 0x61 Regmatch state for CURLY */
113#define CURLY_B_max_fail 98 /* 0x62 Regmatch state for CURLY */
03363afd 114
6bda09f9 115/* PL_regkind[] What type of regop or state is this.
 *
 * One entry per regop and per regmatch state, in numeric order (END first,
 * CURLY_B_max_fail last).  Each entry names the regop that represents the
 * kind of that regop/state; e.g. BOL, MBOL and SBOL all have kind BOL, and
 * every WHILEM_* state has kind WHILEM.
 *
 * Without DOINIT only the declaration is emitted; with DOINIT the table
 * itself is defined here.
 */
d09b2d29
IZ
116
117#ifndef DOINIT
22c35a8c 118EXTCONST U8 PL_regkind[];
d09b2d29 119#else
22c35a8c 120EXTCONST U8 PL_regkind[] = {
03363afd
YO
121 END, /* END */
122 END, /* SUCCEED */
123 BOL, /* BOL */
124 BOL, /* MBOL */
125 BOL, /* SBOL */
126 EOL, /* EOS */
127 EOL, /* EOL */
128 EOL, /* MEOL */
129 EOL, /* SEOL */
130 BOUND, /* BOUND */
131 BOUND, /* BOUNDL */
132 NBOUND, /* NBOUND */
133 NBOUND, /* NBOUNDL */
134 GPOS, /* GPOS */
135 REG_ANY, /* REG_ANY */
136 REG_ANY, /* SANY */
137 REG_ANY, /* CANY */
138 ANYOF, /* ANYOF */
139 ALNUM, /* ALNUM */
140 ALNUM, /* ALNUML */
141 NALNUM, /* NALNUM */
142 NALNUM, /* NALNUML */
143 SPACE, /* SPACE */
144 SPACE, /* SPACEL */
145 NSPACE, /* NSPACE */
146 NSPACE, /* NSPACEL */
147 DIGIT, /* DIGIT */
148 DIGIT, /* DIGITL */
149 NDIGIT, /* NDIGIT */
150 NDIGIT, /* NDIGITL */
151 CLUMP, /* CLUMP */
152 BRANCH, /* BRANCH */
153 BACK, /* BACK */
154 EXACT, /* EXACT */
155 EXACT, /* EXACTF */
156 EXACT, /* EXACTFL */
157 NOTHING, /* NOTHING */
158 NOTHING, /* TAIL */
159 STAR, /* STAR */
160 PLUS, /* PLUS */
161 CURLY, /* CURLY */
162 CURLY, /* CURLYN */
163 CURLY, /* CURLYM */
164 CURLY, /* CURLYX */
165 WHILEM, /* WHILEM */
166 OPEN, /* OPEN */
167 CLOSE, /* CLOSE */
168 REF, /* REF */
169 REF, /* REFF */
170 REF, /* REFFL */
171 BRANCHJ, /* IFMATCH */
172 BRANCHJ, /* UNLESSM */
173 BRANCHJ, /* SUSPEND */
174 BRANCHJ, /* IFTHEN */
175 GROUPP, /* GROUPP */
176 LONGJMP, /* LONGJMP */
177 BRANCHJ, /* BRANCHJ */
178 EVAL, /* EVAL */
179 MINMOD, /* MINMOD */
180 LOGICAL, /* LOGICAL */
181 BRANCHJ, /* RENUM */
182 TRIE, /* TRIE */
183 TRIE, /* TRIEC */
184 TRIE, /* AHOCORASICK */
185 TRIE, /* AHOCORASICKC */
6bda09f9
YO
186 RECURSE, /* RECURSE */
187 RECURSE, /* SRECURSE */
03363afd
YO
188 NOTHING, /* OPTIMIZED */
189 PSEUDO, /* PSEUDO */
190 /* ------------ States ------------- */
191 TRIE, /* TRIE_next */
192 TRIE, /* TRIE_next_fail */
193 EVAL, /* EVAL_AB */
194 EVAL, /* EVAL_AB_fail */
c476f425
DM
195 CURLYX, /* CURLYX_end */
196 CURLYX, /* CURLYX_end_fail */
197 WHILEM, /* WHILEM_A_pre */
198 WHILEM, /* WHILEM_A_pre_fail */
199 WHILEM, /* WHILEM_A_min */
200 WHILEM, /* WHILEM_A_min_fail */
201 WHILEM, /* WHILEM_A_max */
202 WHILEM, /* WHILEM_A_max_fail */
203 WHILEM, /* WHILEM_B_min */
204 WHILEM, /* WHILEM_B_min_fail */
205 WHILEM, /* WHILEM_B_max */
206 WHILEM, /* WHILEM_B_max_fail */
03363afd
YO
207 BRANCH, /* BRANCH_next */
208 BRANCH, /* BRANCH_next_fail */
209 CURLYM, /* CURLYM_A */
210 CURLYM, /* CURLYM_A_fail */
211 CURLYM, /* CURLYM_B */
212 CURLYM, /* CURLYM_B_fail */
213 IFMATCH, /* IFMATCH_A */
214 IFMATCH, /* IFMATCH_A_fail */
215 CURLY, /* CURLY_B_min_known */
216 CURLY, /* CURLY_B_min_known_fail */
217 CURLY, /* CURLY_B_min */
218 CURLY, /* CURLY_B_min_fail */
219 CURLY, /* CURLY_B_max */
220 CURLY, /* CURLY_B_max_fail */
d09b2d29
IZ
221};
222#endif
223
6bda09f9 224/* regarglen[] - How large is the argument part of the node (in regnodes)
 *
 * One entry per regop, in numeric order (END first, PSEUDO last); the
 * regmatch states have no entries here.  An entry is either 0 or the
 * EXTRA_SIZE() of the struct used for that regop.
 *
 * Only compiled when REG_COMP_C is defined.
 */
d09b2d29
IZ
225
226#ifdef REG_COMP_C
29de9391 227static const U8 regarglen[] = {
03363afd
YO
228 0, /* END */
229 0, /* SUCCEED */
230 0, /* BOL */
231 0, /* MBOL */
232 0, /* SBOL */
233 0, /* EOS */
234 0, /* EOL */
235 0, /* MEOL */
236 0, /* SEOL */
237 0, /* BOUND */
238 0, /* BOUNDL */
239 0, /* NBOUND */
240 0, /* NBOUNDL */
241 0, /* GPOS */
242 0, /* REG_ANY */
243 0, /* SANY */
244 0, /* CANY */
245 0, /* ANYOF */
246 0, /* ALNUM */
247 0, /* ALNUML */
248 0, /* NALNUM */
249 0, /* NALNUML */
250 0, /* SPACE */
251 0, /* SPACEL */
252 0, /* NSPACE */
253 0, /* NSPACEL */
254 0, /* DIGIT */
255 0, /* DIGITL */
256 0, /* NDIGIT */
257 0, /* NDIGITL */
258 0, /* CLUMP */
259 0, /* BRANCH */
260 0, /* BACK */
261 0, /* EXACT */
262 0, /* EXACTF */
263 0, /* EXACTFL */
264 0, /* NOTHING */
265 0, /* TAIL */
266 0, /* STAR */
267 0, /* PLUS */
268 EXTRA_SIZE(struct regnode_2), /* CURLY */
269 EXTRA_SIZE(struct regnode_2), /* CURLYN */
270 EXTRA_SIZE(struct regnode_2), /* CURLYM */
271 EXTRA_SIZE(struct regnode_2), /* CURLYX */
272 0, /* WHILEM */
273 EXTRA_SIZE(struct regnode_1), /* OPEN */
274 EXTRA_SIZE(struct regnode_1), /* CLOSE */
275 EXTRA_SIZE(struct regnode_1), /* REF */
276 EXTRA_SIZE(struct regnode_1), /* REFF */
277 EXTRA_SIZE(struct regnode_1), /* REFFL */
278 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
279 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
280 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
281 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
282 EXTRA_SIZE(struct regnode_1), /* GROUPP */
283 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
284 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
285 EXTRA_SIZE(struct regnode_1), /* EVAL */
286 0, /* MINMOD */
287 0, /* LOGICAL */
288 EXTRA_SIZE(struct regnode_1), /* RENUM */
289 EXTRA_SIZE(struct regnode_1), /* TRIE */
290 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
291 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
292 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
6bda09f9
YO
293 EXTRA_SIZE(struct regnode_2L), /* RECURSE */
294 0, /* SRECURSE */
03363afd
YO
295 0, /* OPTIMIZED */
296 0, /* PSEUDO */
d09b2d29
IZ
297};
298
6bda09f9
YO
299/* reg_off_by_arg[] - Which argument holds the offset to the next node
 *
 * One entry per regop, in numeric order (END first, PSEUDO last); the
 * regmatch states have no entries here.  Only IFMATCH and UNLESSM (2) and
 * SUSPEND, IFTHEN, LONGJMP, BRANCHJ and RENUM (1) are nonzero.
 * NOTE(review): 0 presumably means the offset is not stored in an
 * argument -- confirm against the users of this table.
 */
300
29de9391 301static const char reg_off_by_arg[] = {
03363afd
YO
302 0, /* END */
303 0, /* SUCCEED */
304 0, /* BOL */
305 0, /* MBOL */
306 0, /* SBOL */
307 0, /* EOS */
308 0, /* EOL */
309 0, /* MEOL */
310 0, /* SEOL */
311 0, /* BOUND */
312 0, /* BOUNDL */
313 0, /* NBOUND */
314 0, /* NBOUNDL */
315 0, /* GPOS */
316 0, /* REG_ANY */
317 0, /* SANY */
318 0, /* CANY */
319 0, /* ANYOF */
320 0, /* ALNUM */
321 0, /* ALNUML */
322 0, /* NALNUM */
323 0, /* NALNUML */
324 0, /* SPACE */
325 0, /* SPACEL */
326 0, /* NSPACE */
327 0, /* NSPACEL */
328 0, /* DIGIT */
329 0, /* DIGITL */
330 0, /* NDIGIT */
331 0, /* NDIGITL */
332 0, /* CLUMP */
333 0, /* BRANCH */
334 0, /* BACK */
335 0, /* EXACT */
336 0, /* EXACTF */
337 0, /* EXACTFL */
338 0, /* NOTHING */
339 0, /* TAIL */
340 0, /* STAR */
341 0, /* PLUS */
342 0, /* CURLY */
343 0, /* CURLYN */
344 0, /* CURLYM */
345 0, /* CURLYX */
346 0, /* WHILEM */
347 0, /* OPEN */
348 0, /* CLOSE */
349 0, /* REF */
350 0, /* REFF */
351 0, /* REFFL */
352 2, /* IFMATCH */
353 2, /* UNLESSM */
354 1, /* SUSPEND */
355 1, /* IFTHEN */
356 0, /* GROUPP */
357 1, /* LONGJMP */
358 1, /* BRANCHJ */
359 0, /* EVAL */
360 0, /* MINMOD */
361 0, /* LOGICAL */
362 1, /* RENUM */
363 0, /* TRIE */
364 0, /* TRIEC */
365 0, /* AHOCORASICK */
366 0, /* AHOCORASICKC */
6bda09f9
YO
367 0, /* RECURSE */
368 0, /* SRECURSE */
03363afd
YO
369 0, /* OPTIMIZED */
370 0, /* PSEUDO */
d09b2d29 371};
885f9e59 372
6bda09f9
YO
373/* reg_name[] - Opcode/state names in string form, for debugging
 *
 * One string per regop and per regmatch state, in numeric order, so the
 * string at index N is the name of the regop/state whose number is N (the
 * trailing comment on each entry gives that number in hex).
 *
 * The table is defined only when both REG_COMP_C and DEBUGGING are defined;
 * with DEBUGGING but without REG_COMP_C only an extern declaration is
 * emitted (see the #else branch at the end).
 */
374
885f9e59 375#ifdef DEBUGGING
6d9c9890 376const char * reg_name[] = {
03363afd
YO
377 "END", /* 0x00 */
378 "SUCCEED", /* 0x01 */
379 "BOL", /* 0x02 */
380 "MBOL", /* 0x03 */
381 "SBOL", /* 0x04 */
382 "EOS", /* 0x05 */
383 "EOL", /* 0x06 */
384 "MEOL", /* 0x07 */
385 "SEOL", /* 0x08 */
386 "BOUND", /* 0x09 */
387 "BOUNDL", /* 0x0a */
388 "NBOUND", /* 0x0b */
389 "NBOUNDL", /* 0x0c */
390 "GPOS", /* 0x0d */
391 "REG_ANY", /* 0x0e */
392 "SANY", /* 0x0f */
393 "CANY", /* 0x10 */
394 "ANYOF", /* 0x11 */
395 "ALNUM", /* 0x12 */
396 "ALNUML", /* 0x13 */
397 "NALNUM", /* 0x14 */
398 "NALNUML", /* 0x15 */
399 "SPACE", /* 0x16 */
400 "SPACEL", /* 0x17 */
401 "NSPACE", /* 0x18 */
402 "NSPACEL", /* 0x19 */
403 "DIGIT", /* 0x1a */
404 "DIGITL", /* 0x1b */
405 "NDIGIT", /* 0x1c */
406 "NDIGITL", /* 0x1d */
407 "CLUMP", /* 0x1e */
408 "BRANCH", /* 0x1f */
409 "BACK", /* 0x20 */
410 "EXACT", /* 0x21 */
411 "EXACTF", /* 0x22 */
412 "EXACTFL", /* 0x23 */
413 "NOTHING", /* 0x24 */
414 "TAIL", /* 0x25 */
415 "STAR", /* 0x26 */
416 "PLUS", /* 0x27 */
417 "CURLY", /* 0x28 */
418 "CURLYN", /* 0x29 */
419 "CURLYM", /* 0x2a */
420 "CURLYX", /* 0x2b */
421 "WHILEM", /* 0x2c */
422 "OPEN", /* 0x2d */
423 "CLOSE", /* 0x2e */
424 "REF", /* 0x2f */
425 "REFF", /* 0x30 */
426 "REFFL", /* 0x31 */
427 "IFMATCH", /* 0x32 */
428 "UNLESSM", /* 0x33 */
429 "SUSPEND", /* 0x34 */
430 "IFTHEN", /* 0x35 */
431 "GROUPP", /* 0x36 */
432 "LONGJMP", /* 0x37 */
433 "BRANCHJ", /* 0x38 */
434 "EVAL", /* 0x39 */
435 "MINMOD", /* 0x3a */
436 "LOGICAL", /* 0x3b */
437 "RENUM", /* 0x3c */
438 "TRIE", /* 0x3d */
439 "TRIEC", /* 0x3e */
440 "AHOCORASICK", /* 0x3f */
441 "AHOCORASICKC", /* 0x40 */
6bda09f9
YO
442 "RECURSE", /* 0x41 */
443 "SRECURSE", /* 0x42 */
444 "OPTIMIZED", /* 0x43 */
445 "PSEUDO", /* 0x44 */
03363afd 446 /* ------------ States ------------- */
6bda09f9
YO
447 "TRIE_next", /* 0x45 */
448 "TRIE_next_fail", /* 0x46 */
449 "EVAL_AB", /* 0x47 */
450 "EVAL_AB_fail", /* 0x48 */
c476f425
DM
451 "CURLYX_end", /* 0x49 */
452 "CURLYX_end_fail", /* 0x4a */
453 "WHILEM_A_pre", /* 0x4b */
454 "WHILEM_A_pre_fail", /* 0x4c */
455 "WHILEM_A_min", /* 0x4d */
456 "WHILEM_A_min_fail", /* 0x4e */
457 "WHILEM_A_max", /* 0x4f */
458 "WHILEM_A_max_fail", /* 0x50 */
459 "WHILEM_B_min", /* 0x51 */
460 "WHILEM_B_min_fail", /* 0x52 */
461 "WHILEM_B_max", /* 0x53 */
462 "WHILEM_B_max_fail", /* 0x54 */
463 "BRANCH_next", /* 0x55 */
464 "BRANCH_next_fail", /* 0x56 */
465 "CURLYM_A", /* 0x57 */
466 "CURLYM_A_fail", /* 0x58 */
467 "CURLYM_B", /* 0x59 */
468 "CURLYM_B_fail", /* 0x5a */
469 "IFMATCH_A", /* 0x5b */
470 "IFMATCH_A_fail", /* 0x5c */
471 "CURLY_B_min_known", /* 0x5d */
472 "CURLY_B_min_known_fail", /* 0x5e */
473 "CURLY_B_min", /* 0x5f */
474 "CURLY_B_min_fail", /* 0x60 */
475 "CURLY_B_max", /* 0x61 */
476 "CURLY_B_max_fail", /* 0x62 */
885f9e59 477};
885f9e59 478#endif /* DEBUGGING */
03363afd
YO
479#else
/* REG_COMP_C is not defined: only declare reg_name[] (under DEBUGGING);
 * the table itself is defined in the REG_COMP_C branch above. */
480#ifdef DEBUGGING
6d9c9890 481extern const char * reg_name[];
03363afd 482#endif
d09b2d29
IZ
483#endif /* REG_COMP_C */
484
37442d52 485/* ex: set ro: */