This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #8835] fairly large regex optimization bug with 5.7.3
[perl5.git] / regnodes.h
CommitLineData
37442d52
RGS
1/* -*- buffer-read-only: t -*-
2 !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
885f9e59 3 This file is built by regcomp.pl from regcomp.sym.
d09b2d29
IZ
4 Any changes made here will be lost!
5*/
6
6bda09f9
YO
7/* Regops and State definitions */
8
/* Upper bounds of the numbering in this file: REGNODE_MAX equals the number
   of the last regop (PSEUDO), REGMATCH_STATE_MAX equals the number of the
   last regmatch state (CURLY_B_max_fail). */
9#define REGNODE_MAX 68
10#define REGMATCH_STATE_MAX 93
03363afd 11
f9f4320a
YO
/* Regop opcodes, numbered consecutively from 0 (END) to 68 (PSEUDO).
   The hex value at the start of each comment repeats the opcode number. */
12#define END 0 /* 0x00 End of program. */
13#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
14#define BOL 2 /* 0x02 Match "" at beginning of line. */
15#define MBOL 3 /* 0x03 Same, assuming multiline. */
16#define SBOL 4 /* 0x04 Same, assuming singleline. */
17#define EOS 5 /* 0x05 Match "" at end of string. */
18#define EOL 6 /* 0x06 Match "" at end of line. */
19#define MEOL 7 /* 0x07 Same, assuming multiline. */
20#define SEOL 8 /* 0x08 Same, assuming singleline. */
21#define BOUND 9 /* 0x09 Match "" at any word boundary */
22#define BOUNDL 10 /* 0x0a Match "" at any word boundary */
23#define NBOUND 11 /* 0x0b Match "" at any word non-boundary */
24#define NBOUNDL 12 /* 0x0c Match "" at any word non-boundary */
25#define GPOS 13 /* 0x0d Matches where last m//g left off. */
26#define REG_ANY 14 /* 0x0e Match any one character (except newline). */
27#define SANY 15 /* 0x0f Match any one character. */
28#define CANY 16 /* 0x10 Match any one byte. */
29#define ANYOF 17 /* 0x11 Match character in (or not in) this class. */
30#define ALNUM 18 /* 0x12 Match any alphanumeric character */
31#define ALNUML 19 /* 0x13 Match any alphanumeric char in locale */
32#define NALNUM 20 /* 0x14 Match any non-alphanumeric character */
33#define NALNUML 21 /* 0x15 Match any non-alphanumeric char in locale */
34#define SPACE 22 /* 0x16 Match any whitespace character */
35#define SPACEL 23 /* 0x17 Match any whitespace char in locale */
36#define NSPACE 24 /* 0x18 Match any non-whitespace character */
37#define NSPACEL 25 /* 0x19 Match any non-whitespace char in locale */
38#define DIGIT 26 /* 0x1a Match any numeric character */
39#define DIGITL 27 /* 0x1b Match any numeric character in locale */
40#define NDIGIT 28 /* 0x1c Match any non-numeric character */
41#define NDIGITL 29 /* 0x1d Match any non-numeric character in locale */
42#define CLUMP 30 /* 0x1e Match any combining character sequence */
43#define BRANCH 31 /* 0x1f Match this alternative, or the next... */
44#define BACK 32 /* 0x20 Match "", "next" ptr points backward. */
45#define EXACT 33 /* 0x21 Match this string (preceded by length). */
46#define EXACTF 34 /* 0x22 Match this string, folded (prec. by length). */
47#define EXACTFL 35 /* 0x23 Match this string, folded in locale (w/len). */
48#define NOTHING 36 /* 0x24 Match empty string. */
49#define TAIL 37 /* 0x25 Match empty string. Can jump here from outside. */
50#define STAR 38 /* 0x26 Match this (simple) thing 0 or more times. */
51#define PLUS 39 /* 0x27 Match this (simple) thing 1 or more times. */
52#define CURLY 40 /* 0x28 Match this simple thing {n,m} times. */
53#define CURLYN 41 /* 0x29 Match next-after-this simple thing (NOTE(review): description may be truncated; check regcomp.sym) */
54#define CURLYM 42 /* 0x2a Match this medium-complex thing {n,m} times. */
55#define CURLYX 43 /* 0x2b Match this complex thing {n,m} times. */
56#define WHILEM 44 /* 0x2c Do curly processing and see if rest matches. */
57#define OPEN 45 /* 0x2d Mark this point in input as start of (NOTE(review): description appears truncated; check regcomp.sym) */
58#define CLOSE 46 /* 0x2e Analogous to OPEN. */
59#define REF 47 /* 0x2f Match some already matched string */
60#define REFF 48 /* 0x30 Match already matched string, folded */
61#define REFFL 49 /* 0x31 Match already matched string, folded in loc. */
62#define IFMATCH 50 /* 0x32 Succeeds if the following matches. */
63#define UNLESSM 51 /* 0x33 Fails if the following matches. */
64#define SUSPEND 52 /* 0x34 "Independent" sub-RE. */
65#define IFTHEN 53 /* 0x35 Switch, should be preceded by switcher. */
66#define GROUPP 54 /* 0x36 Whether the group matched. */
67#define LONGJMP 55 /* 0x37 Jump far away. */
68#define BRANCHJ 56 /* 0x38 BRANCH with long offset. */
69#define EVAL 57 /* 0x39 Execute some Perl code. */
70#define MINMOD 58 /* 0x3a Next operator is not greedy. */
71#define LOGICAL 59 /* 0x3b Next opcode should set the flag only. */
72#define RENUM 60 /* 0x3c Group with independently numbered parens. */
73#define TRIE 61 /* 0x3d Match many EXACT(FL?)? at once. flags==type */
74#define TRIEC 62 /* 0x3e Same as TRIE, but with embedded charclass data */
75#define AHOCORASICK 63 /* 0x3f Aho Corasick stclass. flags==type */
76#define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */
6bda09f9
YO
77#define RECURSE 65 /* 0x41 recurse to paren arg1 at (signed) ofs arg2 */
78#define SRECURSE 66 /* 0x42 recurse to start of pattern */
79#define OPTIMIZED 67 /* 0x43 Placeholder for dump. */
80#define PSEUDO 68 /* 0x44 Pseudo opcode for internal use. */
03363afd
YO
81
82 /* ------------ States ------------- */
83
6bda09f9
YO
/* Regmatch state numbers. They continue the regop numbering: the first
   state (TRIE_next) is 69 and the last (CURLY_B_max_fail) is 93.  Each
   comment names the regop the state belongs to. */
84#define TRIE_next 69 /* 0x45 Regmatch state for TRIE */
85#define TRIE_next_fail 70 /* 0x46 Regmatch state for TRIE */
86#define EVAL_AB 71 /* 0x47 Regmatch state for EVAL */
87#define EVAL_AB_fail 72 /* 0x48 Regmatch state for EVAL */
88#define resume_CURLYX 73 /* 0x49 Regmatch state for CURLYX */
89#define resume_WHILEM1 74 /* 0x4a Regmatch state for WHILEM */
90#define resume_WHILEM2 75 /* 0x4b Regmatch state for WHILEM */
91#define resume_WHILEM3 76 /* 0x4c Regmatch state for WHILEM */
92#define resume_WHILEM4 77 /* 0x4d Regmatch state for WHILEM */
93#define resume_WHILEM5 78 /* 0x4e Regmatch state for WHILEM */
94#define resume_WHILEM6 79 /* 0x4f Regmatch state for WHILEM */
95#define BRANCH_next 80 /* 0x50 Regmatch state for BRANCH */
96#define BRANCH_next_fail 81 /* 0x51 Regmatch state for BRANCH */
97#define CURLYM_A 82 /* 0x52 Regmatch state for CURLYM */
98#define CURLYM_A_fail 83 /* 0x53 Regmatch state for CURLYM */
99#define CURLYM_B 84 /* 0x54 Regmatch state for CURLYM */
100#define CURLYM_B_fail 85 /* 0x55 Regmatch state for CURLYM */
101#define IFMATCH_A 86 /* 0x56 Regmatch state for IFMATCH */
102#define IFMATCH_A_fail 87 /* 0x57 Regmatch state for IFMATCH */
103#define CURLY_B_min_known 88 /* 0x58 Regmatch state for CURLY */
104#define CURLY_B_min_known_fail 89 /* 0x59 Regmatch state for CURLY */
105#define CURLY_B_min 90 /* 0x5a Regmatch state for CURLY */
106#define CURLY_B_min_fail 91 /* 0x5b Regmatch state for CURLY */
107#define CURLY_B_max 92 /* 0x5c Regmatch state for CURLY */
108#define CURLY_B_max_fail 93 /* 0x5d Regmatch state for CURLY */
03363afd 109
6bda09f9 110/* PL_regkind[] What type of regop or state is this. */
d09b2d29
IZ
111
/* Without DOINIT only the declaration of PL_regkind is emitted; with DOINIT
   the table itself is defined.  Entries are listed in opcode order, regops
   first and regmatch states after them, so entry i describes regop/state
   number i.  The stored value is itself a regop number naming the "kind";
   several related regops share one kind (e.g. MBOL and SBOL are both BOL).
   The comment on each entry names the regop/state the entry belongs to. */
112#ifndef DOINIT
22c35a8c 113EXTCONST U8 PL_regkind[];
d09b2d29 114#else
22c35a8c 115EXTCONST U8 PL_regkind[] = {
03363afd
YO
116 END, /* END */
117 END, /* SUCCEED */
118 BOL, /* BOL */
119 BOL, /* MBOL */
120 BOL, /* SBOL */
121 EOL, /* EOS */
122 EOL, /* EOL */
123 EOL, /* MEOL */
124 EOL, /* SEOL */
125 BOUND, /* BOUND */
126 BOUND, /* BOUNDL */
127 NBOUND, /* NBOUND */
128 NBOUND, /* NBOUNDL */
129 GPOS, /* GPOS */
130 REG_ANY, /* REG_ANY */
131 REG_ANY, /* SANY */
132 REG_ANY, /* CANY */
133 ANYOF, /* ANYOF */
134 ALNUM, /* ALNUM */
135 ALNUM, /* ALNUML */
136 NALNUM, /* NALNUM */
137 NALNUM, /* NALNUML */
138 SPACE, /* SPACE */
139 SPACE, /* SPACEL */
140 NSPACE, /* NSPACE */
141 NSPACE, /* NSPACEL */
142 DIGIT, /* DIGIT */
143 DIGIT, /* DIGITL */
144 NDIGIT, /* NDIGIT */
145 NDIGIT, /* NDIGITL */
146 CLUMP, /* CLUMP */
147 BRANCH, /* BRANCH */
148 BACK, /* BACK */
149 EXACT, /* EXACT */
150 EXACT, /* EXACTF */
151 EXACT, /* EXACTFL */
152 NOTHING, /* NOTHING */
153 NOTHING, /* TAIL */
154 STAR, /* STAR */
155 PLUS, /* PLUS */
156 CURLY, /* CURLY */
157 CURLY, /* CURLYN */
158 CURLY, /* CURLYM */
159 CURLY, /* CURLYX */
160 WHILEM, /* WHILEM */
161 OPEN, /* OPEN */
162 CLOSE, /* CLOSE */
163 REF, /* REF */
164 REF, /* REFF */
165 REF, /* REFFL */
166 BRANCHJ, /* IFMATCH */
167 BRANCHJ, /* UNLESSM */
168 BRANCHJ, /* SUSPEND */
169 BRANCHJ, /* IFTHEN */
170 GROUPP, /* GROUPP */
171 LONGJMP, /* LONGJMP */
172 BRANCHJ, /* BRANCHJ */
173 EVAL, /* EVAL */
174 MINMOD, /* MINMOD */
175 LOGICAL, /* LOGICAL */
176 BRANCHJ, /* RENUM */
177 TRIE, /* TRIE */
178 TRIE, /* TRIEC */
179 TRIE, /* AHOCORASICK */
180 TRIE, /* AHOCORASICKC */
6bda09f9
YO
181 RECURSE, /* RECURSE */
182 RECURSE, /* SRECURSE */
03363afd
YO
183 NOTHING, /* OPTIMIZED */
184 PSEUDO, /* PSEUDO */
185 /* ------------ States ------------- */
186 TRIE, /* TRIE_next */
187 TRIE, /* TRIE_next_fail */
188 EVAL, /* EVAL_AB */
189 EVAL, /* EVAL_AB_fail */
190 CURLYX, /* resume_CURLYX */
191 WHILEM, /* resume_WHILEM1 */
192 WHILEM, /* resume_WHILEM2 */
193 WHILEM, /* resume_WHILEM3 */
194 WHILEM, /* resume_WHILEM4 */
195 WHILEM, /* resume_WHILEM5 */
196 WHILEM, /* resume_WHILEM6 */
197 BRANCH, /* BRANCH_next */
198 BRANCH, /* BRANCH_next_fail */
199 CURLYM, /* CURLYM_A */
200 CURLYM, /* CURLYM_A_fail */
201 CURLYM, /* CURLYM_B */
202 CURLYM, /* CURLYM_B_fail */
203 IFMATCH, /* IFMATCH_A */
204 IFMATCH, /* IFMATCH_A_fail */
205 CURLY, /* CURLY_B_min_known */
206 CURLY, /* CURLY_B_min_known_fail */
207 CURLY, /* CURLY_B_min */
208 CURLY, /* CURLY_B_min_fail */
209 CURLY, /* CURLY_B_max */
210 CURLY, /* CURLY_B_max_fail */
d09b2d29
IZ
211};
212#endif
213
6bda09f9 214/* regarglen[] - How large is the argument part of the node (in regnodes) */
d09b2d29
IZ
215
216#ifdef REG_COMP_C
/* One entry per regop, in opcode order from END through PSEUDO; the
   regmatch states have no entry in this table.  An entry of 0 is an
   argument part of size zero; every other entry is computed by
   EXTRA_SIZE() from the regnode struct named in that entry.
   NOTE(review): EXTRA_SIZE() and the regnode structs are defined
   elsewhere (not visible in this file). */
29de9391 217static const U8 regarglen[] = {
03363afd
YO
218 0, /* END */
219 0, /* SUCCEED */
220 0, /* BOL */
221 0, /* MBOL */
222 0, /* SBOL */
223 0, /* EOS */
224 0, /* EOL */
225 0, /* MEOL */
226 0, /* SEOL */
227 0, /* BOUND */
228 0, /* BOUNDL */
229 0, /* NBOUND */
230 0, /* NBOUNDL */
231 0, /* GPOS */
232 0, /* REG_ANY */
233 0, /* SANY */
234 0, /* CANY */
235 0, /* ANYOF */
236 0, /* ALNUM */
237 0, /* ALNUML */
238 0, /* NALNUM */
239 0, /* NALNUML */
240 0, /* SPACE */
241 0, /* SPACEL */
242 0, /* NSPACE */
243 0, /* NSPACEL */
244 0, /* DIGIT */
245 0, /* DIGITL */
246 0, /* NDIGIT */
247 0, /* NDIGITL */
248 0, /* CLUMP */
249 0, /* BRANCH */
250 0, /* BACK */
251 0, /* EXACT */
252 0, /* EXACTF */
253 0, /* EXACTFL */
254 0, /* NOTHING */
255 0, /* TAIL */
256 0, /* STAR */
257 0, /* PLUS */
258 EXTRA_SIZE(struct regnode_2), /* CURLY */
259 EXTRA_SIZE(struct regnode_2), /* CURLYN */
260 EXTRA_SIZE(struct regnode_2), /* CURLYM */
261 EXTRA_SIZE(struct regnode_2), /* CURLYX */
262 0, /* WHILEM */
263 EXTRA_SIZE(struct regnode_1), /* OPEN */
264 EXTRA_SIZE(struct regnode_1), /* CLOSE */
265 EXTRA_SIZE(struct regnode_1), /* REF */
266 EXTRA_SIZE(struct regnode_1), /* REFF */
267 EXTRA_SIZE(struct regnode_1), /* REFFL */
268 EXTRA_SIZE(struct regnode_1), /* IFMATCH */
269 EXTRA_SIZE(struct regnode_1), /* UNLESSM */
270 EXTRA_SIZE(struct regnode_1), /* SUSPEND */
271 EXTRA_SIZE(struct regnode_1), /* IFTHEN */
272 EXTRA_SIZE(struct regnode_1), /* GROUPP */
273 EXTRA_SIZE(struct regnode_1), /* LONGJMP */
274 EXTRA_SIZE(struct regnode_1), /* BRANCHJ */
275 EXTRA_SIZE(struct regnode_1), /* EVAL */
276 0, /* MINMOD */
277 0, /* LOGICAL */
278 EXTRA_SIZE(struct regnode_1), /* RENUM */
279 EXTRA_SIZE(struct regnode_1), /* TRIE */
280 EXTRA_SIZE(struct regnode_charclass), /* TRIEC */
281 EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */
282 EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */
6bda09f9
YO
283 EXTRA_SIZE(struct regnode_2L), /* RECURSE */
284 0, /* SRECURSE */
03363afd
YO
285 0, /* OPTIMIZED */
286 0, /* PSEUDO */
d09b2d29
IZ
287};
288
6bda09f9
YO
289/* reg_off_by_arg[] - Which argument holds the offset to the next node */
290
/* One entry per regop, in opcode order from END through PSEUDO; the
   regmatch states have no entry in this table.  The only non-zero
   entries are IFMATCH and UNLESSM (2) and SUSPEND, IFTHEN, LONGJMP,
   BRANCHJ and RENUM (1).
   NOTE(review): 0 presumably means the next-node offset is not held in
   an argument for that regop -- confirm against the users of this table. */
29de9391 291static const char reg_off_by_arg[] = {
03363afd
YO
292 0, /* END */
293 0, /* SUCCEED */
294 0, /* BOL */
295 0, /* MBOL */
296 0, /* SBOL */
297 0, /* EOS */
298 0, /* EOL */
299 0, /* MEOL */
300 0, /* SEOL */
301 0, /* BOUND */
302 0, /* BOUNDL */
303 0, /* NBOUND */
304 0, /* NBOUNDL */
305 0, /* GPOS */
306 0, /* REG_ANY */
307 0, /* SANY */
308 0, /* CANY */
309 0, /* ANYOF */
310 0, /* ALNUM */
311 0, /* ALNUML */
312 0, /* NALNUM */
313 0, /* NALNUML */
314 0, /* SPACE */
315 0, /* SPACEL */
316 0, /* NSPACE */
317 0, /* NSPACEL */
318 0, /* DIGIT */
319 0, /* DIGITL */
320 0, /* NDIGIT */
321 0, /* NDIGITL */
322 0, /* CLUMP */
323 0, /* BRANCH */
324 0, /* BACK */
325 0, /* EXACT */
326 0, /* EXACTF */
327 0, /* EXACTFL */
328 0, /* NOTHING */
329 0, /* TAIL */
330 0, /* STAR */
331 0, /* PLUS */
332 0, /* CURLY */
333 0, /* CURLYN */
334 0, /* CURLYM */
335 0, /* CURLYX */
336 0, /* WHILEM */
337 0, /* OPEN */
338 0, /* CLOSE */
339 0, /* REF */
340 0, /* REFF */
341 0, /* REFFL */
342 2, /* IFMATCH */
343 2, /* UNLESSM */
344 1, /* SUSPEND */
345 1, /* IFTHEN */
346 0, /* GROUPP */
347 1, /* LONGJMP */
348 1, /* BRANCHJ */
349 0, /* EVAL */
350 0, /* MINMOD */
351 0, /* LOGICAL */
352 1, /* RENUM */
353 0, /* TRIE */
354 0, /* TRIEC */
355 0, /* AHOCORASICK */
356 0, /* AHOCORASICKC */
6bda09f9
YO
357 0, /* RECURSE */
358 0, /* SRECURSE */
03363afd
YO
359 0, /* OPTIMIZED */
360 0, /* PSEUDO */
d09b2d29 361};
885f9e59 362
6bda09f9
YO
363/* reg_name[] - Opcode/state names in string form, for debugging */
364
/* Defined only when DEBUGGING is set.  Entries are listed in opcode order,
   regops first and regmatch states after them, so entry i is the name of
   regop/state number i, spelled exactly like the corresponding #define.
   The comment on each entry gives that number in hex. */
885f9e59 365#ifdef DEBUGGING
6d9c9890 366const char * reg_name[] = {
03363afd
YO
367 "END", /* 0x00 */
368 "SUCCEED", /* 0x01 */
369 "BOL", /* 0x02 */
370 "MBOL", /* 0x03 */
371 "SBOL", /* 0x04 */
372 "EOS", /* 0x05 */
373 "EOL", /* 0x06 */
374 "MEOL", /* 0x07 */
375 "SEOL", /* 0x08 */
376 "BOUND", /* 0x09 */
377 "BOUNDL", /* 0x0a */
378 "NBOUND", /* 0x0b */
379 "NBOUNDL", /* 0x0c */
380 "GPOS", /* 0x0d */
381 "REG_ANY", /* 0x0e */
382 "SANY", /* 0x0f */
383 "CANY", /* 0x10 */
384 "ANYOF", /* 0x11 */
385 "ALNUM", /* 0x12 */
386 "ALNUML", /* 0x13 */
387 "NALNUM", /* 0x14 */
388 "NALNUML", /* 0x15 */
389 "SPACE", /* 0x16 */
390 "SPACEL", /* 0x17 */
391 "NSPACE", /* 0x18 */
392 "NSPACEL", /* 0x19 */
393 "DIGIT", /* 0x1a */
394 "DIGITL", /* 0x1b */
395 "NDIGIT", /* 0x1c */
396 "NDIGITL", /* 0x1d */
397 "CLUMP", /* 0x1e */
398 "BRANCH", /* 0x1f */
399 "BACK", /* 0x20 */
400 "EXACT", /* 0x21 */
401 "EXACTF", /* 0x22 */
402 "EXACTFL", /* 0x23 */
403 "NOTHING", /* 0x24 */
404 "TAIL", /* 0x25 */
405 "STAR", /* 0x26 */
406 "PLUS", /* 0x27 */
407 "CURLY", /* 0x28 */
408 "CURLYN", /* 0x29 */
409 "CURLYM", /* 0x2a */
410 "CURLYX", /* 0x2b */
411 "WHILEM", /* 0x2c */
412 "OPEN", /* 0x2d */
413 "CLOSE", /* 0x2e */
414 "REF", /* 0x2f */
415 "REFF", /* 0x30 */
416 "REFFL", /* 0x31 */
417 "IFMATCH", /* 0x32 */
418 "UNLESSM", /* 0x33 */
419 "SUSPEND", /* 0x34 */
420 "IFTHEN", /* 0x35 */
421 "GROUPP", /* 0x36 */
422 "LONGJMP", /* 0x37 */
423 "BRANCHJ", /* 0x38 */
424 "EVAL", /* 0x39 */
425 "MINMOD", /* 0x3a */
426 "LOGICAL", /* 0x3b */
427 "RENUM", /* 0x3c */
428 "TRIE", /* 0x3d */
429 "TRIEC", /* 0x3e */
430 "AHOCORASICK", /* 0x3f */
431 "AHOCORASICKC", /* 0x40 */
6bda09f9
YO
432 "RECURSE", /* 0x41 */
433 "SRECURSE", /* 0x42 */
434 "OPTIMIZED", /* 0x43 */
435 "PSEUDO", /* 0x44 */
03363afd 436 /* ------------ States ------------- */
6bda09f9
YO
437 "TRIE_next", /* 0x45 */
438 "TRIE_next_fail", /* 0x46 */
439 "EVAL_AB", /* 0x47 */
440 "EVAL_AB_fail", /* 0x48 */
441 "resume_CURLYX", /* 0x49 */
442 "resume_WHILEM1", /* 0x4a */
443 "resume_WHILEM2", /* 0x4b */
444 "resume_WHILEM3", /* 0x4c */
445 "resume_WHILEM4", /* 0x4d */
446 "resume_WHILEM5", /* 0x4e */
447 "resume_WHILEM6", /* 0x4f */
448 "BRANCH_next", /* 0x50 */
449 "BRANCH_next_fail", /* 0x51 */
450 "CURLYM_A", /* 0x52 */
451 "CURLYM_A_fail", /* 0x53 */
452 "CURLYM_B", /* 0x54 */
453 "CURLYM_B_fail", /* 0x55 */
454 "IFMATCH_A", /* 0x56 */
455 "IFMATCH_A_fail", /* 0x57 */
456 "CURLY_B_min_known", /* 0x58 */
457 "CURLY_B_min_known_fail", /* 0x59 */
458 "CURLY_B_min", /* 0x5a */
459 "CURLY_B_min_fail", /* 0x5b */
460 "CURLY_B_max", /* 0x5c */
461 "CURLY_B_max_fail", /* 0x5d */
885f9e59 462};
885f9e59 463#endif /* DEBUGGING */
03363afd
YO
464#else
/* This is the #else branch of REG_COMP_C: when REG_COMP_C is not defined,
   reg_name is only declared here, and only in DEBUGGING builds. */
465#ifdef DEBUGGING
6d9c9890 466extern const char * reg_name[];
03363afd 467#endif
d09b2d29
IZ
468#endif /* REG_COMP_C */
469
37442d52 470/* ex: set ro: */