X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/24b23f37fefbcc71a881f6805d87449a234dc645..4e205ed637fa853d115099170e6e11407ca49619:/regnodes.h diff --git a/regnodes.h b/regnodes.h index 010b943..1697a12 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 78 -#define REGMATCH_STATE_MAX 110 +#define REGNODE_MAX 90 +#define REGMATCH_STATE_MAX 130 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -82,12 +82,24 @@ #define NGROUPP 70 /* 0x46 Whether the group matched. */ #define INSUBP 71 /* 0x47 Whether we are in a specific recurse. */ #define DEFINEP 72 /* 0x48 Never execute directly. */ -#define OPFAIL 73 /* 0x49 Same as (?!) */ -#define COMMIT 74 /* 0x4a Pattern fails if backtracking through this */ -#define CUT 75 /* 0x4b ... and restarts at the cursor point */ -#define OPERROR 76 /* 0x4c Pattern fails outright if backtracking through this */ -#define OPTIMIZED 77 /* 0x4d Placeholder for dump. */ -#define PSEUDO 78 /* 0x4e Pseudo opcode for internal use. */ +#define ENDLIKE 73 /* 0x49 Used only for the type field of verbs */ +#define OPFAIL 74 /* 0x4a Same as (?!) */ +#define ACCEPT 75 /* 0x4b Accepts the current matched string. */ +#define VERB 76 /* 0x4c no-sv 1 Used only for the type field of verbs */ +#define PRUNE 77 /* 0x4d Pattern fails at this startpoint if no-backtracking through this */ +#define MARKPOINT 78 /* 0x4e Push the current location for rollback by cut. */ +#define SKIP 79 /* 0x4f On failure skip forward (to the mark) before retrying */ +#define COMMIT 80 /* 0x50 Pattern fails outright if backtracking through this */ +#define CUTGROUP 81 /* 0x51 On failure go to the next alternation in the group */ +#define KEEPS 82 /* 0x52 $& begins here. 
*/ +#define LNBREAK 83 /* 0x53 generic newline pattern */ +#define VERTWS 84 /* 0x54 vertical whitespace (Perl 6) */ +#define NVERTWS 85 /* 0x55 not vertical whitespace (Perl 6) */ +#define HORIZWS 86 /* 0x56 horizontal whitespace (Perl 6) */ +#define NHORIZWS 87 /* 0x57 not horizontal whitespace (Perl 6) */ +#define FOLDCHAR 88 /* 0x58 codepoint with tricky case folding properties. */ +#define OPTIMIZED 89 /* 0x59 Placeholder for dump. */ +#define PSEUDO 90 /* 0x5a Pseudo opcode for internal use. */ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ @@ -121,6 +133,14 @@ #define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */ #define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */ #define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */ +#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */ +#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */ +#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */ +#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */ +#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */ +#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */ +#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */ +#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */ /* PL_regkind[] What type of regop or state is this. 
*/ @@ -128,118 +148,138 @@ EXTCONST U8 PL_regkind[]; #else EXTCONST U8 PL_regkind[] = { - END, /* END */ - END, /* SUCCEED */ - BOL, /* BOL */ - BOL, /* MBOL */ - BOL, /* SBOL */ - EOL, /* EOS */ - EOL, /* EOL */ - EOL, /* MEOL */ - EOL, /* SEOL */ - BOUND, /* BOUND */ - BOUND, /* BOUNDL */ - NBOUND, /* NBOUND */ - NBOUND, /* NBOUNDL */ - GPOS, /* GPOS */ - REG_ANY, /* REG_ANY */ - REG_ANY, /* SANY */ - REG_ANY, /* CANY */ - ANYOF, /* ANYOF */ - ALNUM, /* ALNUM */ - ALNUM, /* ALNUML */ - NALNUM, /* NALNUM */ - NALNUM, /* NALNUML */ - SPACE, /* SPACE */ - SPACE, /* SPACEL */ - NSPACE, /* NSPACE */ - NSPACE, /* NSPACEL */ - DIGIT, /* DIGIT */ - DIGIT, /* DIGITL */ - NDIGIT, /* NDIGIT */ - NDIGIT, /* NDIGITL */ - CLUMP, /* CLUMP */ - BRANCH, /* BRANCH */ - BACK, /* BACK */ - EXACT, /* EXACT */ - EXACT, /* EXACTF */ - EXACT, /* EXACTFL */ - NOTHING, /* NOTHING */ - NOTHING, /* TAIL */ - STAR, /* STAR */ - PLUS, /* PLUS */ - CURLY, /* CURLY */ - CURLY, /* CURLYN */ - CURLY, /* CURLYM */ - CURLY, /* CURLYX */ - WHILEM, /* WHILEM */ - OPEN, /* OPEN */ - CLOSE, /* CLOSE */ - REF, /* REF */ - REF, /* REFF */ - REF, /* REFFL */ - BRANCHJ, /* IFMATCH */ - BRANCHJ, /* UNLESSM */ - BRANCHJ, /* SUSPEND */ - BRANCHJ, /* IFTHEN */ - GROUPP, /* GROUPP */ - LONGJMP, /* LONGJMP */ - BRANCHJ, /* BRANCHJ */ - EVAL, /* EVAL */ - MINMOD, /* MINMOD */ - LOGICAL, /* LOGICAL */ - BRANCHJ, /* RENUM */ - TRIE, /* TRIE */ - TRIE, /* TRIEC */ - TRIE, /* AHOCORASICK */ - TRIE, /* AHOCORASICKC */ - GOSUB, /* GOSUB */ - GOSTART, /* GOSTART */ - NREF, /* NREF */ - NREF, /* NREFF */ - NREF, /* NREFFL */ - NGROUPP, /* NGROUPP */ - INSUBP, /* INSUBP */ - DEFINEP, /* DEFINEP */ - OPFAIL, /* OPFAIL */ - COMMIT, /* COMMIT */ - COMMIT, /* CUT */ - OPERROR, /* OPERROR */ - NOTHING, /* OPTIMIZED */ - PSEUDO, /* PSEUDO */ + END, /* END */ + END, /* SUCCEED */ + BOL, /* BOL */ + BOL, /* MBOL */ + BOL, /* SBOL */ + EOL, /* EOS */ + EOL, /* EOL */ + EOL, /* MEOL */ + EOL, /* SEOL */ + BOUND, /* BOUND */ + 
BOUND, /* BOUNDL */ + NBOUND, /* NBOUND */ + NBOUND, /* NBOUNDL */ + GPOS, /* GPOS */ + REG_ANY, /* REG_ANY */ + REG_ANY, /* SANY */ + REG_ANY, /* CANY */ + ANYOF, /* ANYOF */ + ALNUM, /* ALNUM */ + ALNUM, /* ALNUML */ + NALNUM, /* NALNUM */ + NALNUM, /* NALNUML */ + SPACE, /* SPACE */ + SPACE, /* SPACEL */ + NSPACE, /* NSPACE */ + NSPACE, /* NSPACEL */ + DIGIT, /* DIGIT */ + DIGIT, /* DIGITL */ + NDIGIT, /* NDIGIT */ + NDIGIT, /* NDIGITL */ + CLUMP, /* CLUMP */ + BRANCH, /* BRANCH */ + BACK, /* BACK */ + EXACT, /* EXACT */ + EXACT, /* EXACTF */ + EXACT, /* EXACTFL */ + NOTHING, /* NOTHING */ + NOTHING, /* TAIL */ + STAR, /* STAR */ + PLUS, /* PLUS */ + CURLY, /* CURLY */ + CURLY, /* CURLYN */ + CURLY, /* CURLYM */ + CURLY, /* CURLYX */ + WHILEM, /* WHILEM */ + OPEN, /* OPEN */ + CLOSE, /* CLOSE */ + REF, /* REF */ + REF, /* REFF */ + REF, /* REFFL */ + BRANCHJ, /* IFMATCH */ + BRANCHJ, /* UNLESSM */ + BRANCHJ, /* SUSPEND */ + BRANCHJ, /* IFTHEN */ + GROUPP, /* GROUPP */ + LONGJMP, /* LONGJMP */ + BRANCHJ, /* BRANCHJ */ + EVAL, /* EVAL */ + MINMOD, /* MINMOD */ + LOGICAL, /* LOGICAL */ + BRANCHJ, /* RENUM */ + TRIE, /* TRIE */ + TRIE, /* TRIEC */ + TRIE, /* AHOCORASICK */ + TRIE, /* AHOCORASICKC */ + GOSUB, /* GOSUB */ + GOSTART, /* GOSTART */ + REF, /* NREF */ + REF, /* NREFF */ + REF, /* NREFFL */ + NGROUPP, /* NGROUPP */ + INSUBP, /* INSUBP */ + DEFINEP, /* DEFINEP */ + ENDLIKE, /* ENDLIKE */ + ENDLIKE, /* OPFAIL */ + ENDLIKE, /* ACCEPT */ + VERB, /* VERB */ + VERB, /* PRUNE */ + VERB, /* MARKPOINT */ + VERB, /* SKIP */ + VERB, /* COMMIT */ + VERB, /* CUTGROUP */ + KEEPS, /* KEEPS */ + LNBREAK, /* LNBREAK */ + VERTWS, /* VERTWS */ + NVERTWS, /* NVERTWS */ + HORIZWS, /* HORIZWS */ + NHORIZWS, /* NHORIZWS */ + FOLDCHAR, /* FOLDCHAR */ + NOTHING, /* OPTIMIZED */ + PSEUDO, /* PSEUDO */ /* ------------ States ------------- */ - TRIE, /* TRIE_next */ - TRIE, /* TRIE_next_fail */ - EVAL, /* EVAL_AB */ - EVAL, /* EVAL_AB_fail */ - CURLYX, /* CURLYX_end */ - CURLYX, /* 
CURLYX_end_fail */ - WHILEM, /* WHILEM_A_pre */ - WHILEM, /* WHILEM_A_pre_fail */ - WHILEM, /* WHILEM_A_min */ - WHILEM, /* WHILEM_A_min_fail */ - WHILEM, /* WHILEM_A_max */ - WHILEM, /* WHILEM_A_max_fail */ - WHILEM, /* WHILEM_B_min */ - WHILEM, /* WHILEM_B_min_fail */ - WHILEM, /* WHILEM_B_max */ - WHILEM, /* WHILEM_B_max_fail */ - BRANCH, /* BRANCH_next */ - BRANCH, /* BRANCH_next_fail */ - CURLYM, /* CURLYM_A */ - CURLYM, /* CURLYM_A_fail */ - CURLYM, /* CURLYM_B */ - CURLYM, /* CURLYM_B_fail */ - IFMATCH, /* IFMATCH_A */ - IFMATCH, /* IFMATCH_A_fail */ - CURLY, /* CURLY_B_min_known */ - CURLY, /* CURLY_B_min_known_fail */ - CURLY, /* CURLY_B_min */ - CURLY, /* CURLY_B_min_fail */ - CURLY, /* CURLY_B_max */ - CURLY, /* CURLY_B_max_fail */ - COMMIT, /* COMMIT_next */ - COMMIT, /* COMMIT_next_fail */ + TRIE, /* TRIE_next */ + TRIE, /* TRIE_next_fail */ + EVAL, /* EVAL_AB */ + EVAL, /* EVAL_AB_fail */ + CURLYX, /* CURLYX_end */ + CURLYX, /* CURLYX_end_fail */ + WHILEM, /* WHILEM_A_pre */ + WHILEM, /* WHILEM_A_pre_fail */ + WHILEM, /* WHILEM_A_min */ + WHILEM, /* WHILEM_A_min_fail */ + WHILEM, /* WHILEM_A_max */ + WHILEM, /* WHILEM_A_max_fail */ + WHILEM, /* WHILEM_B_min */ + WHILEM, /* WHILEM_B_min_fail */ + WHILEM, /* WHILEM_B_max */ + WHILEM, /* WHILEM_B_max_fail */ + BRANCH, /* BRANCH_next */ + BRANCH, /* BRANCH_next_fail */ + CURLYM, /* CURLYM_A */ + CURLYM, /* CURLYM_A_fail */ + CURLYM, /* CURLYM_B */ + CURLYM, /* CURLYM_B_fail */ + IFMATCH, /* IFMATCH_A */ + IFMATCH, /* IFMATCH_A_fail */ + CURLY, /* CURLY_B_min_known */ + CURLY, /* CURLY_B_min_known_fail */ + CURLY, /* CURLY_B_min */ + CURLY, /* CURLY_B_min_fail */ + CURLY, /* CURLY_B_max */ + CURLY, /* CURLY_B_max_fail */ + COMMIT, /* COMMIT_next */ + COMMIT, /* COMMIT_next_fail */ + MARKPOINT, /* MARKPOINT_next */ + MARKPOINT, /* MARKPOINT_next_fail */ + SKIP, /* SKIP_next */ + SKIP, /* SKIP_next_fail */ + CUTGROUP, /* CUTGROUP_next */ + CUTGROUP, /* CUTGROUP_next_fail */ + KEEPS, /* KEEPS_next */ + KEEPS, 
/* KEEPS_next_fail */ }; #endif @@ -320,10 +360,22 @@ static const U8 regarglen[] = { EXTRA_SIZE(struct regnode_1), /* NGROUPP */ EXTRA_SIZE(struct regnode_1), /* INSUBP */ EXTRA_SIZE(struct regnode_1), /* DEFINEP */ + 0, /* ENDLIKE */ 0, /* OPFAIL */ - 0, /* COMMIT */ - 0, /* CUT */ - 0, /* OPERROR */ + EXTRA_SIZE(struct regnode_1), /* ACCEPT */ + 0, /* VERB */ + EXTRA_SIZE(struct regnode_1), /* PRUNE */ + EXTRA_SIZE(struct regnode_1), /* MARKPOINT */ + EXTRA_SIZE(struct regnode_1), /* SKIP */ + EXTRA_SIZE(struct regnode_1), /* COMMIT */ + EXTRA_SIZE(struct regnode_1), /* CUTGROUP */ + 0, /* KEEPS */ + 0, /* LNBREAK */ + 0, /* VERTWS */ + 0, /* NVERTWS */ + 0, /* HORIZWS */ + 0, /* NHORIZWS */ + EXTRA_SIZE(struct regnode_1), /* FOLDCHAR */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; @@ -404,18 +456,34 @@ static const char reg_off_by_arg[] = { 0, /* NGROUPP */ 0, /* INSUBP */ 0, /* DEFINEP */ + 0, /* ENDLIKE */ 0, /* OPFAIL */ + 0, /* ACCEPT */ + 0, /* VERB */ + 0, /* PRUNE */ + 0, /* MARKPOINT */ + 0, /* SKIP */ 0, /* COMMIT */ - 0, /* CUT */ - 0, /* OPERROR */ + 0, /* CUTGROUP */ + 0, /* KEEPS */ + 0, /* LNBREAK */ + 0, /* VERTWS */ + 0, /* NVERTWS */ + 0, /* HORIZWS */ + 0, /* NHORIZWS */ + 0, /* FOLDCHAR */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; +#endif /* REG_COMP_C */ + /* reg_name[] - Opcode/state names in string form, for debugging */ -#ifdef DEBUGGING -const char * reg_name[] = { +#ifndef DOINIT +EXTCONST char * PL_reg_name[]; +#else +EXTCONST char * const PL_reg_name[] = { "END", /* 0000 */ "SUCCEED", /* 0x01 */ "BOL", /* 0x02 */ @@ -489,12 +557,24 @@ const char * reg_name[] = { "NGROUPP", /* 0x46 */ "INSUBP", /* 0x47 */ "DEFINEP", /* 0x48 */ - "OPFAIL", /* 0x49 */ - "COMMIT", /* 0x4a */ - "CUT", /* 0x4b */ - "OPERROR", /* 0x4c */ - "OPTIMIZED", /* 0x4d */ - "PSEUDO", /* 0x4e */ + "ENDLIKE", /* 0x49 */ + "OPFAIL", /* 0x4a */ + "ACCEPT", /* 0x4b */ + "VERB", /* 0x4c */ + "PRUNE", /* 0x4d */ + "MARKPOINT", /* 0x4e */ + "SKIP", /* 0x4f */ + "COMMIT", /* 0x50 */ + 
"CUTGROUP", /* 0x51 */ + "KEEPS", /* 0x52 */ + "LNBREAK", /* 0x53 */ + "VERTWS", /* 0x54 */ + "NVERTWS", /* 0x55 */ + "HORIZWS", /* 0x56 */ + "NHORIZWS", /* 0x57 */ + "FOLDCHAR", /* 0x58 */ + "OPTIMIZED", /* 0x59 */ + "PSEUDO", /* 0x5a */ /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ @@ -528,12 +608,57 @@ const char * reg_name[] = { "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */ "COMMIT_next", /* REGNODE_MAX +0x1f */ "COMMIT_next_fail", /* REGNODE_MAX +0x20 */ + "MARKPOINT_next", /* REGNODE_MAX +0x21 */ + "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */ + "SKIP_next", /* REGNODE_MAX +0x23 */ + "SKIP_next_fail", /* REGNODE_MAX +0x24 */ + "CUTGROUP_next", /* REGNODE_MAX +0x25 */ + "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */ + "KEEPS_next", /* REGNODE_MAX +0x27 */ + "KEEPS_next_fail", /* REGNODE_MAX +0x28 */ }; -#endif /* DEBUGGING */ +#endif /* DOINIT */ + +/* PL_reg_extflags_name[] - Extflags bit names in string form, for debugging */ + +#ifndef DOINIT +EXTCONST char * PL_reg_extflags_name[]; #else -#ifdef DEBUGGING -extern const char * reg_name[]; -#endif -#endif /* REG_COMP_C */ +EXTCONST char * const PL_reg_extflags_name[] = { + /* Bits in extflags defined: 11111111111111111111111100111111 */ + "ANCH_BOL", /* 0x00000001 */ + "ANCH_MBOL", /* 0x00000002 */ + "ANCH_SBOL", /* 0x00000004 */ + "ANCH_GPOS", /* 0x00000008 */ + "GPOS_SEEN", /* 0x00000010 */ + "GPOS_FLOAT", /* 0x00000020 */ + "UNUSED_BIT_6", /* 0x00000040 */ + "UNUSED_BIT_7", /* 0x00000080 */ + "SKIPWHITE", /* 0x00000100 */ + "START_ONLY", /* 0x00000200 */ + "WHITE", /* 0x00000400 */ + "LOCALE", /* 0x00000800 */ + "MULTILINE", /* 0x00001000 */ + "SINGLELINE", /* 0x00002000 */ + "FOLD", /* 0x00004000 */ + "EXTENDED", /* 0x00008000 */ + "KEEPCOPY", /* 0x00010000 */ + "LOOKBEHIND_SEEN", /* 0x00020000 */ + "EVAL_SEEN", /* 0x00040000 */ + "CANY_SEEN", /* 0x00080000 */ + "NOSCAN", /* 0x00100000 */ + "CHECK_ALL", /* 0x00200000 */ + "UTF8", /* 
0x00400000 */ + "MATCH_UTF8", /* 0x00800000 */ + "USE_INTUIT_NOML", /* 0x01000000 */ + "USE_INTUIT_ML", /* 0x02000000 */ + "INTUIT_TAIL", /* 0x04000000 */ + "SPLIT", /* 0x08000000 */ + "COPY_DONE", /* 0x10000000 */ + "TAINTED_SEEN", /* 0x20000000 */ + "NULL", /* 0x40000000 */ + "TAINTED", /* 0x80000000 */ +}; +#endif /* DOINIT */ /* ex: set ro: */