X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5d458dd8ef53373c3f90d568f6668084b0ccbc62..c33ef3ac654cbe35caea1d36f34c68f0e4a134ba:/regnodes.h diff --git a/regnodes.h b/regnodes.h index bbb49db..348410c 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 83 -#define REGMATCH_STATE_MAX 121 +#define REGNODE_MAX 90 +#define REGMATCH_STATE_MAX 130 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -85,14 +85,21 @@ #define ENDLIKE 73 /* 0x49 Used only for the type field of verbs */ #define OPFAIL 74 /* 0x4a Same as (?!) */ #define ACCEPT 75 /* 0x4b Accepts the current matched string. */ -#define VERB 76 /* 0x4c no-sv 1 Used only for the type field of verbs */ +#define VERB 76 /* 0x4c Used only for the type field of verbs */ #define PRUNE 77 /* 0x4d Pattern fails at this startpoint if no-backtracking through this */ #define MARKPOINT 78 /* 0x4e Push the current location for rollback by cut. */ #define SKIP 79 /* 0x4f On failure skip forward (to the mark) before retrying */ #define COMMIT 80 /* 0x50 Pattern fails outright if backtracking through this */ #define CUTGROUP 81 /* 0x51 On failure go to the next alternation in the group */ -#define OPTIMIZED 82 /* 0x52 Placeholder for dump. */ -#define PSEUDO 83 /* 0x53 Pseudo opcode for internal use. */ +#define KEEPS 82 /* 0x52 $& begins here. */ +#define LNBREAK 83 /* 0x53 generic newline pattern */ +#define VERTWS 84 /* 0x54 vertical whitespace (Perl 6) */ +#define NVERTWS 85 /* 0x55 not vertical whitespace (Perl 6) */ +#define HORIZWS 86 /* 0x56 horizontal whitespace (Perl 6) */ +#define NHORIZWS 87 /* 0x57 not horizontal whitespace (Perl 6) */ +#define FOLDCHAR 88 /* 0x58 codepoint with tricky case folding properties. */ +#define OPTIMIZED 89 /* 0x59 Placeholder for dump. */ +#define PSEUDO 90 /* 0x5a Pseudo opcode for internal use. 
*/ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ @@ -132,6 +139,8 @@ #define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */ #define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */ #define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */ +#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */ +#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */ /* PL_regkind[] What type of regop or state is this. */ @@ -206,9 +215,9 @@ EXTCONST U8 PL_regkind[] = { TRIE, /* AHOCORASICKC */ GOSUB, /* GOSUB */ GOSTART, /* GOSTART */ - NREF, /* NREF */ - NREF, /* NREFF */ - NREF, /* NREFFL */ + REF, /* NREF */ + REF, /* NREFF */ + REF, /* NREFFL */ NGROUPP, /* NGROUPP */ INSUBP, /* INSUBP */ DEFINEP, /* DEFINEP */ @@ -221,6 +230,13 @@ EXTCONST U8 PL_regkind[] = { VERB, /* SKIP */ VERB, /* COMMIT */ VERB, /* CUTGROUP */ + KEEPS, /* KEEPS */ + LNBREAK, /* LNBREAK */ + VERTWS, /* VERTWS */ + NVERTWS, /* NVERTWS */ + HORIZWS, /* HORIZWS */ + NHORIZWS, /* NHORIZWS */ + FOLDCHAR, /* FOLDCHAR */ NOTHING, /* OPTIMIZED */ PSEUDO, /* PSEUDO */ /* ------------ States ------------- */ @@ -262,6 +278,8 @@ EXTCONST U8 PL_regkind[] = { SKIP, /* SKIP_next_fail */ CUTGROUP, /* CUTGROUP_next */ CUTGROUP, /* CUTGROUP_next_fail */ + KEEPS, /* KEEPS_next */ + KEEPS, /* KEEPS_next_fail */ }; #endif @@ -345,12 +363,19 @@ static const U8 regarglen[] = { 0, /* ENDLIKE */ 0, /* OPFAIL */ EXTRA_SIZE(struct regnode_1), /* ACCEPT */ - 0, /* VERB */ + EXTRA_SIZE(struct regnode_1), /* VERB */ EXTRA_SIZE(struct regnode_1), /* PRUNE */ EXTRA_SIZE(struct regnode_1), /* MARKPOINT */ EXTRA_SIZE(struct regnode_1), /* SKIP */ EXTRA_SIZE(struct regnode_1), /* COMMIT */ EXTRA_SIZE(struct regnode_1), /* CUTGROUP */ + 0, /* KEEPS */ + 0, /* LNBREAK */ + 0, /* VERTWS */ + 0, /* NVERTWS */ + 0, /* HORIZWS */ + 0, /* NHORIZWS */ + EXTRA_SIZE(struct regnode_1), /* 
FOLDCHAR */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; @@ -440,14 +465,25 @@ static const char reg_off_by_arg[] = { 0, /* SKIP */ 0, /* COMMIT */ 0, /* CUTGROUP */ + 0, /* KEEPS */ + 0, /* LNBREAK */ + 0, /* VERTWS */ + 0, /* NVERTWS */ + 0, /* HORIZWS */ + 0, /* NHORIZWS */ + 0, /* FOLDCHAR */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; +#endif /* REG_COMP_C */ + /* reg_name[] - Opcode/state names in string form, for debugging */ -#ifdef DEBUGGING -const char * reg_name[] = { +#ifndef DOINIT +EXTCONST char * PL_reg_name[]; +#else +EXTCONST char * const PL_reg_name[] = { "END", /* 0000 */ "SUCCEED", /* 0x01 */ "BOL", /* 0x02 */ @@ -530,8 +566,15 @@ const char * reg_name[] = { "SKIP", /* 0x4f */ "COMMIT", /* 0x50 */ "CUTGROUP", /* 0x51 */ - "OPTIMIZED", /* 0x52 */ - "PSEUDO", /* 0x53 */ + "KEEPS", /* 0x52 */ + "LNBREAK", /* 0x53 */ + "VERTWS", /* 0x54 */ + "NVERTWS", /* 0x55 */ + "HORIZWS", /* 0x56 */ + "NHORIZWS", /* 0x57 */ + "FOLDCHAR", /* 0x58 */ + "OPTIMIZED", /* 0x59 */ + "PSEUDO", /* 0x5a */ /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ @@ -571,12 +614,95 @@ const char * reg_name[] = { "SKIP_next_fail", /* REGNODE_MAX +0x24 */ "CUTGROUP_next", /* REGNODE_MAX +0x25 */ "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */ + "KEEPS_next", /* REGNODE_MAX +0x27 */ + "KEEPS_next_fail", /* REGNODE_MAX +0x28 */ +}; +#endif /* DOINIT */ + +/* PL_reg_extflags_name[] - extflags bit names in string form, for debugging */ + +#ifndef DOINIT +EXTCONST char * PL_reg_extflags_name[]; +#else +EXTCONST char * const PL_reg_extflags_name[] = { + /* Bits in extflags defined: 11111111111101111111111100111111 */ + "MULTILINE", /* 0x00000001 */ + "SINGLELINE", /* 0x00000002 */ + "FOLD", /* 0x00000004 */ + "EXTENDED", /* 0x00000008 */ + "KEEPCOPY", /* 0x00000010 */ + "LOCALE", /* 0x00000020 */ + "UNUSED_BIT_6", /* 0x00000040 */ + "UNUSED_BIT_7", /* 0x00000080 */ + "ANCH_BOL", /* 0x00000100 */ + "ANCH_MBOL", /* 0x00000200 */ +
"ANCH_SBOL", /* 0x00000400 */ + "ANCH_GPOS", /* 0x00000800 */ + "GPOS_SEEN", /* 0x00001000 */ + "GPOS_FLOAT", /* 0x00002000 */ + "LOOKBEHIND_SEEN", /* 0x00004000 */ + "EVAL_SEEN", /* 0x00008000 */ + "CANY_SEEN", /* 0x00010000 */ + "NOSCAN", /* 0x00020000 */ + "CHECK_ALL", /* 0x00040000 */ + "UNUSED_BIT_19", /* 0x00080000 */ + "MATCH_UTF8", /* 0x00100000 */ + "USE_INTUIT_NOML", /* 0x00200000 */ + "USE_INTUIT_ML", /* 0x00400000 */ + "INTUIT_TAIL", /* 0x00800000 */ + "SPLIT", /* 0x01000000 */ + "COPY_DONE", /* 0x02000000 */ + "TAINTED_SEEN", /* 0x04000000 */ + "TAINTED", /* 0x08000000 */ + "START_ONLY", /* 0x10000000 */ + "SKIPWHITE", /* 0x20000000 */ + "WHITE", /* 0x40000000 */ + "NULL", /* 0x80000000 */ }; -#endif /* DEBUGGING */ +#endif /* DOINIT */ + +/* The following have no fixed length. U8 so we can do strchr() on it. */ +#define REGNODE_VARIES(node) (PL_varies_bitmask[(node) >> 3] & (1 << ((node) & 7))) + +#ifndef DOINIT +EXTCONST U8 PL_varies[] __attribute__deprecated__; #else -#ifdef DEBUGGING -extern const char * reg_name[]; -#endif -#endif /* REG_COMP_C */ +EXTCONST U8 PL_varies[] __attribute__deprecated__ = { + CLUMP, BRANCH, BACK, STAR, PLUS, CURLY, CURLYN, CURLYM, CURLYX, WHILEM, + REF, REFF, REFFL, SUSPEND, IFTHEN, BRANCHJ, NREF, NREFF, NREFFL, + 0 +}; +#endif /* DOINIT */ + +#ifndef DOINIT +EXTCONST U8 PL_varies_bitmask[]; +#else +EXTCONST U8 PL_varies_bitmask[] = { + 0x00, 0x00, 0x00, 0xC0, 0xC1, 0x9F, 0x33, 0x01, 0x38, 0x00, 0x00, 0x00 +}; +#endif /* DOINIT */ + +/* The following always have a length of 1. U8 so we can do strchr() on it. */ +/* (Note that length 1 means "one character" under UTF8, not "one octet".) 
*/ +#define REGNODE_SIMPLE(node) (PL_simple_bitmask[(node) >> 3] & (1 << ((node) & 7))) + +#ifndef DOINIT +EXTCONST U8 PL_simple[] __attribute__deprecated__; +#else +EXTCONST U8 PL_simple[] __attribute__deprecated__ = { + REG_ANY, SANY, CANY, ANYOF, ALNUM, ALNUML, NALNUM, NALNUML, SPACE, + SPACEL, NSPACE, NSPACEL, DIGIT, NDIGIT, VERTWS, NVERTWS, HORIZWS, + NHORIZWS, + 0 +}; +#endif /* DOINIT */ + +#ifndef DOINIT +EXTCONST U8 PL_simple_bitmask[]; +#else +EXTCONST U8 PL_simple_bitmask[] = { + 0x00, 0xC0, 0xFF, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x00 +}; +#endif /* DOINIT */ /* ex: set ro: */