else if ((OP(scan) == EXACTFU_ONLY8) && (OP(n) == EXACTFU)) {
; /* join is compatible, no need to change OP */
}
- else if (OP(scan) == EXACTFU) {
- if (OP(n) != EXACTFU) {
-
- /* Here the first node is EXACTFU and the second isn't.
- * Normally EXACTFU nodes are compatible for joining only
- * with EXACTFU_ONLY8 nodes (already handled), and other
- * EXACTFU nodes. But under /di, certain temporary
- * EXACTFS_foo_U nodes are generated, which are compatible.
- * We check for this case here. These need to be resolved
- * to either EXACTFU or EXACTF at joining time. They have
- * nothing in them that would forbid them from being the
- * more desirable EXACTFU nodes except that they begin
- * and/or end with a single [Ss]. The reason this is
- * problematic is because they could be joined in this loop
- * with an adjacent node that ends and/or begins with [Ss]
- * which would then form the sequence 'ss', which matches
- * differently under /di than /ui, in which case EXACTFU
- * can't be used. If the 'ss' sequence doesn't get formed,
- * the nodes get absorbed into any adjacent EXACTFU node.
- * And if the only adjacent node is EXACTF, they get
- * absorbed into that, under the theory that a longer node
- * is better than two shorter ones, even if one is EXACTFU.
- * Note that EXACTFU_ONLY8 is generated only for UTF-8
- * patterns, and the EXACTFS_foo_U ones only for non-UTF-8.
- * */
-
- if (OP(n) == EXACTFS_E_U || OP(n) == EXACTFS_BE_U) {
-
- /* Here the joined node would end with 's'. If the
- * node following the combination is an EXACTF one,
- * it's better to join this EXACTFS_fooE_U with that
- * one, leaving the current one in 'scan' be the more
- * desirable EXACTFU */
- if (OP(nnext) == EXACTF) {
- break;
- }
- OP(scan) = EXACTFS_E_U;
- }
- else if (OP(n) != EXACTFS_B_U) {
- break; /* This would be an incompatible join; stop */
- }
- }
+ else if (OP(scan) == EXACTFU && OP(n) == EXACTFU) {
+ ; /* join is compatible, no need to change OP */
}
- else if (OP(scan) == EXACTF) {
- if (OP(n) != EXACTF) {
-
- /* Here the first node is EXACTF and the second isn't.
- * EXACTF nodes are compatible for joining only with other
- * EXACTF nodes, and the EXACTFS_foo_U nodes. But the
- * latter nodes can be also joined with EXACTFU ones, and
- * that is a better outcome, so if the node following 'n'
- * is EXACTFU, quit now so that those two can be joined
- * later */
- if ( OP(n) != EXACTFS_B_U
- && OP(n) != EXACTFS_E_U
- && OP(n) != EXACTFS_BE_U)
- {
- break;
- }
- else if (OP(nnext) == EXACTFU) {
+ else if (OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
+
+ /* Under /di, temporary EXACTFU_S_EDGE nodes are generated,
+ * which can join with EXACTFU ones. We check for this case
+ * here. These need to be resolved to either EXACTFU or
+ * EXACTF at joining time. They have nothing in them that
+ * would forbid them from being the more desirable EXACTFU
+ * nodes except that they begin and/or end with a single [Ss].
+ * The reason this is problematic is because they could be
+ * joined in this loop with an adjacent node that ends and/or
+ * begins with [Ss] which would then form the sequence 'ss',
+ * which matches differently under /di than /ui, in which case
+ * EXACTFU can't be used. If the 'ss' sequence doesn't get
+ * formed, the nodes get absorbed into any adjacent EXACTFU
+ * node. And if the only adjacent node is EXACTF, they get
+ * absorbed into that, under the theory that a longer node is
+ * better than two shorter ones, even if one is EXACTFU. Note
+ * that EXACTFU_ONLY8 is generated only for UTF-8 patterns,
+ * and the EXACTFU_S_EDGE ones only for non-UTF-8. */
+
+ if (STRING(n)[STR_LEN(n)-1] == 's') {
+
+ /* Here the joined node would end with 's'. If the node
+ * following the combination is an EXACTF one, it's better to
+ * join this trailing edge 's' node with that one, leaving the
+ * current one in 'scan' as the more desirable EXACTFU */
+ if (OP(nnext) == EXACTF) {
break;
}
- else {
- /* Here the next node can be joined with the EXACTF
- * node, and become part of it. That they begin or end
- * with 's' now doesn't matter. */
- }
+
+ OP(scan) = EXACTFU_S_EDGE;
+
+ } /* Otherwise, the beginning 's' of the 2nd node just
+ becomes an interior 's' in 'scan' */
+ }
+ else if (OP(scan) == EXACTF && OP(n) == EXACTF) {
+ ; /* join is compatible, no need to change OP */
+ }
+ else if (OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
+
+ /* EXACTF nodes are compatible for joining with EXACTFU_S_EDGE
+ * nodes. But the latter nodes can also be joined with EXACTFU
+ * ones, and that is a better outcome, so if the node following
+ * 'n' is EXACTFU, quit now so that those two can be joined
+ * later */
+ if (OP(nnext) == EXACTFU) {
+ break;
}
+
+ /* The join is compatible, and the combined node will be
+ * EXACTF. (These don't care if they begin or end with 's') */
}
- else if (OP(scan) == EXACTFS_B_U) {
-
- /* Here, the first node begins, but does not end with 's'.
- * That means it doesn't form 'ss' with the following node, so
- * can become EXACTFU, and either stand on its own or be joined
- * with a following EXACTFU. If the following is instead an
- * EXACTF, the two can also be joined together as EXACTF */
- if (OP(n) == EXACTF) {
+ else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
+ if ( STRING(scan)[STR_LEN(scan)-1] == 's'
+ && STRING(n)[0] == 's')
+ {
+ /* When combined, we have the sequence 'ss', which means we
+ * have to remain /di */
OP(scan) = EXACTF;
}
- else {
- OP(scan) = EXACTFU;
- if (OP(n) != EXACTFU) {
- break;
- }
- }
}
- else if (OP(scan) == EXACTFS_E_U || OP(scan) == EXACTFS_BE_U) {
-
- /* Here, the first node ends with 's', and could become an
- * EXACTFU (or be joined with a following EXACTFU) if that next
- * node doesn't begin with 's'; otherwise it must become an
- * EXACTF node. */
- if (OP(n) == EXACTFS_B_U || OP(n) == EXACTFS_BE_U) {
- OP(scan) = EXACTF;
+ else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
+ if (STRING(n)[0] == 's') {
+ ; /* Here the join is compatible and the combined node
+ starts with 's', no need to change OP */
}
- else {
+ else { /* Now the trailing 's' is in the interior */
OP(scan) = EXACTFU;
- if (OP(n) != EXACTFU) {
- break;
- }
}
}
+ else if (OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
+
+ /* The join is compatible, and the combined node will be
+ * EXACTF. (These don't care if they begin or end with 's') */
+ OP(scan) = EXACTF;
+ }
else if (OP(scan) != OP(n)) {
/* The only other compatible joinings are the same node type */
#endif
}
- /* These temporary nodes can now be turned into EXACTFU, and must, as
- * regexec.c doesn't handle them */
- if ( OP(scan) == EXACTFS_B_U
- || OP(scan) == EXACTFS_E_U
- || OP(scan) == EXACTFS_BE_U)
- {
+ /* This temporary node can now be turned into EXACTFU, and must, as
+ * regexec.c doesn't handle it */
+ if (OP(scan) == EXACTFU_S_EDGE) {
OP(scan) = EXACTFU;
}
case STAR:
next = NEXTOPER(scan);
- /* These temporary nodes can now be turned into EXACTFU, and
- * must, as regexec.c doesn't handle them */
- if ( OP(next) == EXACTFS_B_U
- || OP(next) == EXACTFS_E_U
- || OP(next) == EXACTFS_BE_U)
- {
+ /* This temporary node can now be turned into EXACTFU, and
+ * must, as regexec.c doesn't handle it */
+ if (OP(next) == EXACTFU_S_EDGE) {
OP(next) = EXACTFU;
}
* contain only above-Latin1 characters (hence must be in UTF8),
* which don't participate in folds with Latin1-range characters,
* as the latter's folds aren't known until runtime. */
- bool maybe_exactfu = FOLD;
-
- /* An EXACTF node that otherwise could be turned into EXACTFU,
- * can't be if it starts and/or ends with [Ss]. Because, during
- * optimization it could be joined with another node that ends
- * and/or starts with [Ss], creating the sequence 'ss', which needs
- * to remain in an EXACTF node. This flag is used to signal this
- * situation */
- bool maybe_exactfs = FALSE;
+ bool maybe_exactfu = FOLD && (DEPENDS_SEMANTICS || LOC);
/* Single-character EXACTish nodes are almost always SIMPLE. This
* allows us to override this as encountered */
* target string is (also) in UTF-8 */
bool requires_utf8_target = FALSE;
+ /* The sequence 'ss' is problematic in non-UTF-8 patterns. */
+ bool has_ss = FALSE;
+
+ /* So is the MICRO SIGN */
bool has_micro_sign = FALSE;
/* Allocate an EXACT node. The node_type may change below to
if (! maybe_exactfu) {
len = 0;
s = s0;
- maybe_exactfu = FOLD; /* Prob. unnecessary */
goto reparse;
}
}
: 0));
s += added_len;
- if (ender > 255) {
+ if ( ender > 255
+ && LIKELY(ender != GREEK_SMALL_LETTER_MU))
+ {
+ /* U+B5 folds to the MU, so it's possible for a
+ * non-UTF-8 target to match it */
requires_utf8_target = TRUE;
- if (UNLIKELY(ender == GREEK_SMALL_LETTER_MU)) {
- has_micro_sign = TRUE;
- }
}
}
}
/* On non-ancient Unicode versions, this includes the
* multi-char fold SHARP S to 'ss' */
- if (len == 0 && isALPHA_FOLD_EQ(ender, 's')) {
- maybe_exactfs = TRUE; /* Node begins with 's' */
- }
- else if ( UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
+ if ( UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)
|| ( isALPHA_FOLD_EQ(ender, 's')
+ && len > 0
&& isALPHA_FOLD_EQ(*(s-1), 's')))
{
/* Here, we have one of the following:
* string is in UTF-8.
* */
- maybe_exactfs = FALSE; /* Can't generate an
- EXACTFS node */
- maybe_exactfu = FALSE; /* Nor EXACTFU (unless we
+ has_ss = TRUE;
+ maybe_exactfu = FALSE; /* Can't generate an
+ EXACTFU node (unless we
already are in one) */
if (UNLIKELY(ender == LATIN_SMALL_LETTER_SHARP_S)) {
maybe_SIMPLE = 0;
else if (requires_utf8_target) {
node_type = EXACT_ONLY8;
}
- }
-
- if (FOLD) {
- /* If the node ends in an 's' it can't now be changed into
- * an EXACTFU, as the node could later get joined with another
- * one that begins with 's' and that combination that would
- * then wrongly match the sharp s under /di. (Note that if
- * it's already EXACTFU, this is irrelevant) If this is
- * the only reason keeping it from being an EXACTFU, we
- * create a special node type so that at joining time, we
- * can turn it into an EXACTFU if no 'ss' is formed */
- if (isALPHA_FOLD_EQ(ender, 's')) {
- if (maybe_exactfu && node_type == EXACTF) {
- node_type = (maybe_exactfs)
- ? EXACTFS_BE_U
- : EXACTFS_E_U;
- }
- maybe_exactfu = FALSE;
+ } else if (FOLD) {
+ if ( UNLIKELY(has_micro_sign || has_ss)
+ && (node_type == EXACTFU || ( node_type == EXACTF
+ && maybe_exactfu)))
+ { /* These two conditions are problematic in non-UTF-8
+ EXACTFU nodes. */
+ assert(! UTF);
+ node_type = EXACTFUP;
}
+ else if (node_type == EXACTFL) {
- /* If 'maybe_exactfu' is set, then there are no code points
- * that match differently depending on UTF8ness of the
- * target string (for /u), or depending on locale for /l */
- if (maybe_exactfu) {
- if (node_type == EXACTF) {
- node_type = EXACTFU;
- }
- else if (node_type == EXACTFL) {
+ /* 'maybe_exactfu' is deliberately set above to
+ * indicate this node type, where all code points in it
+ * are above 255 */
+ if (maybe_exactfu) {
node_type = EXACTFLU8;
}
}
- else if (node_type == EXACTF) {
- RExC_seen_d_op = TRUE;
-
- /* If the only thing keeping this from being EXACTFU is
- * that it begins with 's', change it to a special node
- * type so that during the later join, we can easily
- * check for, and do the change there if appropriate */
- if (maybe_exactfs) {
- node_type = EXACTFS_B_U;
+ else if (node_type == EXACTF) { /* Means is /di */
+
+ /* If 'maybe_exactfu' is clear, then we need to stay
+ * /di. If it is set, it means there are no code
+ * points that match differently depending on UTF8ness
+ * of the target string, so it can become an EXACTFU
+ * node */
+ if (! maybe_exactfu) {
+ RExC_seen_d_op = TRUE;
}
- }
-
- if (node_type == EXACTFU) {
-
- /* Because the MICRO SIGN folds to something
- * representable only in UTF-8, we use a special node
- * when we aren't in UTF-8, so can't represent that
- * fold */
- if (UNLIKELY(has_micro_sign)) {
-
- /* The micro sign is the only below 256 character
- * that folds to above 255 */
- if (! UTF) {
- node_type = EXACTFUP;
- }
+ else if ( isALPHA_FOLD_EQ(* STRING(REGNODE_p(ret)), 's')
+ || isALPHA_FOLD_EQ(ender, 's'))
+ {
+ /* But, if the node begins or ends in an 's' we
+ * have to defer changing it into an EXACTFU, as
+ * the node could later get joined with another one
+ * that ends or begins with 's' creating an 'ss'
+ * sequence which would then wrongly match the
+ * sharp s without the target being UTF-8. We
+ * create a special node that we resolve later when
+ * we join nodes together */
+
+ node_type = EXACTFU_S_EDGE;
}
- else if (requires_utf8_target) {
-
- node_type = EXACTFU_ONLY8;
+ else {
+ node_type = EXACTFU;
}
}
+
+ if (requires_utf8_target && node_type == EXACTFU) {
+ node_type = EXACTFU_ONLY8;
+ }
}
OP(REGNODE_p(ret)) = node_type;
case EXACT_ONLY8:
case EXACTL:
case EXACTF:
- case EXACTFS_B_U:
- case EXACTFS_E_U:
- case EXACTFS_BE_U:
+ case EXACTFU_S_EDGE:
case EXACTFAA_NO_TRIE:
case EXACTFAA:
case EXACTFU:
/* Regops and State definitions */
-#define REGNODE_MAX 104
-#define REGMATCH_STATE_MAX 144
+#define REGNODE_MAX 102
+#define REGMATCH_STATE_MAX 142
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define EXACTFAA_NO_TRIE 44 /* 0x2c Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
#define EXACT_ONLY8 45 /* 0x2d Like EXACT, but only UTF-8 encoded targets can match */
#define EXACTFU_ONLY8 46 /* 0x2e Like EXACTFU, but only UTF-8 encoded targets can match */
-#define EXACTFS_B_U 47 /* 0x2f EXACTFU but begins with [Ss]; (string not UTF-8; compile-time only). */
-#define EXACTFS_E_U 48 /* 0x30 EXACTFU but ends with [Ss]; (string not UTF-8; compile-time only). */
-#define EXACTFS_BE_U 49 /* 0x31 EXACTFU but begins and ends with [Ss]; (string not UTF-8; compile-time only). */
-#define NOTHING 50 /* 0x32 Match empty string. */
-#define TAIL 51 /* 0x33 Match empty string. Can jump here from outside. */
-#define STAR 52 /* 0x34 Match this (simple) thing 0 or more times. */
-#define PLUS 53 /* 0x35 Match this (simple) thing 1 or more times. */
-#define CURLY 54 /* 0x36 Match this simple thing {n,m} times. */
-#define CURLYN 55 /* 0x37 Capture next-after-this simple thing */
-#define CURLYM 56 /* 0x38 Capture this medium-complex thing {n,m} times. */
-#define CURLYX 57 /* 0x39 Match this complex thing {n,m} times. */
-#define WHILEM 58 /* 0x3a Do curly processing and see if rest matches. */
-#define OPEN 59 /* 0x3b Mark this point in input as start of #n. */
-#define CLOSE 60 /* 0x3c Close corresponding OPEN of #n. */
-#define SROPEN 61 /* 0x3d Same as OPEN, but for script run */
-#define SRCLOSE 62 /* 0x3e Close preceding SROPEN */
-#define REF 63 /* 0x3f Match some already matched string */
-#define REFF 64 /* 0x40 Match already matched string, folded using native charset rules for non-utf8 */
-#define REFFL 65 /* 0x41 Match already matched string, folded in loc. */
-#define REFFU 66 /* 0x42 Match already matched string, folded using unicode rules for non-utf8 */
-#define REFFA 67 /* 0x43 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define NREF 68 /* 0x44 Match some already matched string */
-#define NREFF 69 /* 0x45 Match already matched string, folded using native charset rules for non-utf8 */
-#define NREFFL 70 /* 0x46 Match already matched string, folded in loc. */
-#define NREFFU 71 /* 0x47 Match already matched string, folded using unicode rules for non-utf8 */
-#define NREFFA 72 /* 0x48 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
-#define LONGJMP 73 /* 0x49 Jump far away. */
-#define BRANCHJ 74 /* 0x4a BRANCH with long offset. */
-#define IFMATCH 75 /* 0x4b Succeeds if the following matches. */
-#define UNLESSM 76 /* 0x4c Fails if the following matches. */
-#define SUSPEND 77 /* 0x4d "Independent" sub-RE. */
-#define IFTHEN 78 /* 0x4e Switch, should be preceded by switcher. */
-#define GROUPP 79 /* 0x4f Whether the group matched. */
-#define EVAL 80 /* 0x50 Execute some Perl code. */
-#define MINMOD 81 /* 0x51 Next operator is not greedy. */
-#define LOGICAL 82 /* 0x52 Next opcode should set the flag only. */
-#define RENUM 83 /* 0x53 Group with independently numbered parens. */
-#define TRIE 84 /* 0x54 Match many EXACT(F[ALU]?)? at once. flags==type */
-#define TRIEC 85 /* 0x55 Same as TRIE, but with embedded charclass data */
-#define AHOCORASICK 86 /* 0x56 Aho Corasick stclass. flags==type */
-#define AHOCORASICKC 87 /* 0x57 Same as AHOCORASICK, but with embedded charclass data */
-#define GOSUB 88 /* 0x58 recurse to paren arg1 at (signed) ofs arg2 */
-#define NGROUPP 89 /* 0x59 Whether the group matched. */
-#define INSUBP 90 /* 0x5a Whether we are in a specific recurse. */
-#define DEFINEP 91 /* 0x5b Never execute directly. */
-#define ENDLIKE 92 /* 0x5c Used only for the type field of verbs */
-#define OPFAIL 93 /* 0x5d Same as (?!), but with verb arg */
-#define ACCEPT 94 /* 0x5e Accepts the current matched string, with verbar */
-#define VERB 95 /* 0x5f Used only for the type field of verbs */
-#define PRUNE 96 /* 0x60 Pattern fails at this startpoint if no-backtracking through this */
-#define MARKPOINT 97 /* 0x61 Push the current location for rollback by cut. */
-#define SKIP 98 /* 0x62 On failure skip forward (to the mark) before retrying */
-#define COMMIT 99 /* 0x63 Pattern fails outright if backtracking through this */
-#define CUTGROUP 100 /* 0x64 On failure go to the next alternation in the group */
-#define KEEPS 101 /* 0x65 $& begins here. */
-#define LNBREAK 102 /* 0x66 generic newline pattern */
-#define OPTIMIZED 103 /* 0x67 Placeholder for dump. */
-#define PSEUDO 104 /* 0x68 Pseudo opcode for internal use. */
+#define EXACTFU_S_EDGE 47 /* 0x2f /di rules, but nothing in it precludes /ui, except begins and/or ends with [Ss]; (string not UTF-8; compile-time only). */
+#define NOTHING 48 /* 0x30 Match empty string. */
+#define TAIL 49 /* 0x31 Match empty string. Can jump here from outside. */
+#define STAR 50 /* 0x32 Match this (simple) thing 0 or more times. */
+#define PLUS 51 /* 0x33 Match this (simple) thing 1 or more times. */
+#define CURLY 52 /* 0x34 Match this simple thing {n,m} times. */
+#define CURLYN 53 /* 0x35 Capture next-after-this simple thing */
+#define CURLYM 54 /* 0x36 Capture this medium-complex thing {n,m} times. */
+#define CURLYX 55 /* 0x37 Match this complex thing {n,m} times. */
+#define WHILEM 56 /* 0x38 Do curly processing and see if rest matches. */
+#define OPEN 57 /* 0x39 Mark this point in input as start of #n. */
+#define CLOSE 58 /* 0x3a Close corresponding OPEN of #n. */
+#define SROPEN 59 /* 0x3b Same as OPEN, but for script run */
+#define SRCLOSE 60 /* 0x3c Close preceding SROPEN */
+#define REF 61 /* 0x3d Match some already matched string */
+#define REFF 62 /* 0x3e Match already matched string, folded using native charset rules for non-utf8 */
+#define REFFL 63 /* 0x3f Match already matched string, folded in loc. */
+#define REFFU 64 /* 0x40 Match already matched string, folded using unicode rules for non-utf8 */
+#define REFFA 65 /* 0x41 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define NREF 66 /* 0x42 Match some already matched string */
+#define NREFF 67 /* 0x43 Match already matched string, folded using native charset rules for non-utf8 */
+#define NREFFL 68 /* 0x44 Match already matched string, folded in loc. */
+#define NREFFU 69 /* 0x45 Match already matched string, folded using unicode rules for non-utf8 */
+#define NREFFA 70 /* 0x46 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
+#define LONGJMP 71 /* 0x47 Jump far away. */
+#define BRANCHJ 72 /* 0x48 BRANCH with long offset. */
+#define IFMATCH 73 /* 0x49 Succeeds if the following matches. */
+#define UNLESSM 74 /* 0x4a Fails if the following matches. */
+#define SUSPEND 75 /* 0x4b "Independent" sub-RE. */
+#define IFTHEN 76 /* 0x4c Switch, should be preceded by switcher. */
+#define GROUPP 77 /* 0x4d Whether the group matched. */
+#define EVAL 78 /* 0x4e Execute some Perl code. */
+#define MINMOD 79 /* 0x4f Next operator is not greedy. */
+#define LOGICAL 80 /* 0x50 Next opcode should set the flag only. */
+#define RENUM 81 /* 0x51 Group with independently numbered parens. */
+#define TRIE 82 /* 0x52 Match many EXACT(F[ALU]?)? at once. flags==type */
+#define TRIEC 83 /* 0x53 Same as TRIE, but with embedded charclass data */
+#define AHOCORASICK 84 /* 0x54 Aho Corasick stclass. flags==type */
+#define AHOCORASICKC 85 /* 0x55 Same as AHOCORASICK, but with embedded charclass data */
+#define GOSUB 86 /* 0x56 recurse to paren arg1 at (signed) ofs arg2 */
+#define NGROUPP 87 /* 0x57 Whether the group matched. */
+#define INSUBP 88 /* 0x58 Whether we are in a specific recurse. */
+#define DEFINEP 89 /* 0x59 Never execute directly. */
+#define ENDLIKE 90 /* 0x5a Used only for the type field of verbs */
+#define OPFAIL 91 /* 0x5b Same as (?!), but with verb arg */
+#define ACCEPT 92 /* 0x5c Accepts the current matched string, with verbar */
+#define VERB 93 /* 0x5d Used only for the type field of verbs */
+#define PRUNE 94 /* 0x5e Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 95 /* 0x5f Push the current location for rollback by cut. */
+#define SKIP 96 /* 0x60 On failure skip forward (to the mark) before retrying */
+#define COMMIT 97 /* 0x61 Pattern fails outright if backtracking through this */
+#define CUTGROUP 98 /* 0x62 On failure go to the next alternation in the group */
+#define KEEPS 99 /* 0x63 $& begins here. */
+#define LNBREAK 100 /* 0x64 generic newline pattern */
+#define OPTIMIZED 101 /* 0x65 Placeholder for dump. */
+#define PSEUDO 102 /* 0x66 Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
EXACT, /* EXACTFAA_NO_TRIE */
EXACT, /* EXACT_ONLY8 */
EXACT, /* EXACTFU_ONLY8 */
- EXACT, /* EXACTFS_B_U */
- EXACT, /* EXACTFS_E_U */
- EXACT, /* EXACTFS_BE_U */
+ EXACT, /* EXACTFU_S_EDGE */
NOTHING, /* NOTHING */
NOTHING, /* TAIL */
STAR, /* STAR */
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
- 0, /* EXACTFS_B_U */
- 0, /* EXACTFS_E_U */
- 0, /* EXACTFS_BE_U */
+ 0, /* EXACTFU_S_EDGE */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
0, /* EXACTFAA_NO_TRIE */
0, /* EXACT_ONLY8 */
0, /* EXACTFU_ONLY8 */
- 0, /* EXACTFS_B_U */
- 0, /* EXACTFS_E_U */
- 0, /* EXACTFS_BE_U */
+ 0, /* EXACTFU_S_EDGE */
0, /* NOTHING */
0, /* TAIL */
0, /* STAR */
"EXACTFAA_NO_TRIE", /* 0x2c */
"EXACT_ONLY8", /* 0x2d */
"EXACTFU_ONLY8", /* 0x2e */
- "EXACTFS_B_U", /* 0x2f */
- "EXACTFS_E_U", /* 0x30 */
- "EXACTFS_BE_U", /* 0x31 */
- "NOTHING", /* 0x32 */
- "TAIL", /* 0x33 */
- "STAR", /* 0x34 */
- "PLUS", /* 0x35 */
- "CURLY", /* 0x36 */
- "CURLYN", /* 0x37 */
- "CURLYM", /* 0x38 */
- "CURLYX", /* 0x39 */
- "WHILEM", /* 0x3a */
- "OPEN", /* 0x3b */
- "CLOSE", /* 0x3c */
- "SROPEN", /* 0x3d */
- "SRCLOSE", /* 0x3e */
- "REF", /* 0x3f */
- "REFF", /* 0x40 */
- "REFFL", /* 0x41 */
- "REFFU", /* 0x42 */
- "REFFA", /* 0x43 */
- "NREF", /* 0x44 */
- "NREFF", /* 0x45 */
- "NREFFL", /* 0x46 */
- "NREFFU", /* 0x47 */
- "NREFFA", /* 0x48 */
- "LONGJMP", /* 0x49 */
- "BRANCHJ", /* 0x4a */
- "IFMATCH", /* 0x4b */
- "UNLESSM", /* 0x4c */
- "SUSPEND", /* 0x4d */
- "IFTHEN", /* 0x4e */
- "GROUPP", /* 0x4f */
- "EVAL", /* 0x50 */
- "MINMOD", /* 0x51 */
- "LOGICAL", /* 0x52 */
- "RENUM", /* 0x53 */
- "TRIE", /* 0x54 */
- "TRIEC", /* 0x55 */
- "AHOCORASICK", /* 0x56 */
- "AHOCORASICKC", /* 0x57 */
- "GOSUB", /* 0x58 */
- "NGROUPP", /* 0x59 */
- "INSUBP", /* 0x5a */
- "DEFINEP", /* 0x5b */
- "ENDLIKE", /* 0x5c */
- "OPFAIL", /* 0x5d */
- "ACCEPT", /* 0x5e */
- "VERB", /* 0x5f */
- "PRUNE", /* 0x60 */
- "MARKPOINT", /* 0x61 */
- "SKIP", /* 0x62 */
- "COMMIT", /* 0x63 */
- "CUTGROUP", /* 0x64 */
- "KEEPS", /* 0x65 */
- "LNBREAK", /* 0x66 */
- "OPTIMIZED", /* 0x67 */
- "PSEUDO", /* 0x68 */
+ "EXACTFU_S_EDGE", /* 0x2f */
+ "NOTHING", /* 0x30 */
+ "TAIL", /* 0x31 */
+ "STAR", /* 0x32 */
+ "PLUS", /* 0x33 */
+ "CURLY", /* 0x34 */
+ "CURLYN", /* 0x35 */
+ "CURLYM", /* 0x36 */
+ "CURLYX", /* 0x37 */
+ "WHILEM", /* 0x38 */
+ "OPEN", /* 0x39 */
+ "CLOSE", /* 0x3a */
+ "SROPEN", /* 0x3b */
+ "SRCLOSE", /* 0x3c */
+ "REF", /* 0x3d */
+ "REFF", /* 0x3e */
+ "REFFL", /* 0x3f */
+ "REFFU", /* 0x40 */
+ "REFFA", /* 0x41 */
+ "NREF", /* 0x42 */
+ "NREFF", /* 0x43 */
+ "NREFFL", /* 0x44 */
+ "NREFFU", /* 0x45 */
+ "NREFFA", /* 0x46 */
+ "LONGJMP", /* 0x47 */
+ "BRANCHJ", /* 0x48 */
+ "IFMATCH", /* 0x49 */
+ "UNLESSM", /* 0x4a */
+ "SUSPEND", /* 0x4b */
+ "IFTHEN", /* 0x4c */
+ "GROUPP", /* 0x4d */
+ "EVAL", /* 0x4e */
+ "MINMOD", /* 0x4f */
+ "LOGICAL", /* 0x50 */
+ "RENUM", /* 0x51 */
+ "TRIE", /* 0x52 */
+ "TRIEC", /* 0x53 */
+ "AHOCORASICK", /* 0x54 */
+ "AHOCORASICKC", /* 0x55 */
+ "GOSUB", /* 0x56 */
+ "NGROUPP", /* 0x57 */
+ "INSUBP", /* 0x58 */
+ "DEFINEP", /* 0x59 */
+ "ENDLIKE", /* 0x5a */
+ "OPFAIL", /* 0x5b */
+ "ACCEPT", /* 0x5c */
+ "VERB", /* 0x5d */
+ "PRUNE", /* 0x5e */
+ "MARKPOINT", /* 0x5f */
+ "SKIP", /* 0x60 */
+ "COMMIT", /* 0x61 */
+ "CUTGROUP", /* 0x62 */
+ "KEEPS", /* 0x63 */
+ "LNBREAK", /* 0x64 */
+ "OPTIMIZED", /* 0x65 */
+ "PSEUDO", /* 0x66 */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
EXTCONST U8 PL_varies_bitmask[];
#else
EXTCONST U8 PL_varies_bitmask[] = {
- 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xF0, 0x87, 0xFF, 0x65, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0x00, 0x00, 0x0C, 0x00, 0xFC, 0xE1, 0x7F, 0x19, 0x00, 0x00, 0x00
};
#endif /* DOINIT */
EXTCONST U8 PL_simple_bitmask[];
#else
EXTCONST U8 PL_simple_bitmask[] = {
- 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0xFF, 0xFF, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
#endif /* DOINIT */