#include "invlist_inline.h"
#include "unicode_constants.h"
-#define HAS_NONLATIN1_FOLD_CLOSURE(i) \
- _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-#define HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(i) \
- _HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
-#define IS_NON_FINAL_FOLD(c) _IS_NON_FINAL_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-#define IS_IN_SOME_FOLD_L1(c) _IS_IN_SOME_FOLD_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
-
#ifndef STATIC
#define STATIC static
#endif
U8 *study_chunk_recursed; /* bitmap of which subs we have moved
through */
U32 study_chunk_recursed_bytes; /* bytes in bitmap */
- I32 in_lookbehind;
- I32 in_lookahead;
+ I32 in_lookaround;
I32 contains_locale;
I32 override_recoding;
I32 recode_x_to_native;
#define RExC_study_chunk_recursed (pRExC_state->study_chunk_recursed)
#define RExC_study_chunk_recursed_bytes \
(pRExC_state->study_chunk_recursed_bytes)
-#define RExC_in_lookbehind (pRExC_state->in_lookbehind)
-#define RExC_in_lookahead (pRExC_state->in_lookahead)
+#define RExC_in_lookaround (pRExC_state->in_lookaround)
#define RExC_contains_locale (pRExC_state->contains_locale)
#define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
} \
} STMT_END
+/* Evaluates to true if /u should be chosen over /d: i.e. if we are supposed
+ * to use Unicode rules, or if the pattern is in UTF-8. This latter condition
+ * is in case the outermost rules are locale. See GH #17278 */
+#define toUSE_UNI_CHARSET_NOT_DEPENDS (RExC_uni_semantics || UTF)
+
/* Change from /d into /u rules, and restart the parse. RExC_uni_semantics is
* a flag that indicates we need to override /d with /u as a result of
* something in the pattern. It should only be used in regards to calling
ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
}
-#define NON_OTHER_COUNT NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C
-
STATIC bool
S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
{
/* See if *uc is the beginning of a multi-character fold. If
* so, we decrement the length remaining to look at, to account
* for the current character this iteration. (We can use 'uc'
- * instead of the fold returned by TRIE_READ_CHAR because for
- * non-UTF, the latin1_safe macro is smart enough to account
- * for all the unfolded characters, and because for UTF, the
- * string will already have been folded earlier in the
- * compilation process */
+ * instead of the fold returned by TRIE_READ_CHAR because the
+ * macro is smart enough to account for any unfolded
+ * characters. */
if (UTF) {
if ((foldlen = is_MULTI_CHAR_FOLD_utf8_safe(uc, e))) {
foldlen -= UTF8SKIP(uc);
continue;
}
}
- else if ( OP(scan) == EXACT
- || OP(scan) == LEXACT
- || OP(scan) == EXACT_REQ8
- || OP(scan) == LEXACT_REQ8
- || OP(scan) == EXACTL)
- {
+ else if (PL_regkind[OP(scan)] == EXACT && ! isEXACTFish(OP(scan))) {
SSize_t bytelen = STR_LEN(scan), charlen;
UV uc;
assert(bytelen);
case PLUS:
if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
next = NEXTOPER(scan);
- if ( OP(next) == EXACT
- || OP(next) == LEXACT
- || OP(next) == EXACT_REQ8
- || OP(next) == LEXACT_REQ8
- || OP(next) == EXACTL
+ if ( ( PL_regkind[OP(next)] == EXACT
+ && ! isEXACTFish(OP(next)))
|| (flags & SCF_DO_STCLASS))
{
mincount = 1;
rx_flags = orig_rx_flags;
- if ( (UTF || RExC_uni_semantics)
+ if ( toUSE_UNI_CHARSET_NOT_DEPENDS
&& initial_charset == REGEX_DEPENDS_CHARSET)
{
RExC_seen = 0;
RExC_maxlen = 0;
- RExC_in_lookbehind = 0;
- RExC_in_lookahead = 0;
+ RExC_in_lookaround = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
RExC_recode_x_to_native = 0;
RExC_in_multi_char_class = 0;
DEBUG_PEEP("first:", first, 0, 0);
/* Ignore EXACT as we deal with it later. */
if (PL_regkind[OP(first)] == EXACT) {
- if ( OP(first) == EXACT
- || OP(first) == LEXACT
- || OP(first) == EXACT_REQ8
- || OP(first) == LEXACT_REQ8
- || OP(first) == EXACTL)
- {
+ if (! isEXACTFish(OP(first))) {
NOOP; /* Empty, get anchored substr later. */
}
else
&& nop == END)
RExC_rx->extflags |= RXf_WHITE;
else if ( RExC_rx->extflags & RXf_SPLIT
- && ( fop == EXACT || fop == LEXACT
- || fop == EXACT_REQ8 || fop == LEXACT_REQ8
- || fop == EXACTL)
+ && (PL_regkind[fop] == EXACT && ! isEXACTFish(fop))
&& STR_LEN(first) == 1
&& *(STRING(first)) == ' '
&& nop == END )
/* Some characters match above-Latin1 ones under /i. This
* is true of EXACTFL ones when the locale is UTF-8 */
if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
- && (! isASCII(uc) || (OP(node) != EXACTFAA
- && OP(node) != EXACTFAA_NO_TRIE)))
+ && (! isASCII(uc) || ! inRANGE(OP(node), EXACTFAA,
+ EXACTFAA_NO_TRIE)))
{
add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
}
* the folded string to be just past any possible multi-char
* fold.
*
- * Unlike the non-UTF-8 case, the macro for determining if a
- * string is a multi-char fold requires all the characters to
- * already be folded. This is because of all the complications
- * if not. Note that they are folded anyway, except in EXACTFL
- * nodes. Like the non-UTF case above, we punt if the node
- * begins with a multi-char fold */
+ * Like the non-UTF case above, we punt if the node begins with a
+ * multi-char fold */
if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
UV c = (k == 0) ? first_fold : remaining_folds[k-1];
/* /aa doesn't allow folds between ASCII and non- */
- if ( (OP(node) == EXACTFAA || OP(node) == EXACTFAA_NO_TRIE)
+ if ( inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)
&& isASCII(c) != isASCII(fc))
{
continue;
RExC_parse++;
has_use_defaults = TRUE;
STD_PMMOD_FLAGS_CLEAR(&RExC_flags);
- cs = (RExC_uni_semantics)
+ cs = (toUSE_UNI_CHARSET_NOT_DEPENDS)
? REGEX_UNICODE_CHARSET
: REGEX_DEPENDS_CHARSET;
set_regex_charset(&RExC_flags, cs);
else {
cs = get_regex_charset(RExC_flags);
if ( cs == REGEX_DEPENDS_CHARSET
- && RExC_uni_semantics)
+ && (toUSE_UNI_CHARSET_NOT_DEPENDS))
{
cs = REGEX_UNICODE_CHARSET;
}
* pattern (or target, not known until runtime) are
* utf8, or something in the pattern indicates unicode
* semantics */
- cs = (RExC_uni_semantics)
+ cs = (toUSE_UNI_CHARSET_NOT_DEPENDS)
? REGEX_UNICODE_CHARSET
: REGEX_DEPENDS_CHARSET;
has_charset_modifier = DEPENDS_PAT_MOD;
I32 after_freeze = 0;
I32 num; /* numeric backreferences */
SV * max_open; /* Max number of unclosed parens */
+ I32 was_in_lookaround = RExC_in_lookaround;
char * parse_start = RExC_parse; /* MJD */
char * const oregcomp_parse = RExC_parse;
*flagp = 0; /* Initialize. */
- if (RExC_in_lookbehind) {
- RExC_in_lookbehind++;
- }
- if (RExC_in_lookahead) {
- RExC_in_lookahead++;
- }
-
/* Having this true makes it feasible to have a lot fewer tests for the
* parse pointer being in scope. For example, we can write
* while(isFOO(*RExC_parse)) RExC_parse++;
lookbehind_alpha_assertions:
RExC_seen |= REG_LOOKBEHIND_SEEN;
- RExC_in_lookbehind++;
/*FALLTHROUGH*/
alpha_assertions:
+ RExC_in_lookaround++;
RExC_seen_zerolen++;
if (! start_arg) {
}
RExC_seen |= REG_LOOKBEHIND_SEEN;
- RExC_in_lookbehind++;
+ RExC_in_lookaround++;
RExC_parse++;
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?... not terminated");
break;
case '=': /* (?=...) */
RExC_seen_zerolen++;
- RExC_in_lookahead++;
+ RExC_in_lookaround++;
break;
case '!': /* (?!...) */
RExC_seen_zerolen++;
nextchar(pRExC_state);
return ret;
}
+ RExC_in_lookaround++;
break;
case '|': /* (?|...) */
/* branch reset, behave like a (?:...) except that
/* restore original flags, but keep (?p) and, if we've encountered
* something in the parse that changes /d rules into /u, keep the /u */
RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY);
- if (DEPENDS_SEMANTICS && RExC_uni_semantics) {
+ if (DEPENDS_SEMANTICS && toUSE_UNI_CHARSET_NOT_DEPENDS) {
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
}
if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
NOT_REACHED; /* NOTREACHED */
}
- if (RExC_in_lookbehind) {
- RExC_in_lookbehind--;
- }
- if (RExC_in_lookahead) {
- RExC_in_lookahead--;
- }
if (after_freeze > RExC_npar)
RExC_npar = after_freeze;
+
+ RExC_in_lookaround = was_in_lookaround;
+
return(ret);
}
}
/*
+ - regcurly - a little FSA that accepts {\d+,?\d*}
+ Pulled from reg.c.
+ */
+bool
+Perl_regcurly(const char *s) /* TRUE iff the text at 's' looks like {\d+,?\d*} */
+{
+ PERL_ARGS_ASSERT_REGCURLY; /* defined elsewhere; presumably validates 's' */
+
+ if (*s++ != '{') /* must open with '{'; the local 's' then moves past it */
+ return FALSE;
+ if (!isDIGIT(*s)) /* a leading digit is mandatory, so "{}" and "{,n}" fail */
+ return FALSE;
+ while (isDIGIT(*s)) /* consume the remainder of the first number */
+ s++;
+ if (*s == ',') { /* the comma is optional ... */
+ s++;
+ while (isDIGIT(*s)) /* ... and may be followed by zero or more digits */
+ s++;
+ }
+
+ return *s == '}'; /* accepted only if the very next character closes it */
+}
+
+/*
- regpiece - something followed by possible quantifier * + ? {n,m}
*
* Note that the branching code sequences used for ? and the general cases
FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
}
- if (! ISMULT2(RExC_parse)) {
- *flagp = flags;
- return(ret);
- }
-
- /* Here we know the input is a legal quantifier, including {m,n} */
-
- op = *RExC_parse;
-
#ifdef RE_TRACK_PATTERN_OFFSETS
parse_start = RExC_parse;
#endif
- if (op != '{') {
+ op = *RExC_parse;
+ switch (op) {
+
+ case '*':
nextchar(pRExC_state);
+ min = 0;
+ break;
- *flagp = HASWIDTH;
+ case '+':
+ nextchar(pRExC_state);
+ min = 1;
+ break;
- if (op == '*') {
- min = 0;
- }
- else if (op == '+') {
- min = 1;
- }
- else if (op == '?') {
- min = 0; max = 1;
- }
- }
- else { /* is '{' */
- const char* endptr;
+ case '?':
+ nextchar(pRExC_state);
+ min = 0; max = 1;
+ break;
- maxpos = NULL;
- next = RExC_parse + 1;
- while (isDIGIT(*next) || *next == ',') {
- if (*next == ',') {
- if (maxpos)
- break;
- else
- maxpos = next;
+ case '{': /* A '{' may or may not indicate a quantifier; call regcurly()
+ to determine which */
+ if (regcurly(RExC_parse)) {
+ const char* endptr;
+
+ /* Here is a quantifier, parse for min and max values */
+ maxpos = NULL;
+ next = RExC_parse + 1;
+ while (isDIGIT(*next) || *next == ',') {
+ if (*next == ',') {
+ if (maxpos)
+ break;
+ else
+ maxpos = next;
+ }
+ next++;
}
- next++;
- }
- assert(*next == '}');
+ assert(*next == '}');
- if (!maxpos)
- maxpos = next;
- RExC_parse++;
- if (isDIGIT(*RExC_parse)) {
- endptr = RExC_end;
- if (!grok_atoUV(RExC_parse, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- min = (I32)uv;
- } else {
- min = 0;
- }
- if (*maxpos == ',')
- maxpos++;
- else
- maxpos = RExC_parse;
- if (isDIGIT(*maxpos)) {
- endptr = RExC_end;
- if (!grok_atoUV(maxpos, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- max = (I32)uv;
- } else {
- max = REG_INFTY; /* meaning "infinity" */
- }
- RExC_parse = next;
- nextchar(pRExC_state);
- if (max < min) { /* If can't match, warn and optimize to fail
- unconditionally */
- reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
- ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
- NEXT_OFF(REGNODE_p(orig_emit)) =
- regarglen[OPFAIL] + NODE_STEP_REGNODE;
- return ret;
- }
- else if (min == max && *RExC_parse == '?')
- {
- ckWARN2reg(RExC_parse + 1,
- "Useless use of greediness modifier '%c'",
- *RExC_parse);
- }
+ if (!maxpos)
+ maxpos = next;
+ RExC_parse++;
+ if (isDIGIT(*RExC_parse)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(RExC_parse, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ min = (I32)uv;
+ } else {
+ min = 0;
+ }
+ if (*maxpos == ',')
+ maxpos++;
+ else
+ maxpos = RExC_parse;
+ if (isDIGIT(*maxpos)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(maxpos, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ max = (I32)uv;
+ } else {
+ max = REG_INFTY; /* meaning "infinity" */
+ }
+
+ RExC_parse = next;
+ nextchar(pRExC_state);
+ if (max < min) { /* If can't match, warn and optimize to fail
+ unconditionally */
+ reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+ ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+ NEXT_OFF(REGNODE_p(orig_emit)) =
+ regarglen[OPFAIL] + NODE_STEP_REGNODE;
+ return ret;
+ }
+ else if (min == max && *RExC_parse == '?')
+ {
+ ckWARN2reg(RExC_parse + 1,
+ "Useless use of greediness modifier '%c'",
+ *RExC_parse);
+ }
+
+ break;
+ } /* End of is regcurly() */
+
+ /* Here was a '{', but what followed it didn't form a quantifier. */
+ /* FALLTHROUGH */
+
+ default:
+ *flagp = flags;
+ return(ret);
+ NOT_REACHED; /*NOTREACHED*/
}
+ /* Here we have a quantifier, and have calculated 'min' and 'max'.
+ *
+ * Check and possibly adjust a zero width operand */
if (! (flags & (HASWIDTH|POSTPONED))) {
if (max > REG_INFTY/3) {
if (origparse[0] == '\\' && origparse[1] == 'K') {
origparse));
}
}
+
+ /* There's no point in trying to match something 0 length more than
+ * once except for extra side effects, which we don't have here since
+ * not POSTPONED */
+ if (max > 1) {
+ max = 1;
+ if (min > max) {
+ min = max;
+ }
+ }
+ }
+
+ /* If this is a code block pass it up */
+ *flagp |= (flags & POSTPONED);
+
+ if (max > 0) {
+ *flagp |= (flags & HASWIDTH);
+ if (max == REG_INFTY)
+ RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
}
+ /* 'SIMPLE' operands don't require full generality */
if ((flags&SIMPLE)) {
- if (min == 0 && max == REG_INFTY) {
+ if (max == REG_INFTY) {
+ if (min == 0) {
+ if (UNLIKELY(RExC_pm_flags & PMf_WILDCARD)) {
+ goto min0_maxINF_wildcard_forbidden;
+ }
- /* Going from 0..inf is currently forbidden in wildcard
- * subpatterns. The only reason is to make it harder to
- * write patterns that take a long long time to halt, and
- * because the use of this construct isn't necessary in
- * matching Unicode property values */
- if (RExC_pm_flags & PMf_WILDCARD) {
- RExC_parse++;
- /* diag_listed_as: Use of %s is not allowed in Unicode
- property wildcard subpatterns in regex; marked by
- <-- HERE in m/%s/ */
- vFAIL("Use of quantifier '*' is not allowed in"
- " Unicode property wildcard subpatterns");
- /* Note, don't need to worry about {0,}, as a '}' isn't
- * legal at all in wildcards, so wouldn't get this far
- * */
+ reginsert(pRExC_state, STAR, ret, depth+1);
+ MARK_NAUGHTY(4);
+ goto done_main_op;
+ }
+ else if (min == 1) {
+ reginsert(pRExC_state, PLUS, ret, depth+1);
+ MARK_NAUGHTY(3);
+ goto done_main_op;
}
- reginsert(pRExC_state, STAR, ret, depth+1);
- MARK_NAUGHTY(4);
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
- goto nest_check;
- }
- if (min == 1 && max == REG_INFTY) {
- reginsert(pRExC_state, PLUS, ret, depth+1);
- MARK_NAUGHTY(3);
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
- goto nest_check;
}
+
+ /* Here, SIMPLE, but not the '*' and '+' special cases */
+
MARK_NAUGHTY_EXP(2, 2);
reginsert(pRExC_state, CURLY, ret, depth+1);
Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
}
- else {
+ else { /* not SIMPLE */
const regnode_offset w = reg_node(pRExC_state, WHILEM);
FLAGS(REGNODE_p(w)) = 0;
RExC_whilem_seen++;
MARK_NAUGHTY_EXP(1, 4); /* compound interest */
}
+
+ /* Finish up the CURLY/CURLYX case */
FLAGS(REGNODE_p(ret)) = 0;
- if (min > 0)
- *flagp = 0;
- if (max > 0)
- *flagp |= HASWIDTH;
ARG1_SET(REGNODE_p(ret), (U16)min);
ARG2_SET(REGNODE_p(ret), (U16)max);
- if (max == REG_INFTY)
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-
- goto nest_check;
- nest_check:
+ done_main_op:
+ /* Process any greediness modifiers */
if (*RExC_parse == '?') {
nextchar(pRExC_state);
reginsert(pRExC_state, MINMOD, ret, depth+1);
}
}
+ /* Forbid extra quantifiers */
if (ISMULT2(RExC_parse)) {
RExC_parse++;
vFAIL("Nested quantifiers");
}
return(ret);
+
+ min0_maxINF_wildcard_forbidden:
+
+ /* Here we are in a wildcard match, and the minimum match length is 0, and
+ * the max could be infinity. This is currently forbidden. The only
+ * reason is to make it harder to write patterns that take a long long time
+ * to halt, and because the use of this construct isn't necessary in
+ * matching Unicode property values */
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+ subpatterns in regex; marked by <-- HERE in m/%s/
+ */
+ vFAIL("Use of quantifier '*' is not allowed in Unicode property wildcard"
+ " subpatterns");
+
+ /* Note, don't need to worry about the input being '{0,}', as a '}' isn't
+ * legal at all in wildcards, so can't get this far */
+
+ NOT_REACHED; /*NOTREACHED*/
}
STATIC bool
/* SBOL is shared with /^/ so we set the flags so we can tell
* /\A/ from /^/ in split. */
FLAGS(REGNODE_p(ret)) = 1;
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
goto finish_meta_pat;
case 'G':
RExC_seen |= REG_GPOS_SEEN;
goto finish_meta_pat;
case 'K':
- if (!RExC_in_lookbehind && !RExC_in_lookahead) {
+ if (!RExC_in_lookaround) {
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, KEEPS);
/* XXX:dmq : disabling in-place substitution seems to
}
else {
ret = reg_node(pRExC_state, SEOL);
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
}
else {
ret = reg_node(pRExC_state, EOS);
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
* things */
maybe_exactfu = FALSE;
+ /* Although these two characters have folds that are
+ * locale-problematic, they also have folds to above Latin1
+ * that aren't a problem. Doing these now helps at
+ * runtime. */
+ if (UNLIKELY( ender == GREEK_CAPITAL_LETTER_MU
+ || ender == LATIN_CAPITAL_LETTER_SHARP_S))
+ {
+ goto fold_anyway;
+ }
+
/* Here, we are adding a problematic fold character.
* "Problematic" in this context means that its fold isn't
* known until runtime. (The non-problematic code points
*(s)++ = (U8) toFOLD(ender);
}
else {
- UV folded = _to_uni_fold_flags(
+ UV folded;
+
+ fold_anyway:
+ folded = _to_uni_fold_flags(
ender,
(U8 *) s, /* We have allocated extra space
in 's' so can't run off the
end */
&added_len,
- FOLD_FLAGS_FULL | ((ASCII_FOLD_RESTRICTED)
- ? FOLD_FLAGS_NOMIX_ASCII
- : 0));
+ FOLD_FLAGS_FULL
+ | (( ASCII_FOLD_RESTRICTED
+ || node_type == EXACTFL)
+ ? FOLD_FLAGS_NOMIX_ASCII
+ : 0));
if (UNLIKELY(len + added_len > max_string_len)) {
overflowed = TRUE;
break;
*
* The solution used here for peeking ahead is to look at that
* next character. If it isn't ASCII punctuation, then it will
- * be something that continues in an EXACTish node if there
- * were space. We append the fold of it to s, having reserved
- * enough room in s0 for the purpose. If we can't reasonably
- * peek ahead, we instead assume the worst case: that it is
- * something that would form the completion of a multi-char
- * fold.
+ * be something that would continue on in an EXACTish node if
+ * there were space. We append the fold of it to s, having
+ * reserved enough room in s0 for the purpose. If we can't
+ * reasonably peek ahead, we instead assume the worst case:
+ * that it is something that would form the completion of a
+ * multi-char fold.
*
* If we can't split between s and ender, we work backwards
* character-by-character down to s0. At each current point
}
#endif
if ( exact ) {
- switch (OP(REGNODE_p(scan))) {
- case LEXACT:
- case EXACT:
- case LEXACT_REQ8:
- case EXACT_REQ8:
- case EXACTL:
- case EXACTF:
- case EXACTFU_S_EDGE:
- case EXACTFAA_NO_TRIE:
- case EXACTFAA:
- case EXACTFU:
- case EXACTFU_REQ8:
- case EXACTFLU8:
- case EXACTFUP:
- case EXACTFL:
- if( exact == PSEUDO )
- exact= OP(REGNODE_p(scan));
- else if ( exact != OP(REGNODE_p(scan)) )
- exact= 0;
- case NOTHING:
- break;
- default:
+ if (PL_regkind[OP(REGNODE_p(scan))] == EXACT) {
+ if (exact == PSEUDO )
+ exact= OP(REGNODE_p(scan));
+ else if (exact != OP(REGNODE_p(scan)) )
exact= 0;
}
+ else if (OP(REGNODE_p(scan)) != NOTHING) {
+ exact= 0;
+ }
}
DEBUG_PARSE_r({
DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
#define SAVEPVN(p, n) ((p) ? savepvn(p, n) : NULL)
/*
-=for apidoc_section REGEXP Functions
=for apidoc re_dup_guts
Duplicate a regexp.
compiled under USE_ITHREADS.
After all of the core data stored in struct regexp is duplicated
-the regexp_engine.dupe method is used to copy any private data
+the C<regexp_engine.dupe> method is used to copy any private data
stored in the *pprivate pointer. This allows extensions to handle
any duplication they need to do.
}
}
-#define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C
-
STATIC void
S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
{