FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
}
- if (! ISMULT2(RExC_parse)) {
- *flagp = flags;
- return(ret);
- }
-
- /* Here we know the input is a legal quantifier, including {m,n} */
-
- op = *RExC_parse;
-
#ifdef RE_TRACK_PATTERN_OFFSETS
parse_start = RExC_parse;
#endif
- if (op != '{') {
+ op = *RExC_parse;
+ switch (op) {
+
+ case '*':
nextchar(pRExC_state);
+ min = 0;
+ break;
- if (op == '*') {
- min = 0;
- }
- else if (op == '+') {
- min = 1;
- }
- else if (op == '?') {
- min = 0; max = 1;
- }
- }
- else { /* is '{' */
- const char* endptr;
+ case '+':
+ nextchar(pRExC_state);
+ min = 1;
+ break;
- maxpos = NULL;
- next = RExC_parse + 1;
- while (isDIGIT(*next) || *next == ',') {
- if (*next == ',') {
- if (maxpos)
- break;
- else
- maxpos = next;
+ case '?':
+ nextchar(pRExC_state);
+ min = 0; max = 1;
+ break;
+
+ case '{': /* A '{' may or may not indicate a quantifier; call regcurly()
+ to determine which */
+ if (regcurly(RExC_parse)) {
+ const char* endptr;
+
+ /* Here is a quantifier, parse for min and max values */
+ maxpos = NULL;
+ next = RExC_parse + 1;
+ while (isDIGIT(*next) || *next == ',') {
+ if (*next == ',') {
+ if (maxpos)
+ break;
+ else
+ maxpos = next;
+ }
+ next++;
}
- next++;
- }
- assert(*next == '}');
+ assert(*next == '}');
- if (!maxpos)
- maxpos = next;
- RExC_parse++;
- if (isDIGIT(*RExC_parse)) {
- endptr = RExC_end;
- if (!grok_atoUV(RExC_parse, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- min = (I32)uv;
- } else {
- min = 0;
- }
- if (*maxpos == ',')
- maxpos++;
- else
- maxpos = RExC_parse;
- if (isDIGIT(*maxpos)) {
- endptr = RExC_end;
- if (!grok_atoUV(maxpos, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- max = (I32)uv;
- } else {
- max = REG_INFTY; /* meaning "infinity" */
- }
- RExC_parse = next;
- nextchar(pRExC_state);
- if (max < min) { /* If can't match, warn and optimize to fail
- unconditionally */
- reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
- ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
- NEXT_OFF(REGNODE_p(orig_emit)) =
- regarglen[OPFAIL] + NODE_STEP_REGNODE;
- return ret;
- }
- else if (min == max && *RExC_parse == '?')
- {
- ckWARN2reg(RExC_parse + 1,
- "Useless use of greediness modifier '%c'",
- *RExC_parse);
- }
+ if (!maxpos)
+ maxpos = next;
+ RExC_parse++;
+ if (isDIGIT(*RExC_parse)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(RExC_parse, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ min = (I32)uv;
+ } else {
+ min = 0;
+ }
+ if (*maxpos == ',')
+ maxpos++;
+ else
+ maxpos = RExC_parse;
+ if (isDIGIT(*maxpos)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(maxpos, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ max = (I32)uv;
+ } else {
+ max = REG_INFTY; /* meaning "infinity" */
+ }
+
+ RExC_parse = next;
+ nextchar(pRExC_state);
+ if (max < min) { /* If can't match, warn and optimize to fail
+ unconditionally */
+ reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+ ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+ NEXT_OFF(REGNODE_p(orig_emit)) =
+ regarglen[OPFAIL] + NODE_STEP_REGNODE;
+ return ret;
+ }
+ else if (min == max && *RExC_parse == '?')
+ {
+ ckWARN2reg(RExC_parse + 1,
+ "Useless use of greediness modifier '%c'",
+ *RExC_parse);
+ }
+
+ break;
+ } /* End of is regcurly() */
+
+ /* Here was a '{', but what followed it didn't form a quantifier. */
+ /* FALLTHROUGH */
+
+ default:
+ *flagp = flags;
+ return(ret);
+ NOT_REACHED; /*NOTREACHED*/
}
/* Here we have a quantifier, and have calculated 'min' and 'max'.
RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
}
+ /* 'SIMPLE' operands don't require full generality */
if ((flags&SIMPLE)) {
if (max == REG_INFTY) {
- if (min == 1) {
- reginsert(pRExC_state, PLUS, ret, depth+1);
- MARK_NAUGHTY(3);
- goto done_main_op;
- }
- else if (min == 0) {
-
- /* Going from 0..inf is currently forbidden in wildcard
- * subpatterns. The only reason is to make it harder to
- * write patterns that take a long long time to halt, and
- * because the use of this construct isn't necessary in
- * matching Unicode property values */
- if (RExC_pm_flags & PMf_WILDCARD) {
- RExC_parse++;
- /* diag_listed_as: Use of %s is not allowed in Unicode
- property wildcard subpatterns in regex; marked by
- <-- HERE in m/%s/ */
- vFAIL("Use of quantifier '*' is not allowed in"
- " Unicode property wildcard subpatterns");
- /* Note, don't need to worry about {0,}, as a '}' isn't
- * legal at all in wildcards, so wouldn't get this far
- * */
+ if (min == 0) {
+ if (UNLIKELY(RExC_pm_flags & PMf_WILDCARD)) {
+ goto min0_maxINF_wildcard_forbidden;
}
reginsert(pRExC_state, STAR, ret, depth+1);
MARK_NAUGHTY(4);
goto done_main_op;
}
+ else if (min == 1) {
+ reginsert(pRExC_state, PLUS, ret, depth+1);
+ MARK_NAUGHTY(3);
+ goto done_main_op;
+ }
}
+
+ /* Here, SIMPLE, but not the '*' and '+' special cases */
+
MARK_NAUGHTY_EXP(2, 2);
reginsert(pRExC_state, CURLY, ret, depth+1);
Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
}
- else {
+ else { /* not SIMPLE */
const regnode_offset w = reg_node(pRExC_state, WHILEM);
FLAGS(REGNODE_p(w)) = 0;
MARK_NAUGHTY_EXP(1, 4); /* compound interest */
}
+ /* Finish up the CURLY/CURLYX case */
FLAGS(REGNODE_p(ret)) = 0;
ARG1_SET(REGNODE_p(ret), (U16)min);
done_main_op:
+ /* Process any greediness modifiers */
if (*RExC_parse == '?') {
nextchar(pRExC_state);
reginsert(pRExC_state, MINMOD, ret, depth+1);
}
}
+ /* Forbid extra quantifiers */
if (ISMULT2(RExC_parse)) {
RExC_parse++;
vFAIL("Nested quantifiers");
}
return(ret);
+
+ min0_maxINF_wildcard_forbidden:
+
+ /* Here we are in a wildcard subpattern, and the minimum match length is 0
+ * and the maximum is infinity. This is currently forbidden. The only
+ * reasons are to make it harder to write patterns that take a very long
+ * time to halt, and that this construct isn't necessary for matching
+ * Unicode property values */
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode property wildcard
+ subpatterns in regex; marked by <-- HERE in m/%s/
+ */
+ vFAIL("Use of quantifier '*' is not allowed in Unicode property wildcard"
+ " subpatterns");
+
+ /* Note, don't need to worry about the input being '{0,}', as a '}' isn't
+ * legal at all in wildcards, so can't get this far */
+
+ NOT_REACHED; /*NOTREACHED*/
}
STATIC bool
/* SBOL is shared with /^/ so we set the flags so we can tell
* /\A/ from /^/ in split. */
FLAGS(REGNODE_p(ret)) = 1;
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
goto finish_meta_pat;
case 'G':
}
else {
ret = reg_node(pRExC_state, SEOL);
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
}
else {
ret = reg_node(pRExC_state, EOS);
- *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
*
* The solution used here for peeking ahead is to look at that
* next character. If it isn't ASCII punctuation, then it will
- * be something that continues in an EXACTish node if there
- * were space. We append the fold of it to s, having reserved
- * enough room in s0 for the purpose. If we can't reasonably
- * peek ahead, we instead assume the worst case: that it is
- * something that would form the completion of a multi-char
- * fold.
+ * be something that would continue on in an EXACTish node if
+ * there were space. We append the fold of it to s, having
+ * reserved enough room in s0 for the purpose. If we can't
+ * reasonably peek ahead, we instead assume the worst case:
+ * that it is something that would form the completion of a
+ * multi-char fold.
*
* If we can't split between s and ender, we work backwards
* character-by-character down to s0. At each current point