RExC_naughty += RExC_naughty / (exp) + (add)
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
-#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
- ((*s) == '{' && regcurly(s)))
+#define ISMULT2(s) (ISMULT1(*s) || ((*s) == '{' && regcurly(s)))
/*
* Flags to be passed up and down.
*/
-#define WORST 0 /* Worst case. */
#define HASWIDTH 0x01 /* Known to not match null strings, could match
non-null ones. */
-
-/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
- * character. (There needs to be a case: in the switch statement in regexec.c
- * for any node marked SIMPLE.) Note that this is not the same thing as
- * REGNODE_SIMPLE */
-#define SIMPLE 0x02
-#define SPSTART 0x04 /* Starts with * or + */
+#define SIMPLE 0x02 /* Exactly one character wide */
+ /* (or LNBREAK as a special case) */
#define POSTPONED 0x08 /* (?1),(?&name), (??{...}) or similar */
#define TRYAGAIN 0x10 /* Weeded out a declaration. */
#define RESTART_PARSE 0x20 /* Need to redo the parse */
? OPTIMIZE_INFTY
: (l
? data->last_start_max
+ /* temporary underflow guard for 5.32 */
+ : data->pos_delta < 0 ? OPTIMIZE_INFTY
: (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
? OPTIMIZE_INFTY
: data->pos_min + data->pos_delta));
* returned list must, and will, contain every code point that is a
* possibility. */
- dVAR;
SV* invlist = NULL;
SV* only_utf8_locale_invlist = NULL;
unsigned int i;
* character folded sequences. Since a single character can fold into
* such a sequence, the minimum match length for this node is less than
* the number of characters in the node. This routine returns in
- * *min_subtract how many characters to subtract from the the actual
+ * *min_subtract how many characters to subtract from the actual
* length of the string to get a real minimum match length; it is 0 if
* there are no multi-char foldeds. This delta is used by the caller to
* adjust the min length of the match, and the delta between min and max,
/* EXACTF nodes need to know that the minimum length
* changed so that a sharp s in the string can match this
* ss in the pattern, but they remain EXACTF nodes, as they
- * won't match this unless the target string is is UTF-8,
+ * won't match this unless the target string is in UTF-8,
* which we don't know until runtime. EXACTFL nodes can't
* transform into EXACTFU nodes */
if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
} while (f);
}
+/* Follow the next-chain of the current node and optimize away
+ all the NOTHINGs from it.
+
+ 'node' is the regnode whose forward link is to be shortened; it is
+ modified in place. CURLYX nodes are left untouched.
+
+ The link lives in ARG(node) when reg_off_by_arg[OP(node)] is set and in
+ NEXT_OFF(node) otherwise. Starting from the current value, the offsets
+ of the skippable nodes that follow are added on for as long as the sum
+ stays below 'max', and the total is stored back into the same field.
+ 'max' is I32_MAX for the ARG case and the smaller of I32_MAX and U16_MAX
+ for the NEXT_OFF case (presumably the capacity of that field -- confirm
+ against the regnode layout).
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+ PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+ if (OP(node) != CURLYX) {
+ const int max = (reg_off_by_arg[OP(node)]
+ ? I32_MAX
+ /* I32 may be smaller than U16 on CRAYs! */
+ : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+ int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+ int noff;
+ regnode *n = node;
+
+ /* Skip NOTHING and LONGJMP.
+ * A node is skipped only if its own link is non-zero: NEXT_OFF(n)
+ * for the NOTHING kind, ARG(n) for LONGJMP. 'noff' is assigned
+ * inside the condition, and short-circuit evaluation guarantees it
+ * has been set before 'off + noff' is compared with 'max'. */
+ while (
+ (n = regnext(n))
+ && (
+ (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+ || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+ )
+ && off + noff < max
+ ) {
+ off += noff;
+ }
+ if (reg_off_by_arg[OP(node)])
+ ARG(node) = off;
+ else
+ NEXT_OFF(node) = off;
+ }
+ return;
+}
+
/* the return from this sub is the minimum length that could possibly match */
STATIC SSize_t
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 stopparen,
U32 recursed_depth,
regnode_ssc *and_withp,
- U32 flags, U32 depth)
+ U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
/* recursed: which subroutines have we recursed into */
/* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
{
- dVAR;
SSize_t final_minlen;
/* There must be at least this number of characters to match */
SSize_t min = 0;
node length to get a real minimum (because
the folded version may be shorter) */
bool unfolded_multi_char = FALSE;
- bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1;
+ /* avoid mutating ops if we are anywhere within the recursed or
+ * enframed handling for a GOSUB: the outermost level will handle it.
+ */
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
DEBUG_PEEP("Peep", scan, depth, flags);
}
/* Follow the next-chain of the current node and optimize
- away all the NOTHINGs from it. */
- if (OP(scan) != CURLYX) {
- const int max = (reg_off_by_arg[OP(scan)]
- ? I32_MAX
- /* I32 may be smaller than U16 on CRAYs! */
- : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
- int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
- int noff;
- regnode *n = scan;
-
- /* Skip NOTHING and LONGJMP. */
- while ( (n = regnext(n))
- && ( (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
- || ((OP(n) == LONGJMP) && (noff = ARG(n))))
- && off + noff < max)
- off += noff;
- if (reg_off_by_arg[OP(scan)])
- ARG(scan) = off;
- else
- NEXT_OFF(scan) = off;
- }
+ away all the NOTHINGs from it.
+ */
+ rck_elide_nothing(scan);
/* The principal pseudo-switch. Cannot be a switch, since we look into
* several different things. */
/* DEFINEP study_chunk() recursion */
(void)study_chunk(pRExC_state, &scan, &minlen,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1, mutate_ok);
scan = next;
} else
/* recurse study_chunk() for each BRANCH in an alternation */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (min1 > minnext)
min1 = minnext;
* might result in a minlen of 1 and not of 4,
* but this doesn't make us mismatch, just try a bit
* harder than we should.
- * */
+ *
+ * However we must assume this GOSUB is infinite, to
+ * avoid wrongly applying other optimizations in the
+ * enclosing scope - see GH 18096, for example.
+ */
+ is_inf = is_inf_internal = 1;
scan= regnext(scan);
continue;
}
}
if (flags & SCF_DO_SUBSTR)
data->pos_min++;
+ /* This will bypass the formal 'min += minnext * mincount'
+ * calculation in the do_curly path, so assumes min width
+ * of the PLUS payload is exactly one. */
min++;
/* FALLTHROUGH */
case STAR:
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
- ,depth+1);
+ , depth+1, mutate_ok);
if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
RExC_precomp)));
}
+ if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
+ || min >= SSize_t_MAX - minnext * mincount )
+ {
+ FAIL("Regexp out of space");
+ }
+
min += minnext * mincount;
is_inf_internal |= deltanext == OPTIMIZE_INFTY
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
/* recurse study_chunk() on optimised CURLYX => CURLYM */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
NULL, stopparen, recursed_depth, NULL, 0,
- depth+1);
+ depth+1, mutate_ok);
}
else
oscan->flags = 0;
if (data && (fl & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
optimize_curly_tail:
- if (OP(oscan) != CURLYX) {
- while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
- && NEXT_OFF(next))
- NEXT_OFF(oscan) += NEXT_OFF(next);
- }
+ rck_elide_nothing(oscan);
continue;
default:
/* recurse study_chunk() for lookahead body */
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
last, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (scan->flags) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
- f, depth+1);
+ f, depth+1, mutate_ok);
if (scan->flags) {
assert(0); /* This code has never been tested since this
is normally not compiled */
/* optimise study_chunk() for TRIE */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed_depth, NULL, f, depth+1);
+ stopparen, recursed_depth, NULL, f, depth+1,
+ mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
/* we make the assumption here that each op in the list of
* op_siblings maps to one SV pushed onto the stack,
* except for code blocks, with have both an OP_NULL and
- * and OP_CONST.
+ * an OP_CONST.
* This allows us to match up the list of SVs against the
* list of OPs to find the next code block.
*
PERL_ARGS_ASSERT_SET_REGEX_PV;
/* make sure PL_bitcount bounds not exceeded */
- assert(sizeof(STD_PAT_MODS) <= 8);
+ STATIC_ASSERT_STMT(sizeof(STD_PAT_MODS) <= 8);
p = sv_grow(MUTABLE_SV(Rx), wraplen + 1); /* +1 for the ending NUL */
SvPOK_on(Rx);
* length of the pattern. Patches welcome to improve that guess. That amount
* of space is malloc'd and then immediately freed, and then clawed back node
* by node. This design is to minimze, to the extent possible, memory churn
- * when doing the the reallocs.
+ * when doing the reallocs.
*
* A separate parentheses counting pass may be needed in some cases.
* (Previously the sizing pass did this.) Patches welcome to reduce the number
OP *expr, const regexp_engine* eng, REGEXP *old_re,
bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
{
- dVAR;
REGEXP *Rx; /* Capital 'R' means points to a REGEXP */
STRLEN plen;
char *exp;
&data, -1, 0, NULL,
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
? SCF_TRIE_DOING_RESTUDY
: 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(NOOP);
} else if (flags & RXapif_ONE) {
ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
av = MUTABLE_AV(SvRV(ret));
- length = av_tindex(av);
+ length = av_count(av);
SvREFCNT_dec_NN(ret);
- return newSViv(length + 1);
+ return newSViv(length);
} else {
Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
(int)flags);
* one of them */
while (i_a < len_a && i_b < len_b) {
UV cp; /* The element to potentially add to the union's array */
- bool cp_in_set; /* is it in the the input list's set or not */
+ bool cp_in_set; /* is it in the input list's set or not */
/* We need to take one or the other of the two inputs for the union.
* Since we are merging two sorted lists, we take the smaller of the
STATIC SV*
S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
{
- dVAR;
const U8 * s = (U8*)STRING(node);
SSize_t bytelen = STR_LEN(node);
UV uc;
fc = uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
/* The only code points that aren't folded in a UTF EXACTFish
- * node are are the problematic ones in EXACTFL nodes */
+ * node are the problematic ones in EXACTFL nodes */
if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
/* We need to check for the possibility that this EXACTFL
* node begins with a multi-char fold. Therefore we fold
vFAIL("Too many nested open parens");
}
- *flagp = 0; /* Tentatively. */
+ *flagp = 0; /* Initialize. */
if (RExC_in_lookbehind) {
RExC_in_lookbehind++;
}
else if (paren != '?') /* Not Conditional */
ret = br;
- *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+ *flagp |= flags & (HASWIDTH | POSTPONED);
lastbr = br;
while (*RExC_parse == '|') {
if (RExC_use_BRANCHJ) {
REQUIRE_BRANCHJ(flagp, 0);
}
lastbr = br;
- *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
+ *flagp |= flags & (HASWIDTH | POSTPONED);
}
if (have_branch || paren != ':') {
}
}
- *flagp = WORST; /* Tentatively. */
+ *flagp = 0; /* Initialize. */
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force to /x */ );
else if (ret == 0)
ret = latest;
*flagp |= flags&(HASWIDTH|POSTPONED);
- if (chain == 0) /* First piece. */
- *flagp |= flags&SPSTART;
- else {
+ if (chain != 0) {
/* FIXME adding one for every branch after the first is probably
* excessive now we have TRIE support. (hv) */
MARK_NAUGHTY(1);
FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
}
- op = *RExC_parse;
-
- if (op == '{' && regcurly(RExC_parse)) {
- maxpos = NULL;
#ifdef RE_TRACK_PATTERN_OFFSETS
- parse_start = RExC_parse; /* MJD */
+ parse_start = RExC_parse;
#endif
- next = RExC_parse + 1;
- while (isDIGIT(*next) || *next == ',') {
- if (*next == ',') {
- if (maxpos)
- break;
- else
- maxpos = next;
- }
- next++;
- }
- if (*next == '}') { /* got one */
- const char* endptr;
- if (!maxpos)
- maxpos = next;
- RExC_parse++;
- if (isDIGIT(*RExC_parse)) {
- endptr = RExC_end;
- if (!grok_atoUV(RExC_parse, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- min = (I32)uv;
- } else {
- min = 0;
- }
- if (*maxpos == ',')
- maxpos++;
- else
- maxpos = RExC_parse;
- if (isDIGIT(*maxpos)) {
- endptr = RExC_end;
- if (!grok_atoUV(maxpos, &uv, &endptr))
- vFAIL("Invalid quantifier in {,}");
- if (uv >= REG_INFTY)
- vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
- max = (I32)uv;
- } else {
- max = REG_INFTY; /* meaning "infinity" */
- }
- RExC_parse = next;
- nextchar(pRExC_state);
- if (max < min) { /* If can't match, warn and optimize to fail
- unconditionally */
- reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
- ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
- NEXT_OFF(REGNODE_p(orig_emit)) =
- regarglen[OPFAIL] + NODE_STEP_REGNODE;
- return ret;
- }
- else if (min == max && *RExC_parse == '?')
- {
- ckWARN2reg(RExC_parse + 1,
- "Useless use of greediness modifier '%c'",
- *RExC_parse);
+
+ op = *RExC_parse;
+ switch (op) {
+
+ case '*':
+ nextchar(pRExC_state);
+ min = 0;
+ break;
+
+ case '+':
+ nextchar(pRExC_state);
+ min = 1;
+ break;
+
+ case '?':
+ nextchar(pRExC_state);
+ min = 0; max = 1;
+ break;
+
+ case '{': /* A '{' may or may not indicate a quantifier; call regcurly()
+ to determine which */
+ if (regcurly(RExC_parse)) {
+ const char* endptr;
+
+ /* Here is a quantifier, parse for min and max values */
+ maxpos = NULL;
+ next = RExC_parse + 1;
+ while (isDIGIT(*next) || *next == ',') {
+ if (*next == ',') {
+ if (maxpos)
+ break;
+ else
+ maxpos = next;
}
+ next++;
+ }
- do_curly:
- if ((flags&SIMPLE)) {
- if (min == 0 && max == REG_INFTY) {
+ assert(*next == '}');
- /* Going from 0..inf is currently forbidden in wildcard
- * subpatterns. The only reason is to make it harder to
- * write patterns that take a long long time to halt, and
- * because the use of this construct isn't necessary in
- * matching Unicode property values */
- if (RExC_pm_flags & PMf_WILDCARD) {
- RExC_parse++;
- /* diag_listed_as: Use of %s is not allowed in Unicode
- property wildcard subpatterns in regex; marked by
- <-- HERE in m/%s/ */
- vFAIL("Use of quantifier '*' is not allowed in"
- " Unicode property wildcard subpatterns");
- /* Note, don't need to worry about {0,}, as a '}' isn't
- * legal at all in wildcards, so wouldn't get this far
- * */
- }
- reginsert(pRExC_state, STAR, ret, depth+1);
- MARK_NAUGHTY(4);
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
- goto nest_check;
- }
- if (min == 1 && max == REG_INFTY) {
- reginsert(pRExC_state, PLUS, ret, depth+1);
- MARK_NAUGHTY(3);
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
- goto nest_check;
- }
- MARK_NAUGHTY_EXP(2, 2);
- reginsert(pRExC_state, CURLY, ret, depth+1);
- Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
- Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
- }
- else {
- const regnode_offset w = reg_node(pRExC_state, WHILEM);
+ if (!maxpos)
+ maxpos = next;
+ RExC_parse++;
+ if (isDIGIT(*RExC_parse)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(RExC_parse, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ min = (I32)uv;
+ } else {
+ min = 0;
+ }
+ if (*maxpos == ',')
+ maxpos++;
+ else
+ maxpos = RExC_parse;
+ if (isDIGIT(*maxpos)) {
+ endptr = RExC_end;
+ if (!grok_atoUV(maxpos, &uv, &endptr))
+ vFAIL("Invalid quantifier in {,}");
+ if (uv >= REG_INFTY)
+ vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+ max = (I32)uv;
+ } else {
+ max = REG_INFTY; /* meaning "infinity" */
+ }
+ RExC_parse = next;
+ nextchar(pRExC_state);
+ if (max < min) { /* If can't match, warn and optimize to fail
+ unconditionally */
+ reginsert(pRExC_state, OPFAIL, orig_emit, depth+1);
+ ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
+ NEXT_OFF(REGNODE_p(orig_emit)) =
+ regarglen[OPFAIL] + NODE_STEP_REGNODE;
+ return ret;
+ }
+ else if (min == max && *RExC_parse == '?')
+ {
+ ckWARN2reg(RExC_parse + 1,
+ "Useless use of greediness modifier '%c'",
+ *RExC_parse);
+ }
- FLAGS(REGNODE_p(w)) = 0;
- if (! REGTAIL(pRExC_state, ret, w)) {
- REQUIRE_BRANCHJ(flagp, 0);
- }
- if (RExC_use_BRANCHJ) {
- reginsert(pRExC_state, LONGJMP, ret, depth+1);
- reginsert(pRExC_state, NOTHING, ret, depth+1);
- NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over LONGJMP. */
- }
- reginsert(pRExC_state, CURLYX, ret, depth+1);
- /* MJD hk */
- Set_Node_Offset(REGNODE_p(ret), parse_start+1);
- Set_Node_Length(REGNODE_p(ret),
- op == '{' ? (RExC_parse - parse_start) : 1);
+ break;
+ } /* End of is regcurly() */
- if (RExC_use_BRANCHJ)
- NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over NOTHING to
- LONGJMP. */
- if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
- NOTHING)))
- {
- REQUIRE_BRANCHJ(flagp, 0);
- }
- RExC_whilem_seen++;
- MARK_NAUGHTY_EXP(1, 4); /* compound interest */
- }
- FLAGS(REGNODE_p(ret)) = 0;
-
- if (min > 0)
- *flagp = WORST;
- if (max > 0)
- *flagp |= HASWIDTH;
- ARG1_SET(REGNODE_p(ret), (U16)min);
- ARG2_SET(REGNODE_p(ret), (U16)max);
- if (max == REG_INFTY)
- RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
-
- goto nest_check;
- }
+ /* Here was a '{', but what followed it didn't form a quantifier. */
+ /* FALLTHROUGH */
+
+ default:
+ *flagp = flags;
+ return(ret);
+ NOT_REACHED; /*NOTREACHED*/
}
- if (!ISMULT1(op)) {
- *flagp = flags;
- return(ret);
+ /* Here we have a quantifier, and have calculated 'min' and 'max'.
+ *
+ * Check and possibly adjust a zero width operand */
+ if (! (flags & (HASWIDTH|POSTPONED))) {
+ if (max > REG_INFTY/3) {
+ if (origparse[0] == '\\' && origparse[1] == 'K') {
+ vFAIL2utf8f(
+ "%" UTF8f " is forbidden - matches null string"
+ " many times",
+ UTF8fARG(UTF, (RExC_parse >= origparse
+ ? RExC_parse - origparse
+ : 0),
+ origparse));
+ } else {
+ ckWARN2reg(RExC_parse,
+ "%" UTF8f " matches null string many times",
+ UTF8fARG(UTF, (RExC_parse >= origparse
+ ? RExC_parse - origparse
+ : 0),
+ origparse));
+ }
+ }
+
+ /* There's no point in trying to match something 0 length more than
+ * once except for extra side effects, which we don't have here since
+ * not POSTPONED */
+ if (max > 1) {
+ max = 1;
+ if (min > max) {
+ min = max;
+ }
+ }
}
-#if 0 /* Now runtime fix should be reliable. */
+ /* If this is a code block pass it up */
+ *flagp |= (flags & POSTPONED);
- /* if this is reinstated, don't forget to put this back into perldiag:
+ if (max > 0) {
+ *flagp |= (flags & HASWIDTH);
+ if (max == REG_INFTY)
+ RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
+ }
- =item Regexp *+ operand could be empty at {#} in regex m/%s/
+ /* 'SIMPLE' operands don't require full generality */
+ if ((flags&SIMPLE)) {
+ if (max == REG_INFTY) {
+ if (min == 1) {
+ reginsert(pRExC_state, PLUS, ret, depth+1);
+ MARK_NAUGHTY(3);
+ goto done_main_op;
+ }
+ else if (min == 0) {
- (F) The part of the regexp subject to either the * or + quantifier
- could match an empty string. The {#} shows in the regular
- expression about where the problem was discovered.
+ /* Going from 0..inf is currently forbidden in wildcard
+ * subpatterns. The only reason is to make it harder to
+ * write patterns that take a long long time to halt, and
+ * because the use of this construct isn't necessary in
+ * matching Unicode property values */
+ if (RExC_pm_flags & PMf_WILDCARD) {
+ RExC_parse++;
+ /* diag_listed_as: Use of %s is not allowed in Unicode
+ property wildcard subpatterns in regex; marked by
+ <-- HERE in m/%s/ */
+ vFAIL("Use of quantifier '*' is not allowed in"
+ " Unicode property wildcard subpatterns");
+ /* Note, don't need to worry about {0,}, as a '}' isn't
+ * legal at all in wildcards, so wouldn't get this far
+ * */
+ }
- */
+ reginsert(pRExC_state, STAR, ret, depth+1);
+ MARK_NAUGHTY(4);
+ goto done_main_op;
+ }
+ }
- if (!(flags&HASWIDTH) && op != '?')
- vFAIL("Regexp *+ operand could be empty");
-#endif
+ /* Here, SIMPLE, but not the '*' and '+' special cases */
-#ifdef RE_TRACK_PATTERN_OFFSETS
- parse_start = RExC_parse;
-#endif
- nextchar(pRExC_state);
+ MARK_NAUGHTY_EXP(2, 2);
+ reginsert(pRExC_state, CURLY, ret, depth+1);
+ Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
+ Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
+ }
+ else { /* not SIMPLE */
+ const regnode_offset w = reg_node(pRExC_state, WHILEM);
- *flagp = (op != '+') ? (WORST|SPSTART|HASWIDTH) : (WORST|HASWIDTH);
-
- if (op == '*') {
- min = 0;
- goto do_curly;
- }
- else if (op == '+') {
- min = 1;
- goto do_curly;
- }
- else if (op == '?') {
- min = 0; max = 1;
- goto do_curly;
- }
- nest_check:
- if (!(flags&(HASWIDTH|POSTPONED)) && max > REG_INFTY/3) {
- if (origparse[0] == '\\' && origparse[1] == 'K') {
- vFAIL2utf8f(
- "%" UTF8f " is forbidden - matches null string many times",
- UTF8fARG(UTF, (RExC_parse >= origparse
- ? RExC_parse - origparse
- : 0),
- origparse));
- /* NOT-REACHED */
- } else {
- ckWARN2reg(RExC_parse,
- "%" UTF8f " matches null string many times",
- UTF8fARG(UTF, (RExC_parse >= origparse
- ? RExC_parse - origparse
- : 0),
- origparse));
+ FLAGS(REGNODE_p(w)) = 0;
+ if (! REGTAIL(pRExC_state, ret, w)) {
+ REQUIRE_BRANCHJ(flagp, 0);
}
+ if (RExC_use_BRANCHJ) {
+ reginsert(pRExC_state, LONGJMP, ret, depth+1);
+ reginsert(pRExC_state, NOTHING, ret, depth+1);
+ NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over LONGJMP. */
+ }
+ reginsert(pRExC_state, CURLYX, ret, depth+1);
+ /* MJD hk */
+ Set_Node_Offset(REGNODE_p(ret), parse_start+1);
+ Set_Node_Length(REGNODE_p(ret),
+ op == '{' ? (RExC_parse - parse_start) : 1);
+
+ if (RExC_use_BRANCHJ)
+ NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over NOTHING to
+ LONGJMP. */
+ if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+ NOTHING)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
+ RExC_whilem_seen++;
+ MARK_NAUGHTY_EXP(1, 4); /* compound interest */
}
+ /* Finish up the CURLY/CURLYX case */
+ FLAGS(REGNODE_p(ret)) = 0;
+
+ ARG1_SET(REGNODE_p(ret), (U16)min);
+ ARG2_SET(REGNODE_p(ret), (U16)max);
+
+ done_main_op:
+
+ /* Process any greediness modifiers */
if (*RExC_parse == '?') {
- nextchar(pRExC_state);
- reginsert(pRExC_state, MINMOD, ret, depth+1);
+ nextchar(pRExC_state);
+ reginsert(pRExC_state, MINMOD, ret, depth+1);
if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
REQUIRE_BRANCHJ(flagp, 0);
}
}
}
+ /* Forbid extra quantifiers */
if (ISMULT2(RExC_parse)) {
- RExC_parse++;
- vFAIL("Nested quantifiers");
+ RExC_parse++;
+ vFAIL("Nested quantifiers");
}
return(ret);
FAIL2("panic: reg returned failure to grok_bslash_N, flags=%#" UVxf,
(UV) flags);
}
- *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+ *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
nextchar(pRExC_state);
STATIC regnode_offset
S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
{
- dVAR;
regnode_offset ret = 0;
I32 flags = 0;
char *parse_start;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
- *flagp = WORST; /* Tentatively. */
+ *flagp = 0; /* Initialize. */
DEBUG_PARSE("atom");
FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf,
(UV) flags);
}
- *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
+ *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
break;
case '|':
case ')':
/* SBOL is shared with /^/ so we set the flags so we can tell
* /\A/ from /^/ in split. */
FLAGS(REGNODE_p(ret)) = 1;
+ *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
- *flagp |= SIMPLE;
goto finish_meta_pat;
case 'G':
if (RExC_pm_flags & PMf_WILDCARD) {
}
ret = reg_node(pRExC_state, GPOS);
RExC_seen |= REG_GPOS_SEEN;
- *flagp |= SIMPLE;
goto finish_meta_pat;
case 'K':
if (!RExC_in_lookbehind && !RExC_in_lookahead) {
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, KEEPS);
- *flagp |= SIMPLE;
/* XXX:dmq : disabling in-place substitution seems to
* be necessary here to avoid cases of memory corruption, as
* with: C<$_="x" x 80; s/x\K/y/> -- rgs
}
else {
ret = reg_node(pRExC_state, SEOL);
+ *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
- *flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
case 'z':
}
else {
ret = reg_node(pRExC_state, EOS);
+ *flagp |= SIMPLE; /* Wrong, but too late to fix for 5.32 */
}
- *flagp |= SIMPLE;
RExC_seen_zerolen++; /* Do not optimize RE away */
goto finish_meta_pat;
case 'C':
ret = reg_node(pRExC_state, op);
FLAGS(REGNODE_p(ret)) = flags;
- *flagp |= SIMPLE;
-
goto finish_meta_pat;
}
num > 9
/* any numeric escape < RExC_npar is a backref */
&& num >= RExC_npar
- /* cannot be an octal escape if it starts with 8 */
- && *RExC_parse != '8'
- /* cannot be an octal escape if it starts with 9 */
- && *RExC_parse != '9'
+ /* cannot be an octal escape if it starts with [89] */
+ && ! inRANGE(*RExC_parse, '8', '9')
) {
/* Probably not meant to be a backref, instead likely
* to be an octal character escape, e.g. \35 or \777.
* have to map that back to the original */
if (need_to_fold_loc) {
upper_fill = loc_correspondence[s - s_start];
- Safefree(locfold_buf);
- Safefree(loc_correspondence);
-
if (upper_fill == 0) {
FAIL2("panic: loc_correspondence[%d] is 0",
(int) (s - s_start));
}
+ Safefree(locfold_buf);
+ Safefree(loc_correspondence);
}
else {
upper_fill = s - s0;
}
goto reparse;
}
- else if (need_to_fold_loc) {
- Safefree(locfold_buf);
- Safefree(loc_correspondence);
- }
/* Here the node consists entirely of non-final multi-char
* folds. (Likely it is all 'f's or all 's's.) There's no
* whole thing */
len = old_s - s0;
}
+
+ if (need_to_fold_loc) {
+ Safefree(locfold_buf);
+ Safefree(loc_correspondence);
+ }
} /* End of verifying node ends with an appropriate char */
/* We need to start the next node at the character that didn't fit
* sets up the bitmap and any flags, removing those code points from the
* inversion list, setting it to NULL should it become completely empty */
- dVAR;
PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST;
assert(PL_regkind[OP(node)] == ANYOF);
? end
: NUM_ANYOF_CODE_POINTS - 1;
for (i = start; i <= (int) high; i++) {
- if (! ANYOF_BITMAP_TEST(node, i)) {
- ANYOF_BITMAP_SET(node, i);
- }
+ ANYOF_BITMAP_SET(node, i);
}
}
invlist_iterfinish(*invlist_ptr);
if ( posix_warnings
&& RExC_warn_text
- && av_top_index(RExC_warn_text) > -1)
+ && av_count(RExC_warn_text) > 0)
{
*posix_warnings = RExC_warn_text;
}
/* If more than a single node returned, the nested
* parens evaluated to more than just a (?[...]),
* which isn't legal */
- || node != 1) {
+ || RExC_emit != orig_emit
+ + NODE_STEP_REGNODE
+ + regarglen[REGEX_SET])
+ {
vFAIL("Expecting interpolated extended charclass");
}
resultant_invlist = (SV *) ARGp(REGNODE_p(node));
goto regclass_failed;
}
+ assert(current);
+
/* regclass() will return with parsing just the \ sequence,
* leaving the parse pointer at the next thing to parse */
RExC_parse--;
goto regclass_failed;
}
- if (! current) {
- break;
- }
+ assert(current);
/* function call leaves parse pointing to the ']', except if we
* faked it */
*
* There is a line below that uses the same white space criteria but is outside
* this macro. Both here and there must use the same definition */
-#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p) \
+#define SKIP_BRACKETED_WHITE_SPACE(do_skip, p, stop_p) \
STMT_START { \
if (do_skip) { \
- while (isBLANK_A(UCHARAT(p))) \
+ while (p < stop_p && isBLANK_A(UCHARAT(p))) \
{ \
p++; \
} \
* UTF-8
*/
- dVAR;
UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;
IV range = 0;
UV value = OOB_UNICODE, save_value = OOB_UNICODE;
PERL_UNUSED_ARG(depth);
#endif
+ assert(! (ret_invlist && allow_mutiple_chars));
/* If wants an inversion list returned, we can't optimize to something
* else. */
initial_listsv_len = SvCUR(listsv);
SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */
- SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
assert(RExC_parse <= RExC_end);
invert = TRUE;
allow_mutiple_chars = FALSE;
MARK_NAUGHTY(1);
- SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
}
/* Check that they didn't say [:posix:] instead of [[:posix:]] */
output_posix_warnings(pRExC_state, posix_warnings);
}
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
+
if (RExC_parse >= stop_ptr) {
break;
}
- SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
-
if (UCHARAT(RExC_parse) == ']') {
break;
}
assert(prop_definition || strings);
if (strings) {
- if (! RExC_in_multi_char_class) {
+ if (ret_invlist) {
+ if (! prop_definition) {
+ RExC_parse = e + 1;
+ vFAIL("Unicode string properties are not implemented in (?[...])");
+ }
+ else {
+ ckWARNreg(e + 1,
+ "Using just the single character results"
+ " returned by \\p{} in (?[...])");
+ }
+ }
+ else if (! RExC_in_multi_char_class) {
if (invert ^ (value == 'P')) {
RExC_parse = e + 1;
vFAIL("Inverting a character class which contains"
}
/* For each multi-character string ... */
- while (av_tindex(strings) >= 0) {
+ while (av_count(strings) > 0) {
/* ... Each entry is itself an array of code
* points. */
AV * this_string = (AV *) av_shift( strings);
- STRLEN cp_count = av_tindex(this_string) + 1;
+ STRLEN cp_count = av_count(this_string);
SV * final = newSV(cp_count * 4);
SvPVCLEAR(final);
/* Create another string of sequences of \x{...} */
- while (av_tindex(this_string) >= 0) {
+ while (av_count(this_string) > 0) {
SV * character = av_shift(this_string);
UV cp = SvUV(character);
}
} /* end of namedclass \blah */
- SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse);
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
/* If 'range' is set, 'value' is the ending of a range--check its
* validity. (If value isn't a single code point in the case of a
char* next_char_ptr = RExC_parse + 1;
/* Get the next real char after the '-' */
- SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr);
+ SKIP_BRACKETED_WHITE_SPACE(skip_white, next_char_ptr, RExC_end);
/* If the '-' is at the end of the class (just before the ']',
* it is a literal minus; otherwise it is a range */
* printable should have each end point be a portable value
* for it (preferably like 'A', but we don't warn if it is
* a (portable) Unicode name or code point), and the range
- * must be be all digits or all letters of the same case.
+ * must be all digits or all letters of the same case.
* Otherwise, the range is non-portable and unclear as to
* what it contains */
if ( (isPRINT_A(prevvalue) || isPRINT_A(value))
ret = reg(pRExC_state, 1, ®_flags, depth+1);
- *flagp |= reg_flags & (HASWIDTH|SIMPLE|SPSTART|POSTPONED|RESTART_PARSE|NEED_UTF8);
+ *flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
/* And restore so can parse the rest of the pattern */
RExC_parse = save_parse;
if (ret_invlist) {
*ret_invlist = cp_list;
- return RExC_emit;
+ return (cp_list) ? RExC_emit : 0;
}
if (anyof_flags & ANYOF_LOCALE_FLAGS) {
* points) in the ASCII range, so we can't use it here to
* artificially restrict the fold domain, so we check if
* the class does or does not match some EXACTFish node.
- * Further, if we aren't under /i, and and the folded-to
+ * Further, if we aren't under /i, and the folded-to
* character is part of a multi-character fold, we can't do
* this optimization, as the sequence around it could be
* that multi-character fold, and we don't here know the
av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list));
}
+ /* (Note that if any of this changes, the size calculations in
+ * S_optimize_regclass() might need to be updated.) */
+
if (only_utf8_locale_list) {
av_store(av, ONLY_LOCALE_MATCHES_INDEX,
SvREFCNT_inc_NN(only_utf8_locale_list));
PERL_UNUSED_ARG(depth);
#endif
- /* Find last node. */
+ /* The final node in the chain is the first one with a zero (i.e., not yet
+ * set) next pointer */
scan = (regnode_offset) p;
for (;;) {
regnode * const temp = regnext(REGNODE_p(scan));
scan = REGNODE_OFFSET(temp);
}
+ /* Populate this node's next pointer */
assert(val >= scan);
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
assert((UV) (val - scan) <= U32_MAX);
Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
{
#ifdef DEBUGGING
- dVAR;
int k;
RXi_GET_DECL(prog, progi);
DECLARE_AND_GET_RE_DEBUG_FLAGS;
: (OP(o) == ANYOFH || OP(o) == ANYOFR)
? 0xFF
: lowest;
- Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
- if (lowest != highest) {
- Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+#ifndef EBCDIC
+ if (OP(o) != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
+#endif
+ {
+ Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+ if (lowest != highest) {
+ Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+ }
+ Perl_sv_catpvf(aTHX_ sv, ")");
}
- Perl_sv_catpvf(aTHX_ sv, ")");
}
SvREFCNT_dec(unresolved);
Perl_re_intuit_string(pTHX_ REGEXP * const r)
{ /* Assume that RE_INTUIT is set */
/* Returns an SV containing a string that must appear in the target for it
- * to match */
+ * to match, or NULL if nothing is known that must match.
+ *
+ * CAUTION: the SV can be freed during execution of the regex engine */
struct regexp *const prog = ReANY(r);
DECLARE_AND_GET_RE_DEBUG_FLAGS;
U32 refcount;
reg_ac_data *aho=(reg_ac_data*)ri->data->data[n];
#ifdef USE_ITHREADS
- dVAR;
#endif
OP_REFCNT_LOCK;
refcount = --aho->refcount;
U32 refcount;
reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
#ifdef USE_ITHREADS
- dVAR;
#endif
OP_REFCNT_LOCK;
refcount = --trie->refcount;
#define SAVEPVN(p, n) ((p) ? savepvn(p, n) : NULL)
/*
- re_dup_guts - duplicate a regexp.
+=for apidoc_section REGEXP Functions
+=for apidoc re_dup_guts
+Duplicate a regexp.
- This routine is expected to clone a given regexp structure. It is only
- compiled under USE_ITHREADS.
+This routine is expected to clone a given regexp structure. It is only
+compiled under USE_ITHREADS.
- After all of the core data stored in struct regexp is duplicated
- the regexp_engine.dupe method is used to copy any private data
- stored in the *pprivate pointer. This allows extensions to handle
- any duplication it needs to do.
+After all of the core data stored in struct regexp is duplicated
+the regexp_engine.dupe method is used to copy any private data
+stored in the *pprivate pointer. This allows extensions to handle
+any duplication they need to do.
+
+=cut
See pregfree() and regfree_internal() if you change anything here.
*/
void
Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
{
- dVAR;
I32 npar;
const struct regexp *r = ReANY(sstr);
struct regexp *ret = ReANY(dstr);
void *
Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
{
- dVAR;
struct regexp *const r = ReANY(rx);
regexp_internal *reti;
int len;
UV this_end;
const char * format;
- if (end - start < min_range_count) {
-
- /* Output chars individually when they occur in short ranges */
+ if ( end - start < min_range_count
+ && (end - start <= 2 || (isPRINT_A(start) && isPRINT_A(end))))
+ {
+ /* Output very short ranges (end - start <= 2) individually, and also
+ * longer ones (still below min_range_count) when both end points are
+ * printable ASCII */
for (; start <= end; start++) {
put_code_point(sv, start);
}
* output would have been only the inversion indicator '^', NULL is instead
* returned. */
- dVAR;
SV * output;
PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS_COMMON;
* cases where it can't try inverting, as what actually matches isn't known
* until runtime, and hence the inversion isn't either. */
- dVAR;
bool inverting_allowed = ! force_as_is_display;
int i;
int inverted_bias, as_is_bias;
- /* We will apply our bias to whichever of the the results doesn't have
+ /* We will apply our bias to whichever of the results doesn't have
* the '^' */
if (invert) {
invert = FALSE;
void
Perl_init_uniprops(pTHX)
{
- dVAR;
# ifdef DEBUGGING
char * dump_len_string;
# define CUR_CONTEXT aTHX
# define ORIGINAL_CONTEXT save_aTHX
# else
-# define DECLARATION_FOR_GLOBAL_CONTEXT
+# define DECLARATION_FOR_GLOBAL_CONTEXT dNOOP
# define SWITCH_TO_GLOBAL_CONTEXT NOOP
# define RESTORE_CONTEXT NOOP
# define CUR_CONTEXT NULL
* properties. This is a function so it can be set up to be called even if
* the program unexpectedly quits */
- dVAR;
SV ** current_entry;
const STRLEN key_len = strlen((const char *) key);
DECLARATION_FOR_GLOBAL_CONTEXT;
this */
const STRLEN level) /* Recursion level of this call */
{
- dVAR;
char* lookup_name; /* normalized name for lookup in our tables */
unsigned lookup_len; /* Its length */
enum { Not_Strict = 0, /* Some properties have stricter name */
goto append_name_to_msg;
}
- lookup_loose = get_cv("_charnames::_loose_regcomp_lookup", 0);
+ lookup_loose = get_cvs("_charnames::_loose_regcomp_lookup", 0);
if (! lookup_loose) {
Perl_croak(aTHX_
"panic: Can't find '_charnames::_loose_regcomp_lookup");
/* Try again stripping off any initial 'Is'. This is because we
* promise that an initial Is is optional. The same isn't true of
* names that start with 'In'. Those can match only blocks, and the
- * lookup table already has those accounted for. */
- if (starts_with_Is) {
+ * lookup table already has those accounted for. The lookup table also
+ * has already accounted for Perl extensions (without an = sign)
+ * starting with 'is'. */
+ if (starts_with_Is && equals_pos >= 0) {
lookup_name += 2;
lookup_len -= 2;
equals_pos -= 2;
where we are now */
bool found_matches = FALSE; /* Did any name match so far? */
SV * empty; /* For matching zero length names */
- SV * must; /* What substring, if any, must be in a name
- for the subpattern to match */
+ SV * must_sv; /* Contains the substring, if any, that must be
+ in a name for the subpattern to match */
+ const char * must; /* The PV of 'must_sv' */
+ STRLEN must_len; /* And its length */
SV * syllable_name = NULL; /* For Hangul syllables */
const char hangul_prefix[] = "HANGUL SYLLABLE ";
const STRLEN hangul_prefix_len = sizeof(hangul_prefix) - 1;
/* Compile the subpattern consisting of the name being looked for */
subpattern_re = compile_wildcard(wname, wname_len, FALSE /* /-i */ );
- must = re_intuit_string(subpattern_re);
+
+ must_sv = re_intuit_string(subpattern_re);
+ if (must_sv) {
+ /* regexec.c can free the re_intuit_string() return. GH #17734 */
+ must_sv = sv_2mortal(newSVsv(must_sv));
+ must = SvPV(must_sv, must_len);
+ }
+ else {
+ must = "";
+ must_len = 0;
+ }
+
+ /* (Note: 'must' could contain a NUL. And yet we use strspn() below on it.
+ * This works because the NUL causes the function to return early, thus
+ * showing that there are characters in it other than the acceptable ones,
+ * which is our desired result.) */
+
prog = ReANY(subpattern_re);
/* If only nothing is matched, skip to where empty names are looked for */
/* And match against the string of all names /gc. Don't even try if it
* must match a character not found in any name. */
- if ( ! must
- || SvCUR(must) == 0
- || strspn(SvPVX(must), "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()")
- == SvCUR(must))
+ if (strspn(must, "\n -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ()") == must_len)
{
while (execute_wildcard(subpattern_re,
cur_pos,
* one of the characters in that isn't in any Hangul syllable. */
if ( prog->minlen <= (SSize_t) syl_max_len
&& prog->maxlen > 0
- && ( ! must
- || SvCUR(must) == 0
- || strspn(SvPVX(must), "\n ABCDEGHIJKLMNOPRSTUWY") == SvCUR(must)))
+ && (strspn(must, "\n ABCDEGHIJKLMNOPRSTUWY") == must_len))
{
/* These constants, names, values, and algorithm are adapted from the
* Unicode standard, version 5.1, section 3.12, and should never
* series */
if ( prog->minlen <= (SSize_t) SvCUR(algo_name)
&& prog->maxlen > 0
- && ( ! must
- || SvCUR(must) == 0
- || strspn(SvPVX(must), legal) == SvCUR(must)))
+ && (strspn(must, legal) == must_len))
{
for (j = low; j <= high; j++) { /* For each code point in the series */