#define WORST 0 /* Worst case. */
#define HASWIDTH 0x01 /* Known to match non-null strings. */
-/* Simple enough to be STAR/PLUS operand; in an EXACT node must be a single
- * character. Note that this is not the same thing as REGNODE_SIMPLE */
+/* Simple enough to be STAR/PLUS operand; in an EXACTish node must be a single
+ * character. (There needs to be a case: in the switch statement in regexec.c
+ * for any node marked SIMPLE.) Note that this is not the same thing as
+ * REGNODE_SIMPLE */
#define SIMPLE 0x02
-#define SPSTART 0x04 /* Starts with * or +. */
+#define SPSTART 0x04 /* Starts with * or + */
#define TRYAGAIN 0x08 /* Weeded out a declaration. */
#define POSTPONED 0x10 /* (?1),(?&name), (??{...}) or similar */
goto finish_meta_pat;
case 'R':
ret = reg_node(pRExC_state, LNBREAK);
- *flagp |= HASWIDTH;
+ *flagp |= HASWIDTH|SIMPLE;
goto finish_meta_pat;
case 'h':
ret = reg_node(pRExC_state, HORIZWS);
IV namedclass = OOB_NAMEDCLASS;
char *rangebegin = NULL;
bool need_class = 0;
- bool allow_full_fold = TRUE; /* Assume wants multi-char folding */
SV *listsv = NULL;
STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
than just initialized. */
if (UCHARAT(RExC_parse) == '^') { /* Complement of range. */
RExC_parse++;
- if (! RExC_in_multi_char_class) {
- invert = TRUE;
- RExC_naughty++;
-
- /* We have decided to not allow multi-char folds in inverted
- * character classes, due to the confusion that can happen,
- * especially with classes that are designed for a non-Unicode
- * world: You have the peculiar case that:
- "s s" =~ /^[^\xDF]+$/i => Y
- "ss" =~ /^[^\xDF]+$/i => N
- *
- * See [perl #89750] */
- allow_full_fold = FALSE;
- }
+ invert = TRUE;
+ RExC_naughty++;
}
if (SIZE_ONLY) {
* For single-valued non-inverted ranges, we consider the possibility
* of multi-char folds. (We made a conscious decision to not do this
* for the other cases because it can often lead to non-intuitive
- * results) */
+ * results. For example, you have the peculiar case that:
+ * "s s" =~ /^[^\xDF]+$/i => Y
+ * "ss" =~ /^[^\xDF]+$/i => N
+ *
+ * See [perl #89750] */
if (FOLD && ! invert && value == prevvalue) {
if (value == LATIN_SMALL_LETTER_SHARP_S
|| (value > 255 && _invlist_contains_cp(PL_HasMultiCharFold,
ret = reg(pRExC_state, 1, ®_flags, depth+1);
- *flagp |= reg_flags&(HASWIDTH|SPSTART|POSTPONED);
+ *flagp |= reg_flags&(HASWIDTH|SIMPLE|SPSTART|POSTPONED);
RExC_parse = save_parse;
RExC_end = save_end;
U8 foldbuf[UTF8_MAXBYTES_CASE+1];
STRLEN foldlen;
- UV f;
SV** listp;
if (j < 256) {
* hard-coded for it. First, get its fold. This is the simple
* fold, as the multi-character folds have been handled earlier
* and separated out */
- f = _to_uni_fold_flags(j, foldbuf, &foldlen,
- ((LOC)
- ? FOLD_FLAGS_LOCALE
- : (ASCII_FOLD_RESTRICTED)
- ? FOLD_FLAGS_NOMIX_ASCII
- : 0));
+ _to_uni_fold_flags(j, foldbuf, &foldlen,
+ ((LOC)
+ ? FOLD_FLAGS_LOCALE
+ : (ASCII_FOLD_RESTRICTED)
+ ? FOLD_FLAGS_NOMIX_ASCII
+ : 0));
/* Single character fold of above Latin1. Add everything in
* its fold closure to the list that this node should match.
if (!ret_x)
ret_x = (REGEXP*) newSV_type(SVt_REGEXP);
+ /* This ensures that SvTHINKFIRST(sv) is true, and hence that
+ sv_force_normal(sv) is called. */
+ SvFAKE_on(ret_x);
ret = (struct regexp *)SvANY(ret_x);
- (void)ReREFCNT_inc(rx);
/* We can take advantage of the existing "copied buffer" mechanism in SVs
by pointing directly at the buffer, but flagging that the allocated
space in the copy is zero. As we've just done a struct copy, it's now
a case of zero-ing that, rather than copying the current length. */
+ if (SvPOKp(ret_x)) SvPV_free(ret_x);
SvPV_set(ret_x, RX_WRAPPED(rx));
SvFLAGS(ret_x) |= SvFLAGS(rx) & (SVf_POK|SVp_POK|SVf_UTF8);
memcpy(&(ret->xpv_cur), &(r->xpv_cur),
sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
SvLEN_set(ret_x, 0);
- SvSTASH_set(ret_x, NULL);
- SvMAGIC_set(ret_x, NULL);
if (r->offs) {
const I32 npar = r->nparens+1;
Newx(ret->offs, npar, regexp_paren_pair);
#ifdef PERL_OLD_COPY_ON_WRITE
ret->saved_copy = NULL;
#endif
- ret->mother_re = rx;
+ ret->mother_re = ReREFCNT_inc(r->mother_re ? r->mother_re : rx);
SvREFCNT_inc_void(ret->qr_anoncv);
return ret_x;
2: something we no longer hold a reference on
so we need to copy it locally. */
/* Note we need to use SvCUR(), rather than
- SvLEN(), on our mother_re, because it, in
- turn, may well be pointing to its own mother_re. */
+ SvLEN(), on our mother_re, because its buffer may not be
+ the same size as our newly-allocated one. */
SvPV_set(dstr, SAVEPVN(SvPVX_const(ret->mother_re),
SvCUR(ret->mother_re)+1));
SvLEN_set(dstr, SvCUR(ret->mother_re)+1);