#define STATIC static
#endif
+
typedef struct RExC_state_t {
- U32 flags; /* are we folding, multilining? */
+ U32 flags; /* RXf_* are we folding, multilining? */
+ U32 pm_flags; /* PMf_* stuff from the calling PMOP */
char *precomp; /* uncompiled string. */
REGEXP *rx_sv; /* The SV that is the regexp. */
regexp *rx; /* perl core regexp structure */
I32 nestroot; /* root parens we are in - used by accept */
I32 extralen;
I32 seen_zerolen;
- I32 seen_evals;
regnode **open_parens; /* pointers to open parens */
regnode **close_parens; /* pointers to close parens */
regnode *opend; /* END node in program */
I32 in_lookbehind;
I32 contains_locale;
I32 override_recoding;
- int max_code_index; /* max index into code_indices */
- int code_index; /* index into code_indices */
- STRLEN *code_indices; /* begin and ends of literal (?{})
+ struct reg_code_block *code_blocks; /* positions of literal (?{})
within pattern */
- OP* next_code_or_const; /* iterating the list of DO/OP_CONST */
+ int num_code_blocks; /* size of code_blocks[] */
+ int code_index; /* next code_blocks[] slot */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
#endif
+ SV *runtime_code_qr; /* qr with the runtime code blocks */
#ifdef DEBUGGING
const char *lastparse;
I32 lastnum;
} RExC_state_t;
#define RExC_flags (pRExC_state->flags)
+#define RExC_pm_flags (pRExC_state->pm_flags)
#define RExC_precomp (pRExC_state->precomp)
#define RExC_rx_sv (pRExC_state->rx_sv)
#define RExC_rx (pRExC_state->rx)
#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_extralen (pRExC_state->extralen)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
-#define RExC_seen_evals (pRExC_state->seen_evals)
#define RExC_utf8 (pRExC_state->utf8)
#define RExC_uni_semantics (pRExC_state->uni_semantics)
#define RExC_orig_utf8 (pRExC_state->orig_utf8)
* scope
*/
-#ifndef PERL_IN_XSUB_RE
-#define RE_ENGINE_PTR &PL_core_reg_engine
-#else
-extern const struct regexp_engine my_reg_engine;
-#define RE_ENGINE_PTR &my_reg_engine
-#endif
-
#ifndef PERL_IN_XSUB_RE
-/* return the currently in-scope regex engine (or NULL if none) */
+/* return the currently in-scope regex engine (or the default if none) */
-regexp_engine *
+regexp_engine const *
Perl_current_re_engine(pTHX)
{
dVAR;
SV **ptr;
if (!table)
- return NULL;
+ return &PL_core_reg_engine;
ptr = hv_fetchs(table, "regcomp", FALSE);
if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
- return NULL;
+ return &PL_core_reg_engine;
return INT2PTR(regexp_engine*,SvIV(*ptr));
}
else {
SV *ptr;
if (!PL_curcop->cop_hints_hash)
- return NULL;
+ return &PL_core_reg_engine;
ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
- return NULL;
+ return &PL_core_reg_engine;
return INT2PTR(regexp_engine*,SvIV(ptr));
}
}
Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
{
dVAR;
- regexp_engine *eng = current_re_engine();
+ regexp_engine const *eng = current_re_engine();
+ GET_RE_DEBUG_FLAGS_DECL;
PERL_ARGS_ASSERT_PREGCOMP;
/* Dispatch a request to compile a regexp to correct regexp engine. */
- if (eng) {
- GET_RE_DEBUG_FLAGS_DECL;
- DEBUG_COMPILE_r({
- PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
- PTR2UV(eng));
- });
- return CALLREGCOMP_ENG(eng, pattern, flags);
- }
- return Perl_re_compile(aTHX_ pattern, flags);
+ DEBUG_COMPILE_r({
+ PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
+ PTR2UV(eng));
+ });
+ return CALLREGCOMP_ENG(eng, pattern, flags);
}
#endif
-/* public(ish) wrapper for Perl_op_re_compile that only takes an SV
+/* public(ish) wrapper for Perl_re_op_compile that only takes an SV
* pattern rather than a list of OPs */
REGEXP *
-Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
+Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
{
+ SV *pat = pattern; /* defeat constness! */
PERL_ARGS_ASSERT_RE_COMPILE;
- return Perl_re_op_compile(aTHX_ &pattern, 1, NULL, orig_pm_flags);
+ return Perl_re_op_compile(aTHX_ &pat, 1, NULL, current_re_engine(),
+ NULL, NULL, rx_flags, 0);
}
-/* given a list of CONSTs and DO blocks in expr, append all the CONSTs to
- * pat, and record the start and end of each code block in code_indices
- * (each DO{} op is followed by an OP_CONST containing the corresponding
- * literal '(?{...}) text)
+/* see if there are any run-time code blocks in the pattern.
+ * False positives are allowed */
+
+static bool
+S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
+ U32 pm_flags, char *pat, STRLEN plen)
+{
+ int n = 0;
+ STRLEN s;
+
+ /* avoid infinitely recursing when we recompile the pattern parcelled up
+ * as qr'...'. A single constant qr// string can't have have any
+ * run-time component in it, and thus, no runtime code. (A non-qr
+ * string, however, can, e.g. $x =~ '(?{})') */
+ if ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST)
+ return 0;
+
+ for (s = 0; s < plen; s++) {
+ if (n < pRExC_state->num_code_blocks
+ && s == pRExC_state->code_blocks[n].start)
+ {
+ s = pRExC_state->code_blocks[n].end;
+ n++;
+ continue;
+ }
+ /* TODO ideally should handle [..], (#..), /#.../x to reduce false
+ * positives here */
+ if (pat[s] == '(' && pat[s+1] == '?' &&
+ (pat[s+2] == '{' || (pat[s+2] == '?' && pat[s+3] == '{'))
+ )
+ return 1;
+ }
+ return 0;
+}
+
+/* Handle run-time code blocks. We will already have compiled any direct
+ * or indirect literal code blocks. Now, take the pattern 'pat' and make a
+ * copy of it, but with any literal code blocks blanked out and
+ * appropriate chars escaped; then feed it into
+ *
+ * eval "qr'modified_pattern'"
+ *
+ * For example,
+ *
+ * a\bc(?{"this was literal"})def'ghi\\jkl(?{"this is runtime"})mno
+ *
+ * becomes
+ *
+ * qr'a\\bc def\'ghi\\\\jkl(?{"this is runtime"})mno'
+ *
+ * After eval_sv()-ing that, grab any new code blocks from the returned qr
+ * and merge them with any code blocks of the original regexp.
+ *
+ * If the pat is non-UTF8, while the evalled qr is UTF8, don't merge;
+ * instead, just save the qr and return FALSE; this tells our caller that
+ * the original pattern needs upgrading to utf8.
*/
-static void
-S_get_pat_and_code_indices(pTHX_ RExC_state_t *pRExC_state, OP* expr, SV* pat) {
- int ncode = 0;
- bool is_code = 0;
- OP *o;
- for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
- if (o->op_type == OP_CONST) {
- sv_catsv(pat, cSVOPo_sv);
- if (is_code) {
- pRExC_state->code_indices[ncode++] = SvCUR(pat); /* end pos */
- is_code = 0;
+bool
+S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
+ char *pat, STRLEN plen)
+{
+ SV *qr;
+
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ if (pRExC_state->runtime_code_qr) {
+ /* this is the second time we've been called; this should
+ * only happen if the main pattern got upgraded to utf8
+ * during compilation; re-use the qr we compiled first time
+ * round (which should be utf8 too)
+ */
+ qr = pRExC_state->runtime_code_qr;
+ pRExC_state->runtime_code_qr = NULL;
+ assert(RExC_utf8 && SvUTF8(qr));
+ }
+ else {
+ int n = 0;
+ STRLEN s;
+ char *p, *newpat;
+ int newlen = plen + 6; /* allow for "qr''x\0" extra chars */
+ SV *sv, *qr_ref;
+ dSP;
+
+ /* determine how many extra chars we need for ' and \ escaping */
+ for (s = 0; s < plen; s++) {
+ if (pat[s] == '\'' || pat[s] == '\\')
+ newlen++;
+ }
+
+ Newx(newpat, newlen, char);
+ p = newpat;
+ *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
+
+ for (s = 0; s < plen; s++) {
+ if (n < pRExC_state->num_code_blocks
+ && s == pRExC_state->code_blocks[n].start)
+ {
+ /* blank out literal code block */
+ assert(pat[s] == '(');
+ while (s <= pRExC_state->code_blocks[n].end) {
+ *p++ = ' ';
+ s++;
+ }
+ s--;
+ n++;
+ continue;
}
+ if (pat[s] == '\'' || pat[s] == '\\')
+ *p++ = '\\';
+ *p++ = pat[s];
+ }
+ *p++ = '\'';
+ if (pRExC_state->pm_flags & RXf_PMf_EXTENDED)
+ *p++ = 'x';
+ *p++ = '\0';
+ DEBUG_COMPILE_r({
+ PerlIO_printf(Perl_debug_log,
+ "%sre-parsing pattern for runtime code:%s %s\n",
+ PL_colors[4],PL_colors[5],newpat);
+ });
+
+ sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
+ Safefree(newpat);
+
+ ENTER;
+ SAVETMPS;
+ save_re_context();
+ PUSHSTACKi(PERLSI_REQUIRE);
+ /* this causes the toker to collapse \\ into \ when parsing
+ * qr''; normally only q'' does this. It also alters hints
+ * handling */
+ PL_reg_state.re_reparsing = TRUE;
+ eval_sv(sv, G_SCALAR);
+ SvREFCNT_dec(sv);
+ SPAGAIN;
+ qr_ref = POPs;
+ PUTBACK;
+ if (SvTRUE(ERRSV))
+ Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV));
+ assert(SvROK(qr_ref));
+ qr = SvRV(qr_ref);
+ assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
+ /* the leaving below frees the tmp qr_ref.
+ * Give qr a life of its own */
+ SvREFCNT_inc(qr);
+ POPSTACK;
+ FREETMPS;
+ LEAVE;
+
+ }
+
+ if (!RExC_utf8 && SvUTF8(qr)) {
+ /* first time through; the pattern got upgraded; save the
+ * qr for the next time through */
+ assert(!pRExC_state->runtime_code_qr);
+ pRExC_state->runtime_code_qr = qr;
+ return 0;
+ }
+
+
+ /* extract any code blocks within the returned qr// */
+
+
+ /* merge the main (r1) and run-time (r2) code blocks into one */
+ {
+ RXi_GET_DECL(((struct regexp*)SvANY(qr)), r2);
+ struct reg_code_block *new_block, *dst;
+ RExC_state_t * const r1 = pRExC_state; /* convenient alias */
+ int i1 = 0, i2 = 0;
+
+ if (!r2->num_code_blocks) /* we guessed wrong */
+ return 1;
+
+ Newx(new_block,
+ r1->num_code_blocks + r2->num_code_blocks,
+ struct reg_code_block);
+ dst = new_block;
+
+ while ( i1 < r1->num_code_blocks
+ || i2 < r2->num_code_blocks)
+ {
+ struct reg_code_block *src;
+ bool is_qr = 0;
+
+ if (i1 == r1->num_code_blocks) {
+ src = &r2->code_blocks[i2++];
+ is_qr = 1;
+ }
+ else if (i2 == r2->num_code_blocks)
+ src = &r1->code_blocks[i1++];
+ else if ( r1->code_blocks[i1].start
+ < r2->code_blocks[i2].start)
+ {
+ src = &r1->code_blocks[i1++];
+ assert(src->end < r2->code_blocks[i2].start);
+ }
+ else {
+ assert( r1->code_blocks[i1].start
+ > r2->code_blocks[i2].start);
+ src = &r2->code_blocks[i2++];
+ is_qr = 1;
+ assert(src->end < r1->code_blocks[i1].start);
+ }
+
+ assert(pat[src->start] == '(');
+ assert(pat[src->end] == ')');
+ dst->start = src->start;
+ dst->end = src->end;
+ dst->block = src->block;
+ dst->src_regex = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
+ : src->src_regex;
+ dst++;
}
- else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
- assert(ncode < pRExC_state->max_code_index);
- pRExC_state->code_indices[ncode++] = SvCUR(pat); /*start pos */
- is_code = 1;
- }
+ r1->num_code_blocks += r2->num_code_blocks;
+ Safefree(r1->code_blocks);
+ r1->code_blocks = new_block;
}
- pRExC_state->code_index = 0;
+
+ SvREFCNT_dec(qr);
+ return 1;
}
/*
- * Perl_op_re_compile - the perl internal RE engine's function to compile a
+ * Perl_re_op_compile - the perl internal RE engine's function to compile a
* regular expression into internal code.
* The pattern may be passed either as:
* a list of SVs (patternp plus pat_count)
* a list of OPs (expr)
+ * If both are passed, the SV list is used, but the OP list indicates
+ * which SVs are actually pre-compiled code blocks
+ *
+ * The SVs in the list have magic and qr overloading applied to them (and
+ * the list may be modified in-place with replacement SVs in the latter
+ * case).
+ *
+ * If the pattern hasn't changed from old_re, then old_re will be
+ * returned.
+ *
+ * eng is the current engine. If that engine has an op_comp method, then
+ * handle directly (i.e. we assume that op_comp was us); otherwise, just
+ * do the initial concatenation of arguments and pass on to the external
+ * engine.
+ *
+ * If is_bare_re is not null, set it to a boolean indicating whether the
+ * arg list reduced (after overloading) to a single bare regex which has
+ * been returned (i.e. /$qr/).
+ *
+ * orig_rx_flags contains RXf_* flags. See perlreapi.pod for more details.
+ *
+ * pm_flags contains the PMf_* flags, typically based on those from the
+ * pm_flags field of the related PMOP. Currently we're only interested in
+ * PMf_HAS_CV, PMf_IS_QR, PMf_USE_RE_EVAL.
*
* We can't allocate space until we know how big the compiled form will be,
* but we can't compile it (and thus know how big it is) until we've got a
*/
REGEXP *
-Perl_re_op_compile(pTHX_ SV * const * const patternp, int pat_count,
- OP *expr, U32 orig_pm_flags)
+Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
+ OP *expr, const regexp_engine* eng, REGEXP *VOL old_re,
+ bool *is_bare_re, U32 orig_rx_flags, U32 pm_flags)
{
dVAR;
REGEXP *rx;
struct regexp *r;
register regexp_internal *ri;
STRLEN plen;
- char *exp;
+ char * VOL exp;
char* xend;
regnode *scan;
I32 flags;
I32 minlen = 0;
- U32 pm_flags;
+ U32 rx_flags;
SV * VOL pat;
/* these are all flags - maybe they should be turned
I32 sawplus = 0;
I32 sawopen = 0;
bool used_setjump = FALSE;
- regex_charset initial_charset = get_regex_charset(orig_pm_flags);
-
+ regex_charset initial_charset = get_regex_charset(orig_rx_flags);
+ bool code_is_utf8 = 0;
+ bool VOL recompile = 0;
+ bool runtime_code = 0;
U8 jump_ret = 0;
dJMPENV;
scan_data_t data;
#endif
GET_RE_DEBUG_FLAGS_DECL;
+ PERL_ARGS_ASSERT_RE_OP_COMPILE;
+
DEBUG_r(if (!PL_colorset) reginitcolors());
#ifndef PERL_IN_XSUB_RE
}
#endif
- pRExC_state->code_indices = NULL;
- pRExC_state->max_code_index = 0;
- if (expr) {
- if (expr->op_type == OP_LIST) {
- OP *o;
- bool is_utf8 = 0;
- int ncode = 0;
+ pRExC_state->code_blocks = NULL;
+ pRExC_state->num_code_blocks = 0;
- /* are we UTF8, and how many code blocks are there? */
- for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
- if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
- is_utf8 = 1;
- else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
- /* count of DO blocks */
- ncode++;
+ if (is_bare_re)
+ *is_bare_re = FALSE;
+
+ if (expr && (expr->op_type == OP_LIST ||
+ (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
+
+ /* is the source UTF8, and how many code blocks are there? */
+ OP *o;
+ int ncode = 0;
+
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+ code_is_utf8 = 1;
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+ /* count of DO blocks */
+ ncode++;
+ }
+ if (ncode) {
+ pRExC_state->num_code_blocks = ncode;
+ Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
+ }
+ }
+
+ if (pat_count) {
+ /* handle a list of SVs */
+
+ SV **svp;
+
+ /* apply magic and RE overloading to each arg */
+ for (svp = patternp; svp < patternp + pat_count; svp++) {
+ SV *rx = *svp;
+ SvGETMAGIC(rx);
+ if (SvROK(rx) && SvAMAGIC(rx)) {
+ SV *sv = AMG_CALLunary(rx, regexp_amg);
+ if (sv) {
+ if (SvROK(sv))
+ sv = SvRV(sv);
+ if (SvTYPE(sv) != SVt_REGEXP)
+ Perl_croak(aTHX_ "Overloaded qr did not return a REGEXP");
+ *svp = sv;
+ }
}
- pRExC_state->max_code_index = ncode*2;
- if (ncode) {
- Newx(pRExC_state->code_indices, ncode*2, STRLEN);
- SAVEFREEPV(pRExC_state->code_indices);
+ }
+
+ if (pat_count > 1) {
+ /* concat multiple args and find any code block indexes */
+
+ OP *o = NULL;
+ int n = 0;
+ bool utf8 = 0;
+ STRLEN orig_patlen = 0;
+
+ if (pRExC_state->num_code_blocks) {
+ o = cLISTOPx(expr)->op_first;
+ assert(o->op_type == OP_PUSHMARK);
+ o = o->op_sibling;
}
+
+ pat = newSVpvn("", 0);
+ SAVEFREESV(pat);
+
+ /* determine if the pattern is going to be utf8 (needed
+ * in advance to align code block indices correctly).
+ * XXX This could fail to be detected for an arg with
+ * overloading but not concat overloading; but the main effect
+ * in this obscure case is to need a 'use re eval' for a
+ * literal code block */
+ for (svp = patternp; svp < patternp + pat_count; svp++) {
+ if (SvUTF8(*svp))
+ utf8 = 1;
+ }
+ if (utf8)
+ SvUTF8_on(pat);
+
+ for (svp = patternp; svp < patternp + pat_count; svp++) {
+ SV *sv, *msv = *svp;
+ SV *rx;
+ bool code = 0;
+ if (o) {
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(n < pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[n].start = SvCUR(pat);
+ pRExC_state->code_blocks[n].block = o;
+ pRExC_state->code_blocks[n].src_regex = NULL;
+ n++;
+ code = 1;
+ o = o->op_sibling; /* skip CONST */
+ assert(o);
+ }
+ o = o->op_sibling;;
+ }
+
+ if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+ (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+ {
+ sv_setsv(pat, sv);
+ /* overloading involved: all bets are off over literal
+ * code. Pretend we haven't seen it */
+ pRExC_state->num_code_blocks -= n;
+ n = 0;
+ rx = NULL;
+
+ }
+ else {
+ while (SvAMAGIC(msv)
+ && (sv = AMG_CALLunary(msv, string_amg))
+ && sv != msv)
+ {
+ msv = sv;
+ SvGETMAGIC(msv);
+ }
+ if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+ msv = SvRV(msv);
+ orig_patlen = SvCUR(pat);
+ sv_catsv_nomg(pat, msv);
+ rx = msv;
+ if (code)
+ pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+ }
+
+ /* extract any code blocks within any embedded qr//'s */
+ if (rx && SvTYPE(rx) == SVt_REGEXP
+ && RX_ENGINE((REGEXP*)rx)->op_comp)
+ {
+
+ RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+ if (ri->num_code_blocks) {
+ int i;
+ /* the presence of an embedded qr// with code means
+ * we should always recompile: the text of the
+ * qr// may not have changed, but it may be a
+ * different closure than last time */
+ recompile = 1;
+ Renew(pRExC_state->code_blocks,
+ pRExC_state->num_code_blocks + ri->num_code_blocks,
+ struct reg_code_block);
+ pRExC_state->num_code_blocks += ri->num_code_blocks;
+ for (i=0; i < ri->num_code_blocks; i++) {
+ struct reg_code_block *src, *dst;
+ STRLEN offset = orig_patlen
+ + ((struct regexp *)SvANY(rx))->pre_prefix;
+ assert(n < pRExC_state->num_code_blocks);
+ src = &ri->code_blocks[i];
+ dst = &pRExC_state->code_blocks[n];
+ dst->start = src->start + offset;
+ dst->end = src->end + offset;
+ dst->block = src->block;
+ dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*)
+ src->src_regex
+ ? src->src_regex
+ : (REGEXP*)rx);
+ n++;
+ }
+ }
+ }
+ }
+ SvSETMAGIC(pat);
+ }
+ else {
+ SV *sv;
+ pat = *patternp;
+ while (SvAMAGIC(pat)
+ && (sv = AMG_CALLunary(pat, string_amg))
+ && sv != pat)
+ {
+ pat = sv;
+ SvGETMAGIC(pat);
+ }
+ }
+
+ /* handle bare regex: foo =~ $re */
+ {
+ SV *re = pat;
+ if (SvROK(re))
+ re = SvRV(re);
+ if (SvTYPE(re) == SVt_REGEXP) {
+ if (is_bare_re)
+ *is_bare_re = TRUE;
+ SvREFCNT_inc(re);
+ Safefree(pRExC_state->code_blocks);
+ return (REGEXP*)re;
+ }
+ }
+ }
+ else {
+ /* not a list of SVs, so must be a list of OPs */
+ assert(expr);
+ if (expr->op_type == OP_LIST) {
+ int i = -1;
+ bool is_code = 0;
+ OP *o;
+
pat = newSVpvn("", 0);
SAVEFREESV(pat);
- if (is_utf8)
+ if (code_is_utf8)
SvUTF8_on(pat);
- S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+
+ /* given a list of CONSTs and DO blocks in expr, append all
+ * the CONSTs to pat, and record the start and end of each
+ * code block in code_blocks[] (each DO{} op is followed by an
+ * OP_CONST containing the corresponding literal '(?{...})
+ * text)
+ */
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST) {
+ sv_catsv(pat, cSVOPo_sv);
+ if (is_code) {
+ pRExC_state->code_blocks[i].end = SvCUR(pat)-1;
+ is_code = 0;
+ }
+ }
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(i+1 < pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[++i].start = SvCUR(pat);
+ pRExC_state->code_blocks[i].block = o;
+ pRExC_state->code_blocks[i].src_regex = NULL;
+ is_code = 1;
+ }
+ }
}
else {
assert(expr->op_type == OP_CONST);
pat = cSVOPx_sv(expr);
}
}
- else
- {
- assert(pat_count ==1); /*XXX*/
- pat = *patternp;
- }
- exp = SvPV(pat, plen);
+ exp = SvPV_nomg(pat, plen);
- if (plen == 0) { /* ignore the utf8ness if the pattern is 0 length */
- RExC_utf8 = RExC_orig_utf8 = 0;
- }
- else {
- RExC_utf8 = RExC_orig_utf8 = SvUTF8(pat);
+ if (!eng->op_comp) {
+ if ((SvUTF8(pat) && IN_BYTES)
+ || SvGMAGICAL(pat) || SvAMAGIC(pat))
+ {
+ /* make a temporary copy; either to convert to bytes,
+ * or to avoid repeating get-magic / overloaded stringify */
+ pat = newSVpvn_flags(exp, plen, SVs_TEMP |
+ (IN_BYTES ? 0 : SvUTF8(pat)));
+ }
+ Safefree(pRExC_state->code_blocks);
+ return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
}
+
+ /* ignore the utf8ness if the pattern is 0 length */
+ RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
RExC_uni_semantics = 0;
RExC_contains_locale = 0;
+ pRExC_state->runtime_code_qr = NULL;
/****************** LONG JUMP TARGET HERE***********************/
/* Longjmp back to here if have to switch in midstream to utf8 */
});
}
else { /* longjumped back */
- STRLEN len = plen;
+ U8 *src, *dst;
+ int n=0;
+ STRLEN s = 0, d = 0;
+ bool do_end = 0;
/* If the cause for the longjmp was other than changing to utf8, pop
* our own setjmp, and longjmp to the correct handler */
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
- if (expr && expr->op_type == OP_LIST) {
- sv_setpvn(pat, "", 0);
- SvUTF8_on(pat);
- S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
- exp = SvPV(pat, plen);
- xend = exp + plen;
- }
- else {
- exp = (char*)Perl_bytes_to_utf8(aTHX_
- (U8*)SvPV_nomg(pat, plen),
- &len);
- xend = exp + len;
- SAVEFREEPV(exp);
+ /* upgrade pattern to UTF8, and if there are code blocks,
+ * recalculate the indices.
+ * This is essentially an unrolled Perl_bytes_to_utf8() */
+
+ src = (U8*)SvPV_nomg(pat, plen);
+ Newx(dst, plen * 2 + 1, U8);
+
+ while (s < plen) {
+ const UV uv = NATIVE_TO_ASCII(src[s]);
+ if (UNI_IS_INVARIANT(uv))
+ dst[d] = (U8)UTF_TO_NATIVE(uv);
+ else {
+ dst[d++] = (U8)UTF8_EIGHT_BIT_HI(uv);
+ dst[d] = (U8)UTF8_EIGHT_BIT_LO(uv);
+ }
+ if (n < pRExC_state->num_code_blocks) {
+ if (!do_end && pRExC_state->code_blocks[n].start == s) {
+ pRExC_state->code_blocks[n].start = d;
+ assert(dst[d] == '(');
+ do_end = 1;
+ }
+ else if (do_end && pRExC_state->code_blocks[n].end == s) {
+ pRExC_state->code_blocks[n].end = d;
+ assert(dst[d] == ')');
+ do_end = 0;
+ n++;
+ }
+ }
+ s++;
+ d++;
}
+ dst[d] = '\0';
+ plen = d;
+ exp = (char*) dst;
+ xend = exp + plen;
+ SAVEFREEPV(exp);
RExC_orig_utf8 = RExC_utf8 = 1;
}
+ /* return old regex if pattern hasn't changed */
+
+ if ( old_re
+ && !recompile
+ && !!RX_UTF8(old_re) == !!RExC_utf8
+ && RX_PRECOMP(old_re)
+ && RX_PRELEN(old_re) == plen
+ && memEQ(RX_PRECOMP(old_re), exp, plen))
+ {
+ /* with runtime code, always recompile */
+ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+ exp, plen);
+ if (!runtime_code) {
+ ReREFCNT_inc(old_re);
+ if (used_setjump) {
+ JMPENV_POP;
+ }
+ Safefree(pRExC_state->code_blocks);
+ return old_re;
+ }
+ }
+ else if ((pm_flags & PMf_USE_RE_EVAL)
+ /* this second condition covers the non-regex literal case,
+ * i.e. $foo =~ '(?{})'. */
+ || ( !PL_reg_state.re_reparsing && IN_PERL_COMPILETIME
+ && (PL_hints & HINT_RE_EVAL))
+ )
+ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
+ exp, plen);
+
#ifdef TRIE_STUDY_OPT
restudied = 0;
#endif
- pm_flags = orig_pm_flags;
+ rx_flags = orig_rx_flags;
if (initial_charset == REGEX_LOCALE_CHARSET) {
RExC_contains_locale = 1;
/* Set to use unicode semantics if the pattern is in utf8 and has the
* 'depends' charset specified, as it means unicode when utf8 */
- set_regex_charset(&pm_flags, REGEX_UNICODE_CHARSET);
+ set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
}
RExC_precomp = exp;
- RExC_flags = pm_flags;
+ RExC_flags = rx_flags;
+ RExC_pm_flags = pm_flags;
+
+ if (runtime_code) {
+ if (PL_tainting && PL_tainted)
+ Perl_croak(aTHX_ "Eval-group in insecure regular expression");
+
+ if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
+ /* whoops, we have a non-utf8 pattern, whilst run-time code
+ * got compiled as utf8. Try again with a utf8 pattern */
+ JMPENV_JUMP(UTF8_LONGJMP);
+ }
+ }
+ assert(!pRExC_state->runtime_code_qr);
+
RExC_sawback = 0;
RExC_seen = 0;
RExC_in_lookbehind = 0;
RExC_seen_zerolen = *exp == '^' ? -1 : 0;
- RExC_seen_evals = 0;
RExC_extralen = 0;
RExC_override_recoding = 0;
#endif
RExC_recurse = NULL;
RExC_recurse_count = 0;
+ pRExC_state->code_index = 0;
#if 0 /* REGC() is (currently) a NOP at the first pass.
* Clever compilers notice this and complain. --jhi */
REGC((U8)REG_MAGIC, (char*)RExC_emit);
#endif
- DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n"));
+ DEBUG_PARSE_r(
+ PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
+ RExC_lastnum=0;
+ RExC_lastparse=NULL;
+ );
if (reg(pRExC_state, 0, &flags,1) == NULL) {
RExC_precomp = NULL;
+ Safefree(pRExC_state->code_blocks);
return(NULL);
}
/* The first pass could have found things that force Unicode semantics */
if ((RExC_utf8 || RExC_uni_semantics)
- && get_regex_charset(pm_flags) == REGEX_DEPENDS_CHARSET)
+ && get_regex_charset(rx_flags) == REGEX_DEPENDS_CHARSET)
{
- set_regex_charset(&pm_flags, REGEX_UNICODE_CHARSET);
+ set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
}
/* Small enough for pointer-storage convention?
/* non-zero initialization begins here */
RXi_SET( r, ri );
- r->engine= RE_ENGINE_PTR;
- r->extflags = pm_flags;
+ r->engine= eng;
+ r->extflags = rx_flags;
+ if (pm_flags & PMf_IS_QR) {
+ ri->code_blocks = pRExC_state->code_blocks;
+ ri->num_code_blocks = pRExC_state->num_code_blocks;
+ }
+ else
+ SAVEFREEPV(pRExC_state->code_blocks);
+
{
bool has_p = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
bool has_charset = (get_regex_charset(r->extflags) != REGEX_DEPENDS_CHARSET);
p = sv_grow(MUTABLE_SV(rx), wraplen + 1); /* +1 for the ending NUL */
SvPOK_on(rx);
- SvFLAGS(rx) |= SvUTF8(pat);
+ if (RExC_utf8)
+ SvFLAGS(rx) |= SVf_UTF8;
*p++='('; *p++='?';
/* If a default, cover it using the caret */
RExC_rxi = ri;
/* Second pass: emit code. */
- RExC_flags = pm_flags; /* don't let top level (?i) bleed */
+ RExC_flags = rx_flags; /* don't let top level (?i) bleed */
+ RExC_pm_flags = pm_flags;
RExC_parse = exp;
RExC_end = xend;
RExC_naughty = 0;
RExC_emit = ri->program;
RExC_emit_bound = ri->program + RExC_size + 1;
pRExC_state->code_index = 0;
- if (expr && expr->op_type == OP_LIST) {
- assert(cLISTOPx(expr)->op_first->op_type == OP_PUSHMARK);
- pRExC_state->next_code_or_const = cLISTOPx(expr)->op_first;
- }
- /* Store the count of eval-groups for security checks: */
- RExC_rx->seen_evals = RExC_seen_evals;
REGC((U8)REG_MAGIC, (char*) RExC_emit++);
if (reg(pRExC_state, 0, &flags,1) == NULL) {
ReREFCNT_dec(rx);
else if ((!sawopen || !RExC_sawback) &&
(OP(first) == STAR &&
PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
- !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL))
+ !(r->extflags & RXf_ANCH) && !pRExC_state->num_code_blocks)
{
/* turn .* into ^.* with an implied $*=1 */
const int type =
goto again;
}
if (sawplus && !sawlookahead && (!sawopen || !RExC_sawback)
- && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */
+ && !pRExC_state->num_code_blocks) /* May examine pos and $& */
/* x+ must match at the 1st pos of run of x's */
r->intflags |= PREGf_SKIP;
r->extflags |= RXf_GPOS_SEEN;
if (RExC_seen & REG_SEEN_LOOKBEHIND)
r->extflags |= RXf_LOOKBEHIND_SEEN;
- if (RExC_seen & REG_SEEN_EVAL)
+ if (pRExC_state->num_code_blocks)
r->extflags |= RXf_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
r->extflags |= RXf_CANY_SEEN;
r->intflags |= PREGf_VERBARG_SEEN;
if (RExC_seen & REG_SEEN_CUTGROUP)
r->intflags |= PREGf_CUTGROUP_SEEN;
+ if (pm_flags & PMf_USE_RE_EVAL)
+ r->intflags |= PREGf_USE_RE_EVAL;
if (RExC_paren_names)
RXp_PAREN_NAMES(r) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
else
return rx;
}
-#undef RE_ENGINE_PTR
-
SV*
Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
(unsigned long) flags);
}
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
}
return NULL;
}
num = sv_dat ? *((I32 *)SvPVX(sv_dat)) : 0;
}
goto gen_recurse_regop;
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
case '+':
if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
RExC_parse++;
nextchar(pRExC_state);
return ret;
} /* named and numeric backreferences */
- /* NOT REACHED */
+ assert(0); /* NOT REACHED */
case '?': /* (??...) */
is_logical = 1;
/* FALL THROUGH */
case '{': /* (?{...}) */
{
- I32 count = 1;
U32 n = 0;
- char c;
- char *s = RExC_parse;
+ struct reg_code_block *cb;
RExC_seen_zerolen++;
- RExC_seen |= REG_SEEN_EVAL;
- if (pRExC_state->max_code_index
- && pRExC_state->code_indices[pRExC_state->code_index] ==
- (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+ if ( !pRExC_state->num_code_blocks
+ || pRExC_state->code_index >= pRExC_state->num_code_blocks
+ || pRExC_state->code_blocks[pRExC_state->code_index].start
+ != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
- RExC_start)
) {
- /* this is a pre-compiled literal (?{}) */
- assert(pRExC_state->code_index
- < pRExC_state->max_code_index);
- RExC_parse = RExC_start - 1
- + pRExC_state->code_indices[++pRExC_state->code_index];
- pRExC_state->code_index++;
- if (SIZE_ONLY)
- RExC_seen_evals++;
- else {
- OP *o = pRExC_state->next_code_or_const;
- while(! (o->op_type == OP_NULL
- && (o->op_flags & OPf_SPECIAL)))
- {
- o = o->op_sibling;
- }
- n = add_data(pRExC_state, 1,
- (RExC_flags & PMf_HAS_CV) ? "L" : "l");
- RExC_rxi->data->data[n] = (void*)o->op_next;
- pRExC_state->next_code_or_const = o->op_sibling;
- }
+ if (RExC_pm_flags & PMf_USE_RE_EVAL)
+ FAIL("panic: Sequence (?{...}): no code block found\n");
+ FAIL("Eval-group not allowed at runtime, use re 'eval'");
}
- else {
- while (count && (c = *RExC_parse)) {
- if (c == '\\') {
- if (RExC_parse[1])
- RExC_parse++;
- }
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- RExC_parse++;
- }
- if (*RExC_parse != ')') {
- RExC_parse = s;
- vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
- }
- if (!SIZE_ONLY) {
- PAD *pad;
- OP_4tree *sop, *rop;
- SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
- ENTER;
- Perl_save_re_context(aTHX);
- rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
- sop->op_private |= OPpREFCOUNTED;
- /* re_dup will OpREFCNT_inc */
- OpREFCNT_set(sop, 1);
- LEAVE;
-
- n = add_data(pRExC_state, 3, "nop");
- RExC_rxi->data->data[n] = (void*)rop;
- RExC_rxi->data->data[n+1] = (void*)sop;
- RExC_rxi->data->data[n+2] = (void*)pad;
- SvREFCNT_dec(sv);
+ /* this is a pre-compiled code block (?{...}) */
+ cb = &pRExC_state->code_blocks[pRExC_state->code_index];
+ RExC_parse = RExC_start + cb->end;
+ if (!SIZE_ONLY) {
+ OP *o = cb->block;
+ if (cb->src_regex) {
+ n = add_data(pRExC_state, 2, "rl");
+ RExC_rxi->data->data[n] =
+ (void*)SvREFCNT_inc((SV*)cb->src_regex);
+ RExC_rxi->data->data[n+1] = (void*)o;
}
- else { /* First pass */
- if (PL_reginterp_cnt < ++RExC_seen_evals
- && IN_PERL_RUNTIME)
- /* No compiled RE interpolated, has runtime
- components ===> unsafe. */
- FAIL("Eval-group not allowed at runtime, use re 'eval'");
- if (PL_tainting && PL_tainted)
- FAIL("Eval-group in insecure regular expression");
- #if PERL_VERSION > 8
- if (IN_PERL_COMPILETIME)
- PL_cv_has_eval = 1;
- #endif
+ else {
+ n = add_data(pRExC_state, 1,
+ (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l");
+ RExC_rxi->data->data[n] = (void*)o;
}
}
+ pRExC_state->code_index++;
nextchar(pRExC_state);
if (is_logical) {
+ regnode *eval;
ret = reg_node(pRExC_state, LOGICAL);
- if (!SIZE_ONLY)
+ eval = reganode(pRExC_state, EVAL, n);
+ if (!SIZE_ONLY) {
ret->flags = 2;
- REGTAIL(pRExC_state, ret, reganode(pRExC_state, EVAL, n));
+ /* for later propagation into (??{}) return value */
+ eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
+ }
+ REGTAIL(pRExC_state, ret, eval);
/* deal with the length of this later - MJD */
return ret;
}
}
else
FAIL("Junk on end of regexp"); /* "Can't happen". */
- /* NOTREACHED */
+ assert(0); /* NOTREACHED */
}
if (RExC_in_lookbehind) {
sequence, we return.
Note: we have to be careful with escapes, as they can be both literal
- and special, and in the case of \10 and friends can either, depending
- on context. Specifically there are two separate switches for handling
+ and special, and in the case of \10 and friends, context determines which.
+
+ A summary of the code structure is:
+
+ switch (first_byte) {
+ cases for each special:
+ handle this special;
+ break;
+ case '\\':
+ switch (2nd byte) {
+ cases for each unambiguous special:
+ handle this special;
+ break;
+ cases for each ambigous special/literal:
+ disambiguate;
+ if (special) handle here
+ else goto defchar;
+ default: // unambiguously literal:
+ goto defchar;
+ }
+ default: // is a literal char
+ // FALL THROUGH
+ defchar:
+ create EXACTish node for literal;
+ while (more input and node isn't full) {
+ switch (input_byte) {
+ cases for each special;
+ make sure parse pointer is set so that the next call to
+ regatom will see this special first
+ goto loopdone; // EXACTish node terminated by prev. char
+ default:
+ append char to EXACTISH node;
+ }
+ get next input byte;
+ }
+ loopdone:
+ }
+ return the generated node;
+
+ Specifically there are two separate switches for handling
escape sequences, with the one for handling literal escapes requiring
a dummy entry for all of the special escapes that are actually handled
by the other.
vFAIL("Reference to nonexistent or unclosed group");
}
if (!isg && num > 9 && num >= RExC_npar)
+ /* Probably a character specified in octal, e.g. \35 */
goto defchar;
else {
char * const parse_start = RExC_parse - 1; /* MJD */
break;
}
case 'x':
- if (*++p == '{') {
- char* const e = strchr(p, '}');
+ {
+ STRLEN brace_len = len;
+ UV result;
+ const char* error_msg;
- if (!e) {
- RExC_parse = p + 1;
- vFAIL("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(p,
+ &result,
+ &brace_len,
+ &error_msg,
+ 1);
+ p += brace_len;
+ if (! valid) {
+ RExC_parse = p; /* going to die anyway; point
+ to exact spot of failure */
+ vFAIL(error_msg);
}
else {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
- STRLEN numlen = e - p - 1;
- ender = grok_hex(p + 1, &numlen, &flags, NULL);
- if (ender > 0xff)
- REQUIRE_UTF8;
- p = e + 1;
+ ender = result;
}
+ if (PL_encoding && ender < 0x100) {
+ goto recode_encoding;
+ }
+ if (ender > 0xff) {
+ REQUIRE_UTF8;
+ }
+ break;
}
- else {
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- STRLEN numlen = 2;
- ender = grok_hex(p, &numlen, &flags, NULL);
- p += numlen;
- }
- if (PL_encoding && ender < 0x100)
- goto recode_encoding;
- break;
case 'c':
p++;
ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
break;
case '0': case '1': case '2': case '3':case '4':
- case '5': case '6': case '7': case '8':case '9':
+ case '5': case '6': case '7':
if (*p == '0' ||
(isDIGIT(p[1]) && atoi(p) >= RExC_npar))
{
FAIL("Trailing \\");
/* FALL THROUGH */
default:
- if (!SIZE_ONLY&& isALPHA(*p)) {
+ if (!SIZE_ONLY&& isALNUMC(*p)) {
ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
}
goto normal_default;
}
break;
case 'x':
- if (*RExC_parse == '{') {
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
- char * const e = strchr(RExC_parse++, '}');
- if (!e)
- vFAIL("Missing right brace on \\x{}");
-
- numlen = e - RExC_parse;
- value = grok_hex(RExC_parse, &numlen, &flags, NULL);
- RExC_parse = e + 1;
- }
- else {
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- numlen = 2;
- value = grok_hex(RExC_parse, &numlen, &flags, NULL);
+ RExC_parse--; /* function expects to be pointed at the 'x' */
+ {
+ const char* error_msg;
+ bool valid = grok_bslash_x(RExC_parse,
+ &value,
+ &numlen,
+ &error_msg,
+ 1);
RExC_parse += numlen;
+ if (! valid) {
+ vFAIL(error_msg);
+ }
}
if (PL_encoding && value < 0x100)
goto recode_encoding;
if (ri->u.offsets)
Safefree(ri->u.offsets); /* 20010421 MJD */
#endif
+ if (ri->code_blocks) {
+ int n;
+ for (n = 0; n < ri->num_code_blocks; n++)
+ SvREFCNT_dec(ri->code_blocks[n].src_regex);
+ Safefree(ri->code_blocks);
+ }
+
if (ri->data) {
int n = ri->data->count;
- PAD* new_comppad = NULL;
- PAD* old_comppad;
- PADOFFSET refcnt;
while (--n >= 0) {
/* If you add a ->what type here, update the comment in regcomp.h */
switch (ri->data->what[n]) {
case 'a':
+ case 'r':
case 's':
case 'S':
case 'u':
case 'f':
Safefree(ri->data->data[n]);
break;
- case 'p':
- new_comppad = MUTABLE_AV(ri->data->data[n]);
- break;
- case 'o':
- if (new_comppad == NULL)
- Perl_croak(aTHX_ "panic: pregfree comppad");
- PAD_SAVE_LOCAL(old_comppad,
- /* Watch out for global destruction's random ordering. */
- (SvTYPE(new_comppad) == SVt_PVAV) ? new_comppad : NULL
- );
- OP_REFCNT_LOCK;
- refcnt = OpREFCNT_dec((OP_4tree*)ri->data->data[n]);
- OP_REFCNT_UNLOCK;
- if (!refcnt)
- op_free((OP_4tree*)ri->data->data[n]);
-
- PAD_RESTORE_LOCAL(old_comppad);
- SvREFCNT_dec(MUTABLE_SV(new_comppad));
- new_comppad = NULL;
- break;
case 'l':
case 'L':
- case 'n':
break;
case 'T':
{ /* Aho Corasick add-on structure for a trie node.
Newxc(reti, sizeof(regexp_internal) + len*sizeof(regnode), char, regexp_internal);
Copy(ri->program, reti->program, len+1, regnode);
-
+
+ reti->num_code_blocks = ri->num_code_blocks;
+ if (ri->code_blocks) {
+ int n;
+ Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
+ struct reg_code_block);
+ Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
+ struct reg_code_block);
+ for (n = 0; n < ri->num_code_blocks; n++)
+ reti->code_blocks[n].src_regex = (REGEXP*)
+ sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
+ }
+ else
+ reti->code_blocks = NULL;
reti->regstclass = NULL;
for (i = 0; i < count; i++) {
d->what[i] = ri->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sSfpontTua
- see also regcomp.h and pregfree() */
+ /* see also regcomp.h and regfree_internal() */
case 'a': /* actually an AV, but the dup function is identical. */
+ case 'r':
case 's':
case 'S':
- case 'p': /* actually an AV, but the dup function is identical. */
case 'u': /* actually an HV, but the dup function is identical. */
d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
break;
struct regnode_charclass_class);
reti->regstclass = (regnode*)d->data[i];
break;
- case 'o':
- /* Compiled op trees are readonly and in shared memory,
- and can thus be shared without duplication. */
- OP_REFCNT_LOCK;
- d->data[i] = (void*)OpREFCNT_inc((OP*)ri->data->data[i]);
- OP_REFCNT_UNLOCK;
- break;
case 'T':
/* Trie stclasses are readonly and can thus be shared
* without duplication. We free the stclass in pregfree
/* Fall through */
case 'l':
case 'L':
- case 'n':
d->data[i] = ri->data->data[i];
break;
default:
Copy(&PL_reg_state, state, 1, struct re_save_state);
- PL_reg_start_tmp = 0;
- PL_reg_start_tmpl = 0;
PL_reg_oldsaved = NULL;
PL_reg_oldsavedlen = 0;
PL_reg_maxiter = 0;