Perl_dump_indent(aTHX_ level, file, "PMf_REPL = ");
op_dump(pm->op_pmreplrootu.op_pmreplroot);
}
+ if (pm->op_code_list) {
+ Perl_dump_indent(aTHX_ level, file, "CODE_LIST =\n");
+ do_op_dump(level, file, pm->op_code_list);
+ }
if (pm->op_pmflags || (PM_GETRE(pm) && RX_CHECK_SUBSTR(PM_GETRE(pm)))) {
SV * const tmpsv = pm_description(pm);
Perl_dump_indent(aTHX_ level, file, "PMFLAGS = (%s)\n", SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : "");
case OP_MATCH:
case OP_QR:
clear_pmop:
+ op_free(cPMOPo->op_code_list);
+ cPMOPo->op_code_list = NULL;
forget_pmop(cPMOPo, 1);
cPMOPo->op_pmreplrootu.op_pmreplroot = NULL;
/* we use the same protection as the "SAFE" version of the PM_ macros
is_compiletime = 0;
}
}
- else { assert(expr->op_type != OP_PUSHMARK); if (expr->op_type != OP_CONST && expr->op_type != OP_PUSHMARK)
+ else if (expr->op_type != OP_CONST)
is_compiletime = 0;
- }
/* are we using an external (non-perl) re engine? */
eng = current_re_engine();
ext_eng = (eng && eng != &PL_core_reg_engine);
- /* concatenate adjacent CONSTs, and for non-perl engines, strip out
- * any DO blocks */
+ /* for perl engine:
+ * concatenate adjacent CONSTs for non-code case
+ * pre-process DO blocks;
+ * for non-perl engines:
+ * concatenate adjacent CONSTs;
+ * strip out any DO blocks
+ */
- if (expr->op_type == OP_LIST
- && (!is_compiletime || /* XXX TMP until we handle runtime (?{}) */
- !has_code || ext_eng))
- {
- OP *o, *kid;
- o = cLISTOPx(expr)->op_first;
- while (o->op_sibling) {
- kid = o->op_sibling;
+ if (expr->op_type == OP_LIST) {
+ OP *kid, *okid = NULL;
+ kid = cLISTOPx(expr)->op_first;
+ while (kid) {
if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
/* do {...} */
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ if (ext_eng || !is_compiletime/*XXX tmp*/
+ || o->op_type == OP_QR/*XXX tmp*/) {
+ assert(okid);
+ okid->op_sibling = kid->op_sibling;
+ kid->op_sibling = NULL;
+ op_free(kid);
+ kid = okid;
+ }
+ else {
+ /* treat each DO block as a separate little sub */
+ scalar(kid);
+ LINKLIST(kid);
+ if (kLISTOP->op_first->op_type == OP_LEAVE) {
+ LISTOP *leave = cLISTOPx(kLISTOP->op_first);
+ /* skip ENTER */
+ assert(leave->op_first->op_type == OP_ENTER);
+ assert(leave->op_first->op_sibling);
+ kid->op_next = leave->op_first->op_sibling;
+ /* skip LEAVE */
+ assert(leave->op_flags & OPf_KIDS);
+ assert(leave->op_last->op_next = (OP*)leave);
+ leave->op_next = NULL; /* stop on last op */
+ op_null((OP*)leave);
+ }
+ else {
+ /* skip SCOPE */
+ OP *scope = kLISTOP->op_first;
+ assert(scope->op_type == OP_SCOPE);
+ assert(scope->op_flags & OPf_KIDS);
+ scope->op_next = NULL; /* stop on last op */
+ op_null(scope);
+ }
+ CALL_PEEP(kid);
+ finalize_optree(kid);
+ }
}
- else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
- SV* sv = cSVOPo->op_sv;
+ else if ( (ext_eng || !has_code || !is_compiletime/*XXX tmp*/)
+ && kid->op_type == OP_CONST
+ && kid->op_sibling
+ && kid->op_sibling->op_type == OP_CONST)
+ {
+ OP *o = kid->op_sibling;
+ SV* sv = cSVOPx_sv(kid);
SvREADONLY_off(sv);
- sv_catsv(sv, cSVOPx(kid)->op_sv);
+ sv_catsv(sv, cSVOPo_sv);
SvREADONLY_on(sv);
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ kid->op_sibling = o->op_sibling;
+ o->op_sibling = NULL;
+ op_free(o);
+ kid = okid;
}
- else
- o = o->op_sibling;
+ okid = kid;
+ kid = kid->op_sibling;
}
- cLISTOPx(expr)->op_last = o;
+ cLISTOPx(expr)->op_last = okid;
}
PL_hints |= HINT_BLOCK_SCOPE;
}
PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
- }
- else
- PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
-
#ifdef PERL_MAD
- op_getmad(expr,(OP*)pm,'e');
+ op_getmad(expr,(OP*)pm,'e');
#else
- op_free(expr);
+ op_free(expr);
#endif
+ }
+ else {
+ pm->op_code_list = expr;
+ PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
+ }
}
else {
bool reglist;
HV * op_pmstash;
#endif
} op_pmstashstartu;
+ OP * op_code_list; /* list of (?{}) code blocks */
};
#ifdef USE_ITHREADS
C<UNITCHECK> blocks are run just after the unit which defined them has
been compiled. The main program file and each module it loads are
-compilation units, as are string C<eval>s, code compiled using the
+compilation units, as are string C<eval>s, run-time code compiled using the
C<(?{ })> construct in a regex, calls to C<do FILE>, C<require FILE>,
and code after the C<-e> switch on the command line.
I32 in_lookbehind;
I32 contains_locale;
I32 override_recoding;
+ int max_code_index; /* max index into code_indices */
+ int code_index; /* index into code_indices */
+ STRLEN *code_indices; /* begin and ends of literal (?{})
+ within pattern */
+ OP* next_code_or_const; /* iterating the list of DO/OP_CONST */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
return Perl_re_op_compile(aTHX_ pattern, NULL, orig_pm_flags);
}
+/* given a list of CONSTs and DO blocks in expr, append all the CONSTs to
+ * pat, and record the start and end of each code block in code_indices
+ * (each DO{} op is followed by an OP_CONST containing the corresponding
+ * literal '(?{...}) text)
+ */
+
+static void
+S_get_pat_and_code_indices(pTHX_ RExC_state_t *pRExC_state, OP* expr, SV* pat) {
+ int ncode = 0;
+ bool is_code = 0;
+ OP *o;
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST) {
+ sv_catsv(pat, cSVOPo_sv);
+ if (is_code) {
+ pRExC_state->code_indices[ncode++] = SvCUR(pat); /* end pos */
+ is_code = 0;
+ }
+ }
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(ncode < pRExC_state->max_code_index);
+ pRExC_state->code_indices[ncode++] = SvCUR(pat); /*start pos */
+ is_code = 1;
+ }
+ }
+ pRExC_state->code_index = 0;
+}
+
+
/*
* Perl_op_re_compile - the perl internal RE engine's function to compile a
* regular expression into internal code.
}
#endif
+ pRExC_state->code_indices = NULL;
+ pRExC_state->max_code_index = 0;
if (expr) {
- /* XXX tmp get rid of DO blocks, concat CONSTs */
- OP *o, *kid;
- o = cLISTOPx(expr)->op_first;
- while (o->op_sibling) {
- kid = o->op_sibling;
- if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
- /* do {...} */
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ if (expr->op_type == OP_LIST) {
+ OP *o;
+ bool is_utf8 = 0;
+ int ncode = 0;
+
+ /* are we UTF8, and how many code blocks are there? */
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+ is_utf8 = 1;
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+ /* count of DO blocks */
+ ncode++;
}
- else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
- SV* sv = cSVOPo->op_sv;
- SvREADONLY_off(sv);
- sv_catsv(sv, cSVOPx(kid)->op_sv);
- SvREADONLY_on(sv);
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ pRExC_state->max_code_index = ncode*2;
+ if (ncode) {
+ Newx(pRExC_state->code_indices, ncode*2, STRLEN);
+ SAVEFREEPV(pRExC_state->code_indices);
}
- else
- o = o->op_sibling;
+ pat = newSVpvn("", 0);
+ SAVEFREESV(pat);
+ if (is_utf8)
+ SvUTF8_on(pat);
+ S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+ }
+ else {
+ assert(expr->op_type == OP_CONST);
+ pat = cSVOPx_sv(expr);
}
- cLISTOPx(expr)->op_last = o;
- pat = ((SVOP*)(expr->op_type == OP_LIST
- ? cLISTOPx(expr)->op_first->op_sibling : expr))->op_sv;
}
else
pat = pattern;
-- dmq */
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
- exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
- xend = exp + len;
- RExC_orig_utf8 = RExC_utf8 = 1;
- SAVEFREEPV(exp);
+
+ if (expr && expr->op_type == OP_LIST) {
+ sv_setpvn(pat, "", 0);
+ SvUTF8_on(pat);
+ S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+ exp = SvPV(pat, plen);
+ xend = exp + plen;
+ }
+ else {
+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
+ xend = exp + len;
+ SAVEFREEPV(exp);
+ }
+ RExC_orig_utf8 = RExC_utf8 = 1;
}
#ifdef TRIE_STUDY_OPT
RExC_emit_start = ri->program;
RExC_emit = ri->program;
RExC_emit_bound = ri->program + RExC_size + 1;
+ pRExC_state->code_index = 0;
+ if (expr && expr->op_type == OP_LIST) {
+ assert(cLISTOPx(expr)->op_first->op_type == OP_PUSHMARK);
+ pRExC_state->next_code_or_const = cLISTOPx(expr)->op_first;
+ }
/* Store the count of eval-groups for security checks: */
RExC_rx->seen_evals = RExC_seen_evals;
RExC_seen_zerolen++;
RExC_seen |= REG_SEEN_EVAL;
- while (count && (c = *RExC_parse)) {
- if (c == '\\') {
- if (RExC_parse[1])
- RExC_parse++;
+
+ if (pRExC_state->max_code_index
+ && pRExC_state->code_indices[pRExC_state->code_index] ==
+ (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+ - RExC_start)
+ ) {
+ /* this is a pre-compiled literal (?{}) */
+ assert(pRExC_state->code_index
+ < pRExC_state->max_code_index);
+ RExC_parse = RExC_start - 1
+ + pRExC_state->code_indices[++pRExC_state->code_index];
+ pRExC_state->code_index++;
+ if (SIZE_ONLY)
+ RExC_seen_evals++;
+ else {
+ OP *o = pRExC_state->next_code_or_const;
+ while(! (o->op_type == OP_NULL
+ && (o->op_flags & OPf_SPECIAL)))
+ {
+ o = o->op_sibling;
+ }
+ n = add_data(pRExC_state, 1, "l");
+ RExC_rxi->data->data[n] = (void*)o->op_next;
+ pRExC_state->next_code_or_const = o->op_sibling;
}
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- RExC_parse++;
}
- if (*RExC_parse != ')') {
- RExC_parse = s;
- vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
- }
- if (!SIZE_ONLY) {
- PAD *pad;
- OP_4tree *sop, *rop;
- SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
- ENTER;
- Perl_save_re_context(aTHX);
- rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
- sop->op_private |= OPpREFCOUNTED;
- /* re_dup will OpREFCNT_inc */
- OpREFCNT_set(sop, 1);
- LEAVE;
-
- n = add_data(pRExC_state, 3, "nop");
- RExC_rxi->data->data[n] = (void*)rop;
- RExC_rxi->data->data[n+1] = (void*)sop;
- RExC_rxi->data->data[n+2] = (void*)pad;
- SvREFCNT_dec(sv);
- }
- else { /* First pass */
- if (PL_reginterp_cnt < ++RExC_seen_evals
- && IN_PERL_RUNTIME)
- /* No compiled RE interpolated, has runtime
- components ===> unsafe. */
- FAIL("Eval-group not allowed at runtime, use re 'eval'");
- if (PL_tainting && PL_tainted)
- FAIL("Eval-group in insecure regular expression");
-#if PERL_VERSION > 8
- if (IN_PERL_COMPILETIME)
- PL_cv_has_eval = 1;
-#endif
+ else {
+ while (count && (c = *RExC_parse)) {
+ if (c == '\\') {
+ if (RExC_parse[1])
+ RExC_parse++;
+ }
+ else if (c == '{')
+ count++;
+ else if (c == '}')
+ count--;
+ RExC_parse++;
+ }
+ if (*RExC_parse != ')') {
+ RExC_parse = s;
+ vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+ }
+ if (!SIZE_ONLY) {
+ PAD *pad;
+ OP_4tree *sop, *rop;
+ SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
+
+ ENTER;
+ Perl_save_re_context(aTHX);
+ rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
+ sop->op_private |= OPpREFCOUNTED;
+ /* re_dup will OpREFCNT_inc */
+ OpREFCNT_set(sop, 1);
+ LEAVE;
+
+ n = add_data(pRExC_state, 3, "nop");
+ RExC_rxi->data->data[n] = (void*)rop;
+ RExC_rxi->data->data[n+1] = (void*)sop;
+ RExC_rxi->data->data[n+2] = (void*)pad;
+ SvREFCNT_dec(sv);
+ }
+ else { /* First pass */
+ if (PL_reginterp_cnt < ++RExC_seen_evals
+ && IN_PERL_RUNTIME)
+ /* No compiled RE interpolated, has runtime
+ components ===> unsafe. */
+ FAIL("Eval-group not allowed at runtime, use re 'eval'");
+ if (PL_tainting && PL_tainted)
+ FAIL("Eval-group in insecure regular expression");
+ #if PERL_VERSION > 8
+ if (IN_PERL_COMPILETIME)
+ PL_cv_has_eval = 1;
+ #endif
+ }
}
-
nextchar(pRExC_state);
+
if (is_logical) {
ret = reg_node(pRExC_state, LOGICAL);
if (!SIZE_ONLY)
SvREFCNT_dec(MUTABLE_SV(new_comppad));
new_comppad = NULL;
break;
+ case 'l':
case 'n':
break;
case 'T':
((reg_trie_data*)ri->data->data[i])->refcount++;
OP_REFCNT_UNLOCK;
/* Fall through */
+ case 'l':
case 'n':
d->data[i] = ri->data->data[i];
break;
* The character describes the function of the corresponding .data item:
* a - AV for paren_name_list under DEBUGGING
* f - start-class data for regstclass optimization
+ * l - start op for literal (?{EVAL}) item
* n - Root of op tree for (?{EVAL}) item
* o - Start op for (?{EVAL}) item
* p - Pad for (?{EVAL}) item
false: plain (?=foo)
true: used as a condition: (?(?=foo))
*/
+ PAD* const initial_pad = PL_comppad;
#ifdef DEBUGGING
GET_RE_DEBUG_FLAGS_DECL;
#endif
SV ** const before = SP;
OP_4tree * const oop = PL_op;
COP * const ocurcop = PL_curcop;
- PAD *old_comppad;
+ PAD *old_comppad, *new_comppad;
char *saved_regeol = PL_regeol;
struct re_save_state saved_state;
Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
n = ARG(scan);
- PL_op = (OP_4tree*)rexi->data->data[n];
+ if (rexi->data->what[n] == 'l') { /* literal code */
+ new_comppad = initial_pad; /* the pad of the current sub */
+ PL_op = (OP_4tree*)rexi->data->data[n];
+ }
+ else {
+ PL_op = (OP_4tree*)rexi->data->data[n];
+ new_comppad = (PAD*)rexi->data->data[n + 2];
+ }
DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
/* wrap the call in two SAVECOMPPADs. This ensures that
* accumulated SAVEt_CLEARSV's will be processed with
* interspersed SAVEt_COMPPAD's to ensure that lexicals
* are cleared in the right pad */
- SAVECOMPPAD();
- PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
+ if (PL_comppad == new_comppad)
+ old_comppad = new_comppad;
+ else {
+ SAVECOMPPAD();
+ PAD_SAVE_LOCAL(old_comppad, new_comppad);
+ }
PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
if (sv_yes_mark) {
Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
PL_op = oop;
- SAVECOMPPAD();
- PAD_RESTORE_LOCAL(old_comppad);
+ if (old_comppad != PL_comppad) {
+ SAVECOMPPAD();
+ PAD_RESTORE_LOCAL(old_comppad);
+ }
PL_curcop = ocurcop;
PL_regeol = saved_regeol;
if (!logical) {
use strict 'refs';
/(?{${"foo"}++})/;
EXPECT
-Can't use string ("foo") as a SCALAR ref while "strict refs" in use at (re_eval 1) line 1.
+Can't use string ("foo") as a SCALAR ref while "strict refs" in use at - line 3.
########
# [perl #37886] strict 'refs' doesn't apply inside defined
use strict 'refs';
EXPECT
Bareword "FOO" not allowed while "strict subs" in use at - line 5.
Execution of - aborted due to compilation errors.
+########
+# make sure checks are done within (?{})
+use strict 'subs';
+/(?{FOO})/
+EXPECT
+Bareword "FOO" not allowed while "strict subs" in use at - line 3.
+Execution of - aborted due to compilation errors.
b2
b3
b4
-b6
-u5
+b6-c
b7
u6
+u5-c
u1
c3
-c2
+c2-c
c1
i1
i2
u2
u3
u4
+b6-r
+u5-r
e2
e1
);
my $expect = ":" . join(":", @expect);
-# XXX tmp while re-evals are being doubly compiled:
-$expect =
- ':b1:b2:b3:b4:b6:b6:u5:b7:u6:u5:u1:c3:c2:c2:c1:i1:i2:b5:u2:u3:u4:e2:e1';
-
fresh_perl_is(<<'SCRIPT', $expect,{switches => [''], stdin => '', stderr => 1 },'Order of execution of special blocks');
BEGIN {print ":b1"}
END {print ":e1"}
eval 'BEGIN {print ":b5"}';
eval 'UNITCHECK {print ":u2"}';
eval 'UNITCHECK {print ":u3"; UNITCHECK {print ":u4"}}';
-"a" =~ /(?{UNITCHECK {print ":u5"};
- CHECK {print ":c2"};
- BEGIN {print ":b6"}})/x;
+"a" =~ /(?{UNITCHECK {print ":u5-c"};
+ CHECK {print ":c2-c"};
+ BEGIN {print ":b6-c"}})/x;
+{
+ use re 'eval';
+ my $runtime = q{
+ (?{UNITCHECK {print ":u5-r"};
+ CHECK {print ":c2-r"};
+ BEGIN {print ":b6-r"}})/
+ };
+ "a" =~ /$runtime/x;
+}
eval {BEGIN {print ":b7"}};
eval {UNITCHECK {print ":u6"}};
eval {INIT {print ":i2"}};
}
-plan tests => 214; # Update this when adding/deleting tests.
+plan tests => 217; # Update this when adding/deleting tests.
run_tests() unless caller;
# the most basic: literal code should be in same scope
# as the parent
- tok(1, "A$x" =~ /^A(??{$x})$/, "[$x] literal code");
+ ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
# the "don't recompile if pattern unchanged" mechanism
# shouldn't apply to code blocks - recompile every time
"[$x-$yy] literal qr + r6 +lit, outside");
}
}
+
+ # recursive subs should get lexical from the correct pad depth
+
+ sub recurse {
+ my ($n) = @_;
+ return if $n > 2;
+ ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
+ recurse($n+1);
+ }
+ recurse(0);
}
} # End of sub run_tests
a(?{f()+ - c - Missing right curly or square bracket
a(?{{1}+ - c - Missing right curly or square bracket
a(?{}})b - c -
-a(?{"{"})b - c - Sequence (?{...}) not terminated or not {}-balanced
+# XXX tmp disable this test - works for // but not qr// yet
+#a(?{"{"})b ab y - -
a(?{"\{"})b cabd y $& ab
a(?{"{"}})b - c - Sequence (?{...}) not terminated with ')'
a(?{$::bl="\{"}).b caxbd y $::bl {
print $x,$a,$b;
CODE
-on;
-
fresh_perl_is <<'CODE',
for my $x("a".."c") {
$y = 1;
{},
'multiple (?{})s in loop with lexicals';
-off;
-
fresh_perl_is <<'CODE', '781745', {}, 'run-time re-eval has its own scope';
use re qw(eval);
my $x = 7; my $a = 4; my $b = 5;
@x = foo(' x y z ');
print "you die joe!\n" unless "@x" eq 'x y z';
########
-/(?{"{"})/ # Check it outside of eval too
+"A" =~ /(?{"{"})/ # Check it outside of eval too
EXPECT
-Sequence (?{...}) not terminated or not {}-balanced in regex; marked by <-- HERE in m/(?{ <-- HERE "{"})/ at - line 1.
########
/(?{"{"}})/ # Check it outside of eval too
EXPECT