}
STATIC void
-S_forget_pmop(pTHX_ PMOP *const o
- )
+S_forget_pmop(pTHX_ PMOP *const o)
{
HV * const pmstash = PmopSTASH(o);
OpTYPE_set(logop, type);
logop->op_first = first;
logop->op_other = other;
- logop->op_flags = OPf_KIDS;
+ if (first)
+ logop->op_flags = OPf_KIDS;
while (kid && OpHAS_SIBLING(kid))
kid = OpSIBLING(kid);
if (kid)
if (o->op_type == OP_REPEAT)
scalar(cBINOPo->op_first);
goto func_ops;
+ case OP_CONCAT:
+ if ((o->op_flags & OPf_STACKED) &&
+ !(o->op_private & OPpCONCAT_NESTED))
+ break;
+ goto func_ops;
case OP_SUBSTR:
if (o->op_private == 4)
break;
SSize_t nargs = 0;
SSize_t nconst = 0;
+ SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */
STRLEN variant;
bool utf8 = FALSE;
bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
U8 private_flags = 0; /* ... op_private of the multiconcat op */
bool is_sprintf = FALSE; /* we're optimising an sprintf */
bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */
+ bool prev_was_const = FALSE; /* previous arg was a const */
/* -----------------------------------------------------------------
* Phase 1:
last = TRUE;
}
- if ( nargs > PERL_MULTICONCAT_MAXARG - 2
+ if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2
|| (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
{
/* At least two spare slots are needed to decompose both
argp++->p = sv;
utf8 |= cBOOL(SvUTF8(sv));
nconst++;
+ if (prev_was_const)
+ /* this const may be demoted back to a plain arg later;
+ * make sure we have enough arg slots left */
+ nadjconst++;
+ prev_was_const = !prev_was_const;
}
else {
argp++->p = NULL;
nargs++;
+ prev_was_const = FALSE;
}
if (last)
OP *prev;
/* set prev to the sibling *before* the arg to be cut out,
- * e.g.:
+ * e.g. when cutting EXPR:
*
* |
- * kid= CONST
+ * kid= CONCAT
* |
- * prev= CONST -- EXPR
+ * prev= CONCAT -- EXPR
* |
*/
if (argp == args && kid->op_type != OP_CONCAT) {
- /* in e.g. '$x . = f(1)' there's no RHS concat tree
+ /* in e.g. '$x .= f(1)' there's no RHS concat tree
* so the expression to be cut isn't kid->op_last but
* kid itself */
OP *o1, *o2;
case OP_RV2HV:
if (type == OP_REFGEN && o->op_flags & OPf_PARENS) {
PL_modcount = RETURN_UNLIMITED_NUMBER;
- return o; /* Treat \(@foo) like ordinary list. */
+ /* Treat \(@foo) like ordinary list, but still mark it as modi-
+ fiable since some contexts need to know. */
+ o->op_flags |= OPf_MOD;
+ return o;
}
/* FALLTHROUGH */
case OP_RV2GV:
case OP_PADHV:
PL_modcount = RETURN_UNLIMITED_NUMBER;
if (type == OP_REFGEN && o->op_flags & OPf_PARENS)
- return o; /* Treat \(@foo) like ordinary list. */
+ {
+ /* Treat \(@foo) like ordinary list, but still mark it as modi-
+ fiable since some contexts need to know. */
+ o->op_flags |= OPf_MOD;
+ return o;
+ }
if (scalar_mod_type(o, type))
goto nomod;
if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR
return fold_constants(op_integerize(op_std_init((OP *)binop)));
}
+/* Helper function for S_pmtrans(): comparison function to sort an array
+ * of codepoint range pairs. Sorts by start point, or if equal, by end
+ * point */
+
static int uvcompare(const void *a, const void *b)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
return 0;
}
+/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl
+ * containing the search and replacement strings, assemble into
+ * a translation table attached as o->op_pv.
+ * Free expr and repl.
+ * It expects the toker to have already set the
+ * OPpTRANS_COMPLEMENT
+ * OPpTRANS_SQUASH
+ * OPpTRANS_DELETE
+ * flags as appropriate; this function may add
+ * OPpTRANS_FROM_UTF
+ * OPpTRANS_TO_UTF
+ * OPpTRANS_IDENTICAL
+ * OPpTRANS_GROWS
+ * flags
+ */
+
static OP *
S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
{
SV * const tstr = ((SVOP*)expr)->op_sv;
- SV * const rstr =
- ((SVOP*)repl)->op_sv;
+ SV * const rstr = ((SVOP*)repl)->op_sv;
STRLEN tlen;
STRLEN rlen;
const U8 *t = (U8*)SvPV_const(tstr, tlen);
const U8 *r = (U8*)SvPV_const(rstr, rlen);
- I32 i;
- I32 j;
- I32 grows = 0;
- short *tbl;
-
- const I32 complement = o->op_private & OPpTRANS_COMPLEMENT;
- const I32 squash = o->op_private & OPpTRANS_SQUASH;
- I32 del = o->op_private & OPpTRANS_DELETE;
+ Size_t i, j;
+ bool grows = FALSE;
+ OPtrans_map *tbl;
+ SSize_t struct_size; /* malloced size of table struct */
+
+ const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT);
+ const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH);
+ const bool del = cBOOL(o->op_private & OPpTRANS_DELETE);
SV* swash;
PERL_ARGS_ASSERT_PMTRANS;
o->op_private |= OPpTRANS_TO_UTF;
if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
+
+ /* for utf8 translations, op_sv will be set to point to a swash
+ * containing codepoint ranges. This is done by first assembling
+ * a textual representation of the ranges in listsv then compiling
+ * it using swash_init(). For more details of the textual format,
+ * see L<perlunicode.pod/"User-Defined Character Properties"> .
+ */
+
SV* const listsv = newSVpvs("# comment\n");
SV* transv = NULL;
const U8* tend = t + tlen;
* odd. */
if (complement) {
+ /* utf8 and /c:
+ * replace t/tlen/tend with a version that has the ranges
+ * complemented
+ */
U8 tmpbuf[UTF8_MAXBYTES+1];
UV *cp;
UV nextmin = 0;
Newx(cp, 2*tlen, UV);
i = 0;
transv = newSVpvs("");
+
+ /* convert search string into array of (start,end) range
+ * codepoint pairs stored in cp[]. Most "ranges" will start
+ * and end at the same char */
while (t < tend) {
cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
t += ulen;
+ /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */
if (t < tend && *t == ILLEGAL_UTF8_BYTE) {
t++;
cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags);
}
i++;
}
+
+ /* sort the ranges */
qsort(cp, i, 2*sizeof(UV), uvcompare);
+
+ /* Create a utf8 string containing the complement of the
+ * codepoint ranges. For example if cp[] contains [A,B], [C,D],
+ * then transv will contain the equivalent of:
+ * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1,
+ * B + 1, ILLEGAL_UTF8_BYTE, C - 1,
+ * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff;
+ * A range of a single char skips the ILLEGAL_UTF8_BYTE and
+ * end cp.
+ */
for (j = 0; j < i; j++) {
UV val = cp[2*j];
diff = val - nextmin;
if (val >= nextmin)
nextmin = val + 1;
}
+
t = uvchr_to_utf8(tmpbuf,nextmin);
sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf);
{
else if (!rlen && !del) {
r = t; rlen = tlen; rend = tend;
}
+
if (!squash) {
if ((!rlen && !del) || t == r ||
(tlen == rlen && memEQ((char *)t, (char *)r, tlen)))
}
}
+ /* extract char ranges from t and r and append them to listsv */
+
while (t < tend || tfirst <= tlast) {
/* see if we need more "t" chars */
if (tfirst > tlast) {
tfirst += diff + 1;
}
+ /* compile listsv into a swash and attach to o */
+
none = ++max;
if (del)
- del = ++max;
+ ++max;
if (max > 0xffff)
bits = 32;
goto warnins;
}
- tbl = (short*)PerlMemShared_calloc(
- (o->op_private & OPpTRANS_COMPLEMENT) &&
- !(o->op_private & OPpTRANS_DELETE) ? 258 : 256,
- sizeof(short));
+ /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup
+ * table. Entries with the value -1 indicate chars not to be
+ * translated, while -2 indicates a search char without a
+ * corresponding replacement char under /d.
+ *
+ * Normally, the table has 256 slots. However, in the presence of
+ * /c, the search charlist has an implicit \x{100}-\x{7fffffff}
+ * added, and if there are enough replacement chars to start pairing
+ * with the \x{100},... search chars, then a larger (> 256) table
+ * is allocated.
+ *
+ * In addition, regardless of whether under /c, an extra slot at the
+ * end is used to store the final repeating char, or -3 under an empty
+ * replacement list, or -2 under /d; which makes the runtime code
+ * easier.
+ *
+ * The toker will have already expanded char ranges in t and r.
+ */
+
+ /* Initially allocate 257-slot table: 256 for basic (non /c) usage,
+ * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0.
+ * The OPtrans_map struct already contains one slot; hence the -1.
+ */
+ struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short);
+ tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1);
+ tbl->size = 256;
cPVOPo->op_pv = (char*)tbl;
+
if (complement) {
- for (i = 0; i < (I32)tlen; i++)
- tbl[t[i]] = -1;
+ Size_t excess;
+
+ /* in this branch, j is a count of 'consumed' (i.e. paired off
+ * with a search char) replacement chars (so j <= rlen always)
+ */
+ for (i = 0; i < tlen; i++)
+ tbl->map[t[i]] = -1;
+
for (i = 0, j = 0; i < 256; i++) {
- if (!tbl[i]) {
- if (j >= (I32)rlen) {
+ if (!tbl->map[i]) {
+ if (j == rlen) {
if (del)
- tbl[i] = -2;
+ tbl->map[i] = -2;
else if (rlen)
- tbl[i] = r[j-1];
+ tbl->map[i] = r[j-1];
else
- tbl[i] = (short)i;
+ tbl->map[i] = (short)i;
}
else {
- if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j]))
- grows = 1;
- tbl[i] = r[j++];
+ tbl->map[i] = r[j++];
}
+ if ( tbl->map[i] >= 0
+ && UVCHR_IS_INVARIANT((UV)i)
+ && !UVCHR_IS_INVARIANT((UV)(tbl->map[i]))
+ )
+ grows = TRUE;
}
}
- if (!del) {
- if (!rlen) {
- j = rlen;
- if (!squash)
- o->op_private |= OPpTRANS_IDENTICAL;
- }
- else if (j >= (I32)rlen)
- j = rlen - 1;
- else {
- tbl =
- (short *)
- PerlMemShared_realloc(tbl,
- (0x101+rlen-j) * sizeof(short));
- cPVOPo->op_pv = (char*)tbl;
- }
- tbl[0x100] = (short)(rlen - j);
- for (i=0; i < (I32)rlen - j; i++)
- tbl[0x101+i] = r[j+i];
- }
+
+ ASSUME(j <= rlen);
+ excess = rlen - j;
+
+ if (excess) {
+ /* More replacement chars than search chars:
+ * store excess replacement chars at end of main table.
+ */
+
+ struct_size += excess;
+ tbl = (OPtrans_map*)PerlMemShared_realloc(tbl,
+ struct_size + excess * sizeof(short));
+ tbl->size += excess;
+ cPVOPo->op_pv = (char*)tbl;
+
+ for (i = 0; i < excess; i++)
+ tbl->map[i + 256] = r[j+i];
+ }
+ else {
+ /* no more replacement chars than search chars */
+ if (!rlen && !del && !squash)
+ o->op_private |= OPpTRANS_IDENTICAL;
+ }
+
+ tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3;
}
else {
if (!rlen && !del) {
else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) {
o->op_private |= OPpTRANS_IDENTICAL;
}
+
for (i = 0; i < 256; i++)
- tbl[i] = -1;
- for (i = 0, j = 0; i < (I32)tlen; i++,j++) {
- if (j >= (I32)rlen) {
+ tbl->map[i] = -1;
+ for (i = 0, j = 0; i < tlen; i++,j++) {
+ if (j >= rlen) {
if (del) {
- if (tbl[t[i]] == -1)
- tbl[t[i]] = -2;
+ if (tbl->map[t[i]] == -1)
+ tbl->map[t[i]] = -2;
continue;
}
--j;
}
- if (tbl[t[i]] == -1) {
+ if (tbl->map[t[i]] == -1) {
if ( UVCHR_IS_INVARIANT(t[i])
&& ! UVCHR_IS_INVARIANT(r[j]))
- grows = 1;
- tbl[t[i]] = r[j];
+ grows = TRUE;
+ tbl->map[t[i]] = r[j];
}
}
+ tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3;
}
+ /* both non-utf8 and utf8 code paths end up here */
+
warnins:
if(del && rlen == tlen) {
Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator");
return o;
}
+
/*
=for apidoc Am|OP *|newPMOP|I32 type|I32 flags
op_null(scope);
}
- if (is_compiletime)
- /* runtime finalizes as part of finalizing whole tree */
- optimize_optree(o);
+ /* XXX optimize_optree() must be called on o before
+ * CALL_PEEP(), as currently S_maybe_multiconcat() can't
+ * currently cope with a peephole-optimised optree.
+ * Calling optimize_optree() here ensures that condition
+ * is met, but may mean optimize_optree() is applied
+ * to the same optree later (where hopefully it won't do any
+ * harm as it can't convert an op to multiconcat if it's
+ * already been converted */
+ optimize_optree(o);
/* have to peep the DOs individually as we've removed it from
* the op_next chain */
case OP_FLOP:
return TRUE;
+
+ case OP_INDEX:
+ case OP_RINDEX:
+ /* optimised-away (index() != -1) or similar comparison */
+ if (o->op_private & OPpTRUEBOOL)
+ return TRUE;
+ return FALSE;
case OP_CONST:
/* Detect comparisons that have been optimized away */
return TRUE;
else
return FALSE;
-
/* FALLTHROUGH */
default:
return FALSE;
Also, we may be called from load_module at run time, so
PL_curstash (which sets CvSTASH) may not point to the stash the
sub is stored in. */
+ /* XXX This optimization is currently disabled for packages other
+ than main, since there was too much CPAN breakage. */
const I32 flags =
ec ? GV_NOADD_NOINIT
: (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop))
+ || PL_curstash != PL_defstash
|| memchr(name, ':', namlen) || memchr(name, '\'', namlen)
? gv_fetch_flags
: GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL;
OP *newop = NULL;
OP *sibl;
PERL_ARGS_ASSERT_CK_BACKTICK;
+ o = ck_fun(o);
/* qx and `` have a null pushmark; CORE::readpipe has only one kid. */
if (o->op_flags & OPf_KIDS && (sibl = OpSIBLING(cUNOPo->op_first))
&& (gv = gv_override("readpipe",8)))
o->op_private = (U8)(PL_hints & HINT_INTEGER);
- if (o->op_type == OP_NBIT_OR || o->op_type == OP_SBIT_OR
- || o->op_type == OP_NBIT_XOR || o->op_type == OP_SBIT_XOR
- || o->op_type == OP_NBIT_AND || o->op_type == OP_SBIT_AND
- || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT)
- Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE),
- "The bitwise feature is experimental");
if (!(o->op_flags & OPf_STACKED) /* Not an assignment */
&& OP_IS_INFIX_BIT(o->op_type))
{