X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/28ef70489d76deb9024de42a0571162f323148c8..f09dd912ca46cc4e5553da867bc148656110510c:/op.c diff --git a/op.c b/op.c index 072d3ce..146407b 100644 --- a/op.c +++ b/op.c @@ -1223,8 +1223,7 @@ S_cop_free(pTHX_ COP* cop) } STATIC void -S_forget_pmop(pTHX_ PMOP *const o - ) +S_forget_pmop(pTHX_ PMOP *const o) { HV * const pmstash = PmopSTASH(o); @@ -1545,7 +1544,8 @@ Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other) OpTYPE_set(logop, type); logop->op_first = first; logop->op_other = other; - logop->op_flags = OPf_KIDS; + if (first) + logop->op_flags = OPf_KIDS; while (kid && OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (kid) @@ -1949,6 +1949,11 @@ Perl_scalarvoid(pTHX_ OP *arg) if (o->op_type == OP_REPEAT) scalar(cBINOPo->op_first); goto func_ops; + case OP_CONCAT: + if ((o->op_flags & OPf_STACKED) && + !(o->op_private & OPpCONCAT_NESTED)) + break; + goto func_ops; case OP_SUBSTR: if (o->op_private == 4) break; @@ -2662,6 +2667,7 @@ S_maybe_multiconcat(pTHX_ OP *o) SSize_t nargs = 0; SSize_t nconst = 0; + SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */ STRLEN variant; bool utf8 = FALSE; bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; @@ -2673,6 +2679,7 @@ S_maybe_multiconcat(pTHX_ OP *o) U8 private_flags = 0; /* ... op_private of the multiconcat op */ bool is_sprintf = FALSE; /* we're optimising an sprintf */ bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ + bool prev_was_const = FALSE; /* previous arg was a const */ /* ----------------------------------------------------------------- * Phase 1: @@ -2715,7 +2722,8 @@ S_maybe_multiconcat(pTHX_ OP *o) } else if ( topop->op_type == OP_CONCAT && (topop->op_flags & OPf_STACKED) - && (cUNOPo->op_first->op_flags & OPf_MOD)) + && (!(topop->op_private & OPpCONCAT_NESTED)) + ) { /* expr .= ..... */ @@ -2887,7 +2895,7 @@ S_maybe_multiconcat(pTHX_ OP *o) last = TRUE; } - if ( nargs > PERL_MULTICONCAT_MAXARG - 2 + if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2 || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) { /* At least two spare slots are needed to decompose both @@ -2918,10 +2926,16 @@ S_maybe_multiconcat(pTHX_ OP *o) argp++->p = sv; utf8 |= cBOOL(SvUTF8(sv)); nconst++; + if (prev_was_const) + /* this const may be demoted back to a plain arg later; + * make sure we have enough arg slots left */ + nadjconst++; + prev_was_const = !prev_was_const; } else { argp++->p = NULL; nargs++; + prev_was_const = FALSE; } if (last) @@ -3188,16 +3202,16 @@ S_maybe_multiconcat(pTHX_ OP *o) OP *prev; /* set prev to the sibling *before* the arg to be cut out, - * e.g.: + * e.g. when cutting EXPR: * * | - * kid= CONST + * kid= CONCAT * | - * prev= CONST -- EXPR + * prev= CONCAT -- EXPR * | */ if (argp == args && kid->op_type != OP_CONCAT) { - /* in e.g. '$x . = f(1)' there's no RHS concat tree + /* in e.g. '$x .= f(1)' there's no RHS concat tree * so the expression to be cut isn't kid->op_last but * kid itself */ OP *o1, *o2; @@ -4065,7 +4079,10 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_RV2HV: if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { PL_modcount = RETURN_UNLIMITED_NUMBER; - return o; /* Treat \(@foo) like ordinary list. */ + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; } /* FALLTHROUGH */ case OP_RV2GV: @@ -4130,7 +4147,12 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_PADHV: PL_modcount = RETURN_UNLIMITED_NUMBER; if (type == OP_REFGEN && o->op_flags & OPf_PARENS) - return o; /* Treat \(@foo) like ordinary list. */ + { + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; + } if (scalar_mod_type(o, type)) goto nomod; if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR @@ -6278,6 +6300,10 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) return fold_constants(op_integerize(op_std_init((OP *)binop))); } +/* Helper function for S_pmtrans(): comparison function to sort an array + * of codepoint range pairs. Sorts by start point, or if equal, by end + * point */ + static int uvcompare(const void *a, const void *b) __attribute__nonnull__(1) __attribute__nonnull__(2) @@ -6295,24 +6321,39 @@ static int uvcompare(const void *a, const void *b) return 0; } +/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl + * containing the search and replacement strings, assemble into + * a translation table attached as o->op_pv. + * Free expr and repl. + * It expects the toker to have already set the + * OPpTRANS_COMPLEMENT + * OPpTRANS_SQUASH + * OPpTRANS_DELETE + * flags as appropriate; this function may add + * OPpTRANS_FROM_UTF + * OPpTRANS_TO_UTF + * OPpTRANS_IDENTICAL + * OPpTRANS_GROWS + * flags + */ + static OP * S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) { SV * const tstr = ((SVOP*)expr)->op_sv; - SV * const rstr = - ((SVOP*)repl)->op_sv; + SV * const rstr = ((SVOP*)repl)->op_sv; STRLEN tlen; STRLEN rlen; const U8 *t = (U8*)SvPV_const(tstr, tlen); const U8 *r = (U8*)SvPV_const(rstr, rlen); - I32 i; - I32 j; - I32 grows = 0; - short *tbl; - - const I32 complement = o->op_private & OPpTRANS_COMPLEMENT; - const I32 squash = o->op_private & OPpTRANS_SQUASH; - I32 del = o->op_private & OPpTRANS_DELETE; + Size_t i, j; + bool grows = FALSE; + OPtrans_map *tbl; + SSize_t struct_size; /* malloced size of table struct */ + + const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT); + const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH); + const bool del = cBOOL(o->op_private & OPpTRANS_DELETE); SV* swash; PERL_ARGS_ASSERT_PMTRANS; @@ -6326,6 +6367,14 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_TO_UTF; if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { + + /* for utf8 translations, op_sv will be set to point to a swash + * containing codepoint ranges. This is done by first assembling + * a textual representation of the ranges in listsv then compiling + * it using swash_init(). For more details of the textual format, + * see L . + */ + SV* const listsv = newSVpvs("# comment\n"); SV* transv = NULL; const U8* tend = t + tlen; @@ -6367,15 +6416,24 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * odd. */ if (complement) { + /* utf8 and /c: + * replace t/tlen/tend with a version that has the ranges + * complemented + */ U8 tmpbuf[UTF8_MAXBYTES+1]; UV *cp; UV nextmin = 0; Newx(cp, 2*tlen, UV); i = 0; transv = newSVpvs(""); + + /* convert search string into array of (start,end) range + * codepoint pairs stored in cp[]. Most "ranges" will start + * and end at the same char */ while (t < tend) { cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; + /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */ if (t < tend && *t == ILLEGAL_UTF8_BYTE) { t++; cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags); @@ -6386,7 +6444,19 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } i++; } + + /* sort the ranges */ qsort(cp, i, 2*sizeof(UV), uvcompare); + + /* Create a utf8 string containing the complement of the + * codepoint ranges. For example if cp[] contains [A,B], [C,D], + * then transv will contain the equivalent of: + * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1, + * B + 1, ILLEGAL_UTF8_BYTE, C - 1, + * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff; + * A range of a single char skips the ILLEGAL_UTF8_BYTE and + * end cp. + */ for (j = 0; j < i; j++) { UV val = cp[2*j]; diff = val - nextmin; @@ -6404,6 +6474,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (val >= nextmin) nextmin = val + 1; } + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); { @@ -6420,6 +6491,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!rlen && !del) { r = t; rlen = tlen; rend = tend; } + if (!squash) { if ((!rlen && !del) || t == r || (tlen == rlen && memEQ((char *)t, (char *)r, tlen))) @@ -6428,6 +6500,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } + /* extract char ranges from t and r and append them to listsv */ + while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { @@ -6500,9 +6574,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) tfirst += diff + 1; } + /* compile listsv into a swash and attach to o */ + none = ++max; if (del) - del = ++max; + ++max; if (max > 0xffff) bits = 32; @@ -6541,50 +6617,88 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) goto warnins; } - tbl = (short*)PerlMemShared_calloc( - (o->op_private & OPpTRANS_COMPLEMENT) && - !(o->op_private & OPpTRANS_DELETE) ? 258 : 256, - sizeof(short)); + /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup + * table. Entries with the value -1 indicate chars not to be + * translated, while -2 indicates a search char without a + * corresponding replacement char under /d. + * + * Normally, the table has 256 slots. However, in the presence of + * /c, the search charlist has an implicit \x{100}-\x{7fffffff} + * added, and if there are enough replacement chars to start pairing + * with the \x{100},... search chars, then a larger (> 256) table + * is allocated. + * + * In addition, regardless of whether under /c, an extra slot at the + * end is used to store the final repeating char, or -3 under an empty + * replacement list, or -2 under /d; which makes the runtime code + * easier. + * + * The toker will have already expanded char ranges in t and r. + */ + + /* Initially allocate 257-slot table: 256 for basic (non /c) usage, + * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0. + * The OPtrans_map struct already contains one slot; hence the -1. + */ + struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short); + tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1); + tbl->size = 256; cPVOPo->op_pv = (char*)tbl; + if (complement) { - for (i = 0; i < (I32)tlen; i++) - tbl[t[i]] = -1; + Size_t excess; + + /* in this branch, j is a count of 'consumed' (i.e. paired off + * with a search char) replacement chars (so j <= rlen always) + */ + for (i = 0; i < tlen; i++) + tbl->map[t[i]] = -1; + for (i = 0, j = 0; i < 256; i++) { - if (!tbl[i]) { - if (j >= (I32)rlen) { + if (!tbl->map[i]) { + if (j == rlen) { if (del) - tbl[i] = -2; + tbl->map[i] = -2; else if (rlen) - tbl[i] = r[j-1]; + tbl->map[i] = r[j-1]; else - tbl[i] = (short)i; + tbl->map[i] = (short)i; } else { - if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[i] = r[j++]; + tbl->map[i] = r[j++]; } + if ( tbl->map[i] >= 0 + && UVCHR_IS_INVARIANT((UV)i) + && !UVCHR_IS_INVARIANT((UV)(tbl->map[i])) + ) + grows = TRUE; } } - if (!del) { - if (!rlen) { - j = rlen; - if (!squash) - o->op_private |= OPpTRANS_IDENTICAL; - } - else if (j >= (I32)rlen) - j = rlen - 1; - else { - tbl = - (short *) - PerlMemShared_realloc(tbl, - (0x101+rlen-j) * sizeof(short)); - cPVOPo->op_pv = (char*)tbl; - } - tbl[0x100] = (short)(rlen - j); - for (i=0; i < (I32)rlen - j; i++) - tbl[0x101+i] = r[j+i]; - } + + ASSUME(j <= rlen); + excess = rlen - j; + + if (excess) { + /* More replacement chars than search chars: + * store excess replacement chars at end of main table. + */ + + struct_size += excess; + tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, + struct_size + excess * sizeof(short)); + tbl->size += excess; + cPVOPo->op_pv = (char*)tbl; + + for (i = 0; i < excess; i++) + tbl->map[i + 256] = r[j+i]; + } + else { + /* no more replacement chars than search chars */ + if (!rlen && !del && !squash) + o->op_private |= OPpTRANS_IDENTICAL; + } + + tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3; } else { if (!rlen && !del) { @@ -6595,26 +6709,30 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) { o->op_private |= OPpTRANS_IDENTICAL; } + for (i = 0; i < 256; i++) - tbl[i] = -1; - for (i = 0, j = 0; i < (I32)tlen; i++,j++) { - if (j >= (I32)rlen) { + tbl->map[i] = -1; + for (i = 0, j = 0; i < tlen; i++,j++) { + if (j >= rlen) { if (del) { - if (tbl[t[i]] == -1) - tbl[t[i]] = -2; + if (tbl->map[t[i]] == -1) + tbl->map[t[i]] = -2; continue; } --j; } - if (tbl[t[i]] == -1) { + if (tbl->map[t[i]] == -1) { if ( UVCHR_IS_INVARIANT(t[i]) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[t[i]] = r[j]; + grows = TRUE; + tbl->map[t[i]] = r[j]; } } + tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3; } + /* both non-utf8 and utf8 code paths end up here */ + warnins: if(del && rlen == tlen) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); @@ -6630,6 +6748,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) return o; } + /* =for apidoc Am|OP *|newPMOP|I32 type|I32 flags @@ -6831,9 +6950,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) op_null(scope); } - if (is_compiletime) - /* runtime finalizes as part of finalizing whole tree */ - optimize_optree(o); + /* XXX optimize_optree() must be called on o before + * CALL_PEEP(), as currently S_maybe_multiconcat() can't + * currently cope with a peephole-optimised optree. + * Calling optimize_optree() here ensures that condition + * is met, but may mean optimize_optree() is applied + * to the same optree later (where hopefully it won't do any + * harm as it can't convert an op to multiconcat if it's + * already been converted */ + optimize_optree(o); /* have to peep the DOs individually as we've removed it from * the op_next chain */ @@ -7579,11 +7704,24 @@ S_assignment_type(pTHX_ const OP *o) if (!o) return TRUE; - if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) - o = cUNOPo->op_first; + if (o->op_type == OP_SREFGEN) + { + OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; + type = kid->op_type; + flags = o->op_flags | kid->op_flags; + if (!(flags & OPf_PARENS) + && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || + kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) + return ASSIGN_REF; + ret = ASSIGN_REF; + } else { + if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) + o = cUNOPo->op_first; + flags = o->op_flags; + type = o->op_type; + ret = 0; + } - flags = o->op_flags; - type = o->op_type; if (type == OP_COND_EXPR) { OP * const sib = OpSIBLING(cLOGOPo->op_first); const I32 t = assignment_type(sib); @@ -7596,19 +7734,6 @@ S_assignment_type(pTHX_ const OP *o) return FALSE; } - if (type == OP_SREFGEN) - { - OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; - type = kid->op_type; - flags |= kid->op_flags; - if (!(flags & OPf_PARENS) - && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || - kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) - return ASSIGN_REF; - ret = ASSIGN_REF; - } - else ret = 0; - if (type == OP_LIST && (flags & OPf_WANT) == OPf_WANT_SCALAR && o->op_private & OPpLVAL_INTRO) @@ -8135,9 +8260,8 @@ S_new_logop(pTHX_ I32 type, I32 flags, OP** firstp, OP** otherp) && o2->op_private & OPpLVAL_INTRO && !(o2->op_private & OPpPAD_STATE)) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), - "Deprecated use of my() in false conditional. " - "This will be a fatal error in Perl 5.30"); + Perl_croak(aTHX_ "This use of my() in false conditional is " + "no longer allowed"); } *otherp = NULL; @@ -8946,6 +9070,13 @@ S_looks_like_bool(pTHX_ const OP *o) case OP_FLOP: return TRUE; + + case OP_INDEX: + case OP_RINDEX: + /* optimised-away (index() != -1) or similar comparison */ + if (o->op_private & OPpTRUEBOOL) + return TRUE; + return FALSE; case OP_CONST: /* Detect comparisons that have been optimized away */ @@ -8955,7 +9086,6 @@ S_looks_like_bool(pTHX_ const OP *o) return TRUE; else return FALSE; - /* FALLTHROUGH */ default: return FALSE; @@ -9726,9 +9856,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, Also, we may be called from load_module at run time, so PL_curstash (which sets CvSTASH) may not point to the stash the sub is stored in. */ + /* XXX This optimization is currently disabled for packages other + than main, since there was too much CPAN breakage. */ const I32 flags = ec ? GV_NOADD_NOINIT : (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop)) + || PL_curstash != PL_defstash || memchr(name, ':', namlen) || memchr(name, '\'', namlen) ? gv_fetch_flags : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL; @@ -10440,7 +10573,7 @@ Perl_newXS_deffile(pTHX_ const char *name, XSUBADDR_t subaddr) } /* -=for apidoc m|CV *|newXS_len_flags|const char *name|STRLEN len|XSUBADDR_t subaddr|const char *const filename|NULLOK const char *const proto|NULLOK SV **const_svp|U32 flags +=for apidoc m|CV *|newXS_len_flags|const char *name|STRLEN len|XSUBADDR_t subaddr|const char *const filename|const char *const proto|SV **const_svp|U32 flags Construct an XS subroutine, also performing some surrounding jobs. @@ -10905,6 +11038,7 @@ Perl_ck_backtick(pTHX_ OP *o) OP *newop = NULL; OP *sibl; PERL_ARGS_ASSERT_CK_BACKTICK; + o = ck_fun(o); /* qx and `` have a null pushmark; CORE::readpipe has only one kid. */ if (o->op_flags & OPf_KIDS && (sibl = OpSIBLING(cUNOPo->op_first)) && (gv = gv_override("readpipe",8))) @@ -10930,12 +11064,6 @@ Perl_ck_bitop(pTHX_ OP *o) o->op_private = (U8)(PL_hints & HINT_INTEGER); - if (o->op_type == OP_NBIT_OR || o->op_type == OP_SBIT_OR - || o->op_type == OP_NBIT_XOR || o->op_type == OP_SBIT_XOR - || o->op_type == OP_NBIT_AND || o->op_type == OP_SBIT_AND - || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT) - Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE), - "The bitwise feature is experimental"); if (!(o->op_flags & OPf_STACKED) /* Not an assignment */ && OP_IS_INFIX_BIT(o->op_type)) { @@ -14578,7 +14706,7 @@ S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) /* at this point we're looking for an OP_AELEM, OP_HELEM, * OP_EXISTS or OP_DELETE */ - /* if something like arybase (a.k.a $[ ) is in scope, + /* if a custom array/hash access checker is in scope, * abandon optimisation attempt */ if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM) && PL_check[o->op_type] != Perl_ck_null)