X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/d96402565ccd7459d3a8bc4074f177d00bbefeeb..f09dd912ca46cc4e5553da867bc148656110510c:/op.c diff --git a/op.c b/op.c index 368f9b1..146407b 100644 --- a/op.c +++ b/op.c @@ -1223,8 +1223,7 @@ S_cop_free(pTHX_ COP* cop) } STATIC void -S_forget_pmop(pTHX_ PMOP *const o - ) +S_forget_pmop(pTHX_ PMOP *const o) { HV * const pmstash = PmopSTASH(o); @@ -1545,7 +1544,8 @@ Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other) OpTYPE_set(logop, type); logop->op_first = first; logop->op_other = other; - logop->op_flags = OPf_KIDS; + if (first) + logop->op_flags = OPf_KIDS; while (kid && OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (kid) @@ -1949,6 +1949,11 @@ Perl_scalarvoid(pTHX_ OP *arg) if (o->op_type == OP_REPEAT) scalar(cBINOPo->op_first); goto func_ops; + case OP_CONCAT: + if ((o->op_flags & OPf_STACKED) && + !(o->op_private & OPpCONCAT_NESTED)) + break; + goto func_ops; case OP_SUBSTR: if (o->op_private == 4) break; @@ -2489,7 +2494,7 @@ S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op) /* info returned by S_sprintf_is_multiconcatable() */ struct sprintf_ismc_info { - UV nargs; /* num of args to sprintf (not including the format) */ + SSize_t nargs; /* num of args to sprintf (not including the format) */ char *start; /* start of raw format string */ char *end; /* bytes after end of raw format string */ STRLEN total_len; /* total length (in bytes) of format string, not @@ -2517,7 +2522,7 @@ S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) OP *pm, *constop, *kid; SV *sv; char *s, *e, *p; - UV nargs, nformats; + SSize_t nargs, nformats; STRLEN cur, total_len, variant; bool utf8; @@ -2660,8 +2665,9 @@ S_maybe_multiconcat(pTHX_ OP *o) STRLEN len; /* ... len set to SvPV(..., len) */ } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; - UV nargs = 0; - UV nconst = 0; + SSize_t nargs = 0; + SSize_t nconst = 0; + SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */ STRLEN variant; bool utf8 = FALSE; bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; @@ -2673,6 +2679,7 @@ S_maybe_multiconcat(pTHX_ OP *o) U8 private_flags = 0; /* ... op_private of the multiconcat op */ bool is_sprintf = FALSE; /* we're optimising an sprintf */ bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ + bool prev_was_const = FALSE; /* previous arg was a const */ /* ----------------------------------------------------------------- * Phase 1: @@ -2694,6 +2701,8 @@ S_maybe_multiconcat(pTHX_ OP *o) || o->op_type == OP_SPRINTF || o->op_type == OP_STRINGIFY); + Zero(&sprintf_info, 1, struct sprintf_ismc_info); + /* first see if, at the top of the tree, there is an assign, * append and/or stringify */ @@ -2713,7 +2722,8 @@ S_maybe_multiconcat(pTHX_ OP *o) } else if ( topop->op_type == OP_CONCAT && (topop->op_flags & OPf_STACKED) - && (cUNOPo->op_first->op_flags & OPf_MOD)) + && (!(topop->op_private & OPpCONCAT_NESTED)) + ) { /* expr .= ..... */ @@ -2885,7 +2895,7 @@ S_maybe_multiconcat(pTHX_ OP *o) last = TRUE; } - if ( nargs > PERL_MULTICONCAT_MAXARG - 2 + if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2 || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) { /* At least two spare slots are needed to decompose both @@ -2916,10 +2926,16 @@ S_maybe_multiconcat(pTHX_ OP *o) argp++->p = sv; utf8 |= cBOOL(SvUTF8(sv)); nconst++; + if (prev_was_const) + /* this const may be demoted back to a plain arg later; + * make sure we have enough arg slots left */ + nadjconst++; + prev_was_const = !prev_was_const; } else { argp++->p = NULL; nargs++; + prev_was_const = FALSE; } if (last) @@ -2931,6 +2947,33 @@ S_maybe_multiconcat(pTHX_ OP *o) if (stacked_last) return; /* we don't support ((A.=B).=C)...) */ + /* look for two adjacent consts and don't fold them together: + * $o . "a" . "b" + * should do + * $o->concat("a")->concat("b") + * rather than + * $o->concat("ab") + * (but $o .= "a" . "b" should still fold) + */ + { + bool seen_nonconst = FALSE; + for (argp = toparg; argp >= args; argp--) { + if (argp->p == NULL) { + seen_nonconst = TRUE; + continue; + } + if (!seen_nonconst) + continue; + if (argp[1].p) { + /* both previous and current arg were constants; + * leave the current OP_CONST as-is */ + argp->p = NULL; + nconst--; + nargs++; + } + } + } + /* ----------------------------------------------------------------- * Phase 2: * @@ -3043,7 +3086,7 @@ S_maybe_multiconcat(pTHX_ OP *o) + ((nargs + 1) * (variant ? 2 : 1)) ) ); - const_str = (char *)PerlMemShared_malloc(total_len); + const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1); /* Extract all the non-const expressions from the concat tree then * dispose of the old tree, e.g. convert the tree from this: @@ -3095,14 +3138,14 @@ S_maybe_multiconcat(pTHX_ OP *o) if (*p == '%') { p++; if (*p != '%') { - (lenp++)->uv = q - oldq; + (lenp++)->ssize = q - oldq; oldq = q; continue; } } *q++ = *p; } - lenp->uv = q - oldq; + lenp->ssize = q - oldq; assert((STRLEN)(q - const_str) == total_len); /* Attach all the args (i.e. the kids of the sprintf) to o (which @@ -3123,7 +3166,7 @@ S_maybe_multiconcat(pTHX_ OP *o) p = const_str; lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; - lenp->size = -1; + lenp->ssize = -1; /* Concatenate all const strings into const_str. * Note that args[] contains the RHS args in reverse order, so @@ -3133,15 +3176,15 @@ S_maybe_multiconcat(pTHX_ OP *o) for (argp = toparg; argp >= args; argp--) { if (!argp->p) /* not a const op */ - (++lenp)->size = -1; + (++lenp)->ssize = -1; else { STRLEN l = argp->len; Copy(argp->p, p, l, char); p += l; - if (lenp->size == -1) - lenp->size = l; + if (lenp->ssize == -1) + lenp->ssize = l; else - lenp->size += l; + lenp->ssize += l; } } @@ -3159,16 +3202,16 @@ S_maybe_multiconcat(pTHX_ OP *o) OP *prev; /* set prev to the sibling *before* the arg to be cut out, - * e.g.: + * e.g. when cutting EXPR: * * | - * kid= CONST + * kid= CONCAT * | - * prev= CONST -- EXPR + * prev= CONCAT -- EXPR * | */ if (argp == args && kid->op_type != OP_CONCAT) { - /* in e.g. '$x . = f(1)' there's no RHS concat tree + /* in e.g. '$x .= f(1)' there's no RHS concat tree * so the expression to be cut isn't kid->op_last but * kid itself */ OP *o1, *o2; @@ -3215,11 +3258,11 @@ S_maybe_multiconcat(pTHX_ OP *o) /* Populate the aux struct */ - aux[PERL_MULTICONCAT_IX_NARGS].uv = nargs; + aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs; aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; - aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size = utf8 ? 0 : total_len; + aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len; aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; - aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = total_len; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len; /* if variant > 0, calculate a variant const string and lengths where * the utf8 version of the string will take 'variant' more bytes than @@ -3231,19 +3274,19 @@ S_maybe_multiconcat(pTHX_ OP *o) UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; UNOP_AUX_item *ulens = lens + (nargs + 1); char *up = (char*)PerlMemShared_malloc(ulen); - UV n; + SSize_t n; aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; - aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = ulen; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen; for (n = 0; n < (nargs + 1); n++) { SSize_t i; char * orig_up = up; - for (i = (lens++)->size; i > 0; i--) { + for (i = (lens++)->ssize; i > 0; i--) { U8 c = *p++; append_utf8_from_native_byte(c, (U8**)&up); } - (ulens++)->size = (i < 0) ? i : up - orig_up; + (ulens++)->ssize = (i < 0) ? i : up - orig_up; } } @@ -4036,7 +4079,10 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_RV2HV: if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { PL_modcount = RETURN_UNLIMITED_NUMBER; - return o; /* Treat \(@foo) like ordinary list. */ + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; } /* FALLTHROUGH */ case OP_RV2GV: @@ -4101,7 +4147,12 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_PADHV: PL_modcount = RETURN_UNLIMITED_NUMBER; if (type == OP_REFGEN && o->op_flags & OPf_PARENS) - return o; /* Treat \(@foo) like ordinary list. */ + { + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; + } if (scalar_mod_type(o, type)) goto nomod; if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR @@ -6249,6 +6300,10 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) return fold_constants(op_integerize(op_std_init((OP *)binop))); } +/* Helper function for S_pmtrans(): comparison function to sort an array + * of codepoint range pairs. Sorts by start point, or if equal, by end + * point */ + static int uvcompare(const void *a, const void *b) __attribute__nonnull__(1) __attribute__nonnull__(2) @@ -6266,24 +6321,39 @@ static int uvcompare(const void *a, const void *b) return 0; } +/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl + * containing the search and replacement strings, assemble into + * a translation table attached as o->op_pv. + * Free expr and repl. + * It expects the toker to have already set the + * OPpTRANS_COMPLEMENT + * OPpTRANS_SQUASH + * OPpTRANS_DELETE + * flags as appropriate; this function may add + * OPpTRANS_FROM_UTF + * OPpTRANS_TO_UTF + * OPpTRANS_IDENTICAL + * OPpTRANS_GROWS + * flags + */ + static OP * S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) { SV * const tstr = ((SVOP*)expr)->op_sv; - SV * const rstr = - ((SVOP*)repl)->op_sv; + SV * const rstr = ((SVOP*)repl)->op_sv; STRLEN tlen; STRLEN rlen; const U8 *t = (U8*)SvPV_const(tstr, tlen); const U8 *r = (U8*)SvPV_const(rstr, rlen); - I32 i; - I32 j; - I32 grows = 0; - short *tbl; - - const I32 complement = o->op_private & OPpTRANS_COMPLEMENT; - const I32 squash = o->op_private & OPpTRANS_SQUASH; - I32 del = o->op_private & OPpTRANS_DELETE; + Size_t i, j; + bool grows = FALSE; + OPtrans_map *tbl; + SSize_t struct_size; /* malloced size of table struct */ + + const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT); + const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH); + const bool del = cBOOL(o->op_private & OPpTRANS_DELETE); SV* swash; PERL_ARGS_ASSERT_PMTRANS; @@ -6297,6 +6367,14 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_TO_UTF; if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { + + /* for utf8 translations, op_sv will be set to point to a swash + * containing codepoint ranges. This is done by first assembling + * a textual representation of the ranges in listsv then compiling + * it using swash_init(). For more details of the textual format, + * see L . + */ + SV* const listsv = newSVpvs("# comment\n"); SV* transv = NULL; const U8* tend = t + tlen; @@ -6338,15 +6416,24 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * odd. */ if (complement) { + /* utf8 and /c: + * replace t/tlen/tend with a version that has the ranges + * complemented + */ U8 tmpbuf[UTF8_MAXBYTES+1]; UV *cp; UV nextmin = 0; Newx(cp, 2*tlen, UV); i = 0; transv = newSVpvs(""); + + /* convert search string into array of (start,end) range + * codepoint pairs stored in cp[]. Most "ranges" will start + * and end at the same char */ while (t < tend) { cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; + /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */ if (t < tend && *t == ILLEGAL_UTF8_BYTE) { t++; cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags); @@ -6357,7 +6444,19 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } i++; } + + /* sort the ranges */ qsort(cp, i, 2*sizeof(UV), uvcompare); + + /* Create a utf8 string containing the complement of the + * codepoint ranges. For example if cp[] contains [A,B], [C,D], + * then transv will contain the equivalent of: + * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1, + * B + 1, ILLEGAL_UTF8_BYTE, C - 1, + * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff; + * A range of a single char skips the ILLEGAL_UTF8_BYTE and + * end cp. + */ for (j = 0; j < i; j++) { UV val = cp[2*j]; diff = val - nextmin; @@ -6375,6 +6474,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (val >= nextmin) nextmin = val + 1; } + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); { @@ -6391,6 +6491,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!rlen && !del) { r = t; rlen = tlen; rend = tend; } + if (!squash) { if ((!rlen && !del) || t == r || (tlen == rlen && memEQ((char *)t, (char *)r, tlen))) @@ -6399,6 +6500,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } + /* extract char ranges from t and r and append them to listsv */ + while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { @@ -6471,9 +6574,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) tfirst += diff + 1; } + /* compile listsv into a swash and attach to o */ + none = ++max; if (del) - del = ++max; + ++max; if (max > 0xffff) bits = 32; @@ -6512,50 +6617,88 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) goto warnins; } - tbl = (short*)PerlMemShared_calloc( - (o->op_private & OPpTRANS_COMPLEMENT) && - !(o->op_private & OPpTRANS_DELETE) ? 258 : 256, - sizeof(short)); + /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup + * table. Entries with the value -1 indicate chars not to be + * translated, while -2 indicates a search char without a + * corresponding replacement char under /d. + * + * Normally, the table has 256 slots. However, in the presence of + * /c, the search charlist has an implicit \x{100}-\x{7fffffff} + * added, and if there are enough replacement chars to start pairing + * with the \x{100},... search chars, then a larger (> 256) table + * is allocated. + * + * In addition, regardless of whether under /c, an extra slot at the + * end is used to store the final repeating char, or -3 under an empty + * replacement list, or -2 under /d; which makes the runtime code + * easier. + * + * The toker will have already expanded char ranges in t and r. + */ + + /* Initially allocate 257-slot table: 256 for basic (non /c) usage, + * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0. + * The OPtrans_map struct already contains one slot; hence the -1. + */ + struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short); + tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1); + tbl->size = 256; cPVOPo->op_pv = (char*)tbl; + if (complement) { - for (i = 0; i < (I32)tlen; i++) - tbl[t[i]] = -1; + Size_t excess; + + /* in this branch, j is a count of 'consumed' (i.e. paired off + * with a search char) replacement chars (so j <= rlen always) + */ + for (i = 0; i < tlen; i++) + tbl->map[t[i]] = -1; + for (i = 0, j = 0; i < 256; i++) { - if (!tbl[i]) { - if (j >= (I32)rlen) { + if (!tbl->map[i]) { + if (j == rlen) { if (del) - tbl[i] = -2; + tbl->map[i] = -2; else if (rlen) - tbl[i] = r[j-1]; + tbl->map[i] = r[j-1]; else - tbl[i] = (short)i; + tbl->map[i] = (short)i; } else { - if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[i] = r[j++]; + tbl->map[i] = r[j++]; } + if ( tbl->map[i] >= 0 + && UVCHR_IS_INVARIANT((UV)i) + && !UVCHR_IS_INVARIANT((UV)(tbl->map[i])) + ) + grows = TRUE; } } - if (!del) { - if (!rlen) { - j = rlen; - if (!squash) - o->op_private |= OPpTRANS_IDENTICAL; - } - else if (j >= (I32)rlen) - j = rlen - 1; - else { - tbl = - (short *) - PerlMemShared_realloc(tbl, - (0x101+rlen-j) * sizeof(short)); - cPVOPo->op_pv = (char*)tbl; - } - tbl[0x100] = (short)(rlen - j); - for (i=0; i < (I32)rlen - j; i++) - tbl[0x101+i] = r[j+i]; - } + + ASSUME(j <= rlen); + excess = rlen - j; + + if (excess) { + /* More replacement chars than search chars: + * store excess replacement chars at end of main table. + */ + + struct_size += excess; + tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, + struct_size + excess * sizeof(short)); + tbl->size += excess; + cPVOPo->op_pv = (char*)tbl; + + for (i = 0; i < excess; i++) + tbl->map[i + 256] = r[j+i]; + } + else { + /* no more replacement chars than search chars */ + if (!rlen && !del && !squash) + o->op_private |= OPpTRANS_IDENTICAL; + } + + tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3; } else { if (!rlen && !del) { @@ -6566,26 +6709,30 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) { o->op_private |= OPpTRANS_IDENTICAL; } + for (i = 0; i < 256; i++) - tbl[i] = -1; - for (i = 0, j = 0; i < (I32)tlen; i++,j++) { - if (j >= (I32)rlen) { + tbl->map[i] = -1; + for (i = 0, j = 0; i < tlen; i++,j++) { + if (j >= rlen) { if (del) { - if (tbl[t[i]] == -1) - tbl[t[i]] = -2; + if (tbl->map[t[i]] == -1) + tbl->map[t[i]] = -2; continue; } --j; } - if (tbl[t[i]] == -1) { + if (tbl->map[t[i]] == -1) { if ( UVCHR_IS_INVARIANT(t[i]) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[t[i]] = r[j]; + grows = TRUE; + tbl->map[t[i]] = r[j]; } } + tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3; } + /* both non-utf8 and utf8 code paths end up here */ + warnins: if(del && rlen == tlen) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); @@ -6601,6 +6748,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) return o; } + /* =for apidoc Am|OP *|newPMOP|I32 type|I32 flags @@ -6802,9 +6950,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) op_null(scope); } - if (is_compiletime) - /* runtime finalizes as part of finalizing whole tree */ - optimize_optree(o); + /* XXX optimize_optree() must be called on o before + * CALL_PEEP(), as currently S_maybe_multiconcat() can't + * currently cope with a peephole-optimised optree. + * Calling optimize_optree() here ensures that condition + * is met, but may mean optimize_optree() is applied + * to the same optree later (where hopefully it won't do any + * harm as it can't convert an op to multiconcat if it's + * already been converted */ + optimize_optree(o); /* have to peep the DOs individually as we've removed it from * the op_next chain */ @@ -7193,9 +7347,10 @@ Perl_newGVOP(pTHX_ I32 type, I32 flags, GV *gv) Constructs, checks, and returns an op of any type that involves an embedded C-level pointer (PV). C is the opcode. C gives -the eight bits of C. C supplies the C-level pointer, which -must have been allocated using C; the memory will -be freed when the op is destroyed. +the eight bits of C. C supplies the C-level pointer. +Depending on the op type, the memory referenced by C may be freed +when the op is destroyed. If the op is of a freeing type, C must +have been allocated using C. =cut */ @@ -7549,11 +7704,24 @@ S_assignment_type(pTHX_ const OP *o) if (!o) return TRUE; - if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) - o = cUNOPo->op_first; + if (o->op_type == OP_SREFGEN) + { + OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; + type = kid->op_type; + flags = o->op_flags | kid->op_flags; + if (!(flags & OPf_PARENS) + && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || + kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) + return ASSIGN_REF; + ret = ASSIGN_REF; + } else { + if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) + o = cUNOPo->op_first; + flags = o->op_flags; + type = o->op_type; + ret = 0; + } - flags = o->op_flags; - type = o->op_type; if (type == OP_COND_EXPR) { OP * const sib = OpSIBLING(cLOGOPo->op_first); const I32 t = assignment_type(sib); @@ -7566,19 +7734,6 @@ S_assignment_type(pTHX_ const OP *o) return FALSE; } - if (type == OP_SREFGEN) - { - OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; - type = kid->op_type; - flags |= kid->op_flags; - if (!(flags & OPf_PARENS) - && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || - kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) - return ASSIGN_REF; - ret = ASSIGN_REF; - } - else ret = 0; - if (type == OP_LIST && (flags & OPf_WANT) == OPf_WANT_SCALAR && o->op_private & OPpLVAL_INTRO) @@ -8105,9 +8260,8 @@ S_new_logop(pTHX_ I32 type, I32 flags, OP** firstp, OP** otherp) && o2->op_private & OPpLVAL_INTRO && !(o2->op_private & OPpPAD_STATE)) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), - "Deprecated use of my() in false conditional. " - "This will be a fatal error in Perl 5.30"); + Perl_croak(aTHX_ "This use of my() in false conditional is " + "no longer allowed"); } *otherp = NULL; @@ -8916,6 +9070,13 @@ S_looks_like_bool(pTHX_ const OP *o) case OP_FLOP: return TRUE; + + case OP_INDEX: + case OP_RINDEX: + /* optimised-away (index() != -1) or similar comparison */ + if (o->op_private & OPpTRUEBOOL) + return TRUE; + return FALSE; case OP_CONST: /* Detect comparisons that have been optimized away */ @@ -8925,7 +9086,6 @@ S_looks_like_bool(pTHX_ const OP *o) return TRUE; else return FALSE; - /* FALLTHROUGH */ default: return FALSE; @@ -8936,8 +9096,8 @@ S_looks_like_bool(pTHX_ const OP *o) =for apidoc Am|OP *|newGIVENOP|OP *cond|OP *block|PADOFFSET defsv_off Constructs, checks, and returns an op tree expressing a C block. -C supplies the expression that will be locally assigned to a lexical -variable, and C supplies the body of the C construct; they +C supplies the expression to whose value C<$_> will be locally +aliased, and C supplies the body of the C construct; they are consumed by this function and become part of the constructed op tree. C must be zero (it used to identity the pad slot of lexical $_). @@ -9242,6 +9402,8 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) PERL_ARGS_ASSERT_NEWMYSUB; + PL_hints |= HINT_BLOCK_SCOPE; + /* Find the pad slot for storing the new sub. We cannot use PL_comppad, as it is the pad owned by the new sub. We need to look in CvOUTSIDE and find the pad belonging to the enclos- @@ -9570,6 +9732,85 @@ Perl_newMYSUB(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, OP *block) return cv; } +/* +=for apidoc m|CV *|newATTRSUB_x|I32 floor|OP *o|OP *proto|OP *attrs|OP *block|bool o_is_gv + +Construct a Perl subroutine, also performing some surrounding jobs. + +This function is expected to be called in a Perl compilation context, +and some aspects of the subroutine are taken from global variables +associated with compilation. In particular, C represents +the subroutine that is currently being compiled. It must be non-null +when this function is called, and some aspects of the subroutine being +constructed are taken from it. The constructed subroutine may actually +be a reuse of the C object, but will not necessarily be so. + +If C is null then the subroutine will have no body, and for the +time being it will be an error to call it. This represents a forward +subroutine declaration such as S>. If C is +non-null then it provides the Perl code of the subroutine body, which +will be executed when the subroutine is called. This body includes +any argument unwrapping code resulting from a subroutine signature or +similar. The pad use of the code must correspond to the pad attached +to C. The code is not expected to include a C or +C op; this function will add such an op. C is consumed +by this function and will become part of the constructed subroutine. + +C specifies the subroutine's prototype, unless one is supplied +as an attribute (see below). If C is null, then the subroutine +will not have a prototype. If C is non-null, it must point to a +C op whose value is a string, and the subroutine will have that +string as its prototype. If a prototype is supplied as an attribute, the +attribute takes precedence over C, but in that case C should +preferably be null. In any case, C is consumed by this function. + +C supplies attributes to be applied the subroutine. A handful of +attributes take effect by built-in means, being applied to C +immediately when seen. Other attributes are collected up and attached +to the subroutine by this route. C may be null to supply no +attributes, or point to a C op for a single attribute, or point +to a C op whose children apart from the C are C +ops for one or more attributes. Each C op must be a string, +giving the attribute name optionally followed by parenthesised arguments, +in the manner in which attributes appear in Perl source. The attributes +will be applied to the sub by this function. C is consumed by +this function. + +If C is false and C is null, then the subroutine will +be anonymous. If C is false and C is non-null, then C +must point to a C op, which will be consumed by this function, +and its string value supplies a name for the subroutine. The name may +be qualified or unqualified, and if it is unqualified then a default +stash will be selected in some manner. If C is true, then C +doesn't point to an C at all, but is instead a cast pointer to a C +by which the subroutine will be named. + +If there is already a subroutine of the specified name, then the new +sub will either replace the existing one in the glob or be merged with +the existing one. A warning may be generated about redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ /* _x = extended */ CV * @@ -9615,9 +9856,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, Also, we may be called from load_module at run time, so PL_curstash (which sets CvSTASH) may not point to the stash the sub is stored in. */ + /* XXX This optimization is currently disabled for packages other + than main, since there was too much CPAN breakage. */ const I32 flags = ec ? GV_NOADD_NOINIT : (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop)) + || PL_curstash != PL_defstash || memchr(name, ':', namlen) || memchr(name, '\'', namlen) ? gv_fetch_flags : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL; @@ -9836,6 +10080,8 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, NULL, name, namlen, name_is_utf8 ? SVf_UTF8 : 0, const_sv ); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvFLAGS(cv) |= CvMETHOD(PL_compcv); } else { @@ -9938,6 +10184,8 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, mro_method_changed_in(PL_curstash); } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); if (!CvHASGV(cv)) { if (isGV(gv)) @@ -10026,12 +10274,15 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, process_special_blocks(floor, name, gv, cv); } } + assert(cv); done: + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (PL_parser) PL_parser->copline = NOLINE; LEAVE_SCOPE(floor); + assert(!cv || evanescent || SvREFCNT((SV*)cv) != 0); if (!evanescent) { #ifdef PERL_DEBUG_READONLY_OPS if (slab) @@ -10146,9 +10397,11 @@ S_process_special_blocks(pTHX_ I32 floor, const char *const fullname, } /* -=for apidoc newCONSTSUB +=for apidoc Am|CV *|newCONSTSUB|HV *stash|const char *name|SV *sv -See L. +Behaves like L, except that C is nul-terminated +rather than of counted length, and no flags are set. (This means that +C is always interpreted as Latin-1.) =cut */ @@ -10160,20 +10413,71 @@ Perl_newCONSTSUB(pTHX_ HV *stash, const char *name, SV *sv) } /* -=for apidoc newCONSTSUB_flags - -Creates a constant sub equivalent to Perl S> which is -eligible for inlining at compile-time. - -Currently, the only useful value for C is C. - -The newly created subroutine takes ownership of a reference to the passed in -SV. - -Passing C for SV creates a constant sub equivalent to S>, -which won't be called if used as a destructor, but will suppress the overhead -of a call to C. (This form, however, isn't eligible for inlining at -compile time.) +=for apidoc Am|CV *|newCONSTSUB_flags|HV *stash|const char *name|STRLEN len|U32 flags|SV *sv + +Construct a constant subroutine, also performing some surrounding +jobs. A scalar constant-valued subroutine is eligible for inlining +at compile-time, and in Perl code can be created by S>. Other kinds of constant subroutine have other treatment. + +The subroutine will have an empty prototype and will ignore any arguments +when called. Its constant behaviour is determined by C. If C +is null, the subroutine will yield an empty list. If C points to a +scalar, the subroutine will always yield that scalar. If C points +to an array, the subroutine will always yield a list of the elements of +that array in list context, or the number of elements in the array in +scalar context. This function takes ownership of one counted reference +to the scalar or array, and will arrange for the object to live as long +as the subroutine does. If C points to a scalar then the inlining +assumes that the value of the scalar will never change, so the caller +must ensure that the scalar is not subsequently written to. If C +points to an array then no such assumption is made, so it is ostensibly +safe to mutate the array or its elements, but whether this is really +supported has not been determined. + +The subroutine will have C set according to C. +Other aspects of the subroutine will be left in their default state. +The caller is free to mutate the subroutine beyond its initial state +after this function has returned. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol +name, in UTF-8 if C has the C bit set and in Latin-1 +otherwise. The name may be either qualified or unqualified. If the +name is unqualified then it defaults to being in the stash specified by +C if that is non-null, or to C if C is null. +The symbol is always added to the stash if necessary, with C +semantics. + +C should not have bits set other than C. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +Execution of the subroutine will likely be a no-op, unless C was +a tied array or the caller modified the subroutine in some interesting +way before it was executed. In the case of C, the treatment is +buggy: the sub will be executed when only half built, and may be deleted +prematurely, possibly causing a crash. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by +the phase's automatic run queue. A C subroutine may have been +destroyed already by the time this function returns, but currently bugs +occur in that case before the caller gets control. It is the caller's +responsibility to ensure that it knows which of these situations applies. =cut */ @@ -10220,6 +10524,8 @@ Perl_newCONSTSUB_flags(pTHX_ HV *stash, const char *name, STRLEN len, : const_sv_xsub, file ? file : "", "", &sv, XS_DYNAMIC_FILENAME | flags); + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvXSUBANY(cv).any_ptr = SvREFCNT_inc_simple(sv); CvCONST_on(cv); @@ -10266,6 +10572,78 @@ Perl_newXS_deffile(pTHX_ const char *name, XSUBADDR_t subaddr) ); } +/* +=for apidoc m|CV *|newXS_len_flags|const char *name|STRLEN len|XSUBADDR_t subaddr|const char *const filename|const char *const proto|SV **const_svp|U32 flags + +Construct an XS subroutine, also performing some surrounding jobs. + +The subroutine will have the entry point C. It will have +the prototype specified by the nul-terminated string C, or +no prototype if C is null. The prototype string is copied; +the caller can mutate the supplied string afterwards. If C +is non-null, it must be a nul-terminated filename, and the subroutine +will have its C set accordingly. By default C is set to +point directly to the supplied string, which must be static. If C +has the C bit set, then a copy of the string will +be taken instead. + +Other aspects of the subroutine will be left in their default state. +If anything else needs to be done to the subroutine for it to function +correctly, it is the caller's responsibility to do that after this +function has constructed it. However, beware of the subroutine +potentially being destroyed before this function returns, as described +below. + +If C is null then the subroutine will be anonymous, with its +C referring to an C<__ANON__> glob. If C is non-null then the +subroutine will be named accordingly, referenced by the appropriate glob. +C is a string of length C bytes giving a sigilless symbol name, +in UTF-8 if C has the C bit set and in Latin-1 otherwise. +The name may be either qualified or unqualified, with the stash defaulting +in the same manner as for C. C may contain +flag bits understood by C with the same meaning as +they have there, such as C. The symbol is always added to +the stash if necessary, with C semantics. + +If there is already a subroutine of the specified name, then the new sub +will replace the existing one in the glob. A warning may be generated +about the redefinition. If the old subroutine was C then the +decision about whether to warn is influenced by an expectation about +whether the new subroutine will become a constant of similar value. +That expectation is determined by C. (Note that the call to +this function doesn't make the new subroutine C in any case; +that is left to the caller.) If C is null then it indicates +that the new subroutine will not become a constant. If C +is non-null then it indicates that the new subroutine will become a +constant, and it points to an C that provides the constant value +that the subroutine will have. + +If the subroutine has one of a few special names, such as C or +C, then it will be claimed by the appropriate queue for automatic +running of phase-related subroutines. In this case the relevant glob will +be left not containing any subroutine, even if it did contain one before. +In the case of C, the subroutine will be executed and the reference +to it disposed of before this function returns, and also before its +prototype is set. If a C subroutine would not be sufficiently +constructed by this function to be ready for execution then the caller +must prevent this happening by giving the subroutine a different name. + +The function returns a pointer to the constructed subroutine. If the sub +is anonymous then ownership of one counted reference to the subroutine +is transferred to the caller. If the sub is named then the caller does +not get ownership of a reference. In most such cases, where the sub +has a non-phase name, the sub will be alive at the point it is returned +by virtue of being contained in the glob that names it. A phase-named +subroutine will usually be alive by virtue of the reference owned by the +phase's automatic run queue. But a C subroutine, having already +been executed, will quite likely have been destroyed already by the +time this function returns, making it erroneous for the caller to make +any use of the returned pointer. It is the caller's responsibility to +ensure that it knows which of these situations applies. + +=cut +*/ + CV * Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, XSUBADDR_t subaddr, const char *const filename, @@ -10274,6 +10652,7 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, { CV *cv; bool interleave = FALSE; + bool evanescent = FALSE; PERL_ARGS_ASSERT_NEWXS_LEN_FLAGS; @@ -10318,6 +10697,8 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, gv_method_changed(gv); /* newXS */ } } + assert(cv); + assert(SvREFCNT((SV*)cv) != 0); CvGV_set(cv, gv); if(filename) { @@ -10345,14 +10726,17 @@ Perl_newXS_len_flags(pTHX_ const char *name, STRLEN len, #endif if (name) - process_special_blocks(0, name, gv, cv); + evanescent = process_special_blocks(0, name, gv, cv); else CvANON_on(cv); } /* <- not a conditional branch */ + assert(cv); + assert(evanescent || SvREFCNT((SV*)cv) != 0); - sv_setpv(MUTABLE_SV(cv), proto); + if (!evanescent) sv_setpv(MUTABLE_SV(cv), proto); if (interleave) LEAVE; + assert(evanescent || SvREFCNT((SV*)cv) != 0); return cv; } @@ -10654,6 +11038,7 @@ Perl_ck_backtick(pTHX_ OP *o) OP *newop = NULL; OP *sibl; PERL_ARGS_ASSERT_CK_BACKTICK; + o = ck_fun(o); /* qx and `` have a null pushmark; CORE::readpipe has only one kid. */ if (o->op_flags & OPf_KIDS && (sibl = OpSIBLING(cUNOPo->op_first)) && (gv = gv_override("readpipe",8))) @@ -10679,12 +11064,6 @@ Perl_ck_bitop(pTHX_ OP *o) o->op_private = (U8)(PL_hints & HINT_INTEGER); - if (o->op_type == OP_NBIT_OR || o->op_type == OP_SBIT_OR - || o->op_type == OP_NBIT_XOR || o->op_type == OP_SBIT_XOR - || o->op_type == OP_NBIT_AND || o->op_type == OP_SBIT_AND - || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT) - Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE), - "The bitwise feature is experimental"); if (!(o->op_flags & OPf_STACKED) /* Not an assignment */ && OP_IS_INFIX_BIT(o->op_type)) { @@ -10847,7 +11226,10 @@ Perl_ck_concat(pTHX_ OP *o) /* reuse the padtmp returned by the concat child */ if (kid->op_type == OP_CONCAT && !(kid->op_private & OPpTARGET_MY) && !(kUNOP->op_first->op_flags & OPf_MOD)) + { o->op_flags |= OPf_STACKED; + o->op_private |= OPpCONCAT_NESTED; + } return o; } @@ -12182,8 +12564,6 @@ Perl_ck_sort(pTHX_ OP *o) SV ** const svp = hv_fetchs(hinthv, "sort", FALSE); if (svp) { const I32 sorthints = (I32)SvIV(*svp); - if ((sorthints & HINT_SORT_QUICKSORT) != 0) - o->op_private |= OPpSORT_QSORT; if ((sorthints & HINT_SORT_STABLE) != 0) o->op_private |= OPpSORT_STABLE; if ((sorthints & HINT_SORT_UNSTABLE) != 0) @@ -13382,7 +13762,10 @@ Perl_ck_substr(pTHX_ OP *o) if (kid->op_type == OP_NULL) kid = OpSIBLING(kid); if (kid) - kid->op_flags |= OPf_MOD; + /* Historically, substr(delete $foo{bar},...) has been allowed + with 4-arg substr. Keep it working by applying entersub + lvalue context. */ + op_lvalue(kid, OP_ENTERSUB); } return o; @@ -14323,7 +14706,7 @@ S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) /* at this point we're looking for an OP_AELEM, OP_HELEM, * OP_EXISTS or OP_DELETE */ - /* if something like arybase (a.k.a $[ ) is in scope, + /* if a custom array/hash access checker is in scope, * abandon optimisation attempt */ if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM) && PL_check[o->op_type] != Perl_ck_null) @@ -15544,7 +15927,7 @@ Perl_rpeep(pTHX_ OP *o) o->op_flags &= ~(OPf_REF|OPf_WANT); o->op_flags |= want; o->op_private |= (o->op_type == OP_PADHV ? - OPpRV2HV_ISKEYS : OPpRV2HV_ISKEYS); + OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS); /* for keys(%lex), hold onto the OP_KEYS's targ * since padhv doesn't have its own targ to return * an int with */