X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/7896dde7482a2851e73f0ac2c32d1c71f6e97dca..7bf15fe2240f4090d85a676abda2b04456b58caa:/op.c?ds=sidebyside diff --git a/op.c b/op.c index b59433c..146407b 100644 --- a/op.c +++ b/op.c @@ -1223,8 +1223,7 @@ S_cop_free(pTHX_ COP* cop) } STATIC void -S_forget_pmop(pTHX_ PMOP *const o - ) +S_forget_pmop(pTHX_ PMOP *const o) { HV * const pmstash = PmopSTASH(o); @@ -1545,7 +1544,8 @@ Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other) OpTYPE_set(logop, type); logop->op_first = first; logop->op_other = other; - logop->op_flags = OPf_KIDS; + if (first) + logop->op_flags = OPf_KIDS; while (kid && OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (kid) @@ -2722,7 +2722,6 @@ S_maybe_multiconcat(pTHX_ OP *o) } else if ( topop->op_type == OP_CONCAT && (topop->op_flags & OPf_STACKED) - && (cUNOPo->op_first->op_flags & OPf_MOD) && (!(topop->op_private & OPpCONCAT_NESTED)) ) { @@ -3203,16 +3202,16 @@ S_maybe_multiconcat(pTHX_ OP *o) OP *prev; /* set prev to the sibling *before* the arg to be cut out, - * e.g.: + * e.g. when cutting EXPR: * * | - * kid= CONST + * kid= CONCAT * | - * prev= CONST -- EXPR + * prev= CONCAT -- EXPR * | */ if (argp == args && kid->op_type != OP_CONCAT) { - /* in e.g. '$x . = f(1)' there's no RHS concat tree + /* in e.g. '$x .= f(1)' there's no RHS concat tree * so the expression to be cut isn't kid->op_last but * kid itself */ OP *o1, *o2; @@ -4080,7 +4079,10 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_RV2HV: if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { PL_modcount = RETURN_UNLIMITED_NUMBER; - return o; /* Treat \(@foo) like ordinary list. */ + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; } /* FALLTHROUGH */ case OP_RV2GV: @@ -4145,7 +4147,12 @@ Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) case OP_PADHV: PL_modcount = RETURN_UNLIMITED_NUMBER; if (type == OP_REFGEN && o->op_flags & OPf_PARENS) - return o; /* Treat \(@foo) like ordinary list. */ + { + /* Treat \(@foo) like ordinary list, but still mark it as modi- + fiable since some contexts need to know. */ + o->op_flags |= OPf_MOD; + return o; + } if (scalar_mod_type(o, type)) goto nomod; if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR @@ -6293,6 +6300,10 @@ Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) return fold_constants(op_integerize(op_std_init((OP *)binop))); } +/* Helper function for S_pmtrans(): comparison function to sort an array + * of codepoint range pairs. Sorts by start point, or if equal, by end + * point */ + static int uvcompare(const void *a, const void *b) __attribute__nonnull__(1) __attribute__nonnull__(2) @@ -6310,24 +6321,39 @@ static int uvcompare(const void *a, const void *b) return 0; } +/* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl + * containing the search and replacement strings, assemble into + * a translation table attached as o->op_pv. + * Free expr and repl. + * It expects the toker to have already set the + * OPpTRANS_COMPLEMENT + * OPpTRANS_SQUASH + * OPpTRANS_DELETE + * flags as appropriate; this function may add + * OPpTRANS_FROM_UTF + * OPpTRANS_TO_UTF + * OPpTRANS_IDENTICAL + * OPpTRANS_GROWS + * flags + */ + static OP * S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) { SV * const tstr = ((SVOP*)expr)->op_sv; - SV * const rstr = - ((SVOP*)repl)->op_sv; + SV * const rstr = ((SVOP*)repl)->op_sv; STRLEN tlen; STRLEN rlen; const U8 *t = (U8*)SvPV_const(tstr, tlen); const U8 *r = (U8*)SvPV_const(rstr, rlen); - I32 i; - I32 j; - I32 grows = 0; - short *tbl; - - const I32 complement = o->op_private & OPpTRANS_COMPLEMENT; - const I32 squash = o->op_private & OPpTRANS_SQUASH; - I32 del = o->op_private & OPpTRANS_DELETE; + Size_t i, j; + bool grows = FALSE; + OPtrans_map *tbl; + SSize_t struct_size; /* malloced size of table struct */ + + const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT); + const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH); + const bool del = cBOOL(o->op_private & OPpTRANS_DELETE); SV* swash; PERL_ARGS_ASSERT_PMTRANS; @@ -6341,6 +6367,14 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_TO_UTF; if (o->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { + + /* for utf8 translations, op_sv will be set to point to a swash + * containing codepoint ranges. This is done by first assembling + * a textual representation of the ranges in listsv then compiling + * it using swash_init(). For more details of the textual format, + * see L . + */ + SV* const listsv = newSVpvs("# comment\n"); SV* transv = NULL; const U8* tend = t + tlen; @@ -6382,15 +6416,24 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * odd. */ if (complement) { + /* utf8 and /c: + * replace t/tlen/tend with a version that has the ranges + * complemented + */ U8 tmpbuf[UTF8_MAXBYTES+1]; UV *cp; UV nextmin = 0; Newx(cp, 2*tlen, UV); i = 0; transv = newSVpvs(""); + + /* convert search string into array of (start,end) range + * codepoint pairs stored in cp[]. Most "ranges" will start + * and end at the same char */ while (t < tend) { cp[2*i] = utf8n_to_uvchr(t, tend-t, &ulen, flags); t += ulen; + /* the toker converts X-Y into (X, ILLEGAL_UTF8_BYTE, Y) */ if (t < tend && *t == ILLEGAL_UTF8_BYTE) { t++; cp[2*i+1] = utf8n_to_uvchr(t, tend-t, &ulen, flags); @@ -6401,7 +6444,19 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } i++; } + + /* sort the ranges */ qsort(cp, i, 2*sizeof(UV), uvcompare); + + /* Create a utf8 string containing the complement of the + * codepoint ranges. For example if cp[] contains [A,B], [C,D], + * then transv will contain the equivalent of: + * join '', map chr, 0, ILLEGAL_UTF8_BYTE, A - 1, + * B + 1, ILLEGAL_UTF8_BYTE, C - 1, + * D + 1, ILLEGAL_UTF8_BYTE, 0x7fffffff; + * A range of a single char skips the ILLEGAL_UTF8_BYTE and + * end cp. + */ for (j = 0; j < i; j++) { UV val = cp[2*j]; diff = val - nextmin; @@ -6419,6 +6474,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (val >= nextmin) nextmin = val + 1; } + t = uvchr_to_utf8(tmpbuf,nextmin); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); { @@ -6435,6 +6491,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!rlen && !del) { r = t; rlen = tlen; rend = tend; } + if (!squash) { if ((!rlen && !del) || t == r || (tlen == rlen && memEQ((char *)t, (char *)r, tlen))) @@ -6443,6 +6500,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } + /* extract char ranges from t and r and append them to listsv */ + while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { @@ -6515,9 +6574,11 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) tfirst += diff + 1; } + /* compile listsv into a swash and attach to o */ + none = ++max; if (del) - del = ++max; + ++max; if (max > 0xffff) bits = 32; @@ -6556,50 +6617,88 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) goto warnins; } - tbl = (short*)PerlMemShared_calloc( - (o->op_private & OPpTRANS_COMPLEMENT) && - !(o->op_private & OPpTRANS_DELETE) ? 258 : 256, - sizeof(short)); + /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup + * table. Entries with the value -1 indicate chars not to be + * translated, while -2 indicates a search char without a + * corresponding replacement char under /d. + * + * Normally, the table has 256 slots. However, in the presence of + * /c, the search charlist has an implicit \x{100}-\x{7fffffff} + * added, and if there are enough replacement chars to start pairing + * with the \x{100},... search chars, then a larger (> 256) table + * is allocated. + * + * In addition, regardless of whether under /c, an extra slot at the + * end is used to store the final repeating char, or -3 under an empty + * replacement list, or -2 under /d; which makes the runtime code + * easier. + * + * The toker will have already expanded char ranges in t and r. + */ + + /* Initially allocate 257-slot table: 256 for basic (non /c) usage, + * plus final slot for repeat/-2/-3. Later we realloc if excess > * 0. + * The OPtrans_map struct already contains one slot; hence the -1. + */ + struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short); + tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1); + tbl->size = 256; cPVOPo->op_pv = (char*)tbl; + if (complement) { - for (i = 0; i < (I32)tlen; i++) - tbl[t[i]] = -1; + Size_t excess; + + /* in this branch, j is a count of 'consumed' (i.e. paired off + * with a search char) replacement chars (so j <= rlen always) + */ + for (i = 0; i < tlen; i++) + tbl->map[t[i]] = -1; + for (i = 0, j = 0; i < 256; i++) { - if (!tbl[i]) { - if (j >= (I32)rlen) { + if (!tbl->map[i]) { + if (j == rlen) { if (del) - tbl[i] = -2; + tbl->map[i] = -2; else if (rlen) - tbl[i] = r[j-1]; + tbl->map[i] = r[j-1]; else - tbl[i] = (short)i; + tbl->map[i] = (short)i; } else { - if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[i] = r[j++]; + tbl->map[i] = r[j++]; } + if ( tbl->map[i] >= 0 + && UVCHR_IS_INVARIANT((UV)i) + && !UVCHR_IS_INVARIANT((UV)(tbl->map[i])) + ) + grows = TRUE; } } - if (!del) { - if (!rlen) { - j = rlen; - if (!squash) - o->op_private |= OPpTRANS_IDENTICAL; - } - else if (j >= (I32)rlen) - j = rlen - 1; - else { - tbl = - (short *) - PerlMemShared_realloc(tbl, - (0x101+rlen-j) * sizeof(short)); - cPVOPo->op_pv = (char*)tbl; - } - tbl[0x100] = (short)(rlen - j); - for (i=0; i < (I32)rlen - j; i++) - tbl[0x101+i] = r[j+i]; - } + + ASSUME(j <= rlen); + excess = rlen - j; + + if (excess) { + /* More replacement chars than search chars: + * store excess replacement chars at end of main table. + */ + + struct_size += excess; + tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, + struct_size + excess * sizeof(short)); + tbl->size += excess; + cPVOPo->op_pv = (char*)tbl; + + for (i = 0; i < excess; i++) + tbl->map[i + 256] = r[j+i]; + } + else { + /* no more replacement chars than search chars */ + if (!rlen && !del && !squash) + o->op_private |= OPpTRANS_IDENTICAL; + } + + tbl->map[tbl->size] = del ? -2 : rlen ? r[rlen - 1] : -3; } else { if (!rlen && !del) { @@ -6610,26 +6709,30 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) else if (!squash && rlen == tlen && memEQ((char*)t, (char*)r, tlen)) { o->op_private |= OPpTRANS_IDENTICAL; } + for (i = 0; i < 256; i++) - tbl[i] = -1; - for (i = 0, j = 0; i < (I32)tlen; i++,j++) { - if (j >= (I32)rlen) { + tbl->map[i] = -1; + for (i = 0, j = 0; i < tlen; i++,j++) { + if (j >= rlen) { if (del) { - if (tbl[t[i]] == -1) - tbl[t[i]] = -2; + if (tbl->map[t[i]] == -1) + tbl->map[t[i]] = -2; continue; } --j; } - if (tbl[t[i]] == -1) { + if (tbl->map[t[i]] == -1) { if ( UVCHR_IS_INVARIANT(t[i]) && ! UVCHR_IS_INVARIANT(r[j])) - grows = 1; - tbl[t[i]] = r[j]; + grows = TRUE; + tbl->map[t[i]] = r[j]; } } + tbl->map[tbl->size] = del ? -1 : rlen ? -1 : -3; } + /* both non-utf8 and utf8 code paths end up here */ + warnins: if(del && rlen == tlen) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); @@ -6645,6 +6748,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) return o; } + /* =for apidoc Am|OP *|newPMOP|I32 type|I32 flags @@ -6846,9 +6950,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) op_null(scope); } - if (is_compiletime) - /* runtime finalizes as part of finalizing whole tree */ - optimize_optree(o); + /* XXX optimize_optree() must be called on o before + * CALL_PEEP(), as currently S_maybe_multiconcat() can't + * currently cope with a peephole-optimised optree. + * Calling optimize_optree() here ensures that condition + * is met, but may mean optimize_optree() is applied + * to the same optree later (where hopefully it won't do any + * harm as it can't convert an op to multiconcat if it's + * already been converted */ + optimize_optree(o); /* have to peep the DOs individually as we've removed it from * the op_next chain */ @@ -8150,9 +8260,8 @@ S_new_logop(pTHX_ I32 type, I32 flags, OP** firstp, OP** otherp) && o2->op_private & OPpLVAL_INTRO && !(o2->op_private & OPpPAD_STATE)) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), - "Deprecated use of my() in false conditional. " - "This will be a fatal error in Perl 5.30"); + Perl_croak(aTHX_ "This use of my() in false conditional is " + "no longer allowed"); } *otherp = NULL; @@ -8961,6 +9070,13 @@ S_looks_like_bool(pTHX_ const OP *o) case OP_FLOP: return TRUE; + + case OP_INDEX: + case OP_RINDEX: + /* optimised-away (index() != -1) or similar comparison */ + if (o->op_private & OPpTRUEBOOL) + return TRUE; + return FALSE; case OP_CONST: /* Detect comparisons that have been optimized away */ @@ -8970,7 +9086,6 @@ S_looks_like_bool(pTHX_ const OP *o) return TRUE; else return FALSE; - /* FALLTHROUGH */ default: return FALSE; @@ -9741,9 +9856,12 @@ Perl_newATTRSUB_x(pTHX_ I32 floor, OP *o, OP *proto, OP *attrs, Also, we may be called from load_module at run time, so PL_curstash (which sets CvSTASH) may not point to the stash the sub is stored in. */ + /* XXX This optimization is currently disabled for packages other + than main, since there was too much CPAN breakage. */ const I32 flags = ec ? GV_NOADD_NOINIT : (IN_PERL_RUNTIME && PL_curstash != CopSTASH(PL_curcop)) + || PL_curstash != PL_defstash || memchr(name, ':', namlen) || memchr(name, '\'', namlen) ? gv_fetch_flags : GV_ADDMULTI | GV_NOINIT | GV_NOTQUAL; @@ -10946,12 +11064,6 @@ Perl_ck_bitop(pTHX_ OP *o) o->op_private = (U8)(PL_hints & HINT_INTEGER); - if (o->op_type == OP_NBIT_OR || o->op_type == OP_SBIT_OR - || o->op_type == OP_NBIT_XOR || o->op_type == OP_SBIT_XOR - || o->op_type == OP_NBIT_AND || o->op_type == OP_SBIT_AND - || o->op_type == OP_NCOMPLEMENT || o->op_type == OP_SCOMPLEMENT) - Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__BITWISE), - "The bitwise feature is experimental"); if (!(o->op_flags & OPf_STACKED) /* Not an assignment */ && OP_IS_INFIX_BIT(o->op_type)) { @@ -14594,7 +14706,7 @@ S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints) /* at this point we're looking for an OP_AELEM, OP_HELEM, * OP_EXISTS or OP_DELETE */ - /* if something like arybase (a.k.a $[ ) is in scope, + /* if a custom array/hash access checker is in scope, * abandon optimisation attempt */ if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM) && PL_check[o->op_type] != Perl_ck_null)