X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/4155e4fe81b9987a30efea627e43a574f5460f73..b7064dd7b68928a129e4ffc9144054b2384f6e25:/sv.c diff --git a/sv.c b/sv.c index 160b132..9351076 100644 --- a/sv.c +++ b/sv.c @@ -80,7 +80,7 @@ many types, a pointer to the body (struct xrv, xpv, xpviv...), which contains fields specific to each type. Some types store all they need in the head, so don't have a body. -In all but the most memory-paranoid configuations (ex: PURIFY), heads +In all but the most memory-paranoid configurations (ex: PURIFY), heads and bodies are allocated out of arenas, which by default are approximately 4K chunks of memory parcelled up into N heads or bodies. Sv-bodies are allocated by their sv-type, guaranteeing size @@ -522,7 +522,7 @@ do_clean_named_objs(pTHX_ SV *const sv) if ( ((obj = MUTABLE_SV(GvCV(sv)) )) && SvOBJECT(obj)) { DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning named glob CV object:\n "), sv_dump(obj))); - GvCV(sv) = NULL; + GvCV_set(sv, NULL); SvREFCNT_dec(obj); } SvREFCNT_dec(sv); /* undo the inc above */ @@ -552,13 +552,15 @@ do_clean_named_io_objs(pTHX_ SV *const sv) } /* Void wrapper to pass to visit() */ +/* XXX static void do_curse(pTHX_ SV * const sv) { - if ((PL_stderrgv && GvGP(PL_stderrgv) && GvIO(PL_stderrgv) == sv) - || (PL_defoutgv && GvGP(PL_defoutgv) && GvIO(PL_defoutgv) == sv)) + if ((PL_stderrgv && GvGP(PL_stderrgv) && (SV*)GvIO(PL_stderrgv) == sv) + || (PL_defoutgv && GvGP(PL_defoutgv) && (SV*)GvIO(PL_defoutgv) == sv)) return; (void)curse(sv, 0); } +*/ /* =for apidoc sv_clean_objs @@ -582,7 +584,9 @@ Perl_sv_clean_objs(pTHX) visit(do_clean_named_io_objs, SVt_PVGV|SVpgv_GP, SVTYPEMASK|SVp_POK|SVpgv_GP); /* And if there are some very tenacious barnacles clinging to arrays, closures, or what have you.... */ + /* XXX This line breaks Tk and Gtk2. See [perl #82542]. visit(do_curse, SVs_OBJECT, SVs_OBJECT); + */ olddef = PL_defoutgv; PL_defoutgv = NULL; /* disable skip of PL_defoutgv */ if (olddef && isGV_with_GP(olddef)) @@ -1068,7 +1072,7 @@ Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, Remember, this is integer division: */ end = start + good_arena_size / body_size * body_size; - /* computed count doesnt reflect the 1st slot reservation */ + /* computed count doesn't reflect the 1st slot reservation */ #if defined(MYMALLOC) || defined(HAS_MALLOC_GOOD_SIZE) DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %p end %p arena-size %d (from %d) type %d " @@ -1576,6 +1580,7 @@ Perl_sv_setiv(pTHX_ register SV *const sv, const IV i) case SVt_PVCV: case SVt_PVFM: case SVt_PVIO: + /* diag_listed_as: Can't coerce %s to %s in %s */ Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), OP_DESC(PL_op)); default: NOOP; @@ -1685,6 +1690,7 @@ Perl_sv_setnv(pTHX_ register SV *const sv, const NV num) case SVt_PVCV: case SVt_PVFM: case SVt_PVIO: + /* diag_listed_as: Can't coerce %s to %s in %s */ Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), OP_DESC(PL_op)); default: NOOP; @@ -2890,7 +2896,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags retval -= stashnamelen; memcpy(retval, stashname, stashnamelen); } - /* retval may not neccesarily have reached the start of the + /* retval may not necessarily have reached the start of the buffer here. */ assert (retval >= buffer); @@ -3219,7 +3225,7 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ register SV *const sv, const I32 flags, ST return len; } } else { - (void) SvPV_force(sv,len); + (void) SvPV_force_flags(sv,len,flags & SV_GMAGIC); } } @@ -3427,6 +3433,29 @@ must_be_utf8: } } } + + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + /* Update pos. We do it at the end rather than during + * the upgrade, to avoid slowing down the common case + * (upgrade without pos) */ + MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); + if (mg) { + I32 pos = mg->mg_len; + if (pos > 0 && (U32)pos > invariant_head) { + U8 *d = (U8*) SvPVX(sv) + invariant_head; + STRLEN n = (U32)pos - invariant_head; + while (n > 0) { + if (UTF8_IS_START(*d)) + d++; + d++; + n--; + } + mg->mg_len = d - (U8*)SvPVX(sv); + } + } + if ((mg = mg_find(sv, PERL_MAGIC_utf8))) + magic_setutf8(sv,mg); /* clear UTF8 cache */ + } } } @@ -3461,11 +3490,28 @@ Perl_sv_utf8_downgrade(pTHX_ register SV *const sv, const bool fail_ok) if (SvCUR(sv)) { U8 *s; STRLEN len; + int mg_flags = SV_GMAGIC; if (SvIsCOW(sv)) { sv_force_normal_flags(sv, 0); } - s = (U8 *) SvPV(sv, len); + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + /* update pos */ + MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); + if (mg) { + I32 pos = mg->mg_len; + if (pos > 0) { + sv_pos_b2u(sv, &pos); + mg_flags = 0; /* sv_pos_b2u does get magic */ + mg->mg_len = pos; + } + } + if ((mg = mg_find(sv, PERL_MAGIC_utf8))) + magic_setutf8(sv,mg); /* clear UTF8 cache */ + + } + s = (U8 *) SvPV_flags(sv, len, mg_flags); + if (!utf8_to_bytes(s, &len)) { if (fail_ok) return FALSE; @@ -3526,7 +3572,7 @@ Perl_sv_utf8_decode(pTHX_ register SV *const sv) PERL_ARGS_ASSERT_SV_UTF8_DECODE; if (SvPOKp(sv)) { - const U8 *c; + const U8 *start, *c; const U8 *e; /* The octets may have got themselves encoded - get them back as @@ -3538,7 +3584,7 @@ Perl_sv_utf8_decode(pTHX_ register SV *const sv) /* it is actually just a matter of turning the utf8 flag on, but * we want to make sure everything inside is valid utf8 first. */ - c = (const U8 *) SvPVX_const(sv); + c = start = (const U8 *) SvPVX_const(sv); if (!is_utf8_string(c, SvCUR(sv)+1)) return FALSE; e = (const U8 *) SvEND(sv); @@ -3549,6 +3595,22 @@ Perl_sv_utf8_decode(pTHX_ register SV *const sv) break; } } + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + /* adjust pos to the start of a UTF8 char sequence */ + MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); + if (mg) { + I32 pos = mg->mg_len; + if (pos > 0) { + for (c = start + pos; c > start; c--) { + if (UTF8_IS_START(*c)) + break; + } + mg->mg_len = c - start; + } + } + if ((mg = mg_find(sv, PERL_MAGIC_utf8))) + magic_setutf8(sv,mg); /* clear UTF8 cache */ + } } return TRUE; } @@ -3623,7 +3685,7 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) /* If source has method cache entry, clear it */ if(GvCVGEN(sstr)) { SvREFCNT_dec(GvCV(sstr)); - GvCV(sstr) = NULL; + GvCV_set(sstr, NULL); GvCVGEN(sstr) = 0; } /* If source has a real method, then a method is @@ -3676,7 +3738,7 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) (void)SvOK_off(dstr); isGV_with_GP_on(dstr); GvINTRO_off(dstr); /* one-shot flag */ - GvGP(dstr) = gp_ref(GvGP(sstr)); + GvGP_set(dstr, gp_ref(GvGP(sstr))); if (SvTAINTED(sstr)) SvTAINT(dstr); if (GvIMPORTED(dstr) != GVf_IMPORTED @@ -3731,7 +3793,7 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) GvMULTI_on(dstr); switch (stype) { case SVt_PVCV: - location = (SV **) &GvCV(dstr); + location = (SV **) &(GvGP(dstr)->gp_cv); /* XXX bypassing GvCV_set */ import_flag = GVf_IMPORTED_CV; goto common; case SVt_PVHV: @@ -3757,7 +3819,7 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) /*if (GvCVGEN(dstr) && (GvCV(dstr) != (const CV *)sref || GvCVGEN(dstr))) {*/ if (GvCVGEN(dstr)) { SvREFCNT_dec(GvCV(dstr)); - GvCV(dstr) = NULL; + GvCV_set(dstr, NULL); GvCVGEN(dstr) = 0; /* Switch off cacheness. */ } } @@ -4129,7 +4191,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) if (GvGP(dstr)) gp_free(MUTABLE_GV(dstr)); - GvGP(dstr) = gp_ref(GvGP(gv)); + GvGP_set(dstr, gp_ref(GvGP(gv))); if (reset_isa) { HV * const stash = GvHV(dstr); @@ -4578,7 +4640,7 @@ Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 #endif if (flags & SV_HAS_TRAILING_NUL) { /* It's long enough - do nothing. - Specfically Perl_newCONSTSUB is relying on this. */ + Specifically Perl_newCONSTSUB is relying on this. */ } else { #ifdef DEBUGGING /* Force a move to shake out bugs in callers. */ @@ -4655,7 +4717,7 @@ we do the copy, and is also used locally. If C is set then a copy-on-write scalar drops its PV buffer (if any) and becomes SvPOK_off rather than making a copy. (Used where this scalar is about to be set to some other value.) In addition, the C parameter gets passed to -C when unrefing. C calls this function +C when unreffing. C calls this function with flags set to 0. =cut @@ -4734,7 +4796,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags) else if (SvFAKE(sv) && isGV_with_GP(sv)) sv_unglob(sv); else if (SvFAKE(sv) && SvTYPE(sv) == SVt_REGEXP) { - /* Need to downgrade the REGEXP to a simple(r) scalar. This is analagous + /* Need to downgrade the REGEXP to a simple(r) scalar. This is analogous to sv_unglob. We only need it here, so inline it. */ const svtype new_type = SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV; SV *const temp = newSV_type(new_type); @@ -5139,7 +5201,7 @@ Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how, mg->mg_ptr = savepvn(name, namlen); else if (namlen == HEf_SVKEY) { /* Yes, this is casting away const. This is only for the case of - HEf_SVKEY. I think we need to document this abberation of the + HEf_SVKEY. I think we need to document this aberation of the constness of the API, rather than making name non-const, as that change propagating outwards a long way. */ mg->mg_ptr = (char*)SvREFCNT_inc_simple_NN((SV *)name); @@ -5994,10 +6056,17 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) if (!curse(sv, 1)) goto get_next_sv; } if (type >= SVt_PVMG) { + /* Free back-references before magic, in case the magic calls + * Perl code that has weak references to sv. */ + if (type == SVt_PVHV) + Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv)); if (type == SVt_PVMG && SvPAD_OUR(sv)) { SvREFCNT_dec(SvOURSTASH(sv)); - } else if (SvMAGIC(sv)) + } else if (SvMAGIC(sv)) { + /* Free back-references before other types of magic. */ + sv_unmagic(sv, PERL_MAGIC_backref); mg_free(sv); + } if (type == SVt_PVMG && SvPAD_TYPED(sv)) SvREFCNT_dec(SvSTASH(sv)); } @@ -6036,7 +6105,6 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) if (PL_last_swash_hv == (const HV *)sv) { PL_last_swash_hv = NULL; } - Perl_hv_kill_backrefs(aTHX_ MUTABLE_HV(sv)); Perl_hv_undef_flags(aTHX_ MUTABLE_HV(sv), HV_NAME_SETALL); break; case SVt_PVAV: @@ -6860,7 +6928,7 @@ S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN b /* Cache has 2 slots in use, and we know three potential pairs. Keep the two that give the lowest RMS distance. Do the - calcualation in bytes simply because we always know the byte + calculation in bytes simply because we always know the byte length. squareroot has the same ordering as the positive value, so don't bother with the actual square root. */ const float existing = THREEWAY_SQUARE(0, cache[3], cache[1], blen); @@ -7753,7 +7821,7 @@ screamer2: } else { cnt = PerlIO_read(fp,(char*)buf, sizeof(buf)); - /* Accomodate broken VAXC compiler, which applies U8 cast to + /* Accommodate broken VAXC compiler, which applies U8 cast to * both args of ?: operator, causing EOF to change into 255 */ if (cnt > 0) @@ -8224,11 +8292,11 @@ Perl_newSVpvn_flags(pTHX_ const char *const s, const STRLEN len, const U32 flags sv_setpvn(sv,s,len); /* This code used to a sv_2mortal(), however we now unroll the call to sv_2mortal() - * and do what it does outselves here. + * and do what it does ourselves here. * Since we have asserted that flags can only have the SVf_UTF8 and/or SVs_TEMP flags * set above we can use it to enable the sv flags directly (bypassing SvTEMP_on), which * in turn means we dont need to mask out the SVf_UTF8 flag below, which means that we - * eleminate quite a few steps than it looks - Yves (explaining patch by gfx) + * eliminate quite a few steps than it looks - Yves (explaining patch by gfx) */ SvFLAGS(sv) |= flags; @@ -8985,6 +9053,7 @@ Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) } if ((SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM) || isGV_with_GP(sv)) + /* diag_listed_as: Can't coerce %s to %s in %s */ Perl_croak(aTHX_ "Can't coerce %s to string in %s", sv_reftype(sv,0), OP_DESC(PL_op)); s = sv_2pv_flags(sv, &len, flags); @@ -9085,7 +9154,7 @@ Perl_sv_reftype(pTHX_ const SV *const sv, const int ob) case SVt_PVLV: return (char *) (SvROK(sv) ? "REF" /* tied lvalues should appear to be - * scalars for backwards compatitbility */ + * scalars for backwards compatibility */ : (LvTYPE(sv) == 't' || LvTYPE(sv) == 'T') ? "SCALAR" : "LVALUE"); case SVt_PVAV: return "ARRAY"; @@ -10178,59 +10247,28 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, width = expect_number(&q); } - if (vectorize) { - if (vectorarg) { - if (args) - vecsv = va_arg(*args, SV*); - else if (evix) { - vecsv = (evix > 0 && evix <= svmax) - ? svargs[evix-1] : S_vcatpvfn_missing_argument(aTHX); - } else { - vecsv = svix < svmax - ? svargs[svix++] : S_vcatpvfn_missing_argument(aTHX); - } - dotstr = SvPV_const(vecsv, dotstrlen); - /* Keep the DO_UTF8 test *after* the SvPV call, else things go - bad with tied or overloaded values that return UTF8. */ - if (DO_UTF8(vecsv)) - is_utf8 = TRUE; - else if (has_utf8) { - vecsv = sv_mortalcopy(vecsv); - sv_utf8_upgrade(vecsv); - dotstr = SvPV_const(vecsv, dotstrlen); - is_utf8 = TRUE; - } - } - if (args) { - VECTORIZE_ARGS - } - else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) { - vecsv = svargs[efix ? efix-1 : svix++]; - vecstr = (U8*)SvPV_const(vecsv,veclen); - vec_utf8 = DO_UTF8(vecsv); - - /* if this is a version object, we need to convert - * back into v-string notation and then let the - * vectorize happen normally - */ - if (sv_derived_from(vecsv, "version")) { - char *version = savesvpv(vecsv); - if ( hv_exists(MUTABLE_HV(SvRV(vecsv)), "alpha", 5 ) ) { - Perl_warner(aTHX_ packWARN(WARN_INTERNAL), - "vector argument not supported with alpha versions"); - goto unknown; - } - vecsv = sv_newmortal(); - scan_vstring(version, version + veclen, vecsv); - vecstr = (U8*)SvPV_const(vecsv, veclen); - vec_utf8 = DO_UTF8(vecsv); - Safefree(version); - } - } - else { - vecstr = (U8*)""; - veclen = 0; + if (vectorize && vectorarg) { + /* vectorizing, but not with the default "." */ + if (args) + vecsv = va_arg(*args, SV*); + else if (evix) { + vecsv = (evix > 0 && evix <= svmax) + ? svargs[evix-1] : S_vcatpvfn_missing_argument(aTHX); + } else { + vecsv = svix < svmax + ? svargs[svix++] : S_vcatpvfn_missing_argument(aTHX); } + dotstr = SvPV_const(vecsv, dotstrlen); + /* Keep the DO_UTF8 test *after* the SvPV call, else things go + bad with tied or overloaded values that return UTF8. */ + if (DO_UTF8(vecsv)) + is_utf8 = TRUE; + else if (has_utf8) { + vecsv = sv_mortalcopy(vecsv); + sv_utf8_upgrade(vecsv); + dotstr = SvPV_const(vecsv, dotstrlen); + is_utf8 = TRUE; + } } if (asterisk) { @@ -10271,6 +10309,39 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, } } + if (vectorize) { + if (args) { + VECTORIZE_ARGS + } + else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) { + vecsv = svargs[efix ? efix-1 : svix++]; + vecstr = (U8*)SvPV_const(vecsv,veclen); + vec_utf8 = DO_UTF8(vecsv); + + /* if this is a version object, we need to convert + * back into v-string notation and then let the + * vectorize happen normally + */ + if (sv_derived_from(vecsv, "version")) { + char *version = savesvpv(vecsv); + if ( hv_exists(MUTABLE_HV(SvRV(vecsv)), "alpha", 5 ) ) { + Perl_warner(aTHX_ packWARN(WARN_INTERNAL), + "vector argument not supported with alpha versions"); + goto unknown; + } + vecsv = sv_newmortal(); + scan_vstring(version, version + veclen, vecsv); + vecstr = (U8*)SvPV_const(vecsv, veclen); + vec_utf8 = DO_UTF8(vecsv); + Safefree(version); + } + } + else { + vecstr = (U8*)""; + veclen = 0; + } + } + /* SIZE */ switch (*q) { @@ -11621,7 +11692,7 @@ Perl_rvpv_dup(pTHX_ SV *const dstr, const SV *const sstr, CLONE_PARAMS *const pa SvPV_set(dstr, SAVEPVN(SvPVX_const(sstr), SvLEN(sstr)-1)); if (SvREADONLY(sstr) && SvFAKE(sstr)) { /* Not that normal - actually sstr is copy on write. - But we are a true, independant SV, so: */ + But we are a true, independent SV, so: */ SvREADONLY_off(dstr); SvFAKE_off(dstr); } @@ -11847,7 +11918,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) GvSTASH(dstr) = hv_dup(GvSTASH(dstr), param); if (param->flags & CLONEf_JOIN_IN) Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dstr)), dstr); - GvGP(dstr) = gp_dup(GvGP(sstr), param); + GvGP_set(dstr, gp_dup(GvGP(sstr), param)); (void)GpREFCNT_inc(GvGP(dstr)); } break; @@ -12124,7 +12195,7 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) ncx->blk_loop.state_u.lazysv.end = sv_dup_inc(ncx->blk_loop.state_u.lazysv.end, param); /* We are taking advantage of av_dup_inc and sv_dup_inc - actually being the same function, and order equivalance of + actually being the same function, and order equivalence of the two unions. We can assert the later [but only at run time :-(] */ assert ((void *) &ncx->blk_loop.state_u.ary.ary == @@ -13110,7 +13181,10 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); PL_utf8_idstart = sv_dup_inc(proto_perl->Iutf8_idstart, param); + PL_utf8_xidstart = sv_dup_inc(proto_perl->Iutf8_xidstart, param); PL_utf8_idcont = sv_dup_inc(proto_perl->Iutf8_idcont, param); + PL_utf8_xidcont = sv_dup_inc(proto_perl->Iutf8_xidcont, param); + PL_utf8_foldable = hv_dup_inc(proto_perl->Iutf8_foldable, param); /* Did the locale setup indicate UTF-8? */ PL_utf8locale = proto_perl->Iutf8locale; @@ -13517,6 +13591,14 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) } FREETMPS; LEAVE; + if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { + /* clear pos and any utf8 cache */ + MAGIC * mg = mg_find(sv, PERL_MAGIC_regex_global); + if (mg) + mg->mg_len = -1; + if ((mg = mg_find(sv, PERL_MAGIC_utf8))) + magic_setutf8(sv,mg); /* clear UTF8 cache */ + } SvUTF8_on(sv); return SvPVX(sv); }