X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/daba3364ed9f39ba44b28575c032f6db52d47881..ea6efd2c816aee1bf9f4bfc59f5bf6b604e59cc2:/sv.c diff --git a/sv.c b/sv.c index 6811155..7d2eae5 100644 --- a/sv.c +++ b/sv.c @@ -1,12 +1,22 @@ /* sv.c * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, - * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others + * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall + * and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * - * "I wonder what the Entish is for 'yes' and 'no'," he thought. + */ + +/* + * 'I wonder what the Entish is for "yes" and "no",' he thought. + * --Pippin + * + * [p.480 of _The Lord of the Rings_, III/iv: "Treebeard"] + */ + +/* * * * This file contains the code that creates, manipulates and destroys @@ -147,7 +157,7 @@ Public API: =cut -============================================================================ */ + * ========================================================================= */ /* * "A time to plant, and a time to uproot what was planted..." @@ -371,8 +381,8 @@ and split it into a list of free SVs. 
=cut */ -void -Perl_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags) +static void +S_sv_add_arena(pTHX_ char *const ptr, const U32 size, const U32 flags) { dVAR; SV *const sva = MUTABLE_SV(ptr); @@ -2264,7 +2274,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) } else { if (isGV_with_GP(sv)) - return glob_2number((GV *)sv); + return glob_2number(MUTABLE_GV(sv)); if (!(SvFLAGS(sv) & SVs_PADTMP)) { if (!PL_localizing && ckWARN(WARN_UNINITIALIZED)) @@ -2635,7 +2645,7 @@ Perl_sv_2nv(pTHX_ register SV *const sv) } else { if (isGV_with_GP(sv)) { - glob_2number((GV *)sv); + glob_2number(MUTABLE_GV(sv)); return 0.0; } @@ -2831,13 +2841,13 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags STRLEN len; char *retval; char *buffer; - const SV *const referent = SvRV(sv); + SV *const referent = SvRV(sv); if (!referent) { len = 7; retval = buffer = savepvn("NULLREF", len); } else if (SvTYPE(referent) == SVt_REGEXP) { - const REGEXP * const re = (REGEXP *)referent; + REGEXP * const re = (REGEXP *)MUTABLE_PTR(referent); I32 seen_evals = 0; assert(re); @@ -2949,7 +2959,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags *s = '\0'; } else if (SvNOKp(sv)) { - const int olderrno = errno; + dSAVE_ERRNO; if (SvTYPE(sv) < SVt_PVNV) sv_upgrade(sv, SVt_PVNV); /* The +20 is pure guesswork. Configure test needed. --jhi */ @@ -2963,7 +2973,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags { Gconvert(SvNVX(sv), NV_DIG, 0, s); } - errno = olderrno; + RESTORE_ERRNO; #ifdef FIXNEGATIVEZERO if (*s == '-' && s[1] == '0' && !s[2]) { s[0] = '0'; @@ -2978,7 +2988,7 @@ Perl_sv_2pv_flags(pTHX_ register SV *const sv, STRLEN *const lp, const I32 flags } else { if (isGV_with_GP(sv)) - return glob_2pv((GV *)sv, lp); + return glob_2pv(MUTABLE_GV(sv), lp); if (lp) *lp = 0; @@ -3137,33 +3147,71 @@ Perl_sv_2bool(pTHX_ register SV *const sv) Converts the PV of an SV to its UTF-8-encoded form. 
Forces the SV to string form if it is not already. +Will C<mg_get> on C<sv> if appropriate. Always sets the SvUTF8 flag to avoid future validity checks even -if all the bytes have hibit clear. +if the whole string is the same in UTF-8 as not. +Returns the number of bytes in the converted string This is not as a general purpose byte encoding to Unicode interface: use the Encode extension for that. +=for apidoc sv_utf8_upgrade_nomg + +Like sv_utf8_upgrade, but doesn't do magic on C<sv> + =for apidoc sv_utf8_upgrade_flags Converts the PV of an SV to its UTF-8-encoded form. Forces the SV to string form if it is not already. Always sets the SvUTF8 flag to avoid future validity checks even -if all the bytes have hibit clear. If C<flags> has C<SV_GMAGIC> bit set, -will C<mg_get> on C<sv> if appropriate, else not. C<sv_utf8_upgrade> and +if all the bytes are invariant in UTF-8. If C<flags> has C<SV_GMAGIC> bit set, +will C<mg_get> on C<sv> if appropriate, else not. +Returns the number of bytes in the converted string +C<sv_utf8_upgrade> and C<sv_utf8_upgrade_nomg> are implemented in terms of this function. This is not as a general purpose byte encoding to Unicode interface: use the Encode extension for that. =cut + +The grow version is currently not externally documented. It adds a parameter, +extra, which is the number of unused bytes the string of 'sv' is guaranteed to +have free after it upon return. This allows the caller to reserve extra space +that it intends to fill, to avoid extra grows. + +Also externally undocumented for the moment is the flag SV_FORCE_UTF8_UPGRADE, +which can be used to tell this function to not first check to see if there are +any characters that are different in UTF-8 (variant characters) which would +force it to allocate a new string to sv, but to assume there are. Typically +this flag is used by a routine that has already parsed the string to find that +there are such characters, and passes this information on so that the work +doesn't have to be repeated. 
+ +(One might think that the calling routine could pass in the position of the +first such variant, so it wouldn't have to be found again. But that is not the +case, because typically when the caller is likely to use this flag, it won't be +calling this routine unless it finds something that won't fit into a byte. +Otherwise it tries to not upgrade and just use bytes. But some things that +do fit into a byte are variants in utf8, and the caller may not have been +keeping track of these.) + +If the routine itself changes the string, it adds a trailing NUL. Such a NUL +isn't guaranteed due to having other routines do the work in some input cases, +or if the input is already flagged as being in utf8. + +The speed of this could perhaps be improved for many cases if someone wanted to +write a fast function that counts the number of variant characters in a string, +especially if it could return the position of the first one. + */ STRLEN -Perl_sv_utf8_upgrade_flags(pTHX_ register SV *const sv, const I32 flags) +Perl_sv_utf8_upgrade_flags_grow(pTHX_ register SV *const sv, const I32 flags, STRLEN extra) { dVAR; - PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS; + PERL_ARGS_ASSERT_SV_UTF8_UPGRADE_FLAGS_GROW; if (sv == &PL_sv_undef) return 0; @@ -3171,14 +3219,17 @@ Perl_sv_utf8_upgrade_flags(pTHX_ register SV *const sv, const I32 flags) STRLEN len = 0; if (SvREADONLY(sv) && (SvPOKp(sv) || SvIOKp(sv) || SvNOKp(sv))) { (void) sv_2pv_flags(sv,&len, flags); - if (SvUTF8(sv)) + if (SvUTF8(sv)) { + if (extra) SvGROW(sv, SvCUR(sv) + extra); return len; + } } else { (void) SvPV_force(sv,len); } } if (SvUTF8(sv)) { + if (extra) SvGROW(sv, SvCUR(sv) + extra); return SvCUR(sv); } @@ -3186,42 +3237,204 @@ Perl_sv_utf8_upgrade_flags(pTHX_ register SV *const sv, const I32 flags) sv_force_normal_flags(sv, 0); } - if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) + if (PL_encoding && !(flags & SV_UTF8_NO_ENCODING)) { sv_recode_to_utf8(sv, PL_encoding); - else { /* Assume Latin-1/EBCDIC */ + if 
(extra) SvGROW(sv, SvCUR(sv) + extra); + return SvCUR(sv); + } + + if (SvCUR(sv) > 0) { /* Assume Latin-1/EBCDIC */ /* This function could be much more efficient if we - * had a FLAG in SVs to signal if there are any hibit + * had a FLAG in SVs to signal if there are any variant * chars in the PV. Given that there isn't such a flag - * make the loop as fast as possible. */ - const U8 * const s = (U8 *) SvPVX_const(sv); - const U8 * const e = (U8 *) SvEND(sv); - const U8 *t = s; + * make the loop as fast as possible (although there are certainly ways + * to speed this up, eg. through vectorization) */ + U8 * s = (U8 *) SvPVX_const(sv); + U8 * e = (U8 *) SvEND(sv); + U8 *t = s; + STRLEN two_byte_count = 0; + if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8; + + /* See if really will need to convert to utf8. We mustn't rely on our + * incoming SV being well formed and having a trailing '\0', as certain + * code in pp_formline can send us partially built SVs. */ + while (t < e) { const U8 ch = *t++; - /* Check for hi bit */ - if (!NATIVE_IS_INVARIANT(ch)) { - STRLEN len = SvCUR(sv); - /* *Currently* bytes_to_utf8() adds a '\0' after every string - it converts. This isn't documented. It's not clear if it's - a bad thing to be doing, and should be changed to do exactly - what the documentation says. If so, this code will have to - be changed. - As is, we mustn't rely on our incoming SV being well formed - and having a trailing '\0', as certain code in pp_formline - can send us partially built SVs. */ - U8 * const recoded = bytes_to_utf8((U8*)s, &len); - - SvPV_free(sv); /* No longer using what was there before. */ - SvPV_set(sv, (char*)recoded); - SvCUR_set(sv, len); - SvLEN_set(sv, len + 1); /* No longer know the real size. 
*/ - break; - } + if (NATIVE_IS_INVARIANT(ch)) continue; + + t--; /* t already incremented; re-point to first variant */ + two_byte_count = 1; + goto must_be_utf8; } - /* Mark as UTF-8 even if no hibit - saves scanning loop */ + + /* utf8 conversion not needed because all are invariants. Mark as + * UTF-8 even if no variant - saves scanning loop */ SvUTF8_on(sv); + return SvCUR(sv); + +must_be_utf8: + + /* Here, the string should be converted to utf8, either because of an + * input flag (two_byte_count = 0), or because a character that + * requires 2 bytes was found (two_byte_count = 1). t points either to + * the beginning of the string (if we didn't examine anything), or to + * the first variant. In either case, everything from s to t - 1 will + * occupy only 1 byte each on output. + * + * There are two main ways to convert. One is to create a new string + * and go through the input starting from the beginning, appending each + * converted value onto the new string as we go along. It's probably + * best to allocate enough space in the string for the worst possible + * case rather than possibly running out of space and having to + * reallocate and then copy what we've done so far. Since everything + * from s to t - 1 is invariant, the destination can be initialized + * with these using a fast memory copy + * + * The other way is to figure out exactly how big the string should be + * by parsing the entire input. Then you don't have to make it big + * enough to handle the worst possible case, and more importantly, if + * the string you already have is large enough, you don't have to + * allocate a new string, you can copy the last character in the input + * string to the final position(s) that will be occupied by the + * converted string and go backwards, stopping at t, since everything + * before that is invariant. + * + * There are advantages and disadvantages to each method. 
+ * + * In the first method, we can allocate a new string, do the memory + * copy from the s to t - 1, and then proceed through the rest of the + * string byte-by-byte. + * + * In the second method, we proceed through the rest of the input + * string just calculating how big the converted string will be. Then + * there are two cases: + * 1) if the string has enough extra space to handle the converted + * value. We go backwards through the string, converting until we + * get to the position we are at now, and then stop. If this + * position is far enough along in the string, this method is + * faster than the other method. If the memory copy were the same + * speed as the byte-by-byte loop, that position would be about + * half-way, as at the half-way mark, parsing to the end and back + * is one complete string's parse, the same amount as starting + * over and going all the way through. Actually, it would be + * somewhat less than half-way, as it's faster to just count bytes + * than to also copy, and we don't have the overhead of allocating + * a new string, changing the scalar to use it, and freeing the + * existing one. But if the memory copy is fast, the break-even + * point is somewhere after half way. The counting loop could be + * sped up by vectorization, etc, to move the break-even point + * further towards the beginning. + * 2) if the string doesn't have enough space to handle the converted + * value. A new string will have to be allocated, and one might + * as well, given that, start from the beginning doing the first + * method. We've spent extra time parsing the string and in + * exchange all we've gotten is that we know precisely how big to + * make the new one. Perl is more optimized for time than space, + * so this case is a loser. + * So what I've decided to do is not use the 2nd method unless it is + * guaranteed that a new string won't have to be allocated, assuming + * the worst case. 
I also decided not to put any more conditions on it + * than this, for now. It seems likely that, since the worst case is + * twice as big as the unknown portion of the string (plus 1), we won't + * be guaranteed enough space, causing us to go to the first method, + * unless the string is short, or the first variant character is near + * the end of it. In either of these cases, it seems best to use the + * 2nd method. The only circumstance I can think of where this would + * be really slower is if the string had once had much more data in it + * than it does now, but there is still a substantial amount in it */ + + { + STRLEN invariant_head = t - s; + STRLEN size = invariant_head + (e - t) * 2 + 1 + extra; + if (SvLEN(sv) < size) { + + /* Here, have decided to allocate a new string */ + + U8 *dst; + U8 *d; + + Newx(dst, size, U8); + + /* If no known invariants at the beginning of the input string, + * set so starts from there. Otherwise, can use memory copy to + * get up to where we are now, and then start from here */ + + if (invariant_head <= 0) { + d = dst; + } else { + Copy(s, dst, invariant_head, char); + d = dst + invariant_head; + } + + while (t < e) { + const UV uv = NATIVE8_TO_UNI(*t++); + if (UNI_IS_INVARIANT(uv)) + *d++ = (U8)UNI_TO_NATIVE(uv); + else { + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); + } + } + *d = '\0'; + SvPV_free(sv); /* No longer using pre-existing string */ + SvPV_set(sv, (char*)dst); + SvCUR_set(sv, d - dst); + SvLEN_set(sv, size); + } else { + + /* Here, have decided to get the exact size of the string. + * Currently this happens only when we know that there is + * guaranteed enough space to fit the converted string, so + * don't have to worry about growing. If two_byte_count is 0, + * then t points to the first byte of the string which hasn't + * been examined yet. Otherwise two_byte_count is 1, and t + * points to the first byte in the string that will expand to + * two. 
Depending on this, start examining at t or 1 after t. + * */ + + U8 *d = t + two_byte_count; + + + /* Count up the remaining bytes that expand to two */ + + while (d < e) { + const U8 chr = *d++; + if (! NATIVE_IS_INVARIANT(chr)) two_byte_count++; + } + + /* The string will expand by just the number of bytes that + * occupy two positions. But we are one afterwards because of + * the increment just above. This is the place to put the + * trailing NUL, and to set the length before we decrement */ + + d += two_byte_count; + SvCUR_set(sv, d - s); + *d-- = '\0'; + + + /* Having decremented d, it points to the position to put the + * very last byte of the expanded string. Go backwards through + * the string, copying and expanding as we go, stopping when we + * get to the part that is invariant the rest of the way down */ + + e--; + while (e >= t) { + const U8 ch = NATIVE8_TO_UNI(*e--); + if (UNI_IS_INVARIANT(ch)) { + *d-- = UNI_TO_NATIVE(ch); + } else { + *d-- = (U8)UTF8_EIGHT_BIT_LO(ch); + *d-- = (U8)UTF8_EIGHT_BIT_HI(ch); + } + } + } + } } + + /* Mark as UTF-8 even if no variant - saves scanning loop */ + SvUTF8_on(sv); return SvCUR(sv); } @@ -3229,7 +3442,8 @@ Perl_sv_utf8_upgrade_flags(pTHX_ register SV *const sv, const I32 flags) =for apidoc sv_utf8_downgrade Attempts to convert the PV of an SV from characters to bytes. -If the PV contains a character beyond byte, this conversion will fail; +If the PV contains a character that cannot fit +in a byte, this conversion will fail; in this case, either returns false or, if C is not true, croaks. 
@@ -3291,7 +3505,7 @@ Perl_sv_utf8_encode(pTHX_ register SV *const sv) sv_force_normal_flags(sv, 0); } if (SvREADONLY(sv)) { - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); } (void) sv_utf8_upgrade(sv); SvUTF8_off(sv); @@ -3403,17 +3617,11 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) GvSTASH(dstr) = GvSTASH(sstr); if (GvSTASH(dstr)) Perl_sv_add_backref(aTHX_ MUTABLE_SV(GvSTASH(dstr)), dstr); - gv_name_set((GV *)dstr, name, len, GV_ADD); + gv_name_set(MUTABLE_GV(dstr), name, len, GV_ADD); SvFAKE_on(dstr); /* can coerce to non-glob */ } -#ifdef GV_UNIQUE_CHECK - if (GvUNIQUE((GV*)dstr)) { - Perl_croak(aTHX_ PL_no_modify); - } -#endif - - if(GvGP((GV*)sstr)) { + if(GvGP(MUTABLE_GV(sstr))) { /* If source has method cache entry, clear it */ if(GvCVGEN(sstr)) { SvREFCNT_dec(GvCV(sstr)); @@ -3422,20 +3630,20 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) } /* If source has a real method, then a method is going to change */ - else if(GvCV((GV*)sstr)) { + else if(GvCV((const GV *)sstr)) { mro_changes = 1; } } /* If dest already had a real method, that's a change as well */ - if(!mro_changes && GvGP((GV*)dstr) && GvCVu((GV*)dstr)) { + if(!mro_changes && GvGP(MUTABLE_GV(dstr)) && GvCVu((const GV *)dstr)) { mro_changes = 1; } - if(strEQ(GvNAME((GV*)dstr),"ISA")) + if(strEQ(GvNAME((const GV *)dstr),"ISA")) mro_changes = 2; - gp_free((GV*)dstr); + gp_free(MUTABLE_GV(dstr)); isGV_with_GP_off(dstr); (void)SvOK_off(dstr); isGV_with_GP_on(dstr); @@ -3466,16 +3674,10 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) PERL_ARGS_ASSERT_GLOB_ASSIGN_REF; -#ifdef GV_UNIQUE_CHECK - if (GvUNIQUE((GV*)dstr)) { - Perl_croak(aTHX_ PL_no_modify); - } -#endif - if (intro) { GvINTRO_off(dstr); /* one-shot flag */ GvLINE(dstr) = CopLINE(PL_curcop); - GvEGV(dstr) = (GV*)dstr; + GvEGV(dstr) = MUTABLE_GV(dstr); } GvMULTI_on(dstr); switch (stype) { @@ -3516,7 +3718,7 @@ S_glob_assign_ref(pTHX_ SV *const 
dstr, SV *const sstr) if (stype == SVt_PVCV && (*location != sref || GvCVGEN(dstr))) { CV* const cv = MUTABLE_CV(*location); if (cv) { - if (!GvCVGEN((GV*)dstr) && + if (!GvCVGEN((const GV *)dstr) && (CvROOT(cv) || CvXSUB(cv))) { /* Redefining a sub - warning is mandatory if @@ -3543,12 +3745,12 @@ S_glob_assign_ref(pTHX_ SV *const dstr, SV *const sstr) (CvCONST(cv) ? "Constant subroutine %s::%s redefined" : "Subroutine %s::%s redefined"), - HvNAME_get(GvSTASH((GV*)dstr)), - GvENAME((GV*)dstr)); + HvNAME_get(GvSTASH((const GV *)dstr)), + GvENAME(MUTABLE_GV(dstr))); } } if (!intro) - cv_ckproto_len(cv, (GV*)dstr, + cv_ckproto_len(cv, (const GV *)dstr, SvPOK(sref) ? SvPVX_const(sref) : NULL, SvPOK(sref) ? SvCUR(sref) : 0); } @@ -3798,7 +4000,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) GV *gv = gv_fetchsv(sstr, GV_ADD, SVt_PVGV); if (dstr != (const SV *)gv) { if (GvGP(dstr)) - gp_free((GV*)dstr); + gp_free(MUTABLE_GV(dstr)); GvGP(dstr) = gp_ref(GvGP(gv)); } } @@ -3860,7 +4062,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) && ((flags & SV_COW_SHARED_HASH_KEYS) ? (!((sflags & CAN_COW_MASK) == CAN_COW_FLAGS && (SvFLAGS(dstr) & CAN_COW_MASK) == CAN_COW_FLAGS - && SvTYPE(sstr) >= SVt_PVIV)) + && SvTYPE(sstr) >= SVt_PVIV && SvTYPE(sstr) != SVt_PVFM)) : 1) #endif ) { @@ -3883,12 +4085,6 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) } #ifdef PERL_OLD_COPY_ON_WRITE if (!isSwipe) { - /* I believe I should acquire a global SV mutex if - it's a COW sv (not a shared hash key) to stop - it going un copy-on-write. 
- If the source SV has gone un copy on write between up there - and down here, then (assert() that) it is of the correct - form to make it copy on write again */ if ((sflags & (SVf_FAKE | SVf_READONLY)) != (SVf_FAKE | SVf_READONLY)) { SvREADONLY_on(sstr); @@ -3931,7 +4127,6 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) SvCUR_set(dstr, cur); SvREADONLY_on(dstr); SvFAKE_on(dstr); - /* Relesase a global SV mutex. */ } else { /* Passes the swipe test. */ @@ -3987,7 +4182,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV* sstr, const I32 flags) /* FAKE globs can get coerced, so need to turn this off temporarily if it is on. */ SvFAKE_off(sstr); - gv_efullname3(dstr, (GV *)sstr, "*"); + gv_efullname3(dstr, MUTABLE_GV(sstr), "*"); SvFLAGS(sstr) |= wasfake; } else @@ -4335,7 +4530,6 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags) #ifdef PERL_OLD_COPY_ON_WRITE if (SvREADONLY(sv)) { - /* At this point I believe I should acquire a global SV mutex. */ if (SvFAKE(sv)) { const char * const pvx = SvPVX_const(sv); const STRLEN len = SvLEN(sv); @@ -4375,8 +4569,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags) } } else if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); - /* At this point I believe that I can drop the global SV mutex. 
*/ + Perl_croak(aTHX_ "%s", PL_no_modify); } #else if (SvREADONLY(sv)) { @@ -4393,7 +4586,7 @@ Perl_sv_force_normal_flags(pTHX_ register SV *const sv, const U32 flags) unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } else if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); } #endif if (SvROK(sv)) @@ -4582,7 +4775,8 @@ Perl_sv_catsv_flags(pTHX_ SV *const dsv, register SV *const ssv, const I32 flags spv = SvPV_const(csv, slen); } else - sv_utf8_upgrade_nomg(dsv); + /* Leave enough space for the cat that's about to happen */ + sv_utf8_upgrade_flags_grow(dsv, 0, slen); } sv_catpvn_nomg(dsv, spv, slen); } @@ -4806,7 +5000,7 @@ Perl_sv_magic(pTHX_ register SV *const sv, SV *const obj, const int how, && how != PERL_MAGIC_backref ) { - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); } } if (SvMAGICAL(sv) || (how == PERL_MAGIC_taint && SvTYPE(sv) >= SVt_PVMG)) { @@ -5440,7 +5634,14 @@ Perl_sv_clear(pTHX_ register SV *const sv) CV* destructor; stash = SvSTASH(sv); destructor = StashHANDLER(stash,DESTROY); - if (destructor) { + if (destructor + /* A constant subroutine can have no side effects, so + don't bother calling it. */ + && !CvCONST(destructor) + /* Don't bother calling an empty destructor */ + && (CvISXSUB(destructor) + || CvSTART(destructor)->op_next->op_type != OP_LEAVESUB)) + { SV* const tmpref = newRV(sv); SvREADONLY_on(tmpref); /* DESTROY() could be naughty */ ENTER; @@ -5539,9 +5740,10 @@ Perl_sv_clear(pTHX_ register SV *const sv) SvREFCNT_dec(LvTARG(sv)); case SVt_PVGV: if (isGV_with_GP(sv)) { - if(GvCVu((GV*)sv) && (stash = GvSTASH((GV*)sv)) && HvNAME_get(stash)) + if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) + && HvNAME_get(stash)) mro_method_changed_in(stash); - gp_free((GV*)sv); + gp_free(MUTABLE_GV(sv)); if (GvNAME_HEK(sv)) unshare_hek(GvNAME_HEK(sv)); /* If we're in a stash, we don't own a reference to it. 
However it does @@ -5552,7 +5754,7 @@ Perl_sv_clear(pTHX_ register SV *const sv) /* FIXME. There are probably more unreferenced pointers to SVs in the interpreter struct that we should check and tidy in a similar fashion to this: */ - if ((GV*)sv == PL_last_in_gv) + if ((const GV *)sv == PL_last_in_gv) PL_last_in_gv = NULL; case SVt_PVMG: case SVt_PVNV: @@ -5576,8 +5778,6 @@ Perl_sv_clear(pTHX_ register SV *const sv) #ifdef PERL_OLD_COPY_ON_WRITE else if (SvPVX_const(sv)) { if (SvIsCOW(sv)) { - /* I believe I need to grab the global SV mutex here and - then recheck the COW status. */ if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); sv_dump(sv); @@ -5588,7 +5788,6 @@ Perl_sv_clear(pTHX_ register SV *const sv) unshare_hek(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))); } - /* And drop it here. */ SvFAKE_off(sv); } else if (SvLEN(sv)) { Safefree(SvPVX_const(sv)); @@ -5756,7 +5955,7 @@ UTF-8 bytes as a single character. Handles magic and type coercion. */ /* - * The length is cached in PERL_UTF8_magic, in the mg_len field. Also the + * The length is cached in PERL_MAGIC_utf8, in the mg_len field. Also the * mg_ptr is used, by sv_pos_u2b() and sv_pos_b2u() - see the comments below. * (Note that the mg_len is not the length of the mg_ptr field. * This allows the cache to store the character length of the string without @@ -5985,7 +6184,7 @@ type coercion. /* * sv_pos_u2b() uses, like sv_pos_b2u(), the mg_ptr of the potential - * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and * byte offsets. See also the comments of S_utf8_mg_pos_cache_update(). * */ @@ -6228,7 +6427,7 @@ Handles magic and type coercion. /* * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential - * PERL_UTF8_magic of the sv to store the mapping between UTF-8 and + * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and * byte offsets. 
* */ @@ -7023,7 +7222,7 @@ Perl_sv_inc(pTHX_ register SV *const sv) sv_force_normal_flags(sv, 0); if (SvREADONLY(sv)) { if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); } if (SvROK(sv)) { IV i; @@ -7186,7 +7385,7 @@ Perl_sv_dec(pTHX_ register SV *const sv) sv_force_normal_flags(sv, 0); if (SvREADONLY(sv)) { if (IN_PERL_RUNTIME) - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); } if (SvROK(sv)) { IV i; @@ -7836,7 +8035,7 @@ Perl_sv_reset(pTHX_ register const char *s, HV *const stash) if (!todo[(U8)*HeKEY(entry)]) continue; - gv = (GV*)HeVAL(entry); + gv = MUTABLE_GV(HeVAL(entry)); sv = GvSV(gv); if (sv) { if (SvTHINKFIRST(sv)) { @@ -7897,7 +8096,7 @@ Perl_sv_2io(pTHX_ SV *const sv) break; case SVt_PVGV: if (isGV_with_GP(sv)) { - gv = (GV*)sv; + gv = MUTABLE_GV(sv); io = GvIO(gv); if (!io) Perl_croak(aTHX_ "Bad filehandle: %s", GvNAME(gv)); @@ -7957,7 +8156,7 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref) return NULL; case SVt_PVGV: if (isGV_with_GP(sv)) { - gv = (GV*)sv; + gv = MUTABLE_GV(sv); *gvp = gv; *st = GvESTASH(gv); goto fix_gv; @@ -7978,13 +8177,13 @@ Perl_sv_2cv(pTHX_ SV *sv, HV **const st, GV **const gvp, const I32 lref) return cv; } else if(isGV_with_GP(sv)) - gv = (GV*)sv; + gv = MUTABLE_GV(sv); else Perl_croak(aTHX_ "Not a subroutine reference"); } else if (isGV_with_GP(sv)) { SvGETMAGIC(sv); - gv = (GV*)sv; + gv = MUTABLE_GV(sv); } else gv = gv_fetchsv(sv, lref, SVt_PVCV); /* Calls get magic */ @@ -8472,7 +8671,7 @@ Perl_sv_bless(pTHX_ SV *const sv, HV *const stash) if (SvIsCOW(tmpRef)) sv_force_normal_flags(tmpRef, 0); if (SvREADONLY(tmpRef)) - Perl_croak(aTHX_ PL_no_modify); + Perl_croak(aTHX_ "%s", PL_no_modify); if (SvOBJECT(tmpRef)) { if (SvTYPE(tmpRef) != SVt_PVIO) --PL_sv_objcount; @@ -8514,12 +8713,13 @@ S_sv_unglob(pTHX_ SV *const sv) assert(SvTYPE(sv) == SVt_PVGV); SvFAKE_off(sv); - gv_efullname3(temp, (GV *) sv, "*"); + gv_efullname3(temp, 
MUTABLE_GV(sv), "*"); if (GvGP(sv)) { - if(GvCVu((GV*)sv) && (stash = GvSTASH((GV*)sv)) && HvNAME_get(stash)) + if(GvCVu((const GV *)sv) && (stash = GvSTASH(MUTABLE_GV(sv))) + && HvNAME_get(stash)) mro_method_changed_in(stash); - gp_free((GV*)sv); + gp_free(MUTABLE_GV(sv)); } if (GvSTASH(sv)) { sv_del_backref(MUTABLE_SV(GvSTASH(sv)), sv); @@ -9111,6 +9311,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, STRLEN esignlen = 0; const char *eptr = NULL; + const char *fmtstart; STRLEN elen = 0; SV *vecsv = NULL; const U8 *vecstr = NULL; @@ -9151,6 +9352,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, if (q++ >= patend) break; + fmtstart = q; + /* We allow format specification elements in this order: \d+\$ explicit format parameter index @@ -9467,12 +9670,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, if (args) { eptr = va_arg(*args, char*); if (eptr) -#ifdef MACOS_TRADITIONAL - /* On MacOS, %#s format is used for Pascal strings */ - if (alt) - elen = *eptr++; - else -#endif elen = strlen(eptr); else { eptr = (char *)nullstr; @@ -9546,8 +9743,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, case 'l': iv = va_arg(*args, long); break; case 'V': iv = va_arg(*args, IV); break; default: iv = va_arg(*args, int); break; + case 'q': #ifdef HAS_QUAD - case 'q': iv = va_arg(*args, Quad_t); break; + iv = va_arg(*args, Quad_t); break; +#else + goto unknown; #endif } } @@ -9558,8 +9758,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, case 'l': iv = (long)tiv; break; case 'V': default: iv = tiv; break; + case 'q': #ifdef HAS_QUAD - case 'q': iv = (Quad_t)tiv; break; + iv = (Quad_t)tiv; break; +#else + goto unknown; #endif } } @@ -9631,8 +9834,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, case 'l': uv = va_arg(*args, unsigned long); break; case 'V': uv = 
va_arg(*args, UV); break; default: uv = va_arg(*args, unsigned); break; + case 'q': #ifdef HAS_QUAD - case 'q': uv = va_arg(*args, Uquad_t); break; + uv = va_arg(*args, Uquad_t); break; +#else + goto unknown; #endif } } @@ -9643,8 +9849,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, case 'l': uv = (unsigned long)tuv; break; case 'V': default: uv = tuv; break; + case 'q': #ifdef HAS_QUAD - case 'q': uv = (Uquad_t)tuv; break; + uv = (Uquad_t)tuv; break; +#else + goto unknown; #endif } } @@ -9930,8 +10139,11 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, default: *(va_arg(*args, int*)) = i; break; case 'l': *(va_arg(*args, long*)) = i; break; case 'V': *(va_arg(*args, IV*)) = i; break; + case 'q': #ifdef HAS_QUAD - case 'q': *(va_arg(*args, Quad_t*)) = i; break; + *(va_arg(*args, Quad_t*)) = i; break; +#else + goto unknown; #endif } } @@ -9950,16 +10162,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, SV * const msg = sv_newmortal(); Perl_sv_setpvf(aTHX_ msg, "Invalid conversion in %sprintf: ", (PL_op->op_type == OP_PRTF) ? "" : "s"); - if (c) { - if (isPRINT(c)) - Perl_sv_catpvf(aTHX_ msg, - "\"%%%c\"", c & 0xFF); - else - Perl_sv_catpvf(aTHX_ msg, - "\"%%\\%03"UVof"\"", - (UV)c & 0xFF); - } else + if (fmtstart < patend) { + const char * const fmtend = q < patend ? 
q : patend; + const char * f; + sv_catpvs(msg, "\"%"); + for (f = fmtstart; f < fmtend; f++) { + if (isPRINT(*f)) { + sv_catpvn(msg, f, 1); + } else { + Perl_sv_catpvf(aTHX_ msg, + "\\%03"UVof, (UV)*f & 0xFF); + } + } + sv_catpvs(msg, "\""); + } else { sv_catpvs(msg, "end of string"); + } Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, SVfARG(msg)); /* yes, this is reentrant */ } @@ -10001,13 +10219,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, have = esignlen + zeros + elen; if (have < zeros) - Perl_croak_nocontext(PL_memory_wrap); + Perl_croak_nocontext("%s", PL_memory_wrap); need = (have > width ? have : width); gap = need - have; if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1)) - Perl_croak_nocontext(PL_memory_wrap); + Perl_croak_nocontext("%s", PL_memory_wrap); SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1); p = SvEND(sv); if (esignlen && fill == '0') { @@ -10072,7 +10290,7 @@ ptr_table_* functions. =cut -============================================================================*/ + * =========================================================================*/ #if defined(USE_ITHREADS) @@ -10096,8 +10314,8 @@ ptr_table_* functions. #define cv_dup_inc(s,t) MUTABLE_CV(SvREFCNT_inc(sv_dup((const SV *)s,t))) #define io_dup(s,t) MUTABLE_IO(sv_dup((const SV *)s,t)) #define io_dup_inc(s,t) MUTABLE_IO(SvREFCNT_inc(sv_dup((const SV *)s,t))) -#define gv_dup(s,t) (GV*)sv_dup((const SV *)s,t) -#define gv_dup_inc(s,t) (GV*)SvREFCNT_inc(sv_dup((const SV *)s,t)) +#define gv_dup(s,t) MUTABLE_GV(sv_dup((const SV *)s,t)) +#define gv_dup_inc(s,t) MUTABLE_GV(SvREFCNT_inc(sv_dup((const SV *)s,t))) #define SAVEPV(p) ((p) ? savepv(p) : NULL) #define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) @@ -10527,8 +10745,8 @@ Perl_rvpv_dup(pTHX_ SV *const dstr, const SV *const sstr, CLONE_PARAMS *const pa if (SvROK(sstr)) { SvRV_set(dstr, SvWEAKREF(sstr) - ? sv_dup(SvRV(sstr), param) - : sv_dup_inc(SvRV(sstr), param)); + ? 
sv_dup(SvRV_const(sstr), param) + : sv_dup_inc(SvRV_const(sstr), param)); } else if (SvPVX_const(sstr)) { @@ -10556,7 +10774,7 @@ Perl_rvpv_dup(pTHX_ SV *const dstr, const SV *const sstr, CLONE_PARAMS *const pa } else { /* Some other special case - random pointer */ - SvPV_set(dstr, SvPVX(sstr)); + SvPV_set(dstr, (char *) SvPVX_const(sstr)); } } } @@ -10661,9 +10879,6 @@ Perl_sv_dup(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) break; case SVt_PVGV: - if (GvUNIQUE((GV*)sstr)) { - NOOP; /* Do sharing here, and fall through */ - } case SVt_PVIO: case SVt_PVFM: case SVt_PVHV: @@ -10782,7 +10997,8 @@ Perl_sv_dup(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) IoBOTTOM_NAME(dstr) = SAVEPV(IoBOTTOM_NAME(dstr)); break; case SVt_PVAV: - if (AvARRAY((const AV *)sstr)) { + /* avoid cloning an empty array */ + if (AvARRAY((const AV *)sstr) && AvFILLp((const AV *)sstr) >= 0) { SV **dst_ary, **src_ary; SSize_t items = AvFILLp((const AV *)sstr) + 1; @@ -10807,6 +11023,8 @@ Perl_sv_dup(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) else { AvARRAY(MUTABLE_AV(dstr)) = NULL; AvALLOC((const AV *)dstr) = (SV**)NULL; + AvMAX( (const AV *)dstr) = -1; + AvFILLp((const AV *)dstr) = -1; } break; case SVt_PVHV: @@ -10872,8 +11090,7 @@ Perl_sv_dup(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) CvROOT(dstr) = OpREFCNT_inc(CvROOT(dstr)); OP_REFCNT_UNLOCK; if (CvCONST(dstr) && CvISXSUB(dstr)) { - CvXSUBANY(dstr).any_ptr = GvUNIQUE(CvGV(dstr)) ? 
- SvREFCNT_inc(CvXSUBANY(dstr).any_ptr) : + CvXSUBANY(dstr).any_ptr = sv_dup_inc((const SV *)CvXSUBANY(dstr).any_ptr, param); } /* don't dup if copying back - CvGV isn't refcounted, so the @@ -10965,7 +11182,8 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) ncx->blk_loop.oldcomppad); } else { ncx->blk_loop.oldcomppad - = (PAD*)gv_dup((GV*)ncx->blk_loop.oldcomppad, param); + = (PAD*)gv_dup((const GV *)ncx->blk_loop.oldcomppad, + param); } break; case CXt_FORMAT: @@ -11189,7 +11407,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) gp = (GP*)POPPTR(ss,ix); TOPPTR(nss,ix) = gp = gp_dup(gp, param); (void)GpREFCNT_inc(gp); - gv = (GV*)POPPTR(ss,ix); + gv = (const GV *)POPPTR(ss,ix); TOPPTR(nss,ix) = gv_dup_inc(gv, param); break; case SAVEt_FREEOP: @@ -11218,16 +11436,16 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) else TOPPTR(nss,ix) = NULL; break; - case SAVEt_FREEPV: - c = (char*)POPPTR(ss,ix); - TOPPTR(nss,ix) = pv_dup_inc(c); - break; case SAVEt_DELETE: hv = (const HV *)POPPTR(ss,ix); TOPPTR(nss,ix) = hv_dup_inc(hv, param); + i = POPINT(ss,ix); + TOPINT(nss,ix) = i; + /* Fall through */ + case SAVEt_FREEPV: c = (char*)POPPTR(ss,ix); TOPPTR(nss,ix) = pv_dup_inc(c); - /* fall through */ + break; case SAVEt_STACK_POS: /* Position on Perl stack */ i = POPINT(ss,ix); TOPINT(nss,ix) = i; @@ -11267,8 +11485,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) TOPPTR(nss,ix) = ptr; break; case SAVEt_HINTS: - i = POPINT(ss,ix); - TOPINT(nss,ix) = i; ptr = POPPTR(ss,ix); if (ptr) { HINTS_REFCNT_LOCK; @@ -11276,6 +11492,8 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) HINTS_REFCNT_UNLOCK; } TOPPTR(nss,ix) = ptr; + i = POPINT(ss,ix); + TOPINT(nss,ix) = i; if (i & HINT_LOCALIZE_HH) { hv = (const HV *)POPPTR(ss,ix); TOPPTR(nss,ix) = hv_dup_inc(hv, param); @@ -11725,6 +11943,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_regex_pad = 
AvARRAY(PL_regex_padav); /* shortcuts to various I/O objects */ + PL_ofsgv = gv_dup(proto_perl->Iofsgv, param); PL_stdingv = gv_dup(proto_perl->Istdingv, param); PL_stderrgv = gv_dup(proto_perl->Istderrgv, param); PL_defgv = gv_dup(proto_perl->Idefgv, param); @@ -12071,7 +12290,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_curpm = proto_perl->Icurpm; /* XXX No PMOP ref count */ PL_rs = sv_dup_inc(proto_perl->Irs, param); PL_last_in_gv = gv_dup(proto_perl->Ilast_in_gv, param); - PL_ofs_sv = sv_dup_inc(proto_perl->Iofs_sv, param); PL_defoutgv = gv_dup_inc(proto_perl->Idefoutgv, param); PL_chopset = proto_perl->Ichopset; /* XXX never deallocated */ PL_toptarget = sv_dup_inc(proto_perl->Itoptarget, param); @@ -12126,6 +12344,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PTR2UV(PL_watchok)); } + PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param); + if (!(flags & CLONEf_KEEP_PTR_TABLE)) { ptr_table_free(PL_ptr_table); PL_ptr_table = NULL; @@ -12835,7 +13055,7 @@ Print appropriate "Use of uninitialized variable" warning */ void -Perl_report_uninit(pTHX_ SV* uninit_sv) +Perl_report_uninit(pTHX_ const SV *uninit_sv) { dVAR; if (PL_op) {