X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/6f3cc0ab249d90a03504c6915cff26c58d008b2b..e1aa2579a2006b68a3715befb8faa75a98bfb6cd:/sv.c diff --git a/sv.c b/sv.c index 75447d4..fa5295d 100644 --- a/sv.c +++ b/sv.c @@ -1462,9 +1462,7 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv)))); IoPAGE_LEN(sv) = 60; } - if (UNLIKELY(new_type == SVt_REGEXP)) - sv->sv_u.svu_rx = (regexp *)new_body; - else if (old_type < SVt_PV) { + if (old_type < SVt_PV) { /* referent will be NULL unless the old type was SVt_IV emulating SVt_RV */ sv->sv_u.svu_rv = referent; @@ -1647,6 +1645,7 @@ Perl_sv_setiv(pTHX_ SV *const sv, const IV i) case SVt_PVGV: if (!isGV_with_GP(sv)) break; + /* FALLTHROUGH */ case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: @@ -1760,6 +1759,7 @@ Perl_sv_setnv(pTHX_ SV *const sv, const NV num) case SVt_PVGV: if (!isGV_with_GP(sv)) break; + /* FALLTHROUGH */ case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: @@ -2463,7 +2463,7 @@ Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) Regexps have no SvIVX and SvNVX fields. */ - assert(isREGEXP(sv) || SvPOKp(sv)); + assert(SvPOKp(sv)); { UV value; const char * const ptr = @@ -2551,7 +2551,7 @@ Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags) /* FBMs use the space for SvIVX and SvNVX for other purposes, and use the same flag bit as SVf_IVisUV, so must not let them cache IVs. Regexps have no SvIVX and SvNVX fields. */ - assert(isREGEXP(sv) || SvPOKp(sv)); + assert(SvPOKp(sv)); { UV value; const char * const ptr = @@ -2627,7 +2627,6 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) return SvNVX(sv); if (SvPOKp(sv) && !SvIOKp(sv)) { ptr = SvPVX_const(sv); - grokpv: if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && !grok_number(ptr, SvCUR(sv), NULL)) not_a_number(sv); @@ -2642,10 +2641,6 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) if (SvROK(sv)) { goto return_rok; } - if (isREGEXP(sv)) { - ptr = RX_WRAPPED((REGEXP *)sv); - goto grokpv; - } assert(SvTYPE(sv) >= SVt_PVMG); /* This falls through to the report_uninit near the end of the function. */ @@ -2673,11 +2668,12 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ sv_upgrade(sv, SVt_NV); DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " num(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); + RESTORE_LC_NUMERIC(); }); } else if (SvTYPE(sv) < SVt_PVNV) @@ -2814,10 +2810,11 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) return 0.0; } DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + STORE_LC_NUMERIC_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); + RESTORE_LC_NUMERIC(); }); return SvNVX(sv); } @@ -3146,7 +3143,7 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) DECLARATION_FOR_LC_NUMERIC_MANIPULATION; STORE_LC_NUMERIC_SET_TO_NEEDED(); - local_radix = PL_numeric_local && PL_numeric_radix_sv; + local_radix = _NOT_IN_NUMERIC_STANDARD; if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { size += SvCUR(PL_numeric_radix_sv) - 1; s = SvGROW_mutable(sv, size); @@ -3191,10 +3188,6 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) *lp = SvCUR(buffer); return SvPVX(buffer); } - else if (isREGEXP(sv)) { - if (lp) *lp = RX_WRAPLEN((REGEXP *)sv); - return RX_WRAPPED((REGEXP *)sv); - } else { if (lp) *lp = 0; @@ -3371,11 +3364,16 @@ Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags) return cBOOL(svb); } } - return SvRV(sv) != 0; + assert(SvRV(sv)); + return TRUE; } if (isREGEXP(sv)) return RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0'); + + if (SvNOK(sv) && !SvPOK(sv)) + return SvNVX(sv) != 0.0; + return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0); } @@ -3405,11 +3403,7 @@ if all the bytes are invariant in UTF-8. If C has C bit set, will C on C if appropriate, else not. -If C has C set, this function assumes that the PV -will expand when converted to UTF-8, and skips the extra work of checking for -that. Typically this flag is used by a routine that has already parsed the -string and found such characters, and passes this information on so that the -work doesn't have to be repeated. +The C flag is now ignored. Returns the number of bytes in the converted string. @@ -3430,22 +3424,10 @@ Returns the number of bytes in the converted string (not including the spares). =cut -(One might think that the calling routine could pass in the position of the -first variant character when it has set SV_FORCE_UTF8_UPGRADE, so it wouldn't -have to be found again. But that is not the case, because typically when the -caller is likely to use this flag, it won't be calling this routine unless it -finds something that won't fit into a byte. Otherwise it tries to not upgrade -and just use bytes. But some things that do fit into a byte are variants in -utf8, and the caller may not have been keeping track of these.) - If the routine itself changes the string, it adds a trailing C. Such a C isn't guaranteed due to having other routines do the work in some input cases, or if the input is already flagged as being in utf8. -The speed of this could perhaps be improved for many cases if someone wanted to -write a fast function that counts the number of variant characters in a string, -especially if it could return the position of the first one. - */ STRLEN @@ -3468,7 +3450,12 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr } } - if (SvUTF8(sv)) { + /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already + * compiled and individual nodes will remain non-utf8 even if the + * stringified version of the pattern gets upgraded. Whether the + * PVX of a REGEXP should be grown or we should just croak, I don't + * know - DAPM */ + if (SvUTF8(sv) || isREGEXP(sv)) { if (extra) SvGROW(sv, SvCUR(sv) + extra); return SvCUR(sv); } @@ -3483,185 +3470,96 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr /* This function could be much more efficient if we * had a FLAG in SVs to signal if there are any variant * chars in the PV. Given that there isn't such a flag - * make the loop as fast as possible (although there are certainly ways - * to speed this up, eg. through vectorization) */ + * make the loop as fast as possible. */ U8 * s = (U8 *) SvPVX_const(sv); - U8 * e = (U8 *) SvEND(sv); U8 *t = s; - STRLEN two_byte_count = 0; - if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8; - - /* See if really will need to convert to utf8. We mustn't rely on our - * incoming SV being well formed and having a trailing '\0', as certain - * code in pp_formline can send us partially built SVs. */ + if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { - while (t < e) { - const U8 ch = *t++; - if (NATIVE_BYTE_IS_INVARIANT(ch)) continue; - - t--; /* t already incremented; re-point to first variant */ - two_byte_count = 1; - goto must_be_utf8; - } - - /* utf8 conversion not needed because all are invariants. Mark as - * UTF-8 even if no variant - saves scanning loop */ - SvUTF8_on(sv); - if (extra) SvGROW(sv, SvCUR(sv) + extra); - return SvCUR(sv); - - must_be_utf8: + /* utf8 conversion not needed because all are invariants. Mark + * as UTF-8 even if no variant - saves scanning loop */ + SvUTF8_on(sv); + if (extra) SvGROW(sv, SvCUR(sv) + extra); + return SvCUR(sv); + } - /* Here, the string should be converted to utf8, either because of an - * input flag (two_byte_count = 0), or because a character that - * requires 2 bytes was found (two_byte_count = 1). t points either to - * the beginning of the string (if we didn't examine anything), or to - * the first variant. In either case, everything from s to t - 1 will - * occupy only 1 byte each on output. + /* Here, there is at least one variant (t points to the first one), so + * the string should be converted to utf8. Everything from 's' to + * 't - 1' will occupy only 1 byte each on output. + * + * Note that the incoming SV may not have a trailing '\0', as certain + * code in pp_formline can send us partially built SVs. * * There are two main ways to convert. One is to create a new string * and go through the input starting from the beginning, appending each - * converted value onto the new string as we go along. It's probably - * best to allocate enough space in the string for the worst possible - * case rather than possibly running out of space and having to - * reallocate and then copy what we've done so far. Since everything - * from s to t - 1 is invariant, the destination can be initialized - * with these using a fast memory copy + * converted value onto the new string as we go along. Going this + * route, it's probably best to initially allocate enough space in the + * string rather than possibly running out of space and having to + * reallocate and then copy what we've done so far. Since everything + * from 's' to 't - 1' is invariant, the destination can be initialized + * with these using a fast memory copy. To be sure to allocate enough + * space, one could use the worst case scenario, where every remaining + * byte expands to two under UTF-8, or one could parse it and count + * exactly how many do expand. * - * The other way is to figure out exactly how big the string should be - * by parsing the entire input. Then you don't have to make it big - * enough to handle the worst possible case, and more importantly, if - * the string you already have is large enough, you don't have to - * allocate a new string, you can copy the last character in the input - * string to the final position(s) that will be occupied by the - * converted string and go backwards, stopping at t, since everything - * before that is invariant. + * The other way is to unconditionally parse the remainder of the + * string to figure out exactly how big the expanded string will be, + * growing if needed. Then start at the end of the string and place + * the character there at the end of the unfilled space in the expanded + * one, working backwards until reaching 't'. * - * There are advantages and disadvantages to each method. - * - * In the first method, we can allocate a new string, do the memory - * copy from the s to t - 1, and then proceed through the rest of the - * string byte-by-byte. - * - * In the second method, we proceed through the rest of the input - * string just calculating how big the converted string will be. Then - * there are two cases: - * 1) if the string has enough extra space to handle the converted - * value. We go backwards through the string, converting until we - * get to the position we are at now, and then stop. If this - * position is far enough along in the string, this method is - * faster than the other method. If the memory copy were the same - * speed as the byte-by-byte loop, that position would be about - * half-way, as at the half-way mark, parsing to the end and back - * is one complete string's parse, the same amount as starting - * over and going all the way through. Actually, it would be - * somewhat less than half-way, as it's faster to just count bytes - * than to also copy, and we don't have the overhead of allocating - * a new string, changing the scalar to use it, and freeing the - * existing one. But if the memory copy is fast, the break-even - * point is somewhere after half way. The counting loop could be - * sped up by vectorization, etc, to move the break-even point - * further towards the beginning. - * 2) if the string doesn't have enough space to handle the converted - * value. A new string will have to be allocated, and one might - * as well, given that, start from the beginning doing the first - * method. We've spent extra time parsing the string and in - * exchange all we've gotten is that we know precisely how big to - * make the new one. Perl is more optimized for time than space, - * so this case is a loser. - * So what I've decided to do is not use the 2nd method unless it is - * guaranteed that a new string won't have to be allocated, assuming - * the worst case. I also decided not to put any more conditions on it - * than this, for now. It seems likely that, since the worst case is - * twice as big as the unknown portion of the string (plus 1), we won't - * be guaranteed enough space, causing us to go to the first method, - * unless the string is short, or the first variant character is near - * the end of it. In either of these cases, it seems best to use the - * 2nd method. The only circumstance I can think of where this would - * be really slower is if the string had once had much more data in it - * than it does now, but there is still a substantial amount in it */ + * The problem with assuming the worst case scenario is that for very + * long strings, we could allocate much more memory than actually + * needed, which can create performance problems. If we have to parse + * anyway, the second method is the winner as it may avoid an extra + * copy. The code used to use the first method under some + * circumstances, but now that there is faster variant counting on + * ASCII platforms, the second method is used exclusively, eliminating + * some code that no longer has to be maintained. */ { - STRLEN invariant_head = t - s; - STRLEN size = invariant_head + (e - t) * 2 + 1 + extra; - if (SvLEN(sv) < size) { - - /* Here, have decided to allocate a new string */ - - U8 *dst; - U8 *d; - - Newx(dst, size, U8); - - /* If no known invariants at the beginning of the input string, - * set so starts from there. Otherwise, can use memory copy to - * get up to where we are now, and then start from here */ - - if (invariant_head == 0) { - d = dst; - } else { - Copy(s, dst, invariant_head, char); - d = dst + invariant_head; - } - - while (t < e) { - append_utf8_from_native_byte(*t, &d); - t++; - } - *d = '\0'; - SvPV_free(sv); /* No longer using pre-existing string */ - SvPV_set(sv, (char*)dst); - SvCUR_set(sv, d - dst); - SvLEN_set(sv, size); - } else { - - /* Here, have decided to get the exact size of the string. - * Currently this happens only when we know that there is - * guaranteed enough space to fit the converted string, so - * don't have to worry about growing. If two_byte_count is 0, - * then t points to the first byte of the string which hasn't - * been examined yet. Otherwise two_byte_count is 1, and t - * points to the first byte in the string that will expand to - * two. Depending on this, start examining at t or 1 after t. - * */ - - U8 *d = t + two_byte_count; - - - /* Count up the remaining bytes that expand to two */ - - while (d < e) { - const U8 chr = *d++; - if (! NATIVE_BYTE_IS_INVARIANT(chr)) two_byte_count++; - } - - /* The string will expand by just the number of bytes that - * occupy two positions. But we are one afterwards because of - * the increment just above. This is the place to put the - * trailing NUL, and to set the length before we decrement */ - - d += two_byte_count; - SvCUR_set(sv, d - s); - *d-- = '\0'; + /* Count the total number of variants there are. We can start + * just beyond the first one, which is known to be at 't' */ + const Size_t invariant_length = t - s; + U8 * e = (U8 *) SvEND(sv); + + /* The length of the left overs, plus 1. */ + const Size_t remaining_length_p1 = e - t; + + /* We expand by 1 for the variant at 't' and one for each remaining + * variant (we start looking at 't+1') */ + Size_t expansion = 1 + variant_under_utf8_count(t + 1, e); + + /* +1 = trailing NUL */ + Size_t need = SvCUR(sv) + expansion + extra + 1; + U8 * d; + + /* Grow if needed */ + if (SvLEN(sv) < need) { + t = invariant_length + (U8*) SvGROW(sv, need); + e = t + remaining_length_p1; + } + SvCUR_set(sv, invariant_length + remaining_length_p1 + expansion); + /* Set the NUL at the end */ + d = (U8 *) SvEND(sv); + *d-- = '\0'; - /* Having decremented d, it points to the position to put the - * very last byte of the expanded string. Go backwards through - * the string, copying and expanding as we go, stopping when we - * get to the part that is invariant the rest of the way down */ + /* Having decremented d, it points to the position to put the + * very last byte of the expanded string. Go backwards through + * the string, copying and expanding as we go, stopping when we + * get to the part that is invariant the rest of the way down */ - e--; - while (e >= t) { - if (NATIVE_BYTE_IS_INVARIANT(*e)) { - *d-- = *e; - } else { - *d-- = UTF8_EIGHT_BIT_LO(*e); - *d-- = UTF8_EIGHT_BIT_HI(*e); - } - e--; - } - } + e--; + while (e >= t) { + if (NATIVE_BYTE_IS_INVARIANT(*e)) { + *d-- = *e; + } else { + *d-- = UTF8_EIGHT_BIT_LO(*e); + *d-- = UTF8_EIGHT_BIT_HI(*e); + } + e--; + } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { /* Update pos. We do it at the end rather than during @@ -3680,7 +3578,6 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr } } - /* Mark as UTF-8 even if no variant - saves scanning loop */ SvUTF8_on(sv); return SvCUR(sv); } @@ -3785,7 +3682,7 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) PERL_ARGS_ASSERT_SV_UTF8_DECODE; if (SvPOKp(sv)) { - const U8 *start, *c; + const U8 *start, *c, *first_variant; /* The octets may have got themselves encoded - get them back as * bytes @@ -3797,9 +3694,9 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) * we want to make sure everything inside is valid utf8 first. */ c = start = (const U8 *) SvPVX_const(sv); - if (!is_utf8_string(c, SvCUR(sv))) - return FALSE; - if (! is_utf8_invariant_string(c, SvCUR(sv))) { + if (! is_utf8_invariant_string_loc(c, SvCUR(sv), &first_variant)) { + if (!is_utf8_string(first_variant, SvCUR(sv) - (first_variant -c))) + return FALSE; SvUTF8_on(sv); } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { @@ -3920,15 +3817,14 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) glob to begin with. */ if(dtype == SVt_PVGV) { const char * const name = GvNAME((const GV *)dstr); - if( - strEQ(name,"ISA") + const STRLEN len = GvNAMELEN(dstr); + if(memEQs(name, len, "ISA") /* The stash may have been detached from the symbol table, so check its name. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) ) mro_changes = 2; else { - const STRLEN len = GvNAMELEN(dstr); if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') || (len == 1 && name[0] == ':')) { mro_changes = 3; @@ -4141,7 +4037,7 @@ Perl_gv_setref(pTHX_ SV *const dstr, SV *const sstr) } else if ( stype == SVt_PVAV && sref != dref - && strEQ(GvNAME((GV*)dstr), "ISA") + && memEQs(GvNAME((GV*)dstr), GvNAMELEN((GV*)dstr), "ISA") /* The stash may have been detached from the symbol table, so check its name before doing anything. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) @@ -4448,15 +4344,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) case SVt_REGEXP: upgregexp: if (dtype < SVt_REGEXP) - { - if (dtype >= SVt_PV) { - SvPV_free(dstr); - SvPV_set(dstr, 0); - SvLEN_set(dstr, 0); - SvCUR_set(dstr, 0); - } sv_upgrade(dstr, SVt_REGEXP); - } break; case SVt_INVLIST: @@ -4705,11 +4593,13 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) ) { /* Either it's a shared hash key, or it's suitable for copy-on-write. */ +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n"); sv_dump(sstr); sv_dump(dstr); } +#endif #ifdef PERL_ANY_COW if (!(sflags & SVf_IsCOW)) { SvIsCOW_on(sstr); @@ -4883,7 +4773,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) #endif PERL_ARGS_ASSERT_SV_SETSV_COW; - +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", (void*)sstr, (void*)dstr); @@ -4891,7 +4781,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) if (dstr) sv_dump(dstr); } - +#endif if (dstr) { if (SvTHINKFIRST(dstr)) sv_force_normal_flags(dstr, SV_COW_DROP_PV); @@ -4938,9 +4828,10 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) SvUTF8_on(dstr); SvLEN_set(dstr, len); SvCUR_set(dstr, cur); - if (DEBUG_C_TEST) { - sv_dump(dstr); - } +#ifdef DEBUGGING + if (DEBUG_C_TEST) + sv_dump(dstr); +#endif return dstr; } #endif @@ -5146,7 +5037,7 @@ giving it to C, and neither should any pointers from "behind" that pointer (e.g. ptr + 1) be used. If S> is true, will call C. If -S & SV_HAS_TRAILING_NUL>> is true, then C must be C, +S> is true, then C must be C, and the realloc will be skipped (i.e. the buffer is actually at least 1 byte longer than C, and already meets the requirements for storing in C). @@ -5226,12 +5117,14 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) const STRLEN len = SvLEN(sv); const STRLEN cur = SvCUR(sv); +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: Force normal %ld\n", (long) flags); sv_dump(sv); } +#endif SvIsCOW_off(sv); # ifdef PERL_COPY_ON_WRITE if (len) { @@ -5271,9 +5164,10 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) } else { unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } - if (DEBUG_C_TEST) { +#ifdef DEBUGGING + if (DEBUG_C_TEST) sv_dump(sv); - } +#endif } #else const char * const pvx = SvPVX_const(sv); @@ -5338,7 +5232,7 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) const svtype new_type = islv ? SVt_NULL : SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV; SV *const temp = newSV_type(new_type); - regexp *const temp_p = ReANY((REGEXP *)sv); + regexp *old_rx_body; if (new_type == SVt_PVMG) { SvMAGIC_set(temp, SvMAGIC(sv)); @@ -5346,15 +5240,26 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) SvSTASH_set(temp, SvSTASH(sv)); SvSTASH_set(sv, NULL); } - if (!islv) SvCUR_set(temp, SvCUR(sv)); - /* Remember that SvPVX is in the head, not the body. But - RX_WRAPPED is in the body. */ + if (!islv) + SvCUR_set(temp, SvCUR(sv)); + /* Remember that SvPVX is in the head, not the body. */ assert(ReANY((REGEXP *)sv)->mother_re); + + if (islv) { + /* LV-as-regex has sv->sv_any pointing to an XPVLV body, + * whose xpvlenu_rx field points to the regex body */ + XPV *xpv = (XPV*)(SvANY(sv)); + old_rx_body = xpv->xpv_len_u.xpvlenu_rx; + xpv->xpv_len_u.xpvlenu_rx = NULL; + } + else + old_rx_body = ReANY((REGEXP *)sv); + /* Their buffer is already owned by someone else. */ if (flags & SV_COW_DROP_PV) { /* SvLEN is already 0. For SVt_REGEXP, we have a brand new - zeroed body. For SVt_PVLV, it should have been set to 0 - before turning into a regexp. */ + zeroed body. For SVt_PVLV, we zeroed it above (len field + a union with xpvlenu_rx) */ assert(!SvLEN(islv ? sv : temp)); sv->sv_u.svu_pv = 0; } @@ -5375,8 +5280,7 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) SvFLAGS(temp) &= ~(SVTYPEMASK); SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE; - SvANY(temp) = temp_p; - temp->sv_u.svu_rx = (regexp *)temp_p; + SvANY(temp) = old_rx_body; SvREFCNT_dec_NN(temp); } @@ -5962,7 +5866,8 @@ Weaken a reference: set the C flag on this RV; give the referred-to SV C magic if it hasn't already; and push a back-reference to this RV onto the array of backreferences associated with that magic. If the RV is magical, set magic will be -called after the RV is cleared. +called after the RV is cleared. Silently ignores C and warns +on already-weak references. =cut */ @@ -5991,6 +5896,42 @@ Perl_sv_rvweaken(pTHX_ SV *const sv) } /* +=for apidoc sv_rvunweaken + +Unweaken a reference: Clear the C flag on this RV; remove +the backreference to this RV from the array of backreferences +associated with the target SV, increment the refcount of the target. +Silently ignores C and warns on non-weak references. + +=cut +*/ + +SV * +Perl_sv_rvunweaken(pTHX_ SV *const sv) +{ + SV *tsv; + + PERL_ARGS_ASSERT_SV_RVUNWEAKEN; + + if (!SvOK(sv)) /* let undefs pass */ + return sv; + if (!SvROK(sv)) + Perl_croak(aTHX_ "Can't unweaken a nonreference"); + else if (!SvWEAKREF(sv)) { + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is not weak"); + return sv; + } + else if (SvREADONLY(sv)) croak_no_modify(); + + tsv = SvRV(sv); + SvWEAKREF_off(sv); + SvROK_on(sv); + SvREFCNT_inc_NN(tsv); + Perl_sv_del_backref(aTHX_ tsv, sv); + return sv; +} + +/* =for apidoc sv_get_backrefs If C is the target of a weak reference then it returns the back @@ -6625,7 +6566,6 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) goto freescalar; case SVt_REGEXP: /* FIXME for plugins */ - freeregexp: pregfree2((REGEXP*) sv); goto freescalar; case SVt_PVCV: @@ -6704,7 +6644,16 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) } else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ SvREFCNT_dec(LvTARG(sv)); - if (isREGEXP(sv)) goto freeregexp; + if (isREGEXP(sv)) { + /* SvLEN points to a regex body. Free the body, then + * set SvLEN to whatever value was in the now-freed + * regex body. The PVX buffer is shared by multiple re's + * and only freed once, by the re whose len in non-null */ + STRLEN len = ReANY(sv)->xpv_len; + pregfree2((REGEXP*) sv); + SvLEN_set((sv), len); + goto freescalar; + } /* FALLTHROUGH */ case SVt_PVGV: if (isGV_with_GP(sv)) { @@ -6761,10 +6710,12 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) { if (SvIsCOW(sv)) { +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); sv_dump(sv); } +#endif if (SvLEN(sv)) { if (CowREFCNT(sv)) { sv_buf_to_rw(sv); @@ -8958,7 +8909,7 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv) if (flags & SVp_NOK) { const NV was = SvNVX(sv); if (LIKELY(!Perl_isinfnan(was)) && - NV_OVERFLOWS_INTEGERS_AT && + NV_OVERFLOWS_INTEGERS_AT != 0.0 && was >= NV_OVERFLOWS_INTEGERS_AT) { /* diag_listed_as: Lost precision when %s %f by 1 */ Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), @@ -9141,7 +9092,7 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv) { const NV was = SvNVX(sv); if (LIKELY(!Perl_isinfnan(was)) && - NV_OVERFLOWS_INTEGERS_AT && + NV_OVERFLOWS_INTEGERS_AT != 0.0 && was <= -NV_OVERFLOWS_INTEGERS_AT) { /* diag_listed_as: Lost precision when %s %f by 1 */ Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), @@ -9374,7 +9325,7 @@ Creates a new SV and copies a string into it, which may contain C character (C<\0>) and other binary data. The reference count for the SV is set to 1. Note that if C is zero, Perl will create a zero length (Perl) string. You are responsible for ensuring that the source buffer is at least -C bytes long. If the C argument is NULL the new SV will be +C bytes long. If the C argument is NULL the new SV will be undefined. =cut @@ -10955,12 +10906,35 @@ Usually used via one of its frontends C and C. void Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted) + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) { PERL_ARGS_ASSERT_SV_VSETPVFN; SvPVCLEAR(sv); - sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, 0); + sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0); +} + + +/* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */ + +PERL_STATIC_INLINE void +S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len) +{ + STRLEN const need = len + SvCUR(sv) + 1; + char *end; + + /* can't wrap as both len and SvCUR() are allocated in + * memory and together can't consume all the address space + */ + assert(need > len); + + assert(SvPOK(sv)); + SvGROW(sv, need); + end = SvEND(sv); + Copy(buf, end, len, char); + end += len; + *end = '\0'; + SvCUR_set(sv, need - 1); } @@ -11050,7 +11024,7 @@ S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg) */ STATIC STRLEN -S_expect_number(pTHX_ char **const pattern) +S_expect_number(pTHX_ const char **const pattern) { STRLEN var; @@ -11085,12 +11059,15 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) assert(!Perl_isinfnan(nv)); if (neg) nv = -nv; - if (nv < UV_MAX) { + if (nv != 0.0 && nv < UV_MAX) { char *p = endbuf; - nv += 0.5; uv = (UV)nv; - if (uv & 1 && uv == nv) - uv--; /* Round to even */ + if (uv != nv) { + nv += 0.5; + uv = (UV)nv; + if (uv & 1 && uv == nv) + uv--; /* Round to even */ + } do { const unsigned dig = uv % 10; *--p = '0' + dig; @@ -11108,11 +11085,11 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) void Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted) + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) { PERL_ARGS_ASSERT_SV_VCATPVFN; - sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC); + sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC); } @@ -11565,8 +11542,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, /* In this case there is an implicit bit, * and therefore the exponent is shifted by one. */ exponent--; -# else -# ifdef NV_X86_80_BIT +# elif defined(NV_X86_80_BIT) if (subnormal) { /* The subnormals of the x86-80 have a base exponent of -16382, * (while the physical exponent bits are zero) but the frexp() @@ -11580,7 +11556,6 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, } else { exponent -= 4; } -# endif /* TBD: other non-implicit-bit platforms than the x86-80. */ # endif #endif @@ -11637,6 +11612,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, * the top non-zero nybble. */ for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } assert(n < 4); + assert(vlnz); vlnz[1] = 0; for (vshr = vlnz; vshr >= vfnz; vshr--) { vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); @@ -11737,10 +11713,9 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, #ifndef USE_LOCALE_NUMERIC *p++ = '.'; #else - if (PL_numeric_radix_sv) { + if (IN_LC(LC_NUMERIC)) { STRLEN n; const char* r = SvPV(PL_numeric_radix_sv, n); - assert(IN_LC(LC_NUMERIC)); Copy(r, p, n, char); p += n; } @@ -11835,21 +11810,19 @@ Usually used via one of its frontends C and C. void Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted, + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted, const U32 flags) { - char *p; - char *q; + const char *fmtstart; /* character following the current '%' */ + const char *q; /* current position within format */ const char *patend; STRLEN origlen; Size_t svix = 0; static const char nullstr[] = "(null)"; - SV *argsv = NULL; bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ - SV *nsv = NULL; /* Times 4: a decimal digit takes more than 3 binary digits. - * NV_DIG: mantissa takes than many decimal digits. + * NV_DIG: mantissa takes that many decimal digits. * Plus 32: Playing safe. */ char ebuf[IV_DIG * 4 + NV_DIG + 32]; bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ @@ -11879,10 +11852,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * warnings etc. */ - if (patlen == 0 && (args || svmax == 0)) + if (patlen == 0 && (args || sv_count == 0)) return; - if (patlen <= 4 && pat[0] == '%' && (args || svmax == 1)) { + if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) { /* "%s" */ if (patlen == 2 && pat[1] == 's') { @@ -11928,8 +11901,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p patend = (char*)pat + patlen; - for (p = (char*)pat; p < patend; p = q) { - + for (fmtstart = pat; fmtstart < patend; fmtstart = q) { char intsize = 0; /* size qualifier in "%hi..." etc */ bool alt = FALSE; /* has "%#..." */ bool left = FALSE; /* has "%-..." */ @@ -11951,6 +11923,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p Size_t efix = 0; /* explicit format parameter index */ const Size_t osvix = svix; /* original index in case of bad fmt */ + SV *argsv = NULL; bool is_utf8 = FALSE; /* is this item utf8? */ bool arg_missing = FALSE; /* give "Missing argument" warning */ char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */ @@ -11960,23 +11933,39 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p const char *eptr = NULL; /* the address of the element string */ STRLEN elen = 0; /* the length of the element string */ - const char *fmtstart; /* start of current format (the '%') */ char c; /* the actual format ('d', s' etc) */ /* echo everything up to the next format specification */ - for (q = p; q < patend && *q != '%'; ++q) ; - if (q > p) { - if (has_utf8 && !pat_utf8) - sv_catpvn_nomg_utf8_upgrade(sv, p, q - p, nsv); + for (q = fmtstart; q < patend && *q != '%'; ++q) + {}; + + if (q > fmtstart) { + if (has_utf8 && !pat_utf8) { + /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on + * the fly */ + const char *p; + char *dst; + STRLEN need = SvCUR(sv) + (q - fmtstart) + 1; + + for (p = fmtstart; p < q; p++) + if (!NATIVE_BYTE_IS_INVARIANT(*p)) + need++; + SvGROW(sv, need); + + dst = SvEND(sv); + for (p = fmtstart; p < q; p++) + append_utf8_from_native_byte((U8)*p, (U8**)&dst); + *dst = '\0'; + SvCUR_set(sv, need - 1); + } else - sv_catpvn_nomg(sv, p, q - p); - p = q; + S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart); } if (q++ >= patend) break; - fmtstart = q; + fmtstart = q; /* fmtstart is char following the '%' */ /* We allow format specification elements in this order: @@ -12082,7 +12071,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p vecsv = va_arg(*args, SV*); else { ix = ix ? ix - 1 : svix++; - vecsv = ix < svmax ? svargs[ix] + vecsv = ix < sv_count ? svargs[ix] : (arg_missing = TRUE, &PL_sv_no); } dotstr = SvPV_const(vecsv, dotstrlen); @@ -12108,7 +12097,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p i = va_arg(*args, int); else { ix = ix ? ix - 1 : svix++; - sv = (ix < svmax) ? svargs[ix] + sv = (ix < sv_count) ? svargs[ix] : (arg_missing = TRUE, (SV*)NULL); } width = S_sprintf_arg_num_val(aTHX_ args, i, sv, &left); @@ -12165,7 +12154,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p i = va_arg(*args, int); else { ix = ix ? ix - 1 : svix++; - sv = (ix < svmax) ? svargs[ix] + sv = (ix < sv_count) ? svargs[ix] : (arg_missing = TRUE, (SV*)NULL); } precis = S_sprintf_arg_num_val(aTHX_ args, i, sv, &neg); @@ -12247,9 +12236,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'V': case 'z': case 't': -#ifdef I_STDINT case 'j': -#endif intsize = *q++; break; } @@ -12275,7 +12262,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (!args) { efix = efix ? efix - 1 : svix++; - argsv = efix < svmax ? svargs[efix] + argsv = efix < sv_count ? svargs[efix] : (arg_missing = TRUE, &PL_sv_no); } @@ -12288,7 +12275,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (args) { eptr = va_arg(*args, char*); if (eptr) - elen = strlen(eptr); + if (has_precis) + elen = my_strnlen(eptr, precis); + else + elen = strlen(eptr); else { eptr = (char *)nullstr; elen = sizeof nullstr - 1; @@ -12531,7 +12521,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * over the individual characters of a vector arg */ vector: if (!veclen) - goto donevalidconversion; + goto done_valid_conversion; if (vec_utf8) uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); @@ -12569,9 +12559,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 't': iv = va_arg(*args, ptrdiff_t); break; #endif default: iv = va_arg(*args, int); break; -#ifdef I_STDINT - case 'j': iv = va_arg(*args, intmax_t); break; -#endif + case 'j': iv = va_arg(*args, PERL_INTMAX_T); break; case 'q': #if IVSIZE >= 8 iv = va_arg(*args, Quad_t); break; @@ -12626,9 +12614,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * uptrdiff_t, so oh well */ case 't': uv = va_arg(*args, ptrdiff_t); break; #endif -#ifdef I_STDINT - case 'j': uv = va_arg(*args, uintmax_t); break; -#endif + case 'j': uv = va_arg(*args, PERL_UINTMAX_T); break; default: uv = va_arg(*args, unsigned); break; case 'q': #if IVSIZE >= 8 @@ -12667,16 +12653,20 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p switch (base) { case 16: - p = (char *)((c == 'X') ? PL_hexdigit + 16 : PL_hexdigit); - do { - dig = uv & 15; - *--ptr = p[dig]; - } while (uv >>= 4); - if (alt && *ptr != '0') { - esignbuf[esignlen++] = '0'; - esignbuf[esignlen++] = c; /* 'x' or 'X' */ - } - break; + { + const char * const p = + (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit; + + do { + dig = uv & 15; + *--ptr = p[dig]; + } while (uv >>= 4); + if (alt && *ptr != '0') { + esignbuf[esignlen++] = '0'; + esignbuf[esignlen++] = c; /* 'x' or 'X' */ + } + break; + } case 8: do { dig = uv & 7; @@ -12915,8 +12905,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p lc_numeric_set = TRUE; } - if (PL_numeric_radix_sv) { - assert(IN_LC(LC_NUMERIC)); + if (IN_LC(LC_NUMERIC)) { /* this can't wrap unless PL_numeric_radix_sv is a string * consuming virtually all the 32-bit or 64-bit address * space @@ -13028,7 +13017,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (float_need < width) float_need = width; - if (PL_efloatsize < float_need) { + if (PL_efloatsize <= float_need) { + /* PL_efloatbuf should be at least 1 greater than + * float_need to allow a trailing \0 to be returned by + * snprintf(). If we need to grow, overgrow for the + * benefit of future generations */ + const STRLEN extra = 0x20; + if (float_need >= ((STRLEN)~0) - extra) + croak_memory_wrap(); + float_need += extra; Safefree(PL_efloatbuf); PL_efloatsize = float_need; Newx(PL_efloatbuf, PL_efloatsize, char); @@ -13088,7 +13085,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* hopefully the above makes ptr a very constrained format * that is safe to use, even though it's not literal */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); #ifdef USE_QUADMATH { const char* qfmt = quadmath_format_single(ptr); @@ -13109,9 +13106,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv)); #else - elen = my_sprintf(PL_efloatbuf, ptr, fv); + elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv); #endif - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } eptr = PL_efloatbuf; @@ -13128,24 +13125,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p assert(elen); assert(elen >= width); + S_sv_catpvn_simple(aTHX_ sv, eptr, elen); - { - /* unrolled Perl_sv_catpvn */ - STRLEN need = elen + SvCUR(sv) + 1; - char *end; - /* can't wrap as both elen and SvCUR() are allocated in - * memory and together can't consume all the address space - */ - assert(need > elen); - SvGROW(sv, need); - end = SvEND(sv); - Copy(eptr, end, elen, char); - end += elen; - *end = '\0'; - SvCUR_set(sv, need - 1); - } - - goto donevalidconversion; + goto done_valid_conversion; } /* SPECIAL */ @@ -13172,9 +13154,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #ifdef HAS_PTRDIFF_T case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; #endif -#ifdef I_STDINT - case 'j': *(va_arg(*args, intmax_t*)) = i; break; -#endif + case 'j': *(va_arg(*args, PERL_INTMAX_T*)) = i; break; case 'q': #if IVSIZE >= 8 *(va_arg(*args, Quad_t*)) = i; break; @@ -13190,7 +13170,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len); } - goto donevalidconversion; + goto done_valid_conversion; } /* UNKNOWN */ @@ -13225,8 +13205,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* mangled format: output the '%', then continue from the * character following that */ - sv_catpvn_nomg(sv, p, 1); - q = p + 1; + sv_catpvn_nomg(sv, fmtstart-1, 1); + q = fmtstart; svix = osvix; /* Any "redundant arg" warning from now onwards will probably * just be misleading, so don't bother. */ @@ -13333,7 +13313,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p goto vector; /* do next iteration */ } - donevalidconversion: + done_valid_conversion: if (arg_missing) S_warn_vcatpvfn_missing_argument(aTHX); @@ -13342,15 +13322,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* Now that we've consumed all our printf format arguments (svix) * do we have things left on the stack that we didn't use? */ - if (!no_redundant_warning && svmax >= svix + 1 && ckWARN(WARN_REDUNDANT)) { + if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) { Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); } SvTAINT(sv); - RESTORE_LC_NUMERIC(); /* Done outside loop, so don't have to save/restore - each iteration. */ + if (lc_numeric_set) { + RESTORE_LC_NUMERIC(); /* Done outside loop, so don't have to + save/restore each iteration. */ + } } /* ========================================================================= @@ -13417,13 +13399,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) Newxz(parser, 1, yy_parser); ptr_table_store(PL_ptr_table, proto, parser); - /* XXX these not yet duped */ - parser->old_parser = NULL; - parser->stack = NULL; - parser->ps = NULL; - parser->stack_max1 = 0; - /* XXX parser->stack->state = 0; */ - /* XXX eventually, just Copy() most of the parser struct ? */ parser->lex_brackets = proto->lex_brackets; @@ -13465,7 +13440,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->sig_optelems= proto->sig_optelems; parser->sig_slurpy = proto->sig_slurpy; parser->recheck_utf8_validity = proto->recheck_utf8_validity; - parser->linestr = sv_dup_inc(proto->linestr, param); { char * const ols = SvPVX(proto->linestr); @@ -14196,7 +14170,6 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) case SVt_REGEXP: duprex: /* FIXME for plugins */ - dstr->sv_u.svu_rx = ((REGEXP *)dstr)->sv_any; re_dup_guts((REGEXP*) sstr, (REGEXP*) dstr, param); break; case SVt_PVLV: @@ -14208,6 +14181,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) else LvTARG(dstr) = sv_dup_inc(LvTARG(dstr), param); if (isREGEXP(sstr)) goto duprex; + /* FALLTHROUGH */ case SVt_PVGV: /* non-GP case already handled above */ if(isGV_with_GP(sstr)) { @@ -14261,7 +14235,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) SSize_t items = AvFILLp((const AV *)sstr) + 1; src_ary = AvARRAY((const AV *)sstr); - Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*); + Newx(dst_ary, AvMAX((const AV *)sstr)+1, SV*); ptr_table_store(PL_ptr_table, src_ary, dst_ary); AvARRAY(MUTABLE_AV(dstr)) = dst_ary; AvALLOC((const AV *)dstr) = dst_ary; @@ -14593,7 +14567,7 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) return nsi; /* create anew and remember what it is */ - Newxz(nsi, 1, PERL_SI); + Newx(nsi, 1, PERL_SI); ptr_table_store(PL_ptr_table, si, nsi); nsi->si_stack = av_dup_inc(si->si_stack, param); @@ -14604,6 +14578,9 @@ Perl_si_dup(pTHX_ PERL_SI *si, CLONE_PARAMS* param) nsi->si_prev = si_dup(si->si_prev, param); nsi->si_next = si_dup(si->si_next, param); nsi->si_markoff = si->si_markoff; +#if defined DEBUGGING && !defined DEBUGGING_RE_ONLY + nsi->si_stack_hwm = 0; +#endif return nsi; } @@ -14685,7 +14662,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) PERL_ARGS_ASSERT_SS_DUP; - Newxz(nss, max, ANY); + Newx(nss, max, ANY); while (ix > 0) { const UV uv = POPUV(ss,ix); @@ -14888,8 +14865,8 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) case SAVEt_AELEM: /* array element */ sv = (const SV *)POPPTR(ss,ix); TOPPTR(nss,ix) = SvREFCNT_inc(sv_dup_inc(sv, param)); - i = POPINT(ss,ix); - TOPINT(nss,ix) = i; + iv = POPIV(ss,ix); + TOPIV(nss,ix) = iv; av = (const AV *)POPPTR(ss,ix); TOPPTR(nss,ix) = av_dup_inc(av, param); break; @@ -15204,8 +15181,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_filemode = proto_perl->Ifilemode; PL_lastfd = proto_perl->Ilastfd; PL_oldname = proto_perl->Ioldname; /* XXX not quite right */ - PL_Argv = NULL; - PL_Cmd = NULL; PL_gensym = proto_perl->Igensym; PL_laststatval = proto_perl->Ilaststatval; @@ -15251,13 +15226,15 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_LOCALE_NUMERIC PL_numeric_standard = proto_perl->Inumeric_standard; - PL_numeric_local = proto_perl->Inumeric_local; + PL_numeric_underlying = proto_perl->Inumeric_underlying; + PL_numeric_underlying_is_standard = proto_perl->Inumeric_underlying_is_standard; #endif /* !USE_LOCALE_NUMERIC */ /* Did the locale setup indicate UTF-8? */ PL_utf8locale = proto_perl->Iutf8locale; PL_in_utf8_CTYPE_locale = proto_perl->Iin_utf8_CTYPE_locale; PL_in_utf8_COLLATE_locale = proto_perl->Iin_utf8_COLLATE_locale; + my_strlcpy(PL_locale_utf8ness, proto_perl->Ilocale_utf8ness, sizeof(PL_locale_utf8ness)); /* Unicode features (see perlrun/-C) */ PL_unicode = proto_perl->Iunicode; @@ -15375,6 +15352,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, init_constants(); ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); + ptr_table_store(PL_ptr_table, &proto_perl->Isv_zero, &PL_sv_zero); ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); ptr_table_store(PL_ptr_table, &proto_perl->Ipadname_const, &PL_padname_const); @@ -15582,8 +15560,15 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_LOCALE_NUMERIC PL_numeric_name = SAVEPV(proto_perl->Inumeric_name); PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); + +# if defined(HAS_NEWLOCALE) && ! defined(NO_POSIX_2008_LOCALE) + PL_underlying_numeric_obj = NULL; +# endif #endif /* !USE_LOCALE_NUMERIC */ + PL_langinfo_buf = NULL; + PL_langinfo_bufsize = 0; + /* Unicode inversion lists */ PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); @@ -15602,6 +15587,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, } PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param); PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param); + PL_SCX_invlist = sv_dup_inc(proto_perl->ISCX_invlist, param); PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param); PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); @@ -15644,7 +15630,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* next PUSHMARK() sets *(PL_markstack_ptr+1) */ i = proto_perl->Imarkstack_max - proto_perl->Imarkstack; - Newxz(PL_markstack, i, I32); + Newx(PL_markstack, i, I32); PL_markstack_max = PL_markstack + (proto_perl->Imarkstack_max - proto_perl->Imarkstack); PL_markstack_ptr = PL_markstack + (proto_perl->Imarkstack_ptr @@ -15654,11 +15640,11 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* next push_scope()/ENTER sets PL_scopestack[PL_scopestack_ix] * NOTE: unlike the others! */ - Newxz(PL_scopestack, PL_scopestack_max, I32); + Newx(PL_scopestack, PL_scopestack_max, I32); Copy(proto_perl->Iscopestack, PL_scopestack, PL_scopestack_ix, I32); #ifdef DEBUGGING - Newxz(PL_scopestack_name, PL_scopestack_max, const char *); + Newx(PL_scopestack_name, PL_scopestack_max, const char *); Copy(proto_perl->Iscopestack_name, PL_scopestack_name, PL_scopestack_ix, const char *); #endif /* reset stack AV to correct length before its duped via @@ -15882,6 +15868,13 @@ Perl_init_constants(pTHX) |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK |SVp_POK|SVf_POK; + SvANY(&PL_sv_zero) = new_XPVNV(); + SvREFCNT(&PL_sv_zero) = SvREFCNT_IMMORTAL; + SvFLAGS(&PL_sv_zero) = SVt_PVNV|SVf_READONLY|SVf_PROTECT + |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK + |SVp_POK|SVf_POK + |SVs_PADTMP; + SvPV_set(&PL_sv_no, (char*)PL_No); SvCUR_set(&PL_sv_no, 0); SvLEN_set(&PL_sv_no, 0); @@ -15894,7 +15887,33 @@ Perl_init_constants(pTHX) SvIV_set(&PL_sv_yes, 1); SvNV_set(&PL_sv_yes, 1); + SvPV_set(&PL_sv_zero, (char*)PL_Zero); + SvCUR_set(&PL_sv_zero, 1); + SvLEN_set(&PL_sv_zero, 0); + SvIV_set(&PL_sv_zero, 0); + SvNV_set(&PL_sv_zero, 0); + PadnamePV(&PL_padname_const) = (char *)PL_No; + + assert(SvIMMORTAL_INTERP(&PL_sv_yes)); + assert(SvIMMORTAL_INTERP(&PL_sv_undef)); + assert(SvIMMORTAL_INTERP(&PL_sv_no)); + assert(SvIMMORTAL_INTERP(&PL_sv_zero)); + + assert(SvIMMORTAL(&PL_sv_yes)); + assert(SvIMMORTAL(&PL_sv_undef)); + assert(SvIMMORTAL(&PL_sv_no)); + assert(SvIMMORTAL(&PL_sv_zero)); + + assert( SvIMMORTAL_TRUE(&PL_sv_yes)); + assert(!SvIMMORTAL_TRUE(&PL_sv_undef)); + assert(!SvIMMORTAL_TRUE(&PL_sv_no)); + assert(!SvIMMORTAL_TRUE(&PL_sv_zero)); + + assert( SvTRUE_nomg_NN(&PL_sv_yes)); + assert(!SvTRUE_nomg_NN(&PL_sv_undef)); + assert(!SvTRUE_nomg_NN(&PL_sv_no)); + assert(!SvTRUE_nomg_NN(&PL_sv_zero)); } /* @@ -16796,6 +16815,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, /* def-ness of rval pos() is independent of the def-ness of its arg */ if ( !(obase->op_flags & OPf_MOD)) break; + /* FALLTHROUGH */ case OP_SCHOMP: case OP_CHOMP: @@ -16871,6 +16891,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) if (PL_op) { desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded ? "join or string" + : PL_op->op_type == OP_MULTICONCAT + && (PL_op->op_private & OPpMULTICONCAT_FAKE) + ? "sprintf" : OP_DESC(PL_op); if (uninit_sv && PL_curpad) { varname = find_uninit_var(PL_op, uninit_sv, 0, &desc); @@ -16884,7 +16907,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) desc = "sort"; /* PL_warn_uninit_sv is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); if (desc) /* diag_listed_as: Use of uninitialized value%s */ Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit_sv, @@ -16893,7 +16916,7 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) else Perl_warner(aTHX_ packWARN(WARN_UNINITIALIZED), PL_warn_uninit, "", "", ""); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } /*