X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/5f7104e27bf393e7a375da488b5653e69e55d8da..2e8ea15a11326145a7df027b5b2507ff3d7483ba:/sv.c diff --git a/sv.c b/sv.c index dfdf57b..c1a33fb 100644 --- a/sv.c +++ b/sv.c @@ -1462,9 +1462,7 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) SvSTASH_set(io, MUTABLE_HV(SvREFCNT_inc(GvHV(iogv)))); IoPAGE_LEN(sv) = 60; } - if (UNLIKELY(new_type == SVt_REGEXP)) - sv->sv_u.svu_rx = (regexp *)new_body; - else if (old_type < SVt_PV) { + if (old_type < SVt_PV) { /* referent will be NULL unless the old type was SVt_IV emulating SVt_RV */ sv->sv_u.svu_rv = referent; @@ -1647,6 +1645,7 @@ Perl_sv_setiv(pTHX_ SV *const sv, const IV i) case SVt_PVGV: if (!isGV_with_GP(sv)) break; + /* FALLTHROUGH */ case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: @@ -1760,6 +1759,7 @@ Perl_sv_setnv(pTHX_ SV *const sv, const NV num) case SVt_PVGV: if (!isGV_with_GP(sv)) break; + /* FALLTHROUGH */ case SVt_PVAV: case SVt_PVHV: case SVt_PVCV: @@ -2463,7 +2463,7 @@ Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) Regexps have no SvIVX and SvNVX fields. */ - assert(isREGEXP(sv) || SvPOKp(sv)); + assert(SvPOKp(sv)); { UV value; const char * const ptr = @@ -2551,7 +2551,7 @@ Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags) /* FBMs use the space for SvIVX and SvNVX for other purposes, and use the same flag bit as SVf_IVisUV, so must not let them cache IVs. Regexps have no SvIVX and SvNVX fields. */ - assert(isREGEXP(sv) || SvPOKp(sv)); + assert(SvPOKp(sv)); { UV value; const char * const ptr = @@ -2627,7 +2627,6 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) return SvNVX(sv); if (SvPOKp(sv) && !SvIOKp(sv)) { ptr = SvPVX_const(sv); - grokpv: if (!SvIOKp(sv) && ckWARN(WARN_NUMERIC) && !grok_number(ptr, SvCUR(sv), NULL)) not_a_number(sv); @@ -2642,10 +2641,6 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) if (SvROK(sv)) { goto return_rok; } - if (isREGEXP(sv)) { - ptr = RX_WRAPPED((REGEXP *)sv); - goto grokpv; - } assert(SvTYPE(sv) >= SVt_PVMG); /* This falls through to the report_uninit near the end of the function. */ @@ -2673,11 +2668,11 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) /* The logic to use SVt_PVNV if necessary is in sv_upgrade. */ sv_upgrade(sv, SVt_NV); DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); + STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " num(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); + RESTORE_LC_NUMERIC_UNDERLYING(); }); } else if (SvTYPE(sv) < SVt_PVNV) @@ -2814,10 +2809,10 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) return 0.0; } DEBUG_c({ - STORE_NUMERIC_LOCAL_SET_STANDARD(); + STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); - RESTORE_NUMERIC_LOCAL(); + RESTORE_LC_NUMERIC_UNDERLYING(); }); return SvNVX(sv); } @@ -3146,7 +3141,7 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) DECLARATION_FOR_LC_NUMERIC_MANIPULATION; STORE_LC_NUMERIC_SET_TO_NEEDED(); - local_radix = PL_numeric_local && PL_numeric_radix_sv; + local_radix = PL_numeric_underlying && PL_numeric_radix_sv; if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { size += SvCUR(PL_numeric_radix_sv) - 1; s = SvGROW_mutable(sv, size); @@ -3191,10 +3186,6 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) *lp = SvCUR(buffer); return SvPVX(buffer); } - else if (isREGEXP(sv)) { - if (lp) *lp = RX_WRAPLEN((REGEXP *)sv); - return RX_WRAPPED((REGEXP *)sv); - } else { if (lp) *lp = 0; @@ -3371,11 +3362,16 @@ Perl_sv_2bool_flags(pTHX_ SV *sv, I32 flags) return cBOOL(svb); } } - return SvRV(sv) != 0; + assert(SvRV(sv)); + return TRUE; } if (isREGEXP(sv)) return RX_WRAPLEN(sv) > 1 || (RX_WRAPLEN(sv) && *RX_WRAPPED(sv) != '0'); + + if (SvNOK(sv) && !SvPOK(sv)) + return SvNVX(sv) != 0.0; + return SvTRUE_common(sv, isGV_with_GP(sv) ? 1 : 0); } @@ -3468,7 +3464,12 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr } } - if (SvUTF8(sv)) { + /* SVt_REGEXP's shouldn't be upgraded to UTF8 - they're already + * compiled and individual nodes will remain non-utf8 even if the + * stringified version of the pattern gets upgraded. Whether the + * PVX of a REGEXP should be grown or we should just croak, I don't + * know - DAPM */ + if (SvUTF8(sv) || isREGEXP(sv)) { if (extra) SvGROW(sv, SvCUR(sv) + extra); return SvCUR(sv); } @@ -3488,37 +3489,35 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr U8 * s = (U8 *) SvPVX_const(sv); U8 * e = (U8 *) SvEND(sv); U8 *t = s; - STRLEN two_byte_count = 0; + STRLEN two_byte_count; - if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8; - - /* See if really will need to convert to utf8. We mustn't rely on our - * incoming SV being well formed and having a trailing '\0', as certain - * code in pp_formline can send us partially built SVs. */ - - while (t < e) { - const U8 ch = *t++; - if (NATIVE_BYTE_IS_INVARIANT(ch)) continue; - - t--; /* t already incremented; re-point to first variant */ - two_byte_count = 1; - goto must_be_utf8; - } + if (flags & SV_FORCE_UTF8_UPGRADE) { + two_byte_count = 0; + } + else { + if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) { - /* utf8 conversion not needed because all are invariants. Mark as - * UTF-8 even if no variant - saves scanning loop */ - SvUTF8_on(sv); - if (extra) SvGROW(sv, SvCUR(sv) + extra); - return SvCUR(sv); + /* utf8 conversion not needed because all are invariants. Mark + * as UTF-8 even if no variant - saves scanning loop */ + SvUTF8_on(sv); + if (extra) SvGROW(sv, SvCUR(sv) + extra); + return SvCUR(sv); + } - must_be_utf8: + /* Here, there is at least one variant, and t points to the first + * one */ + two_byte_count = 1; + } - /* Here, the string should be converted to utf8, either because of an - * input flag (two_byte_count = 0), or because a character that - * requires 2 bytes was found (two_byte_count = 1). t points either to - * the beginning of the string (if we didn't examine anything), or to - * the first variant. In either case, everything from s to t - 1 will - * occupy only 1 byte each on output. + /* Note that the incoming SV may not have a trailing '\0', as certain + * code in pp_formline can send us partially built SVs. + * + * Here, the string should be converted to utf8, either because of an + * input flag (which causes two_byte_count to be set to 0), or because + * a character that requires 2 bytes was found (two_byte_count = 1). t + * points either to the beginning of the string (if we didn't examine + * anything), or to the first variant. In either case, everything from + * s to t - 1 will occupy only 1 byte each on output. * * There are two main ways to convert. One is to create a new string * and go through the input starting from the beginning, appending each @@ -3529,7 +3528,7 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr * from s to t - 1 is invariant, the destination can be initialized * with these using a fast memory copy * - * The other way is to figure out exactly how big the string should be + * The other way is to figure out exactly how big the string should be, * by parsing the entire input. Then you don't have to make it big * enough to handle the worst possible case, and more importantly, if * the string you already have is large enough, you don't have to @@ -3551,18 +3550,18 @@ Perl_sv_utf8_upgrade_flags_grow(pTHX_ SV *const sv, const I32 flags, STRLEN extr * value. We go backwards through the string, converting until we * get to the position we are at now, and then stop. If this * position is far enough along in the string, this method is - * faster than the other method. If the memory copy were the same - * speed as the byte-by-byte loop, that position would be about - * half-way, as at the half-way mark, parsing to the end and back - * is one complete string's parse, the same amount as starting - * over and going all the way through. Actually, it would be - * somewhat less than half-way, as it's faster to just count bytes - * than to also copy, and we don't have the overhead of allocating - * a new string, changing the scalar to use it, and freeing the - * existing one. But if the memory copy is fast, the break-even - * point is somewhere after half way. The counting loop could be - * sped up by vectorization, etc, to move the break-even point - * further towards the beginning. + * faster than the first method above. If the memory copy were + * the same speed as the byte-by-byte loop, that position would be + * about half-way, as at the half-way mark, parsing to the end and + * back is one complete string's parse, the same amount as + * starting over and going all the way through. Actually, it + * would be somewhat less than half-way, as it's faster to just + * count bytes than to also copy, and we don't have the overhead + * of allocating a new string, changing the scalar to use it, and + * freeing the existing one. But if the memory copy is fast, the + * break-even point is somewhere after half way. The counting + * loop could be sped up by vectorization, etc, to move the + * break-even point further towards the beginning. * 2) if the string doesn't have enough space to handle the converted * value. A new string will have to be allocated, and one might * as well, given that, start from the beginning doing the first @@ -3920,15 +3919,14 @@ S_glob_assign_glob(pTHX_ SV *const dstr, SV *const sstr, const int dtype) glob to begin with. */ if(dtype == SVt_PVGV) { const char * const name = GvNAME((const GV *)dstr); - if( - strEQ(name,"ISA") + const STRLEN len = GvNAMELEN(dstr); + if(memEQs(name, len, "ISA") /* The stash may have been detached from the symbol table, so check its name. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) ) mro_changes = 2; else { - const STRLEN len = GvNAMELEN(dstr); if ((len > 1 && name[len-2] == ':' && name[len-1] == ':') || (len == 1 && name[0] == ':')) { mro_changes = 3; @@ -4141,7 +4139,7 @@ Perl_gv_setref(pTHX_ SV *const dstr, SV *const sstr) } else if ( stype == SVt_PVAV && sref != dref - && strEQ(GvNAME((GV*)dstr), "ISA") + && memEQs(GvNAME((GV*)dstr), GvNAMELEN((GV*)dstr), "ISA") /* The stash may have been detached from the symbol table, so check its name before doing anything. */ && GvSTASH(dstr) && HvENAME(GvSTASH(dstr)) @@ -4448,15 +4446,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) case SVt_REGEXP: upgregexp: if (dtype < SVt_REGEXP) - { - if (dtype >= SVt_PV) { - SvPV_free(dstr); - SvPV_set(dstr, 0); - SvLEN_set(dstr, 0); - SvCUR_set(dstr, 0); - } sv_upgrade(dstr, SVt_REGEXP); - } break; case SVt_INVLIST: @@ -4705,11 +4695,13 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) ) { /* Either it's a shared hash key, or it's suitable for copy-on-write. */ +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: sstr --> dstr\n"); sv_dump(sstr); sv_dump(dstr); } +#endif #ifdef PERL_ANY_COW if (!(sflags & SVf_IsCOW)) { SvIsCOW_on(sstr); @@ -4883,7 +4875,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) #endif PERL_ARGS_ASSERT_SV_SETSV_COW; - +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Fast copy on write: %p -> %p\n", (void*)sstr, (void*)dstr); @@ -4891,7 +4883,7 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) if (dstr) sv_dump(dstr); } - +#endif if (dstr) { if (SvTHINKFIRST(dstr)) sv_force_normal_flags(dstr, SV_COW_DROP_PV); @@ -4938,9 +4930,10 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) SvUTF8_on(dstr); SvLEN_set(dstr, len); SvCUR_set(dstr, cur); - if (DEBUG_C_TEST) { - sv_dump(dstr); - } +#ifdef DEBUGGING + if (DEBUG_C_TEST) + sv_dump(dstr); +#endif return dstr; } #endif @@ -5226,12 +5219,14 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) const STRLEN len = SvLEN(sv); const STRLEN cur = SvCUR(sv); +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: Force normal %ld\n", (long) flags); sv_dump(sv); } +#endif SvIsCOW_off(sv); # ifdef PERL_COPY_ON_WRITE if (len) { @@ -5271,9 +5266,10 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) } else { unshare_hek(SvSHARED_HEK_FROM_PV(pvx)); } - if (DEBUG_C_TEST) { +#ifdef DEBUGGING + if (DEBUG_C_TEST) sv_dump(sv); - } +#endif } #else const char * const pvx = SvPVX_const(sv); @@ -5338,7 +5334,7 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) const svtype new_type = islv ? SVt_NULL : SvMAGIC(sv) || SvSTASH(sv) ? SVt_PVMG : SVt_PV; SV *const temp = newSV_type(new_type); - regexp *const temp_p = ReANY((REGEXP *)sv); + regexp *old_rx_body; if (new_type == SVt_PVMG) { SvMAGIC_set(temp, SvMAGIC(sv)); @@ -5346,15 +5342,26 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) SvSTASH_set(temp, SvSTASH(sv)); SvSTASH_set(sv, NULL); } - if (!islv) SvCUR_set(temp, SvCUR(sv)); - /* Remember that SvPVX is in the head, not the body. But - RX_WRAPPED is in the body. */ + if (!islv) + SvCUR_set(temp, SvCUR(sv)); + /* Remember that SvPVX is in the head, not the body. */ assert(ReANY((REGEXP *)sv)->mother_re); + + if (islv) { + /* LV-as-regex has sv->sv_any pointing to an XPVLV body, + * whose xpvlenu_rx field points to the regex body */ + XPV *xpv = (XPV*)(SvANY(sv)); + old_rx_body = xpv->xpv_len_u.xpvlenu_rx; + xpv->xpv_len_u.xpvlenu_rx = NULL; + } + else + old_rx_body = ReANY((REGEXP *)sv); + /* Their buffer is already owned by someone else. */ if (flags & SV_COW_DROP_PV) { /* SvLEN is already 0. For SVt_REGEXP, we have a brand new - zeroed body. For SVt_PVLV, it should have been set to 0 - before turning into a regexp. */ + zeroed body. For SVt_PVLV, we zeroed it above (len field + a union with xpvlenu_rx) */ assert(!SvLEN(islv ? sv : temp)); sv->sv_u.svu_pv = 0; } @@ -5375,8 +5382,7 @@ Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) SvFLAGS(temp) &= ~(SVTYPEMASK); SvFLAGS(temp) |= SVt_REGEXP|SVf_FAKE; - SvANY(temp) = temp_p; - temp->sv_u.svu_rx = (regexp *)temp_p; + SvANY(temp) = old_rx_body; SvREFCNT_dec_NN(temp); } @@ -5962,7 +5968,8 @@ Weaken a reference: set the C flag on this RV; give the referred-to SV C magic if it hasn't already; and push a back-reference to this RV onto the array of backreferences associated with that magic. If the RV is magical, set magic will be -called after the RV is cleared. +called after the RV is cleared. Silently ignores C and warns +on already-weak references. =cut */ @@ -5991,6 +5998,42 @@ Perl_sv_rvweaken(pTHX_ SV *const sv) } /* +=for apidoc sv_rvunweaken + +Unweaken a reference: Clear the C flag on this RV; remove +the backreference to this RV from the array of backreferences +associated with the target SV, increment the refcount of the target. +Silently ignores C and warns on non-weak references. + +=cut +*/ + +SV * +Perl_sv_rvunweaken(pTHX_ SV *const sv) +{ + SV *tsv; + + PERL_ARGS_ASSERT_SV_RVUNWEAKEN; + + if (!SvOK(sv)) /* let undefs pass */ + return sv; + if (!SvROK(sv)) + Perl_croak(aTHX_ "Can't unweaken a nonreference"); + else if (!SvWEAKREF(sv)) { + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Reference is not weak"); + return sv; + } + else if (SvREADONLY(sv)) croak_no_modify(); + + tsv = SvRV(sv); + SvWEAKREF_off(sv); + SvROK_on(sv); + SvREFCNT_inc_NN(tsv); + Perl_sv_del_backref(aTHX_ tsv, sv); + return sv; +} + +/* =for apidoc sv_get_backrefs If C is the target of a weak reference then it returns the back @@ -6625,7 +6668,6 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) goto freescalar; case SVt_REGEXP: /* FIXME for plugins */ - freeregexp: pregfree2((REGEXP*) sv); goto freescalar; case SVt_PVCV: @@ -6704,7 +6746,16 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) } else if (LvTYPE(sv) != 't') /* unless tie: unrefcnted fake SV** */ SvREFCNT_dec(LvTARG(sv)); - if (isREGEXP(sv)) goto freeregexp; + if (isREGEXP(sv)) { + /* SvLEN points to a regex body. Free the body, then + * set SvLEN to whatever value was in the now-freed + * regex body. The PVX buffer is shared by multiple re's + * and only freed once, by the re whose len in non-null */ + STRLEN len = ReANY(sv)->xpv_len; + pregfree2((REGEXP*) sv); + SvLEN_set((sv), len); + goto freescalar; + } /* FALLTHROUGH */ case SVt_PVGV: if (isGV_with_GP(sv)) { @@ -6761,10 +6812,12 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) && !(IoFLAGS(sv) & IOf_FAKE_DIRP))) { if (SvIsCOW(sv)) { +#ifdef DEBUGGING if (DEBUG_C_TEST) { PerlIO_printf(Perl_debug_log, "Copy on write: clear\n"); sv_dump(sv); } +#endif if (SvLEN(sv)) { if (CowREFCNT(sv)) { sv_buf_to_rw(sv); @@ -8958,7 +9011,7 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv) if (flags & SVp_NOK) { const NV was = SvNVX(sv); if (LIKELY(!Perl_isinfnan(was)) && - NV_OVERFLOWS_INTEGERS_AT && + NV_OVERFLOWS_INTEGERS_AT != 0.0 && was >= NV_OVERFLOWS_INTEGERS_AT) { /* diag_listed_as: Lost precision when %s %f by 1 */ Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), @@ -9141,7 +9194,7 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv) { const NV was = SvNVX(sv); if (LIKELY(!Perl_isinfnan(was)) && - NV_OVERFLOWS_INTEGERS_AT && + NV_OVERFLOWS_INTEGERS_AT != 0.0 && was <= -NV_OVERFLOWS_INTEGERS_AT) { /* diag_listed_as: Lost precision when %s %f by 1 */ Perl_ck_warner(aTHX_ packWARN(WARN_IMPRECISION), @@ -10955,12 +11008,35 @@ Usually used via one of its frontends C and C. void Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted) + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) { PERL_ARGS_ASSERT_SV_VSETPVFN; SvPVCLEAR(sv); - sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, 0); + sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0); +} + + +/* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */ + +PERL_STATIC_INLINE void +S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len) +{ + STRLEN const need = len + SvCUR(sv) + 1; + char *end; + + /* can't wrap as both len and SvCUR() are allocated in + * memory and together can't consume all the address space + */ + assert(need > len); + + assert(SvPOK(sv)); + SvGROW(sv, need); + end = SvEND(sv); + Copy(buf, end, len, char); + end += len; + *end = '\0'; + SvCUR_set(sv, need - 1); } @@ -10978,24 +11054,92 @@ S_warn_vcatpvfn_missing_argument(pTHX) { } -STATIC I32 -S_expect_number(pTHX_ char **const pattern) +static void +S_croak_overflow() +{ + dTHX; + Perl_croak(aTHX_ "Integer overflow in format string for %s", + (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn")); +} + + +/* Given an int i from the next arg (if args is true) or an sv from an arg + * (if args is false), try to extract a STRLEN-ranged value from the arg, + * with overflow checking. + * Sets *neg to true if the value was negative (untouched otherwise. + * Returns the absolute value. + * As an extra margin of safety, it croaks if the returned value would + * exceed the maximum value of a STRLEN / 4. + */ + +static STRLEN +S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg) +{ + IV iv; + + if (args) { + iv = i; + goto do_iv; + } + + if (!sv) + return 0; + + SvGETMAGIC(sv); + + if (UNLIKELY(SvIsUV(sv))) { + UV uv = SvUV_nomg(sv); + if (uv > IV_MAX) + S_croak_overflow(); + iv = uv; + } + else { + iv = SvIV_nomg(sv); + do_iv: + if (iv < 0) { + if (iv < -IV_MAX) + S_croak_overflow(); + iv = -iv; + *neg = TRUE; + } + } + + if (iv > (IV)(((STRLEN)~0) / 4)) + S_croak_overflow(); + + return (STRLEN)iv; +} + + +/* Returns true if c is in the range '1'..'9' + * Written with the cast so it only needs one conditional test + */ +#define IS_1_TO_9(c) ((U8)(c - '1') <= 8) + +/* Read in and return a number. Updates *pattern to point to the char + * following the number. Expects the first char to 1..9. + * Croaks if the number exceeds 1/4 of the maximum value of STRLEN. + * This is a belt-and-braces safety measure to complement any + * overflow/wrap checks done in the main body of sv_vcatpvfn_flags. + * It means that e.g. on a 32-bit system the width/precision can't be more + * than 1G, which seems reasonable. + */ + +STATIC STRLEN +S_expect_number(pTHX_ const char **const pattern) { - I32 var = 0; + STRLEN var; PERL_ARGS_ASSERT_EXPECT_NUMBER; - switch (**pattern) { - case '1': case '2': case '3': - case '4': case '5': case '6': - case '7': case '8': case '9': - var = *(*pattern)++ - '0'; - while (isDIGIT(**pattern)) { - const I32 tmp = var * 10 + (*(*pattern)++ - '0'); - if (tmp < var) - Perl_croak(aTHX_ "Integer overflow in format string for %s", (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn")); - var = tmp; - } + assert(IS_1_TO_9(**pattern)); + + var = *(*pattern)++ - '0'; + while (isDIGIT(**pattern)) { + /* if var * 10 + 9 would exceed 1/4 max strlen, croak */ + if (var > ((((STRLEN)~0) / 4 - 9) / 10)) + S_croak_overflow(); + var = var * 10 + (*(*pattern)++ - '0'); } return var; } @@ -11036,19 +11180,15 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) } -#define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\ - vecstr = (U8*)SvPV_const(vecsv,veclen);\ - vec_utf8 = DO_UTF8(vecsv); - /* XXX maybe_tainted is never assigned to, so the doc above is lying. */ void Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted) + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted) { PERL_ARGS_ASSERT_SV_VCATPVFN; - sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC); + sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC); } @@ -11210,6 +11350,9 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, #endif const U8* vmaxend = vhex + HEXTRACTSIZE; + + assert(HEXTRACTSIZE <= VHEX_SIZE); + PERL_UNUSED_VAR(ix); /* might happen */ (void)Perl_frexp(PERL_ABS(nv), exponent); *subnormal = FALSE; @@ -11228,7 +11371,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, const U8* nvp = (const U8*)(&nv); HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); -# undef HEXTRACT_HAS_TOP_NYBBLE +# undef HEXTRACT_HAS_TOP_NYBBLE HEXTRACT_BYTES_LE(13, 0); # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L: @@ -11238,7 +11381,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, const U8* nvp = (const U8*)(&nv); HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); -# undef HEXTRACT_HAS_TOP_NYBBLE +# undef HEXTRACT_HAS_TOP_NYBBLE HEXTRACT_BYTES_BE(2, 15); # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN /* x86 80-bit "extended precision", 64 bits of mantissa / fraction / @@ -11338,9 +11481,10 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, # define HEXTRACT_FALLBACK # endif #endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */ -# ifdef HEXTRACT_FALLBACK + +#ifdef HEXTRACT_FALLBACK HEXTRACT_GET_SUBNORMAL(nv); -# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */ +# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */ /* The fallback is used for the double-double format, and * for unknown long double formats, and for unknown double * formats, or in general unknown NV formats. */ @@ -11421,7 +11565,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, v++; } } -# endif +#endif } /* Croak for various reasons: if the output pointer escaped the * output buffer, if the extraction index escaped the extraction @@ -11451,6 +11595,8 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, * same name within Perl_sv_vcatpvfn_flags(). * * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED(); + * + * It requires the caller to make buf large enough. */ static STRLEN @@ -11472,7 +11618,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, * be mapped through the xdig to get the actual * human-readable xdigits. */ const char* xdig = PL_hexdigit; - int zerotail = 0; /* how many extra zeros to append */ + STRLEN zerotail = 0; /* how many extra zeros to append */ int exponent = 0; /* exponent of the floating point input */ bool hexradix = FALSE; /* should we output the radix */ bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */ @@ -11495,8 +11641,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, /* In this case there is an implicit bit, * and therefore the exponent is shifted by one. */ exponent--; -# else -# ifdef NV_X86_80_BIT +# elif defined(NV_X86_80_BIT) if (subnormal) { /* The subnormals of the x86-80 have a base exponent of -16382, * (while the physical exponent bits are zero) but the frexp() @@ -11510,7 +11655,6 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, } else { exponent -= 4; } -# endif /* TBD: other non-implicit-bit platforms than the x86-80. */ # endif #endif @@ -11667,10 +11811,9 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, #ifndef USE_LOCALE_NUMERIC *p++ = '.'; #else - if (PL_numeric_radix_sv) { + if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { STRLEN n; const char* r = SvPV(PL_numeric_radix_sv, n); - assert(IN_LC(LC_NUMERIC)); Copy(r, p, n, char); p += n; } @@ -11692,6 +11835,12 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, } elen = p - buf; + + /* sanity checks */ + if (elen >= bufsize || width >= bufsize) + /* diag_listed_as: Hexadecimal float: internal error (%s) */ + Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)"); + elen += my_snprintf(p, bufsize - elen, "%c%+d", lower ? 'p' : 'P', exponent); @@ -11730,20 +11879,6 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, } -/* Helper for sv_vcatpvfn_flags(). */ -#define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr) \ - STMT_START { \ - if (in_range) \ - (var) = (expr); \ - else { \ - (var) = &PL_sv_no; /* [perl #71000] */ \ - arg_missing = TRUE; \ - } \ - } STMT_END - -void - - /* =for apidoc sv_vcatpvfn @@ -11771,22 +11906,22 @@ Usually used via one of its frontends C and C. */ +void Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted, + va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted, const U32 flags) { - char *p; - char *q; + const char *fmtstart; /* character following the current '%' */ + const char *q; /* current position within format */ const char *patend; STRLEN origlen; - I32 svix = 0; + Size_t svix = 0; static const char nullstr[] = "(null)"; SV *argsv = NULL; bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ - SV *nsv = NULL; /* Times 4: a decimal digit takes more than 3 binary digits. - * NV_DIG: mantissa takes than many decimal digits. + * NV_DIG: mantissa takes that many decimal digits. * Plus 32: Playing safe. */ char ebuf[IV_DIG * 4 + NV_DIG + 32]; bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ @@ -11809,64 +11944,63 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * should be fixed */ assert(pat[patlen] == '\0'); - /* special-case "", "%s", and "%-p" (SVf - see below) */ - if (patlen == 0) { - if (svmax && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - return; - } - if (patlen == 2 && pat[0] == '%' && pat[1] == 's') { - if (svmax > 1 && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - if (args) { - const char * const s = va_arg(*args, char*); - sv_catpv_nomg(sv, s ? s : nullstr); - } - else if (svix < svmax) { - /* we want get magic on the source but not the target. sv_catsv can't do that, though */ - SvGETMAGIC(*svargs); - sv_catsv_nomg(sv, *svargs); - } - else - S_warn_vcatpvfn_missing_argument(aTHX); - return; - } - if (args && patlen == 3 && pat[0] == '%' && - pat[1] == '-' && pat[2] == 'p') { - if (svmax > 1 && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - argsv = MUTABLE_SV(va_arg(*args, void*)); - sv_catsv_nomg(sv, argsv); + /* Special-case "", "%s", "%-p" (SVf - see below) and "%.0f". + * In each case, if there isn't the correct number of args, instead + * fall through to the main code to handle the issuing of any + * warnings etc. + */ + + if (patlen == 0 && (args || sv_count == 0)) return; - } -#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) - /* special-case "%.0f" */ - if ( !args - && patlen == 4 - && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f' - && svmax > 0) - { - const NV nv = SvNV(*svargs); - if (LIKELY(!Perl_isinfnan(nv))) { - STRLEN l; - char *p; + if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) { + + /* "%s" */ + if (patlen == 2 && pat[1] == 's') { + if (args) { + const char * const s = va_arg(*args, char*); + sv_catpv_nomg(sv, s ? s : nullstr); + } + else { + /* we want get magic on the source but not the target. + * sv_catsv can't do that, though */ + SvGETMAGIC(*svargs); + sv_catsv_nomg(sv, *svargs); + } + return; + } - if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { - sv_catpvn_nomg(sv, p, l); + /* "%-p" */ + if (args) { + if (patlen == 3 && pat[1] == '-' && pat[2] == 'p') { + SV *asv = MUTABLE_SV(va_arg(*args, void*)); + sv_catsv_nomg(sv, asv); return; } - } - } + } +#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) + /* special-case "%.0f" */ + else if ( patlen == 4 + && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f') + { + const NV nv = SvNV(*svargs); + if (LIKELY(!Perl_isinfnan(nv))) { + STRLEN l; + char *p; + + if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { + sv_catpvn_nomg(sv, p, l); + return; + } + } + } #endif /* !USE_LONG_DOUBLE */ + } - patend = (char*)pat + patlen; - for (p = (char*)pat; p < patend; p = q) { + patend = (char*)pat + patlen; + for (fmtstart = pat; fmtstart < patend; fmtstart = q) { char intsize = 0; /* size qualifier in "%hi..." etc */ bool alt = FALSE; /* has "%#..." */ bool left = FALSE; /* has "%-..." */ @@ -11875,26 +12009,18 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p STRLEN width = 0; /* value of "%NNN..." */ bool has_precis = FALSE; /* has "%.NNN..." */ STRLEN precis = 0; /* value of "%.NNN..." */ - bool asterisk = FALSE; /* has "%*..." */ - bool used_explicit_ix = FALSE;/* has "%$n..." */ - unsigned base = 0; /* base to print in, e.g. 8 for %o */ + int base = 0; /* base to print in, e.g. 8 for %o */ UV uv = 0; /* the value to print of int-ish args */ - IV iv = 0; /* ditto for signed types */ bool vectorize = FALSE; /* has "%v..." */ - bool vectorarg = FALSE; /* has "%*v..." */ - SV *vecsv = NULL; /* the cur arg for %v */ - bool vec_utf8 = FALSE; /* SvUTF8(vecsv) */ - const U8 *vecstr = NULL; /* SvPVX(vecsv) */ - STRLEN veclen = 0; /* SvCUR(vecsv) */ - const char *dotstr = "."; /* separator string for %v */ - STRLEN dotstrlen = 1; /* length of separator string for %v */ - - I32 efix = 0; /* explicit format parameter index */ - I32 ewix = 0; /* explicit width index */ - I32 epix = 0; /* explicit precision index */ - I32 evix = 0; /* explicit vector index */ - const I32 osvix = svix; /* original index in case of bad fmt */ + bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */ + const U8 *vecstr = NULL; /* SvPVX(vec arg) */ + STRLEN veclen = 0; /* SvCUR(vec arg) */ + const char *dotstr = NULL; /* separator string for %v */ + STRLEN dotstrlen; /* length of separator string for %v */ + + Size_t efix = 0; /* explicit format parameter index */ + const Size_t osvix = svix; /* original index in case of bad fmt */ bool is_utf8 = FALSE; /* is this item utf8? */ bool arg_missing = FALSE; /* give "Missing argument" warning */ @@ -11905,23 +12031,39 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p const char *eptr = NULL; /* the address of the element string */ STRLEN elen = 0; /* the length of the element string */ - const char *fmtstart; /* start of current format (the '%') */ - char c = 0; /* current character read from format */ + char c; /* the actual format ('d', s' etc) */ /* echo everything up to the next format specification */ - for (q = p; q < patend && *q != '%'; ++q) ; - if (q > p) { - if (has_utf8 && !pat_utf8) - sv_catpvn_nomg_utf8_upgrade(sv, p, q - p, nsv); + for (q = fmtstart; q < patend && *q != '%'; ++q) + {}; + + if (q > fmtstart) { + if (has_utf8 && !pat_utf8) { + /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on + * the fly */ + const char *p; + char *dst; + STRLEN need = SvCUR(sv) + (q - fmtstart) + 1; + + for (p = fmtstart; p < q; p++) + if (!NATIVE_BYTE_IS_INVARIANT(*p)) + need++; + SvGROW(sv, need); + + dst = SvEND(sv); + for (p = fmtstart; p < q; p++) + append_utf8_from_native_byte((U8)*p, (U8**)&dst); + *dst = '\0'; + SvCUR_set(sv, need - 1); + } else - sv_catpvn_nomg(sv, p, q - p); - p = q; + S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart); } if (q++ >= patend) break; - fmtstart = q; + fmtstart = q; /* fmtstart is char following the '%' */ /* We allow format specification elements in this order: @@ -11935,14 +12077,16 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p [%bcdefginopsuxDFOUX] format (mandatory) */ - if ( (width = expect_number(&q)) ) { + if (IS_1_TO_9(*q)) { + width = expect_number(&q); if (*q == '$') { if (args) Perl_croak_nocontext( "Cannot yet reorder sv_catpvfn() arguments from va_list"); ++q; - efix = width; - used_explicit_ix = TRUE; + efix = (Size_t)width; + width = 0; + no_redundant_warning = TRUE; } else { goto gotwidth; } @@ -11981,76 +12125,102 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p break; } + /* at this point we can expect one of: + * + * 123 an explicit width + * * width taken from next arg + * *12$ width taken from 12th arg + * or no width + * + * But any width specification may be preceded by a v, in one of its + * forms: + * v + * *v + * *12$v + * So an asterisk may be either a width specifier or a vector + * separator arg specifier, and we don't know which initially + */ + tryasterisk: if (*q == '*') { + STRLEN ix; /* explicit width/vector separator index */ q++; - if ( (ewix = expect_number(&q)) ) { + if (IS_1_TO_9(*q)) { + ix = expect_number(&q); if (*q++ == '$') { if (args) Perl_croak_nocontext( "Cannot yet reorder sv_catpvfn() arguments from va_list"); - used_explicit_ix = TRUE; + no_redundant_warning = TRUE; } else goto unknown; } - asterisk = TRUE; - } - if (*q == 'v') { + else + ix = 0; + + if (*q == 'v') { + SV *vecsv; + /* The asterisk was for *v, *NNN$v: vectorizing, but not + * with the default "." */ + q++; + if (vectorize) + goto unknown; + if (args) + vecsv = va_arg(*args, SV*); + else { + ix = ix ? ix - 1 : svix++; + vecsv = ix < sv_count ? svargs[ix] + : (arg_missing = TRUE, &PL_sv_no); + } + dotstr = SvPV_const(vecsv, dotstrlen); + /* Keep the DO_UTF8 test *after* the SvPV call, else things go + bad with tied or overloaded values that return UTF8. */ + if (DO_UTF8(vecsv)) + is_utf8 = TRUE; + else if (has_utf8) { + vecsv = sv_mortalcopy(vecsv); + sv_utf8_upgrade(vecsv); + dotstr = SvPV_const(vecsv, dotstrlen); + is_utf8 = TRUE; + } + vectorize = TRUE; + goto tryasterisk; + } + + /* the asterisk specified a width */ + { + int i = 0; + SV *sv = NULL; + if (args) + i = va_arg(*args, int); + else { + ix = ix ? ix - 1 : svix++; + sv = (ix < sv_count) ? svargs[ix] + : (arg_missing = TRUE, (SV*)NULL); + } + width = S_sprintf_arg_num_val(aTHX_ args, i, sv, &left); + } + } + else if (*q == 'v') { q++; if (vectorize) goto unknown; - if ((vectorarg = asterisk)) { - evix = ewix; - ewix = 0; - asterisk = FALSE; - } vectorize = TRUE; - goto tryasterisk; - } + dotstr = "."; + dotstrlen = 1; + goto tryasterisk; - if (!asterisk) - { + } + else { + /* explicit width? */ if(*q == '0') { fill = TRUE; q++; } - width = expect_number(&q); - } - - if (vectorize && vectorarg) { - /* vectorizing, but not with the default "." */ - if (args) - vecsv = va_arg(*args, SV*); - else if (evix) { - FETCH_VCATPVFN_ARGUMENT( - vecsv, evix > 0 && evix <= svmax, svargs[evix-1]); - } else { - FETCH_VCATPVFN_ARGUMENT( - vecsv, svix < svmax, svargs[svix++]); - } - dotstr = SvPV_const(vecsv, dotstrlen); - /* Keep the DO_UTF8 test *after* the SvPV call, else things go - bad with tied or overloaded values that return UTF8. */ - if (DO_UTF8(vecsv)) - is_utf8 = TRUE; - else if (has_utf8) { - vecsv = sv_mortalcopy(vecsv); - sv_utf8_upgrade(vecsv); - dotstr = SvPV_const(vecsv, dotstrlen); - is_utf8 = TRUE; - } + if (IS_1_TO_9(*q)) + width = expect_number(&q); } - if (asterisk) { - int i; - if (args) - i = va_arg(*args, int); - else - i = (ewix ? ewix <= svmax : svix < svmax) ? - SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; - left |= (i < 0); - width = (i < 0) ? -i : i; - } gotwidth: /* PRECISION */ @@ -12058,73 +12228,52 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (*q == '.') { q++; if (*q == '*') { - int i; + STRLEN ix; /* explicit precision index */ q++; - if ( (epix = expect_number(&q)) ) { + if (IS_1_TO_9(*q)) { + ix = expect_number(&q); if (*q++ == '$') { if (args) Perl_croak_nocontext( "Cannot yet reorder sv_catpvfn() arguments from va_list"); - used_explicit_ix = TRUE; + no_redundant_warning = TRUE; } else goto unknown; } - if (args) - i = va_arg(*args, int); - else { - SV *precsv; - if (epix) - FETCH_VCATPVFN_ARGUMENT( - precsv, epix > 0 && epix <= svmax, svargs[epix-1]); - else - FETCH_VCATPVFN_ARGUMENT( - precsv, svix < svmax, svargs[svix++]); - i = precsv == &PL_sv_no ? 0 : SvIVx(precsv); + else + ix = 0; + + { + int i = 0; + SV *sv = NULL; + bool neg = FALSE; + + if (args) + i = va_arg(*args, int); + else { + ix = ix ? ix - 1 : svix++; + sv = (ix < sv_count) ? svargs[ix] + : (arg_missing = TRUE, (SV*)NULL); + } + precis = S_sprintf_arg_num_val(aTHX_ args, i, sv, &neg); + has_precis = !neg; } - precis = i; - has_precis = !(i < 0); } else { - precis = 0; - while (isDIGIT(*q)) - precis = precis * 10 + (*q++ - '0'); + /* although it doesn't seem documented, this code has long + * behaved so that: + * no digits following the '.' is treated like '.0' + * the number may be preceded by any number of zeroes, + * e.g. "%.0001f", which is the same as "%.1f" + * so I've kept that behaviour. DAPM May 2017 + */ + while (*q == '0') + q++; + precis = IS_1_TO_9(*q) ? expect_number(&q) : 0; has_precis = TRUE; } } - if (vectorize) { - if (args) { - VECTORIZE_ARGS - } - else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) { - vecsv = svargs[efix ? efix-1 : svix++]; - vecstr = (U8*)SvPV_const(vecsv,veclen); - vec_utf8 = DO_UTF8(vecsv); - - /* if this is a version object, we need to convert - * back into v-string notation and then let the - * vectorize happen normally - */ - if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) { - if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF), - "vector argument not supported with alpha versions"); - goto vdblank; - } - vecsv = sv_newmortal(); - scan_vstring((char *)vecstr, (char *)vecstr + veclen, - vecsv); - vecstr = (U8*)SvPV_const(vecsv, veclen); - vec_utf8 = DO_UTF8(vecsv); - } - } - else { - vdblank: - vecstr = (U8*)""; - veclen = 0; - } - } - /* SIZE */ switch (*q) { @@ -12194,68 +12343,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* CONVERSION */ - if (*q == '%') { - eptr = q++; + c = *q++; /* c now holds the conversion type */ + + /* '%' doesn't have an arg, so skip arg processing */ + if (c == '%') { + eptr = q - 1; elen = 1; - if (vectorize) { - c = '%'; + if (vectorize) goto unknown; - } goto string; } - if (!vectorize && !args) { - if (efix) { - const I32 i = efix-1; - FETCH_VCATPVFN_ARGUMENT(argsv, i >= 0 && i < svmax, svargs[i]); - } else { - FETCH_VCATPVFN_ARGUMENT(argsv, svix >= 0 && svix < svmax, - svargs[svix++]); - } - } + if (vectorize && !strchr("BbDdiOouUXx", c)) + goto unknown; - c = *q++; /* c now holds the conversion type */ + /* get next arg (individual branches do their own va_arg() + * handling for the args case) */ + + if (!args) { + efix = efix ? efix - 1 : svix++; + argsv = efix < sv_count ? svargs[efix] + : (arg_missing = TRUE, &PL_sv_no); + } - if (argsv && strchr("BbcDdiOopuUXx", c)) { - /* XXX va_arg(*args) case? need peek, use va_copy? */ - SvGETMAGIC(argsv); - if (UNLIKELY(SvAMAGIC(argsv))) - argsv = sv_2num(argsv); - if (UNLIKELY(isinfnansv(argsv))) - goto handle_infnan_argsv; - } switch (c) { /* STRINGS */ - case 'c': - if (vectorize) - goto unknown; - uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv); - if ((uv > 255 || - (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) - && !IN_BYTES) - { - assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1); - eptr = ebuf; - elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; - is_utf8 = TRUE; - } - else { - c = (char)uv; - eptr = &c; - elen = 1; - } - goto string; - case 's': - if (vectorize) - goto unknown; if (args) { eptr = va_arg(*args, char*); if (eptr) - elen = strlen(eptr); + if (has_precis) + elen = my_strnlen(eptr, precis); + else + elen = strlen(eptr); else { eptr = (char *)nullstr; elen = sizeof nullstr - 1; @@ -12290,7 +12413,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* INTEGERS */ case 'p': - if (alt || vectorize) + if (alt) goto unknown; /* %p extensions: @@ -12327,8 +12450,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p && !fill && !plus && !has_precis - && !asterisk - && !used_explicit_ix + /* not %*p or %*1$p - any width was explicit */ + && q[-2] != '*' + && q[-2] != '$' ) { if (left) { /* %-p (SVf), %-NNNp */ if (width) { @@ -12365,7 +12489,23 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p uv = PTR2UV(args ? va_arg(*args, void*) : argsv); base = 16; - goto integer; + goto do_integer; + + case 'c': + /* Ignore any size specifiers, since they're not documented as + * being allowed for %c (ideally we should warn on e.g. '%hc'). + * Setting a default intsize, along with a positive + * (which signals unsigned) base, causes, for C-ish use, the + * va_arg to be interpreted as as unsigned int, when it's + * actually signed, which will convert -ve values to high +ve + * values. Note that unlike the libc %c, values > 255 will + * convert to high unicode points rather than being truncated + * to 8 bits. For perlish use, it will do SvUV(argsv), which + * will again convert -ve args to high -ve values. + */ + intsize = 0; + base = 1; /* special value that indicates we're doing a 'c' */ + goto get_int_arg_val; case 'D': #ifdef IV_IS_QUAD @@ -12373,7 +12513,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #else intsize = 'l'; #endif - goto do_i; + base = -10; + goto get_int_arg_val; case 'd': /* probably just a plain %d, but it might be the start of the @@ -12406,75 +12547,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* FALLTHROUGH */ case 'i': - do_i: - if (vectorize) { - STRLEN ulen; - if (!veclen) - goto donevalidconversion; - if (vec_utf8) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, - UTF8_ALLOW_ANYUV); - else { - uv = *vecstr; - ulen = 1; - } - vecstr += ulen; - veclen -= ulen; - if (plus) - esignbuf[esignlen++] = plus; - } - else if (args) { - switch (intsize) { - case 'c': iv = (char)va_arg(*args, int); break; - case 'h': iv = (short)va_arg(*args, int); break; - case 'l': iv = va_arg(*args, long); break; - case 'V': iv = va_arg(*args, IV); break; - case 'z': iv = va_arg(*args, SSize_t); break; -#ifdef HAS_PTRDIFF_T - case 't': iv = va_arg(*args, ptrdiff_t); break; -#endif - default: iv = va_arg(*args, int); break; -#ifdef I_STDINT - case 'j': iv = va_arg(*args, intmax_t); break; -#endif - case 'q': -#if IVSIZE >= 8 - iv = va_arg(*args, Quad_t); break; -#else - goto unknown; -#endif - } - } - else { - IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'c': iv = (char)tiv; break; - case 'h': iv = (short)tiv; break; - case 'l': iv = (long)tiv; break; - case 'V': - default: iv = tiv; break; - case 'q': -#if IVSIZE >= 8 - iv = (Quad_t)tiv; break; -#else - goto unknown; -#endif - } - } - if ( !vectorize ) /* we already set uv above */ - { - if (iv >= 0) { - uv = iv; - if (plus) - esignbuf[esignlen++] = plus; - } - else { - uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); - esignbuf[esignlen++] = '-'; - } - } - base = 10; - goto integer; + base = -10; + goto get_int_arg_val; case 'U': #ifdef IV_IS_QUAD @@ -12485,12 +12559,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* FALLTHROUGH */ case 'u': base = 10; - goto uns_integer; + goto get_int_arg_val; case 'B': case 'b': base = 2; - goto uns_integer; + goto get_int_arg_val; case 'O': #ifdef IV_IS_QUAD @@ -12501,18 +12575,53 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* FALLTHROUGH */ case 'o': base = 8; - goto uns_integer; + goto get_int_arg_val; case 'X': case 'x': base = 16; - uns_integer: + get_int_arg_val: + if (vectorize) { STRLEN ulen; - vector: + SV *vecsv; + + if (base < 0) { + base = -base; + if (plus) + esignbuf[esignlen++] = plus; + } + + /* initialise the vector string to iterate over */ + + vecsv = args ? va_arg(*args, SV*) : argsv; + + /* if this is a version object, we need to convert + * back into v-string notation and then let the + * vectorize happen normally + */ + if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) { + if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF), + "vector argument not supported with alpha versions"); + vecsv = &PL_sv_no; + } + else { + vecstr = (U8*)SvPV_const(vecsv,veclen); + vecsv = sv_newmortal(); + scan_vstring((char *)vecstr, (char *)vecstr + veclen, + vecsv); + } + } + vecstr = (U8*)SvPV_const(vecsv, veclen); + vec_utf8 = DO_UTF8(vecsv); + + /* This is the re-entry point for when we're iterating + * over the individual characters of a vector arg */ + vector: if (!veclen) - goto donevalidconversion; + goto done_valid_conversion; if (vec_utf8) uv = utf8n_to_uvchr(vecstr, veclen, &ulen, UTF8_ALLOW_ANYUV); @@ -12523,64 +12632,145 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p vecstr += ulen; veclen -= ulen; } - else if (args) { - switch (intsize) { - case 'c': uv = (unsigned char)va_arg(*args, unsigned); break; - case 'h': uv = (unsigned short)va_arg(*args, unsigned); break; - case 'l': uv = va_arg(*args, unsigned long); break; - case 'V': uv = va_arg(*args, UV); break; - case 'z': uv = va_arg(*args, Size_t); break; + else { + /* test arg for inf/nan. This can trigger an unwanted + * 'str' overload, so manually force 'num' overload first + * if necessary */ + if (argsv) { + SvGETMAGIC(argsv); + if (UNLIKELY(SvAMAGIC(argsv))) + argsv = sv_2num(argsv); + if (UNLIKELY(isinfnansv(argsv))) + goto handle_infnan_argsv; + } + + if (base < 0) { + /* signed int type */ + IV iv; + base = -base; + if (args) { + switch (intsize) { + case 'c': iv = (char)va_arg(*args, int); break; + case 'h': iv = (short)va_arg(*args, int); break; + case 'l': iv = va_arg(*args, long); break; + case 'V': iv = va_arg(*args, IV); break; + case 'z': iv = va_arg(*args, SSize_t); break; #ifdef HAS_PTRDIFF_T - case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */ + case 't': iv = va_arg(*args, ptrdiff_t); break; #endif + default: iv = va_arg(*args, int); break; #ifdef I_STDINT - case 'j': uv = va_arg(*args, uintmax_t); break; + case 'j': iv = va_arg(*args, intmax_t); break; #endif - default: uv = va_arg(*args, unsigned); break; - case 'q': + case 'q': #if IVSIZE >= 8 - uv = va_arg(*args, Uquad_t); break; + iv = va_arg(*args, Quad_t); break; #else - goto unknown; + goto unknown; #endif - } - } - else { - UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'c': uv = (unsigned char)tuv; break; - case 'h': uv = (unsigned short)tuv; break; - case 'l': uv = (unsigned long)tuv; break; - case 'V': - default: uv = tuv; break; - case 'q': + } + } + else { + /* assign to tiv then cast to iv to work around + * 2003 GCC cast bug (gnu.org bugzilla #13488) */ + IV tiv = SvIV_nomg(argsv); + switch (intsize) { + case 'c': iv = (char)tiv; break; + case 'h': iv = (short)tiv; break; + case 'l': iv = (long)tiv; break; + case 'V': + default: iv = tiv; break; + case 'q': #if IVSIZE >= 8 - uv = (Uquad_t)tuv; break; + iv = (Quad_t)tiv; break; #else - goto unknown; + goto unknown; #endif - } - } + } + } + + /* now convert iv to uv */ + if (iv >= 0) { + uv = iv; + if (plus) + esignbuf[esignlen++] = plus; + } + else { + uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); + esignbuf[esignlen++] = '-'; + } + } + else { + /* unsigned int type */ + if (args) { + switch (intsize) { + case 'c': uv = (unsigned char)va_arg(*args, unsigned); + break; + case 'h': uv = (unsigned short)va_arg(*args, unsigned); + break; + case 'l': uv = va_arg(*args, unsigned long); break; + case 'V': uv = va_arg(*args, UV); break; + case 'z': uv = va_arg(*args, Size_t); break; +#ifdef HAS_PTRDIFF_T + /* will sign extend, but there is no + * uptrdiff_t, so oh well */ + case 't': uv = va_arg(*args, ptrdiff_t); break; +#endif +#ifdef I_STDINT + case 'j': uv = va_arg(*args, uintmax_t); break; +#endif + default: uv = va_arg(*args, unsigned); break; + case 'q': +#if IVSIZE >= 8 + uv = va_arg(*args, Uquad_t); break; +#else + goto unknown; +#endif + } + } + else { + /* assign to tiv then cast to iv to work around + * 2003 GCC cast bug (gnu.org bugzilla #13488) */ + UV tuv = SvUV_nomg(argsv); + switch (intsize) { + case 'c': uv = (unsigned char)tuv; break; + case 'h': uv = (unsigned short)tuv; break; + case 'l': uv = (unsigned long)tuv; break; + case 'V': + default: uv = tuv; break; + case 'q': +#if IVSIZE >= 8 + uv = (Uquad_t)tuv; break; +#else + goto unknown; +#endif + } + } + } + } - integer: + do_integer: { char *ptr = ebuf + sizeof ebuf; - bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */ unsigned dig; zeros = 0; switch (base) { case 16: - p = (char *)((c == 'X') ? PL_hexdigit + 16 : PL_hexdigit); - do { - dig = uv & 15; - *--ptr = p[dig]; - } while (uv >>= 4); - if (tempalt) { - esignbuf[esignlen++] = '0'; - esignbuf[esignlen++] = c; /* 'x' or 'X' */ - } - break; + { + const char * const p = + (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit; + + do { + dig = uv & 15; + *--ptr = p[dig]; + } while (uv >>= 4); + if (alt && *ptr != '0') { + esignbuf[esignlen++] = '0'; + esignbuf[esignlen++] = c; /* 'x' or 'X' */ + } + break; + } case 8: do { dig = uv & 7; @@ -12594,11 +12784,33 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p dig = uv & 1; *--ptr = '0' + dig; } while (uv >>= 1); - if (tempalt) { + if (alt && *ptr != '0') { esignbuf[esignlen++] = '0'; - esignbuf[esignlen++] = c; + esignbuf[esignlen++] = c; /* 'b' or 'B' */ } break; + + case 1: + /* special-case: base 1 indicates a 'c' format: + * we use the common code for extracting a uv, + * but handle that value differently here than + * all the other int types */ + if ((uv > 255 || + (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) + && !IN_BYTES) + { + assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1); + eptr = ebuf; + elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; + is_utf8 = TRUE; + } + else { + eptr = ebuf; + ebuf[0] = (char)uv; + elen = 1; + } + goto string; + default: /* it had better be ten or less */ do { dig = uv % base; @@ -12632,16 +12844,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'a': case 'A': { - STRLEN radix_len; /* SvCUR(PL_numeric_radix_sv) */ STRLEN float_need; /* what PL_efloatsize needs to become */ bool hexfp; /* hexadecimal floating point? */ vcatpvfn_long_double_t fv; NV nv; - if (vectorize) - goto unknown; - /* This is evil, but floating point is even more evil */ /* for SV-style calling, we can only get NV @@ -12738,7 +12946,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p && intsize != 'q' && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) ) - goto float_concat_no_utf8; + goto float_concat; /* Determine the buffer size needed for the various * floating-point formats. @@ -12769,17 +12977,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * First, here are the constant bits. For ease of calculation * we over-estimate the needed buffer size, for example by * assuming all formats have an exponent and a leading 0x1. + * + * Also for production use, add a little extra overhead for + * safety's sake. Under debugging don't, as it means we're + * more likely to quickly spot issues during development. */ float_need = 1 /* possible unary minus */ + 4 /* "0x1" plus very unlikely carry */ + + 1 /* default radix point '.' */ + 2 /* "e-", "p+" etc */ + 6 /* exponent: up to 16383 (quad fp) */ +#ifndef DEBUGGING + + 20 /* safety net */ +#endif + 1; /* \0 */ /* determine the radix point len, e.g. length(".") in "1.2" */ - radix_len = 1; /* assume '.' */ #ifdef USE_LOCALE_NUMERIC /* note that we may either explicitly use PL_numeric_radix_sv * below, or implicitly, via an snprintf() variant. @@ -12794,18 +13009,26 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p lc_numeric_set = TRUE; } - if (PL_numeric_radix_sv) { - assert(IN_LC(LC_NUMERIC)); - radix_len = SvCUR(PL_numeric_radix_sv); - /* note that this will convert the output to utf8 even if - * if the radix point didn't get output */ - is_utf8 = SvUTF8(PL_numeric_radix_sv); + if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { + /* this can't wrap unless PL_numeric_radix_sv is a string + * consuming virtually all the 32-bit or 64-bit address + * space + */ + float_need += (SvCUR(PL_numeric_radix_sv) - 1); + + /* floating-point formats only get utf8 if the radix point + * is utf8. All other characters in the string are < 128 + * and so can be safely appended to both a non-utf8 and utf8 + * string as-is. + * Note that this will convert the output to utf8 even if + * the radix point didn't get output. + */ + if (SvUTF8(PL_numeric_radix_sv) && !has_utf8) { + sv_utf8_upgrade(sv); + has_utf8 = TRUE; + } } #endif - /* this can't wrap unless PL_numeric_radix_sv is a string - * consuming virtually all the 32-bit or 64-bit address space - */ - float_need += radix_len; hexfp = FALSE; @@ -12822,8 +13045,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (i > 0) { digits = BIT_DIGITS(i); - if (float_need >= ((STRLEN)~0) - digits) - croak_memory_wrap(); + /* this can't overflow. 'digits' will only be a few + * thousand even for the largest floating-point types. + * And up until now float_need is just some small + * constants plus radix len, which can't be in + * overflow territory unless the radix SV is consuming + * over 1/2 the address space */ + assert(float_need < ((STRLEN)~0) - digits); float_need += digits; } } @@ -12855,8 +13083,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #else NVSIZE * 2; /* 2 hexdigits for each byte */ #endif - if (float_need >= ((STRLEN)~0) - digits) - croak_memory_wrap(); + /* see "this can't overflow" comment above */ + assert(float_need < ((STRLEN)~0) - digits); float_need += digits; } } @@ -12882,6 +13110,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p { STRLEN pr = has_precis ? precis : 6; /* known default */ + /* this probably can't wrap, since precis is limited + * to 1/4 address space size, but better safe than sorry + */ if (float_need >= ((STRLEN)~0) - pr) croak_memory_wrap(); float_need += pr; @@ -12890,19 +13121,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (float_need < width) float_need = width; -/* We should have correctly calculated (or indeed over-estimated) the - * buffer size, but you never know what strange floating-point systems - * there are out there. So for production use, add a little extra overhead. - * Under debugging don't, as it means we more more likely to quickly spot - * issues during development. - */ -#ifndef DEBUGGING - if (float_need >= ((STRLEN)~0) - 20) - croak_memory_wrap(); - float_need += 20; /* safety fudge factor */ -#endif - - if (PL_efloatsize < float_need) { + if (PL_efloatsize <= float_need) { + /* PL_efloatbuf should be at least 1 greater than + * float_need to allow a trailing \0 to be returned by + * snprintf(). If we need to grow, overgrow for the + * benefit of future generations */ + const STRLEN extra = 0x20; + if (float_need >= ((STRLEN)~0) - extra) + croak_memory_wrap(); + float_need += extra; Safefree(PL_efloatbuf); PL_efloatsize = float_need; Newx(PL_efloatbuf, PL_efloatsize, char); @@ -12983,7 +13210,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv) : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)fv)); #else - elen = my_sprintf(PL_efloatbuf, ptr, fv); + elen = my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, fv); #endif GCC_DIAG_RESTORE; } @@ -12997,58 +13224,30 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * loop which handles appending eptr to sv, and do our own * stripped-down version */ - /* floating-point formats only get is_utf8 if the radix point - * is utf8. All other characters in the string are < 128 - * and so can be safely appended to both a non-utf8 and utf8 - * string as-is. - */ - if (is_utf8 && !has_utf8) { - sv_utf8_upgrade(sv); - has_utf8 = TRUE; - } - - float_concat_no_utf8: - assert(!zeros); assert(!esignlen); - assert(!vectorize); assert(elen); assert(elen >= width); + S_sv_catpvn_simple(aTHX_ sv, eptr, elen); - { - /* unrolled Perl_sv_catpvn */ - STRLEN need = elen + SvCUR(sv) + 1; - char *end; - /* can't wrap as both elen and SvCUR() are allocated in - * memory and together can't consume all the address space - */ - assert(need > elen); - SvGROW(sv, need); - end = SvEND(sv); - Copy(eptr, end, elen, char); - end += elen; - *end = '\0'; - SvCUR_set(sv, need - 1); - } - - goto donevalidconversion; + goto done_valid_conversion; } /* SPECIAL */ case 'n': { - int i; - if (vectorize) - goto unknown; + STRLEN len; /* XXX ideally we should warn if any flags etc have been * set, e.g. "%-4.5n" */ /* XXX if sv was originally non-utf8 with a char in the * range 0x80-0xff, then if it got upgraded, we should * calculate char len rather than byte len here */ - i = SvCUR(sv) - origlen; + len = SvCUR(sv) - origlen; if (args) { + int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len; + switch (intsize) { case 'c': *(va_arg(*args, char*)) = i; break; case 'h': *(va_arg(*args, short*)) = i; break; @@ -13075,9 +13274,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p Perl_croak_nocontext( "Missing argument for %%n in %s", PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i); + sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len); } - goto donevalidconversion; + goto done_valid_conversion; } /* UNKNOWN */ @@ -13112,9 +13311,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* mangled format: output the '%', then continue from the * character following that */ - sv_catpvn_nomg(sv, p, 1); - q = p + 1; + sv_catpvn_nomg(sv, fmtstart-1, 1); + q = fmtstart; svix = osvix; + /* Any "redundant arg" warning from now onwards will probably + * just be misleading, so don't bother. */ + no_redundant_warning = TRUE; continue; /* not "break" */ } @@ -13142,22 +13344,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p { STRLEN need, have, gap; + STRLEN i; + char *s; /* signed value that's wrapped? */ assert(elen <= ((~(STRLEN)0) >> 1)); - /* Most of these length vars can range to any value if - * supplied with a hostile format and/or args. So check every - * addition for possible overflow. In reality some of these - * values are interdependent so these checks are slightly - * redundant. But its easier to be certain this way. - */ - - have = elen; - - if (have >= (((STRLEN)~0) - zeros)) - croak_memory_wrap(); - have += zeros; + /* if zeros is non-zero, then it represents filler between + * elen and precis. So adding elen and zeros together will + * always be <= precis, and the addition can never wrap */ + assert(!zeros || (precis > elen && precis - elen == zeros)); + have = elen + zeros; if (have >= (((STRLEN)~0) - esignlen)) croak_memory_wrap(); @@ -13166,68 +13363,64 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p need = (have > width ? have : width); gap = need - have; - if (need >= (((STRLEN)~0) - dotstrlen)) - croak_memory_wrap(); - need += dotstrlen; - if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1))) croak_memory_wrap(); need += (SvCUR(sv) + 1); SvGROW(sv, need); - p = SvEND(sv); - if (esignlen && fill) { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; - } - if (gap && !left) { - memset(p, (fill ? '0' : ' '), gap); - p += gap; - } - if (esignlen && !fill) { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; - } - if (zeros) { - int i; + s = SvEND(sv); + + if (left) { + for (i = 0; i < esignlen; i++) + *s++ = esignbuf[i]; for (i = zeros; i; i--) - *p++ = '0'; - } - if (elen) { - Copy(eptr, p, elen, char); - p += elen; + *s++ = '0'; + Copy(eptr, s, elen, char); + s += elen; + for (i = gap; i; i--) + *s++ = ' '; } - if (gap && left) { - memset(p, ' ', gap); - p += gap; - } - if (vectorize) { - if (veclen) { - Copy(dotstr, p, dotstrlen, char); - p += dotstrlen; + else { + if (fill) { + for (i = 0; i < esignlen; i++) + *s++ = esignbuf[i]; + assert(!zeros); + zeros = gap; } - else - vectorize = FALSE; /* done iterating over vecstr */ + else { + for (i = gap; i; i--) + *s++ = ' '; + for (i = 0; i < esignlen; i++) + *s++ = esignbuf[i]; + } + + for (i = zeros; i; i--) + *s++ = '0'; + Copy(eptr, s, elen, char); + s += elen; } + + *s = '\0'; + SvCUR_set(sv, s - SvPVX_const(sv)); + if (is_utf8) has_utf8 = TRUE; if (has_utf8) SvUTF8_on(sv); - *p = '\0'; - SvCUR_set(sv, p - SvPVX_const(sv)); } - if (vectorize) { - esignlen = 0; - goto vector; + if (vectorize && veclen) { + /* we append the vector separator separately since %v isn't + * very common: don't slow down the general case by adding + * dotstrlen to need etc */ + sv_catpvn_nomg(sv, dotstr, dotstrlen); + esignlen = 0; + goto vector; /* do next iteration */ } - donevalidconversion: - if (used_explicit_ix) - no_redundant_warning = TRUE; + done_valid_conversion: + if (arg_missing) S_warn_vcatpvfn_missing_argument(aTHX); } @@ -13235,7 +13428,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* Now that we've consumed all our printf format arguments (svix) * do we have things left on the stack that we didn't use? */ - if (!no_redundant_warning && svmax >= svix + 1 && ckWARN(WARN_REDUNDANT)) { + if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) { Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); } @@ -14089,7 +14282,6 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) case SVt_REGEXP: duprex: /* FIXME for plugins */ - dstr->sv_u.svu_rx = ((REGEXP *)dstr)->sv_any; re_dup_guts((REGEXP*) sstr, (REGEXP*) dstr, param); break; case SVt_PVLV: @@ -14101,6 +14293,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) else LvTARG(dstr) = sv_dup_inc(LvTARG(dstr), param); if (isREGEXP(sstr)) goto duprex; + /* FALLTHROUGH */ case SVt_PVGV: /* non-GP case already handled above */ if(isGV_with_GP(sstr)) { @@ -15144,7 +15337,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, #ifdef USE_LOCALE_NUMERIC PL_numeric_standard = proto_perl->Inumeric_standard; - PL_numeric_local = proto_perl->Inumeric_local; + PL_numeric_underlying = proto_perl->Inumeric_underlying; #endif /* !USE_LOCALE_NUMERIC */ /* Did the locale setup indicate UTF-8? */ @@ -15268,6 +15461,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, init_constants(); ptr_table_store(PL_ptr_table, &proto_perl->Isv_undef, &PL_sv_undef); ptr_table_store(PL_ptr_table, &proto_perl->Isv_no, &PL_sv_no); + ptr_table_store(PL_ptr_table, &proto_perl->Isv_zero, &PL_sv_zero); ptr_table_store(PL_ptr_table, &proto_perl->Isv_yes, &PL_sv_yes); ptr_table_store(PL_ptr_table, &proto_perl->Ipadname_const, &PL_padname_const); @@ -15477,6 +15671,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_numeric_radix_sv = sv_dup_inc(proto_perl->Inumeric_radix_sv, param); #endif /* !USE_LOCALE_NUMERIC */ + PL_langinfo_buf = NULL; + PL_langinfo_bufsize = 0; + /* Unicode inversion lists */ PL_Latin1 = sv_dup_inc(proto_perl->ILatin1, param); PL_UpperLatin1 = sv_dup_inc(proto_perl->IUpperLatin1, param); @@ -15775,6 +15972,13 @@ Perl_init_constants(pTHX) |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK |SVp_POK|SVf_POK; + SvANY(&PL_sv_zero) = new_XPVNV(); + SvREFCNT(&PL_sv_zero) = SvREFCNT_IMMORTAL; + SvFLAGS(&PL_sv_zero) = SVt_PVNV|SVf_READONLY|SVf_PROTECT + |SVp_IOK|SVf_IOK|SVp_NOK|SVf_NOK + |SVp_POK|SVf_POK + |SVs_PADTMP; + SvPV_set(&PL_sv_no, (char*)PL_No); SvCUR_set(&PL_sv_no, 0); SvLEN_set(&PL_sv_no, 0); @@ -15787,7 +15991,33 @@ Perl_init_constants(pTHX) SvIV_set(&PL_sv_yes, 1); SvNV_set(&PL_sv_yes, 1); + SvPV_set(&PL_sv_zero, (char*)PL_Zero); + SvCUR_set(&PL_sv_zero, 1); + SvLEN_set(&PL_sv_zero, 0); + SvIV_set(&PL_sv_zero, 0); + SvNV_set(&PL_sv_zero, 0); + PadnamePV(&PL_padname_const) = (char *)PL_No; + + assert(SvIMMORTAL_INTERP(&PL_sv_yes)); + assert(SvIMMORTAL_INTERP(&PL_sv_undef)); + assert(SvIMMORTAL_INTERP(&PL_sv_no)); + assert(SvIMMORTAL_INTERP(&PL_sv_zero)); + + assert(SvIMMORTAL(&PL_sv_yes)); + assert(SvIMMORTAL(&PL_sv_undef)); + assert(SvIMMORTAL(&PL_sv_no)); + assert(SvIMMORTAL(&PL_sv_zero)); + + assert( SvIMMORTAL_TRUE(&PL_sv_yes)); + assert(!SvIMMORTAL_TRUE(&PL_sv_undef)); + assert(!SvIMMORTAL_TRUE(&PL_sv_no)); + assert(!SvIMMORTAL_TRUE(&PL_sv_zero)); + + assert( SvTRUE_nomg_NN(&PL_sv_yes)); + assert(!SvTRUE_nomg_NN(&PL_sv_undef)); + assert(!SvTRUE_nomg_NN(&PL_sv_no)); + assert(!SvTRUE_nomg_NN(&PL_sv_zero)); } /* @@ -16689,6 +16919,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, /* def-ness of rval pos() is independent of the def-ness of its arg */ if ( !(obase->op_flags & OPf_MOD)) break; + /* FALLTHROUGH */ case OP_SCHOMP: case OP_CHOMP: @@ -16764,6 +16995,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) if (PL_op) { desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded ? "join or string" + : PL_op->op_type == OP_MULTICONCAT + && (PL_op->op_private & OPpMULTICONCAT_FAKE) + ? "sprintf" : OP_DESC(PL_op); if (uninit_sv && PL_curpad) { varname = find_uninit_var(PL_op, uninit_sv, 0, &desc);