X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ee58923a8531731f8acb98ab130abf41f75ebdc7..d1ce136f24009faf349e2a3b3cfc91e37082d29f:/sv.c diff --git a/sv.c b/sv.c index 467af34..cfbc512 100644 --- a/sv.c +++ b/sv.c @@ -245,7 +245,7 @@ Public API: if ((sv)->sv_debug_file) PerlMemShared_free((sv)->sv_debug_file); \ } STMT_END # define DEBUG_SV_SERIAL(sv) \ - DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) del_SV\n", \ + DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) del_SV\n", \ PTR2UV(sv), (long)(sv)->sv_debug_serial)) #else # define FREE_SV_DEBUG_FILE(sv) @@ -340,7 +340,7 @@ S_new_SV(pTHX_ const char *file, int line, const char *func) sv->sv_debug_serial = PL_sv_serial++; MEM_LOG_NEW_SV(sv, file, line, func); - DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%"UVxf": (%05ld) new_SV (from %s:%d [%s])\n", + DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%" UVxf ": (%05ld) new_SV (from %s:%d [%s])\n", PTR2UV(sv), (long)sv->sv_debug_serial, file, line, func)); return sv; @@ -392,7 +392,7 @@ S_del_sv(pTHX_ SV *p) } if (!ok) { Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), - "Attempt to free non-arena SV: 0x%"UVxf + "Attempt to free non-arena SV: 0x%" UVxf pTHX__FORMAT, PTR2UV(p) pTHX__VALUE); return; } @@ -654,7 +654,7 @@ do_clean_all(pTHX_ SV *const sv) /* don't clean pid table and strtab */ return; } - DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%"UVxf"\n", PTR2UV(sv)) )); + DEBUG_D((PerlIO_printf(Perl_debug_log, "Cleaning loops: SV at 0x%" UVxf "\n", PTR2UV(sv)) )); SvFLAGS(sv) |= SVf_BREAK; SvREFCNT_dec_NN(sv); } @@ -1111,7 +1111,7 @@ Perl_more_bodies (pTHX_ const svtype sv_type, const size_t body_size, Newx(adesc->arena, good_arena_size, char); adesc->size = good_arena_size; adesc->utype = sv_type; - DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %"UVuf"\n", + DEBUG_m(PerlIO_printf(Perl_debug_log, "arena %d added: %p size %" UVuf "\n", curr, (void*)adesc->arena, (UV)good_arena_size)); start = (char *) adesc->arena; @@ -1205,7 +1205,7 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) const struct body_details *new_type_details; const struct body_details *old_type_details = bodies_by_type + old_type; - SV *referant = NULL; + SV *referent = NULL; PERL_ARGS_ASSERT_SV_UPGRADE; @@ -1270,7 +1270,7 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) break; case SVt_IV: if (SvROK(sv)) { - referant = SvRV(sv); + referent = SvRV(sv); old_type_details = &fake_rv; if (new_type == SVt_NV) new_type = SVt_PVNV; @@ -1465,9 +1465,9 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type) if (UNLIKELY(new_type == SVt_REGEXP)) sv->sv_u.svu_rx = (regexp *)new_body; else if (old_type < SVt_PV) { - /* referant will be NULL unless the old type was SVt_IV emulating + /* referent will be NULL unless the old type was SVt_IV emulating SVt_RV */ - sv->sv_u.svu_rv = referant; + sv->sv_u.svu_rv = referent; } break; default: @@ -1525,6 +1525,11 @@ Perl_sv_backoff(SV *const sv) return; } + +/* forward declaration */ +static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags); + + /* =for apidoc sv_grow @@ -1535,7 +1540,6 @@ Use the C wrapper instead. =cut */ -static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags); char * Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) @@ -1567,15 +1571,11 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) * to store the COW count. So in general, allocate one more byte than * asked for, to make it likely this byte is always spare: and thus * make more strings COW-able. - * If the new size is a big power of two, don't bother: we assume the - * caller wanted a nice 2^N sized block and will be annoyed at getting - * 2^N+1. + * * Only increment if the allocation isn't MEM_SIZE_MAX, * otherwise it will wrap to 0. */ - if ( (newlen < 0x1000 || (newlen & (newlen - 1))) - && newlen != MEM_SIZE_MAX - ) + if ( newlen != MEM_SIZE_MAX ) newlen++; #endif @@ -1604,7 +1604,7 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen) else { s = (char*)safemalloc(newlen); if (SvPVX_const(sv) && SvCUR(sv)) { - Move(SvPVX_const(sv), s, (newlen < SvCUR(sv)) ? newlen : SvCUR(sv), char); + Move(SvPVX_const(sv), s, SvCUR(sv), char); } } SvPV_set(sv, s); @@ -1655,6 +1655,7 @@ Perl_sv_setiv(pTHX_ SV *const sv, const IV i) /* diag_listed_as: Can't coerce %s to %s in %s */ Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0), OP_DESC(PL_op)); + NOT_REACHED; /* NOTREACHED */ break; default: NOOP; } @@ -1767,6 +1768,7 @@ Perl_sv_setnv(pTHX_ SV *const sv, const NV num) /* diag_listed_as: Can't coerce %s to %s in %s */ Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0), OP_DESC(PL_op)); + NOT_REACHED; /* NOTREACHED */ break; default: NOOP; } @@ -2042,7 +2044,7 @@ S_sv_2iuv_non_preserve(pTHX_ SV *const sv PERL_ARGS_ASSERT_SV_2IUV_NON_PRESERVE; PERL_UNUSED_CONTEXT; - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%"UVxf" NV=%"NVgf" inttype=%"UVXf"\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_2iuv_non '%s', IV=0x%" UVxf " NV=%" NVgf " inttype=%" UVXf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv), (UV)numtype)); if (SvNVX(sv) < (NV)IV_MIN) { (void)SvIOKp_on(sv); (void)SvNOK_on(sv); @@ -2169,7 +2171,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) /* scalar has trailing garbage, eg "42a" */ } DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" iv(%"NVgf" => %"IVdf") (precise)\n", + "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (precise)\n", PTR2UV(sv), SvNVX(sv), SvIVX(sv))); @@ -2180,7 +2182,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) that PV->IV would be better than PV->NV->IV flags already correct - don't set public IOK. */ DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" iv(%"NVgf" => %"IVdf") (imprecise)\n", + "0x%" UVxf " iv(%" NVgf " => %" IVdf ") (imprecise)\n", PTR2UV(sv), SvNVX(sv), SvIVX(sv))); @@ -2211,7 +2213,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) SvIOK_on(sv); SvIsUV_on(sv); DEBUG_c(PerlIO_printf(Perl_debug_log, - "0x%"UVxf" 2iv(%"UVuf" => %"IVdf") (as unsigned)\n", + "0x%" UVxf " 2iv(%" UVuf " => %" IVdf ") (as unsigned)\n", PTR2UV(sv), SvUVX(sv), SvUVX(sv))); @@ -2219,7 +2221,24 @@ S_sv_2iuv_common(pTHX_ SV *const sv) } else if (SvPOKp(sv)) { UV value; - const int numtype = grok_number(SvPVX_const(sv), SvCUR(sv), &value); + int numtype; + const char *s = SvPVX_const(sv); + const STRLEN cur = SvCUR(sv); + + /* short-cut for a single digit string like "1" */ + + if (cur == 1) { + char c = *s; + if (isDIGIT(c)) { + if (SvTYPE(sv) < SVt_PVIV) + sv_upgrade(sv, SVt_PVIV); + (void)SvIOK_on(sv); + SvIV_set(sv, (IV)(c - '0')); + return FALSE; + } + } + + numtype = grok_number(s, cur, &value); /* We want to avoid a possible problem when we cache an IV/ a UV which may be later translated to an NV, and the resulting NV is not the same as the direct translation of the initial string @@ -2300,7 +2319,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) if (! numtype && ckWARN(WARN_NUMERIC)) not_a_number(sv); - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" NVgf ")\n", + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv))); #ifdef NV_PRESERVES_UV @@ -2359,7 +2378,7 @@ S_sv_2iuv_common(pTHX_ SV *const sv) this NV is in the preserved range, therefore: */ if (!(U_V(SvNVX(sv) > 0 ? SvNVX(sv) : -SvNVX(sv)) < (UV)IV_MAX)) { - Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%"NVgf" U_V is 0x%"UVxf", IV_MAX is 0x%"UVxf"\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); + Perl_croak(aTHX_ "sv_2iv assumed (U_V(fabs((double)SvNVX(sv))) < (UV)IV_MAX) but SvNVX(sv)=%" NVgf " U_V is 0x%" UVxf ", IV_MAX is 0x%" UVxf "\n", SvNVX(sv), U_V(SvNVX(sv)), (UV)IV_MAX); } } else { /* IN_UV NOT_INT @@ -2436,8 +2455,8 @@ Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) } if (SvVALID(sv) || isREGEXP(sv)) { - /* FBMs use the space for SvIVX and SvNVX for other purposes, and use - the same flag bit as SVf_IVisUV, so must not let them cache IVs. + /* FBMs use the space for SvIVX and SvNVX for other purposes, so + must not let them cache IVs. In practice they are extremely unlikely to actually get anywhere accessible by user Perl code - the only way that I'm aware of is when a constant subroutine which is used as the second argument to index. @@ -2492,7 +2511,7 @@ Perl_sv_2iv_flags(pTHX_ SV *const sv, const I32 flags) return 0; } - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"IVdf")\n", + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2iv(%" IVdf ")\n", PTR2UV(sv),SvIVX(sv))); return SvIsUV(sv) ? (IV)SvUVX(sv) : SvIVX(sv); } @@ -2575,7 +2594,7 @@ Perl_sv_2uv_flags(pTHX_ SV *const sv, const I32 flags) return 0; } - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2uv(%"UVuf")\n", + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2uv(%" UVuf ")\n", PTR2UV(sv),SvUVX(sv))); return SvIsUV(sv) ? SvUVX(sv) : (UV)SvIVX(sv); } @@ -2656,7 +2675,7 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) DEBUG_c({ STORE_NUMERIC_LOCAL_SET_STANDARD(); PerlIO_printf(Perl_debug_log, - "0x%"UVxf" num(%" NVgf ")\n", + "0x%" UVxf " num(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); RESTORE_NUMERIC_LOCAL(); }); @@ -2796,7 +2815,7 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags) } DEBUG_c({ STORE_NUMERIC_LOCAL_SET_STANDARD(); - PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" NVgf ")\n", + PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2nv(%" NVgf ")\n", PTR2UV(sv), SvNVX(sv)); RESTORE_NUMERIC_LOCAL(); }); @@ -2902,8 +2921,8 @@ S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) { return 0; } assert((s == buffer + 3) || (s == buffer + 4)); - *s++ = 0; - return s - buffer - 1; /* -1: excluding the zero byte */ + *s = 0; + return s - buffer; } /* @@ -3128,8 +3147,8 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) STORE_LC_NUMERIC_SET_TO_NEEDED(); local_radix = PL_numeric_local && PL_numeric_radix_sv; - if (local_radix && SvLEN(PL_numeric_radix_sv) > 1) { - size += SvLEN(PL_numeric_radix_sv) - 1; + if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) { + size += SvCUR(PL_numeric_radix_sv) - 1; s = SvGROW_mutable(sv, size); } @@ -3166,6 +3185,8 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) assert(SvPOK(buffer)); if (SvUTF8(buffer)) SvUTF8_on(sv); + else + SvUTF8_off(sv); if (lp) *lp = SvCUR(buffer); return SvPVX(buffer); @@ -3193,7 +3214,7 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) *lp = len; SvCUR_set(sv, len); } - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", PTR2UV(sv),SvPVX_const(sv))); if (flags & SV_CONST_RETURN) return (char *)SvPVX_const(sv); @@ -3749,11 +3770,11 @@ Perl_sv_utf8_encode(pTHX_ SV *const sv) /* =for apidoc sv_utf8_decode -If the PV of the SV is an octet sequence in UTF-8 +If the PV of the SV is an octet sequence in Perl's extended UTF-8 and contains a multiple-byte character, the C flag is turned on so that it looks like a character. If the PV contains only single-byte characters, the C flag stays off. -Scans PV for validity and returns false if the PV is invalid UTF-8. +Scans PV for validity and returns FALSE if the PV is invalid UTF-8. =cut */ @@ -3765,7 +3786,6 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) if (SvPOKp(sv)) { const U8 *start, *c; - const U8 *e; /* The octets may have got themselves encoded - get them back as * bytes @@ -3779,13 +3799,8 @@ Perl_sv_utf8_decode(pTHX_ SV *const sv) c = start = (const U8 *) SvPVX_const(sv); if (!is_utf8_string(c, SvCUR(sv))) return FALSE; - e = (const U8 *) SvEND(sv); - while (c < e) { - const U8 ch = *c++; - if (!UTF8_IS_INVARIANT(ch)) { - SvUTF8_on(sv); - break; - } + if (! is_utf8_invariant_string(c, SvCUR(sv))) { + SvUTF8_on(sv); } if (SvTYPE(sv) >= SVt_PVMG && SvMAGIC(sv)) { /* XXX Is this dead code? XS_utf8_decode calls SvSETMAGIC @@ -4073,11 +4088,11 @@ Perl_gv_setref(pTHX_ SV *const dstr, SV *const sstr) sv_2mortal( stash ? Perl_newSVpvf(aTHX_ - "%"HEKf"::%"HEKf, + "%" HEKf "::%" HEKf, HEKfARG(HvNAME_HEK(stash)), HEKfARG(GvENAME_HEK(MUTABLE_GV(dstr)))) : Perl_newSVpvf(aTHX_ - "%"HEKf, + "%" HEKf, HEKfARG(GvENAME_HEK(MUTABLE_GV(dstr)))) ), cv, @@ -4273,12 +4288,17 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) * special-casing */ U32 sflags; U32 new_dflags; + SV *old_rv = NULL; /* minimal subset of SV_CHECK_THINKFIRST_COW_DROP(dstr) */ if (SvREADONLY(dstr)) Perl_croak_no_modify(); - if (SvROK(dstr)) - sv_unref_flags(dstr, 0); + if (SvROK(dstr)) { + if (SvWEAKREF(dstr)) + sv_unref_flags(dstr, 0); + else + old_rv = SvRV(dstr); + } assert(!SvGMAGICAL(sstr)); assert(!SvGMAGICAL(dstr)); @@ -4308,6 +4328,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) new_dflags = dtype; /* turn off everything except the type */ } SvFLAGS(dstr) = new_dflags; + SvREFCNT_dec(old_rv); return; } @@ -4734,8 +4755,6 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) } if (sflags & SVp_IOK) { SvIV_set(dstr, SvIVX(sstr)); - /* Must do this otherwise some other overloaded use of 0x80000000 - gets confused. I guess SVpbm_VALID */ if (sflags & SVf_IVisUV) SvIsUV_on(dstr); } @@ -4771,6 +4790,69 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags) SvTAINT(dstr); } + +/* +=for apidoc sv_set_undef + +Equivalent to C, but more efficient. +Doesn't handle set magic. + +The perl equivalent is C<$sv = undef;>. Note that it doesn't free any string +buffer, unlike C. + +Introduced in perl 5.25.12. + +=cut +*/ + +void +Perl_sv_set_undef(pTHX_ SV *sv) +{ + U32 type = SvTYPE(sv); + + PERL_ARGS_ASSERT_SV_SET_UNDEF; + + /* shortcut, NULL, IV, RV */ + + if (type <= SVt_IV) { + assert(!SvGMAGICAL(sv)); + if (SvREADONLY(sv)) { + /* does undeffing PL_sv_undef count as modifying a read-only + * variable? Some XS code does this */ + if (sv == &PL_sv_undef) + return; + Perl_croak_no_modify(); + } + + if (SvROK(sv)) { + if (SvWEAKREF(sv)) + sv_unref_flags(sv, 0); + else { + SV *rv = SvRV(sv); + SvFLAGS(sv) = type; /* quickly turn off all flags */ + SvREFCNT_dec_NN(rv); + return; + } + } + SvFLAGS(sv) = type; /* quickly turn off all flags */ + return; + } + + if (SvIS_FREED(sv)) + Perl_croak(aTHX_ "panic: attempt to undefine a freed scalar %p", + (void *)sv); + + SV_CHECK_THINKFIRST_COW_DROP(sv); + + if (isGV_with_GP(sv)) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Undefined value assigned to typeglob"); + else + SvOK_off(sv); +} + + + /* =for apidoc sv_setsv_mg @@ -4864,6 +4946,35 @@ Perl_sv_setsv_cow(pTHX_ SV *dstr, SV *sstr) #endif /* +=for apidoc sv_setpv_bufsize + +Sets the SV to be a string of cur bytes length, with at least +len bytes available. Ensures that there is a null byte at SvEND. +Returns a char * pointer to the SvPV buffer. + +=cut +*/ + +char * +Perl_sv_setpv_bufsize(pTHX_ SV *const sv, const STRLEN cur, const STRLEN len) +{ + char *pv; + + PERL_ARGS_ASSERT_SV_SETPV_BUFSIZE; + + SV_CHECK_THINKFIRST_COW_DROP(sv); + SvUPGRADE(sv, SVt_PV); + pv = SvGROW(sv, len + 1); + SvCUR_set(sv, cur); + *(SvEND(sv))= '\0'; + (void)SvPOK_only_UTF8(sv); /* validate pointer */ + + SvTAINT(sv); + if (SvTYPE(sv) == SVt_PVCV) CvAUTOLOAD_off(sv); + return pv; +} + +/* =for apidoc sv_setpvn Copies a string (possibly containing embedded C characters) into an SV. @@ -4882,6 +4993,8 @@ Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len) PERL_ARGS_ASSERT_SV_SETPVN; SV_CHECK_THINKFIRST_COW_DROP(sv); + if (isGV_with_GP(sv)) + Perl_croak_no_modify(); if (!ptr) { (void)SvOK_off(sv); return; @@ -5102,28 +5215,6 @@ Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32 SvSETMAGIC(sv); } -/* -=for apidoc sv_force_normal_flags - -Undo various types of fakery on an SV, where fakery means -"more than" a string: if the PV is a shared string, make -a private copy; if we're a ref, stop refing; if we're a glob, downgrade to -an C; if we're a copy-on-write scalar, this is the on-write time when -we do the copy, and is also used locally; if this is a -vstring, drop the vstring magic. If C is set -then a copy-on-write scalar drops its PV buffer (if any) and becomes -C rather than making a copy. (Used where this -scalar is about to be set to some other value.) In addition, -the C parameter gets passed to C -when unreffing. C calls this function -with flags set to 0. - -This function is expected to be used to signal to perl that this SV is -about to be written to, and any extra book-keeping needs to be taken care -of. Hence, it croaks on read-only values. - -=cut -*/ static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags) @@ -5203,6 +5294,30 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags) } } + +/* +=for apidoc sv_force_normal_flags + +Undo various types of fakery on an SV, where fakery means +"more than" a string: if the PV is a shared string, make +a private copy; if we're a ref, stop refing; if we're a glob, downgrade to +an C; if we're a copy-on-write scalar, this is the on-write time when +we do the copy, and is also used locally; if this is a +vstring, drop the vstring magic. If C is set +then a copy-on-write scalar drops its PV buffer (if any) and becomes +C rather than making a copy. (Used where this +scalar is about to be set to some other value.) In addition, +the C parameter gets passed to C +when unreffing. C calls this function +with flags set to 0. + +This function is expected to be used to signal to perl that this SV is +about to be written to, and any extra book-keeping needs to be taken care +of. Hence, it croaks on read-only values. + +=cut +*/ + void Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags) { @@ -5413,7 +5528,7 @@ Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, c sv_utf8_upgrade_flags_grow(dsv, 0, slen + 1); dlen = SvCUR(dsv); } - else SvGROW(dsv, dlen + slen + 1); + else SvGROW(dsv, dlen + slen + 3); if (sstr == dstr) sstr = SvPVX_const(dsv); Move(sstr, SvPVX(dsv) + dlen, slen, char); @@ -5429,7 +5544,7 @@ Perl_sv_catpvn_flags(pTHX_ SV *const dsv, const char *sstr, const STRLEN slen, c bytes *and* utf8, which would indicate a bug elsewhere. */ assert(sstr != dstr); - SvGROW(dsv, dlen + slen * 2 + 1); + SvGROW(dsv, dlen + slen * 2 + 3); d = (U8 *)SvPVX(dsv) + dlen; while (sstr < send) { @@ -5621,7 +5736,9 @@ Perl_sv_magicext(pTHX_ SV *const sv, SV *const obj, const int how, */ if (!obj || obj == sv || how == PERL_MAGIC_arylen || - how == PERL_MAGIC_symtab || + how == PERL_MAGIC_regdata || + how == PERL_MAGIC_regdatum || + how == PERL_MAGIC_symtab || (SvTYPE(obj) == SVt_PVGV && (GvSV(obj) == sv || GvHV(obj) == (const HV *)sv || GvAV(obj) == (const AV *)sv || GvCV(obj) == (const CV *)sv @@ -6189,7 +6306,7 @@ Perl_sv_kill_backrefs(pTHX_ SV *const sv, AV *const av) } else { Perl_croak(aTHX_ - "panic: magic_killbackrefs (flags=%"UVxf")", + "panic: magic_killbackrefs (flags=%" UVxf ")", (UV)SvFLAGS(referrer)); } @@ -6221,7 +6338,7 @@ C that applies to C. */ void -Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *const little, const STRLEN littlelen, const U32 flags) +Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags) { char *big; char *mid; @@ -6234,6 +6351,16 @@ Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN l SvPV_force_flags(bigstr, curlen, flags); (void)SvPOK_only_UTF8(bigstr); + + if (little >= SvPVX(bigstr) && + little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) { + /* little is a pointer to within bigstr, since we can reallocate bigstr, + or little...little+littlelen might overlap offset...offset+len we make a copy + */ + little = savepvn(little, littlelen); + SAVEFREEPV(little); + } + if (offset + len > curlen) { SvGROW(bigstr, offset+len+1); Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char); @@ -6523,7 +6650,7 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) { if (PL_stashcache) { DEBUG_o(Perl_deb(aTHX_ - "sv_clear clearing PL_stashcache for '%"HEKf + "sv_clear clearing PL_stashcache for '%" HEKf "'\n", HEKfARG(hek))); (void)hv_deletehek(PL_stashcache, @@ -6590,7 +6717,7 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) /* If we're in a stash, we don't own a reference to it. * However it does have a back reference to us, which * needs to be cleared. */ - if (!SvVALID(sv) && (stash = GvSTASH(sv))) + if ((stash = GvSTASH(sv))) sv_del_backref(MUTABLE_SV(stash), sv); } /* FIXME. There are probably more unreferenced pointers to SVs @@ -6747,7 +6874,7 @@ Perl_sv_clear(pTHX_ SV *const orig_sv) #ifdef DEBUGGING if (SvTEMP(sv)) { Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), - "Attempt to free temp prematurely: SV 0x%"UVxf + "Attempt to free temp prematurely: SV 0x%" UVxf pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); continue; } @@ -6872,7 +6999,7 @@ S_curse(pTHX_ SV * const sv, const bool check_refcnt) { if (check_refcnt && SvREFCNT(sv)) { if (PL_in_clean_objs) Perl_croak(aTHX_ - "DESTROY created new reference to dead object '%"HEKf"'", + "DESTROY created new reference to dead object '%" HEKf "'", HEKfARG(HvNAME_HEK(stash))); /* DESTROY gave object new lease on life */ return FALSE; @@ -6943,7 +7070,7 @@ Perl_sv_free2(pTHX_ SV *const sv, const U32 rc) #ifdef DEBUGGING if (SvTEMP(sv)) { Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), - "Attempt to free temp prematurely: SV 0x%"UVxf + "Attempt to free temp prematurely: SV 0x%" UVxf pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); return; } @@ -6990,7 +7117,7 @@ Perl_sv_free2(pTHX_ SV *const sv, const U32 rc) #endif /* This may not return: */ Perl_warner(aTHX_ packWARN(WARN_INTERNAL), - "Attempt to free unreferenced scalar: SV 0x%"UVxf + "Attempt to free unreferenced scalar: SV 0x%" UVxf pTHX__FORMAT, PTR2UV(sv) pTHX__VALUE); #endif } @@ -7579,8 +7706,8 @@ Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const offset, U32 flags) s = (const U8*)SvPV_flags(sv, blen, flags); if (blen < offset) - Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%"UVuf - ", byte=%"UVuf, (UV)blen, (UV)offset); + Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%" UVuf + ", byte=%" UVuf, (UV)blen, (UV)offset); send = s + offset; @@ -7697,7 +7824,7 @@ S_assert_uft8_cache_coherent(pTHX_ const char *const func, STRLEN from_cache, while printing error messages. */ SAVEI8(PL_utf8cache); PL_utf8cache = 0; - Perl_croak(aTHX_ "panic: %s cache %"UVuf" real %"UVuf" for %"SVf, + Perl_croak(aTHX_ "panic: %s cache %" UVuf " real %" UVuf " for %" SVf, func, (UV) from_cache, (UV) real, SVfARG(sv)); } @@ -8540,10 +8667,10 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) /* some trace debug output */ DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: entering, ptr=%"UVuf", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); + "Screamer: entering, ptr=%" UVuf ", cnt=%ld\n",PTR2UV(ptr),(long)cnt)); DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: entering: PerlIO * thinks ptr=%"UVuf", cnt=%"IVdf", base=%" - UVuf"\n", + "Screamer: entering: PerlIO * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" + UVuf "\n", PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base(fp) ? PerlIO_get_base(fp) : 0))); @@ -8553,13 +8680,27 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) if (cnt > 0) { /* if there is a separator */ if (rslen) { - /* loop until we hit the end of the read-ahead buffer */ - while (cnt > 0) { /* this | eat */ - /* scan forward copying and searching for rslast as we go */ - cnt--; - if ((*bp++ = *ptr++) == rslast) /* really | dust */ - goto thats_all_folks; /* screams | sed :-) */ - } + /* find next rslast */ + STDCHAR *p; + + /* shortcut common case of blank line */ + cnt--; + if ((*bp++ = *ptr++) == rslast) + goto thats_all_folks; + + p = (STDCHAR *)memchr(ptr, rslast, cnt); + if (p) { + SSize_t got = p - ptr + 1; + Copy(ptr, bp, got, STDCHAR); + ptr += got; + bp += got; + cnt -= got; + goto thats_all_folks; + } + Copy(ptr, bp, cnt, STDCHAR); + ptr += cnt; + bp += cnt; + cnt = 0; } else { /* no separator, slurp the full buffer */ @@ -8589,12 +8730,12 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) /* we need to refill the read-ahead buffer if possible */ DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: going to getc, ptr=%"UVuf", cnt=%"IVdf"\n", + "Screamer: going to getc, ptr=%" UVuf ", cnt=%" IVdf "\n", PTR2UV(ptr),(IV)cnt)); PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* deregisterize cnt and ptr */ DEBUG_Pv(PerlIO_printf(Perl_debug_log, - "Screamer: pre: FILE * thinks ptr=%"UVuf", cnt=%"IVdf", base=%"UVuf"\n", + "Screamer: pre: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); @@ -8610,7 +8751,7 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) i = PerlIO_getc(fp); /* get more characters */ DEBUG_Pv(PerlIO_printf(Perl_debug_log, - "Screamer: post: FILE * thinks ptr=%"UVuf", cnt=%"IVdf", base=%"UVuf"\n", + "Screamer: post: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); @@ -8618,7 +8759,7 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) cnt = PerlIO_get_cnt(fp); ptr = (STDCHAR*)PerlIO_get_ptr(fp); /* reregisterize cnt and ptr */ DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: after getc, ptr=%"UVuf", cnt=%"IVdf"\n", + "Screamer: after getc, ptr=%" UVuf ", cnt=%" IVdf "\n", PTR2UV(ptr),(IV)cnt)); if (i == EOF) /* all done for ever? */ @@ -8648,10 +8789,10 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append) if (shortbuffered) cnt += shortbuffered; DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: quitting, ptr=%"UVuf", cnt=%"IVdf"\n",PTR2UV(ptr),(IV)cnt)); + "Screamer: quitting, ptr=%" UVuf ", cnt=%" IVdf "\n",PTR2UV(ptr),(IV)cnt)); PerlIO_set_ptrcnt(fp, (STDCHAR*)ptr, cnt); /* put these back or we're in trouble */ DEBUG_P(PerlIO_printf(Perl_debug_log, - "Screamer: end: FILE * thinks ptr=%"UVuf", cnt=%"IVdf", base=%"UVuf + "Screamer: end: FILE * thinks ptr=%" UVuf ", cnt=%" IVdf ", base=%" UVuf "\n", PTR2UV(PerlIO_get_ptr(fp)), (IV)PerlIO_get_cnt(fp), PTR2UV(PerlIO_has_base (fp) ? PerlIO_get_base(fp) : 0))); @@ -8870,7 +9011,7 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv) /* I don't think we can get here. Maybe I should assert this And if we do get here I suspect that sv_setnv will croak. NWC Fall through. */ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); } #endif /* PERL_PRESERVE_IVUV */ @@ -9048,7 +9189,7 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv) /* I don't think we can get here. Maybe I should assert this And if we do get here I suspect that sv_setnv will croak. NWC Fall through. */ - DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n", + DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%" UVxf " NV=%" NVgf "\n", SvPVX_const(sv), SvIVX(sv), SvNVX(sv))); } } @@ -9204,7 +9345,14 @@ SV is set to 1. If C is zero, Perl will compute the length using C, (which means if you use this option, that C can't have embedded C characters and has to have a terminating C byte). -For efficiency, consider using C instead. +This function can cause reliability issues if you are likely to pass in +empty strings that are not null terminated, because it will run +strlen on the string and potentially run past valid memory. + +Using L is a safer alternative for non C terminated strings. +For string literals use L instead. This function will work fine for +C terminated strings, but if you want to avoid the if statement on whether +to call C use C instead (calling C yourself). =cut */ @@ -9742,7 +9890,7 @@ Perl_sv_2io(pTHX_ SV *const sv) gv = MUTABLE_GV(sv); io = GvIO(gv); if (!io) - Perl_croak(aTHX_ "Bad filehandle: %"HEKf, + Perl_croak(aTHX_ "Bad filehandle: %" HEKf, HEKfARG(GvNAME_HEK(gv))); break; } @@ -9765,7 +9913,7 @@ Perl_sv_2io(pTHX_ SV *const sv) newsv = sv_newmortal(); sv_setsv_nomg(newsv, sv); } - Perl_croak(aTHX_ "Bad filehandle: %"SVf, SVfARG(newsv)); + Perl_croak(aTHX_ "Bad filehandle: %" SVf, SVfARG(newsv)); } break; } @@ -9947,7 +10095,7 @@ Perl_sv_pvn_force_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags) if (!SvPOK(sv)) { SvPOK_on(sv); /* validate pointer */ SvTAINT(sv); - DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2pv(%s)\n", + DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%" UVxf " 2pv(%s)\n", PTR2UV(sv),SvPVX_const(sv))); } } @@ -10081,7 +10229,7 @@ Perl_sv_ref(pTHX_ SV *dst, const SV *const sv, const int ob) if (ob && SvOBJECT(sv)) { HvNAME_get(SvSTASH(sv)) ? sv_sethek(dst, HvNAME_HEK(SvSTASH(sv))) - : sv_setpvn(dst, "__ANON__", 8); + : sv_setpvs(dst, "__ANON__"); } else { const char * reftype = sv_reftype(sv, 0); @@ -10230,7 +10378,7 @@ Perl_sv_setref_pv(pTHX_ SV *const rv, const char *const classname, void *const p PERL_ARGS_ASSERT_SV_SETREF_PV; if (!pv) { - sv_setsv(rv, &PL_sv_undef); + sv_set_undef(rv); SvSETMAGIC(rv); } else @@ -10811,7 +10959,7 @@ Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, { PERL_ARGS_ASSERT_SV_VSETPVFN; - sv_setpvs(sv, ""); + SvPVCLEAR(sv); sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, 0); } @@ -10852,6 +11000,12 @@ S_expect_number(pTHX_ char **const pattern) return var; } +/* Implement a fast "%.0f": given a pointer to the end of a buffer (caller + * ensures it's big enough), back fill it with the rounded integer part of + * nv. Returns ptr to start of string, and sets *len to its length. + * Returns NULL if not convertible. + */ + STATIC char * S_F0convert(NV nv, char *const endbuf, STRLEN *const len) { @@ -10860,11 +11014,7 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) PERL_ARGS_ASSERT_F0CONVERT; - if (UNLIKELY(Perl_isinfnan(nv))) { - STRLEN n = S_infnan_2pv(nv, endbuf - *len, *len, 0); - *len = n; - return endbuf - n; - } + assert(!Perl_isinfnan(nv)); if (neg) nv = -nv; if (nv < UV_MAX) { @@ -10886,29 +11036,6 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len) } -/* -=for apidoc sv_vcatpvfn - -=for apidoc sv_vcatpvfn_flags - -Processes its arguments like C and appends the formatted output -to an SV. Uses an array of SVs if the C-style variable argument list is -missing (C). Argument reordering (using format specifiers like C<%2$d> -or C<%*2$d>) is supported only when using an array of SVs; using a C-style -C argument list with a format string that uses argument reordering -will yield an exception. - -When running with taint checks enabled, indicates via -C if results are untrustworthy (often due to the use of -locales). - -If called as C or flags has the C bit set, calls get magic. - -Usually used via one of its frontends C and C. - -=cut -*/ - #define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\ vecstr = (U8*)SvPV_const(vecsv,veclen);\ vec_utf8 = DO_UTF8(vecsv); @@ -10924,6 +11051,34 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC); } + +/* For the vcatpvfn code, we need a long double target in case + * HAS_LONG_DOUBLE, even without USE_LONG_DOUBLE, so that we can printf + * with long double formats, even without NV being long double. But we + * call the target 'fv' instead of 'nv', since most of the time it is not + * (most compilers these days recognize "long double", even if only as a + * synonym for "double"). +*/ +#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \ + defined(PERL_PRIgldbl) && !defined(USE_QUADMATH) +# define VCATPVFN_FV_GF PERL_PRIgldbl +# if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT) + /* Work around breakage in OTS$CVT_FLOAT_T_X */ +# define VCATPVFN_NV_TO_FV(nv,fv) \ + STMT_START { \ + double _dv = nv; \ + fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \ + } STMT_END +# else +# define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) +# endif + typedef long double vcatpvfn_long_double_t; +#else +# define VCATPVFN_FV_GF NVgf +# define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv) + typedef NV vcatpvfn_long_double_t; +#endif + #ifdef LONGDOUBLE_DOUBLEDOUBLE /* The first double can be as large as 2**1023, or '1' x '0' x 1023. * The second double can be as small as 2**-1074, or '0' x 1073 . '1'. @@ -10973,13 +11128,13 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, # define HEXTRACT_MIX_ENDIAN #endif -/* S_hextract() is a helper for Perl_sv_vcatpvfn_flags, for extracting +/* S_hextract() is a helper for S_format_hexfp, for extracting * the hexadecimal values (for %a/%A). The nv is the NV where the value * are being extracted from (either directly from the long double in-memory * presentation, or from the uquad computed via frexp+ldexp). frexp also * is used to update the exponent. The subnormal is set to true - * for IEEE 754 subnormals/denormals. The vhex is the pointer to - * the beginning of the output buffer (of VHEX_SIZE). + * for IEEE 754 subnormals/denormals (including the x86 80-bit format). + * The vhex is the pointer to the beginning of the output buffer of VHEX_SIZE. * * The tricky part is that S_hextract() needs to be called twice: * the first time with vend as NULL, and the second time with vend as @@ -11030,9 +11185,10 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, for (ix = a; ix >= b; ix--) { HEXTRACT_BYTE(ix); } #define HEXTRACT_BYTES_BE(a, b) \ for (ix = a; ix <= b; ix++) { HEXTRACT_BYTE(ix); } +#define HEXTRACT_GET_SUBNORMAL(nv) *subnormal = Perl_fp_class_denorm(nv) #define HEXTRACT_IMPLICIT_BIT(nv) \ STMT_START { \ - if (!(*subnormal = (HEXTRACT_EXPONENT_BITS() == 0))) { \ + if (!*subnormal) { \ if (vend) *v++ = ((nv) == 0.0) ? 0 : 1; else v++; \ } \ } STMT_END @@ -11070,7 +11226,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, /* The bytes 13..0 are the mantissa/fraction, * the 15,14 are the sign+exponent. */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() (nvp[14] | (nvp[15] & 0x7F) << 8) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); # undef HEXTRACT_HAS_TOP_NYBBLE HEXTRACT_BYTES_LE(13, 0); @@ -11080,7 +11236,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, /* The bytes 2..15 are the mantissa/fraction, * the 0,1 are the sign+exponent. */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() ((nvp[0] & 0x7F) << 8 | nvp[1]) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); # undef HEXTRACT_HAS_TOP_NYBBLE HEXTRACT_BYTES_BE(2, 15); @@ -11089,10 +11245,12 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, * significand, 15 bits of exponent, 1 bit of sign. No implicit bit. * NVSIZE can be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux * and OS X), meaning that 2 or 6 bytes are empty padding. */ - /* The bytes 7..0 are the mantissa/fraction */ + /* The bytes 0..1 are the sign+exponent, + * the bytes 2..9 are the mantissa/fraction. */ const U8* nvp = (const U8*)(&nv); # undef HEXTRACT_HAS_IMPLICIT_BIT # undef HEXTRACT_HAS_TOP_NYBBLE + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_BYTES_LE(7, 0); # elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN /* Does this format ever happen? (Wikipedia says the Motorola @@ -11102,6 +11260,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, const U8* nvp = (const U8*)(&nv); # undef HEXTRACT_HAS_IMPLICIT_BIT # undef HEXTRACT_HAS_TOP_NYBBLE + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_BYTES_BE(0, 7); # else # define HEXTRACT_FALLBACK @@ -11137,21 +11296,21 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, # ifdef HEXTRACT_LITTLE_ENDIAN /* 0 1 2 3 4 5 6 7 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() (nvp[6] | (nvp[7] & 0x7F) << 4) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); HEXTRACT_TOP_NYBBLE(6); HEXTRACT_BYTES_LE(5, 0); # elif defined(HEXTRACT_BIG_ENDIAN) /* 7 6 5 4 3 2 1 0 (MSB = 7, LSB = 0, 6+7 = exponent+sign) */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() (nvp[1] | (nvp[0] & 0x7F) << 4) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); HEXTRACT_TOP_NYBBLE(1); HEXTRACT_BYTES_BE(2, 7); # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_LE_BE /* 4 5 6 7 0 1 2 3 (MSB = 7, LSB = 0, 6:7 = nybble:exponent:sign) */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() (nvp[2] | (nvp[3] & 0x7F) << 4) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); HEXTRACT_TOP_NYBBLE(2); /* 6 */ HEXTRACT_BYTE(1); /* 5 */ @@ -11163,7 +11322,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, # elif DOUBLEKIND == DOUBLE_IS_IEEE_754_64_BIT_MIXED_ENDIAN_BE_LE /* 3 2 1 0 7 6 5 4 (MSB = 7, LSB = 0, 7:6 = sign:exponent:nybble) */ const U8* nvp = (const U8*)(&nv); -# define HEXTRACT_EXPONENT_BITS() (nvp[5] | (nvp[4] & 0x7F) << 4) + HEXTRACT_GET_SUBNORMAL(nv); HEXTRACT_IMPLICIT_BIT(nv); HEXTRACT_TOP_NYBBLE(5); /* 6 */ HEXTRACT_BYTE(6); /* 5 */ @@ -11180,6 +11339,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, # endif #endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */ # ifdef HEXTRACT_FALLBACK + HEXTRACT_GET_SUBNORMAL(nv); # undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */ /* The fallback is used for the double-double format, and * for unknown long double formats, and for unknown double @@ -11279,212 +11439,472 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal, return v; } -/* Helper for sv_vcatpvfn_flags(). */ -#define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr) \ - STMT_START { \ - if (in_range) \ - (var) = (expr); \ - else { \ - (var) = &PL_sv_no; /* [perl #71000] */ \ - arg_missing = TRUE; \ - } \ - } STMT_END - -void -Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, - va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted, - const U32 flags) -{ - char *p; - char *q; - const char *patend; - STRLEN origlen; - I32 svix = 0; - static const char nullstr[] = "(null)"; - SV *argsv = NULL; - bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ - const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ - SV *nsv = NULL; - /* Times 4: a decimal digit takes more than 3 binary digits. - * NV_DIG: mantissa takes than many decimal digits. - * Plus 32: Playing safe. */ - char ebuf[IV_DIG * 4 + NV_DIG + 32]; - bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ - bool hexfp = FALSE; /* hexadecimal floating point? */ - - DECLARATION_FOR_LC_NUMERIC_MANIPULATION; - PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS; - PERL_UNUSED_ARG(maybe_tainted); +/* S_format_hexfp(): helper function for Perl_sv_vcatpvfn_flags(). + * + * Processes the %a/%A hexadecimal floating-point format, since the + * built-in snprintf()s which are used for most of the f/p formats, don't + * universally handle %a/%A. + * Populates buf of length bufsize, and returns the length of the created + * string. + * The rest of the args have the same meaning as the local vars of the + * same name within Perl_sv_vcatpvfn_flags(). + * + * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED(); + */ - if (flags & SV_GMAGIC) - SvGETMAGIC(sv); +static STRLEN +S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c, + const NV nv, const vcatpvfn_long_double_t fv, + bool has_precis, STRLEN precis, STRLEN width, + bool alt, char plus, bool left, bool fill) +{ + /* Hexadecimal floating point. */ + char* p = buf; + U8 vhex[VHEX_SIZE]; + U8* v = vhex; /* working pointer to vhex */ + U8* vend; /* pointer to one beyond last digit of vhex */ + U8* vfnz = NULL; /* first non-zero */ + U8* vlnz = NULL; /* last non-zero */ + U8* v0 = NULL; /* first output */ + const bool lower = (c == 'a'); + /* At output the values of vhex (up to vend) will + * be mapped through the xdig to get the actual + * human-readable xdigits. */ + const char* xdig = PL_hexdigit; + int zerotail = 0; /* how many extra zeros to append */ + int exponent = 0; /* exponent of the floating point input */ + bool hexradix = FALSE; /* should we output the radix */ + bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */ + bool negative = FALSE; + STRLEN elen; + + /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf". + * + * For example with denormals, (assuming the vanilla + * 64-bit double): the exponent is zero. 1xp-1074 is + * the smallest denormal and the smallest double, it + * could be output also as 0x0.0000000000001p-1022 to + * match its internal structure. */ - /* no matter what, this is a string now */ - (void)SvPV_force_nomg(sv, origlen); + vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL); + S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend); - /* special-case "", "%s", and "%-p" (SVf - see below) */ - if (patlen == 0) { - if (svmax && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - return; +#if NVSIZE > DOUBLESIZE +# ifdef HEXTRACT_HAS_IMPLICIT_BIT + /* In this case there is an implicit bit, + * and therefore the exponent is shifted by one. */ + exponent--; +# else +# ifdef NV_X86_80_BIT + if (subnormal) { + /* The subnormals of the x86-80 have a base exponent of -16382, + * (while the physical exponent bits are zero) but the frexp() + * returned the scientific-style floating exponent. We want + * to map the last one as: + * -16831..-16384 -> -16382 (the last normal is 0x1p-16382) + * -16835..-16388 -> -16384 + * since we want to keep the first hexdigit + * as one of the [8421]. */ + exponent = -4 * ( (exponent + 1) / -4) - 2; + } else { + exponent -= 4; } - if (patlen == 2 && pat[0] == '%' && pat[1] == 's') { - if (svmax > 1 && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); +# endif + /* TBD: other non-implicit-bit platforms than the x86-80. */ +# endif +#endif - if (args) { - const char * const s = va_arg(*args, char*); - sv_catpv_nomg(sv, s ? s : nullstr); - } - else if (svix < svmax) { - /* we want get magic on the source but not the target. sv_catsv can't do that, though */ - SvGETMAGIC(*svargs); - sv_catsv_nomg(sv, *svargs); - } - else - S_warn_vcatpvfn_missing_argument(aTHX); - return; + negative = fv < 0 || Perl_signbit(nv); + if (negative) + *p++ = '-'; + else if (plus) + *p++ = plus; + *p++ = '0'; + if (lower) { + *p++ = 'x'; } - if (args && patlen == 3 && pat[0] == '%' && - pat[1] == '-' && pat[2] == 'p') { - if (svmax > 1 && ckWARN(WARN_REDUNDANT)) - Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", - PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); - argsv = MUTABLE_SV(va_arg(*args, void*)); - sv_catsv_nomg(sv, argsv); - return; + else { + *p++ = 'X'; + xdig += 16; /* Use uppercase hex. */ } -#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) - /* special-case "%.[gf]" */ - if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.' - && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) { - unsigned digits = 0; - const char *pp; - - pp = pat + 2; - while (*pp >= '0' && *pp <= '9') - digits = 10 * digits + (*pp++ - '0'); - - /* XXX: Why do this `svix < svmax` test? Couldn't we just - format the first argument and WARN_REDUNDANT if svmax > 1? - Munged by Nicholas Clark in v5.13.0-209-g95ea86d */ - if (pp - pat == (int)patlen - 1 && svix < svmax) { - const NV nv = SvNV(*svargs); - if (LIKELY(!Perl_isinfnan(nv))) { - if (*pp == 'g') { - /* Add check for digits != 0 because it seems that some - gconverts are buggy in this case, and we don't yet have - a Configure test for this. */ - if (digits && digits < sizeof(ebuf) - NV_DIG - 10) { - /* 0, point, slack */ - STORE_LC_NUMERIC_SET_TO_NEEDED(); - SNPRINTF_G(nv, ebuf, size, digits); - sv_catpv_nomg(sv, ebuf); - if (*ebuf) /* May return an empty string for digits==0 */ - return; - } - } else if (!digits) { - STRLEN l; + /* Find the first non-zero xdigit. */ + for (v = vhex; v < vend; v++) { + if (*v) { + vfnz = v; + break; + } + } - if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { - sv_catpvn_nomg(sv, p, l); - return; - } - } + if (vfnz) { + /* Find the last non-zero xdigit. */ + for (v = vend - 1; v >= vhex; v--) { + if (*v) { + vlnz = v; + break; } - } - } -#endif /* !USE_LONG_DOUBLE */ + } - if (!args && svix < svmax && DO_UTF8(*svargs)) - has_utf8 = TRUE; +#if NVSIZE == DOUBLESIZE + if (fv != 0.0) + exponent--; +#endif - patend = (char*)pat + patlen; - for (p = (char*)pat; p < patend; p = q) { - bool alt = FALSE; - bool left = FALSE; - bool vectorize = FALSE; - bool vectorarg = FALSE; - bool vec_utf8 = FALSE; - char fill = ' '; - char plus = 0; - char intsize = 0; - STRLEN width = 0; - STRLEN zeros = 0; - bool has_precis = FALSE; - STRLEN precis = 0; - const I32 osvix = svix; - bool is_utf8 = FALSE; /* is this item utf8? */ - bool used_explicit_ix = FALSE; - bool arg_missing = FALSE; -#ifdef HAS_LDBL_SPRINTF_BUG - /* This is to try to fix a bug with irix/nonstop-ux/powerux and - with sfio - Allen */ - bool fix_ldbl_sprintf_bug = FALSE; + if (subnormal) { +#ifndef NV_X86_80_BIT + if (vfnz[0] > 1) { + /* IEEE 754 subnormals (but not the x86 80-bit): + * we want "normalize" the subnormal, + * so we need to right shift the hex nybbles + * so that the output of the subnormal starts + * from the first true bit. (Another, equally + * valid, policy would be to dump the subnormal + * nybbles as-is, to display the "physical" layout.) */ + int i, n; + U8 *vshr; + /* Find the ceil(log2(v[0])) of + * the top non-zero nybble. */ + for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } + assert(n < 4); + vlnz[1] = 0; + for (vshr = vlnz; vshr >= vfnz; vshr--) { + vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); + vshr[0] >>= n; + } + if (vlnz[1]) { + vlnz++; + } + } #endif + v0 = vfnz; + } else { + v0 = vhex; + } - char esignbuf[4]; - U8 utf8buf[UTF8_MAXBYTES+1]; - STRLEN esignlen = 0; - - const char *eptr = NULL; - const char *fmtstart; - STRLEN elen = 0; - SV *vecsv = NULL; - const U8 *vecstr = NULL; - STRLEN veclen = 0; - char c = 0; - int i; - unsigned base = 0; - IV iv = 0; - UV uv = 0; - /* We need a long double target in case HAS_LONG_DOUBLE, - * even without USE_LONG_DOUBLE, so that we can printf with - * long double formats, even without NV being long double. - * But we call the target 'fv' instead of 'nv', since most of - * the time it is not (most compilers these days recognize - * "long double", even if only as a synonym for "double"). - */ -#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \ - defined(PERL_PRIgldbl) && !defined(USE_QUADMATH) - long double fv; -# ifdef Perl_isfinitel -# define FV_ISFINITE(x) Perl_isfinitel(x) -# endif -# define FV_GF PERL_PRIgldbl -# if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT) - /* Work around breakage in OTS$CVT_FLOAT_T_X */ -# define NV_TO_FV(nv,fv) STMT_START { \ - double _dv = nv; \ - fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \ - } STMT_END -# else -# define NV_TO_FV(nv,fv) (fv)=(nv) -# endif + if (has_precis) { + U8* ve = (subnormal ? vlnz + 1 : vend); + SSize_t vn = ve - v0; + assert(vn >= 1); + if (precis < (Size_t)(vn - 1)) { + bool overflow = FALSE; + if (v0[precis + 1] < 0x8) { + /* Round down, nothing to do. */ + } else if (v0[precis + 1] > 0x8) { + /* Round up. */ + v0[precis]++; + overflow = v0[precis] > 0xF; + v0[precis] &= 0xF; + } else { /* v0[precis] == 0x8 */ + /* Half-point: round towards the one + * with the even least-significant digit: + * 08 -> 0 88 -> 8 + * 18 -> 2 98 -> a + * 28 -> 2 a8 -> a + * 38 -> 4 b8 -> c + * 48 -> 4 c8 -> c + * 58 -> 6 d8 -> e + * 68 -> 6 e8 -> e + * 78 -> 8 f8 -> 10 */ + if ((v0[precis] & 0x1)) { + v0[precis]++; + } + overflow = v0[precis] > 0xF; + v0[precis] &= 0xF; + } + + if (overflow) { + for (v = v0 + precis - 1; v >= v0; v--) { + (*v)++; + overflow = *v > 0xF; + (*v) &= 0xF; + if (!overflow) { + break; + } + } + if (v == v0 - 1 && overflow) { + /* If the overflow goes all the + * way to the front, we need to + * insert 0x1 in front, and adjust + * the exponent. */ + Move(v0, v0 + 1, vn - 1, char); + *v0 = 0x1; + exponent += 4; + } + } + + /* The new effective "last non zero". */ + vlnz = v0 + precis; + } + else { + zerotail = + subnormal ? precis - vn + 1 : + precis - (vlnz - vhex); + } + } + + v = v0; + *p++ = xdig[*v++]; + + /* If there are non-zero xdigits, the radix + * is output after the first one. */ + if (vfnz < vlnz) { + hexradix = TRUE; + } + } + else { + *p++ = '0'; + exponent = 0; + zerotail = precis; + } + + /* The radix is always output if precis, or if alt. */ + if (precis > 0 || alt) { + hexradix = TRUE; + } + + if (hexradix) { +#ifndef USE_LOCALE_NUMERIC + *p++ = '.'; #else - NV fv; -# define FV_GF NVgf -# define NV_TO_FV(nv,fv) (fv)=(nv) + if (PL_numeric_radix_sv) { + STRLEN n; + const char* r = SvPV(PL_numeric_radix_sv, n); + assert(IN_LC(LC_NUMERIC)); + Copy(r, p, n, char); + p += n; + } + else { + *p++ = '.'; + } #endif -#ifndef FV_ISFINITE -# define FV_ISFINITE(x) Perl_isfinite((NV)(x)) + } + + if (vlnz) { + while (v <= vlnz) + *p++ = xdig[*v++]; + } + + if (zerotail > 0) { + while (zerotail--) { + *p++ = '0'; + } + } + + elen = p - buf; + elen += my_snprintf(p, bufsize - elen, + "%c%+d", lower ? 'p' : 'P', + exponent); + + if (elen < width) { + STRLEN gap = (STRLEN)(width - elen); + if (left) { + /* Pad the back with spaces. */ + memset(buf + elen, ' ', gap); + } + else if (fill) { + /* Insert the zeros after the "0x" and the + * the potential sign, but before the digits, + * otherwise we end up with "0000xH.HHH...", + * when we want "0x000H.HHH..." */ + STRLEN nzero = gap; + char* zerox = buf + 2; + STRLEN nmove = elen - 2; + if (negative || plus) { + zerox++; + nmove--; + } + Move(zerox, zerox + nzero, nmove, char); + memset(zerox, fill ? '0' : ' ', nzero); + } + else { + /* Move it to the right. */ + Move(buf, buf + gap, + elen, char); + /* Pad the front with spaces. */ + memset(buf, ' ', gap); + } + elen = width; + } + return elen; +} + + +/* Helper for sv_vcatpvfn_flags(). */ +#define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr) \ + STMT_START { \ + if (in_range) \ + (var) = (expr); \ + else { \ + (var) = &PL_sv_no; /* [perl #71000] */ \ + arg_missing = TRUE; \ + } \ + } STMT_END + +void + + +/* +=for apidoc sv_vcatpvfn + +=for apidoc sv_vcatpvfn_flags + +Processes its arguments like C and appends the formatted output +to an SV. Uses an array of SVs if the C-style variable argument list is +missing (C). Argument reordering (using format specifiers like C<%2$d> +or C<%*2$d>) is supported only when using an array of SVs; using a C-style +C argument list with a format string that uses argument reordering +will yield an exception. + +When running with taint checks enabled, indicates via +C if results are untrustworthy (often due to the use of +locales). + +If called as C or flags has the C bit set, calls get magic. + +It assumes that pat has the same utf8-ness as sv. It's the caller's +responsibility to ensure that this is so. + +Usually used via one of its frontends C and C. + +=cut +*/ + + +Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, + va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted, + const U32 flags) +{ + char *p; + char *q; + const char *patend; + STRLEN origlen; + I32 svix = 0; + static const char nullstr[] = "(null)"; + SV *argsv = NULL; + bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */ + const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */ + SV *nsv = NULL; + /* Times 4: a decimal digit takes more than 3 binary digits. + * NV_DIG: mantissa takes than many decimal digits. + * Plus 32: Playing safe. */ + char ebuf[IV_DIG * 4 + NV_DIG + 32]; + bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ +#ifdef USE_LOCALE_NUMERIC + DECLARATION_FOR_LC_NUMERIC_MANIPULATION; + bool lc_numeric_set = FALSE; /* called STORE_LC_NUMERIC_SET_TO_NEEDED? */ #endif - NV nv; - STRLEN have; - STRLEN need; - STRLEN gap; - const char *dotstr = "."; - STRLEN dotstrlen = 1; - I32 efix = 0; /* explicit format parameter index */ - I32 ewix = 0; /* explicit width index */ - I32 epix = 0; /* explicit precision index */ - I32 evix = 0; /* explicit vector index */ - bool asterisk = FALSE; - bool infnan = FALSE; + + PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS; + PERL_UNUSED_ARG(maybe_tainted); + + if (flags & SV_GMAGIC) + SvGETMAGIC(sv); + + /* no matter what, this is a string now */ + (void)SvPV_force_nomg(sv, origlen); + + /* the code that scans for flags etc following a % relies on + * a '\0' being present to avoid falling off the end. Ideally that + * should be fixed */ + assert(pat[patlen] == '\0'); + + /* special-case "", "%s", and "%-p" (SVf - see below) */ + if (patlen == 0) { + if (svmax && ckWARN(WARN_REDUNDANT)) + Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", + PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); + return; + } + if (patlen == 2 && pat[0] == '%' && pat[1] == 's') { + if (svmax > 1 && ckWARN(WARN_REDUNDANT)) + Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", + PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); + + if (args) { + const char * const s = va_arg(*args, char*); + sv_catpv_nomg(sv, s ? s : nullstr); + } + else if (svix < svmax) { + /* we want get magic on the source but not the target. sv_catsv can't do that, though */ + SvGETMAGIC(*svargs); + sv_catsv_nomg(sv, *svargs); + } + else + S_warn_vcatpvfn_missing_argument(aTHX); + return; + } + if (args && patlen == 3 && pat[0] == '%' && + pat[1] == '-' && pat[2] == 'p') { + if (svmax > 1 && ckWARN(WARN_REDUNDANT)) + Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s", + PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); + argsv = MUTABLE_SV(va_arg(*args, void*)); + sv_catsv_nomg(sv, argsv); + return; + } + +#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH) + /* special-case "%.0f" */ + if ( !args + && patlen == 4 + && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f' + && svmax > 0) + { + const NV nv = SvNV(*svargs); + if (LIKELY(!Perl_isinfnan(nv))) { + STRLEN l; + char *p; + + if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) { + sv_catpvn_nomg(sv, p, l); + return; + } + } + } +#endif /* !USE_LONG_DOUBLE */ + + patend = (char*)pat + patlen; + for (p = (char*)pat; p < patend; p = q) { + + char intsize = 0; /* size qualifier in "%hi..." etc */ + bool alt = FALSE; /* has "%#..." */ + bool left = FALSE; /* has "%-..." */ + bool fill = FALSE; /* has "%0..." */ + char plus = 0; /* has "%+..." */ + STRLEN width = 0; /* value of "%NNN..." */ + bool has_precis = FALSE; /* has "%.NNN..." */ + STRLEN precis = 0; /* value of "%.NNN..." */ + bool used_explicit_ix = FALSE;/* has "%$n..." */ + int base = 0; /* base to print in, e.g. 8 for %o */ + UV uv = 0; /* the value to print of int-ish args */ + IV iv = 0; /* ditto for signed types */ + + bool vectorize = FALSE; /* has "%v..." */ + SV *vecsv = NULL; /* the cur arg for %v */ + bool vec_utf8 = FALSE; /* SvUTF8(vecsv) */ + const U8 *vecstr = NULL; /* SvPVX(vecsv) */ + STRLEN veclen = 0; /* SvCUR(vecsv) */ + const char *dotstr = "."; /* separator string for %v */ + STRLEN dotstrlen = 1; /* length of separator string for %v */ + + I32 efix = 0; /* explicit format parameter index */ + I32 ewix = 0; /* explicit width index */ + I32 epix = 0; /* explicit precision index */ + const I32 osvix = svix; /* original index in case of bad fmt */ + + bool is_utf8 = FALSE; /* is this item utf8? */ + bool arg_missing = FALSE; /* give "Missing argument" warning */ + char esignbuf[4]; /* holds sign prefix, e.g. "-0x" */ + STRLEN esignlen = 0; /* length of e.g. "-0x" */ + STRLEN zeros = 0; /* how many '0' to prepend */ + + const char *eptr = NULL; /* the address of the element string */ + STRLEN elen = 0; /* the length of the element string */ + + const char *fmtstart; /* start of current format (the '%') */ + char c = 0; /* current character read from format */ + /* echo everything up to the next format specification */ for (q = p; q < patend && *q != '%'; ++q) ; @@ -11512,77 +11932,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p [%bcdefginopsuxDFOUX] format (mandatory) */ - if (args) { -/* - As of perl5.9.3, printf format checking is on by default. - Internally, perl uses %p formats to provide an escape to - some extended formatting. This block deals with those - extensions: if it does not match, (char*)q is reset and - the normal format processing code is used. - - Currently defined extensions are: - %p include pointer address (standard) - %-p (SVf) include an SV (previously %_) - %-p include an SV with precision - %2p include a HEK - %3p include a HEK with precision of 256 - %4p char* preceded by utf8 flag and length - %p (where num is 1 or > 4) reserved for future - extensions - - Robin Barker 2005-07-14 (but modified since) - - %1p (VDf) removed. RMB 2007-10-19 -*/ - char* r = q; - bool sv = FALSE; - STRLEN n = 0; - if (*q == '-') - sv = *q++; - else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */ - /* The argument has already gone through cBOOL, so the cast - is safe. */ - is_utf8 = (bool)va_arg(*args, int); - elen = va_arg(*args, UV); - /* if utf8 length is larger than 0x7ffff..., then it might - * have been a signed value that wrapped */ - if (elen > ((~(STRLEN)0) >> 1)) { - assert(0); /* in DEBUGGING build we want to crash */ - elen= 0; /* otherwise we want to treat this as an empty string */ - } - eptr = va_arg(*args, char *); - q += sizeof(UTF8f)-1; - goto string; - } - n = expect_number(&q); - if (*q++ == 'p') { - if (sv) { /* SVf */ - if (n) { - precis = n; - has_precis = TRUE; - } - argsv = MUTABLE_SV(va_arg(*args, void*)); - eptr = SvPV_const(argsv, elen); - if (DO_UTF8(argsv)) - is_utf8 = TRUE; - goto string; - } - else if (n==2 || n==3) { /* HEKf */ - HEK * const hek = va_arg(*args, HEK *); - eptr = HEK_KEY(hek); - elen = HEK_LEN(hek); - if (HEK_UTF8(hek)) is_utf8 = TRUE; - if (n==3) precis = 256, has_precis = TRUE; - goto string; - } - else if (n) { - Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), - "internal %%p might conflict with future printf extensions"); - } - } - q = r; - } - if ( (width = expect_number(&q)) ) { if (*q == '$') { if (args) @@ -11614,7 +11963,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p continue; case '0': - fill = *q++; + fill = TRUE; + q++; continue; case '#': @@ -11628,8 +11978,25 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p break; } + /* at this point we can expect one of: + * + * 123 an explicit width + * * width taken from next arg + * *12$ width taken from 12th arg + * or no width + * + * But any width specification may be preceded by a v, in one of its + * forms: + * v + * *v + * *12$v + * So an asterisk may be either a width specifier or a vector + * separator arg specifier, and we don't know which initially + */ + tryasterisk: if (*q == '*') { + int i; q++; if ( (ewix = expect_number(&q)) ) { if (*q++ == '$') { @@ -11640,53 +12007,39 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } else goto unknown; } - asterisk = TRUE; - } - if (*q == 'v') { - q++; - if (vectorize) - goto unknown; - if ((vectorarg = asterisk)) { - evix = ewix; - ewix = 0; - asterisk = FALSE; - } - vectorize = TRUE; - goto tryasterisk; - } - if (!asterisk) - { - if( *q == '0' ) - fill = *q++; - width = expect_number(&q); - } - - if (vectorize && vectorarg) { - /* vectorizing, but not with the default "." */ - if (args) - vecsv = va_arg(*args, SV*); - else if (evix) { - FETCH_VCATPVFN_ARGUMENT( - vecsv, evix > 0 && evix <= svmax, svargs[evix-1]); - } else { - FETCH_VCATPVFN_ARGUMENT( - vecsv, svix < svmax, svargs[svix++]); - } - dotstr = SvPV_const(vecsv, dotstrlen); - /* Keep the DO_UTF8 test *after* the SvPV call, else things go - bad with tied or overloaded values that return UTF8. */ - if (DO_UTF8(vecsv)) - is_utf8 = TRUE; - else if (has_utf8) { - vecsv = sv_mortalcopy(vecsv); - sv_utf8_upgrade(vecsv); - dotstr = SvPV_const(vecsv, dotstrlen); - is_utf8 = TRUE; - } - } + if (*q == 'v') { + /* The asterisk was for *v, *NNN$v: vectorizing, but not + * with the default "." */ + q++; + if (vectorize) + goto unknown; + if (args) + vecsv = va_arg(*args, SV*); + else if (ewix) { + FETCH_VCATPVFN_ARGUMENT( + vecsv, ewix > 0 && ewix <= svmax, svargs[ewix-1]); + } else { + FETCH_VCATPVFN_ARGUMENT( + vecsv, svix < svmax, svargs[svix++]); + } + dotstr = SvPV_const(vecsv, dotstrlen); + /* Keep the DO_UTF8 test *after* the SvPV call, else things go + bad with tied or overloaded values that return UTF8. */ + if (DO_UTF8(vecsv)) + is_utf8 = TRUE; + else if (has_utf8) { + vecsv = sv_mortalcopy(vecsv); + sv_utf8_upgrade(vecsv); + dotstr = SvPV_const(vecsv, dotstrlen); + is_utf8 = TRUE; + } + ewix = 0; + vectorize = TRUE; + goto tryasterisk; + } - if (asterisk) { + /* the asterisk specified a width */ if (args) i = va_arg(*args, int); else @@ -11694,7 +12047,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0; left |= (i < 0); width = (i < 0) ? -i : i; + } + else if (*q == 'v') { + q++; + if (vectorize) + goto unknown; + vectorize = TRUE; + goto tryasterisk; + + } + else { + /* explicit width? */ + if(*q == '0') { + fill = TRUE; + q++; + } + width = expect_number(&q); } + gotwidth: /* PRECISION */ @@ -11702,6 +12072,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p if (*q == '.') { q++; if (*q == '*') { + int i; q++; if ( (epix = expect_number(&q)) ) { if (*q++ == '$') { @@ -11749,7 +12120,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * vectorize happen normally */ if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) { - if ( hv_exists(MUTABLE_HV(SvRV(vecsv)), "alpha", 5 ) ) { + if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) { Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF), "vector argument not supported with alpha versions"); goto vdblank; @@ -11857,40 +12228,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } } - if (argsv && strchr("BbcDdiOopuUXx",*q)) { - /* XXX va_arg(*args) case? need peek, use va_copy? */ - SvGETMAGIC(argsv); - if (UNLIKELY(SvAMAGIC(argsv))) - argsv = sv_2num(argsv); - infnan = UNLIKELY(isinfnansv(argsv)); - } + c = *q++; /* c now holds the conversion type */ - switch (c = *q++) { + switch (c) { /* STRINGS */ - case 'c': - if (vectorize) - goto unknown; - if (infnan) - Perl_croak(aTHX_ "Cannot printf %"NVgf" with '%c'", - /* no va_arg() case */ - SvNV_nomg(argsv), (int)c); - uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv); - if ((uv > 255 || - (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) - && !IN_BYTES) { - eptr = (char*)utf8buf; - elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; - is_utf8 = TRUE; - } - else { - c = (char)uv; - eptr = &c; - elen = 1; - } - goto string; - case 's': if (vectorize) goto unknown; @@ -11932,14 +12275,102 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* INTEGERS */ case 'p': - if (infnan) { - goto floating_point; - } if (alt || vectorize) goto unknown; + + /* %p extensions: + * + * "%...p" is normally treated like "%...x", except that the + * number to print is the SV's address (or a pointer address + * for C-ish sprintf). + * + * However, the C-ish sprintf variant allows a few special + * extensions. These are currently: + * + * %-p (SVf) Like %s, but gets the string from an SV* + * arg rather than a char* arg. + * (This was previously %_). + * + * %-p Ditto but like %.s (i.e. num is max width) + * + * %2p (HEKf) Like %s, but using the key string in a HEK + * + * %3p (HEKf256) Ditto but like %.256s + * + * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args: + * (cBOOL(utf8), len, string_buf). + * It's handled by the "case 'd'" branch + * rather than here. + * + * %p where num is 1 or > 4: reserved for future + * extensions. Warns, but then is treated as a + * general %p (print hex address) format. + */ + + if ( args + && !intsize + && !fill + && !plus + && !has_precis + /* not %*p or %*1$p - any width was explicit */ + && q[-2] != '*' + && q[-2] != '$' + && !used_explicit_ix + ) { + if (left) { /* %-p (SVf), %-NNNp */ + if (width) { + precis = width; + has_precis = TRUE; + } + argsv = MUTABLE_SV(va_arg(*args, void*)); + eptr = SvPV_const(argsv, elen); + if (DO_UTF8(argsv)) + is_utf8 = TRUE; + width = 0; + goto string; + } + else if (width == 2 || width == 3) { /* HEKf, HEKf256 */ + HEK * const hek = va_arg(*args, HEK *); + eptr = HEK_KEY(hek); + elen = HEK_LEN(hek); + if (HEK_UTF8(hek)) + is_utf8 = TRUE; + if (width == 3) { + precis = 256; + has_precis = TRUE; + } + width = 0; + goto string; + } + else if (width) { + Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), + "internal %%p might conflict with future printf extensions"); + } + } + + /* treat as normal %...p */ + uv = PTR2UV(args ? va_arg(*args, void*) : argsv); base = 16; - goto integer; + goto do_integer; + + case 'c': + if (vectorize) + goto unknown; + /* Ignore any size specifiers, since they're not documented as + * being allowed for %c (ideally we should warn on e.g. '%hc'). + * Setting a default intsize, along with a positive + * (which signals unsigned) base, causes, for C-ish use, the + * va_arg to be interpreted as as unsigned int, when it's + * actually signed, which will convert -ve values to high +ve + * values. Note that unlike the libc %c, values > 255 will + * convert to high unicode points rather than being truncated + * to 8 bits. For perlish use, it will do SvUV(argsv), which + * will again convert -ve args to high -ve values. + */ + intsize = 0; + base = 1; /* special value that indicates we're doing a 'c' */ + goto get_int_arg_val; case 'D': #ifdef IV_IS_QUAD @@ -11947,80 +12378,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p #else intsize = 'l'; #endif - /* FALLTHROUGH */ + base = -10; + goto get_int_arg_val; + case 'd': - case 'i': - if (infnan) { - goto floating_point; - } - if (vectorize) { - STRLEN ulen; - if (!veclen) - goto donevalidconversion; - if (vec_utf8) - uv = utf8n_to_uvchr(vecstr, veclen, &ulen, - UTF8_ALLOW_ANYUV); - else { - uv = *vecstr; - ulen = 1; - } - vecstr += ulen; - veclen -= ulen; - if (plus) - esignbuf[esignlen++] = plus; - } - else if (args) { - switch (intsize) { - case 'c': iv = (char)va_arg(*args, int); break; - case 'h': iv = (short)va_arg(*args, int); break; - case 'l': iv = va_arg(*args, long); break; - case 'V': iv = va_arg(*args, IV); break; - case 'z': iv = va_arg(*args, SSize_t); break; -#ifdef HAS_PTRDIFF_T - case 't': iv = va_arg(*args, ptrdiff_t); break; -#endif - default: iv = va_arg(*args, int); break; -#ifdef I_STDINT - case 'j': iv = va_arg(*args, intmax_t); break; -#endif - case 'q': -#if IVSIZE >= 8 - iv = va_arg(*args, Quad_t); break; -#else - goto unknown; -#endif - } - } - else { - IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'c': iv = (char)tiv; break; - case 'h': iv = (short)tiv; break; - case 'l': iv = (long)tiv; break; - case 'V': - default: iv = tiv; break; - case 'q': -#if IVSIZE >= 8 - iv = (Quad_t)tiv; break; -#else - goto unknown; -#endif - } - } - if ( !vectorize ) /* we already set uv above */ - { - if (iv >= 0) { - uv = iv; - if (plus) - esignbuf[esignlen++] = plus; - } - else { - uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); - esignbuf[esignlen++] = '-'; - } + /* probably just a plain %d, but it might be the start of the + * special UTF8f format, which usually looks something like + * "%d%lu%4p" (the lu may vary by platform) + */ + assert((UTF8f)[0] == 'd'); + assert((UTF8f)[1] == '%'); + + if ( args /* UTF8f only valid for C-ish sprintf */ + && q == fmtstart + 1 /* plain %d, not %....d */ + && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */ + && *q == '%' + && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3)) + { + /* The argument has already gone through cBOOL, so the cast + is safe. */ + is_utf8 = (bool)va_arg(*args, int); + elen = va_arg(*args, UV); + /* if utf8 length is larger than 0x7ffff..., then it might + * have been a signed value that wrapped */ + if (elen > ((~(STRLEN)0) >> 1)) { + assert(0); /* in DEBUGGING build we want to crash */ + elen = 0; /* otherwise we want to treat this as an empty string */ + } + eptr = va_arg(*args, char *); + q += sizeof(UTF8f) - 2; + goto string; } - base = 10; - goto integer; + + /* FALLTHROUGH */ + case 'i': + base = -10; + goto get_int_arg_val; case 'U': #ifdef IV_IS_QUAD @@ -12031,12 +12424,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* FALLTHROUGH */ case 'u': base = 10; - goto uns_integer; + goto get_int_arg_val; case 'B': case 'b': base = 2; - goto uns_integer; + goto get_int_arg_val; case 'O': #ifdef IV_IS_QUAD @@ -12047,19 +12440,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* FALLTHROUGH */ case 'o': base = 8; - goto uns_integer; + goto get_int_arg_val; case 'X': case 'x': base = 16; - uns_integer: - if (infnan) { - goto floating_point; - } + get_int_arg_val: + if (vectorize) { STRLEN ulen; - vector: + + if (base < 0) { + base = -base; + if (plus) + esignbuf[esignlen++] = plus; + } + + vector: if (!veclen) goto donevalidconversion; if (vec_utf8) @@ -12072,46 +12470,119 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p vecstr += ulen; veclen -= ulen; } - else if (args) { - switch (intsize) { - case 'c': uv = (unsigned char)va_arg(*args, unsigned); break; - case 'h': uv = (unsigned short)va_arg(*args, unsigned); break; - case 'l': uv = va_arg(*args, unsigned long); break; - case 'V': uv = va_arg(*args, UV); break; - case 'z': uv = va_arg(*args, Size_t); break; + else { + /* test arg for inf/nan. This can trigger an unwanted + * 'str' overload, so manually force 'num' overload first + * if necessary */ + if (argsv) { + SvGETMAGIC(argsv); + if (UNLIKELY(SvAMAGIC(argsv))) + argsv = sv_2num(argsv); + if (UNLIKELY(isinfnansv(argsv))) + goto handle_infnan_argsv; + } + + if (base < 0) { + /* signed int type */ + base = -base; + if (args) { + switch (intsize) { + case 'c': iv = (char)va_arg(*args, int); break; + case 'h': iv = (short)va_arg(*args, int); break; + case 'l': iv = va_arg(*args, long); break; + case 'V': iv = va_arg(*args, IV); break; + case 'z': iv = va_arg(*args, SSize_t); break; #ifdef HAS_PTRDIFF_T - case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */ + case 't': iv = va_arg(*args, ptrdiff_t); break; #endif + default: iv = va_arg(*args, int); break; #ifdef I_STDINT - case 'j': uv = va_arg(*args, uintmax_t); break; + case 'j': iv = va_arg(*args, intmax_t); break; #endif - default: uv = va_arg(*args, unsigned); break; - case 'q': + case 'q': #if IVSIZE >= 8 - uv = va_arg(*args, Uquad_t); break; + iv = va_arg(*args, Quad_t); break; #else - goto unknown; + goto unknown; #endif - } - } - else { - UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */ - switch (intsize) { - case 'c': uv = (unsigned char)tuv; break; - case 'h': uv = (unsigned short)tuv; break; - case 'l': uv = (unsigned long)tuv; break; - case 'V': - default: uv = tuv; break; - case 'q': + } + } + else { + IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */ + switch (intsize) { + case 'c': iv = (char)tiv; break; + case 'h': iv = (short)tiv; break; + case 'l': iv = (long)tiv; break; + case 'V': + default: iv = tiv; break; + case 'q': #if IVSIZE >= 8 - uv = (Uquad_t)tuv; break; + iv = (Quad_t)tiv; break; #else - goto unknown; + goto unknown; #endif - } - } + } + } + + /* now convert iv to uv */ + if (iv >= 0) { + uv = iv; + if (plus) + esignbuf[esignlen++] = plus; + } + else { + uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv); + esignbuf[esignlen++] = '-'; + } + } + else { + /* unsigned int type */ + if (args) { + switch (intsize) { + case 'c': uv = (unsigned char)va_arg(*args, unsigned); + break; + case 'h': uv = (unsigned short)va_arg(*args, unsigned); + break; + case 'l': uv = va_arg(*args, unsigned long); break; + case 'V': uv = va_arg(*args, UV); break; + case 'z': uv = va_arg(*args, Size_t); break; +#ifdef HAS_PTRDIFF_T + /* will sign extend, but there is no + * uptrdiff_t, so oh well */ + case 't': uv = va_arg(*args, ptrdiff_t); break; +#endif +#ifdef I_STDINT + case 'j': uv = va_arg(*args, uintmax_t); break; +#endif + default: uv = va_arg(*args, unsigned); break; + case 'q': +#if IVSIZE >= 8 + uv = va_arg(*args, Uquad_t); break; +#else + goto unknown; +#endif + } + } + else { + UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */ + switch (intsize) { + case 'c': uv = (unsigned char)tuv; break; + case 'h': uv = (unsigned short)tuv; break; + case 'l': uv = (unsigned long)tuv; break; + case 'V': + default: uv = tuv; break; + case 'q': +#if IVSIZE >= 8 + uv = (Uquad_t)tuv; break; +#else + goto unknown; +#endif + } + } + } + } - integer: + do_integer: { char *ptr = ebuf + sizeof ebuf; bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */ @@ -12148,6 +12619,28 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p esignbuf[esignlen++] = c; } break; + + case 1: + /* special-case: base 1 indicates a 'c' format: + * we use the common code for extracting a uv, + * but handle that value differently here than + * all the other int types */ + if ((uv > 255 || + (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv))) + && !IN_BYTES) + { + assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1); + eptr = ebuf; + elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf; + is_utf8 = TRUE; + } + else { + c = (char)uv; + eptr = &c; + elen = 1; + } + goto string; + default: /* it had better be ten or less */ do { dig = uv % base; @@ -12164,17 +12657,14 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p && !(base == 8 && alt)) /* "%#.0o" prints "0" */ elen = 0; - /* a precision nullifies the 0 flag. */ - if (fill == '0') - fill = ' '; + /* a precision nullifies the 0 flag. */ + fill = FALSE; } } break; /* FLOATING POINT */ - floating_point: - case 'F': c = 'f'; /* maybe %F isn't supported here */ /* FALLTHROUGH */ @@ -12182,6 +12672,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'f': case 'g': case 'G': case 'a': case 'A': + + { + STRLEN radix_len; /* SvCUR(PL_numeric_radix_sv) */ + STRLEN float_need; /* what PL_efloatsize needs to become */ + bool hexfp; /* hexadecimal floating point? */ + + vcatpvfn_long_double_t fv; + NV nv; + if (vectorize) goto unknown; @@ -12240,7 +12739,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p nv = fv; } else { nv = va_arg(*args, double); - NV_TO_FV(nv, fv); + VCATPVFN_NV_TO_FV(nv, fv); } #else nv = va_arg(*args, double); @@ -12249,432 +12748,215 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } else { - if (!infnan) SvGETMAGIC(argsv); + SvGETMAGIC(argsv); + /* we jump here if an int-ish format encountered an + * infinite/Nan argsv. After setting nv/fv, it falls + * into the isinfnan block which follows */ + handle_infnan_argsv: nv = SvNV_nomg(argsv); - NV_TO_FV(nv, fv); + VCATPVFN_NV_TO_FV(nv, fv); + } + + if (Perl_isinfnan(nv)) { + if (c == 'c') + Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'", + SvNV_nomg(argsv), (int)c); + + elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus); + assert(elen); + eptr = ebuf; + zeros = 0; + esignlen = 0; + dotstrlen = 0; + break; + } + + /* special-case "%.0f" */ + if ( c == 'f' + && !precis + && has_precis + && !(width || left || plus || alt) + && !fill + && intsize != 'q' + && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) + ) + goto float_concat_no_utf8; + + /* Determine the buffer size needed for the various + * floating-point formats. + * + * The basic possibilities are: + * + * <---P---> + * %f 1111111.123456789 + * %e 1.111111123e+06 + * %a 0x1.0f4471f9bp+20 + * %g 1111111.12 + * %g 1.11111112e+15 + * + * where P is the value of the precision in the format, or 6 + * if not specified. Note the two possible output formats of + * %g; in both cases the number of significant digits is <= + * precision. + * + * For most of the format types the maximum buffer size needed + * is precision, plus: any leading 1 or 0x1, the radix + * point, and an exponent. The difficult one is %f: for a + * large positive exponent it can have many leading digits, + * which needs to be calculated specially. Also %a is slightly + * different in that in the absence of a specified precision, + * it uses as many digits as necessary to distinguish + * different values. + * + * First, here are the constant bits. For ease of calculation + * we over-estimate the needed buffer size, for example by + * assuming all formats have an exponent and a leading 0x1. + */ + + float_need = 1 /* possible unary minus */ + + 4 /* "0x1" plus very unlikely carry */ + + 2 /* "e-", "p+" etc */ + + 6 /* exponent: up to 16383 (quad fp) */ + + 1; /* \0 */ + + + /* determine the radix point len, e.g. length(".") in "1.2" */ + radix_len = 1; /* assume '.' */ +#ifdef USE_LOCALE_NUMERIC + /* note that we may either explicitly use PL_numeric_radix_sv + * below, or implicitly, via an snprintf() variant. + * Note also things like ps_AF.utf8 which has + * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */ + if (!lc_numeric_set) { + /* only set once and reuse in-locale value on subsequent + * iterations. + * XXX what happens if we die in an eval? + */ + STORE_LC_NUMERIC_SET_TO_NEEDED(); + lc_numeric_set = TRUE; } - need = 0; - /* frexp() (or frexpl) has some unspecified behaviour for - * nan/inf/-inf, so let's avoid calling that on non-finites. */ - if (isALPHA_FOLD_NE(c, 'e') && FV_ISFINITE(fv)) { - i = PERL_INT_MIN; + if (PL_numeric_radix_sv) { + assert(IN_LC(LC_NUMERIC)); + radix_len = SvCUR(PL_numeric_radix_sv); + /* note that this will convert the output to utf8 even if + * if the radix point didn't get output */ + is_utf8 = SvUTF8(PL_numeric_radix_sv); + } +#endif + /* this can't wrap unless PL_numeric_radix_sv is a string + * consuming virtually all the 32-bit or 64-bit address space + */ + float_need += radix_len; + + hexfp = FALSE; + + if (isALPHA_FOLD_EQ(c, 'f')) { + /* Determine how many digits before the radix point + * might be emitted. frexp() (or frexpl) has some + * unspecified behaviour for nan/inf/-inf, so lucky we've + * already handled them above */ + STRLEN digits; + int i = PERL_INT_MIN; (void)Perl_frexp((NV)fv, &i); if (i == PERL_INT_MIN) - Perl_die(aTHX_ "panic: frexp: %"FV_GF, fv); - /* Do not set hexfp earlier since we want to printf - * Inf/NaN for Inf/NaN, not their hexfp. */ - hexfp = isALPHA_FOLD_EQ(c, 'a'); - if (UNLIKELY(hexfp)) { - /* This seriously overshoots in most cases, but - * better the undershooting. Firstly, all bytes + Perl_die(aTHX_ "panic: frexp: %" VCATPVFN_FV_GF, fv); + + if (i > 0) { + digits = BIT_DIGITS(i); + if (float_need >= ((STRLEN)~0) - digits) + croak_memory_wrap(); + float_need += digits; + } + } + else if (UNLIKELY(isALPHA_FOLD_EQ(c, 'a'))) { + hexfp = TRUE; + if (!has_precis) { + /* %a in the absence of precision may print as many + * digits as needed to represent the entire mantissa + * bit pattern. + * This estimate seriously overshoots in most cases, + * but better the undershooting. Firstly, all bytes * of the NV are not mantissa, some of them are * exponent. Secondly, for the reasonably common * long doubles case, the "80-bit extended", two - * or six bytes of the NV are unused. */ - need += - (fv < 0) ? 1 : 0 + /* possible unary minus */ - 2 + /* "0x" */ - 1 + /* the very unlikely carry */ - 1 + /* "1" */ - 1 + /* "." */ - 2 * NVSIZE + /* 2 hexdigits for each byte */ - 2 + /* "p+" */ - 6 + /* exponent: sign, plus up to 16383 (quad fp) */ - 1; /* \0 */ + * or six bytes of the NV are unused. Also, we'll + * still pick up an extra +6 from the default + * precision calculation below. */ + STRLEN digits = #ifdef LONGDOUBLE_DOUBLEDOUBLE - /* However, for the "double double", we need more. - * Since each double has their own exponent, the - * doubles may float (haha) rather far from each - * other, and the number of required bits is much - * larger, up to total of DOUBLEDOUBLE_MAXBITS bits. - * See the definition of DOUBLEDOUBLE_MAXBITS. - * - * Need 2 hexdigits for each byte. */ - need += (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2; - /* the size for the exponent already added */ -#endif -#ifdef USE_LOCALE_NUMERIC - STORE_LC_NUMERIC_SET_TO_NEEDED(); - if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) - need += SvLEN(PL_numeric_radix_sv); - RESTORE_LC_NUMERIC(); + /* For the "double double", we need more. + * Since each double has their own exponent, the + * doubles may float (haha) rather far from each + * other, and the number of required bits is much + * larger, up to total of DOUBLEDOUBLE_MAXBITS bits. + * See the definition of DOUBLEDOUBLE_MAXBITS. + * + * Need 2 hexdigits for each byte. */ + (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2; +#else + NVSIZE * 2; /* 2 hexdigits for each byte */ #endif + if (float_need >= ((STRLEN)~0) - digits) + croak_memory_wrap(); + float_need += digits; } - else if (i > 0) { - need = BIT_DIGITS(i); - } /* if i < 0, the number of digits is hard to predict. */ } - need += has_precis ? precis : 6; /* known default */ - - if (need < width) - need = width; - -#ifdef HAS_LDBL_SPRINTF_BUG - /* This is to try to fix a bug with irix/nonstop-ux/powerux and - with sfio - Allen */ - -# ifdef DBL_MAX -# define MY_DBL_MAX DBL_MAX -# else /* XXX guessing! HUGE_VAL may be defined as infinity, so not using */ -# if DOUBLESIZE >= 8 -# define MY_DBL_MAX 1.7976931348623157E+308L -# else -# define MY_DBL_MAX 3.40282347E+38L -# endif -# endif + /* special-case "%.g" if it will fit in ebuf */ + else if (c == 'g' + && precis /* See earlier comment about buggy Gconvert + when digits, aka precis, is 0 */ + && has_precis + /* check, in manner not involving wrapping, that it will + * fit in ebuf */ + && float_need < sizeof(ebuf) + && sizeof(ebuf) - float_need > precis + && !(width || left || plus || alt) + && !fill + && intsize != 'q' + ) { + SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis); + elen = strlen(ebuf); + eptr = ebuf; + goto float_concat; + } -# ifdef HAS_LDBL_SPRINTF_BUG_LESS1 /* only between -1L & 1L - Allen */ -# define MY_DBL_MAX_BUG 1L -# else -# define MY_DBL_MAX_BUG MY_DBL_MAX -# endif -# ifdef DBL_MIN -# define MY_DBL_MIN DBL_MIN -# else /* XXX guessing! -Allen */ -# if DOUBLESIZE >= 8 -# define MY_DBL_MIN 2.2250738585072014E-308L -# else -# define MY_DBL_MIN 1.17549435E-38L -# endif -# endif + { + STRLEN pr = has_precis ? precis : 6; /* known default */ + if (float_need >= ((STRLEN)~0) - pr) + croak_memory_wrap(); + float_need += pr; + } - if ((intsize == 'q') && (c == 'f') && - ((fv < MY_DBL_MAX_BUG) && (fv > -MY_DBL_MAX_BUG)) && - (need < DBL_DIG)) { - /* it's going to be short enough that - * long double precision is not needed */ + if (float_need < width) + float_need = width; - if ((fv <= 0L) && (fv >= -0L)) - fix_ldbl_sprintf_bug = TRUE; /* 0 is 0 - easiest */ - else { - /* would use Perl_fp_class as a double-check but not - * functional on IRIX - see perl.h comments */ - - if ((fv >= MY_DBL_MIN) || (fv <= -MY_DBL_MIN)) { - /* It's within the range that a double can represent */ -#if defined(DBL_MAX) && !defined(DBL_MIN) - if ((fv >= ((long double)1/DBL_MAX)) || - (fv <= (-(long double)1/DBL_MAX))) +/* We should have correctly calculated (or indeed over-estimated) the + * buffer size, but you never know what strange floating-point systems + * there are out there. So for production use, add a little extra overhead. + * Under debugging don't, as it means we more more likely to quickly spot + * issues during development. + */ +#ifndef DEBUGGING + if (float_need >= ((STRLEN)~0) - 20) + croak_memory_wrap(); + float_need += 20; /* safety fudge factor */ #endif - fix_ldbl_sprintf_bug = TRUE; - } - } - if (fix_ldbl_sprintf_bug == TRUE) { - double temp; - - intsize = 0; - temp = (double)fv; - fv = (NV)temp; - } - } - -# undef MY_DBL_MAX -# undef MY_DBL_MAX_BUG -# undef MY_DBL_MIN -#endif /* HAS_LDBL_SPRINTF_BUG */ - - need += 20; /* fudge factor */ - if (PL_efloatsize < need) { + if (PL_efloatsize < float_need) { Safefree(PL_efloatbuf); - PL_efloatsize = need + 20; /* more fudge */ + PL_efloatsize = float_need; Newx(PL_efloatbuf, PL_efloatsize, char); PL_efloatbuf[0] = '\0'; } - if ( !(width || left || plus || alt) && fill != '0' - && has_precis && intsize != 'q' /* Shortcuts */ - && LIKELY(!Perl_isinfnan((NV)fv)) ) { - /* See earlier comment about buggy Gconvert when digits, - aka precis is 0 */ - if ( c == 'g' && precis ) { - STORE_LC_NUMERIC_SET_TO_NEEDED(); - SNPRINTF_G(fv, PL_efloatbuf, PL_efloatsize, precis); - /* May return an empty string for digits==0 */ - if (*PL_efloatbuf) { - elen = strlen(PL_efloatbuf); - goto float_converted; - } - } else if ( c == 'f' && !precis ) { - if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen))) - break; - } - } - if (UNLIKELY(hexfp)) { - /* Hexadecimal floating point. */ - char* p = PL_efloatbuf; - U8 vhex[VHEX_SIZE]; - U8* v = vhex; /* working pointer to vhex */ - U8* vend; /* pointer to one beyond last digit of vhex */ - U8* vfnz = NULL; /* first non-zero */ - U8* vlnz = NULL; /* last non-zero */ - U8* v0 = NULL; /* first output */ - const bool lower = (c == 'a'); - /* At output the values of vhex (up to vend) will - * be mapped through the xdig to get the actual - * human-readable xdigits. */ - const char* xdig = PL_hexdigit; - int zerotail = 0; /* how many extra zeros to append */ - int exponent = 0; /* exponent of the floating point input */ - bool hexradix = FALSE; /* should we output the radix */ - bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */ - bool negative = FALSE; - - /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf". - * - * For example with denormals, (assuming the vanilla - * 64-bit double): the exponent is zero. 1xp-1074 is - * the smallest denormal and the smallest double, it - * could be output also as 0x0.0000000000001p-1022 to - * match its internal structure. */ - - vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL); - S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend); - -#if NVSIZE > DOUBLESIZE -# ifdef HEXTRACT_HAS_IMPLICIT_BIT - /* In this case there is an implicit bit, - * and therefore the exponent is shifted by one, - * unless this is a subnormal/denormal. */ - if (!subnormal) { - exponent--; - } -# else - /* In this case there is no implicit bit, - * and the exponent is shifted by the first xdigit. */ - exponent -= 4; -# endif -#endif - - negative = fv < 0 || Perl_signbit(nv); - if (negative) - *p++ = '-'; - else if (plus) - *p++ = plus; - *p++ = '0'; - if (lower) { - *p++ = 'x'; - } - else { - *p++ = 'X'; - xdig += 16; /* Use uppercase hex. */ - } - - /* Find the first non-zero xdigit. */ - for (v = vhex; v < vend; v++) { - if (*v) { - vfnz = v; - break; - } - } - - if (vfnz) { - /* Find the last non-zero xdigit. */ - for (v = vend - 1; v >= vhex; v--) { - if (*v) { - vlnz = v; - break; - } - } - -#if NVSIZE == DOUBLESIZE - if (fv != 0.0) - exponent--; -#endif - - if (subnormal) { - if (vfnz[0] > 1) { - /* We need to right shift the hex nybbles so - * that the output of the subnormal starts - * from the first true bit. */ - int i, n; - U8 *vshr; - /* Find the ceil(log2(v[0])) of - * the top non-zero nybble. */ - for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { } - assert(n < 4); - vlnz[1] = 0; - for (vshr = vlnz; vshr >= vfnz; vshr--) { - vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n); - vshr[0] >>= n; - } - if (vlnz[1]) { - vlnz++; - } - } - v0 = vfnz; - } else { - v0 = vhex; - } - - if (has_precis) { - U8* ve = (subnormal ? vlnz + 1 : vend); - SSize_t vn = ve - (subnormal ? vfnz : vhex); - if ((SSize_t)(precis + 1) < vn) { - bool overflow = FALSE; - if (v0[precis + 1] < 0x8) { - /* Round down, nothing to do. */ - } else if (v0[precis + 1] > 0x8) { - /* Round up. */ - v0[precis]++; - overflow = v0[precis] > 0xF; - v0[precis] &= 0xF; - } else { /* v0[precis] == 0x8 */ - /* Half-point: round towards the one - * with the even least-significant digit: - * 08 -> 0 88 -> 8 - * 18 -> 2 98 -> a - * 28 -> 2 a8 -> a - * 38 -> 4 b8 -> c - * 48 -> 4 c8 -> c - * 58 -> 6 d8 -> e - * 68 -> 6 e8 -> e - * 78 -> 8 f8 -> 10 */ - if ((v0[precis] & 0x1)) { - v0[precis]++; - } - overflow = v0[precis] > 0xF; - v0[precis] &= 0xF; - } - - if (overflow) { - for (v = v0 + precis - 1; v >= v0; v--) { - (*v)++; - overflow = *v > 0xF; - (*v) &= 0xF; - if (!overflow) { - break; - } - } - if (v == v0 - 1 && overflow) { - /* If the overflow goes all the - * way to the front, we need to - * insert 0x1 in front, and adjust - * the argument. */ - Move(v0, v0 + 1, vn, char); - *v0 = 0x1; - exponent += 4; - } - } - - /* The new effective "last non zero". */ - vlnz = v0 + precis; - } - else { - zerotail = - subnormal ? precis - vn + 1 : - precis - (vlnz - vhex); - } - } - - v = v0; - *p++ = xdig[*v++]; - - /* If there are non-zero xdigits, the radix - * is output after the first one. */ - if (vfnz < vlnz) { - hexradix = TRUE; - } - } - else { - *p++ = '0'; - exponent = 0; - zerotail = precis; - } - - /* The radix is always output if precis, or if alt. */ - if (precis > 0 || alt) { - hexradix = TRUE; - } - - if (hexradix) { -#ifndef USE_LOCALE_NUMERIC - *p++ = '.'; -#else - STORE_LC_NUMERIC_SET_TO_NEEDED(); - if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { - STRLEN n; - const char* r = SvPV(PL_numeric_radix_sv, n); - Copy(r, p, n, char); - p += n; - } - else { - *p++ = '.'; - } - RESTORE_LC_NUMERIC(); -#endif - } - - if (vlnz) { - while (v <= vlnz) - *p++ = xdig[*v++]; - } - - if (zerotail > 0) { - while (zerotail--) { - *p++ = '0'; - } - } - - elen = p - PL_efloatbuf; - elen += my_snprintf(p, PL_efloatsize - elen, - "%c%+d", lower ? 'p' : 'P', - exponent); - - if (elen < width) { - if (left) { - /* Pad the back with spaces. */ - memset(PL_efloatbuf + elen, ' ', width - elen); - } - else if (fill == '0') { - /* Insert the zeros after the "0x" and the - * the potential sign, but before the digits, - * otherwise we end up with "0000xH.HHH...", - * when we want "0x000H.HHH..." */ - STRLEN nzero = width - elen; - char* zerox = PL_efloatbuf + 2; - STRLEN nmove = elen - 2; - if (negative || plus) { - zerox++; - nmove--; - } - Move(zerox, zerox + nzero, nmove, char); - memset(zerox, fill, nzero); - } - else { - /* Move it to the right. */ - Move(PL_efloatbuf, PL_efloatbuf + width - elen, - elen, char); - /* Pad the front with spaces. */ - memset(PL_efloatbuf, ' ', width - elen); - } - elen = width; - } + elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c, + nv, fv, has_precis, precis, width, + alt, plus, left, fill); } else { - elen = S_infnan_2pv(nv, PL_efloatbuf, PL_efloatsize, plus); - if (elen) { - /* Not affecting infnan output: precision, alt, fill. */ - if (elen < width) { - if (left) { - /* Pack the back with spaces. */ - memset(PL_efloatbuf + elen, ' ', width - elen); - } else { - /* Move it to the right. */ - Move(PL_efloatbuf, PL_efloatbuf + width - elen, - elen, char); - /* Pad the front with spaces. */ - memset(PL_efloatbuf, ' ', width - elen); - } - elen = width; - } - } - } - - if (elen == 0) { char *ptr = ebuf + sizeof ebuf; *--ptr = '\0'; *--ptr = c; @@ -12706,8 +12988,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p base = width; do { *--ptr = '0' + (base % 10); } while (base /= 10); } - if (fill == '0') - *--ptr = fill; + if (fill) + *--ptr = '0'; if (left) *--ptr = '-'; if (plus) @@ -12720,8 +13002,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * where printf() taints but print($float) doesn't. * --jhi */ - STORE_LC_NUMERIC_SET_TO_NEEDED(); - /* hopefully the above makes ptr a very constrained format * that is safe to use, even though it's not literal */ GCC_DIAG_IGNORE(-Wformat-nonliteral); @@ -12732,8 +13012,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr); elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize, qfmt, nv); - if ((IV)elen == -1) + if ((IV)elen == -1) { + if (qfmt != ptr) + SAVEFREEPV(qfmt); Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", qfmt); + } if (qfmt != ptr) Safefree(qfmt); } @@ -12747,53 +13030,97 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p GCC_DIAG_RESTORE; } - float_converted: eptr = PL_efloatbuf; - assert((IV)elen > 0); /* here zero elen is bad */ -#ifdef USE_LOCALE_NUMERIC - /* If the decimal point character in the string is UTF-8, make the - * output utf8 */ - if (PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv) - && instr(eptr, SvPVX_const(PL_numeric_radix_sv))) + float_concat: + + /* Since floating-point formats do their own formatting and + * padding, we skip the main block of code at the end of this + * loop which handles appending eptr to sv, and do our own + * stripped-down version */ + + /* floating-point formats only get is_utf8 if the radix point + * is utf8. All other characters in the string are < 128 + * and so can be safely appended to both a non-utf8 and utf8 + * string as-is. + */ + if (is_utf8 && !has_utf8) { + sv_utf8_upgrade(sv); + has_utf8 = TRUE; + } + + float_concat_no_utf8: + + assert(!zeros); + assert(!esignlen); + assert(!vectorize); + assert(elen); + assert(elen >= width); + + { - is_utf8 = TRUE; + /* unrolled Perl_sv_catpvn */ + STRLEN need = elen + SvCUR(sv) + 1; + char *end; + /* can't wrap as both elen and SvCUR() are allocated in + * memory and together can't consume all the address space + */ + assert(need > elen); + SvGROW(sv, need); + end = SvEND(sv); + Copy(eptr, end, elen, char); + end += elen; + *end = '\0'; + SvCUR_set(sv, need - 1); } -#endif - break; + goto donevalidconversion; + } /* SPECIAL */ case 'n': - if (vectorize) - goto unknown; - i = SvCUR(sv) - origlen; - if (args) { - switch (intsize) { - case 'c': *(va_arg(*args, char*)) = i; break; - case 'h': *(va_arg(*args, short*)) = i; break; - default: *(va_arg(*args, int*)) = i; break; - case 'l': *(va_arg(*args, long*)) = i; break; - case 'V': *(va_arg(*args, IV*)) = i; break; - case 'z': *(va_arg(*args, SSize_t*)) = i; break; + { + int i; + if (vectorize) + goto unknown; + /* XXX ideally we should warn if any flags etc have been + * set, e.g. "%-4.5n" */ + /* XXX if sv was originally non-utf8 with a char in the + * range 0x80-0xff, then if it got upgraded, we should + * calculate char len rather than byte len here */ + i = SvCUR(sv) - origlen; + if (args) { + switch (intsize) { + case 'c': *(va_arg(*args, char*)) = i; break; + case 'h': *(va_arg(*args, short*)) = i; break; + default: *(va_arg(*args, int*)) = i; break; + case 'l': *(va_arg(*args, long*)) = i; break; + case 'V': *(va_arg(*args, IV*)) = i; break; + case 'z': *(va_arg(*args, SSize_t*)) = i; break; #ifdef HAS_PTRDIFF_T - case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; + case 't': *(va_arg(*args, ptrdiff_t*)) = i; break; #endif #ifdef I_STDINT - case 'j': *(va_arg(*args, intmax_t*)) = i; break; + case 'j': *(va_arg(*args, intmax_t*)) = i; break; #endif - case 'q': + case 'q': #if IVSIZE >= 8 - *(va_arg(*args, Quad_t*)) = i; break; + *(va_arg(*args, Quad_t*)) = i; break; #else - goto unknown; + goto unknown; #endif - } - } - else - sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i); - goto donevalidconversion; + } + } + else { + if (arg_missing) + Perl_croak_nocontext( + "Missing argument for %%n in %s", + PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()"); + sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i); + } + goto donevalidconversion; + } /* UNKNOWN */ @@ -12815,29 +13142,20 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p sv_catpvn_nomg(msg, f, 1); } else { Perl_sv_catpvf(aTHX_ msg, - "\\%03"UVof, (UV)*f & 0xFF); + "\\%03" UVof, (UV)*f & 0xFF); } } sv_catpvs(msg, "\""); } else { sv_catpvs(msg, "end of string"); } - Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%"SVf, SVfARG(msg)); /* yes, this is reentrant */ + Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */ } - /* output mangled stuff ... */ - if (c == '\0') - --q; - eptr = p; - elen = q - p; - - /* ... right here, because formatting flags should not apply */ - SvGROW(sv, SvCUR(sv) + elen + 1); - p = SvEND(sv); - Copy(eptr, p, elen, char); - p += elen; - *p = '\0'; - SvCUR_set(sv, p - SvPVX_const(sv)); + /* mangled format: output the '%', then continue from the + * character following that */ + sv_catpvn_nomg(sv, p, 1); + q = p + 1; svix = osvix; continue; /* not "break" */ } @@ -12861,60 +13179,89 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p } } - /* signed value that's wrapped? */ - assert(elen <= ((~(STRLEN)0) >> 1)); - have = esignlen + zeros + elen; - if (have < zeros) - croak_memory_wrap(); - - need = (have > width ? have : width); - gap = need - have; - - if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1)) - croak_memory_wrap(); - SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1); - p = SvEND(sv); - if (esignlen && fill == '0') { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; - } - if (gap && !left) { - memset(p, fill, gap); - p += gap; - } - if (esignlen && fill != '0') { - int i; - for (i = 0; i < (int)esignlen; i++) - *p++ = esignbuf[i]; - } - if (zeros) { - int i; - for (i = zeros; i; i--) - *p++ = '0'; - } - if (elen) { - Copy(eptr, p, elen, char); - p += elen; - } - if (gap && left) { - memset(p, ' ', gap); - p += gap; - } - if (vectorize) { - if (veclen) { - Copy(dotstr, p, dotstrlen, char); - p += dotstrlen; - } - else - vectorize = FALSE; /* done iterating over vecstr */ - } - if (is_utf8) - has_utf8 = TRUE; - if (has_utf8) - SvUTF8_on(sv); - *p = '\0'; - SvCUR_set(sv, p - SvPVX_const(sv)); + + /* append esignbuf, filler, zeros, eptr and dotstr to sv */ + + { + STRLEN need, have, gap; + + /* signed value that's wrapped? */ + assert(elen <= ((~(STRLEN)0) >> 1)); + + /* Most of these length vars can range to any value if + * supplied with a hostile format and/or args. So check every + * addition for possible overflow. In reality some of these + * values are interdependent so these checks are slightly + * redundant. But its easier to be certain this way. + */ + + have = elen; + + if (have >= (((STRLEN)~0) - zeros)) + croak_memory_wrap(); + have += zeros; + + if (have >= (((STRLEN)~0) - esignlen)) + croak_memory_wrap(); + have += esignlen; + + need = (have > width ? have : width); + gap = need - have; + + if (need >= (((STRLEN)~0) - dotstrlen)) + croak_memory_wrap(); + need += dotstrlen; + + if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1))) + croak_memory_wrap(); + need += (SvCUR(sv) + 1); + + SvGROW(sv, need); + + p = SvEND(sv); + if (esignlen && fill) { + int i; + for (i = 0; i < (int)esignlen; i++) + *p++ = esignbuf[i]; + } + if (gap && !left) { + memset(p, (fill ? '0' : ' '), gap); + p += gap; + } + if (esignlen && !fill) { + int i; + for (i = 0; i < (int)esignlen; i++) + *p++ = esignbuf[i]; + } + if (zeros) { + int i; + for (i = zeros; i; i--) + *p++ = '0'; + } + if (elen) { + Copy(eptr, p, elen, char); + p += elen; + } + if (gap && left) { + memset(p, ' ', gap); + p += gap; + } + if (vectorize) { + if (veclen) { + Copy(dotstr, p, dotstrlen, char); + p += dotstrlen; + } + else + vectorize = FALSE; /* done iterating over vecstr */ + } + if (is_utf8) + has_utf8 = TRUE; + if (has_utf8) + SvUTF8_on(sv); + *p = '\0'; + SvCUR_set(sv, p - SvPVX_const(sv)); + } + if (vectorize) { esignlen = 0; goto vector; @@ -13009,7 +13356,7 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->old_parser = NULL; parser->stack = NULL; parser->ps = NULL; - parser->stack_size = 0; + parser->stack_max1 = 0; /* XXX parser->stack->state = 0; */ /* XXX eventually, just Copy() most of the parser struct ? */ @@ -13052,6 +13399,7 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param) parser->sig_elems = proto->sig_elems; parser->sig_optelems= proto->sig_optelems; parser->sig_slurpy = proto->sig_slurpy; + parser->recheck_utf8_validity = proto->recheck_utf8_validity; parser->linestr = sv_dup_inc(proto->linestr, param); { @@ -13315,7 +13663,10 @@ Perl_mg_dup(pTHX_ MAGIC *mg, CLONE_PARAMS *const param) ? SvREFCNT_inc(av_dup_inc((const AV *) nmg->mg_obj, param)) : sv_dup_inc(nmg->mg_obj, param) - : sv_dup(nmg->mg_obj, param); + : (nmg->mg_type == PERL_MAGIC_regdatum || + nmg->mg_type == PERL_MAGIC_regdata) + ? nmg->mg_obj + : sv_dup(nmg->mg_obj, param); if (nmg->mg_ptr && nmg->mg_type != PERL_MAGIC_regex_global) { if (nmg->mg_len > 0) { @@ -13710,6 +14061,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param) switch (sv_type) { default: Perl_croak(aTHX_ "Bizarre SvTYPE [%" IVdf "]", (IV)SvTYPE(sstr)); + NOT_REACHED; /* NOTREACHED */ break; case SVt_PVGV: @@ -14088,7 +14440,7 @@ Perl_cx_dup(pTHX_ PERL_CONTEXT *cxs, I32 ix, I32 max, CLONE_PARAMS* param) case CXt_EVAL: ncx->blk_eval.old_namesv = sv_dup_inc(ncx->blk_eval.old_namesv, param); - /* XXX should this sv_dup_inc? Or only if SvSCREAM ???? */ + /* XXX should this sv_dup_inc? Or only if CxEVAL_TXT_REFCNTED ???? */ ncx->blk_eval.cur_text = sv_dup(ncx->blk_eval.cur_text, param); ncx->blk_eval.cv = cv_dup(ncx->blk_eval.cv, param); /* XXX what do do with cur_top_env ???? */ @@ -14517,7 +14869,7 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) break; default: Perl_croak(aTHX_ - "panic: ss_dup inconsistency (%"IVdf")", (IV) type); + "panic: ss_dup inconsistency (%" IVdf ")", (IV) type); } } @@ -14780,8 +15132,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_forkprocess = proto_perl->Iforkprocess; /* internal state */ - PL_maxo = proto_perl->Imaxo; - PL_main_start = proto_perl->Imain_start; PL_eval_root = proto_perl->Ieval_root; PL_eval_start = proto_perl->Ieval_start; @@ -14908,7 +15258,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_Xpv = (XPV*)NULL; my_perl->Ina = proto_perl->Ina; - PL_statbuf = proto_perl->Istatbuf; PL_statcache = proto_perl->Istatcache; #ifndef NO_TAINT_SUPPORT @@ -15015,9 +15364,9 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* magical thingies */ - sv_setpvs(PERL_DEBUG_PAD(0), ""); /* For regex debugging. */ - sv_setpvs(PERL_DEBUG_PAD(1), ""); /* ext/re needs these */ - sv_setpvs(PERL_DEBUG_PAD(2), ""); /* even without DEBUGGING. */ + SvPVCLEAR(PERL_DEBUG_PAD(0)); /* For regex debugging. */ + SvPVCLEAR(PERL_DEBUG_PAD(1)); /* ext/re needs these */ + SvPVCLEAR(PERL_DEBUG_PAD(2)); /* even without DEBUGGING. */ /* Clone the regex array */ @@ -15189,6 +15538,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param); PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param); PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param); + PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param); PL_utf8_mark = sv_dup_inc(proto_perl->Iutf8_mark, param); PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); @@ -15295,7 +15645,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_watchok = PL_watchaddr ? * PL_watchaddr : NULL; if (PL_debug && PL_watchaddr) { PerlIO_printf(Perl_debug_log, - "WATCHING: %"UVxf" cloned as %"UVxf" with value %"UVxf"\n", + "WATCHING: %" UVxf " cloned as %" UVxf " with value %" UVxf "\n", PTR2UV(proto_perl->Iwatchaddr), PTR2UV(PL_watchaddr), PTR2UV(PL_watchok)); } @@ -15743,7 +16093,7 @@ Perl_varname(pTHX_ const GV *const gv, const char gvtype, PADOFFSET targ, } else if (subscript_type == FUV_SUBSCRIPT_ARRAY) { *SvPVX(name) = '$'; - Perl_sv_catpvf(aTHX_ name, "[%"IVdf"]", (IV)aindex); + Perl_sv_catpvf(aTHX_ name, "[%" IVdf "]", (IV)aindex); } else if (subscript_type == FUV_SUBSCRIPT_WITHIN) { /* We know that name has no magic, so can use 0 instead of SV_GMAGIC */ @@ -15793,6 +16143,11 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, switch (obase->op_type) { + case OP_UNDEF: + /* undef should care if its args are undef - any warnings + * will be from tied/magic vars */ + break; + case OP_RV2AV: case OP_RV2HV: case OP_PADAV: @@ -16245,6 +16600,7 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, */ break; } + match = 1; goto do_op; /* ops where $_ may be an implicit arg */ @@ -16338,7 +16694,6 @@ S_find_uninit_var(pTHX_ const OP *const obase, const SV *const uninit_sv, case OP_ALARM: case OP_SEMGET: case OP_GETLOGIN: - case OP_UNDEF: case OP_SUBSTR: case OP_AEACH: case OP_EACH: