U8 * s = (U8 *) SvPVX_const(sv);
U8 * e = (U8 *) SvEND(sv);
U8 *t = s;
- STRLEN two_byte_count = 0;
+ STRLEN two_byte_count;
- if (flags & SV_FORCE_UTF8_UPGRADE) goto must_be_utf8;
-
- /* See if really will need to convert to utf8. We mustn't rely on our
- * incoming SV being well formed and having a trailing '\0', as certain
- * code in pp_formline can send us partially built SVs. */
-
- while (t < e) {
- const U8 ch = *t++;
- if (NATIVE_BYTE_IS_INVARIANT(ch)) continue;
-
- t--; /* t already incremented; re-point to first variant */
- two_byte_count = 1;
- goto must_be_utf8;
- }
+ if (flags & SV_FORCE_UTF8_UPGRADE) {
+ two_byte_count = 0;
+ }
+ else {
+ if (is_utf8_invariant_string_loc(s, SvCUR(sv), (const U8 **) &t)) {
- /* utf8 conversion not needed because all are invariants. Mark as
- * UTF-8 even if no variant - saves scanning loop */
- SvUTF8_on(sv);
- if (extra) SvGROW(sv, SvCUR(sv) + extra);
- return SvCUR(sv);
+ /* utf8 conversion not needed because all are invariants. Mark
+ * as UTF-8 even if no variant - saves scanning loop */
+ SvUTF8_on(sv);
+ if (extra) SvGROW(sv, SvCUR(sv) + extra);
+ return SvCUR(sv);
+ }
- must_be_utf8:
+ /* Here, there is at least one variant, and t points to the first
+ * one */
+ two_byte_count = 1;
+ }
- /* Here, the string should be converted to utf8, either because of an
- * input flag (two_byte_count = 0), or because a character that
- * requires 2 bytes was found (two_byte_count = 1). t points either to
- * the beginning of the string (if we didn't examine anything), or to
- * the first variant. In either case, everything from s to t - 1 will
- * occupy only 1 byte each on output.
+ /* Note that the incoming SV may not have a trailing '\0', as certain
+ * code in pp_formline can send us partially built SVs.
+ *
+ * Here, the string should be converted to utf8, either because of an
+ * input flag (which causes two_byte_count to be set to 0), or because
+ * a character that requires 2 bytes was found (two_byte_count = 1). t
+ * points either to the beginning of the string (if we didn't examine
+ * anything), or to the first variant. In either case, everything from
+ * s to t - 1 will occupy only 1 byte each on output.
*
* There are two main ways to convert. One is to create a new string
* and go through the input starting from the beginning, appending each
* from s to t - 1 is invariant, the destination can be initialized
* with these using a fast memory copy
*
- * The other way is to figure out exactly how big the string should be
+ * The other way is to figure out exactly how big the string should be,
* by parsing the entire input. Then you don't have to make it big
* enough to handle the worst possible case, and more importantly, if
* the string you already have is large enough, you don't have to
* value. We go backwards through the string, converting until we
* get to the position we are at now, and then stop. If this
* position is far enough along in the string, this method is
- * faster than the other method. If the memory copy were the same
- * speed as the byte-by-byte loop, that position would be about
- * half-way, as at the half-way mark, parsing to the end and back
- * is one complete string's parse, the same amount as starting
- * over and going all the way through. Actually, it would be
- * somewhat less than half-way, as it's faster to just count bytes
- * than to also copy, and we don't have the overhead of allocating
- * a new string, changing the scalar to use it, and freeing the
- * existing one. But if the memory copy is fast, the break-even
- * point is somewhere after half way. The counting loop could be
- * sped up by vectorization, etc, to move the break-even point
- * further towards the beginning.
+ * faster than the first method above. If the memory copy were
+ * the same speed as the byte-by-byte loop, that position would be
+ * about half-way, as at the half-way mark, parsing to the end and
+ * back is one complete string's parse, the same amount as
+ * starting over and going all the way through. Actually, it
+ * would be somewhat less than half-way, as it's faster to just
+ * count bytes than to also copy, and we don't have the overhead
+ * of allocating a new string, changing the scalar to use it, and
+ * freeing the existing one. But if the memory copy is fast, the
+ * break-even point is somewhere after half way. The counting
+ * loop could be sped up by vectorization, etc, to move the
+ * break-even point further towards the beginning.
* 2) if the string doesn't have enough space to handle the converted
* value. A new string will have to be allocated, and one might
* as well, given that, start from the beginning doing the first
void
Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VSETPVFN;
SvPVCLEAR(sv);
- sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, 0);
+ sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, 0);
+}
+
+
+/* simplified inline Perl_sv_catpvn_nomg() when you know the SV's SvPOK */
+
+PERL_STATIC_INLINE void
+S_sv_catpvn_simple(pTHX_ SV *const sv, const char* const buf, const STRLEN len)
+{
+ STRLEN const need = len + SvCUR(sv) + 1;
+ char *end;
+
+ /* can't wrap as both len and SvCUR() are allocated in
+ * memory and together can't consume all the address space
+ */
+ assert(need > len);
+
+ assert(SvPOK(sv));
+ SvGROW(sv, need);
+ end = SvEND(sv);
+ Copy(buf, end, len, char);
+ end += len;
+ *end = '\0';
+ SvCUR_set(sv, need - 1);
}
}
-STATIC I32
-S_expect_number(pTHX_ char **const pattern)
+static void
+S_croak_overflow()
+{
+ dTHX;
+ Perl_croak(aTHX_ "Integer overflow in format string for %s",
+ (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn"));
+}
+
+
+/* Given an int i from the next arg (if args is true) or an sv from an arg
+ * (if args is false), try to extract a STRLEN-ranged value from the arg,
+ * with overflow checking.
+ * Sets *neg to true if the value was negative (untouched otherwise.
+ * Returns the absolute value.
+ * As an extra margin of safety, it croaks if the returned value would
+ * exceed the maximum value of a STRLEN / 4.
+ */
+
+static STRLEN
+S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg)
+{
+ IV iv;
+
+ if (args) {
+ iv = i;
+ goto do_iv;
+ }
+
+ if (!sv)
+ return 0;
+
+ SvGETMAGIC(sv);
+
+ if (UNLIKELY(SvIsUV(sv))) {
+ UV uv = SvUV_nomg(sv);
+ if (uv > IV_MAX)
+ S_croak_overflow();
+ iv = uv;
+ }
+ else {
+ iv = SvIV_nomg(sv);
+ do_iv:
+ if (iv < 0) {
+ if (iv < -IV_MAX)
+ S_croak_overflow();
+ iv = -iv;
+ *neg = TRUE;
+ }
+ }
+
+ if (iv > (IV)(((STRLEN)~0) / 4))
+ S_croak_overflow();
+
+ return (STRLEN)iv;
+}
+
+
+/* Returns true if c is in the range '1'..'9'
+ * Written with the cast so it only needs one conditional test
+ */
+#define IS_1_TO_9(c) ((U8)(c - '1') <= 8)
+
+/* Read in and return a number. Updates *pattern to point to the char
+ * following the number. Expects the first char to 1..9.
+ * Croaks if the number exceeds 1/4 of the maximum value of STRLEN.
+ * This is a belt-and-braces safety measure to complement any
+ * overflow/wrap checks done in the main body of sv_vcatpvfn_flags.
+ * It means that e.g. on a 32-bit system the width/precision can't be more
+ * than 1G, which seems reasonable.
+ */
+
+STATIC STRLEN
+S_expect_number(pTHX_ const char **const pattern)
{
- I32 var = 0;
+ STRLEN var;
PERL_ARGS_ASSERT_EXPECT_NUMBER;
- switch (**pattern) {
- case '1': case '2': case '3':
- case '4': case '5': case '6':
- case '7': case '8': case '9':
- var = *(*pattern)++ - '0';
- while (isDIGIT(**pattern)) {
- const I32 tmp = var * 10 + (*(*pattern)++ - '0');
- if (tmp < var)
- Perl_croak(aTHX_ "Integer overflow in format string for %s", (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn"));
- var = tmp;
- }
+ assert(IS_1_TO_9(**pattern));
+
+ var = *(*pattern)++ - '0';
+ while (isDIGIT(**pattern)) {
+ /* if var * 10 + 9 would exceed 1/4 max strlen, croak */
+ if (var > ((((STRLEN)~0) / 4 - 9) / 10))
+ S_croak_overflow();
+ var = var * 10 + (*(*pattern)++ - '0');
}
return var;
}
}
-#define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\
- vecstr = (U8*)SvPV_const(vecsv,veclen);\
- vec_utf8 = DO_UTF8(vecsv);
-
/* XXX maybe_tainted is never assigned to, so the doc above is lying. */
void
Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VCATPVFN;
- sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
+ sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, sv_count, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
}
#endif
const U8* vmaxend = vhex + HEXTRACTSIZE;
+
+ assert(HEXTRACTSIZE <= VHEX_SIZE);
+
PERL_UNUSED_VAR(ix); /* might happen */
(void)Perl_frexp(PERL_ABS(nv), exponent);
*subnormal = FALSE;
const U8* nvp = (const U8*)(&nv);
HEXTRACT_GET_SUBNORMAL(nv);
HEXTRACT_IMPLICIT_BIT(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE
+# undef HEXTRACT_HAS_TOP_NYBBLE
HEXTRACT_BYTES_LE(13, 0);
# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN
/* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L:
const U8* nvp = (const U8*)(&nv);
HEXTRACT_GET_SUBNORMAL(nv);
HEXTRACT_IMPLICIT_BIT(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE
+# undef HEXTRACT_HAS_TOP_NYBBLE
HEXTRACT_BYTES_BE(2, 15);
# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
/* x86 80-bit "extended precision", 64 bits of mantissa / fraction /
# define HEXTRACT_FALLBACK
# endif
#endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */
-# ifdef HEXTRACT_FALLBACK
+
+#ifdef HEXTRACT_FALLBACK
HEXTRACT_GET_SUBNORMAL(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */
+# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */
/* The fallback is used for the double-double format, and
* for unknown long double formats, and for unknown double
* formats, or in general unknown NV formats. */
v++;
}
}
-# endif
+#endif
}
/* Croak for various reasons: if the output pointer escaped the
* output buffer, if the extraction index escaped the extraction
* string.
* The rest of the args have the same meaning as the local vars of the
* same name within Perl_sv_vcatpvfn_flags().
+ *
+ * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED();
+ *
+ * It requires the caller to make buf large enough.
*/
static STRLEN
S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
const NV nv, const vcatpvfn_long_double_t fv,
bool has_precis, STRLEN precis, STRLEN width,
- bool alt, char plus, bool left, char fill)
+ bool alt, char plus, bool left, bool fill)
{
/* Hexadecimal floating point. */
char* p = buf;
* be mapped through the xdig to get the actual
* human-readable xdigits. */
const char* xdig = PL_hexdigit;
- int zerotail = 0; /* how many extra zeros to append */
+ STRLEN zerotail = 0; /* how many extra zeros to append */
int exponent = 0; /* exponent of the floating point input */
bool hexradix = FALSE; /* should we output the radix */
bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
bool negative = FALSE;
STRLEN elen;
- DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
/* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
*
#ifndef USE_LOCALE_NUMERIC
*p++ = '.';
#else
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+ if (PL_numeric_radix_sv) {
STRLEN n;
const char* r = SvPV(PL_numeric_radix_sv, n);
+ assert(IN_LC(LC_NUMERIC));
Copy(r, p, n, char);
p += n;
}
else {
*p++ = '.';
}
- RESTORE_LC_NUMERIC();
#endif
}
}
elen = p - buf;
+
+ /* sanity checks */
+ if (elen >= bufsize || width >= bufsize)
+ /* diag_listed_as: Hexadecimal float: internal error (%s) */
+ Perl_croak(aTHX_ "Hexadecimal float: internal error (overflow)");
+
elen += my_snprintf(p, bufsize - elen,
"%c%+d", lower ? 'p' : 'P',
exponent);
/* Pad the back with spaces. */
memset(buf + elen, ' ', gap);
}
- else if (fill == '0') {
+ else if (fill) {
/* Insert the zeros after the "0x" and the
* the potential sign, but before the digits,
* otherwise we end up with "0000xH.HHH...",
nmove--;
}
Move(zerox, zerox + nzero, nmove, char);
- memset(zerox, fill, nzero);
+ memset(zerox, fill ? '0' : ' ', nzero);
}
else {
/* Move it to the right. */
}
-/* Helper for sv_vcatpvfn_flags(). */
-#define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr) \
- STMT_START { \
- if (in_range) \
- (var) = (expr); \
- else { \
- (var) = &PL_sv_no; /* [perl #71000] */ \
- arg_missing = TRUE; \
- } \
- } STMT_END
-
-void
-
-
/*
=for apidoc sv_vcatpvfn
*/
+void
Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted,
+ va_list *const args, SV **const svargs, const Size_t sv_count, bool *const maybe_tainted,
const U32 flags)
{
- char *p;
- char *q;
+ const char *fmtstart; /* character following the current '%' */
+ const char *q; /* current position within format */
const char *patend;
STRLEN origlen;
- I32 svix = 0;
+ Size_t svix = 0;
static const char nullstr[] = "(null)";
SV *argsv = NULL;
bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */
const bool pat_utf8 = has_utf8; /* the pattern is in utf8? */
- SV *nsv = NULL;
/* Times 4: a decimal digit takes more than 3 binary digits.
* NV_DIG: mantissa takes than many decimal digits.
* Plus 32: Playing safe. */
char ebuf[IV_DIG * 4 + NV_DIG + 32];
bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
-
+#ifdef USE_LOCALE_NUMERIC
DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+ bool lc_numeric_set = FALSE; /* called STORE_LC_NUMERIC_SET_TO_NEEDED? */
+#endif
PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
PERL_UNUSED_ARG(maybe_tainted);
* should be fixed */
assert(pat[patlen] == '\0');
- /* special-case "", "%s", and "%-p" (SVf - see below) */
- if (patlen == 0) {
- if (svmax && ckWARN(WARN_REDUNDANT))
- Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
- PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
- return;
- }
- if (patlen == 2 && pat[0] == '%' && pat[1] == 's') {
- if (svmax > 1 && ckWARN(WARN_REDUNDANT))
- Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
- PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
- if (args) {
- const char * const s = va_arg(*args, char*);
- sv_catpv_nomg(sv, s ? s : nullstr);
- }
- else if (svix < svmax) {
- /* we want get magic on the source but not the target. sv_catsv can't do that, though */
- SvGETMAGIC(*svargs);
- sv_catsv_nomg(sv, *svargs);
- }
- else
- S_warn_vcatpvfn_missing_argument(aTHX);
- return;
- }
- if (args && patlen == 3 && pat[0] == '%' &&
- pat[1] == '-' && pat[2] == 'p') {
- if (svmax > 1 && ckWARN(WARN_REDUNDANT))
- Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
- PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
- argsv = MUTABLE_SV(va_arg(*args, void*));
- sv_catsv_nomg(sv, argsv);
+ /* Special-case "", "%s", "%-p" (SVf - see below) and "%.0f".
+ * In each case, if there isn't the correct number of args, instead
+ * fall through to the main code to handle the issuing of any
+ * warnings etc.
+ */
+
+ if (patlen == 0 && (args || sv_count == 0))
return;
- }
-#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
- /* special-case "%.0f" */
- if ( !args
- && patlen == 4
- && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f'
- && svmax > 0)
- {
- const NV nv = SvNV(*svargs);
- if (LIKELY(!Perl_isinfnan(nv))) {
- STRLEN l;
- char *p;
+ if (patlen <= 4 && pat[0] == '%' && (args || sv_count == 1)) {
- if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
- sv_catpvn_nomg(sv, p, l);
+ /* "%s" */
+ if (patlen == 2 && pat[1] == 's') {
+ if (args) {
+ const char * const s = va_arg(*args, char*);
+ sv_catpv_nomg(sv, s ? s : nullstr);
+ }
+ else {
+ /* we want get magic on the source but not the target.
+ * sv_catsv can't do that, though */
+ SvGETMAGIC(*svargs);
+ sv_catsv_nomg(sv, *svargs);
+ }
+ return;
+ }
+
+ /* "%-p" */
+ if (args) {
+ if (patlen == 3 && pat[1] == '-' && pat[2] == 'p') {
+ SV *asv = MUTABLE_SV(va_arg(*args, void*));
+ sv_catsv_nomg(sv, asv);
return;
}
- }
- }
+ }
+#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
+ /* special-case "%.0f" */
+ else if ( patlen == 4
+ && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f')
+ {
+ const NV nv = SvNV(*svargs);
+ if (LIKELY(!Perl_isinfnan(nv))) {
+ STRLEN l;
+ char *p;
+
+ if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
+ sv_catpvn_nomg(sv, p, l);
+ return;
+ }
+ }
+ }
#endif /* !USE_LONG_DOUBLE */
+ }
- if (!args && svix < svmax && DO_UTF8(*svargs))
- has_utf8 = TRUE;
patend = (char*)pat + patlen;
- for (p = (char*)pat; p < patend; p = q) {
-
+ for (fmtstart = pat; fmtstart < patend; fmtstart = q) {
char intsize = 0; /* size qualifier in "%hi..." etc */
bool alt = FALSE; /* has "%#..." */
bool left = FALSE; /* has "%-..." */
- char fill = ' '; /* has "%0..." */
+ bool fill = FALSE; /* has "%0..." */
char plus = 0; /* has "%+..." */
STRLEN width = 0; /* value of "%NNN..." */
bool has_precis = FALSE; /* has "%.NNN..." */
STRLEN precis = 0; /* value of "%.NNN..." */
- bool asterisk = FALSE; /* has "%*..." */
- bool used_explicit_ix = FALSE;/* has "%$n..." */
- unsigned base = 0; /* base to print in, e.g. 8 for %o */
+ int base = 0; /* base to print in, e.g. 8 for %o */
UV uv = 0; /* the value to print of int-ish args */
- IV iv = 0; /* ditto for signed types */
bool vectorize = FALSE; /* has "%v..." */
- bool vectorarg = FALSE; /* has "%*v..." */
- SV *vecsv = NULL; /* the cur arg for %v */
- bool vec_utf8 = FALSE; /* SvUTF8(vecsv) */
- const U8 *vecstr = NULL; /* SvPVX(vecsv) */
- STRLEN veclen = 0; /* SvCUR(vecsv) */
- const char *dotstr = "."; /* separator string for %v */
- STRLEN dotstrlen = 1; /* length of separator string for %v */
-
- I32 efix = 0; /* explicit format parameter index */
- I32 ewix = 0; /* explicit width index */
- I32 epix = 0; /* explicit precision index */
- I32 evix = 0; /* explicit vector index */
- const I32 osvix = svix; /* original index in case of bad fmt */
+ bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */
+ const U8 *vecstr = NULL; /* SvPVX(vec arg) */
+ STRLEN veclen = 0; /* SvCUR(vec arg) */
+ const char *dotstr = NULL; /* separator string for %v */
+ STRLEN dotstrlen; /* length of separator string for %v */
+
+ Size_t efix = 0; /* explicit format parameter index */
+ const Size_t osvix = svix; /* original index in case of bad fmt */
bool is_utf8 = FALSE; /* is this item utf8? */
bool arg_missing = FALSE; /* give "Missing argument" warning */
const char *eptr = NULL; /* the address of the element string */
STRLEN elen = 0; /* the length of the element string */
- const char *fmtstart; /* start of current format (the '%') */
- char c = 0; /* current character read from format */
+ char c; /* the actual format ('d', s' etc) */
/* echo everything up to the next format specification */
- for (q = p; q < patend && *q != '%'; ++q) ;
- if (q > p) {
- if (has_utf8 && !pat_utf8)
- sv_catpvn_nomg_utf8_upgrade(sv, p, q - p, nsv);
+ for (q = fmtstart; q < patend && *q != '%'; ++q)
+ {};
+
+ if (q > fmtstart) {
+ if (has_utf8 && !pat_utf8) {
+ /* upgrade and copy the bytes of fmtstart..q-1 to utf8 on
+ * the fly */
+ const char *p;
+ char *dst;
+ STRLEN need = SvCUR(sv) + (q - fmtstart) + 1;
+
+ for (p = fmtstart; p < q; p++)
+ if (!NATIVE_BYTE_IS_INVARIANT(*p))
+ need++;
+ SvGROW(sv, need);
+
+ dst = SvEND(sv);
+ for (p = fmtstart; p < q; p++)
+ append_utf8_from_native_byte((U8)*p, (U8**)&dst);
+ *dst = '\0';
+ SvCUR_set(sv, need - 1);
+ }
else
- sv_catpvn_nomg(sv, p, q - p);
- p = q;
+ S_sv_catpvn_simple(aTHX_ sv, fmtstart, q - fmtstart);
}
if (q++ >= patend)
break;
- fmtstart = q;
+ fmtstart = q; /* fmtstart is char following the '%' */
/*
We allow format specification elements in this order:
[%bcdefginopsuxDFOUX] format (mandatory)
*/
- if (args) {
-/*
- As of perl5.9.3, printf format checking is on by default.
- Internally, perl uses %p formats to provide an escape to
- some extended formatting. This block deals with those
- extensions: if it does not match, (char*)q is reset and
- the normal format processing code is used.
-
- Currently defined extensions are:
- %p include pointer address (standard)
- %-p (SVf) include an SV (previously %_)
- %-<num>p include an SV with precision <num>
- %2p include a HEK
- %3p include a HEK with precision of 256
- %4p char* preceded by utf8 flag and length
- %<num>p (where num is 1 or > 4) reserved for future
- extensions
-
- Robin Barker 2005-07-14 (but modified since)
-
- %1p (VDf) removed. RMB 2007-10-19
-*/
- char* r = q;
- bool sv = FALSE;
- STRLEN n = 0;
- if (*q == '-')
- sv = *q++;
- else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
- /* The argument has already gone through cBOOL, so the cast
- is safe. */
- is_utf8 = (bool)va_arg(*args, int);
- elen = va_arg(*args, UV);
- /* if utf8 length is larger than 0x7ffff..., then it might
- * have been a signed value that wrapped */
- if (elen > ((~(STRLEN)0) >> 1)) {
- assert(0); /* in DEBUGGING build we want to crash */
- elen= 0; /* otherwise we want to treat this as an empty string */
- }
- eptr = va_arg(*args, char *);
- q += sizeof(UTF8f)-1;
- goto string;
- }
- n = expect_number(&q);
- if (*q++ == 'p') {
- if (sv) { /* SVf */
- if (n) {
- precis = n;
- has_precis = TRUE;
- }
- argsv = MUTABLE_SV(va_arg(*args, void*));
- eptr = SvPV_const(argsv, elen);
- if (DO_UTF8(argsv))
- is_utf8 = TRUE;
- goto string;
- }
- else if (n==2 || n==3) { /* HEKf */
- HEK * const hek = va_arg(*args, HEK *);
- eptr = HEK_KEY(hek);
- elen = HEK_LEN(hek);
- if (HEK_UTF8(hek)) is_utf8 = TRUE;
- if (n==3) precis = 256, has_precis = TRUE;
- goto string;
- }
- else if (n) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
- "internal %%<num>p might conflict with future printf extensions");
- }
- }
- q = r;
- }
-
- if ( (width = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ width = expect_number(&q);
if (*q == '$') {
if (args)
Perl_croak_nocontext(
"Cannot yet reorder sv_catpvfn() arguments from va_list");
++q;
- efix = width;
- used_explicit_ix = TRUE;
+ efix = (Size_t)width;
+ width = 0;
+ no_redundant_warning = TRUE;
} else {
goto gotwidth;
}
continue;
case '0':
- fill = *q++;
+ fill = TRUE;
+ q++;
continue;
case '#':
break;
}
+ /* at this point we can expect one of:
+ *
+ * 123 an explicit width
+ * * width taken from next arg
+ * *12$ width taken from 12th arg
+ * or no width
+ *
+ * But any width specification may be preceded by a v, in one of its
+ * forms:
+ * v
+ * *v
+ * *12$v
+ * So an asterisk may be either a width specifier or a vector
+ * separator arg specifier, and we don't know which initially
+ */
+
tryasterisk:
if (*q == '*') {
+ STRLEN ix; /* explicit width/vector separator index */
q++;
- if ( (ewix = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ ix = expect_number(&q);
if (*q++ == '$') {
if (args)
Perl_croak_nocontext(
"Cannot yet reorder sv_catpvfn() arguments from va_list");
- used_explicit_ix = TRUE;
+ no_redundant_warning = TRUE;
} else
goto unknown;
}
- asterisk = TRUE;
- }
- if (*q == 'v') {
+ else
+ ix = 0;
+
+ if (*q == 'v') {
+ SV *vecsv;
+ /* The asterisk was for *v, *NNN$v: vectorizing, but not
+ * with the default "." */
+ q++;
+ if (vectorize)
+ goto unknown;
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ vecsv = ix < sv_count ? svargs[ix]
+ : (arg_missing = TRUE, &PL_sv_no);
+ }
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+ bad with tied or overloaded values that return UTF8. */
+ if (DO_UTF8(vecsv))
+ is_utf8 = TRUE;
+ else if (has_utf8) {
+ vecsv = sv_mortalcopy(vecsv);
+ sv_utf8_upgrade(vecsv);
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ is_utf8 = TRUE;
+ }
+ vectorize = TRUE;
+ goto tryasterisk;
+ }
+
+ /* the asterisk specified a width */
+ {
+ int i = 0;
+ SV *sv = NULL;
+ if (args)
+ i = va_arg(*args, int);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ sv = (ix < sv_count) ? svargs[ix]
+ : (arg_missing = TRUE, (SV*)NULL);
+ }
+ width = S_sprintf_arg_num_val(aTHX_ args, i, sv, &left);
+ }
+ }
+ else if (*q == 'v') {
q++;
if (vectorize)
goto unknown;
- if ((vectorarg = asterisk)) {
- evix = ewix;
- ewix = 0;
- asterisk = FALSE;
- }
vectorize = TRUE;
- goto tryasterisk;
- }
+ dotstr = ".";
+ dotstrlen = 1;
+ goto tryasterisk;
- if (!asterisk)
- {
- if( *q == '0' )
- fill = *q++;
- width = expect_number(&q);
- }
-
- if (vectorize && vectorarg) {
- /* vectorizing, but not with the default "." */
- if (args)
- vecsv = va_arg(*args, SV*);
- else if (evix) {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, svix < svmax, svargs[svix++]);
- }
- dotstr = SvPV_const(vecsv, dotstrlen);
- /* Keep the DO_UTF8 test *after* the SvPV call, else things go
- bad with tied or overloaded values that return UTF8. */
- if (DO_UTF8(vecsv))
- is_utf8 = TRUE;
- else if (has_utf8) {
- vecsv = sv_mortalcopy(vecsv);
- sv_utf8_upgrade(vecsv);
- dotstr = SvPV_const(vecsv, dotstrlen);
- is_utf8 = TRUE;
- }
+ }
+ else {
+ /* explicit width? */
+ if(*q == '0') {
+ fill = TRUE;
+ q++;
+ }
+ if (IS_1_TO_9(*q))
+ width = expect_number(&q);
}
- if (asterisk) {
- int i;
- if (args)
- i = va_arg(*args, int);
- else
- i = (ewix ? ewix <= svmax : svix < svmax) ?
- SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0;
- left |= (i < 0);
- width = (i < 0) ? -i : i;
- }
gotwidth:
/* PRECISION */
if (*q == '.') {
q++;
if (*q == '*') {
- int i;
+ STRLEN ix; /* explicit precision index */
q++;
- if ( (epix = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ ix = expect_number(&q);
if (*q++ == '$') {
if (args)
Perl_croak_nocontext(
"Cannot yet reorder sv_catpvfn() arguments from va_list");
- used_explicit_ix = TRUE;
+ no_redundant_warning = TRUE;
} else
goto unknown;
}
- if (args)
- i = va_arg(*args, int);
- else {
- SV *precsv;
- if (epix)
- FETCH_VCATPVFN_ARGUMENT(
- precsv, epix > 0 && epix <= svmax, svargs[epix-1]);
- else
- FETCH_VCATPVFN_ARGUMENT(
- precsv, svix < svmax, svargs[svix++]);
- i = precsv == &PL_sv_no ? 0 : SvIVx(precsv);
+ else
+ ix = 0;
+
+ {
+ int i = 0;
+ SV *sv = NULL;
+ bool neg = FALSE;
+
+ if (args)
+ i = va_arg(*args, int);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ sv = (ix < sv_count) ? svargs[ix]
+ : (arg_missing = TRUE, (SV*)NULL);
+ }
+ precis = S_sprintf_arg_num_val(aTHX_ args, i, sv, &neg);
+ has_precis = !neg;
}
- precis = i;
- has_precis = !(i < 0);
}
else {
- precis = 0;
- while (isDIGIT(*q))
- precis = precis * 10 + (*q++ - '0');
+ /* although it doesn't seem documented, this code has long
+ * behaved so that:
+ * no digits following the '.' is treated like '.0'
+ * the number may be preceded by any number of zeroes,
+ * e.g. "%.0001f", which is the same as "%.1f"
+ * so I've kept that behaviour. DAPM May 2017
+ */
+ while (*q == '0')
+ q++;
+ precis = IS_1_TO_9(*q) ? expect_number(&q) : 0;
has_precis = TRUE;
}
}
- if (vectorize) {
- if (args) {
- VECTORIZE_ARGS
- }
- else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) {
- vecsv = svargs[efix ? efix-1 : svix++];
- vecstr = (U8*)SvPV_const(vecsv,veclen);
- vec_utf8 = DO_UTF8(vecsv);
-
- /* if this is a version object, we need to convert
- * back into v-string notation and then let the
- * vectorize happen normally
- */
- if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) {
- if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF),
- "vector argument not supported with alpha versions");
- goto vdblank;
- }
- vecsv = sv_newmortal();
- scan_vstring((char *)vecstr, (char *)vecstr + veclen,
- vecsv);
- vecstr = (U8*)SvPV_const(vecsv, veclen);
- vec_utf8 = DO_UTF8(vecsv);
- }
- }
- else {
- vdblank:
- vecstr = (U8*)"";
- veclen = 0;
- }
- }
-
/* SIZE */
switch (*q) {
/* CONVERSION */
- if (*q == '%') {
- eptr = q++;
+ c = *q++; /* c now holds the conversion type */
+
+ /* '%' doesn't have an arg, so skip arg processing */
+ if (c == '%') {
+ eptr = q - 1;
elen = 1;
- if (vectorize) {
- c = '%';
+ if (vectorize)
goto unknown;
- }
goto string;
}
- if (!vectorize && !args) {
- if (efix) {
- const I32 i = efix-1;
- FETCH_VCATPVFN_ARGUMENT(argsv, i >= 0 && i < svmax, svargs[i]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(argsv, svix >= 0 && svix < svmax,
- svargs[svix++]);
- }
- }
+ if (vectorize && !strchr("BbDdiOouUXx", c))
+ goto unknown;
- c = *q++; /* c now holds the conversion type */
+ /* get next arg (individual branches do their own va_arg()
+ * handling for the args case) */
+
+ if (!args) {
+ efix = efix ? efix - 1 : svix++;
+ argsv = efix < sv_count ? svargs[efix]
+ : (arg_missing = TRUE, &PL_sv_no);
+ }
- if (argsv && strchr("BbcDdiOopuUXx", c)) {
- /* XXX va_arg(*args) case? need peek, use va_copy? */
- SvGETMAGIC(argsv);
- if (UNLIKELY(SvAMAGIC(argsv)))
- argsv = sv_2num(argsv);
- if (UNLIKELY(isinfnansv(argsv)))
- goto handle_infnan_argsv;
- }
switch (c) {
/* STRINGS */
- case 'c':
- if (vectorize)
- goto unknown;
- uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
- if ((uv > 255 ||
- (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
- && !IN_BYTES)
- {
- assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
- eptr = ebuf;
- elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
- is_utf8 = TRUE;
- }
- else {
- c = (char)uv;
- eptr = &c;
- elen = 1;
- }
- goto string;
-
case 's':
- if (vectorize)
- goto unknown;
if (args) {
eptr = va_arg(*args, char*);
if (eptr)
/* INTEGERS */
case 'p':
- if (alt || vectorize)
+ if (alt)
goto unknown;
+
+ /* %p extensions:
+ *
+ * "%...p" is normally treated like "%...x", except that the
+ * number to print is the SV's address (or a pointer address
+ * for C-ish sprintf).
+ *
+ * However, the C-ish sprintf variant allows a few special
+ * extensions. These are currently:
+ *
+ * %-p (SVf) Like %s, but gets the string from an SV*
+ * arg rather than a char* arg.
+ * (This was previously %_).
+ *
+ * %-<num>p Ditto but like %.<num>s (i.e. num is max width)
+ *
+ * %2p (HEKf) Like %s, but using the key string in a HEK
+ *
+ * %3p (HEKf256) Ditto but like %.256s
+ *
+ * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
+ * (cBOOL(utf8), len, string_buf).
+ * It's handled by the "case 'd'" branch
+ * rather than here.
+ *
+ * %<num>p where num is 1 or > 4: reserved for future
+ * extensions. Warns, but then is treated as a
+ * general %p (print hex address) format.
+ */
+
+ if ( args
+ && !intsize
+ && !fill
+ && !plus
+ && !has_precis
+ /* not %*p or %*1$p - any width was explicit */
+ && q[-2] != '*'
+ && q[-2] != '$'
+ ) {
+ if (left) { /* %-p (SVf), %-NNNp */
+ if (width) {
+ precis = width;
+ has_precis = TRUE;
+ }
+ argsv = MUTABLE_SV(va_arg(*args, void*));
+ eptr = SvPV_const(argsv, elen);
+ if (DO_UTF8(argsv))
+ is_utf8 = TRUE;
+ width = 0;
+ goto string;
+ }
+ else if (width == 2 || width == 3) { /* HEKf, HEKf256 */
+ HEK * const hek = va_arg(*args, HEK *);
+ eptr = HEK_KEY(hek);
+ elen = HEK_LEN(hek);
+ if (HEK_UTF8(hek))
+ is_utf8 = TRUE;
+ if (width == 3) {
+ precis = 256;
+ has_precis = TRUE;
+ }
+ width = 0;
+ goto string;
+ }
+ else if (width) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "internal %%<num>p might conflict with future printf extensions");
+ }
+ }
+
+ /* treat as normal %...p */
+
uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
base = 16;
- goto integer;
+ goto do_integer;
+
+ case 'c':
+ /* Ignore any size specifiers, since they're not documented as
+ * being allowed for %c (ideally we should warn on e.g. '%hc').
+ * Setting a default intsize, along with a positive
+ * (which signals unsigned) base, causes, for C-ish use, the
+ * va_arg to be interpreted as as unsigned int, when it's
+ * actually signed, which will convert -ve values to high +ve
+ * values. Note that unlike the libc %c, values > 255 will
+ * convert to high unicode points rather than being truncated
+ * to 8 bits. For perlish use, it will do SvUV(argsv), which
+ * will again convert -ve args to high -ve values.
+ */
+ intsize = 0;
+ base = 1; /* special value that indicates we're doing a 'c' */
+ goto get_int_arg_val;
case 'D':
#ifdef IV_IS_QUAD
#else
intsize = 'l';
#endif
- /* FALLTHROUGH */
+ base = -10;
+ goto get_int_arg_val;
+
case 'd':
- case 'i':
- if (vectorize) {
- STRLEN ulen;
- if (!veclen)
- goto donevalidconversion;
- if (vec_utf8)
- uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
- UTF8_ALLOW_ANYUV);
- else {
- uv = *vecstr;
- ulen = 1;
- }
- vecstr += ulen;
- veclen -= ulen;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else if (args) {
- switch (intsize) {
- case 'c': iv = (char)va_arg(*args, int); break;
- case 'h': iv = (short)va_arg(*args, int); break;
- case 'l': iv = va_arg(*args, long); break;
- case 'V': iv = va_arg(*args, IV); break;
- case 'z': iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
- case 't': iv = va_arg(*args, ptrdiff_t); break;
-#endif
- default: iv = va_arg(*args, int); break;
-#ifdef I_STDINT
- case 'j': iv = va_arg(*args, intmax_t); break;
-#endif
- case 'q':
-#if IVSIZE >= 8
- iv = va_arg(*args, Quad_t); break;
-#else
- goto unknown;
-#endif
- }
- }
- else {
- IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': iv = (char)tiv; break;
- case 'h': iv = (short)tiv; break;
- case 'l': iv = (long)tiv; break;
- case 'V':
- default: iv = tiv; break;
- case 'q':
-#if IVSIZE >= 8
- iv = (Quad_t)tiv; break;
-#else
- goto unknown;
-#endif
- }
- }
- if ( !vectorize ) /* we already set uv above */
- {
- if (iv >= 0) {
- uv = iv;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else {
- uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
- esignbuf[esignlen++] = '-';
- }
+ /* probably just a plain %d, but it might be the start of the
+ * special UTF8f format, which usually looks something like
+ * "%d%lu%4p" (the lu may vary by platform)
+ */
+ assert((UTF8f)[0] == 'd');
+ assert((UTF8f)[1] == '%');
+
+ if ( args /* UTF8f only valid for C-ish sprintf */
+ && q == fmtstart + 1 /* plain %d, not %....d */
+ && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
+ && *q == '%'
+ && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3))
+ {
+ /* The argument has already gone through cBOOL, so the cast
+ is safe. */
+ is_utf8 = (bool)va_arg(*args, int);
+ elen = va_arg(*args, UV);
+ /* if utf8 length is larger than 0x7ffff..., then it might
+ * have been a signed value that wrapped */
+ if (elen > ((~(STRLEN)0) >> 1)) {
+ assert(0); /* in DEBUGGING build we want to crash */
+ elen = 0; /* otherwise we want to treat this as an empty string */
+ }
+ eptr = va_arg(*args, char *);
+ q += sizeof(UTF8f) - 2;
+ goto string;
}
- base = 10;
- goto integer;
+
+ /* FALLTHROUGH */
+ case 'i':
+ base = -10;
+ goto get_int_arg_val;
case 'U':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'u':
base = 10;
- goto uns_integer;
+ goto get_int_arg_val;
case 'B':
case 'b':
base = 2;
- goto uns_integer;
+ goto get_int_arg_val;
case 'O':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'o':
base = 8;
- goto uns_integer;
+ goto get_int_arg_val;
case 'X':
case 'x':
base = 16;
- uns_integer:
+ get_int_arg_val:
+
if (vectorize) {
STRLEN ulen;
- vector:
+ SV *vecsv;
+
+ if (base < 0) {
+ base = -base;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+
+ /* initialise the vector string to iterate over */
+
+ vecsv = args ? va_arg(*args, SV*) : argsv;
+
+ /* if this is a version object, we need to convert
+ * back into v-string notation and then let the
+ * vectorize happen normally
+ */
+ if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) {
+ if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF),
+ "vector argument not supported with alpha versions");
+ vecsv = &PL_sv_no;
+ }
+ else {
+ vecstr = (U8*)SvPV_const(vecsv,veclen);
+ vecsv = sv_newmortal();
+ scan_vstring((char *)vecstr, (char *)vecstr + veclen,
+ vecsv);
+ }
+ }
+ vecstr = (U8*)SvPV_const(vecsv, veclen);
+ vec_utf8 = DO_UTF8(vecsv);
+
+ /* This is the re-entry point for when we're iterating
+ * over the individual characters of a vector arg */
+ vector:
if (!veclen)
- goto donevalidconversion;
+ goto done_valid_conversion;
if (vec_utf8)
uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
UTF8_ALLOW_ANYUV);
vecstr += ulen;
veclen -= ulen;
}
- else if (args) {
- switch (intsize) {
- case 'c': uv = (unsigned char)va_arg(*args, unsigned); break;
- case 'h': uv = (unsigned short)va_arg(*args, unsigned); break;
- case 'l': uv = va_arg(*args, unsigned long); break;
- case 'V': uv = va_arg(*args, UV); break;
- case 'z': uv = va_arg(*args, Size_t); break;
+ else {
+ /* test arg for inf/nan. This can trigger an unwanted
+ * 'str' overload, so manually force 'num' overload first
+ * if necessary */
+ if (argsv) {
+ SvGETMAGIC(argsv);
+ if (UNLIKELY(SvAMAGIC(argsv)))
+ argsv = sv_2num(argsv);
+ if (UNLIKELY(isinfnansv(argsv)))
+ goto handle_infnan_argsv;
+ }
+
+ if (base < 0) {
+ /* signed int type */
+ IV iv;
+ base = -base;
+ if (args) {
+ switch (intsize) {
+ case 'c': iv = (char)va_arg(*args, int); break;
+ case 'h': iv = (short)va_arg(*args, int); break;
+ case 'l': iv = va_arg(*args, long); break;
+ case 'V': iv = va_arg(*args, IV); break;
+ case 'z': iv = va_arg(*args, SSize_t); break;
#ifdef HAS_PTRDIFF_T
- case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+ case 't': iv = va_arg(*args, ptrdiff_t); break;
#endif
+ default: iv = va_arg(*args, int); break;
#ifdef I_STDINT
- case 'j': uv = va_arg(*args, uintmax_t); break;
+ case 'j': iv = va_arg(*args, intmax_t); break;
#endif
- default: uv = va_arg(*args, unsigned); break;
- case 'q':
+ case 'q':
#if IVSIZE >= 8
- uv = va_arg(*args, Uquad_t); break;
+ iv = va_arg(*args, Quad_t); break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
- else {
- UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': uv = (unsigned char)tuv; break;
- case 'h': uv = (unsigned short)tuv; break;
- case 'l': uv = (unsigned long)tuv; break;
- case 'V':
- default: uv = tuv; break;
- case 'q':
+ }
+ }
+ else {
+ /* assign to tiv then cast to iv to work around
+ * 2003 GCC cast bug (gnu.org bugzilla #13488) */
+ IV tiv = SvIV_nomg(argsv);
+ switch (intsize) {
+ case 'c': iv = (char)tiv; break;
+ case 'h': iv = (short)tiv; break;
+ case 'l': iv = (long)tiv; break;
+ case 'V':
+ default: iv = tiv; break;
+ case 'q':
#if IVSIZE >= 8
- uv = (Uquad_t)tuv; break;
+ iv = (Quad_t)tiv; break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
+ }
+ }
+
+ /* now convert iv to uv */
+ if (iv >= 0) {
+ uv = iv;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+ else {
+ uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+ esignbuf[esignlen++] = '-';
+ }
+ }
+ else {
+ /* unsigned int type */
+ if (args) {
+ switch (intsize) {
+ case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+ break;
+ case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+ break;
+ case 'l': uv = va_arg(*args, unsigned long); break;
+ case 'V': uv = va_arg(*args, UV); break;
+ case 'z': uv = va_arg(*args, Size_t); break;
+#ifdef HAS_PTRDIFF_T
+ /* will sign extend, but there is no
+ * uptrdiff_t, so oh well */
+ case 't': uv = va_arg(*args, ptrdiff_t); break;
+#endif
+#ifdef I_STDINT
+ case 'j': uv = va_arg(*args, uintmax_t); break;
+#endif
+ default: uv = va_arg(*args, unsigned); break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = va_arg(*args, Uquad_t); break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ else {
+ /* assign to tiv then cast to iv to work around
+ * 2003 GCC cast bug (gnu.org bugzilla #13488) */
+ UV tuv = SvUV_nomg(argsv);
+ switch (intsize) {
+ case 'c': uv = (unsigned char)tuv; break;
+ case 'h': uv = (unsigned short)tuv; break;
+ case 'l': uv = (unsigned long)tuv; break;
+ case 'V':
+ default: uv = tuv; break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = (Uquad_t)tuv; break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ }
+ }
- integer:
+ do_integer:
{
char *ptr = ebuf + sizeof ebuf;
- bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
unsigned dig;
zeros = 0;
switch (base) {
case 16:
- p = (char *)((c == 'X') ? PL_hexdigit + 16 : PL_hexdigit);
- do {
- dig = uv & 15;
- *--ptr = p[dig];
- } while (uv >>= 4);
- if (tempalt) {
- esignbuf[esignlen++] = '0';
- esignbuf[esignlen++] = c; /* 'x' or 'X' */
- }
- break;
+ {
+ const char * const p =
+ (c == 'X') ? PL_hexdigit + 16 : PL_hexdigit;
+
+ do {
+ dig = uv & 15;
+ *--ptr = p[dig];
+ } while (uv >>= 4);
+ if (alt && *ptr != '0') {
+ esignbuf[esignlen++] = '0';
+ esignbuf[esignlen++] = c; /* 'x' or 'X' */
+ }
+ break;
+ }
case 8:
do {
dig = uv & 7;
dig = uv & 1;
*--ptr = '0' + dig;
} while (uv >>= 1);
- if (tempalt) {
+ if (alt && *ptr != '0') {
esignbuf[esignlen++] = '0';
- esignbuf[esignlen++] = c;
+ esignbuf[esignlen++] = c; /* 'b' or 'B' */
}
break;
+
+ case 1:
+ /* special-case: base 1 indicates a 'c' format:
+ * we use the common code for extracting a uv,
+ * but handle that value differently here than
+ * all the other int types */
+ if ((uv > 255 ||
+ (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+ && !IN_BYTES)
+ {
+ assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+ eptr = ebuf;
+ elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+ is_utf8 = TRUE;
+ }
+ else {
+ eptr = ebuf;
+ ebuf[0] = (char)uv;
+ elen = 1;
+ }
+ goto string;
+
default: /* it had better be ten or less */
do {
dig = uv % base;
&& !(base == 8 && alt)) /* "%#.0o" prints "0" */
elen = 0;
- /* a precision nullifies the 0 flag. */
- if (fill == '0')
- fill = ' ';
+ /* a precision nullifies the 0 flag. */
+ fill = FALSE;
}
}
break;
case 'a': case 'A':
{
- STRLEN radix_len; /* SvCUR(PL_numeric_radix_sv) */
STRLEN float_need; /* what PL_efloatsize needs to become */
bool hexfp; /* hexadecimal floating point? */
vcatpvfn_long_double_t fv;
NV nv;
- if (vectorize)
- goto unknown;
-
/* This is evil, but floating point is even more evil */
/* for SV-style calling, we can only get NV
&& !precis
&& has_precis
&& !(width || left || plus || alt)
- && fill != '0'
+ && !fill
&& intsize != 'q'
&& ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
)
- goto float_concat_no_utf8;
+ goto float_concat;
/* Determine the buffer size needed for the various
* floating-point formats.
* First, here are the constant bits. For ease of calculation
* we over-estimate the needed buffer size, for example by
* assuming all formats have an exponent and a leading 0x1.
+ *
+ * Also for production use, add a little extra overhead for
+ * safety's sake. Under debugging don't, as it means we're
+ * more likely to quickly spot issues during development.
*/
float_need = 1 /* possible unary minus */
+ 4 /* "0x1" plus very unlikely carry */
+ + 1 /* default radix point '.' */
+ 2 /* "e-", "p+" etc */
+ 6 /* exponent: up to 16383 (quad fp) */
+#ifndef DEBUGGING
+ + 20 /* safety net */
+#endif
+ 1; /* \0 */
/* determine the radix point len, e.g. length(".") in "1.2" */
- radix_len = 1; /* assume '.' */
#ifdef USE_LOCALE_NUMERIC
/* note that we may either explicitly use PL_numeric_radix_sv
* below, or implicitly, via an snprintf() variant.
* Note also things like ps_AF.utf8 which has
* "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
- radix_len = SvCUR(PL_numeric_radix_sv);
- /* note that this will convert the output to utf8 even if
- * if the radix point didn't get output */
- is_utf8 = SvUTF8(PL_numeric_radix_sv);
+ if (!lc_numeric_set) {
+ /* only set once and reuse in-locale value on subsequent
+ * iterations.
+ * XXX what happens if we die in an eval?
+ */
+ STORE_LC_NUMERIC_SET_TO_NEEDED();
+ lc_numeric_set = TRUE;
+ }
+
+ if (PL_numeric_radix_sv) {
+ assert(IN_LC(LC_NUMERIC));
+ /* this can't wrap unless PL_numeric_radix_sv is a string
+ * consuming virtually all the 32-bit or 64-bit address
+ * space
+ */
+ float_need += (SvCUR(PL_numeric_radix_sv) - 1);
+
+ /* floating-point formats only get utf8 if the radix point
+ * is utf8. All other characters in the string are < 128
+ * and so can be safely appended to both a non-utf8 and utf8
+ * string as-is.
+ * Note that this will convert the output to utf8 even if
+ * the radix point didn't get output.
+ */
+ if (SvUTF8(PL_numeric_radix_sv) && !has_utf8) {
+ sv_utf8_upgrade(sv);
+ has_utf8 = TRUE;
+ }
}
- RESTORE_LC_NUMERIC();
#endif
- /* this can't wrap unless PL_numeric_radix_sv is a string
- * consuming virtually all the 32-bit or 64-bit address space
- */
- float_need += radix_len;
hexfp = FALSE;
if (i > 0) {
digits = BIT_DIGITS(i);
- if (float_need >= ((STRLEN)~0) - digits)
- croak_memory_wrap();
+ /* this can't overflow. 'digits' will only be a few
+ * thousand even for the largest floating-point types.
+ * And up until now float_need is just some small
+ * constants plus radix len, which can't be in
+ * overflow territory unless the radix SV is consuming
+ * over 1/2 the address space */
+ assert(float_need < ((STRLEN)~0) - digits);
float_need += digits;
}
}
#else
NVSIZE * 2; /* 2 hexdigits for each byte */
#endif
- if (float_need >= ((STRLEN)~0) - digits)
- croak_memory_wrap();
+ /* see "this can't overflow" comment above */
+ assert(float_need < ((STRLEN)~0) - digits);
float_need += digits;
}
}
&& float_need < sizeof(ebuf)
&& sizeof(ebuf) - float_need > precis
&& !(width || left || plus || alt)
- && fill != '0'
+ && !fill
&& intsize != 'q'
) {
- STORE_LC_NUMERIC_SET_TO_NEEDED();
SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis);
elen = strlen(ebuf);
eptr = ebuf;
{
STRLEN pr = has_precis ? precis : 6; /* known default */
+ /* this probably can't wrap, since precis is limited
+ * to 1/4 address space size, but better safe than sorry
+ */
if (float_need >= ((STRLEN)~0) - pr)
croak_memory_wrap();
float_need += pr;
if (float_need < width)
float_need = width;
-/* We should have correctly calculated (or indeed over-estimated) the
- * buffer size, but you never know what strange floating-point systems
- * there are out there. So for production use, add a little extra overhead.
- * Under debugging don't, as it means we more more likely to quickly spot
- * issues during development.
- */
-#ifndef DEBUGGING
- if (float_need >= ((STRLEN)~0) - 20)
- croak_memory_wrap();
- float_need += 20; /* safety fudge factor */
-#endif
-
- if (PL_efloatsize < float_need) {
+ if (PL_efloatsize <= float_need) {
+ /* PL_efloatbuf should be at least 1 greater than
+ * float_need to allow a trailing \0 to be returned by
+ * snprintf(). If we need to grow, overgrow for the
+ * benefit of future generations */
+ const STRLEN extra = 0x20;
+ if (float_need >= ((STRLEN)~0) - extra)
+ croak_memory_wrap();
+ float_need += extra;
Safefree(PL_efloatbuf);
PL_efloatsize = float_need;
Newx(PL_efloatbuf, PL_efloatsize, char);
base = width;
do { *--ptr = '0' + (base % 10); } while (base /= 10);
}
- if (fill == '0')
- *--ptr = fill;
+ if (fill)
+ *--ptr = '0';
if (left)
*--ptr = '-';
if (plus)
* where printf() taints but print($float) doesn't.
* --jhi */
- STORE_LC_NUMERIC_SET_TO_NEEDED();
-
/* hopefully the above makes ptr a very constrained format
* that is safe to use, even though it's not literal */
GCC_DIAG_IGNORE(-Wformat-nonliteral);
* loop which handles appending eptr to sv, and do our own
* stripped-down version */
- /* floating-point formats only get is_utf8 if the radix point
- * is utf8. All other characters in the string are < 128
- * and so can be safely appended to both a non-utf8 and utf8
- * string as-is.
- */
- if (is_utf8 && !has_utf8) {
- sv_utf8_upgrade(sv);
- has_utf8 = TRUE;
- }
-
- float_concat_no_utf8:
-
assert(!zeros);
assert(!esignlen);
- assert(!vectorize);
assert(elen);
assert(elen >= width);
+ S_sv_catpvn_simple(aTHX_ sv, eptr, elen);
- {
- /* unrolled Perl_sv_catpvn */
- STRLEN need = elen + SvCUR(sv) + 1;
- char *end;
- /* can't wrap as both elen and SvCUR() are allocated in
- * memory and together can't consume all the address space
- */
- assert(need > elen);
- SvGROW(sv, need);
- end = SvEND(sv);
- Copy(eptr, end, elen, char);
- end += elen;
- *end = '\0';
- SvCUR_set(sv, need - 1);
- }
-
- goto donevalidconversion;
+ goto done_valid_conversion;
}
/* SPECIAL */
case 'n':
{
- int i;
- if (vectorize)
- goto unknown;
- i = SvCUR(sv) - origlen;
+ STRLEN len;
+ /* XXX ideally we should warn if any flags etc have been
+ * set, e.g. "%-4.5n" */
+ /* XXX if sv was originally non-utf8 with a char in the
+ * range 0x80-0xff, then if it got upgraded, we should
+ * calculate char len rather than byte len here */
+ len = SvCUR(sv) - origlen;
if (args) {
+ int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len;
+
switch (intsize) {
case 'c': *(va_arg(*args, char*)) = i; break;
case 'h': *(va_arg(*args, short*)) = i; break;
#endif
}
}
- else
- sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
- goto donevalidconversion;
+ else {
+ if (arg_missing)
+ Perl_croak_nocontext(
+ "Missing argument for %%n in %s",
+ PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
+ sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len);
+ }
+ goto done_valid_conversion;
}
/* UNKNOWN */
/* mangled format: output the '%', then continue from the
* character following that */
- sv_catpvn_nomg(sv, p, 1);
- q = p + 1;
+ sv_catpvn_nomg(sv, fmtstart-1, 1);
+ q = fmtstart;
svix = osvix;
+ /* Any "redundant arg" warning from now onwards will probably
+ * just be misleading, so don't bother. */
+ no_redundant_warning = TRUE;
continue; /* not "break" */
}
{
STRLEN need, have, gap;
+ STRLEN i;
+ char *s;
/* signed value that's wrapped? */
assert(elen <= ((~(STRLEN)0) >> 1));
- /* Most of these length vars can range to any value if
- * supplied with a hostile format and/or args. So check every
- * addition for possible overflow. In reality some of these
- * values are interdependent so these checks are slightly
- * redundant. But its easier to be certain this way.
- */
-
- have = elen;
-
- if (have >= (((STRLEN)~0) - zeros))
- croak_memory_wrap();
- have += zeros;
+ /* if zeros is non-zero, then it represents filler between
+ * elen and precis. So adding elen and zeros together will
+ * always be <= precis, and the addition can never wrap */
+ assert(!zeros || (precis > elen && precis - elen == zeros));
+ have = elen + zeros;
if (have >= (((STRLEN)~0) - esignlen))
croak_memory_wrap();
need = (have > width ? have : width);
gap = need - have;
- if (need >= (((STRLEN)~0) - dotstrlen))
- croak_memory_wrap();
- need += dotstrlen;
-
if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1)))
croak_memory_wrap();
need += (SvCUR(sv) + 1);
SvGROW(sv, need);
- p = SvEND(sv);
- if (esignlen && fill == '0') {
- int i;
- for (i = 0; i < (int)esignlen; i++)
- *p++ = esignbuf[i];
- }
- if (gap && !left) {
- memset(p, fill, gap);
- p += gap;
- }
- if (esignlen && fill != '0') {
- int i;
- for (i = 0; i < (int)esignlen; i++)
- *p++ = esignbuf[i];
- }
- if (zeros) {
- int i;
+ s = SvEND(sv);
+
+ if (left) {
+ for (i = 0; i < esignlen; i++)
+ *s++ = esignbuf[i];
for (i = zeros; i; i--)
- *p++ = '0';
+ *s++ = '0';
+ Copy(eptr, s, elen, char);
+ s += elen;
+ for (i = gap; i; i--)
+ *s++ = ' ';
}
- if (elen) {
- Copy(eptr, p, elen, char);
- p += elen;
- }
- if (gap && left) {
- memset(p, ' ', gap);
- p += gap;
- }
- if (vectorize) {
- if (veclen) {
- Copy(dotstr, p, dotstrlen, char);
- p += dotstrlen;
+ else {
+ if (fill) {
+ for (i = 0; i < esignlen; i++)
+ *s++ = esignbuf[i];
+ assert(!zeros);
+ zeros = gap;
}
- else
- vectorize = FALSE; /* done iterating over vecstr */
+ else {
+ for (i = gap; i; i--)
+ *s++ = ' ';
+ for (i = 0; i < esignlen; i++)
+ *s++ = esignbuf[i];
+ }
+
+ for (i = zeros; i; i--)
+ *s++ = '0';
+ Copy(eptr, s, elen, char);
+ s += elen;
}
+
+ *s = '\0';
+ SvCUR_set(sv, s - SvPVX_const(sv));
+
if (is_utf8)
has_utf8 = TRUE;
if (has_utf8)
SvUTF8_on(sv);
- *p = '\0';
- SvCUR_set(sv, p - SvPVX_const(sv));
}
- if (vectorize) {
- esignlen = 0;
- goto vector;
+ if (vectorize && veclen) {
+ /* we append the vector separator separately since %v isn't
+ * very common: don't slow down the general case by adding
+ * dotstrlen to need etc */
+ sv_catpvn_nomg(sv, dotstr, dotstrlen);
+ esignlen = 0;
+ goto vector; /* do next iteration */
}
- donevalidconversion:
- if (used_explicit_ix)
- no_redundant_warning = TRUE;
+ done_valid_conversion:
+
if (arg_missing)
S_warn_vcatpvfn_missing_argument(aTHX);
}
/* Now that we've consumed all our printf format arguments (svix)
* do we have things left on the stack that we didn't use?
*/
- if (!no_redundant_warning && svmax >= svix + 1 && ckWARN(WARN_REDUNDANT)) {
+ if (!no_redundant_warning && sv_count >= svix + 1 && ckWARN(WARN_REDUNDANT)) {
Perl_warner(aTHX_ packWARN(WARN_REDUNDANT), "Redundant argument in %s",
PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
}