void
Perl_sv_vsetpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VSETPVFN;
}
-STATIC I32
+static void
+S_croak_overflow()
+{
+ dTHX;
+ Perl_croak(aTHX_ "Integer overflow in format string for %s",
+ (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn"));
+}
+
+
+/* Given an int i from the next arg (if args is true) or an sv from an arg
+ * (if args is false), try to extract a STRLEN-ranged value from the arg,
+ * with overflow checking.
+ * Sets *neg to true if the value was negative (untouched otherwise.
+ * Returns the absolute value.
+ * As an extra margin of safety, it croaks if the returned value would
+ * exceed the maximum value of a STRLEN / 4.
+ */
+
+static STRLEN
+S_sprintf_arg_num_val(pTHX_ va_list *const args, int i, SV *sv, bool *neg)
+{
+ IV iv;
+
+ if (args) {
+ iv = i;
+ goto do_iv;
+ }
+
+ if (!sv)
+ return 0;
+
+ SvGETMAGIC(sv);
+
+ if (UNLIKELY(SvIsUV(sv))) {
+ UV uv = SvUV_nomg(sv);
+ if (uv > IV_MAX)
+ S_croak_overflow();
+ iv = uv;
+ }
+ else {
+ iv = SvIV_nomg(sv);
+ do_iv:
+ if (iv < 0) {
+ if (iv < -IV_MAX)
+ S_croak_overflow();
+ iv = -iv;
+ *neg = TRUE;
+ }
+ }
+
+ if (iv > (IV)(((STRLEN)~0) / 4))
+ S_croak_overflow();
+
+ return (STRLEN)iv;
+}
+
+
+/* Returns true if c is in the range '1'..'9'
+ * Written with the cast so it only needs one conditional test
+ */
+#define IS_1_TO_9(c) ((U8)(c - '1') <= 8)
+
+/* Read in and return a number. Updates *pattern to point to the char
+ * following the number. Expects the first char to 1..9.
+ * Croaks if the number exceeds 1/4 of the maximum value of STRLEN.
+ * This is a belt-and-braces safety measure to complement any
+ * overflow/wrap checks done in the main body of sv_vcatpvfn_flags.
+ * It means that e.g. on a 32-bit system the width/precision can't be more
+ * than 1G, which seems reasonable.
+ */
+
+STATIC STRLEN
S_expect_number(pTHX_ char **const pattern)
{
- I32 var = 0;
+ STRLEN var;
PERL_ARGS_ASSERT_EXPECT_NUMBER;
- switch (**pattern) {
- case '1': case '2': case '3':
- case '4': case '5': case '6':
- case '7': case '8': case '9':
- var = *(*pattern)++ - '0';
- while (isDIGIT(**pattern)) {
- const I32 tmp = var * 10 + (*(*pattern)++ - '0');
- if (tmp < var)
- Perl_croak(aTHX_ "Integer overflow in format string for %s", (PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn"));
- var = tmp;
- }
+ assert(IS_1_TO_9(**pattern));
+
+ var = *(*pattern)++ - '0';
+ while (isDIGIT(**pattern)) {
+ /* if var * 10 + 9 would exceed 1/4 max strlen, croak */
+ if (var > ((((STRLEN)~0) / 4 - 9) / 10))
+ S_croak_overflow();
+ var = var * 10 + (*(*pattern)++ - '0');
}
return var;
}
}
-#define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\
- vecstr = (U8*)SvPV_const(vecsv,veclen);\
- vec_utf8 = DO_UTF8(vecsv);
-
/* XXX maybe_tainted is never assigned to, so the doc above is lying. */
void
Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted)
+ va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted)
{
PERL_ARGS_ASSERT_SV_VCATPVFN;
const U8* nvp = (const U8*)(&nv);
HEXTRACT_GET_SUBNORMAL(nv);
HEXTRACT_IMPLICIT_BIT(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE
+# undef HEXTRACT_HAS_TOP_NYBBLE
HEXTRACT_BYTES_LE(13, 0);
# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN
/* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L:
const U8* nvp = (const U8*)(&nv);
HEXTRACT_GET_SUBNORMAL(nv);
HEXTRACT_IMPLICIT_BIT(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE
+# undef HEXTRACT_HAS_TOP_NYBBLE
HEXTRACT_BYTES_BE(2, 15);
# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
/* x86 80-bit "extended precision", 64 bits of mantissa / fraction /
# define HEXTRACT_FALLBACK
# endif
#endif /* #if defined(USE_LONG_DOUBLE) && (NVSIZE > DOUBLESIZE) #else */
-# ifdef HEXTRACT_FALLBACK
+
+#ifdef HEXTRACT_FALLBACK
HEXTRACT_GET_SUBNORMAL(nv);
-# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */
+# undef HEXTRACT_HAS_TOP_NYBBLE /* Meaningless, but consistent. */
/* The fallback is used for the double-double format, and
* for unknown long double formats, and for unknown double
* formats, or in general unknown NV formats. */
v++;
}
}
-# endif
+#endif
}
/* Croak for various reasons: if the output pointer escaped the
* output buffer, if the extraction index escaped the extraction
S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
const NV nv, const vcatpvfn_long_double_t fv,
bool has_precis, STRLEN precis, STRLEN width,
- bool alt, char plus, bool left, char fill)
+ bool alt, char plus, bool left, bool fill)
{
/* Hexadecimal floating point. */
char* p = buf;
* be mapped through the xdig to get the actual
* human-readable xdigits. */
const char* xdig = PL_hexdigit;
- int zerotail = 0; /* how many extra zeros to append */
+ STRLEN zerotail = 0; /* how many extra zeros to append */
int exponent = 0; /* exponent of the floating point input */
bool hexradix = FALSE; /* should we output the radix */
bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
/* Pad the back with spaces. */
memset(buf + elen, ' ', gap);
}
- else if (fill == '0') {
+ else if (fill) {
/* Insert the zeros after the "0x" and the
* the potential sign, but before the digits,
* otherwise we end up with "0000xH.HHH...",
nmove--;
}
Move(zerox, zerox + nzero, nmove, char);
- memset(zerox, fill, nzero);
+ memset(zerox, fill ? '0' : ' ', nzero);
}
else {
/* Move it to the right. */
}
-/* Helper for sv_vcatpvfn_flags(). */
-#define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr) \
- STMT_START { \
- if (in_range) \
- (var) = (expr); \
- else { \
- (var) = &PL_sv_no; /* [perl #71000] */ \
- arg_missing = TRUE; \
- } \
- } STMT_END
-
-void
-
-
/*
=for apidoc sv_vcatpvfn
*/
+void
Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
- va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted,
+ va_list *const args, SV **const svargs, const Size_t svmax, bool *const maybe_tainted,
const U32 flags)
{
char *p;
char *q;
const char *patend;
STRLEN origlen;
- I32 svix = 0;
+ Size_t svix = 0;
static const char nullstr[] = "(null)";
SV *argsv = NULL;
bool has_utf8 = DO_UTF8(sv); /* has the result utf8? */
char intsize = 0; /* size qualifier in "%hi..." etc */
bool alt = FALSE; /* has "%#..." */
bool left = FALSE; /* has "%-..." */
- char fill = ' '; /* has "%0..." */
+ bool fill = FALSE; /* has "%0..." */
char plus = 0; /* has "%+..." */
STRLEN width = 0; /* value of "%NNN..." */
bool has_precis = FALSE; /* has "%.NNN..." */
STRLEN precis = 0; /* value of "%.NNN..." */
- bool asterisk = FALSE; /* has "%*..." */
bool used_explicit_ix = FALSE;/* has "%$n..." */
- unsigned base = 0; /* base to print in, e.g. 8 for %o */
+ int base = 0; /* base to print in, e.g. 8 for %o */
UV uv = 0; /* the value to print of int-ish args */
- IV iv = 0; /* ditto for signed types */
bool vectorize = FALSE; /* has "%v..." */
- bool vectorarg = FALSE; /* has "%*v..." */
- SV *vecsv = NULL; /* the cur arg for %v */
- bool vec_utf8 = FALSE; /* SvUTF8(vecsv) */
- const U8 *vecstr = NULL; /* SvPVX(vecsv) */
- STRLEN veclen = 0; /* SvCUR(vecsv) */
- const char *dotstr = "."; /* separator string for %v */
- STRLEN dotstrlen = 1; /* length of separator string for %v */
-
- I32 efix = 0; /* explicit format parameter index */
- I32 ewix = 0; /* explicit width index */
- I32 epix = 0; /* explicit precision index */
- I32 evix = 0; /* explicit vector index */
- const I32 osvix = svix; /* original index in case of bad fmt */
+ bool vec_utf8 = FALSE; /* SvUTF8(vec arg) */
+ const U8 *vecstr = NULL; /* SvPVX(vec arg) */
+ STRLEN veclen = 0; /* SvCUR(vec arg) */
+ const char *dotstr = NULL; /* separator string for %v */
+ STRLEN dotstrlen; /* length of separator string for %v */
+
+ Size_t efix = 0; /* explicit format parameter index */
+ const Size_t osvix = svix; /* original index in case of bad fmt */
bool is_utf8 = FALSE; /* is this item utf8? */
bool arg_missing = FALSE; /* give "Missing argument" warning */
STRLEN elen = 0; /* the length of the element string */
const char *fmtstart; /* start of current format (the '%') */
- char c = 0; /* current character read from format */
+ char c; /* the actual format ('d', s' etc) */
/* echo everything up to the next format specification */
[%bcdefginopsuxDFOUX] format (mandatory)
*/
- if (args) {
-/*
- As of perl5.9.3, printf format checking is on by default.
- Internally, perl uses %p formats to provide an escape to
- some extended formatting. This block deals with those
- extensions: if it does not match, (char*)q is reset and
- the normal format processing code is used.
-
- Currently defined extensions are:
- %p include pointer address (standard)
- %-p (SVf) include an SV (previously %_)
- %-<num>p include an SV with precision <num>
- %2p include a HEK
- %3p include a HEK with precision of 256
- %4p char* preceded by utf8 flag and length
- %<num>p (where num is 1 or > 4) reserved for future
- extensions
-
- Robin Barker 2005-07-14 (but modified since)
-
- %1p (VDf) removed. RMB 2007-10-19
-*/
- char* r = q;
- bool sv = FALSE;
- STRLEN n = 0;
- if (*q == '-')
- sv = *q++;
- else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
- /* The argument has already gone through cBOOL, so the cast
- is safe. */
- is_utf8 = (bool)va_arg(*args, int);
- elen = va_arg(*args, UV);
- /* if utf8 length is larger than 0x7ffff..., then it might
- * have been a signed value that wrapped */
- if (elen > ((~(STRLEN)0) >> 1)) {
- assert(0); /* in DEBUGGING build we want to crash */
- elen= 0; /* otherwise we want to treat this as an empty string */
- }
- eptr = va_arg(*args, char *);
- q += sizeof(UTF8f)-1;
- goto string;
- }
- n = expect_number(&q);
- if (*q++ == 'p') {
- if (sv) { /* SVf */
- if (n) {
- precis = n;
- has_precis = TRUE;
- }
- argsv = MUTABLE_SV(va_arg(*args, void*));
- eptr = SvPV_const(argsv, elen);
- if (DO_UTF8(argsv))
- is_utf8 = TRUE;
- goto string;
- }
- else if (n==2 || n==3) { /* HEKf */
- HEK * const hek = va_arg(*args, HEK *);
- eptr = HEK_KEY(hek);
- elen = HEK_LEN(hek);
- if (HEK_UTF8(hek)) is_utf8 = TRUE;
- if (n==3) precis = 256, has_precis = TRUE;
- goto string;
- }
- else if (n) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
- "internal %%<num>p might conflict with future printf extensions");
- }
- }
- q = r;
- }
-
- if ( (width = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ width = expect_number(&q);
if (*q == '$') {
if (args)
Perl_croak_nocontext(
"Cannot yet reorder sv_catpvfn() arguments from va_list");
++q;
- efix = width;
+ efix = (Size_t)width;
+ width = 0;
used_explicit_ix = TRUE;
} else {
goto gotwidth;
continue;
case '0':
- fill = *q++;
+ fill = TRUE;
+ q++;
continue;
case '#':
break;
}
+ /* at this point we can expect one of:
+ *
+ * 123 an explicit width
+ * * width taken from next arg
+ * *12$ width taken from 12th arg
+ * or no width
+ *
+ * But any width specification may be preceded by a v, in one of its
+ * forms:
+ * v
+ * *v
+ * *12$v
+ * So an asterisk may be either a width specifier or a vector
+ * separator arg specifier, and we don't know which initially
+ */
+
tryasterisk:
if (*q == '*') {
+ STRLEN ix; /* explicit width/vector separator index */
q++;
- if ( (ewix = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ ix = expect_number(&q);
if (*q++ == '$') {
if (args)
Perl_croak_nocontext(
} else
goto unknown;
}
- asterisk = TRUE;
- }
- if (*q == 'v') {
+ else
+ ix = 0;
+
+ if (*q == 'v') {
+ SV *vecsv;
+ /* The asterisk was for *v, *NNN$v: vectorizing, but not
+ * with the default "." */
+ q++;
+ if (vectorize)
+ goto unknown;
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ vecsv = ix < svmax ? svargs[ix]
+ : (arg_missing = TRUE, &PL_sv_no);
+ }
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+ bad with tied or overloaded values that return UTF8. */
+ if (DO_UTF8(vecsv))
+ is_utf8 = TRUE;
+ else if (has_utf8) {
+ vecsv = sv_mortalcopy(vecsv);
+ sv_utf8_upgrade(vecsv);
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ is_utf8 = TRUE;
+ }
+ vectorize = TRUE;
+ goto tryasterisk;
+ }
+
+ /* the asterisk specified a width */
+ {
+ int i = 0;
+ SV *sv = NULL;
+ if (args)
+ i = va_arg(*args, int);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ sv = (ix < svmax) ? svargs[ix]
+ : (arg_missing = TRUE, (SV*)NULL);
+ }
+ width = S_sprintf_arg_num_val(aTHX_ args, i, sv, &left);
+ }
+ }
+ else if (*q == 'v') {
q++;
if (vectorize)
goto unknown;
- if ((vectorarg = asterisk)) {
- evix = ewix;
- ewix = 0;
- asterisk = FALSE;
- }
vectorize = TRUE;
- goto tryasterisk;
- }
+ dotstr = ".";
+ dotstrlen = 1;
+ goto tryasterisk;
- if (!asterisk)
- {
- if( *q == '0' )
- fill = *q++;
- width = expect_number(&q);
- }
-
- if (vectorize && vectorarg) {
- /* vectorizing, but not with the default "." */
- if (args)
- vecsv = va_arg(*args, SV*);
- else if (evix) {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, svix < svmax, svargs[svix++]);
- }
- dotstr = SvPV_const(vecsv, dotstrlen);
- /* Keep the DO_UTF8 test *after* the SvPV call, else things go
- bad with tied or overloaded values that return UTF8. */
- if (DO_UTF8(vecsv))
- is_utf8 = TRUE;
- else if (has_utf8) {
- vecsv = sv_mortalcopy(vecsv);
- sv_utf8_upgrade(vecsv);
- dotstr = SvPV_const(vecsv, dotstrlen);
- is_utf8 = TRUE;
- }
+ }
+ else {
+ /* explicit width? */
+ if(*q == '0') {
+ fill = TRUE;
+ q++;
+ }
+ if (IS_1_TO_9(*q))
+ width = expect_number(&q);
}
- if (asterisk) {
- int i;
- if (args)
- i = va_arg(*args, int);
- else
- i = (ewix ? ewix <= svmax : svix < svmax) ?
- SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0;
- left |= (i < 0);
- width = (i < 0) ? -i : i;
- }
gotwidth:
/* PRECISION */
if (*q == '.') {
q++;
if (*q == '*') {
- int i;
+ STRLEN ix; /* explicit precision index */
q++;
- if ( (epix = expect_number(&q)) ) {
+ if (IS_1_TO_9(*q)) {
+ ix = expect_number(&q);
if (*q++ == '$') {
if (args)
Perl_croak_nocontext(
} else
goto unknown;
}
- if (args)
- i = va_arg(*args, int);
- else {
- SV *precsv;
- if (epix)
- FETCH_VCATPVFN_ARGUMENT(
- precsv, epix > 0 && epix <= svmax, svargs[epix-1]);
- else
- FETCH_VCATPVFN_ARGUMENT(
- precsv, svix < svmax, svargs[svix++]);
- i = precsv == &PL_sv_no ? 0 : SvIVx(precsv);
+ else
+ ix = 0;
+
+ {
+ int i = 0;
+ SV *sv = NULL;
+ bool neg = FALSE;
+
+ if (args)
+ i = va_arg(*args, int);
+ else {
+ ix = ix ? ix - 1 : svix++;
+ sv = (ix < svmax) ? svargs[ix]
+ : (arg_missing = TRUE, (SV*)NULL);
+ }
+ precis = S_sprintf_arg_num_val(aTHX_ args, i, sv, &neg);
+ has_precis = !neg;
}
- precis = i;
- has_precis = !(i < 0);
}
else {
- precis = 0;
- while (isDIGIT(*q))
- precis = precis * 10 + (*q++ - '0');
+ /* although it doesn't seem documented, this code has long
+ * behaved so that:
+ * no digits following the '.' is treated like '.0'
+ * the number may be preceded by any number of zeroes,
+ * e.g. "%.0001f", which is the same as "%.1f"
+ * so I've kept that behaviour. DAPM May 2017
+ */
+ while (*q == '0')
+ q++;
+ precis = IS_1_TO_9(*q) ? expect_number(&q) : 0;
has_precis = TRUE;
}
}
- if (vectorize) {
- if (args) {
- VECTORIZE_ARGS
- }
- else if (efix ? (efix > 0 && efix <= svmax) : svix < svmax) {
- vecsv = svargs[efix ? efix-1 : svix++];
- vecstr = (U8*)SvPV_const(vecsv,veclen);
- vec_utf8 = DO_UTF8(vecsv);
-
- /* if this is a version object, we need to convert
- * back into v-string notation and then let the
- * vectorize happen normally
- */
- if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) {
- if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF),
- "vector argument not supported with alpha versions");
- goto vdblank;
- }
- vecsv = sv_newmortal();
- scan_vstring((char *)vecstr, (char *)vecstr + veclen,
- vecsv);
- vecstr = (U8*)SvPV_const(vecsv, veclen);
- vec_utf8 = DO_UTF8(vecsv);
- }
- }
- else {
- vdblank:
- vecstr = (U8*)"";
- veclen = 0;
- }
- }
-
/* SIZE */
switch (*q) {
/* CONVERSION */
- if (*q == '%') {
- eptr = q++;
+ c = *q++; /* c now holds the conversion type */
+
+ /* '%' doesn't have an arg, so skip arg processing */
+ if (c == '%') {
+ eptr = q - 1;
elen = 1;
- if (vectorize) {
- c = '%';
+ if (vectorize)
goto unknown;
- }
goto string;
}
- if (!vectorize && !args) {
- if (efix) {
- const I32 i = efix-1;
- FETCH_VCATPVFN_ARGUMENT(argsv, i >= 0 && i < svmax, svargs[i]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(argsv, svix >= 0 && svix < svmax,
- svargs[svix++]);
- }
- }
+ if (vectorize && !strchr("BbDdiOouUXx", c))
+ goto unknown;
- c = *q++; /* c now holds the conversion type */
+ /* get next arg (individual branches do their own va_arg()
+ * handling for the args case) */
+
+ if (!args) {
+ efix = efix ? efix - 1 : svix++;
+ argsv = efix < svmax ? svargs[efix]
+ : (arg_missing = TRUE, &PL_sv_no);
+ }
- if (argsv && strchr("BbcDdiOopuUXx", c)) {
- /* XXX va_arg(*args) case? need peek, use va_copy? */
- SvGETMAGIC(argsv);
- if (UNLIKELY(SvAMAGIC(argsv)))
- argsv = sv_2num(argsv);
- if (UNLIKELY(isinfnansv(argsv)))
- goto handle_infnan_argsv;
- }
switch (c) {
/* STRINGS */
- case 'c':
- if (vectorize)
- goto unknown;
- uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
- if ((uv > 255 ||
- (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
- && !IN_BYTES)
- {
- assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
- eptr = ebuf;
- elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
- is_utf8 = TRUE;
- }
- else {
- c = (char)uv;
- eptr = &c;
- elen = 1;
- }
- goto string;
-
case 's':
- if (vectorize)
- goto unknown;
if (args) {
eptr = va_arg(*args, char*);
if (eptr)
/* INTEGERS */
case 'p':
- if (alt || vectorize)
+ if (alt)
goto unknown;
+
+ /* %p extensions:
+ *
+ * "%...p" is normally treated like "%...x", except that the
+ * number to print is the SV's address (or a pointer address
+ * for C-ish sprintf).
+ *
+ * However, the C-ish sprintf variant allows a few special
+ * extensions. These are currently:
+ *
+ * %-p (SVf) Like %s, but gets the string from an SV*
+ * arg rather than a char* arg.
+ * (This was previously %_).
+ *
+ * %-<num>p Ditto but like %.<num>s (i.e. num is max width)
+ *
+ * %2p (HEKf) Like %s, but using the key string in a HEK
+ *
+ * %3p (HEKf256) Ditto but like %.256s
+ *
+ * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
+ * (cBOOL(utf8), len, string_buf).
+ * It's handled by the "case 'd'" branch
+ * rather than here.
+ *
+ * %<num>p where num is 1 or > 4: reserved for future
+ * extensions. Warns, but then is treated as a
+ * general %p (print hex address) format.
+ */
+
+ if ( args
+ && !intsize
+ && !fill
+ && !plus
+ && !has_precis
+ /* not %*p or %*1$p - any width was explicit */
+ && q[-2] != '*'
+ && q[-2] != '$'
+ && !used_explicit_ix
+ ) {
+ if (left) { /* %-p (SVf), %-NNNp */
+ if (width) {
+ precis = width;
+ has_precis = TRUE;
+ }
+ argsv = MUTABLE_SV(va_arg(*args, void*));
+ eptr = SvPV_const(argsv, elen);
+ if (DO_UTF8(argsv))
+ is_utf8 = TRUE;
+ width = 0;
+ goto string;
+ }
+ else if (width == 2 || width == 3) { /* HEKf, HEKf256 */
+ HEK * const hek = va_arg(*args, HEK *);
+ eptr = HEK_KEY(hek);
+ elen = HEK_LEN(hek);
+ if (HEK_UTF8(hek))
+ is_utf8 = TRUE;
+ if (width == 3) {
+ precis = 256;
+ has_precis = TRUE;
+ }
+ width = 0;
+ goto string;
+ }
+ else if (width) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "internal %%<num>p might conflict with future printf extensions");
+ }
+ }
+
+ /* treat as normal %...p */
+
uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
base = 16;
- goto integer;
+ goto do_integer;
+
+ case 'c':
+ /* Ignore any size specifiers, since they're not documented as
+ * being allowed for %c (ideally we should warn on e.g. '%hc').
+ * Setting a default intsize, along with a positive
+ * (which signals unsigned) base, causes, for C-ish use, the
+ * va_arg to be interpreted as as unsigned int, when it's
+ * actually signed, which will convert -ve values to high +ve
+ * values. Note that unlike the libc %c, values > 255 will
+ * convert to high unicode points rather than being truncated
+ * to 8 bits. For perlish use, it will do SvUV(argsv), which
+ * will again convert -ve args to high -ve values.
+ */
+ intsize = 0;
+ base = 1; /* special value that indicates we're doing a 'c' */
+ goto get_int_arg_val;
case 'D':
#ifdef IV_IS_QUAD
#else
intsize = 'l';
#endif
- /* FALLTHROUGH */
+ base = -10;
+ goto get_int_arg_val;
+
case 'd':
- case 'i':
- if (vectorize) {
- STRLEN ulen;
- if (!veclen)
- goto donevalidconversion;
- if (vec_utf8)
- uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
- UTF8_ALLOW_ANYUV);
- else {
- uv = *vecstr;
- ulen = 1;
- }
- vecstr += ulen;
- veclen -= ulen;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else if (args) {
- switch (intsize) {
- case 'c': iv = (char)va_arg(*args, int); break;
- case 'h': iv = (short)va_arg(*args, int); break;
- case 'l': iv = va_arg(*args, long); break;
- case 'V': iv = va_arg(*args, IV); break;
- case 'z': iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
- case 't': iv = va_arg(*args, ptrdiff_t); break;
-#endif
- default: iv = va_arg(*args, int); break;
-#ifdef I_STDINT
- case 'j': iv = va_arg(*args, intmax_t); break;
-#endif
- case 'q':
-#if IVSIZE >= 8
- iv = va_arg(*args, Quad_t); break;
-#else
- goto unknown;
-#endif
- }
- }
- else {
- IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': iv = (char)tiv; break;
- case 'h': iv = (short)tiv; break;
- case 'l': iv = (long)tiv; break;
- case 'V':
- default: iv = tiv; break;
- case 'q':
-#if IVSIZE >= 8
- iv = (Quad_t)tiv; break;
-#else
- goto unknown;
-#endif
- }
- }
- if ( !vectorize ) /* we already set uv above */
- {
- if (iv >= 0) {
- uv = iv;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else {
- uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
- esignbuf[esignlen++] = '-';
- }
+ /* probably just a plain %d, but it might be the start of the
+ * special UTF8f format, which usually looks something like
+ * "%d%lu%4p" (the lu may vary by platform)
+ */
+ assert((UTF8f)[0] == 'd');
+ assert((UTF8f)[1] == '%');
+
+ if ( args /* UTF8f only valid for C-ish sprintf */
+ && q == fmtstart + 1 /* plain %d, not %....d */
+ && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
+ && *q == '%'
+ && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3))
+ {
+ /* The argument has already gone through cBOOL, so the cast
+ is safe. */
+ is_utf8 = (bool)va_arg(*args, int);
+ elen = va_arg(*args, UV);
+ /* if utf8 length is larger than 0x7ffff..., then it might
+ * have been a signed value that wrapped */
+ if (elen > ((~(STRLEN)0) >> 1)) {
+ assert(0); /* in DEBUGGING build we want to crash */
+ elen = 0; /* otherwise we want to treat this as an empty string */
+ }
+ eptr = va_arg(*args, char *);
+ q += sizeof(UTF8f) - 2;
+ goto string;
}
- base = 10;
- goto integer;
+
+ /* FALLTHROUGH */
+ case 'i':
+ base = -10;
+ goto get_int_arg_val;
case 'U':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'u':
base = 10;
- goto uns_integer;
+ goto get_int_arg_val;
case 'B':
case 'b':
base = 2;
- goto uns_integer;
+ goto get_int_arg_val;
case 'O':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'o':
base = 8;
- goto uns_integer;
+ goto get_int_arg_val;
case 'X':
case 'x':
base = 16;
- uns_integer:
+ get_int_arg_val:
+
if (vectorize) {
STRLEN ulen;
- vector:
+ SV *vecsv;
+
+ if (base < 0) {
+ base = -base;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+
+ /* initialise the vector string to iterate over */
+
+ vecsv = args ? va_arg(*args, SV*) : argsv;
+
+ /* if this is a version object, we need to convert
+ * back into v-string notation and then let the
+ * vectorize happen normally
+ */
+ if (sv_isobject(vecsv) && sv_derived_from(vecsv, "version")) {
+ if ( hv_existss(MUTABLE_HV(SvRV(vecsv)), "alpha") ) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_PRINTF),
+ "vector argument not supported with alpha versions");
+ vecsv = &PL_sv_no;
+ }
+ else {
+ vecstr = (U8*)SvPV_const(vecsv,veclen);
+ vecsv = sv_newmortal();
+ scan_vstring((char *)vecstr, (char *)vecstr + veclen,
+ vecsv);
+ }
+ }
+ vecstr = (U8*)SvPV_const(vecsv, veclen);
+ vec_utf8 = DO_UTF8(vecsv);
+
+ /* This is the re-entry point for when we're iterating
+ * over the individual characters of a vector arg */
+ vector:
if (!veclen)
goto donevalidconversion;
if (vec_utf8)
vecstr += ulen;
veclen -= ulen;
}
- else if (args) {
- switch (intsize) {
- case 'c': uv = (unsigned char)va_arg(*args, unsigned); break;
- case 'h': uv = (unsigned short)va_arg(*args, unsigned); break;
- case 'l': uv = va_arg(*args, unsigned long); break;
- case 'V': uv = va_arg(*args, UV); break;
- case 'z': uv = va_arg(*args, Size_t); break;
+ else {
+ /* test arg for inf/nan. This can trigger an unwanted
+ * 'str' overload, so manually force 'num' overload first
+ * if necessary */
+ if (argsv) {
+ SvGETMAGIC(argsv);
+ if (UNLIKELY(SvAMAGIC(argsv)))
+ argsv = sv_2num(argsv);
+ if (UNLIKELY(isinfnansv(argsv)))
+ goto handle_infnan_argsv;
+ }
+
+ if (base < 0) {
+ /* signed int type */
+ IV iv;
+ base = -base;
+ if (args) {
+ switch (intsize) {
+ case 'c': iv = (char)va_arg(*args, int); break;
+ case 'h': iv = (short)va_arg(*args, int); break;
+ case 'l': iv = va_arg(*args, long); break;
+ case 'V': iv = va_arg(*args, IV); break;
+ case 'z': iv = va_arg(*args, SSize_t); break;
#ifdef HAS_PTRDIFF_T
- case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+ case 't': iv = va_arg(*args, ptrdiff_t); break;
#endif
+ default: iv = va_arg(*args, int); break;
#ifdef I_STDINT
- case 'j': uv = va_arg(*args, uintmax_t); break;
+ case 'j': iv = va_arg(*args, intmax_t); break;
#endif
- default: uv = va_arg(*args, unsigned); break;
- case 'q':
+ case 'q':
#if IVSIZE >= 8
- uv = va_arg(*args, Uquad_t); break;
+ iv = va_arg(*args, Quad_t); break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
- else {
- UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': uv = (unsigned char)tuv; break;
- case 'h': uv = (unsigned short)tuv; break;
- case 'l': uv = (unsigned long)tuv; break;
- case 'V':
- default: uv = tuv; break;
- case 'q':
+ }
+ }
+ else {
+ IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': iv = (char)tiv; break;
+ case 'h': iv = (short)tiv; break;
+ case 'l': iv = (long)tiv; break;
+ case 'V':
+ default: iv = tiv; break;
+ case 'q':
#if IVSIZE >= 8
- uv = (Uquad_t)tuv; break;
+ iv = (Quad_t)tiv; break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
+ }
+ }
- integer:
+ /* now convert iv to uv */
+ if (iv >= 0) {
+ uv = iv;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+ else {
+ uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+ esignbuf[esignlen++] = '-';
+ }
+ }
+ else {
+ /* unsigned int type */
+ if (args) {
+ switch (intsize) {
+ case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+ break;
+ case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+ break;
+ case 'l': uv = va_arg(*args, unsigned long); break;
+ case 'V': uv = va_arg(*args, UV); break;
+ case 'z': uv = va_arg(*args, Size_t); break;
+#ifdef HAS_PTRDIFF_T
+ /* will sign extend, but there is no
+ * uptrdiff_t, so oh well */
+ case 't': uv = va_arg(*args, ptrdiff_t); break;
+#endif
+#ifdef I_STDINT
+ case 'j': uv = va_arg(*args, uintmax_t); break;
+#endif
+ default: uv = va_arg(*args, unsigned); break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = va_arg(*args, Uquad_t); break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ else {
+ UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': uv = (unsigned char)tuv; break;
+ case 'h': uv = (unsigned short)tuv; break;
+ case 'l': uv = (unsigned long)tuv; break;
+ case 'V':
+ default: uv = tuv; break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = (Uquad_t)tuv; break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ }
+ }
+
+ do_integer:
{
char *ptr = ebuf + sizeof ebuf;
bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
} while (uv >>= 1);
if (tempalt) {
esignbuf[esignlen++] = '0';
- esignbuf[esignlen++] = c;
+ esignbuf[esignlen++] = c; /* 'b' or 'B' */
}
break;
+
+ case 1:
+ /* special-case: base 1 indicates a 'c' format:
+ * we use the common code for extracting a uv,
+ * but handle that value differently here than
+ * all the other int types */
+ if ((uv > 255 ||
+ (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+ && !IN_BYTES)
+ {
+ assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+ eptr = ebuf;
+ elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+ is_utf8 = TRUE;
+ }
+ else {
+ eptr = ebuf;
+ ebuf[0] = (char)uv;
+ elen = 1;
+ }
+ goto string;
+
default: /* it had better be ten or less */
do {
dig = uv % base;
&& !(base == 8 && alt)) /* "%#.0o" prints "0" */
elen = 0;
- /* a precision nullifies the 0 flag. */
- if (fill == '0')
- fill = ' ';
+ /* a precision nullifies the 0 flag. */
+ fill = FALSE;
}
}
break;
vcatpvfn_long_double_t fv;
NV nv;
- if (vectorize)
- goto unknown;
-
/* This is evil, but floating point is even more evil */
/* for SV-style calling, we can only get NV
&& !precis
&& has_precis
&& !(width || left || plus || alt)
- && fill != '0'
+ && !fill
&& intsize != 'q'
&& ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
)
* First, here are the constant bits. For ease of calculation
* we over-estimate the needed buffer size, for example by
* assuming all formats have an exponent and a leading 0x1.
+ *
+ * Also for production use, add a little extra overhead for
+ * safety's sake. Under debugging don't, as it means we're more
+ * more likely to quickly spot issues during development.
*/
float_need = 1 /* possible unary minus */
+ 4 /* "0x1" plus very unlikely carry */
+ 2 /* "e-", "p+" etc */
+ 6 /* exponent: up to 16383 (quad fp) */
+#ifndef DEBUGGING
+ + 20 /* safety net */
+#endif
+ 1; /* \0 */
if (i > 0) {
digits = BIT_DIGITS(i);
- if (float_need >= ((STRLEN)~0) - digits)
- croak_memory_wrap();
+ /* this can't overflow. 'digits' will only be a few
+ * thousand even for the largest floating-point types.
+ * And up until now float_need is just some small
+ * constants plus radix_len, which can't be in
+ * overflow territory unless the radix SV is consuming
+ * over 1/2 the address space */
+ assert(float_need < ((STRLEN)~0) - digits);
float_need += digits;
}
}
#else
NVSIZE * 2; /* 2 hexdigits for each byte */
#endif
- if (float_need >= ((STRLEN)~0) - digits)
- croak_memory_wrap();
+ /* see "this can't overflow" comment above */
+ assert(float_need < ((STRLEN)~0) - digits);
float_need += digits;
}
}
&& float_need < sizeof(ebuf)
&& sizeof(ebuf) - float_need > precis
&& !(width || left || plus || alt)
- && fill != '0'
+ && !fill
&& intsize != 'q'
) {
SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis);
if (float_need < width)
float_need = width;
-/* We should have correctly calculated (or indeed over-estimated) the
- * buffer size, but you never know what strange floating-point systems
- * there are out there. So for production use, add a little extra overhead.
- * Under debugging don't, as it means we more more likely to quickly spot
- * issues during development.
- */
-#ifndef DEBUGGING
- if (float_need >= ((STRLEN)~0) - 20)
- croak_memory_wrap();
- float_need += 20; /* safety fudge factor */
-#endif
-
if (PL_efloatsize < float_need) {
Safefree(PL_efloatbuf);
PL_efloatsize = float_need;
base = width;
do { *--ptr = '0' + (base % 10); } while (base /= 10);
}
- if (fill == '0')
- *--ptr = fill;
+ if (fill)
+ *--ptr = '0';
if (left)
*--ptr = '-';
if (plus)
assert(!zeros);
assert(!esignlen);
- assert(!vectorize);
assert(elen);
assert(elen >= width);
case 'n':
{
- int i;
- if (vectorize)
- goto unknown;
- i = SvCUR(sv) - origlen;
+ STRLEN len;
+ /* XXX ideally we should warn if any flags etc have been
+ * set, e.g. "%-4.5n" */
+ /* XXX if sv was originally non-utf8 with a char in the
+ * range 0x80-0xff, then if it got upgraded, we should
+ * calculate char len rather than byte len here */
+ len = SvCUR(sv) - origlen;
if (args) {
+ int i = (len > PERL_INT_MAX) ? PERL_INT_MAX : (int)len;
+
switch (intsize) {
case 'c': *(va_arg(*args, char*)) = i; break;
case 'h': *(va_arg(*args, short*)) = i; break;
#endif
}
}
- else
- sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
+ else {
+ if (arg_missing)
+ Perl_croak_nocontext(
+ "Missing argument for %%n in %s",
+ PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
+ sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)len);
+ }
goto donevalidconversion;
}
need = (have > width ? have : width);
gap = need - have;
- if (need >= (((STRLEN)~0) - dotstrlen))
- croak_memory_wrap();
- need += dotstrlen;
-
if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1)))
croak_memory_wrap();
need += (SvCUR(sv) + 1);
SvGROW(sv, need);
p = SvEND(sv);
- if (esignlen && fill == '0') {
- int i;
- for (i = 0; i < (int)esignlen; i++)
+ if (esignlen && fill) {
+ STRLEN i;
+ for (i = 0; i < esignlen; i++)
*p++ = esignbuf[i];
}
if (gap && !left) {
- memset(p, fill, gap);
+ memset(p, (fill ? '0' : ' '), gap);
p += gap;
}
- if (esignlen && fill != '0') {
- int i;
- for (i = 0; i < (int)esignlen; i++)
+ if (esignlen && !fill) {
+ STRLEN i;
+ for (i = 0; i < esignlen; i++)
*p++ = esignbuf[i];
}
if (zeros) {
- int i;
+ STRLEN i;
for (i = zeros; i; i--)
*p++ = '0';
}
memset(p, ' ', gap);
p += gap;
}
- if (vectorize) {
- if (veclen) {
- Copy(dotstr, p, dotstrlen, char);
- p += dotstrlen;
- }
- else
- vectorize = FALSE; /* done iterating over vecstr */
- }
if (is_utf8)
has_utf8 = TRUE;
if (has_utf8)
SvCUR_set(sv, p - SvPVX_const(sv));
}
- if (vectorize) {
- esignlen = 0;
- goto vector;
+ if (vectorize && veclen) {
+ /* we append the vector separator separately since %v isn't
+ * very common: don't slow down the general case by adding
+ * dotstrlen to need etc */
+ sv_catpvn_nomg(sv, dotstr, dotstrlen);
+ esignlen = 0;
+ goto vector; /* do next iteration */
}
donevalidconversion: