* string.
* The rest of the args have the same meaning as the local vars of the
* same name within Perl_sv_vcatpvfn_flags().
+ *
+ * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED();
*/
static STRLEN
S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
const NV nv, const vcatpvfn_long_double_t fv,
bool has_precis, STRLEN precis, STRLEN width,
- bool alt, char plus, bool left, char fill)
+ bool alt, char plus, bool left, bool fill)
{
/* Hexadecimal floating point. */
char* p = buf;
bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
bool negative = FALSE;
STRLEN elen;
- DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
/* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
*
#ifndef USE_LOCALE_NUMERIC
*p++ = '.';
#else
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+ if (PL_numeric_radix_sv) {
STRLEN n;
const char* r = SvPV(PL_numeric_radix_sv, n);
+ assert(IN_LC(LC_NUMERIC));
Copy(r, p, n, char);
p += n;
}
else {
*p++ = '.';
}
- RESTORE_LC_NUMERIC();
#endif
}
/* Pad the back with spaces. */
memset(buf + elen, ' ', gap);
}
- else if (fill == '0') {
+ else if (fill) {
/* Insert the zeros after the "0x" and the
* the potential sign, but before the digits,
* otherwise we end up with "0000xH.HHH...",
nmove--;
}
Move(zerox, zerox + nzero, nmove, char);
- memset(zerox, fill, nzero);
+ memset(zerox, fill ? '0' : ' ', nzero);
}
else {
/* Move it to the right. */
* Plus 32: Playing safe. */
char ebuf[IV_DIG * 4 + NV_DIG + 32];
bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
- bool hexfp = FALSE; /* hexadecimal floating point? */
-
+#ifdef USE_LOCALE_NUMERIC
DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+ bool lc_numeric_set = FALSE; /* called STORE_LC_NUMERIC_SET_TO_NEEDED? */
+#endif
PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
PERL_UNUSED_ARG(maybe_tainted);
/* no matter what, this is a string now */
(void)SvPV_force_nomg(sv, origlen);
+ /* the code that scans for flags etc following a % relies on
+ * a '\0' being present to avoid falling off the end. Ideally that
+ * should be fixed */
+ assert(pat[patlen] == '\0');
+
/* special-case "", "%s", and "%-p" (SVf - see below) */
if (patlen == 0) {
if (svmax && ckWARN(WARN_REDUNDANT))
}
#if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
- /* special-case "%.0f" and "%.<number>g" */
- if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.'
- && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) {
- unsigned digits = 0;
- const char *pp;
-
- pp = pat + 2;
- while (*pp >= '0' && *pp <= '9')
- digits = 10 * digits + (*pp++ - '0');
-
- /* XXX: Why do this `svix < svmax` test? Couldn't we just
- format the first argument and WARN_REDUNDANT if svmax > 1?
- Munged by Nicholas Clark in v5.13.0-209-g95ea86d */
- if (pp + 1 == pat + patlen && svix < svmax) {
- const NV nv = SvNV(*svargs);
- if (LIKELY(!Perl_isinfnan(nv))) {
- if (*pp == 'g') {
- /* Add check for digits != 0 because it seems that some
- gconverts are buggy in this case, and we don't yet have
- a Configure test for this. */
- if (digits && digits < sizeof(ebuf) - NV_DIG - 10) {
- /* 0, point, slack */
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- SNPRINTF_G(nv, ebuf, sizeof(ebuf), digits);
- sv_catpv_nomg(sv, ebuf);
- if (*ebuf) /* May return an empty string for digits==0 */
- return;
- }
- } else if (!digits) {
- STRLEN l;
+ /* special-case "%.0f" */
+ if ( !args
+ && patlen == 4
+ && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f'
+ && svmax > 0)
+ {
+ const NV nv = SvNV(*svargs);
+ if (LIKELY(!Perl_isinfnan(nv))) {
+ STRLEN l;
+ char *p;
- if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
- sv_catpvn_nomg(sv, p, l);
- return;
- }
- }
+ if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
+ sv_catpvn_nomg(sv, p, l);
+ return;
}
}
}
#endif /* !USE_LONG_DOUBLE */
- if (!args && svix < svmax && DO_UTF8(*svargs))
- has_utf8 = TRUE;
-
patend = (char*)pat + patlen;
for (p = (char*)pat; p < patend; p = q) {
char intsize = 0; /* size qualifier in "%hi..." etc */
bool alt = FALSE; /* has "%#..." */
bool left = FALSE; /* has "%-..." */
- char fill = ' '; /* has "%0..." */
+ bool fill = FALSE; /* has "%0..." */
char plus = 0; /* has "%+..." */
STRLEN width = 0; /* value of "%NNN..." */
bool has_precis = FALSE; /* has "%.NNN..." */
STRLEN precis = 0; /* value of "%.NNN..." */
bool asterisk = FALSE; /* has "%*..." */
bool used_explicit_ix = FALSE;/* has "%$n..." */
- unsigned base = 0; /* base to print in, e.g. 8 for %o */
+ int base = 0; /* base to print in, e.g. 8 for %o */
UV uv = 0; /* the value to print of int-ish args */
IV iv = 0; /* ditto for signed types */
[%bcdefginopsuxDFOUX] format (mandatory)
*/
- if (args) {
-/*
- As of perl5.9.3, printf format checking is on by default.
- Internally, perl uses %p formats to provide an escape to
- some extended formatting. This block deals with those
- extensions: if it does not match, (char*)q is reset and
- the normal format processing code is used.
-
- Currently defined extensions are:
- %p include pointer address (standard)
- %-p (SVf) include an SV (previously %_)
- %-<num>p include an SV with precision <num>
- %2p include a HEK
- %3p include a HEK with precision of 256
- %4p char* preceded by utf8 flag and length
- %<num>p (where num is 1 or > 4) reserved for future
- extensions
-
- Robin Barker 2005-07-14 (but modified since)
-
- %1p (VDf) removed. RMB 2007-10-19
-*/
- char* r = q;
- bool sv = FALSE;
- STRLEN n = 0;
- if (*q == '-')
- sv = *q++;
- else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
- /* The argument has already gone through cBOOL, so the cast
- is safe. */
- is_utf8 = (bool)va_arg(*args, int);
- elen = va_arg(*args, UV);
- /* if utf8 length is larger than 0x7ffff..., then it might
- * have been a signed value that wrapped */
- if (elen > ((~(STRLEN)0) >> 1)) {
- assert(0); /* in DEBUGGING build we want to crash */
- elen= 0; /* otherwise we want to treat this as an empty string */
- }
- eptr = va_arg(*args, char *);
- q += sizeof(UTF8f)-1;
- goto string;
- }
- n = expect_number(&q);
- if (*q++ == 'p') {
- if (sv) { /* SVf */
- if (n) {
- precis = n;
- has_precis = TRUE;
- }
- argsv = MUTABLE_SV(va_arg(*args, void*));
- eptr = SvPV_const(argsv, elen);
- if (DO_UTF8(argsv))
- is_utf8 = TRUE;
- goto string;
- }
- else if (n==2 || n==3) { /* HEKf */
- HEK * const hek = va_arg(*args, HEK *);
- eptr = HEK_KEY(hek);
- elen = HEK_LEN(hek);
- if (HEK_UTF8(hek)) is_utf8 = TRUE;
- if (n==3) precis = 256, has_precis = TRUE;
- goto string;
- }
- else if (n) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
- "internal %%<num>p might conflict with future printf extensions");
- }
- }
- q = r;
- }
-
if ( (width = expect_number(&q)) ) {
if (*q == '$') {
if (args)
continue;
case '0':
- fill = *q++;
+ fill = TRUE;
+ q++;
continue;
case '#':
evix = ewix;
ewix = 0;
asterisk = FALSE;
+ /* vectorizing, but not with the default "." */
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else if (evix) {
+ FETCH_VCATPVFN_ARGUMENT(
+ vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
+ } else {
+ FETCH_VCATPVFN_ARGUMENT(
+ vecsv, svix < svmax, svargs[svix++]);
+ }
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+ bad with tied or overloaded values that return UTF8. */
+ if (DO_UTF8(vecsv))
+ is_utf8 = TRUE;
+ else if (has_utf8) {
+ vecsv = sv_mortalcopy(vecsv);
+ sv_utf8_upgrade(vecsv);
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ is_utf8 = TRUE;
+ }
}
vectorize = TRUE;
goto tryasterisk;
if (!asterisk)
{
- if( *q == '0' )
- fill = *q++;
+ if(*q == '0') {
+ fill = TRUE;
+ q++;
+ }
width = expect_number(&q);
}
- if (vectorize && vectorarg) {
- /* vectorizing, but not with the default "." */
- if (args)
- vecsv = va_arg(*args, SV*);
- else if (evix) {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, svix < svmax, svargs[svix++]);
- }
- dotstr = SvPV_const(vecsv, dotstrlen);
- /* Keep the DO_UTF8 test *after* the SvPV call, else things go
- bad with tied or overloaded values that return UTF8. */
- if (DO_UTF8(vecsv))
- is_utf8 = TRUE;
- else if (has_utf8) {
- vecsv = sv_mortalcopy(vecsv);
- sv_utf8_upgrade(vecsv);
- dotstr = SvPV_const(vecsv, dotstrlen);
- is_utf8 = TRUE;
- }
- }
if (asterisk) {
int i;
c = *q++; /* c now holds the conversion type */
- if (argsv && strchr("BbcDdiOopuUXx", c)) {
- /* XXX va_arg(*args) case? need peek, use va_copy? */
- SvGETMAGIC(argsv);
- if (UNLIKELY(SvAMAGIC(argsv)))
- argsv = sv_2num(argsv);
- if (UNLIKELY(isinfnansv(argsv)))
- goto handle_infnan_argsv;
- }
-
switch (c) {
/* STRINGS */
- case 'c':
- if (vectorize)
- goto unknown;
- uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
- if ((uv > 255 ||
- (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
- && !IN_BYTES)
- {
- assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
- eptr = ebuf;
- elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
- is_utf8 = TRUE;
- }
- else {
- c = (char)uv;
- eptr = &c;
- elen = 1;
- }
- goto string;
-
case 's':
if (vectorize)
goto unknown;
case 'p':
if (alt || vectorize)
goto unknown;
+
+ /* %p extensions:
+ *
+ * "%...p" is normally treated like "%...x", except that the
+ * number to print is the SV's address (or a pointer address
+ * for C-ish sprintf).
+ *
+ * However, the C-ish sprintf variant allows a few special
+ * extensions. These are currently:
+ *
+ * %-p (SVf) Like %s, but gets the string from an SV*
+ * arg rather than a char* arg.
+ * (This was previously %_).
+ *
+ * %-<num>p Ditto but like %.<num>s (i.e. num is max width)
+ *
+ * %2p (HEKf) Like %s, but using the key string in a HEK
+ *
+ * %3p (HEKf256) Ditto but like %.256s
+ *
+ * %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
+ * (cBOOL(utf8), len, string_buf).
+ * It's handled by the "case 'd'" branch
+ * rather than here.
+ *
+ * %<num>p where num is 1 or > 4: reserved for future
+ * extensions. Warns, but then is treated as a
+ * general %p (print hex address) format.
+ */
+
+ if ( args
+ && !intsize
+ && !fill
+ && !plus
+ && !has_precis
+ && !asterisk
+ && !used_explicit_ix
+ ) {
+ if (left) { /* %-p (SVf), %-NNNp */
+ if (width) {
+ precis = width;
+ has_precis = TRUE;
+ }
+ argsv = MUTABLE_SV(va_arg(*args, void*));
+ eptr = SvPV_const(argsv, elen);
+ if (DO_UTF8(argsv))
+ is_utf8 = TRUE;
+ width = 0;
+ goto string;
+ }
+ else if (width == 2 || width == 3) { /* HEKf, HEKf256 */
+ HEK * const hek = va_arg(*args, HEK *);
+ eptr = HEK_KEY(hek);
+ elen = HEK_LEN(hek);
+ if (HEK_UTF8(hek))
+ is_utf8 = TRUE;
+ if (width == 3) {
+ precis = 256;
+ has_precis = TRUE;
+ }
+ width = 0;
+ goto string;
+ }
+ else if (width) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+ "internal %%<num>p might conflict with future printf extensions");
+ }
+ }
+
+ /* treat as normal %...p */
+
uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
base = 16;
- goto integer;
+ goto do_integer;
+
+ case 'c':
+ if (vectorize)
+ goto unknown;
+ /* Ignore any size specifiers, since they're not documented as
+ * being allowed for %c (ideally we should warn on e.g. '%hc').
+ * Setting a default intsize, along with a positive
+ * (which signals unsigned) base, causes, for C-ish use, the
+ * va_arg to be interpreted as as unsigned int, when it's
+ * actually signed, which will convert -ve values to high +ve
+ * values. Note that unlike the libc %c, values > 255 will
+ * convert to high unicode points rather than being truncated
+ * to 8 bits. For perlish use, it will do SvUV(argsv), which
+ * will again convert -ve args to high -ve values.
+ */
+ intsize = 0;
+ base = 1; /* special value that indicates we're doing a 'c' */
+ goto get_int_arg_val;
case 'D':
#ifdef IV_IS_QUAD
#else
intsize = 'l';
#endif
- /* FALLTHROUGH */
+ base = -10;
+ goto get_int_arg_val;
+
case 'd':
- case 'i':
- if (vectorize) {
- STRLEN ulen;
- if (!veclen)
- goto donevalidconversion;
- if (vec_utf8)
- uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
- UTF8_ALLOW_ANYUV);
- else {
- uv = *vecstr;
- ulen = 1;
- }
- vecstr += ulen;
- veclen -= ulen;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else if (args) {
- switch (intsize) {
- case 'c': iv = (char)va_arg(*args, int); break;
- case 'h': iv = (short)va_arg(*args, int); break;
- case 'l': iv = va_arg(*args, long); break;
- case 'V': iv = va_arg(*args, IV); break;
- case 'z': iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
- case 't': iv = va_arg(*args, ptrdiff_t); break;
-#endif
- default: iv = va_arg(*args, int); break;
-#ifdef I_STDINT
- case 'j': iv = va_arg(*args, intmax_t); break;
-#endif
- case 'q':
-#if IVSIZE >= 8
- iv = va_arg(*args, Quad_t); break;
-#else
- goto unknown;
-#endif
- }
- }
- else {
- IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': iv = (char)tiv; break;
- case 'h': iv = (short)tiv; break;
- case 'l': iv = (long)tiv; break;
- case 'V':
- default: iv = tiv; break;
- case 'q':
-#if IVSIZE >= 8
- iv = (Quad_t)tiv; break;
-#else
- goto unknown;
-#endif
- }
- }
- if ( !vectorize ) /* we already set uv above */
- {
- if (iv >= 0) {
- uv = iv;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else {
- uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
- esignbuf[esignlen++] = '-';
- }
+ /* probably just a plain %d, but it might be the start of the
+ * special UTF8f format, which usually looks something like
+ * "%d%lu%4p" (the lu may vary by platform)
+ */
+ assert((UTF8f)[0] == 'd');
+ assert((UTF8f)[1] == '%');
+
+ if ( args /* UTF8f only valid for C-ish sprintf */
+ && q == fmtstart + 1 /* plain %d, not %....d */
+ && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
+ && *q == '%'
+ && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3))
+ {
+ /* The argument has already gone through cBOOL, so the cast
+ is safe. */
+ is_utf8 = (bool)va_arg(*args, int);
+ elen = va_arg(*args, UV);
+ /* if utf8 length is larger than 0x7ffff..., then it might
+ * have been a signed value that wrapped */
+ if (elen > ((~(STRLEN)0) >> 1)) {
+ assert(0); /* in DEBUGGING build we want to crash */
+ elen = 0; /* otherwise we want to treat this as an empty string */
+ }
+ eptr = va_arg(*args, char *);
+ q += sizeof(UTF8f) - 2;
+ goto string;
}
- base = 10;
- goto integer;
+
+ /* FALLTHROUGH */
+ case 'i':
+ base = -10;
+ goto get_int_arg_val;
case 'U':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'u':
base = 10;
- goto uns_integer;
+ goto get_int_arg_val;
case 'B':
case 'b':
base = 2;
- goto uns_integer;
+ goto get_int_arg_val;
case 'O':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'o':
base = 8;
- goto uns_integer;
+ goto get_int_arg_val;
case 'X':
case 'x':
base = 16;
- uns_integer:
+ get_int_arg_val:
+
if (vectorize) {
STRLEN ulen;
- vector:
+
+ if (base < 0) {
+ base = -base;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+
+ vector:
if (!veclen)
goto donevalidconversion;
if (vec_utf8)
vecstr += ulen;
veclen -= ulen;
}
- else if (args) {
- switch (intsize) {
- case 'c': uv = (unsigned char)va_arg(*args, unsigned); break;
- case 'h': uv = (unsigned short)va_arg(*args, unsigned); break;
- case 'l': uv = va_arg(*args, unsigned long); break;
- case 'V': uv = va_arg(*args, UV); break;
- case 'z': uv = va_arg(*args, Size_t); break;
+ else {
+ /* test arg for inf/nan. This can trigger an unwanted
+ * 'str' overload, so manually force 'num' overload first
+ * if necessary */
+ if (argsv) {
+ SvGETMAGIC(argsv);
+ if (UNLIKELY(SvAMAGIC(argsv)))
+ argsv = sv_2num(argsv);
+ if (UNLIKELY(isinfnansv(argsv)))
+ goto handle_infnan_argsv;
+ }
+
+ if (base < 0) {
+ /* signed int type */
+ base = -base;
+ if (args) {
+ switch (intsize) {
+ case 'c': iv = (char)va_arg(*args, int); break;
+ case 'h': iv = (short)va_arg(*args, int); break;
+ case 'l': iv = va_arg(*args, long); break;
+ case 'V': iv = va_arg(*args, IV); break;
+ case 'z': iv = va_arg(*args, SSize_t); break;
#ifdef HAS_PTRDIFF_T
- case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+ case 't': iv = va_arg(*args, ptrdiff_t); break;
#endif
+ default: iv = va_arg(*args, int); break;
#ifdef I_STDINT
- case 'j': uv = va_arg(*args, uintmax_t); break;
+ case 'j': iv = va_arg(*args, intmax_t); break;
#endif
- default: uv = va_arg(*args, unsigned); break;
- case 'q':
+ case 'q':
#if IVSIZE >= 8
- uv = va_arg(*args, Uquad_t); break;
+ iv = va_arg(*args, Quad_t); break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
- else {
- UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': uv = (unsigned char)tuv; break;
- case 'h': uv = (unsigned short)tuv; break;
- case 'l': uv = (unsigned long)tuv; break;
- case 'V':
- default: uv = tuv; break;
- case 'q':
+ }
+ }
+ else {
+ IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': iv = (char)tiv; break;
+ case 'h': iv = (short)tiv; break;
+ case 'l': iv = (long)tiv; break;
+ case 'V':
+ default: iv = tiv; break;
+ case 'q':
#if IVSIZE >= 8
- uv = (Uquad_t)tuv; break;
+ iv = (Quad_t)tiv; break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
+ }
+ }
+
+ /* now convert iv to uv */
+ if (iv >= 0) {
+ uv = iv;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+ else {
+ uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+ esignbuf[esignlen++] = '-';
+ }
+ }
+ else {
+ /* unsigned int type */
+ if (args) {
+ switch (intsize) {
+ case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+ break;
+ case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+ break;
+ case 'l': uv = va_arg(*args, unsigned long); break;
+ case 'V': uv = va_arg(*args, UV); break;
+ case 'z': uv = va_arg(*args, Size_t); break;
+#ifdef HAS_PTRDIFF_T
+ /* will sign extend, but there is no
+ * uptrdiff_t, so oh well */
+ case 't': uv = va_arg(*args, ptrdiff_t); break;
+#endif
+#ifdef I_STDINT
+ case 'j': uv = va_arg(*args, uintmax_t); break;
+#endif
+ default: uv = va_arg(*args, unsigned); break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = va_arg(*args, Uquad_t); break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ else {
+ UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': uv = (unsigned char)tuv; break;
+ case 'h': uv = (unsigned short)tuv; break;
+ case 'l': uv = (unsigned long)tuv; break;
+ case 'V':
+ default: uv = tuv; break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = (Uquad_t)tuv; break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ }
+ }
- integer:
+ do_integer:
{
char *ptr = ebuf + sizeof ebuf;
bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
esignbuf[esignlen++] = c;
}
break;
+
+ case 1:
+ /* special-case: base 1 indicates a 'c' format:
+ * we use the common code for extracting a uv,
+ * but handle that value differently here than
+ * all the other int types */
+ if ((uv > 255 ||
+ (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+ && !IN_BYTES)
+ {
+ assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+ eptr = ebuf;
+ elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+ is_utf8 = TRUE;
+ }
+ else {
+ c = (char)uv;
+ eptr = &c;
+ elen = 1;
+ }
+ goto string;
+
default: /* it had better be ten or less */
do {
dig = uv % base;
&& !(base == 8 && alt)) /* "%#.0o" prints "0" */
elen = 0;
- /* a precision nullifies the 0 flag. */
- if (fill == '0')
- fill = ' ';
+ /* a precision nullifies the 0 flag. */
+ fill = FALSE;
}
}
break;
case 'a': case 'A':
{
- bool can_be_special; /* candidate for special-case-handling */
STRLEN radix_len; /* SvCUR(PL_numeric_radix_sv) */
STRLEN float_need; /* what PL_efloatsize needs to become */
+ bool hexfp; /* hexadecimal floating point? */
vcatpvfn_long_double_t fv;
NV nv;
break;
}
- /* a candidate for special-casing: %.0f and %.NNNg */
- can_be_special = ( !(width || left || plus || alt)
- && fill != '0'
- && has_precis
- && intsize != 'q');
-
/* special-case "%.0f" */
- if (can_be_special && c == 'f' && !precis) {
- if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
- break;
- }
+ if ( c == 'f'
+ && !precis
+ && has_precis
+ && !(width || left || plus || alt)
+ && !fill
+ && intsize != 'q'
+ && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
+ )
+ goto float_concat_no_utf8;
/* Determine the buffer size needed for the various
* floating-point formats.
* below, or implicitly, via an snprintf() variant.
* Note also things like ps_AF.utf8 which has
* "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+ if (!lc_numeric_set) {
+ /* only set once and reuse in-locale value on subsequent
+ * iterations.
+ * XXX what happens if we die in an eval?
+ */
+ STORE_LC_NUMERIC_SET_TO_NEEDED();
+ lc_numeric_set = TRUE;
+ }
+
+ if (PL_numeric_radix_sv) {
+ assert(IN_LC(LC_NUMERIC));
radix_len = SvCUR(PL_numeric_radix_sv);
/* note that this will convert the output to utf8 even if
* if the radix point didn't get output */
is_utf8 = SvUTF8(PL_numeric_radix_sv);
}
- RESTORE_LC_NUMERIC();
#endif
/* this can't wrap unless PL_numeric_radix_sv is a string
* consuming virtually all the 32-bit or 64-bit address space
*/
float_need += radix_len;
+ hexfp = FALSE;
+
if (isALPHA_FOLD_EQ(c, 'f')) {
/* Determine how many digits before the radix point
* might be emitted. frexp() (or frexpl) has some
float_need += digits;
}
}
+ /* special-case "%.<number>g" if it will fit in ebuf */
+ else if (c == 'g'
+ && precis /* See earlier comment about buggy Gconvert
+ when digits, aka precis, is 0 */
+ && has_precis
+ /* check, in manner not involving wrapping, that it will
+ * fit in ebuf */
+ && float_need < sizeof(ebuf)
+ && sizeof(ebuf) - float_need > precis
+ && !(width || left || plus || alt)
+ && !fill
+ && intsize != 'q'
+ ) {
+ SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis);
+ elen = strlen(ebuf);
+ eptr = ebuf;
+ goto float_concat;
+ }
+
{
STRLEN pr = has_precis ? precis : 6; /* known default */
PL_efloatbuf[0] = '\0';
}
- /* special-case "%.<number>g" */
- if (can_be_special) {
- /* See earlier comment about buggy Gconvert when digits,
- aka precis is 0 */
- if ( c == 'g' && precis ) {
- STORE_LC_NUMERIC_SET_TO_NEEDED();
- SNPRINTF_G(fv, PL_efloatbuf, PL_efloatsize, precis);
- /* May return an empty string for digits==0 */
- if (*PL_efloatbuf) {
- elen = strlen(PL_efloatbuf);
- goto float_converted;
- }
- }
- }
-
if (UNLIKELY(hexfp)) {
elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c,
nv, fv, has_precis, precis, width,
base = width;
do { *--ptr = '0' + (base % 10); } while (base /= 10);
}
- if (fill == '0')
- *--ptr = fill;
+ if (fill)
+ *--ptr = '0';
if (left)
*--ptr = '-';
if (plus)
* where printf() taints but print($float) doesn't.
* --jhi */
- STORE_LC_NUMERIC_SET_TO_NEEDED();
-
/* hopefully the above makes ptr a very constrained format
* that is safe to use, even though it's not literal */
GCC_DIAG_IGNORE(-Wformat-nonliteral);
GCC_DIAG_RESTORE;
}
- float_converted:
eptr = PL_efloatbuf;
- assert((IV)elen > 0); /* here zero elen is bad */
+ float_concat:
- break;
+ /* Since floating-point formats do their own formatting and
+ * padding, we skip the main block of code at the end of this
+ * loop which handles appending eptr to sv, and do our own
+ * stripped-down version */
+
+ /* floating-point formats only get is_utf8 if the radix point
+ * is utf8. All other characters in the string are < 128
+ * and so can be safely appended to both a non-utf8 and utf8
+ * string as-is.
+ */
+ if (is_utf8 && !has_utf8) {
+ sv_utf8_upgrade(sv);
+ has_utf8 = TRUE;
+ }
+
+ float_concat_no_utf8:
+
+ assert(!zeros);
+ assert(!esignlen);
+ assert(!vectorize);
+ assert(elen);
+ assert(elen >= width);
+
+
+ {
+ /* unrolled Perl_sv_catpvn */
+ STRLEN need = elen + SvCUR(sv) + 1;
+ char *end;
+ /* can't wrap as both elen and SvCUR() are allocated in
+ * memory and together can't consume all the address space
+ */
+ assert(need > elen);
+ SvGROW(sv, need);
+ end = SvEND(sv);
+ Copy(eptr, end, elen, char);
+ end += elen;
+ *end = '\0';
+ SvCUR_set(sv, need - 1);
+ }
+
+ goto donevalidconversion;
}
/* SPECIAL */
int i;
if (vectorize)
goto unknown;
+ /* XXX ideally we should warn if any flags etc have been
+ * set, e.g. "%-4.5n" */
+ /* XXX if sv was originally non-utf8 with a char in the
+ * range 0x80-0xff, then if it got upgraded, we should
+ * calculate char len rather than byte len here */
i = SvCUR(sv) - origlen;
if (args) {
switch (intsize) {
#endif
}
}
- else
+ else {
+ if (arg_missing)
+ Perl_croak_nocontext(
+ "Missing argument for %%n in %s",
+ PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
+ }
goto donevalidconversion;
}
SvGROW(sv, need);
p = SvEND(sv);
- if (esignlen && fill == '0') {
+ if (esignlen && fill) {
int i;
for (i = 0; i < (int)esignlen; i++)
*p++ = esignbuf[i];
}
if (gap && !left) {
- memset(p, fill, gap);
+ memset(p, (fill ? '0' : ' '), gap);
p += gap;
}
- if (esignlen && fill != '0') {
+ if (esignlen && !fill) {
int i;
for (i = 0; i < (int)esignlen; i++)
*p++ = esignbuf[i];