STRLEN width = 0; /* value of "%NNN..." */
bool has_precis = FALSE; /* has "%.NNN..." */
STRLEN precis = 0; /* value of "%.NNN..." */
- bool asterisk = FALSE; /* has "%*..." */
bool used_explicit_ix = FALSE;/* has "%$n..." */
- unsigned base = 0; /* base to print in, e.g. 8 for %o */
+ int base = 0; /* base to print in, e.g. 8 for %o */
UV uv = 0; /* the value to print of int-ish args */
IV iv = 0; /* ditto for signed types */
bool vectorize = FALSE; /* has "%v..." */
- bool vectorarg = FALSE; /* has "%*v..." */
SV *vecsv = NULL; /* the cur arg for %v */
bool vec_utf8 = FALSE; /* SvUTF8(vecsv) */
const U8 *vecstr = NULL; /* SvPVX(vecsv) */
I32 efix = 0; /* explicit format parameter index */
I32 ewix = 0; /* explicit width index */
I32 epix = 0; /* explicit precision index */
- I32 evix = 0; /* explicit vector index */
const I32 osvix = svix; /* original index in case of bad fmt */
bool is_utf8 = FALSE; /* is this item utf8? */
break;
}
+ /* at this point we can expect one of:
+ *
+ * 123 an explicit width
+ * * width taken from next arg
+ * *12$ width taken from 12th arg
+ * or no width
+ *
+ * But any width specification may be preceded by a v, in one of its
+ * forms:
+ * v
+ * *v
+ * *12$v
+ * So an asterisk may be either a width specifier or a vector
+ * separator arg specifier, and we don't know which initially
+ */
+
tryasterisk:
if (*q == '*') {
+ int i;
q++;
if ( (ewix = expect_number(&q)) ) {
if (*q++ == '$') {
} else
goto unknown;
}
- asterisk = TRUE;
- }
- if (*q == 'v') {
- q++;
- if (vectorize)
- goto unknown;
- if ((vectorarg = asterisk)) {
- evix = ewix;
- ewix = 0;
- asterisk = FALSE;
- }
- vectorize = TRUE;
- goto tryasterisk;
- }
- if (!asterisk)
- {
- if(*q == '0') {
- fill = TRUE;
+ if (*q == 'v') {
+ /* The asterisk was for *v, *NNN$v: vectorizing, but not
+ * with the default "." */
q++;
+ if (vectorize)
+ goto unknown;
+ if (args)
+ vecsv = va_arg(*args, SV*);
+ else if (ewix) {
+ FETCH_VCATPVFN_ARGUMENT(
+ vecsv, ewix > 0 && ewix <= svmax, svargs[ewix-1]);
+ } else {
+ FETCH_VCATPVFN_ARGUMENT(
+ vecsv, svix < svmax, svargs[svix++]);
+ }
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+ bad with tied or overloaded values that return UTF8. */
+ if (DO_UTF8(vecsv))
+ is_utf8 = TRUE;
+ else if (has_utf8) {
+ vecsv = sv_mortalcopy(vecsv);
+ sv_utf8_upgrade(vecsv);
+ dotstr = SvPV_const(vecsv, dotstrlen);
+ is_utf8 = TRUE;
+ }
+ ewix = 0;
+ vectorize = TRUE;
+ goto tryasterisk;
}
- width = expect_number(&q);
- }
-
- if (vectorize && vectorarg) {
- /* vectorizing, but not with the default "." */
- if (args)
- vecsv = va_arg(*args, SV*);
- else if (evix) {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
- } else {
- FETCH_VCATPVFN_ARGUMENT(
- vecsv, svix < svmax, svargs[svix++]);
- }
- dotstr = SvPV_const(vecsv, dotstrlen);
- /* Keep the DO_UTF8 test *after* the SvPV call, else things go
- bad with tied or overloaded values that return UTF8. */
- if (DO_UTF8(vecsv))
- is_utf8 = TRUE;
- else if (has_utf8) {
- vecsv = sv_mortalcopy(vecsv);
- sv_utf8_upgrade(vecsv);
- dotstr = SvPV_const(vecsv, dotstrlen);
- is_utf8 = TRUE;
- }
- }
- if (asterisk) {
- int i;
+ /* the asterisk specified a width */
if (args)
i = va_arg(*args, int);
else
SvIVx(svargs[ewix ? ewix-1 : svix++]) : 0;
left |= (i < 0);
width = (i < 0) ? -i : i;
+ }
+ else if (*q == 'v') {
+ q++;
+ if (vectorize)
+ goto unknown;
+ vectorize = TRUE;
+ goto tryasterisk;
+
+ }
+ else {
+ /* explicit width? */
+ if(*q == '0') {
+ fill = TRUE;
+ q++;
+ }
+ width = expect_number(&q);
}
+
gotwidth:
/* PRECISION */
c = *q++; /* c now holds the conversion type */
- if (argsv && strchr("BbcDdiOopuUXx", c)) {
- /* XXX va_arg(*args) case? need peek, use va_copy? */
- SvGETMAGIC(argsv);
- if (UNLIKELY(SvAMAGIC(argsv)))
- argsv = sv_2num(argsv);
- if (UNLIKELY(isinfnansv(argsv)))
- goto handle_infnan_argsv;
- }
-
switch (c) {
/* STRINGS */
- case 'c':
- if (vectorize)
- goto unknown;
- uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
- if ((uv > 255 ||
- (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
- && !IN_BYTES)
- {
- assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
- eptr = ebuf;
- elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
- is_utf8 = TRUE;
- }
- else {
- c = (char)uv;
- eptr = &c;
- elen = 1;
- }
- goto string;
-
case 's':
if (vectorize)
goto unknown;
&& !fill
&& !plus
&& !has_precis
- && !asterisk
+ /* not %*p or %*1$p - any width was explicit */
+ && q[-2] != '*'
+ && q[-2] != '$'
&& !used_explicit_ix
) {
if (left) { /* %-p (SVf), %-NNNp */
uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
base = 16;
- goto integer;
+ goto do_integer;
+
+ case 'c':
+ if (vectorize)
+ goto unknown;
+ /* Ignore any size specifiers, since they're not documented as
+ * being allowed for %c (ideally we should warn on e.g. '%hc').
+ * Setting a default intsize, along with a positive
+ * (which signals unsigned) base, causes, for C-ish use, the
+ * va_arg to be interpreted as as unsigned int, when it's
+ * actually signed, which will convert -ve values to high +ve
+ * values. Note that unlike the libc %c, values > 255 will
+ * convert to high unicode points rather than being truncated
+ * to 8 bits. For perlish use, it will do SvUV(argsv), which
+ * will again convert -ve args to high -ve values.
+ */
+ intsize = 0;
+ base = 1; /* special value that indicates we're doing a 'c' */
+ goto get_int_arg_val;
case 'D':
#ifdef IV_IS_QUAD
#else
intsize = 'l';
#endif
- goto do_i;
+ base = -10;
+ goto get_int_arg_val;
case 'd':
/* probably just a plain %d, but it might be the start of the
/* FALLTHROUGH */
case 'i':
- do_i:
- if (vectorize) {
- STRLEN ulen;
- if (!veclen)
- goto donevalidconversion;
- if (vec_utf8)
- uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
- UTF8_ALLOW_ANYUV);
- else {
- uv = *vecstr;
- ulen = 1;
- }
- vecstr += ulen;
- veclen -= ulen;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else if (args) {
- switch (intsize) {
- case 'c': iv = (char)va_arg(*args, int); break;
- case 'h': iv = (short)va_arg(*args, int); break;
- case 'l': iv = va_arg(*args, long); break;
- case 'V': iv = va_arg(*args, IV); break;
- case 'z': iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
- case 't': iv = va_arg(*args, ptrdiff_t); break;
-#endif
- default: iv = va_arg(*args, int); break;
-#ifdef I_STDINT
- case 'j': iv = va_arg(*args, intmax_t); break;
-#endif
- case 'q':
-#if IVSIZE >= 8
- iv = va_arg(*args, Quad_t); break;
-#else
- goto unknown;
-#endif
- }
- }
- else {
- IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': iv = (char)tiv; break;
- case 'h': iv = (short)tiv; break;
- case 'l': iv = (long)tiv; break;
- case 'V':
- default: iv = tiv; break;
- case 'q':
-#if IVSIZE >= 8
- iv = (Quad_t)tiv; break;
-#else
- goto unknown;
-#endif
- }
- }
- if ( !vectorize ) /* we already set uv above */
- {
- if (iv >= 0) {
- uv = iv;
- if (plus)
- esignbuf[esignlen++] = plus;
- }
- else {
- uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
- esignbuf[esignlen++] = '-';
- }
- }
- base = 10;
- goto integer;
+ base = -10;
+ goto get_int_arg_val;
case 'U':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'u':
base = 10;
- goto uns_integer;
+ goto get_int_arg_val;
case 'B':
case 'b':
base = 2;
- goto uns_integer;
+ goto get_int_arg_val;
case 'O':
#ifdef IV_IS_QUAD
/* FALLTHROUGH */
case 'o':
base = 8;
- goto uns_integer;
+ goto get_int_arg_val;
case 'X':
case 'x':
base = 16;
- uns_integer:
+ get_int_arg_val:
+
if (vectorize) {
STRLEN ulen;
- vector:
+
+ if (base < 0) {
+ base = -base;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+
+ vector:
if (!veclen)
goto donevalidconversion;
if (vec_utf8)
vecstr += ulen;
veclen -= ulen;
}
- else if (args) {
- switch (intsize) {
- case 'c': uv = (unsigned char)va_arg(*args, unsigned); break;
- case 'h': uv = (unsigned short)va_arg(*args, unsigned); break;
- case 'l': uv = va_arg(*args, unsigned long); break;
- case 'V': uv = va_arg(*args, UV); break;
- case 'z': uv = va_arg(*args, Size_t); break;
+ else {
+ /* test arg for inf/nan. This can trigger an unwanted
+ * 'str' overload, so manually force 'num' overload first
+ * if necessary */
+ if (argsv) {
+ SvGETMAGIC(argsv);
+ if (UNLIKELY(SvAMAGIC(argsv)))
+ argsv = sv_2num(argsv);
+ if (UNLIKELY(isinfnansv(argsv)))
+ goto handle_infnan_argsv;
+ }
+
+ if (base < 0) {
+ /* signed int type */
+ base = -base;
+ if (args) {
+ switch (intsize) {
+ case 'c': iv = (char)va_arg(*args, int); break;
+ case 'h': iv = (short)va_arg(*args, int); break;
+ case 'l': iv = va_arg(*args, long); break;
+ case 'V': iv = va_arg(*args, IV); break;
+ case 'z': iv = va_arg(*args, SSize_t); break;
#ifdef HAS_PTRDIFF_T
- case 't': uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+ case 't': iv = va_arg(*args, ptrdiff_t); break;
#endif
+ default: iv = va_arg(*args, int); break;
#ifdef I_STDINT
- case 'j': uv = va_arg(*args, uintmax_t); break;
+ case 'j': iv = va_arg(*args, intmax_t); break;
#endif
- default: uv = va_arg(*args, unsigned); break;
- case 'q':
+ case 'q':
#if IVSIZE >= 8
- uv = va_arg(*args, Uquad_t); break;
+ iv = va_arg(*args, Quad_t); break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
- else {
- UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
- switch (intsize) {
- case 'c': uv = (unsigned char)tuv; break;
- case 'h': uv = (unsigned short)tuv; break;
- case 'l': uv = (unsigned long)tuv; break;
- case 'V':
- default: uv = tuv; break;
- case 'q':
+ }
+ }
+ else {
+ IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': iv = (char)tiv; break;
+ case 'h': iv = (short)tiv; break;
+ case 'l': iv = (long)tiv; break;
+ case 'V':
+ default: iv = tiv; break;
+ case 'q':
#if IVSIZE >= 8
- uv = (Uquad_t)tuv; break;
+ iv = (Quad_t)tiv; break;
#else
- goto unknown;
+ goto unknown;
#endif
- }
- }
+ }
+ }
- integer:
+ /* now convert iv to uv */
+ if (iv >= 0) {
+ uv = iv;
+ if (plus)
+ esignbuf[esignlen++] = plus;
+ }
+ else {
+ uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+ esignbuf[esignlen++] = '-';
+ }
+ }
+ else {
+ /* unsigned int type */
+ if (args) {
+ switch (intsize) {
+ case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+ break;
+ case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+ break;
+ case 'l': uv = va_arg(*args, unsigned long); break;
+ case 'V': uv = va_arg(*args, UV); break;
+ case 'z': uv = va_arg(*args, Size_t); break;
+#ifdef HAS_PTRDIFF_T
+ /* will sign extend, but there is no
+ * uptrdiff_t, so oh well */
+ case 't': uv = va_arg(*args, ptrdiff_t); break;
+#endif
+#ifdef I_STDINT
+ case 'j': uv = va_arg(*args, uintmax_t); break;
+#endif
+ default: uv = va_arg(*args, unsigned); break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = va_arg(*args, Uquad_t); break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ else {
+ UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+ switch (intsize) {
+ case 'c': uv = (unsigned char)tuv; break;
+ case 'h': uv = (unsigned short)tuv; break;
+ case 'l': uv = (unsigned long)tuv; break;
+ case 'V':
+ default: uv = tuv; break;
+ case 'q':
+#if IVSIZE >= 8
+ uv = (Uquad_t)tuv; break;
+#else
+ goto unknown;
+#endif
+ }
+ }
+ }
+ }
+
+ do_integer:
{
char *ptr = ebuf + sizeof ebuf;
bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
esignbuf[esignlen++] = c;
}
break;
+
+ case 1:
+ /* special-case: base 1 indicates a 'c' format:
+ * we use the common code for extracting a uv,
+ * but handle that value differently here than
+ * all the other int types */
+ if ((uv > 255 ||
+ (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+ && !IN_BYTES)
+ {
+ assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+ eptr = ebuf;
+ elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+ is_utf8 = TRUE;
+ }
+ else {
+ c = (char)uv;
+ eptr = &c;
+ elen = 1;
+ }
+ goto string;
+
default: /* it had better be ten or less */
do {
dig = uv % base;