Perl_sv_vcatpvfn_flags: move %*v handling earlier

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 5562a24..1355485 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -11449,13 +11449,15 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
   * string.
   * The rest of the args have the same meaning as the local vars of the
   * same name within Perl_sv_vcatpvfn_flags().
+ *
+ * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED();
   */
  
  static STRLEN
  S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
                      const NV nv, const vcatpvfn_long_double_t fv,
                      bool has_precis, STRLEN precis, STRLEN width,
-                    bool alt, char plus, bool left, char fill)
+                    bool alt, char plus, bool left, bool fill)
  {
      /* Hexadecimal floating point. */
      char* p = buf;
@@ -11476,7 +11478,6 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
      bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
      bool negative = FALSE;
      STRLEN elen;
-    DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  
      /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
       *
@@ -11666,17 +11667,16 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
  #ifndef USE_LOCALE_NUMERIC
              *p++ = '.';
  #else
-            STORE_LC_NUMERIC_SET_TO_NEEDED();
-            if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+            if (PL_numeric_radix_sv) {
                  STRLEN n;
                  const char* r = SvPV(PL_numeric_radix_sv, n);
+                assert(IN_LC(LC_NUMERIC));
                  Copy(r, p, n, char);
                  p += n;
              }
              else {
                  *p++ = '.';
              }
-            RESTORE_LC_NUMERIC();
  #endif
      }
  
@@ -11702,7 +11702,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
              /* Pad the back with spaces. */
              memset(buf + elen, ' ', gap);
          }
-        else if (fill == '0') {
+        else if (fill) {
              /* Insert the zeros after the "0x" and the
               * the potential sign, but before the digits,
               * otherwise we end up with "0000xH.HHH...",
@@ -11715,7 +11715,7 @@ S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
                  nmove--;
              }
              Move(zerox, zerox + nzero, nmove, char);
-            memset(zerox, fill, nzero);
+            memset(zerox, fill ? '0' : ' ', nzero);
          }
          else {
              /* Move it to the right. */
@@ -11790,9 +11790,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
       * Plus 32: Playing safe. */
      char ebuf[IV_DIG * 4 + NV_DIG + 32];
      bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
-    bool hexfp = FALSE; /* hexadecimal floating point? */
-
+#ifdef USE_LOCALE_NUMERIC
      DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+    bool lc_numeric_set = FALSE; /* called STORE_LC_NUMERIC_SET_TO_NEEDED? */
+#endif
  
      PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
      PERL_UNUSED_ARG(maybe_tainted);
@@ -11803,6 +11804,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      /* no matter what, this is a string now */
      (void)SvPV_force_nomg(sv, origlen);
  
+    /* the code that scans for flags etc following a % relies on
+     * a '\0' being present to avoid falling off the end. Ideally that
+     * should be fixed */
+    assert(pat[patlen] == '\0');
+
      /* special-case "", "%s", and "%-p" (SVf - see below) */
      if (patlen == 0) {
         if (svmax && ckWARN(WARN_REDUNDANT))
@@ -11839,64 +11845,39 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      }
  
  #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
-    /* special-case "%.0f" and "%.<number>g" */
-    if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.'
-        && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) {
-       unsigned digits = 0;
-       const char *pp;
-
-       pp = pat + 2;
-       while (*pp >= '0' && *pp <= '9')
-           digits = 10 * digits + (*pp++ - '0');
-
-       /* XXX: Why do this `svix < svmax` test? Couldn't we just
-          format the first argument and WARN_REDUNDANT if svmax > 1?
-          Munged by Nicholas Clark in v5.13.0-209-g95ea86d */
-       if (pp + 1 == pat + patlen && svix < svmax) {
-           const NV nv = SvNV(*svargs);
-            if (LIKELY(!Perl_isinfnan(nv))) {
-                if (*pp == 'g') {
-                    /* Add check for digits != 0 because it seems that some
-                       gconverts are buggy in this case, and we don't yet have
-                       a Configure test for this.  */
-                    if (digits && digits < sizeof(ebuf) - NV_DIG - 10) {
-                        /* 0, point, slack */
-                        STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        SNPRINTF_G(nv, ebuf, sizeof(ebuf), digits);
-                        sv_catpv_nomg(sv, ebuf);
-                        if (*ebuf)     /* May return an empty string for digits==0 */
-                            return;
-                    }
-                } else if (!digits) {
-                    STRLEN l;
+    /* special-case "%.0f" */
+    if (    !args
+         && patlen == 4
+         && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f'
+         && svmax > 0)
+    {
+        const NV nv = SvNV(*svargs);
+        if (LIKELY(!Perl_isinfnan(nv))) {
+            STRLEN l;
+            char *p;
  
-                    if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
-                        sv_catpvn_nomg(sv, p, l);
-                        return;
-                    }
-                }
+            if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
+                sv_catpvn_nomg(sv, p, l);
+                return;
              }
         }
      }
  #endif /* !USE_LONG_DOUBLE */
  
-    if (!args && svix < svmax && DO_UTF8(*svargs))
-       has_utf8 = TRUE;
-
      patend = (char*)pat + patlen;
      for (p = (char*)pat; p < patend; p = q) {
  
         char intsize     = 0;         /* size qualifier in "%hi..." etc */
         bool alt         = FALSE;     /* has      "%#..."    */
         bool left        = FALSE;     /* has      "%-..."    */
-       char fill        = ' ';       /* has      "%0..."    */
+       bool fill        = FALSE;     /* has      "%0..."    */
         char plus        = 0;         /* has      "%+..."    */
         STRLEN width     = 0;         /* value of "%NNN..."  */
         bool has_precis  = FALSE;     /* has      "%.NNN..." */
         STRLEN precis    = 0;         /* value of "%.NNN..." */
         bool asterisk    = FALSE;     /* has      "%*..."    */
          bool used_explicit_ix = FALSE;/* has      "%$n..."   */
-       unsigned base    = 0;         /* base to print in, e.g. 8 for %o */
+       int base         = 0;         /* base to print in, e.g. 8 for %o */
         UV uv            = 0;         /* the value to print of int-ish args */
         IV iv            = 0;         /* ditto for signed types */
  
@@ -11954,77 +11935,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      [%bcdefginopsuxDFOUX] format (mandatory)
  */
  
-       if (args) {
-/*  
-       As of perl5.9.3, printf format checking is on by default.
-       Internally, perl uses %p formats to provide an escape to
-       some extended formatting.  This block deals with those
-       extensions: if it does not match, (char*)q is reset and
-       the normal format processing code is used.
-
-       Currently defined extensions are:
-               %p              include pointer address (standard)      
-               %-p     (SVf)   include an SV (previously %_)
-               %-<num>p        include an SV with precision <num>      
-               %2p             include a HEK
-               %3p             include a HEK with precision of 256
-               %4p             char* preceded by utf8 flag and length
-               %<num>p         (where num is 1 or > 4) reserved for future
-                               extensions
-
-       Robin Barker 2005-07-14 (but modified since)
-
-               %1p     (VDf)   removed.  RMB 2007-10-19
-*/
-           char* r = q; 
-           bool sv = FALSE;    
-           STRLEN n = 0;
-           if (*q == '-')
-               sv = *q++;
-           else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
-               /* The argument has already gone through cBOOL, so the cast
-                  is safe. */
-               is_utf8 = (bool)va_arg(*args, int);
-               elen = va_arg(*args, UV);
-                /* if utf8 length is larger than 0x7ffff..., then it might
-                 * have been a signed value that wrapped */
-                if (elen  > ((~(STRLEN)0) >> 1)) {
-                    assert(0); /* in DEBUGGING build we want to crash */
-                    elen= 0; /* otherwise we want to treat this as an empty string */
-                }
-               eptr = va_arg(*args, char *);
-               q += sizeof(UTF8f)-1;
-               goto string;
-           }
-           n = expect_number(&q);
-           if (*q++ == 'p') {
-               if (sv) {                       /* SVf */
-                   if (n) {
-                       precis = n;
-                       has_precis = TRUE;
-                   }
-                   argsv = MUTABLE_SV(va_arg(*args, void*));
-                   eptr = SvPV_const(argsv, elen);
-                   if (DO_UTF8(argsv))
-                       is_utf8 = TRUE;
-                   goto string;
-               }
-               else if (n==2 || n==3) {        /* HEKf */
-                   HEK * const hek = va_arg(*args, HEK *);
-                   eptr = HEK_KEY(hek);
-                   elen = HEK_LEN(hek);
-                   if (HEK_UTF8(hek)) is_utf8 = TRUE;
-                   if (n==3) precis = 256, has_precis = TRUE;
-                   goto string;
-               }
-               else if (n) {
-                   Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
-                                    "internal %%<num>p might conflict with future printf extensions");
-               }
-           }
-           q = r; 
-       }
-
         if ( (width = expect_number(&q)) ) {
             if (*q == '$') {
                  if (args)
@@ -12056,7 +11966,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 continue;
  
             case '0':
-               fill = *q++;
+               fill = TRUE;
+                q++;
                 continue;
  
             case '#':
@@ -12092,6 +12003,27 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 evix = ewix;
                 ewix = 0;
                 asterisk = FALSE;
+                /* vectorizing, but not with the default "." */
+                if (args)
+                    vecsv = va_arg(*args, SV*);
+                else if (evix) {
+                    FETCH_VCATPVFN_ARGUMENT(
+                        vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
+                } else {
+                    FETCH_VCATPVFN_ARGUMENT(
+                        vecsv, svix < svmax, svargs[svix++]);
+                }
+                dotstr = SvPV_const(vecsv, dotstrlen);
+                /* Keep the DO_UTF8 test *after* the SvPV call, else things go
+                   bad with tied or overloaded values that return UTF8.  */
+                if (DO_UTF8(vecsv))
+                    is_utf8 = TRUE;
+                else if (has_utf8) {
+                    vecsv = sv_mortalcopy(vecsv);
+                    sv_utf8_upgrade(vecsv);
+                    dotstr = SvPV_const(vecsv, dotstrlen);
+                    is_utf8 = TRUE;
+                }
             }
             vectorize = TRUE;
             goto tryasterisk;
@@ -12099,34 +12031,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
         if (!asterisk)
         {
-           if( *q == '0' )
-               fill = *q++;
+           if(*q == '0') {
+               fill = TRUE;
+                q++;
+            }
             width = expect_number(&q);
         }
  
-       if (vectorize && vectorarg) {
-           /* vectorizing, but not with the default "." */
-           if (args)
-               vecsv = va_arg(*args, SV*);
-           else if (evix) {
-                FETCH_VCATPVFN_ARGUMENT(
-                    vecsv, evix > 0 && evix <= svmax, svargs[evix-1]);
-           } else {
-                FETCH_VCATPVFN_ARGUMENT(
-                    vecsv, svix < svmax, svargs[svix++]);
-           }
-           dotstr = SvPV_const(vecsv, dotstrlen);
-           /* Keep the DO_UTF8 test *after* the SvPV call, else things go
-              bad with tied or overloaded values that return UTF8.  */
-           if (DO_UTF8(vecsv))
-               is_utf8 = TRUE;
-           else if (has_utf8) {
-               vecsv = sv_mortalcopy(vecsv);
-               sv_utf8_upgrade(vecsv);
-               dotstr = SvPV_const(vecsv, dotstrlen);
-               is_utf8 = TRUE;
-           }               
-       }
  
         if (asterisk) {
              int i;
@@ -12303,39 +12214,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
         c = *q++; /* c now holds the conversion type */
  
-        if (argsv && strchr("BbcDdiOopuUXx", c)) {
-            /* XXX va_arg(*args) case? need peek, use va_copy? */
-            SvGETMAGIC(argsv);
-            if (UNLIKELY(SvAMAGIC(argsv)))
-                argsv = sv_2num(argsv);
-            if (UNLIKELY(isinfnansv(argsv)))
-                goto handle_infnan_argsv;
-        }
-
         switch (c) {
  
             /* STRINGS */
  
-       case 'c':
-           if (vectorize)
-               goto unknown;
-           uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
-           if ((uv > 255 ||
-                (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
-               && !IN_BYTES)
-            {
-                assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
-               eptr = ebuf;
-               elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
-               is_utf8 = TRUE;
-           }
-           else {
-               c = (char)uv;
-               eptr = &c;
-               elen = 1;
-           }
-           goto string;
-
         case 's':
             if (vectorize)
                 goto unknown;
@@ -12379,9 +12261,98 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'p':
             if (alt || vectorize)
                 goto unknown;
+
+            /* %p extensions:
+             *
+             * "%...p" is normally treated like "%...x", except that the
+             * number to print is the SV's address (or a pointer address
+             * for C-ish sprintf).
+             *
+             * However, the C-ish sprintf variant allows a few special
+             * extensions. These are currently:
+             *
+             * %-p       (SVf)  Like %s, but gets the string from an SV*
+             *                  arg rather than a char* arg.
+             *                  (This was previously %_).
+             *
+             * %-<num>p         Ditto but like %.<num>s (i.e. num is max width)
+             *
+             * %2p       (HEKf) Like %s, but using the key string in a HEK
+             *
+             * %3p       (HEKf256) Ditto but like %.256s
+             *
+             * %d%lu%4p  (UTF8f) A utf8 string. Consumes 3 args:
+             *                       (cBOOL(utf8), len, string_buf).
+             *                   It's handled by the "case 'd'" branch
+             *                   rather than here.
+             *
+             * %<num>p   where num is 1 or > 4: reserved for future
+             *           extensions. Warns, but then is treated as a
+             *           general %p (print hex address) format.
+             */
+
+            if (   args
+                && !intsize
+                && !fill
+                && !plus
+                && !has_precis
+                && !asterisk
+                && !used_explicit_ix
+            ) {
+                if (left) {                    /* %-p (SVf), %-NNNp */
+                    if (width) {
+                        precis = width;
+                        has_precis = TRUE;
+                    }
+                    argsv = MUTABLE_SV(va_arg(*args, void*));
+                    eptr = SvPV_const(argsv, elen);
+                    if (DO_UTF8(argsv))
+                        is_utf8 = TRUE;
+                    width = 0;
+                    goto string;
+                }
+                else if (width == 2 || width == 3) {   /* HEKf, HEKf256 */
+                    HEK * const hek = va_arg(*args, HEK *);
+                    eptr = HEK_KEY(hek);
+                    elen = HEK_LEN(hek);
+                    if (HEK_UTF8(hek))
+                        is_utf8 = TRUE;
+                    if (width == 3) {
+                        precis = 256;
+                        has_precis = TRUE;
+                    }
+                    width = 0;
+                    goto string;
+                }
+                else if (width) {
+                    Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+                         "internal %%<num>p might conflict with future printf extensions");
+                }
+            }
+
+            /* treat as normal %...p */
+
             uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
             base = 16;
-           goto integer;
+           goto do_integer;
+
+       case 'c':
+           if (vectorize)
+               goto unknown;
+            /* Ignore any size specifiers, since they're not documented as
+             * being allowed for %c (ideally we should warn on e.g. '%hc').
+             * Setting a default intsize, along with a positive
+             * (which signals unsigned) base, causes, for C-ish use, the
+             * va_arg to be interpreted as as unsigned int, when it's
+             * actually signed, which will convert -ve values to high +ve
+             * values. Note that unlike the libc %c, values > 255 will
+             * convert to high unicode points rather than being truncated
+             * to 8 bits. For perlish use, it will do SvUV(argsv), which
+             * will again convert -ve args to high -ve values.
+             */
+            intsize = 0;
+            base = 1; /* special value that indicates we're doing a 'c' */
+            goto get_int_arg_val;
  
         case 'D':
  #ifdef IV_IS_QUAD
@@ -12389,77 +12360,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #else
             intsize = 'l';
  #endif
-           /* FALLTHROUGH */
+            base = -10;
+            goto get_int_arg_val;
+
         case 'd':
-       case 'i':
-           if (vectorize) {
-               STRLEN ulen;
-               if (!veclen)
-                    goto donevalidconversion;
-               if (vec_utf8)
-                   uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
-                                       UTF8_ALLOW_ANYUV);
-               else {
-                   uv = *vecstr;
-                   ulen = 1;
-               }
-               vecstr += ulen;
-               veclen -= ulen;
-               if (plus)
-                    esignbuf[esignlen++] = plus;
-           }
-           else if (args) {
-               switch (intsize) {
-               case 'c':       iv = (char)va_arg(*args, int); break;
-               case 'h':       iv = (short)va_arg(*args, int); break;
-               case 'l':       iv = va_arg(*args, long); break;
-               case 'V':       iv = va_arg(*args, IV); break;
-               case 'z':       iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
-               case 't':       iv = va_arg(*args, ptrdiff_t); break;
-#endif
-               default:        iv = va_arg(*args, int); break;
-#ifdef I_STDINT
-               case 'j':       iv = va_arg(*args, intmax_t); break;
-#endif
-               case 'q':
-#if IVSIZE >= 8
-                               iv = va_arg(*args, Quad_t); break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           else {
-               IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       iv = (char)tiv; break;
-               case 'h':       iv = (short)tiv; break;
-               case 'l':       iv = (long)tiv; break;
-               case 'V':
-               default:        iv = tiv; break;
-               case 'q':
-#if IVSIZE >= 8
-                               iv = (Quad_t)tiv; break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           if ( !vectorize )   /* we already set uv above */
-           {
-               if (iv >= 0) {
-                   uv = iv;
-                   if (plus)
-                       esignbuf[esignlen++] = plus;
-               }
-               else {
-                   uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
-                   esignbuf[esignlen++] = '-';
-               }
+            /* probably just a plain %d, but it might be the start of the
+             * special UTF8f format, which usually looks something like
+             * "%d%lu%4p" (the lu may vary by platform)
+             */
+            assert((UTF8f)[0] == 'd');
+            assert((UTF8f)[1] == '%');
+
+            if (   args              /* UTF8f only valid for C-ish sprintf */
+                 && q == fmtstart + 1 /* plain %d, not %....d */
+                 && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
+                 && *q == '%'
+                 && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3))
+            {
+               /* The argument has already gone through cBOOL, so the cast
+                  is safe. */
+               is_utf8 = (bool)va_arg(*args, int);
+               elen = va_arg(*args, UV);
+                /* if utf8 length is larger than 0x7ffff..., then it might
+                 * have been a signed value that wrapped */
+                if (elen  > ((~(STRLEN)0) >> 1)) {
+                    assert(0); /* in DEBUGGING build we want to crash */
+                    elen = 0; /* otherwise we want to treat this as an empty string */
+                }
+               eptr = va_arg(*args, char *);
+               q += sizeof(UTF8f) - 2;
+               goto string;
             }
-           base = 10;
-           goto integer;
+
+           /* FALLTHROUGH */
+       case 'i':
+            base = -10;
+            goto get_int_arg_val;
  
         case 'U':
  #ifdef IV_IS_QUAD
@@ -12470,12 +12406,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'u':
             base = 10;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'B':
         case 'b':
             base = 2;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'O':
  #ifdef IV_IS_QUAD
@@ -12486,16 +12422,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'o':
             base = 8;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'X':
         case 'x':
             base = 16;
  
-       uns_integer:
+          get_int_arg_val:
+
             if (vectorize) {
                 STRLEN ulen;
-       vector:
+
+                if (base < 0) {
+                    base = -base;
+                    if (plus)
+                         esignbuf[esignlen++] = plus;
+                }
+
+             vector:
                 if (!veclen)
                      goto donevalidconversion;
                 if (vec_utf8)
@@ -12508,46 +12452,119 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 vecstr += ulen;
                 veclen -= ulen;
             }
-           else if (args) {
-               switch (intsize) {
-               case 'c':  uv = (unsigned char)va_arg(*args, unsigned); break;
-               case 'h':  uv = (unsigned short)va_arg(*args, unsigned); break;
-               case 'l':  uv = va_arg(*args, unsigned long); break;
-               case 'V':  uv = va_arg(*args, UV); break;
-               case 'z':  uv = va_arg(*args, Size_t); break;
+           else {
+                /* test arg for inf/nan. This can trigger an unwanted
+                 * 'str' overload, so manually force 'num' overload first
+                 * if necessary */
+                if (argsv) {
+                    SvGETMAGIC(argsv);
+                    if (UNLIKELY(SvAMAGIC(argsv)))
+                        argsv = sv_2num(argsv);
+                    if (UNLIKELY(isinfnansv(argsv)))
+                        goto handle_infnan_argsv;
+                }
+
+                if (base < 0) {
+                    /* signed int type */
+                    base = -base;
+                    if (args) {
+                        switch (intsize) {
+                        case 'c':  iv = (char)va_arg(*args, int);  break;
+                        case 'h':  iv = (short)va_arg(*args, int); break;
+                        case 'l':  iv = va_arg(*args, long);       break;
+                        case 'V':  iv = va_arg(*args, IV);         break;
+                        case 'z':  iv = va_arg(*args, SSize_t);    break;
  #ifdef HAS_PTRDIFF_T
-               case 't':  uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+                        case 't':  iv = va_arg(*args, ptrdiff_t);  break;
  #endif
+                        default:   iv = va_arg(*args, int);        break;
  #ifdef I_STDINT
-               case 'j':  uv = va_arg(*args, uintmax_t); break;
+                        case 'j':  iv = va_arg(*args, intmax_t);   break;
  #endif
-               default:   uv = va_arg(*args, unsigned); break;
-               case 'q':
+                        case 'q':
  #if IVSIZE >= 8
-                          uv = va_arg(*args, Uquad_t); break;
+                                   iv = va_arg(*args, Quad_t);     break;
  #else
-                          goto unknown;
+                                   goto unknown;
  #endif
-               }
-           }
-           else {
-               UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       uv = (unsigned char)tuv; break;
-               case 'h':       uv = (unsigned short)tuv; break;
-               case 'l':       uv = (unsigned long)tuv; break;
-               case 'V':
-               default:        uv = tuv; break;
-               case 'q':
+                        }
+                    }
+                    else {
+                        IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c':  iv = (char)tiv;   break;
+                        case 'h':  iv = (short)tiv;  break;
+                        case 'l':  iv = (long)tiv;   break;
+                        case 'V':
+                        default:   iv = tiv;         break;
+                        case 'q':
  #if IVSIZE >= 8
-                               uv = (Uquad_t)tuv; break;
+                                   iv = (Quad_t)tiv; break;
  #else
-                               goto unknown;
+                                   goto unknown;
  #endif
-               }
-           }
+                        }
+                    }
+
+                    /* now convert iv to uv */
+                    if (iv >= 0) {
+                        uv = iv;
+                        if (plus)
+                            esignbuf[esignlen++] = plus;
+                    }
+                    else {
+                        uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+                        esignbuf[esignlen++] = '-';
+                    }
+                }
+                else {
+                    /* unsigned int type */
+                    if (args) {
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+                                  break;
+                        case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+                                  break;
+                        case 'l': uv = va_arg(*args, unsigned long); break;
+                        case 'V': uv = va_arg(*args, UV);            break;
+                        case 'z': uv = va_arg(*args, Size_t);        break;
+#ifdef HAS_PTRDIFF_T
+                                  /* will sign extend, but there is no
+                                   * uptrdiff_t, so oh well */
+                        case 't': uv = va_arg(*args, ptrdiff_t);     break;
+#endif
+#ifdef I_STDINT
+                        case 'j': uv = va_arg(*args, uintmax_t);     break;
+#endif
+                        default:  uv = va_arg(*args, unsigned);      break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = va_arg(*args, Uquad_t);       break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                    else {
+                        UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)tuv;  break;
+                        case 'h': uv = (unsigned short)tuv; break;
+                        case 'l': uv = (unsigned long)tuv;  break;
+                        case 'V':
+                        default:  uv = tuv;                 break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = (Uquad_t)tuv;        break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                }
+            }
  
-       integer:
+       do_integer:
             {
                 char *ptr = ebuf + sizeof ebuf;
                 bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
@@ -12584,6 +12601,28 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                         esignbuf[esignlen++] = c;
                     }
                     break;
+
+               case 1:
+                    /* special-case: base 1 indicates a 'c' format:
+                     * we use the common code for extracting a uv,
+                     * but handle that value differently here than
+                     * all the other int types */
+                    if ((uv > 255 ||
+                         (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+                        && !IN_BYTES)
+                    {
+                        assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+                        eptr = ebuf;
+                        elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+                        is_utf8 = TRUE;
+                    }
+                    else {
+                        c = (char)uv;
+                        eptr = &c;
+                        elen = 1;
+                    }
+                    goto string;
+
                 default:                /* it had better be ten or less */
                     do {
                         dig = uv % base;
@@ -12600,9 +12639,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                              && !(base == 8 && alt)) /* "%#.0o" prints "0" */
                         elen = 0;
  
-               /* a precision nullifies the 0 flag. */
-                   if (fill == '0')
-                       fill = ' ';
+                    /* a precision nullifies the 0 flag. */
+                    fill = FALSE;
                 }
             }
             break;
@@ -12618,9 +12656,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'a': case 'A':
  
          {
-            bool   can_be_special; /* candidate for special-case-handling */
              STRLEN radix_len;  /* SvCUR(PL_numeric_radix_sv) */
              STRLEN float_need; /* what PL_efloatsize needs to become */
+            bool hexfp;        /* hexadecimal floating point? */
  
              vcatpvfn_long_double_t fv;
              NV                     nv;
@@ -12715,17 +12753,16 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  break;
              }
  
-            /* a candidate for special-casing: %.0f and %.NNNg */
-            can_be_special = ( !(width || left || plus || alt)
-                              && fill != '0'
-                              && has_precis
-                              && intsize != 'q');
-
              /* special-case "%.0f" */
-            if (can_be_special && c == 'f' && !precis) {
-                if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
-                    break;
-            }
+            if (   c == 'f'
+                && !precis
+                && has_precis
+                && !(width || left || plus || alt)
+                && !fill
+                && intsize != 'q'
+                && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
+            )
+                goto float_concat_no_utf8;
  
              /* Determine the buffer size needed for the various
               * floating-point formats.
@@ -12772,20 +12809,30 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
               * below, or implicitly, via an snprintf() variant.
               * Note also things like ps_AF.utf8 which has
               * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
-            STORE_LC_NUMERIC_SET_TO_NEEDED();
-            if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+            if (!lc_numeric_set) {
+                /* only set once and reuse in-locale value on subsequent
+                 * iterations.
+                 * XXX what happens if we die in an eval?
+                 */
+                STORE_LC_NUMERIC_SET_TO_NEEDED();
+                lc_numeric_set = TRUE;
+            }
+
+            if (PL_numeric_radix_sv) {
+                assert(IN_LC(LC_NUMERIC));
                  radix_len  = SvCUR(PL_numeric_radix_sv);
                  /* note that this will convert the output to utf8 even if
                   * if the radix point didn't get output */
                  is_utf8 = SvUTF8(PL_numeric_radix_sv);
              }
-            RESTORE_LC_NUMERIC();
  #endif
              /* this can't wrap unless PL_numeric_radix_sv is a string
               * consuming virtually all the 32-bit or 64-bit address space
               */
             float_need += radix_len;
  
+            hexfp = FALSE;
+
             if (isALPHA_FOLD_EQ(c, 'f')) {
                  /* Determine how many digits before the radix point
                   * might be emitted.  frexp() (or frexpl) has some
@@ -12837,6 +12884,25 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      float_need += digits;
                  }
             }
+            /* special-case "%.<number>g" if it will fit in ebuf */
+            else if (c == 'g'
+                && precis   /* See earlier comment about buggy Gconvert
+                               when digits, aka precis, is 0  */
+                && has_precis
+                /* check, in manner not involving wrapping, that it will
+                 * fit in ebuf  */
+                && float_need < sizeof(ebuf)
+                && sizeof(ebuf) - float_need > precis
+                && !(width || left || plus || alt)
+                && !fill
+                && intsize != 'q'
+            ) {
+                SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis);
+                elen = strlen(ebuf);
+                eptr = ebuf;
+                goto float_concat;
+           }
+
  
              {
                  STRLEN pr = has_precis ? precis : 6; /* known default */
@@ -12867,21 +12933,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 PL_efloatbuf[0] = '\0';
             }
  
-            /* special-case "%.<number>g" */
-            if (can_be_special) {
-               /* See earlier comment about buggy Gconvert when digits,
-                  aka precis is 0  */
-               if ( c == 'g' && precis ) {
-                    STORE_LC_NUMERIC_SET_TO_NEEDED();
-                    SNPRINTF_G(fv, PL_efloatbuf, PL_efloatsize, precis);
-                   /* May return an empty string for digits==0 */
-                   if (*PL_efloatbuf) {
-                       elen = strlen(PL_efloatbuf);
-                       goto float_converted;
-                   }
-                }
-           }
-
              if (UNLIKELY(hexfp)) {
                  elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c,
                                  nv, fv, has_precis, precis, width,
@@ -12919,8 +12970,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                     base = width;
                     do { *--ptr = '0' + (base % 10); } while (base /= 10);
                 }
-               if (fill == '0')
-                   *--ptr = fill;
+               if (fill)
+                   *--ptr = '0';
                 if (left)
                     *--ptr = '-';
                 if (plus)
@@ -12933,8 +12984,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  * where printf() taints but print($float) doesn't.
                  * --jhi */
  
-                STORE_LC_NUMERIC_SET_TO_NEEDED();
-
                  /* hopefully the above makes ptr a very constrained format
                   * that is safe to use, even though it's not literal */
                  GCC_DIAG_IGNORE(-Wformat-nonliteral);
@@ -12963,12 +13012,51 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  GCC_DIAG_RESTORE;
             }
  
-       float_converted:
             eptr = PL_efloatbuf;
-            assert((IV)elen > 0); /* here zero elen is bad */
  
+         float_concat:
  
-           break;
+            /* Since floating-point formats do their own formatting and
+             * padding, we skip the main block of code at the end of this
+             * loop which handles appending eptr to sv, and do our own
+             * stripped-down version */
+
+            /* floating-point formats only get is_utf8 if the radix point
+             * is utf8. All other characters in the string are < 128
+             * and so can be safely appended to both a non-utf8 and utf8
+             * string as-is.
+             */
+            if (is_utf8 && !has_utf8) {
+                sv_utf8_upgrade(sv);
+                has_utf8 = TRUE;
+            }
+
+         float_concat_no_utf8:
+
+            assert(!zeros);
+            assert(!esignlen);
+            assert(!vectorize);
+            assert(elen);
+            assert(elen >= width);
+
+
+            {
+                /* unrolled Perl_sv_catpvn */
+                STRLEN need = elen + SvCUR(sv) + 1;
+                char *end;
+                /* can't wrap as both elen and SvCUR() are allocated in
+                 * memory and together can't consume all the address space
+                 */
+                assert(need > elen);
+                SvGROW(sv, need);
+                end = SvEND(sv);
+                Copy(eptr, end, elen, char);
+                end += elen;
+                *end = '\0';
+                SvCUR_set(sv, need - 1);
+            }
+
+            goto donevalidconversion;
          }
  
             /* SPECIAL */
@@ -12978,6 +13066,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  int i;
                  if (vectorize)
                      goto unknown;
+                /* XXX ideally we should warn if any flags etc have been
+                 * set, e.g. "%-4.5n" */
+                /* XXX if sv was originally non-utf8 with a char in the
+                 * range 0x80-0xff, then if it got upgraded, we should
+                 * calculate char len rather than byte len here */
                  i = SvCUR(sv) - origlen;
                  if (args) {
                      switch (intsize) {
@@ -13001,8 +13094,13 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #endif
                      }
                  }
-                else
+                else {
+                    if (arg_missing)
+                        Perl_croak_nocontext(
+                            "Missing argument for %%n in %s",
+                                PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
                      sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
+                }
                  goto donevalidconversion;
              }
  
@@ -13103,16 +13201,16 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
              SvGROW(sv, need);
  
              p = SvEND(sv);
-            if (esignlen && fill == '0') {
+            if (esignlen && fill) {
                  int i;
                  for (i = 0; i < (int)esignlen; i++)
                      *p++ = esignbuf[i];
              }
              if (gap && !left) {
-                memset(p, fill, gap);
+                memset(p, (fill ? '0' : ' '), gap);
                  p += gap;
              }
-            if (esignlen && fill != '0') {
+            if (esignlen && !fill) {
                  int i;
                  for (i = 0; i < (int)esignlen; i++)
                      *p++ = esignbuf[i];