Perl_sv_vcatpvfn_flags: move Inf handling for ints

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 48de19c..b33792a 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -1525,6 +1525,11 @@ Perl_sv_backoff(SV *const sv)
      return;
  }
  
+
+/* forward declaration */
+static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
+
+
  /*
  =for apidoc sv_grow
  
@@ -1535,7 +1540,6 @@ Use the C<SvGROW> wrapper instead.
  =cut
  */
  
-static void S_sv_uncow(pTHX_ SV * const sv, const U32 flags);
  
  char *
  Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
@@ -1651,6 +1655,7 @@ Perl_sv_setiv(pTHX_ SV *const sv, const IV i)
         /* diag_listed_as: Can't coerce %s to %s in %s */
         Perl_croak(aTHX_ "Can't coerce %s to integer in %s", sv_reftype(sv,0),
                    OP_DESC(PL_op));
+        NOT_REACHED; /* NOTREACHED */
          break;
      default: NOOP;
      }
@@ -1763,6 +1768,7 @@ Perl_sv_setnv(pTHX_ SV *const sv, const NV num)
         /* diag_listed_as: Can't coerce %s to %s in %s */
         Perl_croak(aTHX_ "Can't coerce %s to number in %s", sv_reftype(sv,0),
                    OP_DESC(PL_op));
+        NOT_REACHED; /* NOTREACHED */
          break;
      default: NOOP;
      }
@@ -2915,8 +2921,8 @@ S_infnan_2pv(NV nv, char* buffer, size_t maxlen, char plus) {
        return 0;
      }
      assert((s == buffer + 3) || (s == buffer + 4));
-    *s++ = 0;
-    return s - buffer - 1; /* -1: excluding the zero byte */
+    *s = 0;
+    return s - buffer;
  }
  
  /*
@@ -3141,8 +3147,8 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
                      STORE_LC_NUMERIC_SET_TO_NEEDED();
  
                      local_radix = PL_numeric_local && PL_numeric_radix_sv;
-                    if (local_radix && SvLEN(PL_numeric_radix_sv) > 1) {
-                        size += SvLEN(PL_numeric_radix_sv) - 1;
+                    if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
+                        size += SvCUR(PL_numeric_radix_sv) - 1;
                          s = SvGROW_mutable(sv, size);
                      }
  
@@ -3179,6 +3185,8 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
         assert(SvPOK(buffer));
         if (SvUTF8(buffer))
             SvUTF8_on(sv);
+        else
+            SvUTF8_off(sv);
         if (lp)
             *lp = SvCUR(buffer);
         return SvPVX(buffer);
@@ -4792,7 +4800,7 @@ Doesn't handle set magic.
  The perl equivalent is C<$sv = undef;>. Note that it doesn't free any string
  buffer, unlike C<undef $sv>.
  
-Introduced in perl 5.26.0.
+Introduced in perl 5.25.12.
  
  =cut
  */
@@ -4808,8 +4816,13 @@ Perl_sv_set_undef(pTHX_ SV *sv)
  
      if (type <= SVt_IV) {
          assert(!SvGMAGICAL(sv));
-        if (SvREADONLY(sv))
+        if (SvREADONLY(sv)) {
+            /* does undeffing PL_sv_undef count as modifying a read-only
+             * variable? Some XS code does this */
+            if (sv == &PL_sv_undef)
+                return;
              Perl_croak_no_modify();
+        }
  
          if (SvROK(sv)) {
              if (SvWEAKREF(sv))
@@ -4834,8 +4847,8 @@ Perl_sv_set_undef(pTHX_ SV *sv)
      if (isGV_with_GP(sv))
          Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
                         "Undefined value assigned to typeglob");
-
-    SvOK_off(sv);
+    else
+        SvOK_off(sv);
  }
  
  
@@ -4980,6 +4993,8 @@ Perl_sv_setpvn(pTHX_ SV *const sv, const char *const ptr, const STRLEN len)
      PERL_ARGS_ASSERT_SV_SETPVN;
  
      SV_CHECK_THINKFIRST_COW_DROP(sv);
+    if (isGV_with_GP(sv))
+       Perl_croak_no_modify();
      if (!ptr) {
         (void)SvOK_off(sv);
         return;
@@ -5200,28 +5215,6 @@ Perl_sv_usepvn_flags(pTHX_ SV *const sv, char *ptr, const STRLEN len, const U32
         SvSETMAGIC(sv);
  }
  
-/*
-=for apidoc sv_force_normal_flags
-
-Undo various types of fakery on an SV, where fakery means
-"more than" a string: if the PV is a shared string, make
-a private copy; if we're a ref, stop refing; if we're a glob, downgrade to
-an C<xpvmg>; if we're a copy-on-write scalar, this is the on-write time when
-we do the copy, and is also used locally; if this is a
-vstring, drop the vstring magic.  If C<SV_COW_DROP_PV> is set
-then a copy-on-write scalar drops its PV buffer (if any) and becomes
-C<SvPOK_off> rather than making a copy.  (Used where this
-scalar is about to be set to some other value.)  In addition,
-the C<flags> parameter gets passed to C<sv_unref_flags()>
-when unreffing.  C<sv_force_normal> calls this function
-with flags set to 0.
-
-This function is expected to be used to signal to perl that this SV is
-about to be written to, and any extra book-keeping needs to be taken care
-of.  Hence, it croaks on read-only values.
-
-=cut
-*/
  
  static void
  S_sv_uncow(pTHX_ SV * const sv, const U32 flags)
@@ -5301,6 +5294,30 @@ S_sv_uncow(pTHX_ SV * const sv, const U32 flags)
      }
  }
  
+
+/*
+=for apidoc sv_force_normal_flags
+
+Undo various types of fakery on an SV, where fakery means
+"more than" a string: if the PV is a shared string, make
+a private copy; if we're a ref, stop refing; if we're a glob, downgrade to
+an C<xpvmg>; if we're a copy-on-write scalar, this is the on-write time when
+we do the copy, and is also used locally; if this is a
+vstring, drop the vstring magic.  If C<SV_COW_DROP_PV> is set
+then a copy-on-write scalar drops its PV buffer (if any) and becomes
+C<SvPOK_off> rather than making a copy.  (Used where this
+scalar is about to be set to some other value.)  In addition,
+the C<flags> parameter gets passed to C<sv_unref_flags()>
+when unreffing.  C<sv_force_normal> calls this function
+with flags set to 0.
+
+This function is expected to be used to signal to perl that this SV is
+about to be written to, and any extra book-keeping needs to be taken care
+of.  Hence, it croaks on read-only values.
+
+=cut
+*/
+
  void
  Perl_sv_force_normal_flags(pTHX_ SV *const sv, const U32 flags)
  {
@@ -6321,7 +6338,7 @@ C<SvPV_force_flags> that applies to C<bigstr>.
  */
  
  void
-Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *const little, const STRLEN littlelen, const U32 flags)
+Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN len, const char *little, const STRLEN littlelen, const U32 flags)
  {
      char *big;
      char *mid;
@@ -6334,6 +6351,16 @@ Perl_sv_insert_flags(pTHX_ SV *const bigstr, const STRLEN offset, const STRLEN l
  
      SvPV_force_flags(bigstr, curlen, flags);
      (void)SvPOK_only_UTF8(bigstr);
+
+    if (little >= SvPVX(bigstr) &&
+        little < SvPVX(bigstr) + (SvLEN(bigstr) ? SvLEN(bigstr) : SvCUR(bigstr))) {
+        /* little is a pointer to within bigstr, since we can reallocate bigstr,
+           or little...little+littlelen might overlap offset...offset+len we make a copy
+        */
+        little = savepvn(little, littlelen);
+        SAVEFREEPV(little);
+    }
+
      if (offset + len > curlen) {
         SvGROW(bigstr, offset+len+1);
         Zero(SvPVX(bigstr)+curlen, offset+len-curlen, char);
@@ -8653,13 +8680,27 @@ Perl_sv_gets(pTHX_ SV *const sv, PerlIO *const fp, I32 append)
         if (cnt > 0) {
              /* if there is a separator */
             if (rslen) {
-                /* loop until we hit the end of the read-ahead buffer */
-               while (cnt > 0) {                    /* this     |  eat */
-                    /* scan forward copying and searching for rslast as we go */
-                   cnt--;
-                   if ((*bp++ = *ptr++) == rslast)  /* really   |  dust */
-                       goto thats_all_folks;        /* screams  |  sed :-) */
-               }
+                /* find next rslast */
+                STDCHAR *p;
+
+                /* shortcut common case of blank line */
+                cnt--;
+                if ((*bp++ = *ptr++) == rslast)
+                    goto thats_all_folks;
+
+                p = (STDCHAR *)memchr(ptr, rslast, cnt);
+                if (p) {
+                    SSize_t got = p - ptr + 1;
+                    Copy(ptr, bp, got, STDCHAR);
+                    ptr += got;
+                    bp  += got;
+                    cnt -= got;
+                    goto thats_all_folks;
+                }
+                Copy(ptr, bp, cnt, STDCHAR);
+                ptr += cnt;
+                bp  += cnt;
+                cnt = 0;
             }
             else {
                  /* no separator, slurp the full buffer */
@@ -9304,7 +9345,14 @@ SV is set to 1.  If C<len> is zero, Perl will compute the length using
  C<strlen()>, (which means if you use this option, that C<s> can't have embedded
  C<NUL> characters and has to have a terminating C<NUL> byte).
  
-For efficiency, consider using C<newSVpvn> instead.
+This function can cause reliability issues if you are likely to pass in
+empty strings that are not null terminated, because it will run
+strlen on the string and potentially run past valid memory.
+
+Using L</newSVpvn> is a safer alternative for non C<NUL> terminated strings.
+For string literals use L</newSVpvs> instead.  This function will work fine for
+C<NUL> terminated strings, but if you want to avoid the if statement on whether
+to call C<strlen> use C<newSVpvn> instead (calling C<strlen> yourself).
  
  =cut
  */
@@ -10952,6 +11000,12 @@ S_expect_number(pTHX_ char **const pattern)
      return var;
  }
  
+/* Implement a fast "%.0f": given a pointer to the end of a buffer (caller
+ * ensures it's big enough), back fill it with the rounded integer part of
+ * nv. Returns ptr to start of string, and sets *len to its length.
+ * Returns NULL if not convertible.
+ */
+
  STATIC char *
  S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
  {
@@ -10960,11 +11014,7 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
  
      PERL_ARGS_ASSERT_F0CONVERT;
  
-    if (UNLIKELY(Perl_isinfnan(nv))) {
-        STRLEN n = S_infnan_2pv(nv, endbuf - *len, *len, 0);
-        *len = n;
-        return endbuf - n;
-    }
+    assert(!Perl_isinfnan(nv));
      if (neg)
         nv = -nv;
      if (nv < UV_MAX) {
@@ -10986,29 +11036,6 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
  }
  
  
-/*
-=for apidoc sv_vcatpvfn
-
-=for apidoc sv_vcatpvfn_flags
-
-Processes its arguments like C<vsprintf> and appends the formatted output
-to an SV.  Uses an array of SVs if the C-style variable argument list is
-missing (C<NULL>). Argument reordering (using format specifiers like C<%2$d>
-or C<%*2$d>) is supported only when using an array of SVs; using a C-style
-C<va_list> argument list with a format string that uses argument reordering
-will yield an exception.
-
-When running with taint checks enabled, indicates via
-C<maybe_tainted> if results are untrustworthy (often due to the use of
-locales).
-
-If called as C<sv_vcatpvfn> or flags has the C<SV_GMAGIC> bit set, calls get magic.
-
-Usually used via one of its frontends C<sv_vcatpvf> and C<sv_vcatpvf_mg>.
-
-=cut
-*/
-
  #define VECTORIZE_ARGS vecsv = va_arg(*args, SV*);\
                         vecstr = (U8*)SvPV_const(vecsv,veclen);\
                         vec_utf8 = DO_UTF8(vecsv);
@@ -11024,6 +11051,34 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
      sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
  }
  
+
+/* For the vcatpvfn code, we need a long double target in case
+ * HAS_LONG_DOUBLE, even without USE_LONG_DOUBLE, so that we can printf
+ * with long double formats, even without NV being long double.  But we
+ * call the target 'fv' instead of 'nv', since most of the time it is not
+ * (most compilers these days recognize "long double", even if only as a
+ * synonym for "double").
+*/
+#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \
+       defined(PERL_PRIgldbl) && !defined(USE_QUADMATH)
+#  define VCATPVFN_FV_GF PERL_PRIgldbl
+#  if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT)
+       /* Work around breakage in OTS$CVT_FLOAT_T_X */
+#    define VCATPVFN_NV_TO_FV(nv,fv)                    \
+            STMT_START {                                \
+                double _dv = nv;                        \
+                fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \
+            } STMT_END
+#  else
+#    define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv)
+#  endif
+   typedef long double vcatpvfn_long_double_t;
+#else
+#  define VCATPVFN_FV_GF NVgf
+#  define VCATPVFN_NV_TO_FV(nv,fv) (fv)=(nv)
+   typedef NV vcatpvfn_long_double_t;
+#endif
+
  #ifdef LONGDOUBLE_DOUBLEDOUBLE
  /* The first double can be as large as 2**1023, or '1' x '0' x 1023.
   * The second double can be as small as 2**-1074, or '0' x 1073 . '1'.
@@ -11073,7 +11128,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
  #  define HEXTRACT_MIX_ENDIAN
  #endif
  
-/* S_hextract() is a helper for Perl_sv_vcatpvfn_flags, for extracting
+/* S_hextract() is a helper for S_format_hexfp, for extracting
   * the hexadecimal values (for %a/%A).  The nv is the NV where the value
   * are being extracted from (either directly from the long double in-memory
   * presentation, or from the uquad computed via frexp+ldexp).  frexp also
@@ -11384,6 +11439,297 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
      return v;
  }
  
+
+/* S_format_hexfp(): helper function for Perl_sv_vcatpvfn_flags().
+ *
+ * Processes the %a/%A hexadecimal floating-point format, since the
+ * built-in snprintf()s which are used for most of the f/p formats, don't
+ * universally handle %a/%A.
+ * Populates buf of length bufsize, and returns the length of the created
+ * string.
+ * The rest of the args have the same meaning as the local vars of the
+ * same name within Perl_sv_vcatpvfn_flags().
+ *
+ * It assumes the caller has already done STORE_LC_NUMERIC_SET_TO_NEEDED();
+ */
+
+static STRLEN
+S_format_hexfp(pTHX_ char * const buf, const STRLEN bufsize, const char c,
+                    const NV nv, const vcatpvfn_long_double_t fv,
+                    bool has_precis, STRLEN precis, STRLEN width,
+                    bool alt, char plus, bool left, bool fill)
+{
+    /* Hexadecimal floating point. */
+    char* p = buf;
+    U8 vhex[VHEX_SIZE];
+    U8* v = vhex; /* working pointer to vhex */
+    U8* vend; /* pointer to one beyond last digit of vhex */
+    U8* vfnz = NULL; /* first non-zero */
+    U8* vlnz = NULL; /* last non-zero */
+    U8* v0 = NULL; /* first output */
+    const bool lower = (c == 'a');
+    /* At output the values of vhex (up to vend) will
+     * be mapped through the xdig to get the actual
+     * human-readable xdigits. */
+    const char* xdig = PL_hexdigit;
+    int zerotail = 0; /* how many extra zeros to append */
+    int exponent = 0; /* exponent of the floating point input */
+    bool hexradix = FALSE; /* should we output the radix */
+    bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
+    bool negative = FALSE;
+    STRLEN elen;
+
+    /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
+     *
+     * For example with denormals, (assuming the vanilla
+     * 64-bit double): the exponent is zero. 1xp-1074 is
+     * the smallest denormal and the smallest double, it
+     * could be output also as 0x0.0000000000001p-1022 to
+     * match its internal structure. */
+
+    vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL);
+    S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend);
+
+#if NVSIZE > DOUBLESIZE
+#  ifdef HEXTRACT_HAS_IMPLICIT_BIT
+    /* In this case there is an implicit bit,
+     * and therefore the exponent is shifted by one. */
+    exponent--;
+#  else
+#    ifdef NV_X86_80_BIT
+    if (subnormal) {
+        /* The subnormals of the x86-80 have a base exponent of -16382,
+         * (while the physical exponent bits are zero) but the frexp()
+         * returned the scientific-style floating exponent.  We want
+         * to map the last one as:
+         * -16831..-16384 -> -16382 (the last normal is 0x1p-16382)
+         * -16835..-16388 -> -16384
+         * since we want to keep the first hexdigit
+         * as one of the [8421]. */
+        exponent = -4 * ( (exponent + 1) / -4) - 2;
+    } else {
+        exponent -= 4;
+    }
+#    endif
+    /* TBD: other non-implicit-bit platforms than the x86-80. */
+#  endif
+#endif
+
+    negative = fv < 0 || Perl_signbit(nv);
+    if (negative)
+        *p++ = '-';
+    else if (plus)
+        *p++ = plus;
+    *p++ = '0';
+    if (lower) {
+        *p++ = 'x';
+    }
+    else {
+        *p++ = 'X';
+        xdig += 16; /* Use uppercase hex. */
+    }
+
+    /* Find the first non-zero xdigit. */
+    for (v = vhex; v < vend; v++) {
+        if (*v) {
+            vfnz = v;
+            break;
+        }
+    }
+
+    if (vfnz) {
+        /* Find the last non-zero xdigit. */
+        for (v = vend - 1; v >= vhex; v--) {
+            if (*v) {
+                vlnz = v;
+                break;
+            }
+        }
+
+#if NVSIZE == DOUBLESIZE
+        if (fv != 0.0)
+            exponent--;
+#endif
+
+        if (subnormal) {
+#ifndef NV_X86_80_BIT
+          if (vfnz[0] > 1) {
+            /* IEEE 754 subnormals (but not the x86 80-bit):
+             * we want "normalize" the subnormal,
+             * so we need to right shift the hex nybbles
+             * so that the output of the subnormal starts
+             * from the first true bit.  (Another, equally
+             * valid, policy would be to dump the subnormal
+             * nybbles as-is, to display the "physical" layout.) */
+            int i, n;
+            U8 *vshr;
+            /* Find the ceil(log2(v[0])) of
+             * the top non-zero nybble. */
+            for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { }
+            assert(n < 4);
+            vlnz[1] = 0;
+            for (vshr = vlnz; vshr >= vfnz; vshr--) {
+              vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n);
+              vshr[0] >>= n;
+            }
+            if (vlnz[1]) {
+              vlnz++;
+            }
+          }
+#endif
+          v0 = vfnz;
+        } else {
+          v0 = vhex;
+        }
+
+        if (has_precis) {
+            U8* ve = (subnormal ? vlnz + 1 : vend);
+            SSize_t vn = ve - v0;
+            assert(vn >= 1);
+            if (precis < (Size_t)(vn - 1)) {
+                bool overflow = FALSE;
+                if (v0[precis + 1] < 0x8) {
+                    /* Round down, nothing to do. */
+                } else if (v0[precis + 1] > 0x8) {
+                    /* Round up. */
+                    v0[precis]++;
+                    overflow = v0[precis] > 0xF;
+                    v0[precis] &= 0xF;
+                } else { /* v0[precis] == 0x8 */
+                    /* Half-point: round towards the one
+                     * with the even least-significant digit:
+                     * 08 -> 0  88 -> 8
+                     * 18 -> 2  98 -> a
+                     * 28 -> 2  a8 -> a
+                     * 38 -> 4  b8 -> c
+                     * 48 -> 4  c8 -> c
+                     * 58 -> 6  d8 -> e
+                     * 68 -> 6  e8 -> e
+                     * 78 -> 8  f8 -> 10 */
+                    if ((v0[precis] & 0x1)) {
+                        v0[precis]++;
+                    }
+                    overflow = v0[precis] > 0xF;
+                    v0[precis] &= 0xF;
+                }
+
+                if (overflow) {
+                    for (v = v0 + precis - 1; v >= v0; v--) {
+                        (*v)++;
+                        overflow = *v > 0xF;
+                        (*v) &= 0xF;
+                        if (!overflow) {
+                            break;
+                        }
+                    }
+                    if (v == v0 - 1 && overflow) {
+                        /* If the overflow goes all the
+                         * way to the front, we need to
+                         * insert 0x1 in front, and adjust
+                         * the exponent. */
+                        Move(v0, v0 + 1, vn - 1, char);
+                        *v0 = 0x1;
+                        exponent += 4;
+                    }
+                }
+
+                /* The new effective "last non zero". */
+                vlnz = v0 + precis;
+            }
+            else {
+                zerotail =
+                  subnormal ? precis - vn + 1 :
+                  precis - (vlnz - vhex);
+            }
+        }
+
+        v = v0;
+        *p++ = xdig[*v++];
+
+        /* If there are non-zero xdigits, the radix
+         * is output after the first one. */
+        if (vfnz < vlnz) {
+          hexradix = TRUE;
+        }
+    }
+    else {
+        *p++ = '0';
+        exponent = 0;
+        zerotail = precis;
+    }
+
+    /* The radix is always output if precis, or if alt. */
+    if (precis > 0 || alt) {
+      hexradix = TRUE;
+    }
+
+    if (hexradix) {
+#ifndef USE_LOCALE_NUMERIC
+            *p++ = '.';
+#else
+            if (PL_numeric_radix_sv) {
+                STRLEN n;
+                const char* r = SvPV(PL_numeric_radix_sv, n);
+                assert(IN_LC(LC_NUMERIC));
+                Copy(r, p, n, char);
+                p += n;
+            }
+            else {
+                *p++ = '.';
+            }
+#endif
+    }
+
+    if (vlnz) {
+        while (v <= vlnz)
+            *p++ = xdig[*v++];
+    }
+
+    if (zerotail > 0) {
+      while (zerotail--) {
+        *p++ = '0';
+      }
+    }
+
+    elen = p - buf;
+    elen += my_snprintf(p, bufsize - elen,
+                        "%c%+d", lower ? 'p' : 'P',
+                        exponent);
+
+    if (elen < width) {
+        STRLEN gap = (STRLEN)(width - elen);
+        if (left) {
+            /* Pad the back with spaces. */
+            memset(buf + elen, ' ', gap);
+        }
+        else if (fill) {
+            /* Insert the zeros after the "0x" and the
+             * the potential sign, but before the digits,
+             * otherwise we end up with "0000xH.HHH...",
+             * when we want "0x000H.HHH..."  */
+            STRLEN nzero = gap;
+            char* zerox = buf + 2;
+            STRLEN nmove = elen - 2;
+            if (negative || plus) {
+                zerox++;
+                nmove--;
+            }
+            Move(zerox, zerox + nzero, nmove, char);
+            memset(zerox, fill ? '0' : ' ', nzero);
+        }
+        else {
+            /* Move it to the right. */
+            Move(buf, buf + gap,
+                 elen, char);
+            /* Pad the front with spaces. */
+            memset(buf, ' ', gap);
+        }
+        elen = width;
+    }
+    return elen;
+}
+
+
  /* Helper for sv_vcatpvfn_flags().  */
  #define FETCH_VCATPVFN_ARGUMENT(var, in_range, expr)   \
      STMT_START {                                       \
@@ -11396,6 +11742,35 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
      } STMT_END
  
  void
+
+
+/*
+=for apidoc sv_vcatpvfn
+
+=for apidoc sv_vcatpvfn_flags
+
+Processes its arguments like C<vsprintf> and appends the formatted output
+to an SV.  Uses an array of SVs if the C-style variable argument list is
+missing (C<NULL>). Argument reordering (using format specifiers like C<%2$d>
+or C<%*2$d>) is supported only when using an array of SVs; using a C-style
+C<va_list> argument list with a format string that uses argument reordering
+will yield an exception.
+
+When running with taint checks enabled, indicates via
+C<maybe_tainted> if results are untrustworthy (often due to the use of
+locales).
+
+If called as C<sv_vcatpvfn> or flags has the C<SV_GMAGIC> bit set, calls get magic.
+
+It assumes that pat has the same utf8-ness as sv.  It's the caller's
+responsibility to ensure that this is so.
+
+Usually used via one of its frontends C<sv_vcatpvf> and C<sv_vcatpvf_mg>.
+
+=cut
+*/
+
+
  Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
                         va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted,
                         const U32 flags)
@@ -11415,9 +11790,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
       * Plus 32: Playing safe. */
      char ebuf[IV_DIG * 4 + NV_DIG + 32];
      bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
-    bool hexfp = FALSE; /* hexadecimal floating point? */
-
+#ifdef USE_LOCALE_NUMERIC
      DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+    bool lc_numeric_set = FALSE; /* called STORE_LC_NUMERIC_SET_TO_NEEDED? */
+#endif
  
      PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
      PERL_UNUSED_ARG(maybe_tainted);
@@ -11428,6 +11804,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      /* no matter what, this is a string now */
      (void)SvPV_force_nomg(sv, origlen);
  
+    /* the code that scans for flags etc following a % relies on
+     * a '\0' being present to avoid falling off the end. Ideally that
+     * should be fixed */
+    assert(pat[patlen] == '\0');
+
      /* special-case "", "%s", and "%-p" (SVf - see below) */
      if (patlen == 0) {
         if (svmax && ckWARN(WARN_REDUNDANT))
@@ -11464,132 +11845,69 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      }
  
  #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
-    /* special-case "%.<number>[gf]" */
-    if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.'
-        && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) {
-       unsigned digits = 0;
-       const char *pp;
-
-       pp = pat + 2;
-       while (*pp >= '0' && *pp <= '9')
-           digits = 10 * digits + (*pp++ - '0');
-
-       /* XXX: Why do this `svix < svmax` test? Couldn't we just
-          format the first argument and WARN_REDUNDANT if svmax > 1?
-          Munged by Nicholas Clark in v5.13.0-209-g95ea86d */
-       if (pp - pat == (int)patlen - 1 && svix < svmax) {
-           const NV nv = SvNV(*svargs);
-            if (LIKELY(!Perl_isinfnan(nv))) {
-                if (*pp == 'g') {
-                    /* Add check for digits != 0 because it seems that some
-                       gconverts are buggy in this case, and we don't yet have
-                       a Configure test for this.  */
-                    if (digits && digits < sizeof(ebuf) - NV_DIG - 10) {
-                        /* 0, point, slack */
-                        STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        SNPRINTF_G(nv, ebuf, size, digits);
-                        sv_catpv_nomg(sv, ebuf);
-                        if (*ebuf)     /* May return an empty string for digits==0 */
-                            return;
-                    }
-                } else if (!digits) {
-                    STRLEN l;
+    /* special-case "%.0f" */
+    if (    !args
+         && patlen == 4
+         && pat[0] == '%' && pat[1] == '.' && pat[2] == '0' && pat[3] == 'f'
+         && svmax > 0)
+    {
+        const NV nv = SvNV(*svargs);
+        if (LIKELY(!Perl_isinfnan(nv))) {
+            STRLEN l;
+            char *p;
  
-                    if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
-                        sv_catpvn_nomg(sv, p, l);
-                        return;
-                    }
-                }
+            if ((p = F0convert(nv, ebuf + sizeof ebuf, &l))) {
+                sv_catpvn_nomg(sv, p, l);
+                return;
              }
         }
      }
  #endif /* !USE_LONG_DOUBLE */
  
-    if (!args && svix < svmax && DO_UTF8(*svargs))
-       has_utf8 = TRUE;
-
      patend = (char*)pat + patlen;
      for (p = (char*)pat; p < patend; p = q) {
-       bool alt = FALSE;
-       bool left = FALSE;
-       bool vectorize = FALSE;
-       bool vectorarg = FALSE;
-       bool vec_utf8 = FALSE;
-       char fill = ' ';
-       char plus = 0;
-       char intsize = 0;
-       STRLEN width = 0;
-       STRLEN zeros = 0;
-       bool has_precis = FALSE;
-       STRLEN precis = 0;
-       const I32 osvix = svix;
-       bool is_utf8 = FALSE;  /* is this item utf8?   */
-        bool used_explicit_ix = FALSE;
-        bool arg_missing = FALSE;
-#ifdef HAS_LDBL_SPRINTF_BUG
-       /* This is to try to fix a bug with irix/nonstop-ux/powerux and
-          with sfio - Allen <allens@cpan.org> */
-       bool fix_ldbl_sprintf_bug = FALSE;
-#endif
  
-       char esignbuf[4];
-       U8 utf8buf[UTF8_MAXBYTES+1];
-       STRLEN esignlen = 0;
-
-       const char *eptr = NULL;
-       const char *fmtstart;
-       STRLEN elen = 0;
-       SV *vecsv = NULL;
-       const U8 *vecstr = NULL;
-       STRLEN veclen = 0;
-       char c = 0;
-       int i;
-       unsigned base = 0;
-       IV iv = 0;
-       UV uv = 0;
-       /* We need a long double target in case HAS_LONG_DOUBLE,
-         * even without USE_LONG_DOUBLE, so that we can printf with
-         * long double formats, even without NV being long double.
-         * But we call the target 'fv' instead of 'nv', since most of
-         * the time it is not (most compilers these days recognize
-         * "long double", even if only as a synonym for "double").
-       */
-#if defined(HAS_LONG_DOUBLE) && LONG_DOUBLESIZE > DOUBLESIZE && \
-       defined(PERL_PRIgldbl) && !defined(USE_QUADMATH)
-       long double fv;
-#  ifdef Perl_isfinitel
-#    define FV_ISFINITE(x) Perl_isfinitel(x)
-#  endif
-#  define FV_GF PERL_PRIgldbl
-#    if defined(__VMS) && defined(__ia64) && defined(__IEEE_FLOAT)
-       /* Work around breakage in OTS$CVT_FLOAT_T_X */
-#      define NV_TO_FV(nv,fv) STMT_START {                   \
-                                           double _dv = nv;  \
-                                           fv = Perl_isnan(_dv) ? LDBL_QNAN : _dv; \
-                              } STMT_END
-#    else
-#      define NV_TO_FV(nv,fv) (fv)=(nv)
-#    endif
-#else
-       NV fv;
-#  define FV_GF NVgf
-#  define NV_TO_FV(nv,fv) (fv)=(nv)
-#endif
-#ifndef FV_ISFINITE
-#  define FV_ISFINITE(x) Perl_isfinite((NV)(x))
-#endif
-        NV nv;
-       STRLEN have;
-       STRLEN need;
-       STRLEN gap;
-       const char *dotstr = ".";
-       STRLEN dotstrlen = 1;
-       I32 efix = 0; /* explicit format parameter index */
-       I32 ewix = 0; /* explicit width index */
-       I32 epix = 0; /* explicit precision index */
-       I32 evix = 0; /* explicit vector index */
-       bool asterisk = FALSE;
-        bool infnan = FALSE;
+       char intsize     = 0;         /* size qualifier in "%hi..." etc */
+       bool alt         = FALSE;     /* has      "%#..."    */
+       bool left        = FALSE;     /* has      "%-..."    */
+       bool fill        = FALSE;     /* has      "%0..."    */
+       char plus        = 0;         /* has      "%+..."    */
+       STRLEN width     = 0;         /* value of "%NNN..."  */
+       bool has_precis  = FALSE;     /* has      "%.NNN..." */
+       STRLEN precis    = 0;         /* value of "%.NNN..." */
+       bool asterisk    = FALSE;     /* has      "%*..."    */
+        bool used_explicit_ix = FALSE;/* has      "%$n..."   */
+       int base         = 0;         /* base to print in, e.g. 8 for %o */
+       UV uv            = 0;         /* the value to print of int-ish args */
+       IV iv            = 0;         /* ditto for signed types */
+
+       bool vectorize   = FALSE;     /* has      "%v..."    */
+       bool vectorarg   = FALSE;     /* has      "%*v..."   */
+       SV *vecsv        = NULL;      /* the cur arg for %v  */
+       bool vec_utf8    = FALSE;     /* SvUTF8(vecsv)       */
+       const U8 *vecstr = NULL;      /* SvPVX(vecsv)        */
+       STRLEN veclen    = 0;         /* SvCUR(vecsv)        */
+       const char *dotstr = ".";     /* separator string for %v */
+       STRLEN dotstrlen = 1;         /* length of separator string for %v */
+
+       I32 efix         = 0;         /* explicit format parameter index */
+       I32 ewix         = 0;         /* explicit width index */
+       I32 epix         = 0;         /* explicit precision index */
+       I32 evix         = 0;         /* explicit vector index */
+       const I32 osvix  = svix;      /* original index in case of bad fmt */
+
+       bool is_utf8     = FALSE;     /* is this item utf8?   */
+        bool arg_missing = FALSE;     /* give "Missing argument" warning */
+       char esignbuf[4];             /* holds sign prefix, e.g. "-0x" */
+       STRLEN esignlen  = 0;         /* length of e.g. "-0x" */
+       STRLEN zeros     = 0;         /* how many '0' to prepend */
+
+       const char *eptr = NULL;      /* the address of the element string */
+       STRLEN elen      = 0;         /* the length  of the element string */
+
+       const char *fmtstart;         /* start of current format (the '%') */
+       char c           = 0;         /* current character read from format */
+
  
         /* echo everything up to the next format specification */
         for (q = p; q < patend && *q != '%'; ++q) ;
@@ -11617,77 +11935,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      [%bcdefginopsuxDFOUX] format (mandatory)
  */
  
-       if (args) {
-/*  
-       As of perl5.9.3, printf format checking is on by default.
-       Internally, perl uses %p formats to provide an escape to
-       some extended formatting.  This block deals with those
-       extensions: if it does not match, (char*)q is reset and
-       the normal format processing code is used.
-
-       Currently defined extensions are:
-               %p              include pointer address (standard)      
-               %-p     (SVf)   include an SV (previously %_)
-               %-<num>p        include an SV with precision <num>      
-               %2p             include a HEK
-               %3p             include a HEK with precision of 256
-               %4p             char* preceded by utf8 flag and length
-               %<num>p         (where num is 1 or > 4) reserved for future
-                               extensions
-
-       Robin Barker 2005-07-14 (but modified since)
-
-               %1p     (VDf)   removed.  RMB 2007-10-19
-*/
-           char* r = q; 
-           bool sv = FALSE;    
-           STRLEN n = 0;
-           if (*q == '-')
-               sv = *q++;
-           else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
-               /* The argument has already gone through cBOOL, so the cast
-                  is safe. */
-               is_utf8 = (bool)va_arg(*args, int);
-               elen = va_arg(*args, UV);
-                /* if utf8 length is larger than 0x7ffff..., then it might
-                 * have been a signed value that wrapped */
-                if (elen  > ((~(STRLEN)0) >> 1)) {
-                    assert(0); /* in DEBUGGING build we want to crash */
-                    elen= 0; /* otherwise we want to treat this as an empty string */
-                }
-               eptr = va_arg(*args, char *);
-               q += sizeof(UTF8f)-1;
-               goto string;
-           }
-           n = expect_number(&q);
-           if (*q++ == 'p') {
-               if (sv) {                       /* SVf */
-                   if (n) {
-                       precis = n;
-                       has_precis = TRUE;
-                   }
-                   argsv = MUTABLE_SV(va_arg(*args, void*));
-                   eptr = SvPV_const(argsv, elen);
-                   if (DO_UTF8(argsv))
-                       is_utf8 = TRUE;
-                   goto string;
-               }
-               else if (n==2 || n==3) {        /* HEKf */
-                   HEK * const hek = va_arg(*args, HEK *);
-                   eptr = HEK_KEY(hek);
-                   elen = HEK_LEN(hek);
-                   if (HEK_UTF8(hek)) is_utf8 = TRUE;
-                   if (n==3) precis = 256, has_precis = TRUE;
-                   goto string;
-               }
-               else if (n) {
-                   Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
-                                    "internal %%<num>p might conflict with future printf extensions");
-               }
-           }
-           q = r; 
-       }
-
         if ( (width = expect_number(&q)) ) {
             if (*q == '$') {
                  if (args)
@@ -11719,7 +11966,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 continue;
  
             case '0':
-               fill = *q++;
+               fill = TRUE;
+                q++;
                 continue;
  
             case '#':
@@ -11762,8 +12010,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
         if (!asterisk)
         {
-           if( *q == '0' )
-               fill = *q++;
+           if(*q == '0') {
+               fill = TRUE;
+                q++;
+            }
             width = expect_number(&q);
         }
  
@@ -11792,6 +12042,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         }
  
         if (asterisk) {
+            int i;
             if (args)
                 i = va_arg(*args, int);
             else
@@ -11807,6 +12058,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         if (*q == '.') {
             q++;
             if (*q == '*') {
+                int i;
                 q++;
                  if ( (epix = expect_number(&q)) ) {
                      if (*q++ == '$') {
@@ -11962,40 +12214,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             }
         }
  
-        if (argsv && strchr("BbcDdiOopuUXx",*q)) {
-            /* XXX va_arg(*args) case? need peek, use va_copy? */
-            SvGETMAGIC(argsv);
-            if (UNLIKELY(SvAMAGIC(argsv)))
-                argsv = sv_2num(argsv);
-            infnan = UNLIKELY(isinfnansv(argsv));
-        }
+       c = *q++; /* c now holds the conversion type */
  
-       switch (c = *q++) {
+       switch (c) {
  
             /* STRINGS */
  
-       case 'c':
-           if (vectorize)
-               goto unknown;
-            if (infnan)
-                Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'",
-                           /* no va_arg() case */
-                           SvNV_nomg(argsv), (int)c);
-           uv = (args) ? va_arg(*args, int) : SvIV_nomg(argsv);
-           if ((uv > 255 ||
-                (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
-               && !IN_BYTES) {
-               eptr = (char*)utf8buf;
-               elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf;
-               is_utf8 = TRUE;
-           }
-           else {
-               c = (char)uv;
-               eptr = &c;
-               elen = 1;
-           }
-           goto string;
-
         case 's':
             if (vectorize)
                 goto unknown;
@@ -12037,14 +12261,100 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* INTEGERS */
  
         case 'p':
-            if (infnan) {
-                goto floating_point;
-            }
             if (alt || vectorize)
                 goto unknown;
+
+            /* %p extensions:
+             *
+             * "%...p" is normally treated like "%...x", except that the
+             * number to print is the SV's address (or a pointer address
+             * for C-ish sprintf).
+             *
+             * However, the C-ish sprintf variant allows a few special
+             * extensions. These are currently:
+             *
+             * %-p       (SVf)  Like %s, but gets the string from an SV*
+             *                  arg rather than a char* arg.
+             *                  (This was previously %_).
+             *
+             * %-<num>p         Ditto but like %.<num>s (i.e. num is max width)
+             *
+             * %2p       (HEKf) Like %s, but using the key string in a HEK
+             *
+             * %3p       (HEKf256) Ditto but like %.256s
+             *
+             * %d%lu%4p  (UTF8f) A utf8 string. Consumes 3 args:
+             *                       (cBOOL(utf8), len, string_buf).
+             *                   It's handled by the "case 'd'" branch
+             *                   rather than here.
+             *
+             * %<num>p   where num is 1 or > 4: reserved for future
+             *           extensions. Warns, but then is treated as a
+             *           general %p (print hex address) format.
+             */
+
+            if (   args
+                && !intsize
+                && !fill
+                && !plus
+                && !has_precis
+                && !asterisk
+                && !used_explicit_ix
+            ) {
+                if (left) {                    /* %-p (SVf), %-NNNp */
+                    if (width) {
+                        precis = width;
+                        has_precis = TRUE;
+                    }
+                    argsv = MUTABLE_SV(va_arg(*args, void*));
+                    eptr = SvPV_const(argsv, elen);
+                    if (DO_UTF8(argsv))
+                        is_utf8 = TRUE;
+                    width = 0;
+                    goto string;
+                }
+                else if (width == 2 || width == 3) {   /* HEKf, HEKf256 */
+                    HEK * const hek = va_arg(*args, HEK *);
+                    eptr = HEK_KEY(hek);
+                    elen = HEK_LEN(hek);
+                    if (HEK_UTF8(hek))
+                        is_utf8 = TRUE;
+                    if (width == 3) {
+                        precis = 256;
+                        has_precis = TRUE;
+                    }
+                    width = 0;
+                    goto string;
+                }
+                else if (width) {
+                    Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+                         "internal %%<num>p might conflict with future printf extensions");
+                }
+            }
+
+            /* treat as normal %...p */
+
             uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
             base = 16;
-           goto integer;
+           goto do_integer;
+
+       case 'c':
+           if (vectorize)
+               goto unknown;
+            /* Ignore any size specifiers, since they're not documented as
+             * being allowed for %c (ideally we should warn on e.g. '%hc').
+             * Setting a default intsize, along with a positive
+             * (which signals unsigned) base, causes, for C-ish use, the
+             * va_arg to be interpreted as as unsigned int, when it's
+             * actually signed, which will convert -ve values to high +ve
+             * values. Note that unlike the libc %c, values > 255 will
+             * convert to high unicode points rather than being truncated
+             * to 8 bits. For perlish use, it will do SvUV(argsv), which
+             * will again convert -ve args to high -ve values.
+             */
+            intsize = 0;
+            base = 1; /* special value that indicates we're doing a 'c' */
+            goto get_int_arg_val;
  
         case 'D':
  #ifdef IV_IS_QUAD
@@ -12052,80 +12362,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #else
             intsize = 'l';
  #endif
-           /* FALLTHROUGH */
+            base = -10;
+            goto get_int_arg_val;
+
         case 'd':
-       case 'i':
-            if (infnan) {
-                goto floating_point;
-            }
-           if (vectorize) {
-               STRLEN ulen;
-               if (!veclen)
-                    goto donevalidconversion;
-               if (vec_utf8)
-                   uv = utf8n_to_uvchr(vecstr, veclen, &ulen,
-                                       UTF8_ALLOW_ANYUV);
-               else {
-                   uv = *vecstr;
-                   ulen = 1;
-               }
-               vecstr += ulen;
-               veclen -= ulen;
-               if (plus)
-                    esignbuf[esignlen++] = plus;
-           }
-           else if (args) {
-               switch (intsize) {
-               case 'c':       iv = (char)va_arg(*args, int); break;
-               case 'h':       iv = (short)va_arg(*args, int); break;
-               case 'l':       iv = va_arg(*args, long); break;
-               case 'V':       iv = va_arg(*args, IV); break;
-               case 'z':       iv = va_arg(*args, SSize_t); break;
-#ifdef HAS_PTRDIFF_T
-               case 't':       iv = va_arg(*args, ptrdiff_t); break;
-#endif
-               default:        iv = va_arg(*args, int); break;
-#ifdef I_STDINT
-               case 'j':       iv = va_arg(*args, intmax_t); break;
-#endif
-               case 'q':
-#if IVSIZE >= 8
-                               iv = va_arg(*args, Quad_t); break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           else {
-               IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       iv = (char)tiv; break;
-               case 'h':       iv = (short)tiv; break;
-               case 'l':       iv = (long)tiv; break;
-               case 'V':
-               default:        iv = tiv; break;
-               case 'q':
-#if IVSIZE >= 8
-                               iv = (Quad_t)tiv; break;
-#else
-                               goto unknown;
-#endif
-               }
-           }
-           if ( !vectorize )   /* we already set uv above */
-           {
-               if (iv >= 0) {
-                   uv = iv;
-                   if (plus)
-                       esignbuf[esignlen++] = plus;
-               }
-               else {
-                   uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
-                   esignbuf[esignlen++] = '-';
-               }
+            /* probably just a plain %d, but it might be the start of the
+             * special UTF8f format, which usually looks something like
+             * "%d%lu%4p" (the lu may vary by platform)
+             */
+            assert((UTF8f)[0] == 'd');
+            assert((UTF8f)[1] == '%');
+
+            if (   args              /* UTF8f only valid for C-ish sprintf */
+                 && q == fmtstart + 1 /* plain %d, not %....d */
+                 && patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
+                 && *q == '%'
+                 && strnEQ(q + 1, UTF8f + 2, sizeof(UTF8f) - 3))
+            {
+               /* The argument has already gone through cBOOL, so the cast
+                  is safe. */
+               is_utf8 = (bool)va_arg(*args, int);
+               elen = va_arg(*args, UV);
+                /* if utf8 length is larger than 0x7ffff..., then it might
+                 * have been a signed value that wrapped */
+                if (elen  > ((~(STRLEN)0) >> 1)) {
+                    assert(0); /* in DEBUGGING build we want to crash */
+                    elen = 0; /* otherwise we want to treat this as an empty string */
+                }
+               eptr = va_arg(*args, char *);
+               q += sizeof(UTF8f) - 2;
+               goto string;
             }
-           base = 10;
-           goto integer;
+
+           /* FALLTHROUGH */
+       case 'i':
+            base = -10;
+            goto get_int_arg_val;
  
         case 'U':
  #ifdef IV_IS_QUAD
@@ -12136,12 +12408,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'u':
             base = 10;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'B':
         case 'b':
             base = 2;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'O':
  #ifdef IV_IS_QUAD
@@ -12152,19 +12424,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'o':
             base = 8;
-           goto uns_integer;
+           goto get_int_arg_val;
  
         case 'X':
         case 'x':
             base = 16;
  
-       uns_integer:
-            if (infnan) {
-                goto floating_point;
-            }
+          get_int_arg_val:
+
             if (vectorize) {
                 STRLEN ulen;
-       vector:
+
+                if (base < 0) {
+                    base = -base;
+                    if (plus)
+                         esignbuf[esignlen++] = plus;
+                }
+
+             vector:
                 if (!veclen)
                      goto donevalidconversion;
                 if (vec_utf8)
@@ -12177,46 +12454,119 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 vecstr += ulen;
                 veclen -= ulen;
             }
-           else if (args) {
-               switch (intsize) {
-               case 'c':  uv = (unsigned char)va_arg(*args, unsigned); break;
-               case 'h':  uv = (unsigned short)va_arg(*args, unsigned); break;
-               case 'l':  uv = va_arg(*args, unsigned long); break;
-               case 'V':  uv = va_arg(*args, UV); break;
-               case 'z':  uv = va_arg(*args, Size_t); break;
+           else {
+                /* test arg for inf/nan. This can trigger an unwanted
+                 * 'str' overload, so manually force 'num' overload first
+                 * if necessary */
+                if (argsv) {
+                    SvGETMAGIC(argsv);
+                    if (UNLIKELY(SvAMAGIC(argsv)))
+                        argsv = sv_2num(argsv);
+                    if (UNLIKELY(isinfnansv(argsv)))
+                        goto handle_infnan_argsv;
+                }
+
+                if (base < 0) {
+                    /* signed int type */
+                    base = -base;
+                    if (args) {
+                        switch (intsize) {
+                        case 'c':  iv = (char)va_arg(*args, int);  break;
+                        case 'h':  iv = (short)va_arg(*args, int); break;
+                        case 'l':  iv = va_arg(*args, long);       break;
+                        case 'V':  iv = va_arg(*args, IV);         break;
+                        case 'z':  iv = va_arg(*args, SSize_t);    break;
  #ifdef HAS_PTRDIFF_T
-               case 't':  uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
+                        case 't':  iv = va_arg(*args, ptrdiff_t);  break;
  #endif
+                        default:   iv = va_arg(*args, int);        break;
  #ifdef I_STDINT
-               case 'j':  uv = va_arg(*args, uintmax_t); break;
+                        case 'j':  iv = va_arg(*args, intmax_t);   break;
  #endif
-               default:   uv = va_arg(*args, unsigned); break;
-               case 'q':
+                        case 'q':
  #if IVSIZE >= 8
-                          uv = va_arg(*args, Uquad_t); break;
+                                   iv = va_arg(*args, Quad_t);     break;
  #else
-                          goto unknown;
+                                   goto unknown;
  #endif
-               }
-           }
-           else {
-               UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
-               switch (intsize) {
-               case 'c':       uv = (unsigned char)tuv; break;
-               case 'h':       uv = (unsigned short)tuv; break;
-               case 'l':       uv = (unsigned long)tuv; break;
-               case 'V':
-               default:        uv = tuv; break;
-               case 'q':
+                        }
+                    }
+                    else {
+                        IV tiv = SvIV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c':  iv = (char)tiv;   break;
+                        case 'h':  iv = (short)tiv;  break;
+                        case 'l':  iv = (long)tiv;   break;
+                        case 'V':
+                        default:   iv = tiv;         break;
+                        case 'q':
  #if IVSIZE >= 8
-                               uv = (Uquad_t)tuv; break;
+                                   iv = (Quad_t)tiv; break;
  #else
-                               goto unknown;
+                                   goto unknown;
  #endif
-               }
-           }
+                        }
+                    }
+
+                    /* now convert iv to uv */
+                    if (iv >= 0) {
+                        uv = iv;
+                        if (plus)
+                            esignbuf[esignlen++] = plus;
+                    }
+                    else {
+                        uv = (iv == IV_MIN) ? (UV)iv : (UV)(-iv);
+                        esignbuf[esignlen++] = '-';
+                    }
+                }
+                else {
+                    /* unsigned int type */
+                    if (args) {
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)va_arg(*args, unsigned);
+                                  break;
+                        case 'h': uv = (unsigned short)va_arg(*args, unsigned);
+                                  break;
+                        case 'l': uv = va_arg(*args, unsigned long); break;
+                        case 'V': uv = va_arg(*args, UV);            break;
+                        case 'z': uv = va_arg(*args, Size_t);        break;
+#ifdef HAS_PTRDIFF_T
+                                  /* will sign extend, but there is no
+                                   * uptrdiff_t, so oh well */
+                        case 't': uv = va_arg(*args, ptrdiff_t);     break;
+#endif
+#ifdef I_STDINT
+                        case 'j': uv = va_arg(*args, uintmax_t);     break;
+#endif
+                        default:  uv = va_arg(*args, unsigned);      break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = va_arg(*args, Uquad_t);       break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                    else {
+                        UV tuv = SvUV_nomg(argsv); /* work around GCC bug #13488 */
+                        switch (intsize) {
+                        case 'c': uv = (unsigned char)tuv;  break;
+                        case 'h': uv = (unsigned short)tuv; break;
+                        case 'l': uv = (unsigned long)tuv;  break;
+                        case 'V':
+                        default:  uv = tuv;                 break;
+                        case 'q':
+#if IVSIZE >= 8
+                                  uv = (Uquad_t)tuv;        break;
+#else
+                                  goto unknown;
+#endif
+                        }
+                    }
+                }
+            }
  
-       integer:
+       do_integer:
             {
                 char *ptr = ebuf + sizeof ebuf;
                 bool tempalt = uv ? alt : FALSE; /* Vectors can't change alt */
@@ -12253,6 +12603,28 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                         esignbuf[esignlen++] = c;
                     }
                     break;
+
+               case 1:
+                    /* special-case: base 1 indicates a 'c' format:
+                     * we use the common code for extracting a uv,
+                     * but handle that value differently here than
+                     * all the other int types */
+                    if ((uv > 255 ||
+                         (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
+                        && !IN_BYTES)
+                    {
+                        assert(sizeof(ebuf) >= UTF8_MAXBYTES + 1);
+                        eptr = ebuf;
+                        elen = uvchr_to_utf8((U8*)eptr, uv) - (U8*)ebuf;
+                        is_utf8 = TRUE;
+                    }
+                    else {
+                        c = (char)uv;
+                        eptr = &c;
+                        elen = 1;
+                    }
+                    goto string;
+
                 default:                /* it had better be ten or less */
                     do {
                         dig = uv % base;
@@ -12269,17 +12641,14 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                              && !(base == 8 && alt)) /* "%#.0o" prints "0" */
                         elen = 0;
  
-               /* a precision nullifies the 0 flag. */
-                   if (fill == '0')
-                       fill = ' ';
+                    /* a precision nullifies the 0 flag. */
+                    fill = FALSE;
                 }
             }
             break;
  
             /* FLOATING POINT */
  
-        floating_point:
-
         case 'F':
             c = 'f';            /* maybe %F isn't supported here */
             /* FALLTHROUGH */
@@ -12287,6 +12656,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'f':
         case 'g': case 'G':
         case 'a': case 'A':
+
+        {
+            STRLEN radix_len;  /* SvCUR(PL_numeric_radix_sv) */
+            STRLEN float_need; /* what PL_efloatsize needs to become */
+            bool hexfp;        /* hexadecimal floating point? */
+
+            vcatpvfn_long_double_t fv;
+            NV                     nv;
+
             if (vectorize)
                 goto unknown;
  
@@ -12345,7 +12723,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      nv = fv;
                  } else {
                      nv = va_arg(*args, double);
-                    NV_TO_FV(nv, fv);
+                    VCATPVFN_NV_TO_FV(nv, fv);
                  }
  #else
                  nv = va_arg(*args, double);
@@ -12354,448 +12732,215 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
              }
              else
              {
-                if (!infnan) SvGETMAGIC(argsv);
+                SvGETMAGIC(argsv);
+                /* we jump here if an int-ish format encountered an
+                 * infinite/Nan argsv. After setting nv/fv, it falls
+                 * into the isinfnan block which follows */
+              handle_infnan_argsv:
                  nv = SvNV_nomg(argsv);
-                NV_TO_FV(nv, fv);
+                VCATPVFN_NV_TO_FV(nv, fv);
+            }
+
+            if (Perl_isinfnan(nv)) {
+                if (c == 'c')
+                    Perl_croak(aTHX_ "Cannot printf %" NVgf " with '%c'",
+                           SvNV_nomg(argsv), (int)c);
+
+                elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus);
+                assert(elen);
+                eptr = ebuf;
+                zeros     = 0;
+                esignlen  = 0;
+                dotstrlen = 0;
+                break;
+            }
+
+            /* special-case "%.0f" */
+            if (   c == 'f'
+                && !precis
+                && has_precis
+                && !(width || left || plus || alt)
+                && !fill
+                && intsize != 'q'
+                && ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
+            )
+                goto float_concat_no_utf8;
+
+            /* Determine the buffer size needed for the various
+             * floating-point formats.
+             *
+             * The basic possibilities are:
+             *
+             *               <---P--->
+             *    %f 1111111.123456789
+             *    %e       1.111111123e+06
+             *    %a     0x1.0f4471f9bp+20
+             *    %g        1111111.12
+             *    %g        1.11111112e+15
+             *
+             * where P is the value of the precision in the format, or 6
+             * if not specified. Note the two possible output formats of
+             * %g; in both cases the number of significant digits is <=
+             * precision.
+             *
+             * For most of the format types the maximum buffer size needed
+             * is precision, plus: any leading 1 or 0x1, the radix
+             * point, and an exponent.  The difficult one is %f: for a
+             * large positive exponent it can have many leading digits,
+             * which needs to be calculated specially. Also %a is slightly
+             * different in that in the absence of a specified precision,
+             * it uses as many digits as necessary to distinguish
+             * different values.
+             *
+             * First, here are the constant bits. For ease of calculation
+             * we over-estimate the needed buffer size, for example by
+             * assuming all formats have an exponent and a leading 0x1.
+             */
+
+            float_need =     1  /* possible unary minus */
+                          +  4  /* "0x1" plus very unlikely carry */
+                          +  2  /* "e-", "p+" etc */
+                          +  6  /* exponent: up to 16383 (quad fp) */
+                          +  1; /* \0 */
+
+
+            /* determine the radix point len, e.g. length(".") in "1.2" */
+            radix_len  = 1; /* assume '.' */
+#ifdef USE_LOCALE_NUMERIC
+            /* note that we may either explicitly use PL_numeric_radix_sv
+             * below, or implicitly, via an snprintf() variant.
+             * Note also things like ps_AF.utf8 which has
+             * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
+            if (!lc_numeric_set) {
+                /* only set once and reuse in-locale value on subsequent
+                 * iterations.
+                 * XXX what happens if we die in an eval?
+                 */
+                STORE_LC_NUMERIC_SET_TO_NEEDED();
+                lc_numeric_set = TRUE;
              }
  
-           need = 0;
-           /* frexp() (or frexpl) has some unspecified behaviour for
-             * nan/inf/-inf, so let's avoid calling that on non-finites. */
-           if (isALPHA_FOLD_NE(c, 'e') && FV_ISFINITE(fv)) {
-                i = PERL_INT_MIN;
+            if (PL_numeric_radix_sv) {
+                assert(IN_LC(LC_NUMERIC));
+                radix_len  = SvCUR(PL_numeric_radix_sv);
+                /* note that this will convert the output to utf8 even if
+                 * if the radix point didn't get output */
+                is_utf8 = SvUTF8(PL_numeric_radix_sv);
+            }
+#endif
+            /* this can't wrap unless PL_numeric_radix_sv is a string
+             * consuming virtually all the 32-bit or 64-bit address space
+             */
+           float_need += radix_len;
+
+            hexfp = FALSE;
+
+           if (isALPHA_FOLD_EQ(c, 'f')) {
+                /* Determine how many digits before the radix point
+                 * might be emitted.  frexp() (or frexpl) has some
+                 * unspecified behaviour for nan/inf/-inf, so lucky we've
+                 * already handled them above */
+                STRLEN digits;
+                int i = PERL_INT_MIN;
                  (void)Perl_frexp((NV)fv, &i);
                  if (i == PERL_INT_MIN)
-                    Perl_die(aTHX_ "panic: frexp: %" FV_GF, fv);
-                /* Do not set hexfp earlier since we want to printf
-                 * Inf/NaN for Inf/NaN, not their hexfp. */
-                hexfp = isALPHA_FOLD_EQ(c, 'a');
-                if (UNLIKELY(hexfp)) {
-                    /* This seriously overshoots in most cases, but
-                     * better the undershooting.  Firstly, all bytes
+                    Perl_die(aTHX_ "panic: frexp: %" VCATPVFN_FV_GF, fv);
+
+                if (i > 0) {
+                    digits = BIT_DIGITS(i);
+                    if (float_need >= ((STRLEN)~0) - digits)
+                        croak_memory_wrap();
+                    float_need += digits;
+                }
+            }
+            else if (UNLIKELY(isALPHA_FOLD_EQ(c, 'a'))) {
+                hexfp = TRUE;
+                if (!has_precis) {
+                    /* %a in the absence of precision may print as many
+                     * digits as needed to represent the entire mantissa
+                     * bit pattern.
+                     * This estimate seriously overshoots in most cases,
+                     * but better the undershooting.  Firstly, all bytes
                       * of the NV are not mantissa, some of them are
                       * exponent.  Secondly, for the reasonably common
                       * long doubles case, the "80-bit extended", two
-                     * or six bytes of the NV are unused. */
-                    need +=
-                        (fv < 0) ? 1 : 0 + /* possible unary minus */
-                        2 + /* "0x" */
-                        1 + /* the very unlikely carry */
-                        1 + /* "1" */
-                        1 + /* "." */
-                        2 * NVSIZE + /* 2 hexdigits for each byte */
-                        2 + /* "p+" */
-                        6 + /* exponent: sign, plus up to 16383 (quad fp) */
-                        1;   /* \0 */
+                     * or six bytes of the NV are unused. Also, we'll
+                     * still pick up an extra +6 from the default
+                     * precision calculation below. */
+                    STRLEN digits =
  #ifdef LONGDOUBLE_DOUBLEDOUBLE
-                    /* However, for the "double double", we need more.
-                     * Since each double has their own exponent, the
-                     * doubles may float (haha) rather far from each
-                     * other, and the number of required bits is much
-                     * larger, up to total of DOUBLEDOUBLE_MAXBITS bits.
-                     * See the definition of DOUBLEDOUBLE_MAXBITS.
-                     *
-                     * Need 2 hexdigits for each byte. */
-                    need += (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2;
-                    /* the size for the exponent already added */
-#endif
-#ifdef USE_LOCALE_NUMERIC
-                        STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))
-                            need += SvLEN(PL_numeric_radix_sv);
-                        RESTORE_LC_NUMERIC();
+                        /* For the "double double", we need more.
+                         * Since each double has their own exponent, the
+                         * doubles may float (haha) rather far from each
+                         * other, and the number of required bits is much
+                         * larger, up to total of DOUBLEDOUBLE_MAXBITS bits.
+                         * See the definition of DOUBLEDOUBLE_MAXBITS.
+                         *
+                         * Need 2 hexdigits for each byte. */
+                        (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2;
+#else
+                        NVSIZE * 2; /* 2 hexdigits for each byte */
  #endif
+                    if (float_need >= ((STRLEN)~0) - digits)
+                        croak_memory_wrap();
+                    float_need += digits;
                  }
-                else if (i > 0) {
-                    need = BIT_DIGITS(i);
-                } /* if i < 0, the number of digits is hard to predict. */
             }
-           need += has_precis ? precis : 6; /* known default */
-
-           if (need < width)
-               need = width;
-
-#ifdef HAS_LDBL_SPRINTF_BUG
-           /* This is to try to fix a bug with irix/nonstop-ux/powerux and
-              with sfio - Allen <allens@cpan.org> */
-
-#  ifdef DBL_MAX
-#    define MY_DBL_MAX DBL_MAX
-#  else /* XXX guessing! HUGE_VAL may be defined as infinity, so not using */
-#    if DOUBLESIZE >= 8
-#      define MY_DBL_MAX 1.7976931348623157E+308L
-#    else
-#      define MY_DBL_MAX 3.40282347E+38L
-#    endif
-#  endif
+            /* special-case "%.<number>g" if it will fit in ebuf */
+            else if (c == 'g'
+                && precis   /* See earlier comment about buggy Gconvert
+                               when digits, aka precis, is 0  */
+                && has_precis
+                /* check, in manner not involving wrapping, that it will
+                 * fit in ebuf  */
+                && float_need < sizeof(ebuf)
+                && sizeof(ebuf) - float_need > precis
+                && !(width || left || plus || alt)
+                && !fill
+                && intsize != 'q'
+            ) {
+                SNPRINTF_G(fv, ebuf, sizeof(ebuf), precis);
+                elen = strlen(ebuf);
+                eptr = ebuf;
+                goto float_concat;
+           }
  
-#  ifdef HAS_LDBL_SPRINTF_BUG_LESS1 /* only between -1L & 1L - Allen */
-#    define MY_DBL_MAX_BUG 1L
-#  else
-#    define MY_DBL_MAX_BUG MY_DBL_MAX
-#  endif
  
-#  ifdef DBL_MIN
-#    define MY_DBL_MIN DBL_MIN
-#  else  /* XXX guessing! -Allen */
-#    if DOUBLESIZE >= 8
-#      define MY_DBL_MIN 2.2250738585072014E-308L
-#    else
-#      define MY_DBL_MIN 1.17549435E-38L
-#    endif
-#  endif
+            {
+                STRLEN pr = has_precis ? precis : 6; /* known default */
+                if (float_need >= ((STRLEN)~0) - pr)
+                    croak_memory_wrap();
+                float_need += pr;
+            }
  
-           if ((intsize == 'q') && (c == 'f') &&
-               ((fv < MY_DBL_MAX_BUG) && (fv > -MY_DBL_MAX_BUG)) &&
-               (need < DBL_DIG)) {
-               /* it's going to be short enough that
-                * long double precision is not needed */
+           if (float_need < width)
+               float_need = width;
  
-               if ((fv <= 0L) && (fv >= -0L))
-                   fix_ldbl_sprintf_bug = TRUE; /* 0 is 0 - easiest */
-               else {
-                   /* would use Perl_fp_class as a double-check but not
-                    * functional on IRIX - see perl.h comments */
-
-                   if ((fv >= MY_DBL_MIN) || (fv <= -MY_DBL_MIN)) {
-                       /* It's within the range that a double can represent */
-#if defined(DBL_MAX) && !defined(DBL_MIN)
-                       if ((fv >= ((long double)1/DBL_MAX)) ||
-                           (fv <= (-(long double)1/DBL_MAX)))
+/* We should have correctly calculated (or indeed over-estimated) the
+ * buffer size, but you never know what strange floating-point systems
+ * there are out there. So for production use, add a little extra overhead.
+ * Under debugging don't, as it means we more more likely to quickly spot
+ * issues during development.
+ */
+#ifndef DEBUGGING
+            if (float_need >= ((STRLEN)~0) - 20)
+                croak_memory_wrap();
+           float_need += 20; /* safety fudge factor */
  #endif
-                       fix_ldbl_sprintf_bug = TRUE;
-                   }
-               }
-               if (fix_ldbl_sprintf_bug == TRUE) {
-                   double temp;
-
-                   intsize = 0;
-                   temp = (double)fv;
-                   fv = (NV)temp;
-               }
-           }
  
-#  undef MY_DBL_MAX
-#  undef MY_DBL_MAX_BUG
-#  undef MY_DBL_MIN
-
-#endif /* HAS_LDBL_SPRINTF_BUG */
-
-           need += 20; /* fudge factor */
-           if (PL_efloatsize < need) {
+           if (PL_efloatsize < float_need) {
                 Safefree(PL_efloatbuf);
-               PL_efloatsize = need + 20; /* more fudge */
+               PL_efloatsize = float_need;
                 Newx(PL_efloatbuf, PL_efloatsize, char);
                 PL_efloatbuf[0] = '\0';
             }
  
-           if ( !(width || left || plus || alt) && fill != '0'
-                && has_precis && intsize != 'q'        /* Shortcuts */
-                 && LIKELY(!Perl_isinfnan((NV)fv)) ) {
-               /* See earlier comment about buggy Gconvert when digits,
-                  aka precis is 0  */
-               if ( c == 'g' && precis ) {
-                    STORE_LC_NUMERIC_SET_TO_NEEDED();
-                    SNPRINTF_G(fv, PL_efloatbuf, PL_efloatsize, precis);
-                   /* May return an empty string for digits==0 */
-                   if (*PL_efloatbuf) {
-                       elen = strlen(PL_efloatbuf);
-                       goto float_converted;
-                   }
-               } else if ( c == 'f' && !precis ) {
-                   if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
-                       break;
-               }
-           }
-
              if (UNLIKELY(hexfp)) {
-                /* Hexadecimal floating point. */
-                char* p = PL_efloatbuf;
-                U8 vhex[VHEX_SIZE];
-                U8* v = vhex; /* working pointer to vhex */
-                U8* vend; /* pointer to one beyond last digit of vhex */
-                U8* vfnz = NULL; /* first non-zero */
-                U8* vlnz = NULL; /* last non-zero */
-                U8* v0 = NULL; /* first output */
-                const bool lower = (c == 'a');
-                /* At output the values of vhex (up to vend) will
-                 * be mapped through the xdig to get the actual
-                 * human-readable xdigits. */
-                const char* xdig = PL_hexdigit;
-                int zerotail = 0; /* how many extra zeros to append */
-                int exponent = 0; /* exponent of the floating point input */
-                bool hexradix = FALSE; /* should we output the radix */
-                bool subnormal = FALSE; /* IEEE 754 subnormal/denormal */
-                bool negative = FALSE;
-
-                /* XXX: NaN, Inf -- though they are printed as "NaN" and "Inf".
-                 *
-                 * For example with denormals, (assuming the vanilla
-                 * 64-bit double): the exponent is zero. 1xp-1074 is
-                 * the smallest denormal and the smallest double, it
-                 * could be output also as 0x0.0000000000001p-1022 to
-                 * match its internal structure. */
-
-                vend = S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, NULL);
-                S_hextract(aTHX_ nv, &exponent, &subnormal, vhex, vend);
-
-#if NVSIZE > DOUBLESIZE
-#  ifdef HEXTRACT_HAS_IMPLICIT_BIT
-                /* In this case there is an implicit bit,
-                 * and therefore the exponent is shifted by one. */
-                exponent--;
-#  else
-#   ifdef NV_X86_80_BIT
-                if (subnormal) {
-                    /* The subnormals of the x86-80 have a base exponent of -16382,
-                     * (while the physical exponent bits are zero) but the frexp()
-                     * returned the scientific-style floating exponent.  We want
-                     * to map the last one as:
-                     * -16831..-16384 -> -16382 (the last normal is 0x1p-16382)
-                     * -16835..-16388 -> -16384
-                     * since we want to keep the first hexdigit
-                     * as one of the [8421]. */
-                    exponent = -4 * ( (exponent + 1) / -4) - 2;
-                } else {
-                    exponent -= 4;
-                }
-#   endif
-                /* TBD: other non-implicit-bit platforms than the x86-80. */
-#  endif
-#endif
-
-                negative = fv < 0 || Perl_signbit(nv);
-                if (negative)
-                    *p++ = '-';
-                else if (plus)
-                    *p++ = plus;
-                *p++ = '0';
-                if (lower) {
-                    *p++ = 'x';
-                }
-                else {
-                    *p++ = 'X';
-                    xdig += 16; /* Use uppercase hex. */
-                }
-
-                /* Find the first non-zero xdigit. */
-                for (v = vhex; v < vend; v++) {
-                    if (*v) {
-                        vfnz = v;
-                        break;
-                    }
-                }
-
-                if (vfnz) {
-                    /* Find the last non-zero xdigit. */
-                    for (v = vend - 1; v >= vhex; v--) {
-                        if (*v) {
-                            vlnz = v;
-                            break;
-                        }
-                    }
-
-#if NVSIZE == DOUBLESIZE
-                    if (fv != 0.0)
-                        exponent--;
-#endif
-
-                    if (subnormal) {
-#ifndef NV_X86_80_BIT
-                      if (vfnz[0] > 1) {
-                        /* IEEE 754 subnormals (but not the x86 80-bit):
-                         * we want "normalize" the subnormal,
-                        * so we need to right shift the hex nybbles
-                         * so that the output of the subnormal starts
-                         * from the first true bit.  (Another, equally
-                        * valid, policy would be to dump the subnormal
-                        * nybbles as-is, to display the "physical" layout.) */
-                        int i, n;
-                        U8 *vshr;
-                        /* Find the ceil(log2(v[0])) of
-                         * the top non-zero nybble. */
-                        for (i = vfnz[0], n = 0; i > 1; i >>= 1, n++) { }
-                        assert(n < 4);
-                        vlnz[1] = 0;
-                        for (vshr = vlnz; vshr >= vfnz; vshr--) {
-                          vshr[1] |= (vshr[0] & (0xF >> (4 - n))) << (4 - n);
-                          vshr[0] >>= n;
-                        }
-                        if (vlnz[1]) {
-                          vlnz++;
-                        }
-                      }
-#endif
-                      v0 = vfnz;
-                    } else {
-                      v0 = vhex;
-                    }
-
-                    if (has_precis) {
-                        U8* ve = (subnormal ? vlnz + 1 : vend);
-                        SSize_t vn = ve - (subnormal ? vfnz : vhex);
-                        if ((SSize_t)(precis + 1) < vn) {
-                            bool overflow = FALSE;
-                            if (v0[precis + 1] < 0x8) {
-                                /* Round down, nothing to do. */
-                            } else if (v0[precis + 1] > 0x8) {
-                                /* Round up. */
-                                v0[precis]++;
-                                overflow = v0[precis] > 0xF;
-                                v0[precis] &= 0xF;
-                            } else { /* v0[precis] == 0x8 */
-                                /* Half-point: round towards the one
-                                 * with the even least-significant digit:
-                                 * 08 -> 0  88 -> 8
-                                 * 18 -> 2  98 -> a
-                                 * 28 -> 2  a8 -> a
-                                 * 38 -> 4  b8 -> c
-                                 * 48 -> 4  c8 -> c
-                                 * 58 -> 6  d8 -> e
-                                 * 68 -> 6  e8 -> e
-                                 * 78 -> 8  f8 -> 10 */
-                                if ((v0[precis] & 0x1)) {
-                                    v0[precis]++;
-                                }
-                                overflow = v0[precis] > 0xF;
-                                v0[precis] &= 0xF;
-                            }
-
-                            if (overflow) {
-                                for (v = v0 + precis - 1; v >= v0; v--) {
-                                    (*v)++;
-                                    overflow = *v > 0xF;
-                                    (*v) &= 0xF;
-                                    if (!overflow) {
-                                        break;
-                                    }
-                                }
-                                if (v == v0 - 1 && overflow) {
-                                    /* If the overflow goes all the
-                                     * way to the front, we need to
-                                     * insert 0x1 in front, and adjust
-                                     * the exponent. */
-                                    Move(v0, v0 + 1, vn, char);
-                                    *v0 = 0x1;
-                                    exponent += 4;
-                                }
-                            }
-
-                            /* The new effective "last non zero". */
-                            vlnz = v0 + precis;
-                        }
-                        else {
-                            zerotail =
-                              subnormal ? precis - vn + 1 :
-                              precis - (vlnz - vhex);
-                        }
-                    }
-
-                    v = v0;
-                    *p++ = xdig[*v++];
-
-                    /* If there are non-zero xdigits, the radix
-                     * is output after the first one. */
-                    if (vfnz < vlnz) {
-                      hexradix = TRUE;
-                    }
-                }
-                else {
-                    *p++ = '0';
-                    exponent = 0;
-                    zerotail = precis;
-                }
-
-                /* The radix is always output if precis, or if alt. */
-                if (precis > 0 || alt) {
-                  hexradix = TRUE;
-                }
-
-                if (hexradix) {
-#ifndef USE_LOCALE_NUMERIC
-                        *p++ = '.';
-#else
-                        STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
-                            STRLEN n;
-                            const char* r = SvPV(PL_numeric_radix_sv, n);
-                            Copy(r, p, n, char);
-                            p += n;
-                        }
-                        else {
-                            *p++ = '.';
-                        }
-                        RESTORE_LC_NUMERIC();
-#endif
-                }
-
-                if (vlnz) {
-                    while (v <= vlnz)
-                        *p++ = xdig[*v++];
-                }
-
-                if (zerotail > 0) {
-                  while (zerotail--) {
-                    *p++ = '0';
-                  }
-                }
-
-                elen = p - PL_efloatbuf;
-                elen += my_snprintf(p, PL_efloatsize - elen,
-                                    "%c%+d", lower ? 'p' : 'P',
-                                    exponent);
-
-                if (elen < width) {
-                    if (left) {
-                        /* Pad the back with spaces. */
-                        memset(PL_efloatbuf + elen, ' ', width - elen);
-                    }
-                    else if (fill == '0') {
-                        /* Insert the zeros after the "0x" and the
-                         * the potential sign, but before the digits,
-                         * otherwise we end up with "0000xH.HHH...",
-                         * when we want "0x000H.HHH..."  */
-                        STRLEN nzero = width - elen;
-                        char* zerox = PL_efloatbuf + 2;
-                        STRLEN nmove = elen - 2;
-                        if (negative || plus) {
-                            zerox++;
-                            nmove--;
-                        }
-                        Move(zerox, zerox + nzero, nmove, char);
-                        memset(zerox, fill, nzero);
-                    }
-                    else {
-                        /* Move it to the right. */
-                        Move(PL_efloatbuf, PL_efloatbuf + width - elen,
-                             elen, char);
-                        /* Pad the front with spaces. */
-                        memset(PL_efloatbuf, ' ', width - elen);
-                    }
-                    elen = width;
-                }
+                elen = S_format_hexfp(aTHX_ PL_efloatbuf, PL_efloatsize, c,
+                                nv, fv, has_precis, precis, width,
+                                alt, plus, left, fill);
              }
              else {
-                elen = S_infnan_2pv(nv, PL_efloatbuf, PL_efloatsize, plus);
-                if (elen) {
-                    /* Not affecting infnan output: precision, alt, fill. */
-                    if (elen < width) {
-                        if (left) {
-                            /* Pack the back with spaces. */
-                            memset(PL_efloatbuf + elen, ' ', width - elen);
-                        } else {
-                            /* Move it to the right. */
-                            Move(PL_efloatbuf, PL_efloatbuf + width - elen,
-                                 elen, char);
-                            /* Pad the front with spaces. */
-                            memset(PL_efloatbuf, ' ', width - elen);
-                        }
-                        elen = width;
-                    }
-                }
-            }
-
-            if (elen == 0) {
                  char *ptr = ebuf + sizeof ebuf;
                  *--ptr = '\0';
                  *--ptr = c;
@@ -12827,8 +12972,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                     base = width;
                     do { *--ptr = '0' + (base % 10); } while (base /= 10);
                 }
-               if (fill == '0')
-                   *--ptr = fill;
+               if (fill)
+                   *--ptr = '0';
                 if (left)
                     *--ptr = '-';
                 if (plus)
@@ -12841,8 +12986,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  * where printf() taints but print($float) doesn't.
                  * --jhi */
  
-                STORE_LC_NUMERIC_SET_TO_NEEDED();
-
                  /* hopefully the above makes ptr a very constrained format
                   * that is safe to use, even though it's not literal */
                  GCC_DIAG_IGNORE(-Wformat-nonliteral);
@@ -12853,8 +12996,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                          Perl_croak_nocontext("panic: quadmath invalid format \"%s\"", ptr);
                      elen = quadmath_snprintf(PL_efloatbuf, PL_efloatsize,
                                               qfmt, nv);
-                    if ((IV)elen == -1)
+                    if ((IV)elen == -1) {
+                        if (qfmt != ptr)
+                            SAVEFREEPV(qfmt);
                          Perl_croak_nocontext("panic: quadmath_snprintf failed, format \"%s\"", qfmt);
+                    }
                      if (qfmt != ptr)
                          Safefree(qfmt);
                  }
@@ -12868,53 +13014,97 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  GCC_DIAG_RESTORE;
             }
  
-       float_converted:
             eptr = PL_efloatbuf;
-            assert((IV)elen > 0); /* here zero elen is bad */
  
-#ifdef USE_LOCALE_NUMERIC
-            /* If the decimal point character in the string is UTF-8, make the
-             * output utf8 */
-            if (PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
-                && instr(eptr, SvPVX_const(PL_numeric_radix_sv)))
+         float_concat:
+
+            /* Since floating-point formats do their own formatting and
+             * padding, we skip the main block of code at the end of this
+             * loop which handles appending eptr to sv, and do our own
+             * stripped-down version */
+
+            /* floating-point formats only get is_utf8 if the radix point
+             * is utf8. All other characters in the string are < 128
+             * and so can be safely appended to both a non-utf8 and utf8
+             * string as-is.
+             */
+            if (is_utf8 && !has_utf8) {
+                sv_utf8_upgrade(sv);
+                has_utf8 = TRUE;
+            }
+
+         float_concat_no_utf8:
+
+            assert(!zeros);
+            assert(!esignlen);
+            assert(!vectorize);
+            assert(elen);
+            assert(elen >= width);
+
+
              {
-                is_utf8 = TRUE;
+                /* unrolled Perl_sv_catpvn */
+                STRLEN need = elen + SvCUR(sv) + 1;
+                char *end;
+                /* can't wrap as both elen and SvCUR() are allocated in
+                 * memory and together can't consume all the address space
+                 */
+                assert(need > elen);
+                SvGROW(sv, need);
+                end = SvEND(sv);
+                Copy(eptr, end, elen, char);
+                end += elen;
+                *end = '\0';
+                SvCUR_set(sv, need - 1);
              }
-#endif
  
-           break;
+            goto donevalidconversion;
+        }
  
             /* SPECIAL */
  
         case 'n':
-           if (vectorize)
-               goto unknown;
-           i = SvCUR(sv) - origlen;
-           if (args) {
-               switch (intsize) {
-               case 'c':       *(va_arg(*args, char*)) = i; break;
-               case 'h':       *(va_arg(*args, short*)) = i; break;
-               default:        *(va_arg(*args, int*)) = i; break;
-               case 'l':       *(va_arg(*args, long*)) = i; break;
-               case 'V':       *(va_arg(*args, IV*)) = i; break;
-               case 'z':       *(va_arg(*args, SSize_t*)) = i; break;
+            {
+                int i;
+                if (vectorize)
+                    goto unknown;
+                /* XXX ideally we should warn if any flags etc have been
+                 * set, e.g. "%-4.5n" */
+                /* XXX if sv was originally non-utf8 with a char in the
+                 * range 0x80-0xff, then if it got upgraded, we should
+                 * calculate char len rather than byte len here */
+                i = SvCUR(sv) - origlen;
+                if (args) {
+                    switch (intsize) {
+                    case 'c':  *(va_arg(*args, char*))      = i; break;
+                    case 'h':  *(va_arg(*args, short*))     = i; break;
+                    default:   *(va_arg(*args, int*))       = i; break;
+                    case 'l':  *(va_arg(*args, long*))      = i; break;
+                    case 'V':  *(va_arg(*args, IV*))        = i; break;
+                    case 'z':  *(va_arg(*args, SSize_t*))   = i; break;
  #ifdef HAS_PTRDIFF_T
-               case 't':       *(va_arg(*args, ptrdiff_t*)) = i; break;
+                    case 't':  *(va_arg(*args, ptrdiff_t*)) = i; break;
  #endif
  #ifdef I_STDINT
-               case 'j':       *(va_arg(*args, intmax_t*)) = i; break;
+                    case 'j':  *(va_arg(*args, intmax_t*))  = i; break;
  #endif
-               case 'q':
+                    case 'q':
  #if IVSIZE >= 8
-                               *(va_arg(*args, Quad_t*)) = i; break;
+                               *(va_arg(*args, Quad_t*))    = i; break;
  #else
-                               goto unknown;
+                               goto unknown;
  #endif
-               }
-           }
-           else
-               sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
-            goto donevalidconversion;
+                    }
+                }
+                else {
+                    if (arg_missing)
+                        Perl_croak_nocontext(
+                            "Missing argument for %%n in %s",
+                                PL_op ? OP_DESC(PL_op) : "sv_vcatpvfn()");
+                    sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
+                }
+                goto donevalidconversion;
+            }
  
             /* UNKNOWN */
  
@@ -12946,19 +13136,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */
             }
  
-           /* output mangled stuff ... */
-           if (c == '\0')
-               --q;
-           eptr = p;
-           elen = q - p;
-
-           /* ... right here, because formatting flags should not apply */
-           SvGROW(sv, SvCUR(sv) + elen + 1);
-           p = SvEND(sv);
-           Copy(eptr, p, elen, char);
-           p += elen;
-           *p = '\0';
-           SvCUR_set(sv, p - SvPVX_const(sv));
+           /* mangled format: output the '%', then continue from the
+             * character following that */
+            sv_catpvn_nomg(sv, p, 1);
+            q = p + 1;
             svix = osvix;
             continue;   /* not "break" */
         }
@@ -12982,60 +13163,89 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             }
         }
  
-        /* signed value that's wrapped? */
-        assert(elen  <= ((~(STRLEN)0) >> 1));
-       have = esignlen + zeros + elen;
-       if (have < zeros)
-           croak_memory_wrap();
-
-       need = (have > width ? have : width);
-       gap = need - have;
-
-       if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1))
-           croak_memory_wrap();
-       SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1);
-       p = SvEND(sv);
-       if (esignlen && fill == '0') {
-           int i;
-           for (i = 0; i < (int)esignlen; i++)
-               *p++ = esignbuf[i];
-       }
-       if (gap && !left) {
-           memset(p, fill, gap);
-           p += gap;
-       }
-       if (esignlen && fill != '0') {
-           int i;
-           for (i = 0; i < (int)esignlen; i++)
-               *p++ = esignbuf[i];
-       }
-       if (zeros) {
-           int i;
-           for (i = zeros; i; i--)
-               *p++ = '0';
-       }
-       if (elen) {
-           Copy(eptr, p, elen, char);
-           p += elen;
-       }
-       if (gap && left) {
-           memset(p, ' ', gap);
-           p += gap;
-       }
-       if (vectorize) {
-           if (veclen) {
-               Copy(dotstr, p, dotstrlen, char);
-               p += dotstrlen;
-           }
-           else
-               vectorize = FALSE;              /* done iterating over vecstr */
-       }
-       if (is_utf8)
-           has_utf8 = TRUE;
-       if (has_utf8)
-           SvUTF8_on(sv);
-       *p = '\0';
-       SvCUR_set(sv, p - SvPVX_const(sv));
+
+        /* append esignbuf, filler, zeros, eptr and dotstr to sv */
+
+        {
+            STRLEN need, have, gap;
+
+            /* signed value that's wrapped? */
+            assert(elen  <= ((~(STRLEN)0) >> 1));
+
+            /* Most of these length vars can range to any value if
+             * supplied with a hostile format and/or args. So check every
+             * addition for possible overflow. In reality some of these
+             * values are interdependent so these checks are slightly
+             * redundant. But its easier to be certain this way.
+             */
+
+            have = elen;
+
+            if (have >= (((STRLEN)~0) - zeros))
+                croak_memory_wrap();
+            have += zeros;
+
+            if (have >= (((STRLEN)~0) - esignlen))
+                croak_memory_wrap();
+            have += esignlen;
+
+            need = (have > width ? have : width);
+            gap = need - have;
+
+            if (need >= (((STRLEN)~0) - dotstrlen))
+                croak_memory_wrap();
+            need += dotstrlen;
+
+            if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1)))
+                croak_memory_wrap();
+            need += (SvCUR(sv) + 1);
+
+            SvGROW(sv, need);
+
+            p = SvEND(sv);
+            if (esignlen && fill) {
+                int i;
+                for (i = 0; i < (int)esignlen; i++)
+                    *p++ = esignbuf[i];
+            }
+            if (gap && !left) {
+                memset(p, (fill ? '0' : ' '), gap);
+                p += gap;
+            }
+            if (esignlen && !fill) {
+                int i;
+                for (i = 0; i < (int)esignlen; i++)
+                    *p++ = esignbuf[i];
+            }
+            if (zeros) {
+                int i;
+                for (i = zeros; i; i--)
+                    *p++ = '0';
+            }
+            if (elen) {
+                Copy(eptr, p, elen, char);
+                p += elen;
+            }
+            if (gap && left) {
+                memset(p, ' ', gap);
+                p += gap;
+            }
+            if (vectorize) {
+                if (veclen) {
+                    Copy(dotstr, p, dotstrlen, char);
+                    p += dotstrlen;
+                }
+                else
+                    vectorize = FALSE; /* done iterating over vecstr */
+            }
+            if (is_utf8)
+                has_utf8 = TRUE;
+            if (has_utf8)
+                SvUTF8_on(sv);
+            *p = '\0';
+            SvCUR_set(sv, p - SvPVX_const(sv));
+        }
+
         if (vectorize) {
             esignlen = 0;
             goto vector;
@@ -13130,7 +13340,7 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
      parser->old_parser = NULL;
      parser->stack = NULL;
      parser->ps = NULL;
-    parser->stack_maxbase = NULL;
+    parser->stack_max1 = 0;
      /* XXX parser->stack->state = 0; */
  
      /* XXX eventually, just Copy() most of the parser struct ? */
@@ -13173,6 +13383,7 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
      parser->sig_elems  = proto->sig_elems;
      parser->sig_optelems= proto->sig_optelems;
      parser->sig_slurpy  = proto->sig_slurpy;
+    parser->recheck_utf8_validity = proto->recheck_utf8_validity;
      parser->linestr    = sv_dup_inc(proto->linestr, param);
  
      {
@@ -13834,6 +14045,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
             switch (sv_type) {
             default:
                 Perl_croak(aTHX_ "Bizarre SvTYPE [%" IVdf "]", (IV)SvTYPE(sstr));
+                NOT_REACHED; /* NOTREACHED */
                 break;
  
             case SVt_PVGV:
@@ -15030,7 +15242,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_Xpv             = (XPV*)NULL;
      my_perl->Ina       = proto_perl->Ina;
  
-    PL_statbuf         = proto_perl->Istatbuf;
      PL_statcache       = proto_perl->Istatcache;
  
  #ifndef NO_TAINT_SUPPORT
@@ -15311,6 +15522,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
      PL_GCB_invlist = sv_dup_inc(proto_perl->IGCB_invlist, param);
      PL_SB_invlist = sv_dup_inc(proto_perl->ISB_invlist, param);
      PL_WB_invlist = sv_dup_inc(proto_perl->IWB_invlist, param);
+    PL_seen_deprecated_macro = hv_dup_inc(proto_perl->Iseen_deprecated_macro, param);
      PL_utf8_mark       = sv_dup_inc(proto_perl->Iutf8_mark, param);
      PL_utf8_toupper    = sv_dup_inc(proto_perl->Iutf8_toupper, param);
      PL_utf8_totitle    = sv_dup_inc(proto_perl->Iutf8_totitle, param);