Perl_sv_vcatpvfn_flags: handle Inf/Nan in 1 place

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 11bd021..6ad9181 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -3147,8 +3147,8 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
                      STORE_LC_NUMERIC_SET_TO_NEEDED();
  
                      local_radix = PL_numeric_local && PL_numeric_radix_sv;
-                    if (local_radix && SvLEN(PL_numeric_radix_sv) > 1) {
-                        size += SvLEN(PL_numeric_radix_sv) - 1;
+                    if (local_radix && SvCUR(PL_numeric_radix_sv) > 1) {
+                        size += SvCUR(PL_numeric_radix_sv) - 1;
                          s = SvGROW_mutable(sv, size);
                      }
  
@@ -11000,6 +11000,12 @@ S_expect_number(pTHX_ char **const pattern)
      return var;
  }
  
+/* Implement a fast "%.0f": given a pointer to the end of a buffer (caller
+ * ensures it's big enough), back fill it with the rounded integer part of
+ * nv. Returns ptr to start of string, and sets *len to its length.
+ * Returns NULL if not convertible.
+ */
+
  STATIC char *
  S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
  {
@@ -11008,11 +11014,7 @@ S_F0convert(NV nv, char *const endbuf, STRLEN *const len)
  
      PERL_ARGS_ASSERT_F0CONVERT;
  
-    if (UNLIKELY(Perl_isinfnan(nv))) {
-        STRLEN n = S_infnan_2pv(nv, endbuf - *len, *len, 0);
-        *len = n;
-        return endbuf - n;
-    }
+    assert(!Perl_isinfnan(nv));
      if (neg)
         nv = -nv;
      if (nv < UV_MAX) {
@@ -11444,6 +11446,12 @@ S_hextract(pTHX_ const NV nv, int* exponent, bool *subnormal,
      } STMT_END
  
  void
+
+
+/* This function assumes that pat has the same utf8-ness as sv.
+ * It's the caller's responsibility to ensure that this is so.
+ */
+
  Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
                         va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted,
                         const U32 flags)
@@ -11512,7 +11520,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
      }
  
  #if !defined(USE_LONG_DOUBLE) && !defined(USE_QUADMATH)
-    /* special-case "%.<number>[gf]" */
+    /* special-case "%.0f" and "%.<number>g" */
      if ( !args && patlen <= 5 && pat[0] == '%' && pat[1] == '.'
          && (pat[patlen-1] == 'g' || pat[patlen-1] == 'f') ) {
         unsigned digits = 0;
@@ -11525,7 +11533,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         /* XXX: Why do this `svix < svmax` test? Couldn't we just
            format the first argument and WARN_REDUNDANT if svmax > 1?
            Munged by Nicholas Clark in v5.13.0-209-g95ea86d */
-       if (pp - pat == (int)patlen - 1 && svix < svmax) {
+       if (pp + 1 == pat + patlen && svix < svmax) {
             const NV nv = SvNV(*svargs);
              if (LIKELY(!Perl_isinfnan(nv))) {
                  if (*pp == 'g') {
@@ -11535,7 +11543,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      if (digits && digits < sizeof(ebuf) - NV_DIG - 10) {
                          /* 0, point, slack */
                          STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        SNPRINTF_G(nv, ebuf, size, digits);
+                        SNPRINTF_G(nv, ebuf, sizeof(ebuf), digits);
                          sv_catpv_nomg(sv, ebuf);
                          if (*ebuf)     /* May return an empty string for digits==0 */
                              return;
@@ -11574,12 +11582,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         bool is_utf8 = FALSE;  /* is this item utf8?   */
          bool used_explicit_ix = FALSE;
          bool arg_missing = FALSE;
-#ifdef HAS_LDBL_SPRINTF_BUG
-       /* This is to try to fix a bug with irix/nonstop-ux/powerux and
-          with sfio - Allen <allens@cpan.org> */
-       bool fix_ldbl_sprintf_bug = FALSE;
-#endif
-
         char esignbuf[4];
         U8 utf8buf[UTF8_MAXBYTES+1];
         STRLEN esignlen = 0;
@@ -11591,10 +11593,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         const U8 *vecstr = NULL;
         STRLEN veclen = 0;
         char c = 0;
-       int i;
         unsigned base = 0;
         IV iv = 0;
         UV uv = 0;
+        bool is_simple = TRUE; /* no fancy qualifiers */
+        STRLEN radix_len;  /* SvCUR(PL_numeric_radix_sv) */
+
         /* We need a long double target in case HAS_LONG_DOUBLE,
           * even without USE_LONG_DOUBLE, so that we can printf with
           * long double formats, even without NV being long double.
@@ -11838,6 +11842,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         }
  
         if (asterisk) {
+            int i;
             if (args)
                 i = va_arg(*args, int);
             else
@@ -11853,6 +11858,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         if (*q == '.') {
             q++;
             if (*q == '*') {
+                int i;
                 q++;
                  if ( (epix = expect_number(&q)) ) {
                      if (*q++ == '$') {
@@ -12405,16 +12411,52 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  NV_TO_FV(nv, fv);
              }
  
-           float_need = 0;
+            if (Perl_isinfnan(nv)) {
+                elen = S_infnan_2pv(nv, ebuf, sizeof(ebuf), plus);
+                assert(elen);
+                eptr = ebuf;
+                zeros     = 0;
+                esignlen  = 0;
+                dotstrlen = 0;
+                break;
+            }
+
+            /* special-case "%.0f" */
+            is_simple = ( !(width || left || plus || alt)
+                        && fill != '0'
+                        && has_precis
+                        && intsize != 'q');
+
+            if (is_simple && c == 'f' && !precis) {
+                if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
+                    break;
+            }
+
+            /*determine the radix point len, e.g. length(".") in "1.2" */
+            radix_len  = 1; /* assume '.' */
+#ifdef USE_LOCALE_NUMERIC
+            /* note that we may either explicitly use PL_numeric_radix_sv
+             * below, or implicitly, via an snprintf() variant.
+             * Note also things like ps_AF.utf8 which has
+             * "\N{ARABIC DECIMAL SEPARATOR} as a radix point */
+            STORE_LC_NUMERIC_SET_TO_NEEDED();
+            if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+                radix_len  = SvCUR(PL_numeric_radix_sv);
+                /* note that this will convert the output to utf8 even if
+                 * if the radix point didn't get output */
+                is_utf8 = SvUTF8(PL_numeric_radix_sv);
+            }
+            RESTORE_LC_NUMERIC();
+#endif
+           float_need = radix_len;
+
             /* frexp() (or frexpl) has some unspecified behaviour for
-             * nan/inf/-inf, so let's avoid calling that on non-finites. */
-           if (isALPHA_FOLD_NE(c, 'e') && FV_ISFINITE(fv)) {
-                i = PERL_INT_MIN;
+             * nan/inf/-inf, so lucky we've already handled them above */
+           if (isALPHA_FOLD_NE(c, 'e')) {
+                int i = PERL_INT_MIN;
                  (void)Perl_frexp((NV)fv, &i);
                  if (i == PERL_INT_MIN)
                      Perl_die(aTHX_ "panic: frexp: %" FV_GF, fv);
-                /* Do not set hexfp earlier since we want to printf
-                 * Inf/NaN for Inf/NaN, not their hexfp. */
                  hexfp = isALPHA_FOLD_EQ(c, 'a');
                  if (UNLIKELY(hexfp)) {
                      /* This seriously overshoots in most cases, but
@@ -12445,12 +12487,6 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      float_need += (DOUBLEDOUBLE_MAXBITS/8 + 1) * 2;
                      /* the size for the exponent already added */
  #endif
-#ifdef USE_LOCALE_NUMERIC
-                        STORE_LC_NUMERIC_SET_TO_NEEDED();
-                        if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))
-                            float_need += SvLEN(PL_numeric_radix_sv);
-                        RESTORE_LC_NUMERIC();
-#endif
                  }
                  else if (i > 0) {
                      float_need = BIT_DIGITS(i);
@@ -12499,7 +12535,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
             if ((intsize == 'q') && (c == 'f') &&
                 ((fv < MY_DBL_MAX_BUG) && (fv > -MY_DBL_MAX_BUG)) &&
-               (float_need < DBL_DIG)) {
+               (float_need < DBL_DIG))
+            {
+                bool fix_ldbl_sprintf_bug = FALSE;
+
                 /* it's going to be short enough that
                  * long double precision is not needed */
  
@@ -12518,6 +12557,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                         fix_ldbl_sprintf_bug = TRUE;
                     }
                 }
+
                 if (fix_ldbl_sprintf_bug == TRUE) {
                     double temp;
  
@@ -12543,9 +12583,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 PL_efloatbuf[0] = '\0';
             }
  
-           if ( !(width || left || plus || alt) && fill != '0'
-                && has_precis && intsize != 'q'        /* Shortcuts */
-                 && LIKELY(!Perl_isinfnan((NV)fv)) ) {
+            /* special-case "%.<number>g" */
+            if (is_simple) {
                 /* See earlier comment about buggy Gconvert when digits,
                    aka precis is 0  */
                 if ( c == 'g' && precis ) {
@@ -12556,10 +12595,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                         elen = strlen(PL_efloatbuf);
                         goto float_converted;
                     }
-               } else if ( c == 'f' && !precis ) {
-                   if ((eptr = F0convert(nv, ebuf + sizeof ebuf, &elen)))
-                       break;
-               }
+                }
             }
  
              if (UNLIKELY(hexfp)) {
@@ -12687,8 +12723,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
                      if (has_precis) {
                          U8* ve = (subnormal ? vlnz + 1 : vend);
-                        SSize_t vn = ve - (subnormal ? vfnz : vhex);
-                        if ((SSize_t)(precis + 1) < vn) {
+                        SSize_t vn = ve - v0;
+                        assert(vn >= 1);
+                        if (precis < (Size_t)(vn - 1)) {
                              bool overflow = FALSE;
                              if (v0[precis + 1] < 0x8) {
                                  /* Round down, nothing to do. */
@@ -12729,7 +12766,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                                       * way to the front, we need to
                                       * insert 0x1 in front, and adjust
                                       * the exponent. */
-                                    Move(v0, v0 + 1, vn, char);
+                                    Move(v0, v0 + 1, vn - 1, char);
                                      *v0 = 0x1;
                                      exponent += 4;
                                  }
@@ -12800,16 +12837,17 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                                      exponent);
  
                  if (elen < width) {
+                    STRLEN gap = (STRLEN)(width - elen);
                      if (left) {
                          /* Pad the back with spaces. */
-                        memset(PL_efloatbuf + elen, ' ', width - elen);
+                        memset(PL_efloatbuf + elen, ' ', gap);
                      }
                      else if (fill == '0') {
                          /* Insert the zeros after the "0x" and the
                           * the potential sign, but before the digits,
                           * otherwise we end up with "0000xH.HHH...",
                           * when we want "0x000H.HHH..."  */
-                        STRLEN nzero = width - elen;
+                        STRLEN nzero = gap;
                          char* zerox = PL_efloatbuf + 2;
                          STRLEN nmove = elen - 2;
                          if (negative || plus) {
@@ -12821,35 +12859,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      }
                      else {
                          /* Move it to the right. */
-                        Move(PL_efloatbuf, PL_efloatbuf + width - elen,
+                        Move(PL_efloatbuf, PL_efloatbuf + gap,
                               elen, char);
                          /* Pad the front with spaces. */
-                        memset(PL_efloatbuf, ' ', width - elen);
+                        memset(PL_efloatbuf, ' ', gap);
                      }
                      elen = width;
                  }
              }
              else {
-                elen = S_infnan_2pv(nv, PL_efloatbuf, PL_efloatsize, plus);
-                if (elen) {
-                    /* Not affecting infnan output: precision, alt, fill. */
-                    if (elen < width) {
-                        if (left) {
-                            /* Pack the back with spaces. */
-                            memset(PL_efloatbuf + elen, ' ', width - elen);
-                        } else {
-                            /* Move it to the right. */
-                            Move(PL_efloatbuf, PL_efloatbuf + width - elen,
-                                 elen, char);
-                            /* Pad the front with spaces. */
-                            memset(PL_efloatbuf, ' ', width - elen);
-                        }
-                        elen = width;
-                    }
-                }
-            }
-
-            if (elen == 0) {
                  char *ptr = ebuf + sizeof ebuf;
                  *--ptr = '\0';
                  *--ptr = c;
@@ -12929,49 +12947,43 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             eptr = PL_efloatbuf;
              assert((IV)elen > 0); /* here zero elen is bad */
  
-#ifdef USE_LOCALE_NUMERIC
-            /* If the decimal point character in the string is UTF-8, make the
-             * output utf8 */
-            if (PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
-                && instr(eptr, SvPVX_const(PL_numeric_radix_sv)))
-            {
-                is_utf8 = TRUE;
-            }
-#endif
  
             break;
  
             /* SPECIAL */
  
         case 'n':
-           if (vectorize)
-               goto unknown;
-           i = SvCUR(sv) - origlen;
-           if (args) {
-               switch (intsize) {
-               case 'c':       *(va_arg(*args, char*)) = i; break;
-               case 'h':       *(va_arg(*args, short*)) = i; break;
-               default:        *(va_arg(*args, int*)) = i; break;
-               case 'l':       *(va_arg(*args, long*)) = i; break;
-               case 'V':       *(va_arg(*args, IV*)) = i; break;
-               case 'z':       *(va_arg(*args, SSize_t*)) = i; break;
+            {
+                int i;
+                if (vectorize)
+                    goto unknown;
+                i = SvCUR(sv) - origlen;
+                if (args) {
+                    switch (intsize) {
+                    case 'c':  *(va_arg(*args, char*))      = i; break;
+                    case 'h':  *(va_arg(*args, short*))     = i; break;
+                    default:   *(va_arg(*args, int*))       = i; break;
+                    case 'l':  *(va_arg(*args, long*))      = i; break;
+                    case 'V':  *(va_arg(*args, IV*))        = i; break;
+                    case 'z':  *(va_arg(*args, SSize_t*))   = i; break;
  #ifdef HAS_PTRDIFF_T
-               case 't':       *(va_arg(*args, ptrdiff_t*)) = i; break;
+                    case 't':  *(va_arg(*args, ptrdiff_t*)) = i; break;
  #endif
  #ifdef I_STDINT
-               case 'j':       *(va_arg(*args, intmax_t*)) = i; break;
+                    case 'j':  *(va_arg(*args, intmax_t*))  = i; break;
  #endif
-               case 'q':
+                    case 'q':
  #if IVSIZE >= 8
-                               *(va_arg(*args, Quad_t*)) = i; break;
+                               *(va_arg(*args, Quad_t*))    = i; break;
  #else
-                               goto unknown;
+                               goto unknown;
  #endif
-               }
-           }
-           else
-               sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
-            goto donevalidconversion;
+                    }
+                }
+                else
+                    sv_setuv_mg(argsv, has_utf8 ? (UV)sv_len_utf8(sv) : (UV)i);
+                goto donevalidconversion;
+            }
  
             /* UNKNOWN */
  
@@ -13003,19 +13015,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 Perl_warner(aTHX_ packWARN(WARN_PRINTF), "%" SVf, SVfARG(msg)); /* yes, this is reentrant */
             }
  
-           /* output mangled stuff ... */
-           if (c == '\0')
-               --q;
-           eptr = p;
-           elen = q - p;
-
-           /* ... right here, because formatting flags should not apply */
-           SvGROW(sv, SvCUR(sv) + elen + 1);
-           p = SvEND(sv);
-           Copy(eptr, p, elen, char);
-           p += elen;
-           *p = '\0';
-           SvCUR_set(sv, p - SvPVX_const(sv));
+           /* mangled format: output the '%', then continue from the
+             * character following that */
+            sv_catpvn_nomg(sv, p, 1);
+            q = p + 1;
             svix = osvix;
             continue;   /* not "break" */
         }
@@ -13040,23 +13043,43 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         }
  
  
-        /* append esignbuf, filler, zeroes, eptr and dotstr to sv */
+        /* append esignbuf, filler, zeros, eptr and dotstr to sv */
  
          {
              STRLEN need, have, gap;
  
              /* signed value that's wrapped? */
              assert(elen  <= ((~(STRLEN)0) >> 1));
-            have = esignlen + zeros + elen;
-            if (have < zeros)
+
+            /* Most of these length vars can range to any value if
+             * supplied with a hostile format and/or args. So check every
+             * addition for possible overflow. In reality some of these
+             * values are interdependent so these checks are slightly
+             * redundant. But its easier to be certain this way.
+             */
+
+            have = elen;
+
+            if (have >= (((STRLEN)~0) - zeros))
+                croak_memory_wrap();
+            have += zeros;
+
+            if (have >= (((STRLEN)~0) - esignlen))
                  croak_memory_wrap();
+            have += esignlen;
  
              need = (have > width ? have : width);
              gap = need - have;
  
-            if (need >= (((STRLEN)~0) - SvCUR(sv) - dotstrlen - 1))
+            if (need >= (((STRLEN)~0) - dotstrlen))
+                croak_memory_wrap();
+            need += dotstrlen;
+
+            if (need >= (((STRLEN)~0) - (SvCUR(sv) + 1)))
                  croak_memory_wrap();
-            SvGROW(sv, SvCUR(sv) + need + dotstrlen + 1);
+            need += (SvCUR(sv) + 1);
+
+            SvGROW(sv, need);
  
              p = SvEND(sv);
              if (esignlen && fill == '0') {