sv_grow: performance improvement for short strings

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index 761addb..2940942 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -1553,7 +1553,7 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
  
  #ifdef PERL_NEW_COPY_ON_WRITE
      /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
-     * to store the COW count. So in general, allocate one more byte than
+     * to store the CowREFCNT. So in general, allocate one more byte than
       * asked for, to make it likely this byte is always spare: and thus
       * make more strings COW-able.
       * If the new size is a big power of two, don't bother: we assume the
@@ -1569,7 +1569,7 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
  
      if (newlen > SvLEN(sv)) {          /* need more room? */
         STRLEN minlen = SvCUR(sv);
-       minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
+       minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 2;
         if (newlen < minlen)
             newlen = minlen;
  #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
@@ -2231,13 +2231,8 @@ S_sv_2iuv_common(pTHX_ SV *const sv)
             if (! numtype && ckWARN(WARN_NUMERIC))
                 not_a_number(sv);
  
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
-                                 PTR2UV(sv), SvNVX(sv)));
-#else
-           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
+           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" NVgf ")\n",
                                   PTR2UV(sv), SvNVX(sv)));
-#endif
  
  #ifdef NV_PRESERVES_UV
              (void)SvIOKp_on(sv);
@@ -2581,22 +2576,13 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
      if (SvTYPE(sv) < SVt_NV) {
         /* The logic to use SVt_PVNV if necessary is in sv_upgrade.  */
         sv_upgrade(sv, SVt_NV);
-#ifdef USE_LONG_DOUBLE
         DEBUG_c({
             STORE_NUMERIC_LOCAL_SET_STANDARD();
             PerlIO_printf(Perl_debug_log,
-                         "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
+                         "0x%"UVxf" num(%" NVgf ")\n",
                           PTR2UV(sv), SvNVX(sv));
             RESTORE_NUMERIC_LOCAL();
         });
-#else
-       DEBUG_c({
-           STORE_NUMERIC_LOCAL_SET_STANDARD();
-           PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
-                         PTR2UV(sv), SvNVX(sv));
-           RESTORE_NUMERIC_LOCAL();
-       });
-#endif
      }
      else if (SvTYPE(sv) < SVt_PVNV)
         sv_upgrade(sv, SVt_PVNV);
@@ -2725,21 +2711,12 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
            and ideally should be fixed.  */
         return 0.0;
      }
-#if defined(USE_LONG_DOUBLE)
-    DEBUG_c({
-       STORE_NUMERIC_LOCAL_SET_STANDARD();
-       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
-                     PTR2UV(sv), SvNVX(sv));
-       RESTORE_NUMERIC_LOCAL();
-    });
-#else
      DEBUG_c({
         STORE_NUMERIC_LOCAL_SET_STANDARD();
-       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
+       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" NVgf ")\n",
                       PTR2UV(sv), SvNVX(sv));
         RESTORE_NUMERIC_LOCAL();
      });
-#endif
      return SvNVX(sv);
  }
  
@@ -2804,9 +2781,9 @@ S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const pe
  }
  
  /* Helper for sv_2pv_flags and sv_vcatpvfn_flags.  If the NV is an
- * infinity or a not-a-number, writes the approrpriate strings to the
- * buffer, including a zero byte.  Returns the written length,
- * excluding the zero byte, or zero. */
+ * infinity or a not-a-number, writes the appropriate strings to the
+ * buffer, including a zero byte.  On success returns the written length,
+ * excluding the zero byte; on failure returns zero. */
  STATIC size_t
  S_infnan_copy(NV nv, char* buffer, size_t maxlen) {
      if (maxlen < 4)
@@ -3495,7 +3472,7 @@ must_be_utf8:
                  * set so starts from there.  Otherwise, can use memory copy to
                  * get up to where we are now, and then start from here */
  
-               if (invariant_head <= 0) {
+               if (invariant_head == 0) {
                     d = dst;
                 } else {
                     Copy(s, dst, invariant_head, char);
@@ -8623,13 +8600,8 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv)
             /* I don't think we can get here. Maybe I should assert this
                And if we do get here I suspect that sv_setnv will croak. NWC
                Fall through. */
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n",
-                                 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#else
             DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n",
                                   SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#endif
         }
  #endif /* PERL_PRESERVE_IVUV */
          if (!numtype && ckWARN(WARN_NUMERIC))
@@ -8800,13 +8772,8 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv)
             /* I don't think we can get here. Maybe I should assert this
                And if we do get here I suspect that sv_setnv will croak. NWC
                Fall through. */
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n",
-                                 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#else
             DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n",
                                   SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#endif
         }
      }
  #endif /* PERL_PRESERVE_IVUV */
@@ -11035,6 +11002,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         I32 epix = 0; /* explicit precision index */
         I32 evix = 0; /* explicit vector index */
         bool asterisk = FALSE;
+        bool infnan = FALSE;
  
         /* echo everything up to the next format specification */
         for (q = p; q < patend && *q != '%'; ++q) ;
@@ -11380,6 +11348,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             }
         }
  
+        if (argsv && SvNOK(argsv)) {
+            /* XXX va_arg(*args) case? */
+            infnan = Perl_isinfnan(SvNV(argsv));
+        }
+
         switch (c = *q++) {
  
             /* STRINGS */
@@ -11387,7 +11360,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'c':
             if (vectorize)
                 goto unknown;
-           uv = (args) ? va_arg(*args, int) : SvIV(argsv);
+           uv = (args) ? va_arg(*args, int) :
+                infnan ? UNICODE_REPLACEMENT : SvIV(argsv);
             if ((uv > 255 ||
                  (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
                 && !IN_BYTES) {
@@ -11443,6 +11417,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* INTEGERS */
  
         case 'p':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (alt || vectorize)
                 goto unknown;
             uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
@@ -11458,6 +11436,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'd':
         case 'i':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (vectorize) {
                 STRLEN ulen;
                 if (!veclen)
@@ -11559,6 +11541,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             base = 16;
  
         uns_integer:
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (vectorize) {
                 STRLEN ulen;
         vector:
@@ -11675,6 +11661,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
             /* FLOATING POINT */
  
+        floating_point:
+
         case 'F':
             c = 'f';            /* maybe %F isn't supported here */
             /* FALLTHROUGH */
@@ -11740,26 +11728,41 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  (void)Perl_frexp(nv, &i);
                  if (i == PERL_INT_MIN)
                      Perl_die(aTHX_ "panic: frexp");
+                /* Do not set hexfp earlier since we want to printf
+                 * Inf/NaN for Inf/NAN, not their hexfp. */
                  hexfp = isALPHA_FOLD_EQ(c, 'a');
                  if (UNLIKELY(hexfp)) {
-                    /* Hexadecimal floating point: this size
-                     * computation probably overshoots, but that is
-                     * better than undershooting. */
+                    /* This seriously overshoots in most cases, but that
+                     * is better than undershooting.  Firstly, not all
+                     * bytes of the NV are mantissa, some of them are
+                     * exponent.  Secondly, for the reasonably common
+                     * long doubles case, the "80-bit extended", two
+                     * or six bytes of the NV are unused. */
                      need +=
-                        (nv < 0) + /* possible unary minus */
+                        (nv < 0) ? 1 : 0 + /* possible unary minus */
                          2 + /* "0x" */
                          1 + /* the very unlikely carry */
                          1 + /* "1" */
                          1 + /* "." */
-                        /* We want one byte per each 4 bits in the
-                         * mantissa.  This works out to about 0.83
-                         * bytes per NV decimal digit (of 4 bits):
-                         * (NV_DIG * log(10)/log(2)) / 4,
-                         * we overestimate by using 5/6 (0.8333...) */
-                        ((NV_DIG * 5) / 6 + 1) +
+                        2 * NVSIZE + /* 2 hexdigits for each byte */
                          2 + /* "p+" */
-                        (i >= 0 ? BIT_DIGITS(i) : 1 + BIT_DIGITS(-i)) +
+                        BIT_DIGITS(NV_MAX_EXP) + /* exponent */
                          1;   /* \0 */
+#if LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN || \
+    LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN
+                    /* However, for the "double double", we need more.
+                     * Since each double has its own exponent, the
+                     * doubles may float (haha) rather far from each
+                     * other, and the number of required bits is much
+                     * larger, up to a total of 1028 bits.  (NOTE: this
+                     * is not actually implemented properly yet,
+                     * we are using just the first double, see
+                     * S_hextract() for details.  But let's prepare
+                     * for the future.) */
+
+                    /* 2 hexdigits for each byte. */ 
+                    need += (1028/8 - DOUBLESIZE + 1) * 2;
+#endif
  #ifdef USE_LOCALE_NUMERIC
                          STORE_LC_NUMERIC_SET_TO_NEEDED();
                          if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))
@@ -12051,13 +12054,16 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  *--ptr = c;
                  /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */
  #if defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl)
+               /* Note that this is HAS_LONG_DOUBLE and PERL_PRIfldbl,
+                * not USE_LONG_DOUBLE and NVff.  In other words,
+                * this needs to work without USE_LONG_DOUBLE. */
                 if (intsize == 'q') {
                     /* Copy the one or more characters in a long double
                      * format before the 'base' ([efgEFG]) character to
                      * the format string. */
-                   static char const prifldbl[] = PERL_PRIfldbl;
-                   char const *p = prifldbl + sizeof(prifldbl) - 3;
-                   while (p >= prifldbl) { *--ptr = *p--; }
+                   static char const ldblf[] = PERL_PRIfldbl;
+                   char const *p = ldblf + sizeof(ldblf) - 3;
+                   while (p >= ldblf) { *--ptr = *p--; }
                 }
  #endif
                 if (has_precis) {