perldelta for #80368/e4916dd1b3

[perl5.git] / sv.c
diff --git a/sv.c b/sv.c

index d295f64..5f88508 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -35,15 +35,6 @@
  # include <rms.h>
  #endif
  
-#ifndef HAS_C99
-# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L && !defined(__VMS)
-#  define HAS_C99 1
-# endif
-#endif
-#ifdef HAS_C99
-# include <stdint.h>
-#endif
-
  #ifdef __Lynx__
  /* Missing proto on LynxOS */
    char *gconvert(double, int, int,  char *);
@@ -112,9 +103,6 @@
      GE_COWBUF_WASTE_THRESHOLD((cur),(len)) && \
      GE_COWBUF_WASTE_FACTOR_THRESHOLD((cur),(len)) \
  )
-/* void Gconvert: on Linux at least, gcvt (which Gconvert gets deffed to),
- * has a mandatory return value, even though that value is just the same
- * as the buf arg */
  
  #ifdef PERL_UTF8_CACHE_ASSERT
  /* if adding more checks watch out for the following tests:
@@ -1556,7 +1544,7 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
  
  #ifdef PERL_NEW_COPY_ON_WRITE
      /* the new COW scheme uses SvPVX(sv)[SvLEN(sv)-1] (if spare)
-     * to store the COW count. So in general, allocate one more byte than
+     * to store the CowREFCNT. So in general, allocate one more byte than
       * asked for, to make it likely this byte is always spare: and thus
       * make more strings COW-able.
       * If the new size is a big power of two, don't bother: we assume the
@@ -1572,7 +1560,7 @@ Perl_sv_grow(pTHX_ SV *const sv, STRLEN newlen)
  
      if (newlen > SvLEN(sv)) {          /* need more room? */
         STRLEN minlen = SvCUR(sv);
-       minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 10;
+       minlen += (minlen >> PERL_STRLEN_EXPAND_SHIFT) + 2;
         if (newlen < minlen)
             newlen = minlen;
  #ifndef PERL_UNWARANTED_CHUMMINESS_WITH_MALLOC
@@ -2234,13 +2222,8 @@ S_sv_2iuv_common(pTHX_ SV *const sv)
             if (! numtype && ckWARN(WARN_NUMERIC))
                 not_a_number(sv);
  
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" PERL_PRIgldbl ")\n",
-                                 PTR2UV(sv), SvNVX(sv)));
-#else
-           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%"NVgf")\n",
+           DEBUG_c(PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2iv(%" NVgf ")\n",
                                   PTR2UV(sv), SvNVX(sv)));
-#endif
  
  #ifdef NV_PRESERVES_UV
              (void)SvIOKp_on(sv);
@@ -2584,22 +2567,13 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
      if (SvTYPE(sv) < SVt_NV) {
         /* The logic to use SVt_PVNV if necessary is in sv_upgrade.  */
         sv_upgrade(sv, SVt_NV);
-#ifdef USE_LONG_DOUBLE
         DEBUG_c({
             STORE_NUMERIC_LOCAL_SET_STANDARD();
             PerlIO_printf(Perl_debug_log,
-                         "0x%"UVxf" num(%" PERL_PRIgldbl ")\n",
+                         "0x%"UVxf" num(%" NVgf ")\n",
                           PTR2UV(sv), SvNVX(sv));
             RESTORE_NUMERIC_LOCAL();
         });
-#else
-       DEBUG_c({
-           STORE_NUMERIC_LOCAL_SET_STANDARD();
-           PerlIO_printf(Perl_debug_log, "0x%"UVxf" num(%"NVgf")\n",
-                         PTR2UV(sv), SvNVX(sv));
-           RESTORE_NUMERIC_LOCAL();
-       });
-#endif
      }
      else if (SvTYPE(sv) < SVt_PVNV)
         sv_upgrade(sv, SVt_PVNV);
@@ -2728,21 +2702,12 @@ Perl_sv_2nv_flags(pTHX_ SV *const sv, const I32 flags)
            and ideally should be fixed.  */
         return 0.0;
      }
-#if defined(USE_LONG_DOUBLE)
-    DEBUG_c({
-       STORE_NUMERIC_LOCAL_SET_STANDARD();
-       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" PERL_PRIgldbl ")\n",
-                     PTR2UV(sv), SvNVX(sv));
-       RESTORE_NUMERIC_LOCAL();
-    });
-#else
      DEBUG_c({
         STORE_NUMERIC_LOCAL_SET_STANDARD();
-       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 1nv(%"NVgf")\n",
+       PerlIO_printf(Perl_debug_log, "0x%"UVxf" 2nv(%" NVgf ")\n",
                       PTR2UV(sv), SvNVX(sv));
         RESTORE_NUMERIC_LOCAL();
      });
-#endif
      return SvNVX(sv);
  }
  
@@ -2806,6 +2771,39 @@ S_uiv_2buf(char *const buf, const IV iv, UV uv, const int is_uv, char **const pe
      return ptr;
  }
  
+/* Helper for sv_2pv_flags and sv_vcatpvfn_flags.  If the NV is an
+* infinity or a not-a-number, writes the appropriate strings to the
+* buffer, including a zero byte.  On success returns the written length,
+* excluding the zero byte, on failure returns zero. */
+STATIC size_t
+S_infnan_copy(NV nv, char* buffer, size_t maxlen) {
+    if (maxlen < 4)
+        return 0;
+    else {
+        char* s = buffer;
+        if (Perl_isinf(nv)) {
+            if (nv < 0) {
+                if (maxlen < 5)
+                    return 0;
+                *s++ = '-';
+            }
+            *s++ = 'I';
+            *s++ = 'n';
+            *s++ = 'f';
+        }
+        else if (Perl_isnan(nv)) {
+            *s++ = 'N';
+            *s++ = 'a';
+            *s++ = 'N';
+            /* XXX output the payload mantissa bits as "(hhh...)" */
+        }
+        else
+            return 0;
+        *s++ = 0;
+        return s - buffer - 1;
+    }
+}
+
  /*
  =for apidoc sv_2pv_flags
  
@@ -2989,37 +2987,44 @@ Perl_sv_2pv_flags(pTHX_ SV *const sv, STRLEN *const lp, const I32 flags)
             *s++ = '0';
             *s = '\0';
         } else {
-           dSAVE_ERRNO;
+            STRLEN len;
             /* The +20 is pure guesswork.  Configure test needed. --jhi */
             s = SvGROW_mutable(sv, NV_DIG + 20);
-           /* some Xenix systems wipe out errno here */
+
+            len = S_infnan_copy(SvNVX(sv), s, SvLEN(sv));
+            if (len > 0)
+                s += len;
+            else {
+                dSAVE_ERRNO;
+                /* some Xenix systems wipe out errno here */
  
  #ifndef USE_LOCALE_NUMERIC
-            PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
-            SvPOK_on(sv);
-#else
-            {
-                DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
                  PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
-
-                /* If the radix character is UTF-8, and actually is in the
-                 * output, turn on the UTF-8 flag for the scalar */
-                if (PL_numeric_local
-                    && PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
-                    && instr(s, SvPVX_const(PL_numeric_radix_sv)))
+                SvPOK_on(sv);
+#else
                  {
-                    SvUTF8_on(sv);
+                    DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
+                    PERL_UNUSED_RESULT(Gconvert(SvNVX(sv), NV_DIG, 0, s));
+
+                    /* If the radix character is UTF-8, and actually is in the
+                     * output, turn on the UTF-8 flag for the scalar */
+                    if (PL_numeric_local
+                        && PL_numeric_radix_sv && SvUTF8(PL_numeric_radix_sv)
+                        && instr(s, SvPVX_const(PL_numeric_radix_sv)))
+                        {
+                            SvUTF8_on(sv);
+                        }
+                    RESTORE_LC_NUMERIC();
                  }
-                RESTORE_LC_NUMERIC();
-            }
  
-            /* We don't call SvPOK_on(), because it may come to pass that the
-             * locale changes so that the stringification we just did is no
-             * longer correct.  We will have to re-stringify every time it is
-             * needed */
+                /* We don't call SvPOK_on(), because it may come to
+                 * pass that the locale changes so that the
+                 * stringification we just did is no longer correct.  We
+                 * will have to re-stringify every time it is needed */
  #endif
-           RESTORE_ERRNO;
-           while (*s) s++;
+                RESTORE_ERRNO;
+            }
+            while (*s) s++;
         }
      }
      else if (isGV_with_GP(sv)) {
@@ -3458,7 +3463,7 @@ must_be_utf8:
                  * set so starts from there.  Otherwise, can use memory copy to
                  * get up to where we are now, and then start from here */
  
-               if (invariant_head <= 0) {
+               if (invariant_head == 0) {
                     d = dst;
                 } else {
                     Copy(s, dst, invariant_head, char);
@@ -8586,13 +8591,8 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv)
             /* I don't think we can get here. Maybe I should assert this
                And if we do get here I suspect that sv_setnv will croak. NWC
                Fall through. */
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n",
-                                 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#else
             DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_inc punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n",
                                   SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#endif
         }
  #endif /* PERL_PRESERVE_IVUV */
          if (!numtype && ckWARN(WARN_NUMERIC))
@@ -8615,7 +8615,7 @@ Perl_sv_inc_nomg(pTHX_ SV *const sv)
              * arranged in order (although not consecutively) and that only
              * [A-Za-z] are accepted by isALPHA in the C locale.
              */
-           if (*d != 'z' && *d != 'Z') {
+           if (isALPHA_FOLD_NE(*d, 'z')) {
                 do { ++*d; } while (!isALPHA(*d));
                 return;
             }
@@ -8763,13 +8763,8 @@ Perl_sv_dec_nomg(pTHX_ SV *const sv)
             /* I don't think we can get here. Maybe I should assert this
                And if we do get here I suspect that sv_setnv will croak. NWC
                Fall through. */
-#if defined(USE_LONG_DOUBLE)
-           DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"PERL_PRIgldbl"\n",
-                                 SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#else
             DEBUG_c(PerlIO_printf(Perl_debug_log,"sv_dec punt failed to convert '%s' to IOK or NOKp, UV=0x%"UVxf" NV=%"NVgf"\n",
                                   SvPVX_const(sv), SvIVX(sv), SvNVX(sv)));
-#endif
         }
      }
  #endif /* PERL_PRESERVE_IVUV */
@@ -9703,7 +9698,7 @@ Perl_sv_reftype(pTHX_ const SV *const sv, const int ob)
         case SVt_PVLV:          return (char *)  (SvROK(sv) ? "REF"
                                 /* tied lvalues should appear to be
                                  * scalars for backwards compatibility */
-                               : (LvTYPE(sv) == 't' || LvTYPE(sv) == 'T')
+                               : (isALPHA_FOLD_EQ(LvTYPE(sv), 't'))
                                     ? "SCALAR" : "LVALUE");
         case SVt_PVAV:          return "ARRAY";
         case SVt_PVHV:          return "HASH";
@@ -10676,10 +10671,9 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
              HEXTRACT_COUNT(ix, 2);
      }
  #  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN
-    /* The last 8 bytes are the mantissa/fraction.
-     * (does this format ever happen?) */
+    /* (does this format ever happen?) */
      /* There explicitly is *no* implicit bit in this case. */
-    for (ix = LONGDBLSIZE - 8; ix < LONGDBLSIZE; ix++) {
+    for (ix = 0; ix < 8; ix++) {
          if (vend)
              HEXTRACT_OUTPUT(ix);
          else
@@ -10766,7 +10760,7 @@ S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
       * nv (as opposed to the long double method), but instead the UV
       * retrieved with the frexp+ldexp invocation. */
  #  if MANTISSASIZE * 8 > NV_MANT_DIG
-    MANTISSATYPE mantissa = Perl_ldexp(norm, NV_MANT_DIG);
+    MANTISSATYPE mantissa = (MANTISSATYPE)Perl_ldexp(norm, NV_MANT_DIG);
      int limit_byte = (NV_MANT_DIG - 1) / 8;
  #  else
      /* There will be low-order precision loss.  Try to salvage as many
@@ -10999,6 +10993,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         I32 epix = 0; /* explicit precision index */
         I32 evix = 0; /* explicit vector index */
         bool asterisk = FALSE;
+        bool infnan = FALSE;
  
         /* echo everything up to the next format specification */
         for (q = p; q < patend && *q != '%'; ++q) ;
@@ -11314,7 +11309,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'V':
         case 'z':
         case 't':
-#ifdef HAS_C99
+#ifdef I_STDINT
          case 'j':
  #endif
             intsize = *q++;
@@ -11344,6 +11339,11 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             }
         }
  
+        if (argsv && SvNOK(argsv)) {
+            /* XXX va_arg(*args) case? */
+            infnan = Perl_isinfnan(SvNV(argsv));
+        }
+
         switch (c = *q++) {
  
             /* STRINGS */
@@ -11351,7 +11351,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
         case 'c':
             if (vectorize)
                 goto unknown;
-           uv = (args) ? va_arg(*args, int) : SvIV(argsv);
+           uv = (args) ? va_arg(*args, int) :
+                infnan ? UNICODE_REPLACEMENT : SvIV(argsv);
             if ((uv > 255 ||
                  (!UVCHR_IS_INVARIANT(uv) && SvUTF8(sv)))
                 && !IN_BYTES) {
@@ -11407,6 +11408,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* INTEGERS */
  
         case 'p':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (alt || vectorize)
                 goto unknown;
             uv = PTR2UV(args ? va_arg(*args, void*) : argsv);
@@ -11422,6 +11427,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             /* FALLTHROUGH */
         case 'd':
         case 'i':
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (vectorize) {
                 STRLEN ulen;
                 if (!veclen)
@@ -11449,7 +11458,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 case 't':       iv = va_arg(*args, ptrdiff_t); break;
  #endif
                 default:        iv = va_arg(*args, int); break;
-#ifdef HAS_C99
+#ifdef I_STDINT
                 case 'j':       iv = va_arg(*args, intmax_t); break;
  #endif
                 case 'q':
@@ -11523,6 +11532,10 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
             base = 16;
  
         uns_integer:
+            if (infnan) {
+                c = 'g';
+                goto floating_point;
+            }
             if (vectorize) {
                 STRLEN ulen;
         vector:
@@ -11548,7 +11561,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #ifdef HAS_PTRDIFF_T
                 case 't':  uv = va_arg(*args, ptrdiff_t); break; /* will sign extend, but there is no uptrdiff_t, so oh well */
  #endif
-#ifdef HAS_C99
+#ifdef I_STDINT
                 case 'j':  uv = va_arg(*args, uintmax_t); break;
  #endif
                 default:   uv = va_arg(*args, unsigned); break;
@@ -11639,6 +11652,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  
             /* FLOATING POINT */
  
+        floating_point:
+
         case 'F':
             c = 'f';            /* maybe %F isn't supported here */
             /* FALLTHROUGH */
@@ -11695,33 +11710,50 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                 : SvNV(argsv);
  
             need = 0;
-           /* frexp() has some unspecified behaviour for nan/inf,
-             * so let's avoid calling that. */
-           if (c != 'e' && c != 'E' && Perl_isfinite(nv)) {
+           /* frexp() (or frexpl) has some unspecified behaviour for
+             * nan/inf/-inf, so let's avoid calling that on those
+             * three values. nv * 0 will be NaN for NaN, +Inf and -Inf,
+             * and 0 for anything else. */
+           if (isALPHA_FOLD_NE(c, 'e') && (nv * 0) == 0) {
                  i = PERL_INT_MIN;
                  (void)Perl_frexp(nv, &i);
                  if (i == PERL_INT_MIN)
                      Perl_die(aTHX_ "panic: frexp");
-                hexfp = (c == 'a' || c == 'A');
+                /* Do not set hexfp earlier since we want to printf
+                 * Inf/NaN for Inf/NAN, not their hexfp. */
+                hexfp = isALPHA_FOLD_EQ(c, 'a');
                  if (UNLIKELY(hexfp)) {
-                    /* Hexadecimal floating point: this size
-                     * computation probably overshoots, but that is
-                     * better than undershooting. */
+                    /* This seriously overshoots in most cases, but
+                     * better the undershooting.  Firstly, all bytes
+                     * of the NV are not mantissa, some of them are
+                     * exponent.  Secondly, for the reasonably common
+                     * long doubles case, the "80-bit extended", two
+                     * or six bytes of the NV are unused. */
                      need +=
-                        (nv < 0) + /* possible unary minus */
+                        (nv < 0) ? 1 : 0 + /* possible unary minus */
                          2 + /* "0x" */
                          1 + /* the very unlikely carry */
                          1 + /* "1" */
                          1 + /* "." */
-                        /* We want one byte per each 4 bits in the
-                         * mantissa.  This works out to about 0.83
-                         * bytes per NV decimal digit (of 4 bits):
-                         * (NV_DIG * log(10)/log(2)) / 4,
-                         * we overestimate by using 5/6 (0.8333...) */
-                        ((NV_DIG * 5) / 6 + 1) +
+                        2 * NVSIZE + /* 2 hexdigits for each byte */
                          2 + /* "p+" */
-                        (i >= 0 ? BIT_DIGITS(i) : 1 + BIT_DIGITS(-i)) +
+                        BIT_DIGITS(NV_MAX_EXP) + /* exponent */
                          1;   /* \0 */
+#if LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN || \
+    LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN
+                    /* However, for the "double double", we need more.
+                     * Since each double has their own exponent, the
+                     * doubles may float (haha) rather far from each
+                     * other, and the number of required bits is much
+                     * larger, up to total of 1028 bits.  (NOTE: this
+                     * is not actually implemented properly yet,
+                     * we are using just the first double, see
+                     * S_hextract() for details.  But let's prepare
+                     * for the future.) */
+
+                    /* 2 hexdigits for each byte. */ 
+                    need += (1028/8 - DOUBLESIZE + 1) * 2;
+#endif
  #ifdef USE_LOCALE_NUMERIC
                          STORE_LC_NUMERIC_SET_TO_NEEDED();
                          if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))
@@ -12005,19 +12037,24 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                      elen = width;
                  }
              }
-            else {
-               char *ptr = ebuf + sizeof ebuf;
-               *--ptr = '\0';
-               *--ptr = c;
-               /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */
+            else
+                elen = S_infnan_copy(nv, PL_efloatbuf, PL_efloatsize);
+            if (elen == 0) {
+                char *ptr = ebuf + sizeof ebuf;
+                *--ptr = '\0';
+                *--ptr = c;
+                /* FIXME: what to do if HAS_LONG_DOUBLE but not PERL_PRIfldbl? */
  #if defined(HAS_LONG_DOUBLE) && defined(PERL_PRIfldbl)
+               /* Note that this is HAS_LONG_DOUBLE and PERL_PRIfldbl,
+                * not USE_LONG_DOUBLE and NVff.  In other words,
+                * this needs to work without USE_LONG_DOUBLE. */
                 if (intsize == 'q') {
                     /* Copy the one or more characters in a long double
                      * format before the 'base' ([efgEFG]) character to
                      * the format string. */
-                   static char const prifldbl[] = PERL_PRIfldbl;
-                   char const *p = prifldbl + sizeof(prifldbl) - 3;
-                   while (p >= prifldbl) { *--ptr = *p--; }
+                   static char const ldblf[] = PERL_PRIfldbl;
+                   char const *p = ldblf + sizeof(ldblf) - 3;
+                   while (p >= ldblf) { *--ptr = *p--; }
                 }
  #endif
                 if (has_precis) {
@@ -12090,7 +12127,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
  #ifdef HAS_PTRDIFF_T
                 case 't':       *(va_arg(*args, ptrdiff_t*)) = i; break;
  #endif
-#ifdef HAS_C99
+#ifdef I_STDINT
                 case 'j':       *(va_arg(*args, intmax_t*)) = i; break;
  #endif
                 case 'q':
@@ -12324,7 +12361,6 @@ Perl_parser_dup(pTHX_ const yy_parser *const proto, CLONE_PARAMS *const param)
                     (proto->lex_casemods < 12 ? 12 : proto->lex_casemods));
      parser->lex_defer  = proto->lex_defer;
      parser->lex_dojoin = proto->lex_dojoin;
-    parser->lex_expect = proto->lex_expect;
      parser->lex_formbrack = proto->lex_formbrack;
      parser->lex_inpat  = proto->lex_inpat;
      parser->lex_inwhat = proto->lex_inwhat;