X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/c0de32e24cb35f30ec0d37406b19fcc377911df7..2bb3725dfe9edb3ec056dbc691826cda14d85a48:/sv.c

diff --git a/sv.c b/sv.c
index afd4376..92c34a8 100644
--- a/sv.c
+++ b/sv.c
@@ -10563,6 +10563,280 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
     sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC);
 }
 
+/* vhex will contain the values (0..15) of the hex digits ("nybbles"
+ * of 4 bits); 1 for the implicit 1, and at most 128 bits of mantissa,
+ * four bits per xdigit. */
+#define VHEX_SIZE (1+128/4)
+
+/* If we do not have a known long double format, (including not using
+ * long doubles, or long doubles being equal to doubles) then we will
+ * fall back to the ldexp/frexp route, with which we can retrieve at
+ * most as many bits as our widest unsigned integer type is.  We try
+ * to get a 64-bit unsigned integer even if we are not having 64-bit
+ * UV. */
+#if defined(HAS_QUAD) && defined(Uquad_t)
+#  define MANTISSATYPE Uquad_t
+#  define MANTISSASIZE 8
+#else
+#  define MANTISSATYPE UV /* May lose precision if UVSIZE is not 8. */
+#  define MANTISSASIZE UVSIZE
+#endif
+
+/* S_hextract() is a helper for Perl_sv_vcatpvfn_flags, for extracting
+ * the hexadecimal values (for %a/%A).  The nv is the NV where the value
+ * are being extracted from (either directly from the long double in-memory
+ * presentation, or from the uquad computed via frexp+ldexp).  frexp also
+ * is used to update the exponent.  vhex is the pointer to the beginning
+ * of the output buffer (of VHEX_SIZE).
+ *
+ * The tricky part is that S_hextract() needs to be called twice:
+ * the first time with vend as NULL, and the second time with vend as
+ * the pointer returned by the first call.  What happens is that on
+ * the first round the output size is computed, and the intended
+ * extraction sanity checked.  On the second round the actual output
+ * (the extraction of the hexadecimal values) takes place.
+ * Sanity failures cause fatal failures during both rounds. */
+STATIC U8*
+S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend)
+{
+    U8* v = vhex;
+    int ix;
+    int ixmin = 0, ixmax = 0;
+
+    /* XXX Inf/NaN/denormal handling in the HEXTRACT_IMPLICIT_BIT,
+     * and elsewhere. */
+
+    /* These macros are just to reduce typos, they have multiple
+     * repetitions below, but usually only one (or sometimes two)
+     * of them is really being used. */
+    /* HEXTRACT_OUTPUT() extracts the high nybble first. */
+#define HEXTRACT_OUTPUT_HI(ix) (*v++ = nvp[ix] >> 4)
+#define HEXTRACT_OUTPUT_LO(ix) (*v++ = nvp[ix] & 0xF)
+#define HEXTRACT_OUTPUT(ix) \
+    STMT_START { \
+        HEXTRACT_OUTPUT_HI(ix); \
+        HEXTRACT_OUTPUT_LO(ix); \
+    } STMT_END
+#define HEXTRACT_COUNT(ix, c) \
+    STMT_START { \
+      v += c; \
+      if (ix < ixmin) \
+        ixmin = ix; \
+      else if (ix > ixmax) \
+        ixmax = ix; \
+    } STMT_END
+#define HEXTRACT_IMPLICIT_BIT() \
+    if (exponent) { \
+        if (vend) \
+            *v++ = 1; \
+        else \
+            v++; \
+    }
+
+    /* First see if we are using long doubles. */
+#if NVSIZE > DOUBLESIZE && LONG_DOUBLEKIND != LONG_DOUBLE_IS_DOUBLE
+    const U8* nvp = (const U8*)(&nv);
+#  define HEXTRACTSIZE NVSIZE
+    (void)Perl_frexp(PERL_ABS(nv), exponent);
+#  if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN
+    /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L:
+     * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb 3f */
+    /* The bytes 13..0 are the mantissa/fraction,
+     * the 15,14 are the sign+exponent. */
+    HEXTRACT_IMPLICIT_BIT();
+    for (ix = 13; ix >= 0; ix--) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    *exponent -= 4;
+#  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN
+    /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L:
+     * bf fb 99 99 99 99 99 99 99 99 99 99 99 99 99 9a */
+    /* The bytes 2..15 are the mantissa/fraction,
+     * the 0,1 are the sign+exponent. */
+    HEXTRACT_IMPLICIT_BIT();
+    for (ix = 2; ix <= 15; ix++) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    *exponent -= 4;
+#  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
+    /* x86 80-bit "extended precision", 64 bits of mantissa / fraction /
+     * significand, 15 bits of exponent, 1 bit of sign.  NVSIZE can
+     * be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux and OS X),
+     * meaning that 2 or 6 bytes are empty padding. */
+    /* The bytes 7..0 are the mantissa/fraction */
+    /* There explicitly is *no* implicit bit in this case. */
+    for (ix = 7; ix >= 0; ix--) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    *exponent -= 4;
+#  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN
+    /* The last 8 bytes are the mantissa/fraction.
+     * (does this format ever happen?) */
+    /* There explicitly is *no* implicit bit in this case. */
+    for (ix = LONGDBLSIZE - 8; ix < LONGDBLSIZE; ix++) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    *exponent -= 4;
+#  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN
+    /* Where is this used?
+     * 9a 99 99 99 99 99 59 bc 9a 99 99 99 99 99 b9 3f */
+    HEXTRACT_IMPLICIT_BIT();
+    if (vend)
+        HEXTRACT_OUTPUT_LO(14);
+    else
+        HEXTRACT_COUNT(14, 1);
+    for (ix = 13; ix >= 8; ix--) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    /* XXX not extracting from the second double -- see the discussion
+     * below for the big endian double double. */
+#    if 0
+    if (vend)
+        HEXTRACT_OUTPUT_LO(6);
+    else
+        HEXTRACT_COUNT(6, 1);
+    for (ix = 5; ix >= 0; ix--) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+#    endif
+    (*exponent)--;
+#  elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN
+    /* Used in e.g. PPC/Power (AIX) and MIPS.
+     *
+     * The mantissa bits are in two separate stretches, e.g. for -0.1L:
+     * 3f b9 99 99 99 99 99 9a bc 59 99 99 99 99 99 9a
+     */
+    HEXTRACT_IMPLICIT_BIT();
+    if (vend)
+        HEXTRACT_OUTPUT_LO(1);
+    else
+        HEXTRACT_COUNT(1, 1);
+    for (ix = 2; ix < 8; ix++) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+    /* XXX not extracting the second double mantissa bits- this is not
+     * right nor ideal (we effectively reduce the output format to
+     * that of a "single double", only 53 bits), but we do not know
+     * exactly how to do the extraction correctly so that it matches
+     * the semantics of, say, the IEEE quadruple float. */
+#    if 0
+    if (vend)
+        HEXTRACT_OUTPUT_LO(9);
+    else
+        HEXTRACT_COUNT(9, 1);
+    for (ix = 10; ix < 16; ix++) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+#   endif
+    (*exponent)--;
+#  else
+    Perl_croak(aTHX_
+               "Hexadecimal float: unsupported long double format");
+#  endif
+#else
+    /* If not using long doubles (or if the long double format is
+     * known but not yet supported), try to retrieve the mantissa bits
+     * via frexp+ldexp. */
+
+    NV norm = Perl_frexp(PERL_ABS(nv), exponent);
+    /* Theoretically we have all the bytes [0, MANTISSASIZE-1] to
+     * inspect; but in practice we don't want the leading nybbles that
+     * are zero.  With the common IEEE 754 value for NV_MANT_DIG being
+     * 53, we want the limit byte to be (int)((53-1)/8) == 6.
+     *
+     * Note that this is _not_ inspecting the in-memory format of the
+     * nv (as opposed to the long double method), but instead the UV
+     * retrieved with the frexp+ldexp invocation. */
+#  if MANTISSASIZE * 8 > NV_MANT_DIG
+    MANTISSATYPE mantissa = Perl_ldexp(norm, NV_MANT_DIG);
+    int limit_byte = (NV_MANT_DIG - 1) / 8;
+#  else
+    /* There will be low-order precision loss.  Try to salvage as many
+     * bits as possible.  Will truncate, not round. */
+    MANTISSATYPE mantissa =
+    Perl_ldexp(norm,
+               /* The highest possible shift by two that fits in the
+                * mantissa and is aligned (by four) the same was as
+                * NV_MANT_DIG. */
+               MANTISSASIZE * 8 - (4 - NV_MANT_DIG % 4));
+    int limit_byte = MANTISSASIZE - 1;
+#  endif
+    const U8* nvp = (const U8*)(&mantissa);
+#  define HEXTRACTSIZE MANTISSASIZE
+    /* We make here the wild assumption that the endianness of doubles
+     * is similar to the endianness of integers, and that there is no
+     * middle-endianness.  This may come back to haunt us (the rumor
+     * has it that ARM can be quite haunted).
+     *
+     * We generate 4-bit xdigits (nybble/nibble) instead of 8-bit
+     * bytes, since we might need to handle printf precision, and also
+     * insert the radix.
+     */
+#  if BYTEORDER == 0x12345678 || BYTEORDER == 0x1234 || \
+     LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN || \
+     LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \
+     LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN
+    /* Little endian. */
+    for (ix = limit_byte; ix >= 0; ix--) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+#  else
+    /* Big endian. */
+    for (ix = MANTISSASIZE - 1 - limit_byte; ix < MANTISSASIZE; ix++) {
+        if (vend)
+            HEXTRACT_OUTPUT(ix);
+        else
+            HEXTRACT_COUNT(ix, 2);
+    }
+#  endif
+    /* If there are not enough bits in MANTISSATYPE, we couldn't get
+     * all of them, issue a warning.
+     *
+     * Note that NV_PRESERVES_UV_BITS would not help here, it is the
+     * wrong way around. */
+#  if NV_MANT_DIG > MANTISSASIZE * 8
+    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                   "Hexadecimal float: precision loss");
+#  endif
+#endif
+    /* Croak for various reasons: if the output pointer escaped the
+     * output buffer, if the extraction index escaped the extraction
+     * buffer, or if the ending output pointer didn't match the
+     * previously computed value. */
+    if (v <= vhex || v - vhex >= VHEX_SIZE ||
+        ixmin < 0 || ixmax >= HEXTRACTSIZE ||
+        (vend && v != vend))
+        Perl_croak(aTHX_ "Hexadecimal float: internal error");
+    return v;
+}
+
 void
 Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen,
                        va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted,
@@ -10585,6 +10859,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
     /* large enough for "%#.#f" --chip */
     /* what about long double NVs? --jhi */
     bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */
+    bool hexfp = FALSE;
 
     DECLARATION_FOR_STORE_LC_NUMERIC_SET_TO_NEEDED;
 
@@ -11376,6 +11651,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	case 'e': case 'E':
 	case 'f':
 	case 'g': case 'G':
+	case 'a': case 'A':
 	    if (vectorize)
 		goto unknown;
 
@@ -11428,14 +11704,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 	    /* nv * 0 will be NaN for NaN, +Inf and -Inf, and 0 for anything
 	       else. frexp() has some unspecified behaviour for those three */
 	    if (c != 'e' && c != 'E' && (nv * 0) == 0) {
-		i = PERL_INT_MIN;
-		/* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this
-		   will cast our (long double) to (double) */
-		(void)Perl_frexp(nv, &i);
-		if (i == PERL_INT_MIN)
-		    Perl_die(aTHX_ "panic: frexp");
-		if (i > 0)
-		    need = BIT_DIGITS(i);
+                i = PERL_INT_MIN;
+                /* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this
+                   will cast our (long double) to (double) */
+                (void)Perl_frexp(nv, &i);
+                if (i == PERL_INT_MIN)
+                    Perl_die(aTHX_ "panic: frexp");
+                hexfp = (c == 'a' || c == 'A');
+                if (UNLIKELY(hexfp)) {
+                    /* Hexadecimal floating point: this size
+                     * computation probably overshoots, but that is
+                     * better than undershooting. */
+                    need +=
+                        (nv < 0) + /* possible unary minus */
+                        2 + /* "0x" */
+                        1 + /* the very unlikely carry */
+                        1 + /* "1" */
+                        1 + /* "." */
+                        /* We want one byte per each 4 bits in the
+                         * mantissa.  This works out to about 0.83
+                         * bytes per NV decimal digit (of 4 bits):
+                         * (NV_DIG * log(10)/log(2)) / 4,
+                         * we overestimate by using 5/6 (0.8333...) */
+                        ((NV_DIG * 5) / 6 + 1) +
+                        2 + /* "p+" */
+                        (i >= 0 ? BIT_DIGITS(i) : 1 + BIT_DIGITS(-i)) +
+                        1;   /* \0 */
+#ifdef USE_LOCALE_NUMERIC
+                        STORE_LC_NUMERIC_SET_TO_NEEDED();
+                        if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))
+                            need += SvLEN(PL_numeric_radix_sv);
+                        RESTORE_LC_NUMERIC();
+#endif
+                }
+                else if (i > 0) {
+                    need = BIT_DIGITS(i);
+                } /* if i < 0, the number of digits is hard to predict. */
 	    }
 	    need += has_precis ? precis : 6; /* known default */
 
@@ -11533,7 +11837,175 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
 			break;
 		}
 	    }
-	    {
+
+            if (UNLIKELY(hexfp)) {
+                /* Hexadecimal floating point. */
+                char* p = PL_efloatbuf;
+                U8 vhex[VHEX_SIZE];
+                U8* v = vhex; /* working pointer to vhex */
+                U8* vend; /* pointer to one beyond last digit of vhex */
+                U8* vfnz = NULL; /* first non-zero */
+                const bool lower = (c == 'a');
+                /* At output the values of vhex (up to vend) will
+                 * be mapped through the xdig to get the actual
+                 * human-readable xdigits. */
+                const char* xdig = PL_hexdigit;
+                int zerotail = 0; /* how many extra zeros to append */
+                int exponent = 0; /* exponent of the floating point input */
+
+                /* XXX: denormals, NaN, Inf.
+                 *
+                 * For example with denormals, (assuming the vanilla
+                 * 64-bit double): the exponent is zero. 1xp-1074 is
+                 * the smallest denormal and the smallest double, it
+                 * should be output as 0x0.0000000000001p-1022 to
+                 * match its internal structure. */
+
+                vend = S_hextract(aTHX_ nv, &exponent, vhex, NULL);
+                S_hextract(aTHX_ nv, &exponent, vhex, vend);
+
+                if (nv < 0)
+                    *p++ = '-';
+                else if (plus)
+                    *p++ = plus;
+                *p++ = '0';
+                if (lower) {
+                    *p++ = 'x';
+                }
+                else {
+                    *p++ = 'X';
+                    xdig += 16; /* Use uppercase hex. */
+                }
+
+                /* Find the first non-zero xdigit. */
+                for (v = vhex; v < vend; v++) {
+                    if (*v) {
+                        vfnz = v;
+                        break;
+                    }
+                }
+
+                if (vfnz) {
+                    U8* vlnz = NULL; /* The last non-zero. */
+
+                    /* Find the last non-zero xdigit. */
+                    for (v = vend - 1; v >= vhex; v--) {
+                        if (*v) {
+                            vlnz = v;
+                            break;
+                        }
+                    }
+
+#if NVSIZE == DOUBLESIZE
+                    /* For long doubles S_hextract() took care of this. */
+                    exponent--;
+#endif
+
+                    if (precis > 0) {
+                        v = vhex + precis + 1;
+                        if (v < vend) {
+                            /* Round away from zero: if the tail
+                             * beyond the precis xdigits is equal to
+                             * or greater than 0x8000... */
+                            bool round = *v > 0x8;
+                            if (!round && *v == 0x8) {
+                                for (v++; v < vend; v++) {
+                                    if (*v) {
+                                        round = TRUE;
+                                        break;
+                                    }
+                                }
+                            }
+                            if (round) {
+                                for (v = vhex + precis; v >= vhex; v--) {
+                                    if (*v < 0xF) {
+                                        (*v)++;
+                                        break;
+                                    }
+                                    *v = 0;
+                                    if (v == vhex) {
+                                        /* If the carry goes all the way to
+                                         * the front, we need to output
+                                         * a single '1'. This goes against
+                                         * the "xdigit and then radix"
+                                         * but since this is "cannot happen"
+                                         * category, that is probably good. */
+                                        *p++ = xdig[1];
+                                    }
+                                }
+                            }
+                            /* The new effective "last non zero". */
+                            vlnz = vhex + precis;
+                        }
+                        else {
+                            zerotail = precis - (vlnz - vhex);
+                        }
+                    }
+
+                    v = vhex;
+                    *p++ = xdig[*v++];
+
+                    /* The radix is always output after the first
+                     * non-zero xdigit, or if alt.  */
+                    if (vfnz < vlnz || alt) {
+#ifndef USE_LOCALE_NUMERIC
+                        *p++ = '.';
+#else
+                        STORE_LC_NUMERIC_SET_TO_NEEDED();
+                        if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+                            STRLEN n;
+                            const char* r = SvPV(PL_numeric_radix_sv, n);
+                            Copy(r, p, n, char);
+                            p += n;
+                        }
+                        else {
+                            *p++ = '.';
+                        }
+                        RESTORE_LC_NUMERIC();
+#endif
+                    }
+
+                    while (v <= vlnz)
+                        *p++ = xdig[*v++];
+
+                    while (zerotail--)
+                        *p++ = '0';
+                }
+                else {
+                    *p++ = '0';
+                    exponent = 0;
+                }
+
+                elen = p - PL_efloatbuf;
+                elen += my_snprintf(p, PL_efloatsize - elen,
+                                    "%c%+d", lower ? 'p' : 'P',
+                                    exponent);
+
+                if (elen < width) {
+                    if (left) {
+                        /* Pad the back with spaces. */
+                        memset(PL_efloatbuf + elen, ' ', width - elen);
+                    }
+                    else if (fill == '0') {
+                        /* Insert the zeros between the "0x" and
+                         * the digits, otherwise we end up with
+                         * "0000xHHH..." */
+                        STRLEN nzero = width - elen;
+                        char* zerox = PL_efloatbuf + 2;
+                        Move(zerox, zerox + nzero,  elen - 2, char);
+                        memset(zerox, fill, nzero);
+                    }
+                    else {
+                        /* Move it to the right. */
+                        Move(PL_efloatbuf, PL_efloatbuf + width - elen,
+                             elen, char);
+                        /* Pad the front with spaces. */
+                        memset(PL_efloatbuf, ' ', width - elen);
+                    }
+                    elen = width;
+                }
+            }
+            else {
 		char *ptr = ebuf + sizeof ebuf;
 		*--ptr = '\0';
 		*--ptr = c;
@@ -11577,14 +12049,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                  * that is safe to use, even though it's not literal */
                 GCC_DIAG_IGNORE(-Wformat-nonliteral);
 #if defined(HAS_LONG_DOUBLE)
-		elen = ((intsize == 'q')
-			? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, nv)
-			: my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)nv));
+                elen = ((intsize == 'q')
+                        ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, nv)
+                        : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)nv));
 #else
-		elen = my_sprintf(PL_efloatbuf, ptr, nv);
+                elen = my_sprintf(PL_efloatbuf, ptr, nv);
 #endif
                 GCC_DIAG_RESTORE;
 	    }
+
 	float_converted:
 	    eptr = PL_efloatbuf;