From 61e61fbc49c95894c46dd5128133731fc2092e07 Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Tue, 12 Aug 2014 08:39:40 -0400
Subject: [PATCH] Hexadecimal float literals, for perl #122219

Do not use system strtod/strtold since its C99 (which we do not require).

If we can fit the mantissa bits of a NV to a UV (which is the case for
the most common format, 64-bit IEEE 754 double, which has 52 mantissa bits)
we will use that, and only in the end convert to NV.  If we cannot fit
the bits, accumulate the bits incrementally to a NV.
---
 MANIFEST         |   1 +
 perl.h           |   6 --
 pod/perldata.pod |   9 +++
 pod/perldiag.pod |  16 +++++
 t/op/hexfp.t     | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 toke.c           | 177 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 6 files changed, 374 insertions(+), 15 deletions(-)
 create mode 100644 t/op/hexfp.t
diff --git a/MANIFEST b/MANIFEST
index 1d8411e..179cc3d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5088,6 +5088,7 @@ t/op/hash-rt85026.t		See if hash iteration/deletion works
 t/op/hash.t			See if the complexity attackers are repelled
 t/op/hashwarn.t			See if warnings for bad hash assignments work
 t/op/heredoc.t			See if heredoc edge and corner cases work
+t/op/hexfp.t			See if hexadecimal float literals work
 t/op/inccode.t			See if coderefs work in @INC
 t/op/inccode-tie.t		See if tie to @INC works
 t/op/incfilter.t		See if the source filters in coderef-in-@INC work
diff --git a/perl.h b/perl.h
index 80238d9..ece022d 100644
--- a/perl.h
+++ b/perl.h
@@ -1842,9 +1842,6 @@ typedef NVTYPE NV;
 #   ifdef LDBL_MANT_DIG
 #       define NV_MANT_DIG LDBL_MANT_DIG
 #   endif
-#   ifdef LDBL_MANT_BITS
-#       define NV_MANT_BITS LDBL_MANT_BITS
-#   endif
 #   ifdef LDBL_MIN
 #       define NV_MIN LDBL_MIN
 #   endif
@@ -1934,9 +1931,6 @@ EXTERN_C long double modfl(long double, long double *);
 #   ifdef DBL_MANT_DIG
 #       define NV_MANT_DIG DBL_MANT_DIG
 #   endif
-#   ifdef DBL_MANT_BITS
-#       define NV_MANT_BITS DBL_MANT_BITS
-#   endif
 #   ifdef DBL_MIN
 #       define NV_MIN DBL_MIN
 #   endif
diff --git a/pod/perldata.pod b/pod/perldata.pod
index d8edfe9..fe39783 100644
--- a/pod/perldata.pod
+++ b/pod/perldata.pod
@@ -402,6 +402,7 @@ integer formats:
     0xdead_beef         # more hex   
     0377                # octal (only numbers, begins with 0)
     0b011011            # binary
+    0x1.999ap-4         # hexadecimal floating point (the 'p' is required)
 
 You are allowed to use underscores (underbars) in numeric literals
 between digits for legibility (but not multiple underscores in a row:
@@ -425,6 +426,14 @@ Hexadecimal, octal, or binary, representations in string literals
 representation.  The hex() and oct() functions make these conversions
 for you.  See L<perlfunc/hex> and L<perlfunc/oct> for more details.
 
+Hexadecimal floating point can start just like a hexadecimal literal,
+and it can be followed by an optional fractional hexadecimal part,
+but it must be followed by C<p>, an optional sign, and a power of two.
+The format is useful for accurately presenting floating point values,
+avoiding conversions to or from decimal floating point, and therefore
+avoiding possible loss in precision.  Notice that while most current
+platforms use the 64-bit IEEE 754 floating point, not all do.
+
 You can also embed newlines directly in your strings, i.e., they can end
 on a different line than they begin.  This is nice, but if you forget
 your trailing quote, the error will not be reported until Perl finds
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 9f1fed9..5877065 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2179,6 +2179,22 @@ created on an emergency basis to prevent a core dump.
 (F) The parser has given up trying to parse the program after 10 errors.
 Further error messages would likely be uninformative.
 
+=item Hexadecimal float: exponent overflow
+
+(W overflow) The hexadecimal floating point has larger exponent
+than the floating point supports.
+
+=item Hexadecimal float: exponent underflow
+
+(W overflow) The hexadecimal floating point has smaller exponent
+than the floating point supports.
+
+=item Hexadecimal float: mantissa overflow
+
+(W overflow) The hexadecimal floating point literal had more bits in
+the mantissa (the part between the 0x and the exponent, also known as
+the fraction or the significand) than the floating point supports.
+
 =item Hexadecimal float: precision loss
 
 (W overflow) The hexadecimal floating point had internally more
diff --git a/t/op/hexfp.t b/t/op/hexfp.t
new file mode 100644
index 0000000..9a1d045
--- /dev/null
+++ b/t/op/hexfp.t
@@ -0,0 +1,180 @@
+#!./perl
+
+use strict;
+
+use Config;
+
+BEGIN {
+    chdir 't' if -d 't';
+    require './test.pl';
+    eval '0x0p0';
+    print "# $@\n";
+}
+
+plan(tests => 79);
+
+# Test hexfloat literals.
+
+is(0x0p0, 0);
+is(0x0.p0, 0);
+is(0x.0p0, 0);
+is(0x0.0p0, 0);
+is(0x0.00p0, 0);
+
+is(0x1p0, 1);
+is(0x1.p0, 1);
+is(0x1.0p0, 1);
+is(0x1.00p0, 1);
+
+is(0x2p0, 2);
+is(0x2.p0, 2);
+is(0x2.0p0, 2);
+is(0x2.00p0, 2);
+
+is(0x1p1, 2);
+is(0x1.p1, 2);
+is(0x1.0p1, 2);
+is(0x1.00p1, 2);
+
+is(0x.1p0, 0.0625);
+is(0x0.1p0, 0.0625);
+is(0x0.10p0, 0.0625);
+is(0x0.100p0, 0.0625);
+
+# Positive exponents.
+is(0x1p2, 4);
+is(0x1p+2, 4);
+is(0x0p+0, 0);
+
+# Negative exponents.
+is(0x1p-1, 0.5);
+is(0x1.p-1, 0.5);
+is(0x1.0p-1, 0.5);
+is(0x0p-0, 0);
+
+is(0x1p+2, 4);
+is(0x1p-2, 0.25);
+
+is(0x3p+2, 12);
+is(0x3p-2, 0.75);
+
+# Shifting left.
+is(0x1p2, 1 << 2);
+is(0x1p3, 1 << 3);
+is(0x3p4, 3 << 4);
+is(0x3p5, 3 << 5);
+is(0x12p23, 0x12 << 23);
+
+# Shifting right.
+is(0x1p-2, 1 / (1 << 2));
+is(0x1p-3, 1 / (1 << 3));
+is(0x3p-4, 3 / (1 << 4));
+is(0x3p-5, 3 / (1 << 5));
+is(0x12p-23, 0x12 / (1 << 23));
+
+# Negative sign.
+is(-0x1p+2, -4);
+is(-0x1p-2, -0.25);
+is(-0x0p+0, 0);
+is(-0x0p-0, 0);
+
+is(0x0.10p0, 0.0625);
+is(0x0.1p0, 0.0625);
+is(0x.1p0, 0.0625);
+
+is(0x12p+3, 144);
+is(0x12p-3, 2.25);
+
+# Hexdigits (lowercase).
+is(0x9p+0, 9);
+is(0xap+0, 10);
+is(0xfp+0, 15);
+is(0x10p+0, 16);
+is(0x11p+0, 17);
+is(0xabp+0, 171);
+is(0xab.cdp+0, 171.80078125);
+
+# Uppercase hexdigits and exponent prefix.
+is(0xAp+0, 10);
+is(0xFp+0, 15);
+is(0xABP+0, 171);
+is(0xAB.CDP+0, 171.80078125);
+
+# Underbars.
+is(0xa_b.c_dp+1_2, 703696);
+
+# Note that the hexfloat representation is not unique
+# since the exponent can be shifted: no different from
+# 3e4 cf 30e3 cf 30000.
+
+# Needs to use within() instead of is() because of long doubles.
+within(0x1.999999999999ap-4, 0.1, 1e-9);
+within(0x3.3333333333333p-5, 0.1, 1e-9);
+within(0xc.ccccccccccccdp-7, 0.1, 1e-9);
+
+my $warn;
+
+local $SIG{__WARN__} = sub { $warn = shift };
+
+sub get_warn() {
+    my $save = $warn;
+    undef $warn;
+    return $save;
+}
+
+{ # Test certain things that are not hexfloats and should stay that way.
+    eval '0xp3';
+    like(get_warn(), qr/Missing operator before p3/);
+
+    eval '5p3';
+    like(get_warn(), qr/Missing operator before p3/);
+
+    my @a;
+    eval '@a = 0x3..5';
+    is("@a", "3 4 5");
+
+    eval '$a = eval "0x.3"';
+    is($a, '03');
+
+    eval '$a = eval "0xc.3"';
+    is($a, '123');
+}
+
+# Test warnings.
+SKIP:
+{
+    if ($Config{nv_preserves_uv_bits} == 53) {
+        local $^W = 1;
+
+        eval '0x1_0000_0000_0000_0p0';
+        is(get_warn(), undef);
+
+        eval '0x2_0000_0000_0000_0p0';
+        like(get_warn(), qr/^Hexadecimal float: mantissa overflow/);
+
+        eval '0x1.0000_0000_0000_0p0';
+        is(get_warn(), undef);
+
+        eval '0x2.0000_0000_0000_0p0';
+        like(get_warn(), qr/^Hexadecimal float: mantissa overflow/);
+
+        eval '0x.1p-1021';
+        is(get_warn(), undef);
+
+        eval '0x.1p-1023';
+        like(get_warn(), qr/^Hexadecimal float: exponent underflow/);
+
+        eval '0x1.fffffffffffffp+1023';
+        is(get_warn(), undef);
+
+        eval '0x1.fffffffffffffp+1024';
+        like(get_warn(), qr/^Hexadecimal float: exponent overflow/);
+    } else {
+        print "# skipping warning tests\n";
+        skip "nv_preserves_uv_bits is $Config{nv_preserves_uv_bits} not 53", 8;
+    }
+}
+
+# sprintf %a/%A testing is done in sprintf2.t,
+# trickier than necessary because of long doubles,
+# and because looseness of the spec.
diff --git a/toke.c b/toke.c
index ea16f9b..4620c10 100644
--- a/toke.c
+++ b/toke.c
@@ -9803,6 +9803,25 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
     bool floatit;			/* boolean: int or float? */
     const char *lastub = NULL;		/* position of last underbar */
     static const char* const number_too_long = "Number too long";
+    /* Hexadecimal floating point.
+     *
+     * In many places (where UV is quad and NV is IEEE 754 double)
+     * we can fit the mantissa bits of a NV into a UV.  This will not
+     * work everywhere, though (either no quads, or using long doubles),
+     * in which case we have to resort to NV, which will probably mean
+     * horrible loss of precision due to multiple fp operations. */
+    bool hexfp = FALSE;
+    int total_bits = 0;
+#if UVSIZE == 8 && NVSIZE == 8
+#  define HEXFP_UV
+    UV hexfp_uv = 0;
+    int hexfp_frac_bits = 0;
+#else
+#  define HEXFP_NV
+    NV hexfp_nv = 0.0;
+#endif
+    NV hexfp_mult = 1.0;
+    UV high_non_zero = 0; /* highest digit */
 
     PERL_ARGS_ASSERT_SCAN_NUM;
 
@@ -9927,6 +9946,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 		    if (!overflowed) {
 			x = u << shift;	/* make room for the digit */
 
+                        total_bits += shift;
+
 			if ((x >> shift) != u
 			    && !(PL_hints & HINT_NEW_BINARY)) {
 			    overflowed = TRUE;
@@ -9949,6 +9970,20 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 			 * amount. */
 			n += (NV) b;
 		    }
+
+                    if (high_non_zero == 0 && b > 0)
+                        high_non_zero = b;
+
+                    /* this could be hexfp, but peek ahead
+                     * to avoid matching ".." */
+#define HEXFP_PEEK(s) \
+	(((s[0] == '.') && \
+	  (isXDIGIT(s[1]) || s[1] == 'p' || s[1] == 'P')) \
+	 || s[0] == 'p' || s[0] == 'P')
+                    if (UNLIKELY(HEXFP_PEEK(s))) {
+                        goto out;
+                    }
+
 		    break;
 		}
 	    }
@@ -9963,6 +9998,98 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 		Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
 	    }
 
+            if (UNLIKELY(HEXFP_PEEK(s))) {
+                /* Do sloppy (on the underbars) but quick detection
+                 * (and value construction) for hexfp, the decimal
+                 * detection will shortly be more thorough with the
+                 * underbar checks. */
+                const char* h = s;
+#ifdef HEXFP_UV
+                hexfp_uv = u;
+#else /* HEXFP_NV */
+                hexfp_nv = u;
+#endif
+                if (*h == '.') {
+#ifdef HEXFP_NV
+                    NV hfm = 1 / 16.0;
+#endif
+                    h++;
+                    while (isXDIGIT(*h) || *h == '_') {
+                        if (isXDIGIT(*h)) {
+                            const char* p = strchr(PL_hexdigit, *h);
+                            U8 b;
+                            assert(p);
+                            b = ((p - PL_hexdigit) & 0x0F);
+                            total_bits += shift;
+#ifdef HEXFP_UV
+                            hexfp_uv <<= shift;
+                            hexfp_uv |= b;
+                            hexfp_frac_bits += shift;
+#else /* HEXFP_NV */
+                            hexfp_nv += b * hfm;
+                            hfm /= 16.0;
+#endif
+                        }
+                        h++;
+                    }
+                }
+
+                if (total_bits >= 4) {
+                    if (high_non_zero < 0x8)
+                        total_bits--;
+                    if (high_non_zero < 0x4)
+                        total_bits--;
+                    if (high_non_zero < 0x2)
+                        total_bits--;
+                }
+
+                if (total_bits > 0 && (*h == 'p' || *h == 'P')) {
+                    bool negexp = FALSE;
+                    h++;
+                    if (*h == '+')
+                        h++;
+                    else if (*h == '-') {
+                        negexp = TRUE;
+                        h++;
+                    }
+                    if (isDIGIT(*h)) {
+                        I32 hexfp_exp = 0;
+                        while (isDIGIT(*h) || *h == '_') {
+                            if (isDIGIT(*h)) {
+                                hexfp_exp *= 10;
+                                hexfp_exp += *h - '0';
+#ifdef NV_MIN_EXP
+                                if (negexp &&
+                                    -hexfp_exp < NV_MIN_EXP - 1) {
+                                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                                   "Hexadecimal float: exponent underflow");
+#endif
+                                    break;
+                                } else {
+#ifdef NV_MAX_EXP
+                                    if (!negexp &&
+                                        hexfp_exp > NV_MAX_EXP - 1) {
+                                        Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                                   "Hexadecimal float: exponent overflow");
+                                        break;
+                                    }
+#endif
+                                }
+                            }
+                            h++;
+                        }
+                        if (negexp)
+                            hexfp_exp = -hexfp_exp;
+#ifdef HEXFP_UV
+                        hexfp_exp -= hexfp_frac_bits;
+#endif
+                        hexfp_mult = pow(2.0, hexfp_exp);
+                        hexfp = TRUE;
+                        goto decimal;
+                    }
+                }
+            }
+
 	    if (overflowed) {
 		if (n > 4294967295.0)
 		    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
@@ -9996,10 +10123,17 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
       decimal:
 	d = PL_tokenbuf;
 	e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
-	floatit = FALSE;
+        floatit = FALSE;
+        if (hexfp) {
+            floatit = TRUE;
+            *d++ = '0';
+            *d++ = 'x';
+            s = start + 2;
+        }
 
 	/* read next group of digits and _ and copy into d */
-	while (isDIGIT(*s) || *s == '_') {
+	while (isDIGIT(*s) || *s == '_' ||
+               UNLIKELY(hexfp && isXDIGIT(*s))) {
 	    /* skip underscores, checking for misplaced ones
 	       if -w is on
 	    */
@@ -10039,7 +10173,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 
 	    /* copy, ignoring underbars, until we run out of digits.
 	    */
-	    for (; isDIGIT(*s) || *s == '_'; s++) {
+	    for (; isDIGIT(*s) || *s == '_' ||
+                     UNLIKELY(hexfp && isXDIGIT(*s));
+                 s++) {
 	        /* fixed length buffer check */
 		if (d >= e)
 		    Perl_croak(aTHX_ "%s", number_too_long);
@@ -10065,12 +10201,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	}
 
 	/* read exponent part, if present */
-	if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
-	    floatit = TRUE;
+	if (((*s == 'e' || *s == 'E') ||
+             UNLIKELY(hexfp && (*s == 'p' || *s == 'P'))) &&
+            strchr("+-0123456789_", s[1])) {
+            floatit = TRUE;
+
+	    /* regardless of whether user said 3E5 or 3e5, use lower 'e',
+               ditto for p (hexfloats) */
+            if ((*s == 'e' || *s == 'E')) {
+		/* At least some Mach atof()s don't grok 'E' */
+                *d++ = 'e';
+            } else if (UNLIKELY(hexfp && (*s == 'p' || *s == 'P'))) {
+                *d++ = 'p';
+            }
+
 	    s++;
 
-	    /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
-	    *d++ = 'e';		/* At least some Mach atof()s don't grok 'E' */
 
 	    /* stray preinitial _ */
 	    if (*s == '_') {
@@ -10134,9 +10280,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             STORE_NUMERIC_LOCAL_SET_STANDARD();
 	    /* terminate the string */
 	    *d = '\0';
-	    nv = Atof(PL_tokenbuf);
+            if (UNLIKELY(hexfp)) {
+#  ifdef NV_MANT_DIG
+                if (total_bits > NV_MANT_DIG)
+                    Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                   "Hexadecimal float: mantissa overflow");
+#  endif
+#ifdef HEXFP_UV
+                nv = hexfp_uv * hexfp_mult;
+#else /* HEXFP_NV */
+                nv = hexfp_nv * hexfp_mult;
+#endif
+            } else {
+                nv = Atof(PL_tokenbuf);
+            }
             RESTORE_NUMERIC_LOCAL();
-	    sv = newSVnv(nv);
+            sv = newSVnv(nv);
 	}
 
 	if ( floatit
-- 
1.8.3.1