From eba98284740b799def26803ebac340aaa8930437 Mon Sep 17 00:00:00 2001
From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Mon, 25 Jan 2016 22:17:09 -0500
Subject: [PATCH 1/1] [perl #127183] Non-canonical hexadecimal floats are
 parsed prematurely

5.22.1 regression from 5.22.0.

Rewriting the hexfp fractional digits parsing to handle the
trickiness of having to ignore both the leading and trailing
zero bits when determining how many bits were actually given.
---
 t/op/hexfp.t    |  22 +++++++++-
 t/op/sprintf2.t |  34 ++++++++++++++-
 toke.c          | 129 ++++++++++++++++++++++++++++++++++++--------------------
 3 files changed, 138 insertions(+), 47 deletions(-)

diff --git a/t/op/hexfp.t b/t/op/hexfp.t
index c9f9e39..4b2a96d 100644
--- a/t/op/hexfp.t
+++ b/t/op/hexfp.t
@@ -10,7 +10,7 @@ use strict;
 
 use Config;
 
-plan(tests => 97);
+plan(tests => 105);
 
 # Test hexfloat literals.
 
@@ -40,6 +40,11 @@ is(0x0.1p0, 0.0625);
 is(0x0.10p0, 0.0625);
 is(0x0.100p0, 0.0625);
 
+is(0x.1p0, 0.0625);
+is(0x1.1p0, 1.0625);
+is(0x1.11p0, 1.06640625);
+is(0x1.111p0, 1.066650390625);
+
 # Positive exponents.
 is(0x1p2, 4);
 is(0x1p+2, 4);
@@ -107,6 +112,15 @@ is(0xa_b.c_dp+1_2, 703696);
 # different from 3e4 cf 30e3 cf 30000.  The shifting of the hexdigits
 # makes it look stranger, though: 0xap1 == 0x5p2.
 
+# [perl #127183], try some non-canonical forms.
+SKIP: {
+    skip("nv_preserves_uv_bits is $Config{nv_preserves_uv_bits} not 53", 3)
+        unless ($Config{nv_preserves_uv_bits} == 53);
+    is(0x0.b17217f7d1cf78p0, 0x1.62e42fefa39efp-1);
+    is(0x0.58b90bfbe8e7bcp1, 0x1.62e42fefa39efp-1);
+    is(0x0.2c5c85fdf473dep2, 0x1.62e42fefa39efp-1);
+}
+
 # Needs to use within() instead of is() because of long doubles.
 within(0x1.99999999999ap-4, 0.1, 1e-9);
 within(0x3.333333333333p-5, 0.1, 1e-9);
@@ -133,11 +147,17 @@ sub get_warn() {
     eval '@a = 0x3..5';
     is("@a", "3 4 5");
 
+    undef $a;
     eval '$a = eval "0x.3"';
     is($a, '03');
 
+    undef $a;
     eval '$a = eval "0xc.3"';
     is($a, '123');
+
+    undef $a;
+    eval '$a = eval "0x.p3"';
+    is($a, undef);
 }
 
 # Test warnings.
diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t
index 2784bde..d03deb1 100644
--- a/t/op/sprintf2.t
+++ b/t/op/sprintf2.t
@@ -247,7 +247,7 @@ if ($Config{nvsize} == 8 &&
     print "# no hexfloat tests\n";
 }
 
-plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 9;
+plan tests => 1408 + ($Q ? 0 : 12) + @hexfloat + 12;
 
 use strict;
 use Config;
@@ -702,3 +702,35 @@ SKIP: {
     is(sprintf("%+a", -0.0), "-0x0p+0", "negative zero");
     is(sprintf("%.13a", -0.0), "-0x0.0000000000000p+0", "negative zero");
 }
+
+SKIP: {
+    # [perl #127183] Non-canonical hexadecimal floats are parsed prematurely
+
+    skip("nv_preserves_uv_bits is $Config{nv_preserves_uv_bits}, not 53", 3)
+        unless $Config{nv_preserves_uv_bits} == 53;
+
+    {
+        # The 0x0.b17217f7d1cf78p0 is the original LHS value
+        # from [perl #127183], its bits are 0x162e42fefa39ef << 3,
+        # resulting in a non-canonical form of hexfp, where the most
+        # significant bit is zero, instead of one.
+        is(sprintf("%a", 0x0.b17217f7d1cf78p0 - 0x1.62e42fefa39efp-1),
+           "0x0p+0",
+           "non-canonical form [perl #127183]");
+    }
+
+    {
+        no warnings 'overflow';  # Not the point here.
+
+        # The 0x058b90bfbe8e7bc is 0x162e42fefa39ef << 2,
+        # the 0x02c5c85fdf473de is 0x162e42fefa39ef << 1,
+        # see above.
+        is(sprintf("%a", 0x0.58b90bfbe8e7bcp1 - 0x1.62e42fefa39efp-1),
+           "0x0p+0",
+           "non-canonical form");
+
+        is(sprintf("%a", 0x0.2c5c85fdf473dep2 - 0x1.62e42fefa39efp-1),
+           "0x0p+0",
+           "non-canonical form");
+    }
+}
diff --git a/toke.c b/toke.c
index 23c3521..d2a7e7c 100644
--- a/toke.c
+++ b/toke.c
@@ -10295,6 +10295,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
      * multiple fp operations. */
     bool hexfp = FALSE;
     int total_bits = 0;
+    int significant_bits = 0;
 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
 #  define HEXFP_UQUAD
     Uquad_t hexfp_uquad = 0;
@@ -10305,6 +10306,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 #endif
     NV hexfp_mult = 1.0;
     UV high_non_zero = 0; /* highest digit */
+    int non_zero_integer_digits = 0;
 
     PERL_ARGS_ASSERT_SCAN_NUM;
 
@@ -10457,6 +10459,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                     if (high_non_zero == 0 && b > 0)
                         high_non_zero = b;
 
+                    if (high_non_zero)
+                        non_zero_integer_digits++;
+
                     /* this could be hexfp, but peek ahead
                      * to avoid matching ".." */
                     if (UNLIKELY(HEXFP_PEEK(s))) {
@@ -10483,69 +10488,103 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                  * detection will shortly be more thorough with the
                  * underbar checks. */
                 const char* h = s;
+                significant_bits = non_zero_integer_digits * shift;
 #ifdef HEXFP_UQUAD
                 hexfp_uquad = u;
 #else /* HEXFP_NV */
                 hexfp_nv = u;
 #endif
+                /* Ignore the leading zero bits of
+                 * the high (first) non-zero digit. */
+                if (high_non_zero) {
+                    if (high_non_zero < 0x8)
+                        significant_bits--;
+                    if (high_non_zero < 0x4)
+                        significant_bits--;
+                    if (high_non_zero < 0x2)
+                        significant_bits--;
+                }
+
                 if (*h == '.') {
 #ifdef HEXFP_NV
-                    NV mult = 1 / 16.0;
+                    NV nv_mult = 1.0;
 #endif
+                    bool accumulate = TRUE;
                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
                         if (isXDIGIT(*h)) {
                             U8 b = XDIGIT_VALUE(*h);
-                            total_bits += shift;
-                            if (total_bits < NV_MANT_DIG) {
+                            significant_bits += shift;
 #ifdef HEXFP_UQUAD
-                                hexfp_uquad <<= shift;
-                                hexfp_uquad |= b;
-                                hexfp_frac_bits += shift;
-#else /* HEXFP_NV */
-                                hexfp_nv += b * mult;
-                                mult /= 16.0;
-#endif
-                            } else if (total_bits - shift < NV_MANT_DIG) {
-                                /* A hexdigit straddling the edge of
-                                 * mantissa.  We can try grabbing as
-                                 * many as possible bits. */
-                                int shift2 = 0;
-                                if (b & 1) {
-                                    shift2 = 4;
-                                } else if (b & 2) {
-                                    shift2 = 3;
-                                    total_bits--;
-                                } else if (b & 4) {
-                                    shift2 = 2;
-                                    total_bits -= 2;
-                                } else if (b & 8) {
-                                    shift2 = 1;
-                                    total_bits -= 3;
+                            if (accumulate) {
+                                if (significant_bits < NV_MANT_DIG) {
+                                    /* We are in the long "run" of xdigits,
+                                     * accumulate the full four bits. */
+                                    hexfp_uquad <<= shift;
+                                    hexfp_uquad |= b;
+                                    hexfp_frac_bits += shift;
+                                } else {
+                                    /* We are at a hexdigit either at,
+                                     * or straddling, the edge of mantissa.
+                                     * We will try grabbing as many as
+                                     * possible bits. */
+                                    int tail =
+                                      significant_bits - NV_MANT_DIG;
+                                    if (tail <= 0)
+                                       tail += shift;
+                                    hexfp_uquad <<= tail;
+                                    hexfp_uquad |= b >> (shift - tail);
+                                    hexfp_frac_bits += tail;
+
+                                    /* Ignore the trailing zero bits
+                                     * of the last non-zero xdigit.
+                                     *
+                                     * The assumption here is that if
+                                     * one has input of e.g. the xdigit
+                                     * eight (0x8), there is only one
+                                     * bit being input, not the full
+                                     * four bits.  Conversely, if one
+                                     * specifies a zero xdigit, the
+                                     * assumption is that one really
+                                     * wants all those bits to be zero. */
+                                    if (b) {
+                                        if ((b & 0x1) == 0x0) {
+                                            significant_bits--;
+                                            if ((b & 0x2) == 0x0) {
+                                                significant_bits--;
+                                                if ((b & 0x4) == 0x0) {
+                                                    significant_bits--;
+                                                }
+                                            }
+                                        }
+                                    }
+
+                                    accumulate = FALSE;
                                 }
-#ifdef HEXFP_UQUAD
-                                hexfp_uquad <<= shift2;
-                                hexfp_uquad |= b;
-                                hexfp_frac_bits += shift2;
+                            } else {
+                                /* Keep skipping the xdigits, and
+                                 * accumulating the significant bits,
+                                 * but do not shift the uquad
+                                 * (which would catastrophically drop
+                                 * high-order bits) or accumulate the
+                                 * xdigits anymore. */
+                            }
 #else /* HEXFP_NV */
-                                PERL_UNUSED_VAR(shift2);
-                                hexfp_nv += b * mult;
-                                mult /= 16.0;
-#endif
+                            if (accumulate) {
+                                nv_mult /= 16.0;
+                                if (nv_mult > 0.0)
+                                    hexfp_nv += b * nv_mult;
+                                else
+                                    accumulate = FALSE;
                             }
+#endif
                         }
+                        if (significant_bits >= NV_MANT_DIG)
+                            accumulate = FALSE;
                     }
                 }
 
-                if (total_bits >= 4) {
-                    if (high_non_zero < 0x8)
-                        total_bits--;
-                    if (high_non_zero < 0x4)
-                        total_bits--;
-                    if (high_non_zero < 0x2)
-                        total_bits--;
-                }
-
-                if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+                if ((total_bits > 0 || significant_bits > 0) &&
+                    isALPHA_FOLD_EQ(*h, 'p')) {
                     bool negexp = FALSE;
                     h++;
                     if (*h == '+')
@@ -10789,7 +10828,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    *d = '\0';
             if (UNLIKELY(hexfp)) {
 #  ifdef NV_MANT_DIG
-                if (total_bits > NV_MANT_DIG)
+                if (significant_bits > NV_MANT_DIG)
                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                    "Hexadecimal float: mantissa overflow");
 #  endif
-- 
1.8.3.1