From: Jarkko Hietaniemi <jhi@iki.fi>
Date: Sun, 24 Aug 2014 02:49:04 +0000 (-0400)
Subject: More robust inf/nan recognition and generation.
X-Git-Tag: v5.21.4~562
X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/8c12dc63d1f47b22b69bbcfe5c21dcadb14c5397?hp=b2bea0a9cc3975a6d0df1ebfa82cd24df518b9b0

More robust inf/nan recognition and generation.

Drop INFNAN_PEEK, premature optimization and hard to get right (it
basically imitates unrolled first half of grok_infnan).  Just keep
grok_infan fast.  (There is one spot in grok_number_flags() where we
peek at the next byte to avoid wasted work.)

If falling back (from not having NV_INF/NV_NAN) to the native strtod
(or similar), fake the input based on the grok_infnan result.
Add last-resort ways to generate inf/nan.

Recognize explicit unary plus, like "+Inf", and "INFINITE".

In tests use cmp_ok(), fix typos, add tests.
---

diff --git a/numeric.c b/numeric.c
index daaec06..355980a 100644
--- a/numeric.c
+++ b/numeric.c
@@ -586,16 +586,6 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
     return grok_number_flags(pv, len, valuep, 0);
 }
 
-/* Peek ahead to see whether this could be Inf/NaN/qNaN/snan/1.#INF */
-#define INFNAN_PEEK(s, send) \
-    (s < send && \
-     ((isALPHA_FOLD_EQ(*s, 'I') || isALPHA_FOLD_EQ(*s, 'N')) ||  \
-      ((s + 4) < send &&                                         \
-       (isALPHA_FOLD_EQ(*s, 'Q') || isALPHA_FOLD_EQ(*s, 'S')) && \
-       isALPHA_FOLD_EQ(s[1], 'N')) || \
-      ((s + 5) < send &&                                                \
-       (*s == '1' && ((s[1] == '.' && s[2] == '#') || s[1] == '#')))))
-
 /*
 =for apidoc grok_infnan
 
@@ -623,7 +613,10 @@ Perl_grok_infnan(const char** sp, const char* send)
 
     PERL_ARGS_ASSERT_GROK_INFNAN;
 
-    if (*s == '-') {
+    if (*s == '+') {
+        s++; if (s == send) return 0;
+    }
+    else if (*s == '-') {
         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
         s++; if (s == send) return 0;
     }
@@ -650,8 +643,11 @@ Perl_grok_infnan(const char** sp, const char* send)
                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return 0;
                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return 0;
-                /* XXX maybe also grok "infinite"? */
-                s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return 0;
+                s++; if (s == send ||
+                         /* allow either Infinity or Infinite */
+                         (isALPHA_FOLD_NE(*s, 'Y') &&
+                          isALPHA_FOLD_NE(*s, 'E')))
+                         return 0;
                 s++;
             } else if (*s)
                 return 0;
@@ -681,10 +677,11 @@ Perl_grok_infnan(const char** sp, const char* send)
 
             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 
-            /* NaN can be followed by various stuff since there are
-             * multiple different NaN values, and some implementations
-             * output the "payload" values, e.g. NaN123, NAN(abc),
-             * some implementation just have weird stuff like NaN%. */
+            /* NaN can be followed by various stuff (NaNQ, NaNS), but
+             * there are also multiple different NaN values, and some
+             * implementations output the "payload" values,
+             * e.g. NaN123, NAN(abc), while some implementations just
+             * have weird stuff like NaN%. */
             s = send;
         }
         else
@@ -707,8 +704,6 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
   const char * const send = pv + len;
   const char *d;
   int numtype = 0;
-  int sawinf = 0;
-  int sawnan = 0;
 
   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 
@@ -727,10 +722,10 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
     return 0;
 
   /* The first digit (after optional sign): note that might
-   * also point to "infinity" or "nan". */
+   * also point to "infinity" or "nan", or "1.#INF". */
   d = s;
 
-  /* next must be digit or the radix separator or beginning of infinity */
+  /* next must be digit or the radix separator or beginning of infinity/nan */
   if (isDIGIT(*s)) {
     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
        overflow.  */
@@ -841,30 +836,8 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
     else
         return 0;
   }
-  else {
-      if (INFNAN_PEEK(d, send)) {
-          int infnan = Perl_grok_infnan(&d, send);
-          if ((infnan & IS_NUMBER_INFINITY)) {
-              numtype |= infnan;
-              sawinf = 1;
-          }
-          else if ((infnan & IS_NUMBER_NAN)) {
-              numtype |= infnan;
-              sawnan = 1;
-          }
-          else
-              return 0;
-          s = d;
-      }
-  }
 
-  if (sawinf) {
-    /* Keep the sign for infinity. */
-    numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
-  } else if (sawnan) {
-    numtype &= IS_NUMBER_NEG; /* Clear sign for nan.  */
-    numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
-  } else if (s < send) {
+  if (s < send) {
     /* we can have an optional exponent part */
     if (isALPHA_FOLD_EQ(*s, 'e')) {
       s++;
@@ -894,6 +867,18 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
       *valuep = 0;
     return IS_NUMBER_IN_UV;
   }
+  /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
+  if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
+      /* Really detect inf/nan. Start at d, not s, since the above
+       * code might have already consumed the "1." or "1". */
+      int infnan = Perl_grok_infnan(&d, send);
+      if ((infnan & IS_NUMBER_INFINITY)) {
+          return (numtype | infnan); /* Keep sign for infinity. */
+      }
+      else if ((infnan & IS_NUMBER_NAN)) {
+          return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
+      }
+  }
   else if (flags & PERL_SCAN_TRAILING) {
     return numtype | IS_NUMBER_TRAILING;
   }
@@ -1174,30 +1159,68 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value)
     {
         const char *p0 = negative ? s - 1 : s;
         const char *p = p0;
-#if defined(NV_INF) && defined(NV_NAN)
-        int infnan_flags = grok_infnan(&p, send);
-        if (infnan_flags && p != p0) {
-            if ((infnan_flags & IS_NUMBER_INFINITY)) {
-                *value = (infnan_flags & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
+        int infnan = grok_infnan(&p, send);
+        if (infnan && p != p0) {
+            /* If we can generate inf/nan directly, let's do so. */
+#ifdef NV_INF
+            if ((infnan & IS_NUMBER_INFINITY)) {
+                *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
                 return (char*)p;
             }
-            else if ((infnan_flags & IS_NUMBER_NAN)) {
+#endif
+#ifdef NV_NAN
+            if ((infnan & IS_NUMBER_NAN)) {
                 *value = NV_NAN;
                 return (char*)p;
             }
-        }
-#elif defined(HAS_STRTOD)
-        if (INFNAN_PEEK(s, send)) {
-            /* The native strtod() may not get all the possible
-             * inf/nan strings INFNAN_PEEK() recognizes. */
-            char* endp;
-            NV nv = Perl_strtod(p, &endp);
-            if (p != endp) {
-                *value = nv;
-                return endp;
+#endif
+#ifdef Perl_strtod
+            /* If still here, we didn't have either NV_INF or INV_NAN,
+             * and can try falling back to native strtod/strtold.
+             *
+             * The native interface might not recognize all the possible
+             * inf/nan strings Perl recognizes.  What we can try
+             * is to try faking the input.  We will try inf/-inf/nan
+             * as the most promising/portable input. */
+            {
+                const char* fake = NULL;
+                char* endp;
+                NV nv;
+                if ((infnan & IS_NUMBER_INFINITY)) {
+                    fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
+                }
+                else if ((infnan & IS_NUMBER_NAN)) {
+                    fake = "nan";
+                }
+                assert(fake);
+                nv = Perl_strtod(fake, &endp);
+                if (fake != endp) {
+                    if ((infnan & IS_NUMBER_INFINITY)) {
+#ifdef Perl_isinf
+                        if (Perl_isinf(nv))
+                            *value = nv;
+#else
+                        /* last resort, may generate SIGFPE */
+                        *value = Perl_exp((NV)1e9);
+                        if ((infnan & IS_NUMBER_NEG))
+                            *value = -*value;
+#endif
+                        return (char*)p; /* p, not endp */
+                    }
+                    else if ((infnan & IS_NUMBER_NAN)) {
+#ifdef Perl_isnan
+                        if (Perl_isnan(nv))
+                            *value = nv;
+#else
+                        /* last resort, may generate SIGFPE */
+                        *value = Perl_log((NV)-1.0);
+#endif
+                        return (char*)p; /* p, not endp */
+                    }
+                }
             }
+#endif /* #ifdef Perl_strtod */
         }
-#endif
     }
 
     /* we accumulate digits into an integer; when this becomes too
diff --git a/t/op/infnan.t b/t/op/infnan.t
index dccd888..acd7a34 100644
--- a/t/op/infnan.t
+++ b/t/op/infnan.t
@@ -1,4 +1,4 @@
-#!./perl
+#!./perl -w
 
 BEGIN {
     chdir 't' if -d 't';
@@ -6,21 +6,25 @@ BEGIN {
     require './test.pl';
 }
 
+use strict;
+
 my $PInf = "Inf"  + 0;
 my $NInf = "-Inf" + 0;
 my $NaN  = "NaN"  + 0;
 
-my @PInf = ("Inf", "inf", "INF", "Infinity", "INFINITY",
+my @PInf = ("Inf", "inf", "INF", "+Inf",
+            "Infinity", "INFINITE",
             "1.#INF", "1#INF");
-my @NInf = map { "-$_" } @PInf;
+my @NInf = map { "-$_" } grep { ! /^\+/ } @PInf;
 
 my @NaN = ("NAN", "nan", "qnan", "SNAN", "NanQ", "NANS",
-           "1.#QNAN", "1#SNAN", "1.#NAN", "1#IND",
+           "1.#QNAN", "+1#SNAN", "-1.#NAN", "1#IND",
            "NaN123", "NAN(123)", "nan%",
            "nanonano"); # RIP, Robin Williams.
 
-my $inf_tests = 6 + 6 * @PInf + 5;
-my $nan_tests = 5 + 2 * @NaN + 3;
+my $inf_tests = 9 + 3 * @PInf + 3 * @NInf + 5;
+my $nan_tests = 7 + 2 * @NaN + 3;
+
 my $infnan_tests = 4;
 
 plan tests => $inf_tests + $nan_tests + $infnan_tests;
@@ -29,30 +33,34 @@ my $has_inf;
 my $has_nan;
 
 SKIP: {
-  if ($PInf == 1 && $NINf == 1) {
+  if ($PInf == 1 && $NInf == 1) {
     skip $inf_tests, "no infinity found";
   }
 
   $has_inf = 1;
 
-  ok($PInf > 0, "positive infinity");
-  ok($NInf < 0, "negative infinity");
+  cmp_ok($PInf, '>', 0, "positive infinity");
+  cmp_ok($NInf, '<', 0, "negative infinity");
+
+  cmp_ok($PInf, '>', $NInf, "positive > negative");
+  cmp_ok($NInf, '==', -$PInf, "negative == -positive");
+  cmp_ok(-$NInf, '==', $PInf, "--negative == positive");
 
   is($PInf,  "Inf", "$PInf value stringifies as Inf");
-  is($NInf, "-Inf", "$PInf value stringifies as -Inf");
+  is($NInf, "-Inf", "$NInf value stringifies as -Inf");
 
   is(sprintf("%g", $PInf), "Inf", "$PInf sprintf %g is Inf");
   is(sprintf("%a", $PInf), "Inf", "$PInf sprintf %a is Inf");
 
   for my $i (@PInf) {
-    is($i + 0, $PInf, "$i is +Inf");
-    ok($i > 0, "$i is positive");
+    cmp_ok($i + 0 , '==', $PInf, "$i is +Inf");
+    cmp_ok($i, '>', 0, "$i is positive");
     is("@{[$i+0]}", "Inf", "$i value stringifies as Inf");
   }
 
   for my $i (@NInf) {
-    is($i + 0, $NInf, "$i is -Inf");
-    ok($i < 0, "$i is negative");
+    cmp_ok($i + 0, '==', $NInf, "$i is -Inf");
+    cmp_ok($i, '<', 0, "$i is negative");
     is("@{[$i+0]}", "-Inf", "$i value stringifies as -Inf");
   }
 
@@ -62,7 +70,7 @@ SKIP: {
   is(1/$PInf, 0, "one per +Inf is zero");
   is(1/$NInf, 0, "one per -Inf is zero");
 
-  is(9**9**9, $PInf, "9**9**9 is +Inf");
+  is(9**9**9, $PInf, "9**9**9 is Inf");
 }
 
 SKIP: {
@@ -72,20 +80,23 @@ SKIP: {
 
   $has_nan = 1;
 
-  ok($NaN != $NaN, "nan is not nan numerically");
-  ok($NaN eq $NaN, "nan is nan stringifically");
+  cmp_ok($NaN, '!=', $NaN, "NaN is NaN numerically (by not being NaN)");
+  ok($NaN eq $NaN, "NaN is NaN stringifically");
 
   is("$NaN", "NaN", "$NaN value stringies as NaN");
 
+  is("+NaN" + 0, "NaN", "+NaN is NaN");
+  is("-NaN" + 0, "NaN", "-NaN is NaN");
+
   is(sprintf("%g", $NaN), "NaN", "$NaN sprintf %g is NaN");
-  is(sprintf("%a", $NaN), "NaN", "$NaN sprintf %a is Inf");
+  is(sprintf("%a", $NaN), "NaN", "$NaN sprintf %a is NaN");
 
   for my $i (@NaN) {
-    cmp_ok($i + 0, '!=', $i + 0, "$i is nan");
+    cmp_ok($i + 0, '!=', $i + 0, "$i is NaN numerically (by not being NaN)");
     is("@{[$i+0]}", "NaN", "$i value stringifies as NaN");
   }
 
-  # is() okay with $NaN because eq is used.
+  # is() okay with $NaN because it uses eq.
   is($NaN * 0, $NaN, "NaN times zero is NaN");
   is($NaN * 2, $NaN, "NaN times two is NaN");
 
@@ -94,12 +105,12 @@ SKIP: {
 
 SKIP: {
   unless ($has_inf && $has_nan) {
-    skip $infnan_tests, "no both inf and nan";
+    skip $infnan_tests, "no both Inf and Nan";
   }
 
-  # is() okay with $NaN because eq is used.
-  is($PInf * 0,     $NaN, "inf times zero is nan");
-  is($PInf * $NaN,  $NaN, "inf times nan is nan");
-  is($PInf + $NaN,  $NaN, "inf plus nan is nan");
-  is($PInf - $PInf, $NaN, "inf minus inf is nan");
+  # is() okay with $NaN because it uses eq.
+  is($PInf * 0,     $NaN, "Inf times zero is NaN");
+  is($PInf * $NaN,  $NaN, "Inf times NaN is NaN");
+  is($PInf + $NaN,  $NaN, "Inf plus NaN is NaN");
+  is($PInf - $PInf, $NaN, "Inf minus inf is NaN");
 }