X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/02fd9d541c593812f3e152dc406cb4399353d5dc..d43c116b2ae74ec1f3ed78829d4f3ca76f091390:/numeric.c

diff --git a/numeric.c b/numeric.c
index 5d2fc5c..52c4547 100644
--- a/numeric.c
+++ b/numeric.c
@@ -16,9 +16,6 @@
  */
 
 /*
-=head1 Numeric functions
-
-=cut
 
 This file contains all the stuff needed by perl for manipulating numeric
 values, including such things as replacements for the OS's atof() function
@@ -90,14 +87,14 @@ It properly handles the locale radix character, meaning it expects a dot except
 when called from within the scope of S<C<use locale>>, in which case the radix
 character should be that specified by the current locale.
 
-The synonym Strod() may be used instead.
+The synonym Strtod() may be used instead.
 
 =cut
 
 */
 
 NV
-my_strtod(const char * const s, char **e)
+Perl_my_strtod(const char * const s, char **e)
 {
     dTHX;
 
@@ -208,24 +205,31 @@ Perl_cast_uv(NV f)
 
 converts a string representing a binary number to numeric form.
 
-On entry C<start> and C<*len> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-invalid character will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
+scan stops at the end of the string, or at just before the first invalid
+character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning.  On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
 
 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is NULL).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
 
 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
-number may use C<"_"> characters to separate digits.
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
+
+=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
+=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
+=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
+=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
 
 =cut
 
@@ -237,93 +241,9 @@ on this platform.
 UV
 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 {
-    const char *s = start;
-    STRLEN len = *len_p;
-    UV value = 0;
-    NV value_nv = 0;
-
-    const UV max_div_2 = UV_MAX / 2;
-    const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
-    bool overflowed = FALSE;
-    char bit;
-
     PERL_ARGS_ASSERT_GROK_BIN;
 
-    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
-        /* strip off leading b or 0b.
-           for compatibility silently suffer "b" and "0b" as valid binary
-           numbers. */
-        if (len >= 1) {
-            if (isALPHA_FOLD_EQ(s[0], 'b')) {
-                s++;
-                len--;
-            }
-            else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
-                s+=2;
-                len-=2;
-            }
-        }
-    }
-
-    for (; len-- && (bit = *s); s++) {
-        if (bit == '0' || bit == '1') {
-            /* Write it in this wonky order with a goto to attempt to get the
-               compiler to make the common case integer-only loop pretty tight.
-               With gcc seems to be much straighter code than old scan_bin.  */
-          redo:
-            if (!overflowed) {
-                if (value <= max_div_2) {
-                    value = (value << 1) | (bit - '0');
-                    continue;
-                }
-                /* Bah. We're just overflowed.  */
-		/* diag_listed_as: Integer overflow in %s number */
-		Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
-				 "Integer overflow in binary number");
-                overflowed = TRUE;
-                value_nv = (NV) value;
-            }
-            value_nv *= 2.0;
-	    /* If an NV has not enough bits in its mantissa to
-	     * represent a UV this summing of small low-order numbers
-	     * is a waste of time (because the NV cannot preserve
-	     * the low-order bits anyway): we could just remember when
-	     * did we overflow and in the end just multiply value_nv by the
-	     * right amount. */
-            value_nv += (NV)(bit - '0');
-            continue;
-        }
-        if (bit == '_' && len && allow_underscores && (bit = s[1])
-            && (bit == '0' || bit == '1'))
-	    {
-		--len;
-		++s;
-                goto redo;
-	    }
-        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
-            Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
-			   "Illegal binary digit '%c' ignored", *s);
-        break;
-    }
-
-    if (   ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
-	|| (!overflowed && value > 0xffffffff
-	    && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
-	) {
-	Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
-		       "Binary number > 0b11111111111111111111111111111111 non-portable");
-    }
-    *len_p = s - start;
-    if (!overflowed) {
-        *flags = 0;
-        return value;
-    }
-    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
-    if (result)
-        *result = value_nv;
-    return UV_MAX;
+    return grok_bin(start, len_p, flags, result);
 }
 
 /*
@@ -332,119 +252,39 @@ Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 converts a string representing a hex number to numeric form.
 
 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-invalid character will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
+scan stops at the end of the string, or at just before the first invalid
+character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning.  On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
 
 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is C<NULL>).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
 
 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
-number may use C<"_"> characters to separate digits.
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
 
 =cut
 
-Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
+Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 which suppresses any message for non-portable numbers, but which are valid
-on this platform.
+on this platform.  But, C<*flags>  will have the corresponding flag bit set.
  */
 
 UV
 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 {
-    const char *s = start;
-    STRLEN len = *len_p;
-    UV value = 0;
-    NV value_nv = 0;
-    const UV max_div_16 = UV_MAX / 16;
-    const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
-    bool overflowed = FALSE;
-
     PERL_ARGS_ASSERT_GROK_HEX;
 
-    if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
-        /* strip off leading x or 0x.
-           for compatibility silently suffer "x" and "0x" as valid hex numbers.
-        */
-        if (len >= 1) {
-            if (isALPHA_FOLD_EQ(s[0], 'x')) {
-                s++;
-                len--;
-            }
-            else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
-                s+=2;
-                len-=2;
-            }
-        }
-    }
-
-    for (; len-- && *s; s++) {
-        if (isXDIGIT(*s)) {
-            /* Write it in this wonky order with a goto to attempt to get the
-               compiler to make the common case integer-only loop pretty tight.
-               With gcc seems to be much straighter code than old scan_hex.  */
-          redo:
-            if (!overflowed) {
-                if (value <= max_div_16) {
-                    value = (value << 4) | XDIGIT_VALUE(*s);
-                    continue;
-                }
-                /* Bah. We're just overflowed.  */
-		/* diag_listed_as: Integer overflow in %s number */
-		Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
-				 "Integer overflow in hexadecimal number");
-                overflowed = TRUE;
-                value_nv = (NV) value;
-            }
-            value_nv *= 16.0;
-	    /* If an NV has not enough bits in its mantissa to
-	     * represent a UV this summing of small low-order numbers
-	     * is a waste of time (because the NV cannot preserve
-	     * the low-order bits anyway): we could just remember when
-	     * did we overflow and in the end just multiply value_nv by the
-	     * right amount of 16-tuples. */
-            value_nv += (NV) XDIGIT_VALUE(*s);
-            continue;
-        }
-        if (*s == '_' && len && allow_underscores && s[1]
-		&& isXDIGIT(s[1]))
-	    {
-		--len;
-		++s;
-                goto redo;
-	    }
-        if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
-            Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
-                        "Illegal hexadecimal digit '%c' ignored", *s);
-        break;
-    }
-
-    if (   ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
-	|| (!overflowed && value > 0xffffffff
-	    && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
-	) {
-	Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
-		       "Hexadecimal number > 0xffffffff non-portable");
-    }
-    *len_p = s - start;
-    if (!overflowed) {
-        *flags = 0;
-        return value;
-    }
-    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
-    if (result)
-        *result = value_nv;
-    return UV_MAX;
+    return grok_hex(start, len_p, flags, result);
 }
 
 /*
@@ -452,22 +292,26 @@ Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 
 converts a string representing an octal number to numeric form.
 
-On entry C<start> and C<*len> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-8 or 9 will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV.  The
+scan stops at the end of the string, or at just before the first invalid
+character.  Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning.  On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
 
 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is C<NULL>).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
 
-If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
-number may use C<"_"> characters to separate digits.
+The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
+this function.
 
 =cut
 
@@ -479,76 +323,277 @@ on this platform.
 UV
 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 {
-    const char *s = start;
+    PERL_ARGS_ASSERT_GROK_OCT;
+
+    return grok_oct(start, len_p, flags, result);
+}
+
+STATIC void
+S_output_non_portable(pTHX_ const U8 base)
+{
+    /* Display the proper message for a number in the given input base not
+     * fitting in 32 bits */
+    const char * which = (base == 2)
+                      ? "Binary number > 0b11111111111111111111111111111111"
+                      : (base == 8)
+                        ? "Octal number > 037777777777"
+                        : "Hexadecimal number > 0xffffffff";
+
+    PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
+
+    /* Also there are listings for the other two.  That's because, since they
+     * are the first word, it would be hard for a user to find them there
+     * starting with a %s */
+    /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
+    Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
+}
+
+UV
+Perl_grok_bin_oct_hex(pTHX_ const char *start,
+                        STRLEN *len_p,
+                        I32 *flags,
+                        NV *result,
+                        const unsigned shift, /* 1 for binary; 3 for octal;
+                                                 4 for hex */
+                        const U8 class_bit,
+                        const char prefix
+                     )
+
+{
+    const char *s0 = start;
+    const char *s;
     STRLEN len = *len_p;
+    STRLEN bytes_so_far;    /* How many real digits have been processed */
     UV value = 0;
     NV value_nv = 0;
-    const UV max_div_8 = UV_MAX / 8;
-    const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
+    const PERL_UINT_FAST8_T base = 1 << shift;  /* 2, 8, or 16 */
+    const UV max_div= UV_MAX / base;    /* Value above which, the next digit
+                                           processed would overflow */
+    const I32 input_flags = *flags;
+    const bool allow_underscores =
+                                cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
     bool overflowed = FALSE;
 
-    PERL_ARGS_ASSERT_GROK_OCT;
+    /* In overflows, this keeps track of how much to multiply the overflowed NV
+     * by as we continue to parse the remaining digits */
+    NV factor = 0;
+
+    /* This function unifies the core of grok_bin, grok_oct, and grok_hex.  It
+     * is optimized for hex conversion.  For example, it uses XDIGIT_VALUE to
+     * find the numeric value of a digit.  That requires more instructions than
+     * OCTAL_VALUE would, but gives the same result for the narrowed range of
+     * octal digits; same for binary.  If it were ever critical to squeeze more
+     * performance from this, the function could become grok_hex, and a regen
+     * perl script could scan it and write out two edited copies for the other
+     * two functions.  That would improve the performance of all three
+     * somewhat.  Besides eliminating XDIGIT_VALUE for the other two, extra
+     * parameters are now passed to this to avoid conditionals.  Those could
+     * become declared consts, like:
+     *      const U8 base = 16;
+     *      const U8 base = 8;
+     *      ...
+     */
+
+    PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
+
+    ASSUME(inRANGE(shift, 1, 4) && shift != 2);
+
+    /* Clear output flags; unlikely to find a problem that sets them */
+    *flags = 0;
+
+    if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
+
+        /* strip off leading b or 0b; x or 0x.
+           for compatibility silently suffer "b" and "0b" as valid binary; "x"
+           and "0x" as valid hex numbers. */
+        if (len >= 1) {
+            if (isALPHA_FOLD_EQ(s0[0], prefix)) {
+                s0++;
+                len--;
+            }
+            else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
+                s0+=2;
+                len-=2;
+            }
+        }
+    }
+
+    s = s0; /* s0 potentially advanced from 'start' */
 
-    for (; len-- && *s; s++) {
-        if (isOCTAL(*s)) {
+    /* Unroll the loop so that the first 8 digits are branchless except for the
+     * switch.  A ninth hex one overflows a 32 bit word. */
+    switch (len) {
+      case 0:
+          return 0;
+      default:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 7:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 6:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 5:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 4:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 3:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 2:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+          s++;
+          /* FALLTHROUGH */
+      case 1:
+          if (UNLIKELY(! _generic_isCC(*s, class_bit)))  break;
+          value = (value << shift) | XDIGIT_VALUE(*s);
+
+          if (LIKELY(len <= 8)) {
+              return value;
+          }
+
+          s++;
+          break;
+    }
+
+    bytes_so_far = s - s0;
+    factor = shift << bytes_so_far;
+    len -= bytes_so_far;
+
+    for (; len--; s++) {
+        if (_generic_isCC(*s, class_bit)) {
             /* Write it in this wonky order with a goto to attempt to get the
                compiler to make the common case integer-only loop pretty tight.
-            */
+               With gcc seems to be much straighter code than old scan_hex.
+               (khw suspects that adding a LIKELY() just above would do the
+               same thing) */
           redo:
-            if (!overflowed) {
-                if (value <= max_div_8) {
-                    value = (value << 3) | OCTAL_VALUE(*s);
-                    continue;
-                }
-                /* Bah. We're just overflowed.  */
-		/* diag_listed_as: Integer overflow in %s number */
-		Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
-			       "Integer overflow in octal number");
+            if (LIKELY(value <= max_div)) {
+                value = (value << shift) | XDIGIT_VALUE(*s);
+                    /* Note XDIGIT_VALUE() is branchless, works on binary
+                     * and octal as well, so can be used here, without
+                     * slowing those down */
+                factor *= 1 << shift;
+                continue;
+            }
+
+            /* Bah. We are about to overflow.  Instead, add the unoverflowed
+             * value to an NV that contains an approximation to the correct
+             * value.  Each time through the loop we have increased 'factor' so
+             * that it gives how much the current approximation needs to
+             * effectively be shifted to make room for this new value */
+            value_nv *= factor;
+            value_nv += (NV) value;
+
+            /* Then we keep accumulating digits, until all are parsed.  We
+             * start over using the current input value.  This will be added to
+             * 'value_nv' eventually, either when all digits are gone, or we
+             * have overflowed this fresh start. */
+            value = XDIGIT_VALUE(*s);
+            factor = 1 << shift;
+
+            if (! overflowed) {
                 overflowed = TRUE;
-                value_nv = (NV) value;
+                if (   ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
+                    &&    ckWARN_d(WARN_OVERFLOW))
+                {
+                    Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
+                                       "Integer overflow in %s number",
+                                       (base == 16) ? "hexadecimal"
+                                                    : (base == 2)
+                                                      ? "binary"
+                                                      : "octal");
+                }
             }
-            value_nv *= 8.0;
-	    /* If an NV has not enough bits in its mantissa to
-	     * represent a UV this summing of small low-order numbers
-	     * is a waste of time (because the NV cannot preserve
-	     * the low-order bits anyway): we could just remember when
-	     * did we overflow and in the end just multiply value_nv by the
-	     * right amount of 8-tuples. */
-            value_nv += (NV) OCTAL_VALUE(*s);
             continue;
         }
-        if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
+
+        if (   *s == '_'
+            && len
+            && allow_underscores
+            && _generic_isCC(s[1], class_bit)
+
+                /* Don't allow a leading underscore if the only-medial bit is
+                 * set */
+            && (   LIKELY(s > s0)
+                || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
+                                        != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
+        {
             --len;
             ++s;
             goto redo;
         }
-        /* Allow \octal to work the DWIM way (that is, stop scanning
-         * as soon as non-octal characters are seen, complain only if
-         * someone seems to want to use the digits eight and nine.  Since we
-         * know it is not octal, then if isDIGIT, must be an 8 or 9). */
-        if (isDIGIT(*s)) {
-            if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
-                Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
-			       "Illegal octal digit '%c' ignored", *s);
+
+        if (*s) {
+            if (   ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
+                &&    ckWARN(WARN_DIGIT))
+            {
+                if (base != 8) {
+                    Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+                                           "Illegal %s digit '%c' ignored",
+                                           ((base == 2)
+                                            ? "binary"
+                                              : "hexadecimal"),
+                                            *s);
+                }
+                else if (isDIGIT(*s)) { /* octal base */
+
+                    /* Allow \octal to work the DWIM way (that is, stop
+                     * scanning as soon as non-octal characters are seen,
+                     * complain only if someone seems to want to use the digits
+                     * eight and nine.  Since we know it is not octal, then if
+                     * isDIGIT, must be an 8 or 9). */
+                    Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+                                       "Illegal octal digit '%c' ignored", *s);
+                }
+            }
+
+            if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
+                *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
+            }
         }
+
         break;
     }
 
-    if (   ( overflowed && value_nv > 4294967295.0)
+    *len_p = s - start;
+
+    if (LIKELY(! overflowed)) {
 #if UVSIZE > 4
-	|| (!overflowed && value > 0xffffffff
-	    && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
+        if (      UNLIKELY(value > 0xffffffff)
+            && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
+        {
+            output_non_portable(base);
+            *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
+        }
 #endif
-	) {
-	Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
-		       "Octal number > 037777777777 non-portable");
-    }
-    *len_p = s - start;
-    if (!overflowed) {
-        *flags = 0;
         return value;
     }
-    *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
+
+    /* Overflowed: Calculate the final overflow approximation */
+    value_nv *= factor;
+    value_nv += (NV) value;
+
+    output_non_portable(base);
+
+    *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
+           |  PERL_SCAN_SILENT_NON_PORTABLE;
     if (result)
         *result = value_nv;
     return UV_MAX;
@@ -678,6 +723,13 @@ If an infinity or a not-a-number is recognized, C<*sp> will point to
 one byte past the end of the recognized string.  If the recognition fails,
 zero is returned, and C<*sp> will not move.
 
+=for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
+=for apidoc Amnh|bool|IS_NUMBER_INFINITY
+=for apidoc Amnh|bool|IS_NUMBER_IN_UV
+=for apidoc Amnh|bool|IS_NUMBER_NAN
+=for apidoc Amnh|bool|IS_NUMBER_NEG
+=for apidoc Amnh|bool|IS_NUMBER_NOT_INT
+
 =cut
 */
 
@@ -729,7 +781,7 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
                 s++;
             } else if (odh) {
-                while (*s == '0') { /* 1.#INF00 */
+                while (s < send && *s == '0') { /* 1.#INF00 */
                     s++;
                 }
             }
@@ -743,10 +795,10 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
             s++;
             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
-            while (*s == '0') { /* 1.#IND00 */
+            while (s < send && *s == '0') { /* 1.#IND00 */
                 s++;
             }
-            if (*s) {
+            if (s < send && *s) {
                 flags |= IS_NUMBER_TRAILING;
             }
         } else
@@ -767,6 +819,9 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
             s++;
 
             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+            if (s == send) {
+                return flags;
+            }
 
             /* NaN can be followed by various stuff (NaNQ, NaNS), but
              * there are also multiple different NaN values, and some
@@ -778,6 +833,9 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
                 /* "nanq" or "nans" are ok, though generating
                  * these portably is tricky. */
                 s++;
+                if (s == send) {
+                    return flags;
+                }
             }
             if (*s == '(') {
                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
@@ -943,6 +1001,8 @@ C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 non-numeric text on an otherwise successful I<grok>, setting
 C<IS_NUMBER_TRAILING> on the result.
 
+=for apidoc Amnh||PERL_SCAN_TRAILING
+
 =for apidoc grok_number
 
 Identical to C<grok_number_flags()> with C<flags> set to zero.
@@ -970,109 +1030,153 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 
   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 
-  while (s < send && isSPACE(*s))
-    s++;
-  if (s == send) {
-    return 0;
-  } else if (*s == '-') {
-    s++;
-    numtype = IS_NUMBER_NEG;
+  if (UNLIKELY(isSPACE(*s))) {
+      s++;
+      while (s < send) {
+        if (LIKELY(! isSPACE(*s))) goto non_space;
+        s++;
+      }
+      return 0;
+    non_space: ;
   }
-  else if (*s == '+')
-    s++;
 
-  if (s == send)
-    return 0;
+  /* See if signed.  This assumes it is more likely to be unsigned, so
+   * penalizes signed by an extra conditional; rewarding unsigned by one fewer
+   * (because we detect '+' and '-' with a single test and then add a
+   * conditional to determine which) */
+  if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
+
+    /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
+     * 2F = '/'.  That is, it is either a sign, or a character that doesn't
+     * belong in a number at all (unless it's a radix character in a weird
+     * locale).  Given this, it's far more likely to be a minus than the
+     * others.  (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E,  (not 40
+     * because can't be a space)    60, 62, 64, 66, 68, 6A, 6C, 6E.  Again,
+     * only potentially a weird radix character, or 4E='+', or 60='-') */
+    if (LIKELY(*s == '-')) {
+        s++;
+        numtype = IS_NUMBER_NEG;
+    }
+    else if (LIKELY(*s == '+'))
+        s++;
+    else  /* Can't just return failure here, as it could be a weird radix
+             character */
+        goto done_sign;
+
+    if (UNLIKELY(s == send))
+        return 0;
+  done_sign: ;
+    }
 
   /* The first digit (after optional sign): note that might
    * also point to "infinity" or "nan", or "1.#INF". */
   d = s;
 
   /* next must be digit or the radix separator or beginning of infinity/nan */
-  if (isDIGIT(*s)) {
+  if (LIKELY(isDIGIT(*s))) {
     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
        overflow.  */
-    UV value = *s - '0';
-    /* This construction seems to be more optimiser friendly.
-       (without it gcc does the isDIGIT test and the *s - '0' separately)
-       With it gcc on arm is managing 6 instructions (6 cycles) per digit.
-       In theory the optimiser could deduce how far to unroll the loop
-       before checking for overflow.  */
-    if (++s < send) {
-      int digit = *s - '0';
-      if (inRANGE(digit, 0, 9)) {
+    UV value = *s - '0';    /* Process this first (perhaps only) digit */
+    int digit;
+
+    s++;
+
+    switch(send - s) {
+      default:      /* 8 or more remaining characters */
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 7:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 6:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
         value = value * 10 + digit;
-        if (++s < send) {
-          digit = *s - '0';
-          if (inRANGE(digit, 0, 9)) {
-            value = value * 10 + digit;
-            if (++s < send) {
-              digit = *s - '0';
-              if (inRANGE(digit, 0, 9)) {
+        s++;
+        /* FALLTHROUGH */
+      case 5:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 4:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 3:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 2:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 1:
+        digit = *s - '0';
+        if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+        value = value * 10 + digit;
+        s++;
+        /* FALLTHROUGH */
+      case 0:       /* This case means the string consists of just the one
+                       digit we already have processed */
+
+        /* If we got here by falling through other than the default: case, we
+         * have processed the whole string, and know it consists entirely of
+         * digits, and can't have overflowed. */
+        if (s >= send) {
+            if (valuep)
+              *valuep = value;
+            return numtype|IS_NUMBER_IN_UV;
+        }
+
+        /* Here, there are extra characters beyond the first 9 digits.  Use a
+         * loop to accumulate any remaining digits, until we get a non-digit or
+         * would overflow.  Note that leading zeros could cause us to get here
+         * without being close to overflowing.
+         *
+         * (The conditional 's >= send' above could be eliminated by making the
+         * default: in the switch to instead be 'case 8:', and process longer
+         * strings separately by using the loop below.  This would penalize
+         * these inputs by the extra instructions needed for looping.  That
+         * could be eliminated by copying the unwound code from above to handle
+         * the firt 9 digits of these.  khw didn't think this saving of a
+         * single conditional was worth it.) */
+        do {
+            digit = *s - '0';
+            if (! inRANGE(digit, 0, 9)) goto mantissa_done;
+            if (       value < uv_max_div_10
+                || (   value == uv_max_div_10
+                    && digit <= uv_max_mod_10))
+            {
                 value = value * 10 + digit;
-		if (++s < send) {
-                  digit = *s - '0';
-                  if (inRANGE(digit, 0, 9)) {
-                    value = value * 10 + digit;
-                    if (++s < send) {
-                      digit = *s - '0';
-                      if (inRANGE(digit, 0, 9)) {
-                        value = value * 10 + digit;
-                        if (++s < send) {
-                          digit = *s - '0';
-                          if (inRANGE(digit, 0, 9)) {
-                            value = value * 10 + digit;
-                            if (++s < send) {
-                              digit = *s - '0';
-                              if (inRANGE(digit, 0, 9)) {
-                                value = value * 10 + digit;
-                                if (++s < send) {
-                                  digit = *s - '0';
-                                  if (inRANGE(digit, 0, 9)) {
-                                    value = value * 10 + digit;
-                                    if (++s < send) {
-                                      /* Now got 9 digits, so need to check
-                                         each time for overflow.  */
-                                      digit = *s - '0';
-                                      while (    inRANGE(digit, 0, 9)
-                                             && (value < uv_max_div_10
-                                                 || (value == uv_max_div_10
-                                                     && digit <= uv_max_mod_10))) {
-                                        value = value * 10 + digit;
-                                        if (++s < send)
-                                          digit = *s - '0';
-                                        else
-                                          break;
-                                      }
-                                      if (inRANGE(digit, 0, 9)
-                                          && (s < send)) {
-                                        /* value overflowed.
-                                           skip the remaining digits, don't
-                                           worry about setting *valuep.  */
-                                        do {
-                                          s++;
-                                        } while (s < send && isDIGIT(*s));
-                                        numtype |=
-                                          IS_NUMBER_GREATER_THAN_UV_MAX;
-                                        goto skip_value;
-                                      }
-                                    }
-                                  }
-				}
-                              }
-                            }
-                          }
-                        }
-                      }
-                    }
-                  }
-                }
-              }
+                s++;
             }
-          }
-	}
-      }
-    }
+            else { /* value would overflow.  skip the remaining digits, don't
+                      worry about setting *valuep.  */
+                do {
+                    s++;
+                } while (s < send && isDIGIT(*s));
+                numtype |=
+                    IS_NUMBER_GREATER_THAN_UV_MAX;
+                goto skip_value;
+            }
+        } while (s < send);
+    }   /* End switch on input length */
+
+  mantissa_done:
     numtype |= IS_NUMBER_IN_UV;
     if (valuep)
       *valuep = value;
@@ -1083,7 +1187,7 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
         s++;
     }
-  }
+  } /* End of *s is a digit */
   else if (GROK_NUMERIC_RADIX(&s, send)) {
     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
     /* no digits before the radix means we need digits after it */
@@ -1100,9 +1204,9 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
         return 0;
   }
 
-  if (s > d && s < send) {
+  if (LIKELY(s > d) && s < send) {
     /* we can have an optional exponent part */
-    if (isALPHA_FOLD_EQ(*s, 'e')) {
+    if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
       s++;
       if (s < send && (*s == '-' || *s == '+'))
         s++;
@@ -1121,17 +1225,23 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
       numtype |= IS_NUMBER_NOT_INT;
     }
   }
-  while (s < send && isSPACE(*s))
+
+  while (s < send) {
+    if (LIKELY(! isSPACE(*s))) goto end_space;
     s++;
-  if (s >= send)
-    return numtype;
-  if (memEQs(pv, len, "0 but true")) {
+  }
+  return numtype;
+
+ end_space:
+
+  if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
     if (valuep)
       *valuep = 0;
     return IS_NUMBER_IN_UV;
   }
+
   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
-  if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
+  if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
       /* Really detect inf/nan. Start at d, not s, since the above
        * code might have already consumed the "1." or "1". */
       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
@@ -1168,7 +1278,8 @@ If you constrain the portion of C<pv> that is looked at by this function (by
 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
 valid value, it will return TRUE, setting C<*endptr> to the byte following the
 final digit of the value.  But if there is no constraint at what's looked at,
-all of C<pv> must be valid in order for TRUE to be returned.
+all of C<pv> must be valid in order for TRUE to be returned.  C<*endptr> is
+unchanged from its value on input if FALSE is returned;
 
 The only characters this accepts are the decimal digits '0'..'9'.
 
@@ -1353,7 +1464,18 @@ S_mulexp10(NV value, I32 exponent)
 NV
 Perl_my_atof(pTHX_ const char* s)
 {
-    /* 's' must be NUL terminated */
+
+/*
+=for apidoc my_atof
+
+L<C<atof>(3)>, but properly works with Perl locale handling, accepting a dot
+radix character always, but also the current locale's radix character if and
+only if called from within the lexical scope of a Perl C<use locale> statement.
+
+N.B. C<s> must be NUL terminated.
+
+=cut
+*/
 
     NV x = 0.0;
 
@@ -1408,10 +1530,6 @@ Perl_my_atof(pTHX_ const char* s)
 
 #if defined(NV_INF) || defined(NV_NAN)
 
-#ifdef USING_MSVC6
-#  pragma warning(push)
-#  pragma warning(disable:4756;disable:4056)
-#endif
 static char*
 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
 {
@@ -1489,9 +1607,6 @@ S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value
     }
     return NULL;
 }
-#ifdef USING_MSVC6
-#  pragma warning(pop)
-#endif
 
 #endif /* if defined(NV_INF) || defined(NV_NAN) */
 
@@ -1551,6 +1666,15 @@ Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
             return endp;
 
+        /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
+           0b-prefixed binary numbers, which is backward incompatible
+        */
+        if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
+            (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
+            *value = 0;
+            return (char *)s+1;
+        }
+
         /* If the length is passed in, the input string isn't NUL-terminated,
          * and in it turns out the function below assumes it is; therefore we
          * create a copy and NUL-terminate that */
@@ -1716,15 +1840,21 @@ Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
     /* now apply the sign */
     if (negative)
 	result[2] = -result[2];
-#endif /* USE_PERL_ATOF */
     *value = result[2];
     return (char *)s;
+#else  /* USE_PERL_ATOF */
+    /* If you see this error you both don't have strtod (or configured -Ud_strtod or
+       or it's long double/quadmath equivalent) and disabled USE_PERL_ATOF, thus
+       removing any way for perl to convert strings to floating point numbers.
+    */
+# error No mechanism to convert strings to numbers available
+#endif
 }
 
 /*
 =for apidoc isinfnan
 
-C<Perl_isinfnan()> is utility function that returns true if the NV
+C<Perl_isinfnan()> is a utility function that returns true if the NV
 argument is either an infinity or a C<NaN>, false otherwise.  To test
 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
 
@@ -1748,7 +1878,7 @@ Perl_isinfnan(NV nv)
 }
 
 /*
-=for apidoc
+=for apidoc isinfnansv
 
 Checks whether the argument would be either an infinity or C<NaN> when used
 as a number, but is careful not to trigger non-numeric or uninitialized