*/
/*
-=head1 Numeric functions
-
-=cut
This file contains all the stuff needed by perl for manipulating numeric
values, including such things as replacements for the OS's atof() function
#define PERL_IN_NUMERIC_C
#include "perl.h"
+#ifdef Perl_strtod
+
+PERL_STATIC_INLINE NV
+S_strtod(pTHX_ const char * const s, char ** e)
+{
+ DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+ NV result;
+
+ STORE_LC_NUMERIC_SET_TO_NEEDED();
+
+# ifdef USE_QUADMATH
+
+ result = strtoflt128(s, e);
+
+# elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \
+ && defined(USE_LONG_DOUBLE)
+# if defined(__MINGW64_VERSION_MAJOR)
+ /***********************************************
+ We are unable to use strtold because of
+ https://sourceforge.net/p/mingw-w64/bugs/711/
+ &
+ https://sourceforge.net/p/mingw-w64/bugs/725/
+
+ but __mingw_strtold is fine.
+ ***********************************************/
+
+ result = __mingw_strtold(s, e);
+
+# else
+
+ result = strtold(s, e);
+
+# endif
+# elif defined(HAS_STRTOD)
+
+ result = strtod(s, e);
+
+# else
+# error No strtod() equivalent found
+# endif
+
+ RESTORE_LC_NUMERIC();
+
+ return result;
+}
+
+#endif /* #ifdef Perl_strtod */
+
+/*
+
+=for apidoc my_strtod
+
+This function is equivalent to the libc strtod() function, and is available
+even on platforms that lack plain strtod(). Its return value is the best
+available precision depending on platform capabilities and F<Configure>
+options.
+
+It properly handles the locale radix character, meaning it expects a dot except
+when called from within the scope of S<C<use locale>>, in which case the radix
+character should be that specified by the current locale.
+
+The synonym Strtod() may be used instead.
+
+=cut
+
+*/
+
+NV
+Perl_my_strtod(const char * const s, char **e)
+{
+ dTHX;
+
+ PERL_ARGS_ASSERT_MY_STRTOD;
+
+#ifdef Perl_strtod
+
+ return S_strtod(aTHX_ s, e);
+
+#else
+
+ {
+ NV result;
+ char ** end_ptr = NULL;
+
+ *end_ptr = my_atof2(s, &result);
+ if (e) {
+ *e = *end_ptr;
+ }
+
+ if (! *end_ptr) {
+ result = 0.0;
+ }
+
+ return result;
+ }
+
+#endif
+
+}
+
+
U32
Perl_cast_ulong(NV f)
{
if (f < U32_MAX_P1_HALF)
return (U32) f;
f -= U32_MAX_P1_HALF;
- return ((U32) f) | (1 + U32_MAX >> 1);
+ return ((U32) f) | (1 + (U32_MAX >> 1));
#else
return (U32) f;
#endif
if (f < U32_MAX_P1_HALF)
return (I32)(U32) f;
f -= U32_MAX_P1_HALF;
- return (I32)(((U32) f) | (1 + U32_MAX >> 1));
+ return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
#else
return (I32)(U32) f;
#endif
if (f < UV_MAX_P1_HALF)
return (IV)(UV) f;
f -= UV_MAX_P1_HALF;
- return (IV)(((UV) f) | (1 + UV_MAX >> 1));
+ return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
#else
return (IV)(UV) f;
#endif
if (f < UV_MAX_P1_HALF)
return (UV) f;
f -= UV_MAX_P1_HALF;
- return ((UV) f) | (1 + UV_MAX >> 1);
+ return ((UV) f) | (1 + (UV_MAX >> 1));
#else
return (UV) f;
#endif
converts a string representing a binary number to numeric form.
-On entry I<start> and I<*len> give the string to scan, I<*flags> gives
-conversion flags, and I<result> should be NULL or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
-invalid character will also trigger a warning.
-On return I<*len> is set to the length of the scanned string,
-and I<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
-returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to I<*result> (or the value is discarded if I<result>
-is NULL).
+and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
+returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
-The binary number may optionally be prefixed with "0b" or "b" unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
-number may use '_' characters to separate digits.
+The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
+
+=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
+=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
+=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
+=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
=cut
UV
Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
- STRLEN len = *len_p;
- UV value = 0;
- NV value_nv = 0;
-
- const UV max_div_2 = UV_MAX / 2;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool overflowed = FALSE;
- char bit;
-
PERL_ARGS_ASSERT_GROK_BIN;
- if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
- /* strip off leading b or 0b.
- for compatibility silently suffer "b" and "0b" as valid binary
- numbers. */
- if (len >= 1) {
- if (isALPHA_FOLD_EQ(s[0], 'b')) {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
- s+=2;
- len-=2;
- }
- }
- }
-
- for (; len-- && (bit = *s); s++) {
- if (bit == '0' || bit == '1') {
- /* Write it in this wonky order with a goto to attempt to get the
- compiler to make the common case integer-only loop pretty tight.
- With gcc seems to be much straighter code than old scan_bin. */
- redo:
- if (!overflowed) {
- if (value <= max_div_2) {
- value = (value << 1) | (bit - '0');
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in binary number");
- overflowed = TRUE;
- value_nv = (NV) value;
- }
- value_nv *= 2.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount. */
- value_nv += (NV)(bit - '0');
- continue;
- }
- if (bit == '_' && len && allow_underscores && (bit = s[1])
- && (bit == '0' || bit == '1'))
- {
- --len;
- ++s;
- goto redo;
- }
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal binary digit '%c' ignored", *s);
- break;
- }
-
- if ( ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Binary number > 0b11111111111111111111111111111111 non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
- return value;
- }
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
- if (result)
- *result = value_nv;
- return UV_MAX;
+ return grok_bin(start, len_p, flags, result);
}
/*
converts a string representing a hex number to numeric form.
-On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives
-conversion flags, and I<result> should be NULL or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
-invalid character will also trigger a warning.
-On return I<*len> is set to the length of the scanned string,
-and I<*flags> gives output flags.
-
-If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
-returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to I<*result> (or the value is discarded if I<result>
-is NULL).
-
-The hex number may optionally be prefixed with "0x" or "x" unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
-number may use '_' characters to separate digits.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
+
+If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
+and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
+returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
+
+The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
=cut
-Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
+Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
which suppresses any message for non-portable numbers, but which are valid
-on this platform.
+on this platform. But, C<*flags> will have the corresponding flag bit set.
*/
UV
Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
- STRLEN len = *len_p;
- UV value = 0;
- NV value_nv = 0;
- const UV max_div_16 = UV_MAX / 16;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool overflowed = FALSE;
-
PERL_ARGS_ASSERT_GROK_HEX;
- if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
- /* strip off leading x or 0x.
- for compatibility silently suffer "x" and "0x" as valid hex numbers.
- */
- if (len >= 1) {
- if (isALPHA_FOLD_EQ(s[0], 'x')) {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
- s+=2;
- len-=2;
- }
- }
- }
-
- for (; len-- && *s; s++) {
- if (isXDIGIT(*s)) {
- /* Write it in this wonky order with a goto to attempt to get the
- compiler to make the common case integer-only loop pretty tight.
- With gcc seems to be much straighter code than old scan_hex. */
- redo:
- if (!overflowed) {
- if (value <= max_div_16) {
- value = (value << 4) | XDIGIT_VALUE(*s);
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in hexadecimal number");
- overflowed = TRUE;
- value_nv = (NV) value;
- }
- value_nv *= 16.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount of 16-tuples. */
- value_nv += (NV) XDIGIT_VALUE(*s);
- continue;
- }
- if (*s == '_' && len && allow_underscores && s[1]
- && isXDIGIT(s[1]))
- {
- --len;
- ++s;
- goto redo;
- }
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal hexadecimal digit '%c' ignored", *s);
- break;
- }
-
- if ( ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Hexadecimal number > 0xffffffff non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
- return value;
- }
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
- if (result)
- *result = value_nv;
- return UV_MAX;
+ return grok_hex(start, len_p, flags, result);
}
/*
converts a string representing an octal number to numeric form.
-On entry I<start> and I<*len> give the string to scan, I<*flags> gives
-conversion flags, and I<result> should be NULL or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
-8 or 9 will also trigger a warning.
-On return I<*len> is set to the length of the scanned string,
-and I<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
+
+If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
+and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
+returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
-If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
-returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to I<*result> (or the value is discarded if I<result>
-is NULL).
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
-If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
-number may use '_' characters to separate digits.
+The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
+this function.
=cut
UV
Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
- STRLEN len = *len_p;
- UV value = 0;
- NV value_nv = 0;
- const UV max_div_8 = UV_MAX / 8;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool overflowed = FALSE;
-
PERL_ARGS_ASSERT_GROK_OCT;
- for (; len-- && *s; s++) {
- if (isOCTAL(*s)) {
- /* Write it in this wonky order with a goto to attempt to get the
- compiler to make the common case integer-only loop pretty tight.
- */
- redo:
- if (!overflowed) {
- if (value <= max_div_8) {
- value = (value << 3) | OCTAL_VALUE(*s);
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in octal number");
- overflowed = TRUE;
- value_nv = (NV) value;
- }
- value_nv *= 8.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount of 8-tuples. */
- value_nv += (NV) OCTAL_VALUE(*s);
- continue;
- }
- if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
- --len;
- ++s;
- goto redo;
- }
- /* Allow \octal to work the DWIM way (that is, stop scanning
- * as soon as non-octal characters are seen, complain only if
- * someone seems to want to use the digits eight and nine. Since we
- * know it is not octal, then if isDIGIT, must be an 8 or 9). */
- if (isDIGIT(*s)) {
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal octal digit '%c' ignored", *s);
- }
- break;
- }
-
- if ( ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Octal number > 037777777777 non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
- return value;
- }
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
- if (result)
- *result = value_nv;
- return UV_MAX;
+ return grok_oct(start, len_p, flags, result);
}
-/*
-=for apidoc scan_bin
-
-For backwards compatibility. Use C<grok_bin> instead.
-
-=for apidoc scan_hex
-
-For backwards compatibility. Use C<grok_hex> instead.
-
-=for apidoc scan_oct
-
-For backwards compatibility. Use C<grok_oct> instead.
-
-=cut
- */
-
-NV
-Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
-{
- NV rnv;
- I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
- const UV ruv = grok_bin (start, &len, &flags, &rnv);
-
- PERL_ARGS_ASSERT_SCAN_BIN;
-
- *retlen = len;
- return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
-}
-
-NV
-Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
-{
- NV rnv;
- I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
- const UV ruv = grok_oct (start, &len, &flags, &rnv);
-
- PERL_ARGS_ASSERT_SCAN_OCT;
-
- *retlen = len;
- return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
-}
-
-NV
-Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
+STATIC void
+S_output_non_portable(pTHX_ const U8 base)
{
- NV rnv;
- I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
- const UV ruv = grok_hex (start, &len, &flags, &rnv);
-
- PERL_ARGS_ASSERT_SCAN_HEX;
-
- *retlen = len;
- return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
+ /* Display the proper message for a number in the given input base not
+ * fitting in 32 bits */
+ const char * which = (base == 2)
+ ? "Binary number > 0b11111111111111111111111111111111"
+ : (base == 8)
+ ? "Octal number > 037777777777"
+ : "Hexadecimal number > 0xffffffff";
+
+ PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
+
+ /* Also there are listings for the other two. That's because, since they
+ * are the first word, it would be hard for a user to find them there
+ * starting with a %s */
+ /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
}
-/*
-=for apidoc grok_numeric_radix
-
-Scan and skip for a numeric decimal separator (radix).
+UV
+Perl_grok_bin_oct_hex(pTHX_ const char *start,
+ STRLEN *len_p,
+ I32 *flags,
+ NV *result,
+ const unsigned shift, /* 1 for binary; 3 for octal;
+ 4 for hex */
+ const U8 class_bit,
+ const char prefix
+ )
-=cut
- */
-bool
-Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
-#ifdef USE_LOCALE_NUMERIC
- PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
+ const char *s0 = start;
+ const char *s;
+ STRLEN len = *len_p;
+ STRLEN bytes_so_far; /* How many real digits have been processed */
+ UV value = 0;
+ NV value_nv = 0;
+ const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
+ const UV max_div= UV_MAX / base; /* Value above which, the next digit
+ processed would overflow */
+ const I32 input_flags = *flags;
+ const bool allow_underscores =
+ cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
+ bool overflowed = FALSE;
- if (IN_LC(LC_NUMERIC)) {
- DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv) {
- STRLEN len;
- const char * const radix = SvPV(PL_numeric_radix_sv, len);
- if (*sp + len <= send && memEQ(*sp, radix, len)) {
- *sp += len;
- RESTORE_LC_NUMERIC();
- return TRUE;
+ /* In overflows, this keeps track of how much to multiply the overflowed NV
+ * by as we continue to parse the remaining digits */
+ NV factor = 0;
+
+ /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
+ * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
+ * find the numeric value of a digit. That requires more instructions than
+ * OCTAL_VALUE would, but gives the same result for the narrowed range of
+ * octal digits; same for binary. If it were ever critical to squeeze more
+ * performance from this, the function could become grok_hex, and a regen
+ * perl script could scan it and write out two edited copies for the other
+ * two functions. That would improve the performance of all three
+ * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
+ * parameters are now passed to this to avoid conditionals. Those could
+ * become declared consts, like:
+ * const U8 base = 16;
+ * const U8 base = 8;
+ * ...
+ */
+
+ PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
+
+ ASSUME(inRANGE(shift, 1, 4) && shift != 2);
+
+ /* Clear output flags; unlikely to find a problem that sets them */
+ *flags = 0;
+
+ if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
+
+ /* strip off leading b or 0b; x or 0x.
+ for compatibility silently suffer "b" and "0b" as valid binary; "x"
+ and "0x" as valid hex numbers. */
+ if (len >= 1) {
+ if (isALPHA_FOLD_EQ(s0[0], prefix)) {
+ s0++;
+ len--;
+ }
+ else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
+ s0+=2;
+ len-=2;
}
}
- RESTORE_LC_NUMERIC();
}
- /* always try "." if numeric radix didn't match because
- * we may have data from different locales mixed */
-#endif
- PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
+ s = s0; /* s0 potentially advanced from 'start' */
- if (*sp < send && **sp == '.') {
- ++*sp;
- return TRUE;
- }
- return FALSE;
-}
-
-/*
-=for apidoc nan_hibyte
-
-Given an NV, returns pointer to the byte containing the most
-significant bit of the NaN, this bit is most commonly the
-quiet/signaling bit of the NaN. The mask will contain a mask
-appropriate for manipulating the most significant bit.
-Note that this bit may not be the highest bit of the byte.
-
-If the NV is not a NaN, returns NULL.
-
-Most platforms have "high bit is one" -> quiet nan.
-The known opposite exceptions are older MIPS and HPPA platforms.
-
-Some platforms do not differentiate between quiet and signaling NaNs.
+ /* Unroll the loop so that the first 8 digits are branchless except for the
+ * switch. A ninth hex one overflows a 32 bit word. */
+ switch (len) {
+ case 0:
+ return 0;
+ default:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 7:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 6:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 5:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 4:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 3:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 2:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 1:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
-=cut
-*/
-U8*
-Perl_nan_hibyte(NV *nvp, U8* mask)
-{
- STRLEN i = (NV_MANT_REAL_DIG - 1) / 8;
- STRLEN j = (NV_MANT_REAL_DIG - 1) % 8;
+ if (LIKELY(len <= 8)) {
+ return value;
+ }
- PERL_ARGS_ASSERT_NAN_HIBYTE;
+ s++;
+ break;
+ }
- *mask = 1 << j;
-#ifdef NV_BIG_ENDIAN
- return (U8*) nvp + NVSIZE - 1 - i;
-#endif
-#ifdef NV_LITTLE_ENDIAN
- return (U8*) nvp + i;
-#endif
-}
+ bytes_so_far = s - s0;
+ factor = shift << bytes_so_far;
+ len -= bytes_so_far;
-/*
-=for apidoc nan_signaling_set
+ for (; len--; s++) {
+ if (_generic_isCC(*s, class_bit)) {
+ /* Write it in this wonky order with a goto to attempt to get the
+ compiler to make the common case integer-only loop pretty tight.
+ With gcc seems to be much straighter code than old scan_hex.
+ (khw suspects that adding a LIKELY() just above would do the
+ same thing) */
+ redo:
+ if (LIKELY(value <= max_div)) {
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ /* Note XDIGIT_VALUE() is branchless, works on binary
+ * and octal as well, so can be used here, without
+ * slowing those down */
+ factor *= 1 << shift;
+ continue;
+ }
-Set or unset the NaN signaling-ness.
+ /* Bah. We are about to overflow. Instead, add the unoverflowed
+ * value to an NV that contains an approximation to the correct
+ * value. Each time through the loop we have increased 'factor' so
+ * that it gives how much the current approximation needs to
+ * effectively be shifted to make room for this new value */
+ value_nv *= factor;
+ value_nv += (NV) value;
+
+ /* Then we keep accumulating digits, until all are parsed. We
+ * start over using the current input value. This will be added to
+ * 'value_nv' eventually, either when all digits are gone, or we
+ * have overflowed this fresh start. */
+ value = XDIGIT_VALUE(*s);
+ factor = 1 << shift;
+
+ if (! overflowed) {
+ overflowed = TRUE;
+ if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
+ && ckWARN_d(WARN_OVERFLOW))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in %s number",
+ (base == 16) ? "hexadecimal"
+ : (base == 2)
+ ? "binary"
+ : "octal");
+ }
+ }
+ continue;
+ }
-Of those platforms that differentiate between quiet and signaling
-platforms the majority has the semantics of the most significant bit
-being on meaning quiet NaN, so for signaling we need to clear the bit.
+ if ( *s == '_'
+ && len
+ && allow_underscores
+ && _generic_isCC(s[1], class_bit)
-Some platforms (older MIPS, and HPPA) have the opposite
-semantics, and we set the bit for a signaling NaN.
+ /* Don't allow a leading underscore if the only-medial bit is
+ * set */
+ && ( LIKELY(s > s0)
+ || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
+ != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
+ {
+ --len;
+ ++s;
+ goto redo;
+ }
-=cut
-*/
-void
-Perl_nan_signaling_set(NV *nvp, bool signaling)
-{
- U8 mask;
- U8* hibyte;
-
- PERL_ARGS_ASSERT_NAN_SIGNALING_SET;
-
- hibyte = nan_hibyte(nvp, &mask);
- if (hibyte) {
- const NV nan = NV_NAN;
- /* Decent optimizers should make the irrelevant branch to disappear. */
- if ((((U8*)&nan)[hibyte - (U8*)nvp] & mask)) {
- /* x86 style: the most significant bit of the NaN is off
- * for a signaling NaN, and on for a quiet NaN. */
- if (signaling) {
- *hibyte &= ~mask;
- } else {
- *hibyte |= mask;
+ if (*s) {
+ if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
+ && ckWARN(WARN_DIGIT))
+ {
+ if (base != 8) {
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+ "Illegal %s digit '%c' ignored",
+ ((base == 2)
+ ? "binary"
+ : "hexadecimal"),
+ *s);
+ }
+ else if (isDIGIT(*s)) { /* octal base */
+
+ /* Allow \octal to work the DWIM way (that is, stop
+ * scanning as soon as non-octal characters are seen,
+ * complain only if someone seems to want to use the digits
+ * eight and nine. Since we know it is not octal, then if
+ * isDIGIT, must be an 8 or 9). */
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+ "Illegal octal digit '%c' ignored", *s);
+ }
}
- } else {
- /* MIPS/HPPA style: the most significant bit of the NaN is on
- * for a signaling NaN, and off for a quiet NaN. */
- if (signaling) {
- *hibyte |= mask;
- } else {
- *hibyte &= ~mask;
+
+ if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
+ *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
}
}
- }
-}
-
-/*
-=for apidoc nan_is_signaling
-
-Returns true if the nv is a NaN is a signaling NaN.
-=cut
-*/
-int
-Perl_nan_is_signaling(NV nv)
-{
- /* Quiet NaN bit pattern (64-bit doubles, ignore endianness):
- * x86 00 00 00 00 00 00 f8 7f
- * sparc 7f ff ff ff ff ff ff ff
- * mips 7f f7 ff ff ff ff ff ff
- * hppa 7f f4 00 00 00 00 00 00
- * The "7ff" is the exponent. The most significant bit of the NaN
- * (note: here, not the most significant bit of the byte) is of
- * interest: in the x86 style (also in sparc) the bit on means
- * 'quiet', in the mips style the bit off means 'quiet'. */
-#ifdef Perl_fp_classify_snan
- return Perl_fp_classify_snan(nv);
-#else
- if (Perl_isnan(nv)) {
- U8 mask;
- U8 *hibyte = Perl_nan_hibyte(&nv, &mask);
- /* Hoping NV_NAN is a quiet nan - this might be a false hope.
- * XXX Configure test */
- const NV nan = NV_NAN;
- return (*hibyte & mask) != (((U8*)&nan)[hibyte - (U8*)&nv] & mask);
- } else {
- return 0;
+ break;
}
-#endif
-}
-
-/* The largest known floating point numbers are the IEEE quadruple
- * precision of 128 bits. */
-#define MAX_NV_BYTES (128/8)
-
-static const char nan_payload_error[] = "NaN payload error";
-
-/*
-
-=for apidoc nan_payload_set
-
-Set the NaN payload of the nv.
-
-The first byte is the highest order byte of the payload (big-endian).
-
-The signaling flag, if true, turns the generated NaN into a signaling one.
-In most platforms this means turning _off_ the most significant bit of the
-NaN. Note the _most_ - some platforms have the opposite semantics.
-Do not assume any portability of the NaN semantics.
-
-=cut
-*/
-void
-Perl_nan_payload_set(NV *nvp, const void *bytes, STRLEN byten, bool signaling)
-{
- /* How many bits we can set in the payload.
- *
- * Note that whether the most signicant bit is a quiet or
- * signaling NaN is actually unstandardized. Most platforms use
- * it as the 'quiet' bit. The known exceptions to this are older
- * MIPS, and HPPA.
- *
- * Yet another unstandardized area is what does the difference
- * actually mean - if it exists: some platforms do not even have
- * signaling NaNs.
- *
- * C99 nan() is supposed to generate quiet NaNs. */
- int bits = NV_MANT_REAL_DIG - 1;
-
- STRLEN i, nvi;
- bool error = FALSE;
-
- /* XXX None of this works for doubledouble platforms, or for mixendians. */
-
- PERL_ARGS_ASSERT_NAN_PAYLOAD_SET;
- *nvp = NV_NAN;
-
-#ifdef NV_BIG_ENDIAN
- nvi = NVSIZE - 1;
-#endif
-#ifdef NV_LITTLE_ENDIAN
- nvi = 0;
-#endif
+ *len_p = s - start;
- if (byten > MAX_NV_BYTES) {
- byten = MAX_NV_BYTES;
- error = TRUE;
- }
- for (i = 0; bits > 0; i++) {
- U8 b = i < byten ? ((U8*) bytes)[i] : 0;
- if (bits > 0 && bits < 8) {
- U8 m = (1 << bits) - 1;
- ((U8*)nvp)[nvi] &= ~m;
- ((U8*)nvp)[nvi] |= b & m;
- bits = 0;
- } else {
- ((U8*)nvp)[nvi] = b;
- bits -= 8;
+ if (LIKELY(! overflowed)) {
+#if UVSIZE > 4
+ if ( UNLIKELY(value > 0xffffffff)
+ && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
+ {
+ output_non_portable(base);
+ *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
}
-#ifdef NV_BIG_ENDIAN
- nvi--;
-#endif
-#ifdef NV_LITTLE_ENDIAN
- nvi++;
#endif
+ return value;
}
- if (error) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- nan_payload_error);
- }
- nan_signaling_set(nvp, signaling);
-}
-/*
-=for apidoc grok_nan_payload
-
-Helper for grok_nan().
-
-Parses the "..." in C99-style "nan(...)" strings, and sets the nvp accordingly.
+ /* Overflowed: Calculate the final overflow approximation */
+ value_nv *= factor;
+ value_nv += (NV) value;
-If you want the parse the "nan" part you need to use grok_nan().
+ output_non_portable(base);
-=cut
-*/
-const char *
-Perl_grok_nan_payload(pTHX_ const char* s, const char* send, bool signaling, int *flags, NV* nvp)
-{
- U8 bytes[MAX_NV_BYTES];
- STRLEN byten = 0;
- const char *t = send - 1; /* minus one for ')' */
- bool error = FALSE;
+ *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
+ | PERL_SCAN_SILENT_NON_PORTABLE;
+ if (result)
+ *result = value_nv;
+ return UV_MAX;
+}
- PERL_ARGS_ASSERT_GROK_NAN_PAYLOAD;
+/*
+=for apidoc scan_bin
- /* XXX: legacy nan payload formats like "nan123",
- * "nan0xabc", or "nan(s123)" ("s" for signaling). */
+For backwards compatibility. Use C<grok_bin> instead.
- while (t > s && isSPACE(*t)) t--;
- if (*t != ')') {
- return send;
- }
+=for apidoc scan_hex
- if (++s == send) {
- *flags |= IS_NUMBER_TRAILING;
- return s;
- }
+For backwards compatibility. Use C<grok_hex> instead.
- while (s < t && byten < MAX_NV_BYTES) {
- UV uv;
- int nantype = 0;
-
- if (s[0] == '0' && s + 2 < t &&
- isALPHA_FOLD_EQ(s[1], 'x') &&
- isXDIGIT(s[2])) {
- const char *u = s + 3;
- STRLEN len;
- I32 uvflags;
-
- while (isXDIGIT(*u)) u++;
- len = u - s;
- uvflags = PERL_SCAN_ALLOW_UNDERSCORES;
- uv = grok_hex(s, &len, &uvflags, NULL);
- if ((uvflags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
- nantype = 0;
- } else {
- nantype = IS_NUMBER_IN_UV;
- }
- s += len;
- } else if (s[0] == '0' && s + 2 < t &&
- isALPHA_FOLD_EQ(s[1], 'b') &&
- (s[2] == '0' || s[2] == '1')) {
- const char *u = s + 3;
- STRLEN len;
- I32 uvflags;
-
- while (*u == '0' || *u == '1') u++;
- len = u - s;
- uvflags = PERL_SCAN_ALLOW_UNDERSCORES;
- uv = grok_bin(s, &len, &uvflags, NULL);
- if ((uvflags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
- nantype = 0;
- } else {
- nantype = IS_NUMBER_IN_UV;
- }
- s += len;
- } else if ((s[0] == '\'' || s[0] == '"') &&
- s + 2 < t && t[-1] == s[0]) {
- /* Perl extension: if the input looks like a string
- * constant ('' or ""), read its bytes as-they-come. */
- STRLEN n = t - s - 2;
- STRLEN i;
- if ((n > MAX_NV_BYTES - byten) ||
- (n * 8 > NV_MANT_REAL_DIG)) {
- error = TRUE;
- break;
- }
- /* Copy the bytes in reverse so that \x41\x42 ('AB')
- * is equivalent to 0x4142. In other words, the bytes
- * are in big-endian order. */
- for (i = 0; i < n; i++) {
- bytes[n - i - 1] = s[i + 1];
- }
- byten += n;
- break;
- } else if (s < t && isDIGIT(*s)) {
- const char *u;
- nantype =
- grok_number_flags(s, (STRLEN)(t - s), &uv,
- PERL_SCAN_TRAILING |
- PERL_SCAN_ALLOW_UNDERSCORES);
- /* Unfortunately grok_number_flags() doesn't
- * tell how far we got and the ')' will always
- * be "trailing", so we need to double-check
- * whether we had something dubious. */
- for (u = s; u < send - 1; u++) {
- if (!isDIGIT(*u)) {
- *flags |= IS_NUMBER_TRAILING;
- break;
- }
- }
- s = u;
- } else {
- error = TRUE;
- break;
- }
- /* XXX Doesn't do octal: nan("0123").
- * Probably not a big loss. */
+=for apidoc scan_oct
- if (!(nantype & IS_NUMBER_IN_UV)) {
- error = TRUE;
- break;
- }
+For backwards compatibility. Use C<grok_oct> instead.
- if (uv) {
- while (uv && byten < MAX_NV_BYTES) {
- bytes[byten++] = (U8) (uv & 0xFF);
- uv >>= 8;
- }
- }
- }
+=cut
+ */
- if (byten == 0) {
- bytes[byten++] = 0;
- }
+NV
+Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
+{
+ NV rnv;
+ I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
+ const UV ruv = grok_bin (start, &len, &flags, &rnv);
- if (error) {
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- nan_payload_error);
- }
+ PERL_ARGS_ASSERT_SCAN_BIN;
- if (s == send) {
- *flags |= IS_NUMBER_TRAILING;
- return s;
- }
+ *retlen = len;
+ return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
+}
- if (nvp) {
- nan_payload_set(nvp, bytes, byten, signaling);
- }
+NV
+Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
+{
+ NV rnv;
+ I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
+ const UV ruv = grok_oct (start, &len, &flags, &rnv);
- return s;
+ PERL_ARGS_ASSERT_SCAN_OCT;
+
+ *retlen = len;
+ return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
}
-/*
-=for apidoc grok_nan
+NV
+Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
+{
+ NV rnv;
+ I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
+ const UV ruv = grok_hex (start, &len, &flags, &rnv);
-Helper for grok_infnan().
+ PERL_ARGS_ASSERT_SCAN_HEX;
-Parses the C99-style "nan(...)" strings, and sets the nvp accordingly.
+ *retlen = len;
+ return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
+}
-*sp points to the beginning of "nan", which can be also "qnan", "nanq",
-or "snan", "nans", and case is ignored.
+/*
+=for apidoc grok_numeric_radix
-The "..." is parsed with grok_nan_payload().
+Scan and skip for a numeric decimal separator (radix).
=cut
-*/
-const char *
-Perl_grok_nan(pTHX_ const char* s, const char* send, int *flags, NV* nvp)
+ */
+bool
+Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
- bool signaling = FALSE;
+ PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
- PERL_ARGS_ASSERT_GROK_NAN;
+#ifdef USE_LOCALE_NUMERIC
- if (isALPHA_FOLD_EQ(*s, 'S')) {
- signaling = TRUE;
- s++; if (s == send) return s;
- } else if (isALPHA_FOLD_EQ(*s, 'Q')) {
- s++; if (s == send) return s;
- }
+ if (IN_LC(LC_NUMERIC)) {
+ STRLEN len;
+ char * radix;
+ bool matches_radix = FALSE;
+ DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
- if (isALPHA_FOLD_EQ(*s, 'N')) {
- s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return s;
- s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return s;
- s++;
+ STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
- *flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+ radix = SvPV(PL_numeric_radix_sv, len);
+ radix = savepvn(radix, len);
- /* NaN can be followed by various stuff (NaNQ, NaNS), while
- * some legacy implementations have weird stuff like "NaN%"
- * (no idea what that means). */
- if (isALPHA_FOLD_EQ(*s, 's')) {
- signaling = TRUE;
- s++;
- } else if (isALPHA_FOLD_EQ(*s, 'q')) {
- s++;
- }
+ RESTORE_LC_NUMERIC();
- if (*s == '(') {
- const char *n = grok_nan_payload(s, send, signaling, flags, nvp);
- if (n == send) return NULL;
- s = n;
- if (*s != ')') {
- *flags |= IS_NUMBER_TRAILING;
- return s;
- }
- } else {
- if (nvp) {
- U8 bytes[1] = { 0 };
- nan_payload_set(nvp, bytes, 1, signaling);
- }
+ if (*sp + len <= send) {
+ matches_radix = memEQ(*sp, radix, len);
+ }
- while (s < send && isSPACE(*s)) s++;
+ Safefree(radix);
- if (s < send && *s) {
- /* Note that we here implicitly accept (parse as
- * "nan", but with warnings) also any other weird
- * trailing stuff for "nan". In the above we just
- * check that if we got the C99-style "nan(...)",
- * the "..." looks sane. If in future we accept
- * more ways of specifying the nan payload (like
- * "nan123" or "nan0xabc"), the accepting would
- * happen around here. */
- *flags |= IS_NUMBER_TRAILING;
- }
+ if (matches_radix) {
+ *sp += len;
+ return TRUE;
}
+ }
+
+#endif
- s = send;
+ /* always try "." if numeric radix didn't match because
+ * we may have data from different locales mixed */
+ if (*sp < send && **sp == '.') {
+ ++*sp;
+ return TRUE;
}
- else
- return NULL;
- return s;
+ return FALSE;
}
/*
=for apidoc grok_infnan
-Helper for grok_number(), accepts various ways of spelling "infinity"
+Helper for C<grok_number()>, accepts various ways of spelling "infinity"
or "not a number", and returns one of the following flag combinations:
- IS_NUMBER_INFINITE
+ IS_NUMBER_INFINITY
IS_NUMBER_NAN
- IS_NUMBER_INFINITE | IS_NUMBER_NEG
+ IS_NUMBER_INFINITY | IS_NUMBER_NEG
IS_NUMBER_NAN | IS_NUMBER_NEG
0
-possibly |-ed with IS_NUMBER_TRAILING.
+possibly |-ed with C<IS_NUMBER_TRAILING>.
-If an infinity or a not-a-number is recognized, the *sp will point to
+If an infinity or a not-a-number is recognized, C<*sp> will point to
one byte past the end of the recognized string. If the recognition fails,
-zero is returned, and the *sp will not move.
+zero is returned, and C<*sp> will not move.
+
+=for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
+=for apidoc Amnh|bool|IS_NUMBER_INFINITY
+=for apidoc Amnh|bool|IS_NUMBER_IN_UV
+=for apidoc Amnh|bool|IS_NUMBER_NAN
+=for apidoc Amnh|bool|IS_NUMBER_NEG
+=for apidoc Amnh|bool|IS_NUMBER_NOT_INT
=cut
*/
{
const char* s = *sp;
int flags = 0;
+#if defined(NV_INF) || defined(NV_NAN)
bool odh = FALSE; /* one-dot-hash: 1.#INF */
PERL_ARGS_ASSERT_GROK_INFNAN;
s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
s++;
} else if (odh) {
- while (*s == '0') { /* 1.#INF00 */
+ while (s < send && *s == '0') { /* 1.#INF00 */
s++;
}
}
else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
s++;
flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
- while (*s == '0') { /* 1.#IND00 */
+ while (s < send && *s == '0') { /* 1.#IND00 */
s++;
}
- if (*s) {
+ if (s < send && *s) {
flags |= IS_NUMBER_TRAILING;
}
} else
s++;
flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+ if (s == send) {
+ return flags;
+ }
/* NaN can be followed by various stuff (NaNQ, NaNS), but
* there are also multiple different NaN values, and some
/* "nanq" or "nans" are ok, though generating
* these portably is tricky. */
s++;
+ if (s == send) {
+ return flags;
+ }
}
if (*s == '(') {
/* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
while (s < send && isSPACE(*s))
s++;
+#else
+ PERL_UNUSED_ARG(send);
+#endif /* #if defined(NV_INF) || defined(NV_NAN) */
*sp = s;
return flags;
}
Recognise (or not) a number. The type of the number is returned
(0 if unrecognised), otherwise it is a bit-ORed combination of
-IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
-IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
-
-If the value of the number can fit in a UV, it is returned in the *valuep
-IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
-will never be set unless *valuep is valid, but *valuep may have been assigned
-to during processing even though IS_NUMBER_IN_UV is not set on return.
-If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
-valuep is non-NULL, but no actual assignment (or SEGV) will occur.
-
-IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
-seen (in which case *valuep gives the true value truncated to an integer), and
-IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
-absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
+C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
+C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
+
+If the value of the number can fit in a UV, it is returned in C<*valuep>.
+C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
+will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
+to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
+If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
+C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
+
+C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
+seen (in which case C<*valuep> gives the true value truncated to an integer), and
+C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
+absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the
number is larger than a UV.
C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
non-numeric text on an otherwise successful I<grok>, setting
C<IS_NUMBER_TRAILING> on the result.
+=for apidoc Amnh||PERL_SCAN_TRAILING
+
=for apidoc grok_number
-Identical to grok_number_flags() with flags set to zero.
+Identical to C<grok_number_flags()> with C<flags> set to zero.
=cut
*/
PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
- while (s < send && isSPACE(*s))
- s++;
- if (s == send) {
- return 0;
- } else if (*s == '-') {
- s++;
- numtype = IS_NUMBER_NEG;
+ if (UNLIKELY(isSPACE(*s))) {
+ s++;
+ while (s < send) {
+ if (LIKELY(! isSPACE(*s))) goto non_space;
+ s++;
+ }
+ return 0;
+ non_space: ;
}
- else if (*s == '+')
- s++;
- if (s == send)
- return 0;
+ /* See if signed. This assumes it is more likely to be unsigned, so
+ * penalizes signed by an extra conditional; rewarding unsigned by one fewer
+ * (because we detect '+' and '-' with a single test and then add a
+ * conditional to determine which) */
+ if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
+
+ /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
+ * 2F = '/'. That is, it is either a sign, or a character that doesn't
+ * belong in a number at all (unless it's a radix character in a weird
+ * locale). Given this, it's far more likely to be a minus than the
+ * others. (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E, (not 40
+ * because can't be a space) 60, 62, 64, 66, 68, 6A, 6C, 6E. Again,
+ * only potentially a weird radix character, or 4E='+', or 60='-') */
+ if (LIKELY(*s == '-')) {
+ s++;
+ numtype = IS_NUMBER_NEG;
+ }
+ else if (LIKELY(*s == '+'))
+ s++;
+ else /* Can't just return failure here, as it could be a weird radix
+ character */
+ goto done_sign;
+
+ if (UNLIKELY(s == send))
+ return 0;
+ done_sign: ;
+ }
/* The first digit (after optional sign): note that might
* also point to "infinity" or "nan", or "1.#INF". */
d = s;
/* next must be digit or the radix separator or beginning of infinity/nan */
- if (isDIGIT(*s)) {
+ if (LIKELY(isDIGIT(*s))) {
/* UVs are at least 32 bits, so the first 9 decimal digits cannot
overflow. */
- UV value = *s - '0';
- /* This construction seems to be more optimiser friendly.
- (without it gcc does the isDIGIT test and the *s - '0' separately)
- With it gcc on arm is managing 6 instructions (6 cycles) per digit.
- In theory the optimiser could deduce how far to unroll the loop
- before checking for overflow. */
- if (++s < send) {
- int digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
+ UV value = *s - '0'; /* Process this first (perhaps only) digit */
+ int digit;
+
+ s++;
+
+ switch(send - s) {
+ default: /* 8 or more remaining characters */
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 7:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 6:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 5:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 4:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 3:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 2:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 1:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
+ s++;
+ /* FALLTHROUGH */
+ case 0: /* This case means the string consists of just the one
+ digit we already have processed */
+
+ /* If we got here by falling through other than the default: case, we
+ * have processed the whole string, and know it consists entirely of
+ * digits, and can't have overflowed. */
+ if (s >= send) {
+ if (valuep)
+ *valuep = value;
+ return numtype|IS_NUMBER_IN_UV;
+ }
+
+ /* Here, there are extra characters beyond the first 9 digits. Use a
+ * loop to accumulate any remaining digits, until we get a non-digit or
+ * would overflow. Note that leading zeros could cause us to get here
+ * without being close to overflowing.
+ *
+ * (The conditional 's >= send' above could be eliminated by making the
+ * default: in the switch to instead be 'case 8:', and process longer
+ * strings separately by using the loop below. This would penalize
+ * these inputs by the extra instructions needed for looping. That
+ * could be eliminated by copying the unwound code from above to handle
+ * the firt 9 digits of these. khw didn't think this saving of a
+ * single conditional was worth it.) */
+ do {
+ digit = *s - '0';
+ if (! inRANGE(digit, 0, 9)) goto mantissa_done;
+ if ( value < uv_max_div_10
+ || ( value == uv_max_div_10
+ && digit <= uv_max_mod_10))
+ {
value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (digit >= 0 && digit <= 9) {
- value = value * 10 + digit;
- if (++s < send) {
- /* Now got 9 digits, so need to check
- each time for overflow. */
- digit = *s - '0';
- while (digit >= 0 && digit <= 9
- && (value < uv_max_div_10
- || (value == uv_max_div_10
- && digit <= uv_max_mod_10))) {
- value = value * 10 + digit;
- if (++s < send)
- digit = *s - '0';
- else
- break;
- }
- if (digit >= 0 && digit <= 9
- && (s < send)) {
- /* value overflowed.
- skip the remaining digits, don't
- worry about setting *valuep. */
- do {
- s++;
- } while (s < send && isDIGIT(*s));
- numtype |=
- IS_NUMBER_GREATER_THAN_UV_MAX;
- goto skip_value;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
+ s++;
}
- }
- }
- }
- }
+ else { /* value would overflow. skip the remaining digits, don't
+ worry about setting *valuep. */
+ do {
+ s++;
+ } while (s < send && isDIGIT(*s));
+ numtype |=
+ IS_NUMBER_GREATER_THAN_UV_MAX;
+ goto skip_value;
+ }
+ } while (s < send);
+ } /* End switch on input length */
+
+ mantissa_done:
numtype |= IS_NUMBER_IN_UV;
if (valuep)
*valuep = value;
while (s < send && isDIGIT(*s)) /* optional digits after the radix */
s++;
}
- }
+ } /* End of *s is a digit */
else if (GROK_NUMERIC_RADIX(&s, send)) {
numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
/* no digits before the radix means we need digits after it */
return 0;
}
- if (s > d && s < send) {
+ if (LIKELY(s > d) && s < send) {
/* we can have an optional exponent part */
- if (isALPHA_FOLD_EQ(*s, 'e')) {
+ if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
s++;
if (s < send && (*s == '-' || *s == '+'))
s++;
numtype |= IS_NUMBER_NOT_INT;
}
}
- while (s < send && isSPACE(*s))
+
+ while (s < send) {
+ if (LIKELY(! isSPACE(*s))) goto end_space;
s++;
- if (s >= send)
- return numtype;
- if (len == 10 && memEQ(pv, "0 but true", 10)) {
+ }
+ return numtype;
+
+ end_space:
+
+ if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
if (valuep)
*valuep = 0;
return IS_NUMBER_IN_UV;
}
+
/* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
- if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
+ if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
/* Really detect inf/nan. Start at d, not s, since the above
* code might have already consumed the "1." or "1". */
- int infnan = Perl_grok_infnan(aTHX_ &d, send);
+ const int infnan = Perl_grok_infnan(aTHX_ &d, send);
if ((infnan & IS_NUMBER_INFINITY)) {
return (numtype | infnan); /* Keep sign for infinity. */
}
}
/*
-=for apidoc grok_atou
-
-grok_atou is a safer replacement for atoi and strtol.
+=for apidoc grok_atoUV
-grok_atou parses a C-style zero-byte terminated string, looking for
-a decimal unsigned integer.
+parse a string, looking for a decimal unsigned integer.
-Returns the unsigned integer, if a valid value can be parsed
-from the beginning of the string.
+On entry, C<pv> points to the beginning of the string;
+C<valptr> points to a UV that will receive the converted value, if found;
+C<endptr> is either NULL or points to a variable that points to one byte
+beyond the point in C<pv> that this routine should examine.
+If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
-Accepts only the decimal digits '0'..'9'.
+Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
+no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
+value.
-As opposed to atoi or strtol, grok_atou does NOT allow optional
-leading whitespace, or negative inputs. If such features are
-required, the calling code needs to explicitly implement those.
+If you constrain the portion of C<pv> that is looked at by this function (by
+passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
+valid value, it will return TRUE, setting C<*endptr> to the byte following the
+final digit of the value. But if there is no constraint at what's looked at,
+all of C<pv> must be valid in order for TRUE to be returned. C<*endptr> is
+unchanged from its value on input if FALSE is returned;
-If a valid value cannot be parsed, returns either zero (if non-digits
-are met before any digits) or UV_MAX (if the value overflows).
+The only characters this accepts are the decimal digits '0'..'9'.
-Note that extraneous leading zeros also count as an overflow
-(meaning that only "0" is the zero).
+As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
+leading whitespace, nor negative inputs. If such features are required, the
+calling code needs to explicitly implement those.
-On failure, the *endptr is also set to NULL, unless endptr is NULL.
+Note that this function returns FALSE for inputs that would overflow a UV,
+or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
+C<01>, C<002>, I<etc>.
-Trailing non-digit bytes are allowed if the endptr is non-NULL.
-On return the *endptr will contain the pointer to the first non-digit byte.
-
-If the endptr is NULL, the first non-digit byte MUST be
-the zero byte terminating the pv, or zero will be returned.
-
-Background: atoi has severe problems with illegal inputs, it cannot be
+Background: C<atoi> has severe problems with illegal inputs, it cannot be
used for incremental parsing, and therefore should be avoided
-atoi and strtol are also affected by locale settings, which can also be
+C<atoi> and C<strtol> are also affected by locale settings, which can also be
seen as a bug (global state controlled by user environment).
=cut
+
*/
-UV
-Perl_grok_atou(const char *pv, const char** endptr)
+bool
+Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
{
const char* s = pv;
const char** eptr;
const char* end2; /* Used in case endptr is NULL. */
- UV val = 0; /* The return value. */
-
- PERL_ARGS_ASSERT_GROK_ATOU;
-
- eptr = endptr ? endptr : &end2;
- if (isDIGIT(*s)) {
- /* Single-digit inputs are quite common. */
- val = *s++ - '0';
- if (isDIGIT(*s)) {
- /* Extra leading zeros cause overflow. */
- if (val == 0) {
- *eptr = NULL;
- return UV_MAX;
- }
- while (isDIGIT(*s)) {
- /* This could be unrolled like in grok_number(), but
- * the expected uses of this are not speed-needy, and
- * unlikely to need full 64-bitness. */
- U8 digit = *s++ - '0';
- if (val < uv_max_div_10 ||
- (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
- val = val * 10 + digit;
- } else {
- *eptr = NULL;
- return UV_MAX;
- }
+ UV val = 0; /* The parsed value. */
+
+ PERL_ARGS_ASSERT_GROK_ATOUV;
+
+ if (endptr) {
+ eptr = endptr;
+ }
+ else {
+ end2 = s + strlen(s);
+ eptr = &end2;
+ }
+
+ if ( *eptr <= s
+ || ! isDIGIT(*s))
+ {
+ return FALSE;
+ }
+
+ /* Single-digit inputs are quite common. */
+ val = *s++ - '0';
+ if (s < *eptr && isDIGIT(*s)) {
+ /* Fail on extra leading zeros. */
+ if (val == 0)
+ return FALSE;
+ while (s < *eptr && isDIGIT(*s)) {
+ /* This could be unrolled like in grok_number(), but
+ * the expected uses of this are not speed-needy, and
+ * unlikely to need full 64-bitness. */
+ const U8 digit = *s++ - '0';
+ if (val < uv_max_div_10 ||
+ (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
+ val = val * 10 + digit;
+ } else {
+ return FALSE;
}
}
}
- if (s == pv) {
- *eptr = NULL; /* If no progress, failed to parse anything. */
- return 0;
+
+ if (endptr == NULL) {
+ if (*s) {
+ return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
+ }
}
- if (endptr == NULL && *s) {
- return 0; /* If endptr is NULL, no trailing non-digits allowed. */
+ else {
+ *endptr = s;
}
- *eptr = s;
- return val;
+
+ *valptr = val;
+ return TRUE;
}
-#ifndef USE_QUADMATH
+#ifndef Perl_strtod
STATIC NV
S_mulexp10(NV value, I32 exponent)
{
/* On OpenVMS VAX we by default use the D_FLOAT double format,
* and that format does not have *easy* capabilities [1] for
- * overflowing doubles 'silently' as IEEE fp does. We also need
- * to support G_FLOAT on both VAX and Alpha, and though the exponent
- * range is much larger than D_FLOAT it still doesn't do silent
- * overflow. Therefore we need to detect early whether we would
- * overflow (this is the behaviour of the native string-to-float
+ * overflowing doubles 'silently' as IEEE fp does. We also need
+ * to support G_FLOAT on both VAX and Alpha, and though the exponent
+ * range is much larger than D_FLOAT it still doesn't do silent
+ * overflow. Therefore we need to detect early whether we would
+ * overflow (this is the behaviour of the native string-to-float
* conversion routines, and therefore of native applications, too).
*
* [1] Trying to establish a condition handler to trap floating point
* a hammer. Therefore we need to catch potential overflows before
* it's too late. */
-#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
+#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
STMT_START {
const NV exp_v = log10(value);
if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
result *= power;
#ifdef FP_OVERFLOWS_TO_ZERO
if (result == 0)
+# ifdef NV_INF
return value < 0 ? -NV_INF : NV_INF;
+# else
+ return value < 0 ? -FLT_MAX : FLT_MAX;
+# endif
#endif
/* Floating point exceptions are supposed to be turned off,
- * but if we're obviously done, don't risk another iteration.
+ * but if we're obviously done, don't risk another iteration.
*/
if (exponent == 0) break;
}
}
return negative ? value / result : value * result;
}
-#endif /* #ifndef USE_QUADMATH */
+#endif /* #ifndef Perl_strtod */
+
+#ifdef Perl_strtod
+# define ATOF(s, x) my_atof2(s, &x)
+#else
+# define ATOF(s, x) Perl_atof2(s, x)
+#endif
NV
Perl_my_atof(pTHX_ const char* s)
{
+
+/*
+=for apidoc my_atof
+
+L<C<atof>(3)>, but properly works with Perl locale handling, accepting a dot
+radix character always, but also the current locale's radix character if and
+only if called from within the lexical scope of a Perl C<use locale> statement.
+
+N.B. C<s> must be NUL terminated.
+
+=cut
+*/
+
NV x = 0.0;
-#ifdef USE_QUADMATH
- Perl_my_atof2(aTHX_ s, &x);
- return x;
-#else
-# ifdef USE_LOCALE_NUMERIC
+
PERL_ARGS_ASSERT_MY_ATOF;
+#if ! defined(USE_LOCALE_NUMERIC)
+
+ ATOF(s, x);
+
+#else
+
{
- DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
- if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
- const char *standard = NULL, *local = NULL;
- bool use_standard_radix;
+ DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
+ STORE_LC_NUMERIC_SET_TO_NEEDED();
+ if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
+ ATOF(s,x);
+ }
+ else {
/* Look through the string for the first thing that looks like a
* decimal point: either the value in the current locale or the
* that we have to determine this beforehand because on some
* systems, Perl_atof2 is just a wrapper around the system's atof.
* */
- standard = strchr(s, '.');
- local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
+ const char * const standard_pos = strchr(s, '.');
+ const char * const local_pos
+ = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
+ const bool use_standard_radix
+ = standard_pos && (!local_pos || standard_pos < local_pos);
- use_standard_radix = standard && (!local || standard < local);
-
- if (use_standard_radix)
+ if (use_standard_radix) {
SET_NUMERIC_STANDARD();
+ LOCK_LC_NUMERIC_STANDARD();
+ }
- Perl_atof2(s, x);
+ ATOF(s,x);
- if (use_standard_radix)
- SET_NUMERIC_LOCAL();
+ if (use_standard_radix) {
+ UNLOCK_LC_NUMERIC_STANDARD();
+ SET_NUMERIC_UNDERLYING();
+ }
}
- else
- Perl_atof2(s, x);
RESTORE_LC_NUMERIC();
}
-# else
- Perl_atof2(s, x);
-# endif
+
#endif
+
return x;
}
+#if defined(NV_INF) || defined(NV_NAN)
-#ifdef USING_MSVC6
-# pragma warning(push)
-# pragma warning(disable:4756;disable:4056)
-#endif
static char*
S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
{
const char *p0 = negative ? s - 1 : s;
const char *p = p0;
- int infnan = grok_infnan(&p, send);
+ const int infnan = grok_infnan(&p, send);
if (infnan && p != p0) {
/* If we can generate inf/nan directly, let's do so. */
#ifdef NV_INF
/* If still here, we didn't have either NV_INF or NV_NAN,
* and can try falling back to native strtod/strtold.
*
- * (Though, are our NV_INF or NV_NAN ever not defined?)
- *
* The native interface might not recognize all the possible
* inf/nan strings Perl recognizes. What we can try
* is to try faking the input. We will try inf/-inf/nan
* as the most promising/portable input. */
{
- const char* fake = NULL;
+ const char* fake = "silence compiler warning";
char* endp;
NV nv;
+#ifdef NV_INF
if ((infnan & IS_NUMBER_INFINITY)) {
fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
}
- else if ((infnan & IS_NUMBER_NAN)) {
+#endif
+#ifdef NV_NAN
+ if ((infnan & IS_NUMBER_NAN)) {
fake = "nan";
}
- assert(fake);
- nv = Perl_strtod(fake, &endp);
+#endif
+ assert(strNE(fake, "silence compiler warning"));
+ nv = S_strtod(aTHX_ fake, &endp);
if (fake != endp) {
+#ifdef NV_INF
if ((infnan & IS_NUMBER_INFINITY)) {
-#ifdef Perl_isinf
+# ifdef Perl_isinf
if (Perl_isinf(nv))
*value = nv;
-#else
+# else
/* last resort, may generate SIGFPE */
*value = Perl_exp((NV)1e9);
if ((infnan & IS_NUMBER_NEG))
*value = -*value;
-#endif
+# endif
return (char*)p; /* p, not endp */
}
- else if ((infnan & IS_NUMBER_NAN)) {
-#ifdef Perl_isnan
+#endif
+#ifdef NV_NAN
+ if ((infnan & IS_NUMBER_NAN)) {
+# ifdef Perl_isnan
if (Perl_isnan(nv))
*value = nv;
-#else
+# else
/* last resort, may generate SIGFPE */
*value = Perl_log((NV)-1.0);
-#endif
+# endif
return (char*)p; /* p, not endp */
+#endif
}
}
}
}
return NULL;
}
-#ifdef USING_MSVC6
-# pragma warning(pop)
-#endif
+
+#endif /* if defined(NV_INF) || defined(NV_NAN) */
char*
Perl_my_atof2(pTHX_ const char* orig, NV* value)
{
+ PERL_ARGS_ASSERT_MY_ATOF2;
+ return my_atof3(orig, value, 0);
+}
+
+char*
+Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
+{
const char* s = orig;
NV result[3] = {0.0, 0.0, 0.0};
-#if defined(USE_PERL_ATOF) || defined(USE_QUADMATH)
- const char* send = s + strlen(orig); /* one past the last */
+#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
+ const char* send = s + ((len != 0)
+ ? len
+ : strlen(orig)); /* one past the last */
bool negative = 0;
#endif
-#if defined(USE_PERL_ATOF) && !defined(USE_QUADMATH)
+#if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
UV accumulator[2] = {0,0}; /* before/after dp */
bool seen_digit = 0;
I32 exp_adjust[2] = {0,0};
I32 sig_digits = 0; /* noof significant digits seen so far */
#endif
-#if defined(USE_PERL_ATOF) || defined(USE_QUADMATH)
- PERL_ARGS_ASSERT_MY_ATOF2;
+#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
+ PERL_ARGS_ASSERT_MY_ATOF3;
/* leading whitespace */
- while (isSPACE(*s))
+ while (s < send && isSPACE(*s))
++s;
/* sign */
}
#endif
-#ifdef USE_QUADMATH
+#ifdef Perl_strtod
{
char* endp;
- if ((endp = S_my_atof_infnan(s, negative, send, value)))
+ char* copy = NULL;
+
+ if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
return endp;
- result[2] = strtoflt128(s, &endp);
+
+ /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
+ 0b-prefixed binary numbers, which is backward incompatible
+ */
+ if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
+ (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
+ *value = 0;
+ return (char *)s+1;
+ }
+
+ /* If the length is passed in, the input string isn't NUL-terminated,
+ * and in it turns out the function below assumes it is; therefore we
+ * create a copy and NUL-terminate that */
+ if (len) {
+ Newx(copy, len + 1, char);
+ Copy(orig, copy, len, char);
+ copy[len] = '\0';
+ s = copy + (s - orig);
+ }
+
+ result[2] = S_strtod(aTHX_ s, &endp);
+
+ /* If we created a copy, 'endp' is in terms of that. Convert back to
+ * the original */
+ if (copy) {
+ s = (s - copy) + (char *) orig;
+ endp = (endp - copy) + (char *) orig;
+ Safefree(copy);
+ }
+
if (s != endp) {
*value = negative ? -result[2] : result[2];
return endp;
/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
+#if defined(NV_INF) || defined(NV_NAN)
{
- const char* endp;
+ char* endp;
if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
- return (char*)endp;
+ return endp;
}
+#endif
/* we accumulate digits into an integer; when this becomes too
* large, we add the total to NV and start again */
- while (1) {
+ while (s < send) {
if (isDIGIT(*s)) {
seen_digit = 1;
old_digit = digit;
exp_adjust[0]++;
}
/* skip remaining digits */
- while (isDIGIT(*s)) {
+ while (s < send && isDIGIT(*s)) {
++s;
if (! seen_dp) {
exp_adjust[0]++;
else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
seen_dp = 1;
if (sig_digits > MAX_SIG_DIGITS) {
- do {
+ while (s < send && isDIGIT(*s)) {
++s;
- } while (isDIGIT(*s));
+ }
break;
}
}
result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
}
- if (seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
+ if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
bool expnegative = 0;
++s;
case '+':
++s;
}
- while (isDIGIT(*s))
+ while (s < send && isDIGIT(*s))
exponent = exponent * 10 + (*s++ - '0');
if (expnegative)
exponent = -exponent;
}
-
-
/* now apply the exponent */
if (seen_dp) {
/* now apply the sign */
if (negative)
result[2] = -result[2];
-#endif /* USE_PERL_ATOF */
*value = result[2];
return (char *)s;
+#else /* USE_PERL_ATOF */
+ /* If you see this error you both don't have strtod (or configured -Ud_strtod or
+ or it's long double/quadmath equivalent) and disabled USE_PERL_ATOF, thus
+ removing any way for perl to convert strings to floating point numbers.
+ */
+# error No mechanism to convert strings to numbers available
+#endif
}
/*
=for apidoc isinfnan
-Perl_isinfnan() is utility function that returns true if the NV
-argument is either an infinity or a NaN, false otherwise. To test
-in more detail, use Perl_isinf() and Perl_isnan().
+C<Perl_isinfnan()> is a utility function that returns true if the NV
+argument is either an infinity or a C<NaN>, false otherwise. To test
+in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
This is also the logical inverse of Perl_isfinite().
bool
Perl_isinfnan(NV nv)
{
+ PERL_UNUSED_ARG(nv);
#ifdef Perl_isinf
if (Perl_isinf(nv))
return TRUE;
}
/*
-=for apidoc
+=for apidoc isinfnansv
-Checks whether the argument would be either an infinity or NaN when used
+Checks whether the argument would be either an infinity or C<NaN> when used
as a number, but is careful not to trigger non-numeric or uninitialized
-warnings. it assumes the caller has done SvGETMAGIC(sv) already.
+warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already.
=cut
*/
=for apidoc Perl_signbit
Return a non-zero integer if the sign bit on an NV is set, and 0 if
-it is not.
+it is not.
-If Configure detects this system has a signbit() that will work with
-our NVs, then we just use it via the #define in perl.h. Otherwise,
+If F<Configure> detects this system has a C<signbit()> that will work with
+our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise,
fall back on this implementation. The main use of this function
-is catching -0.0.
+is catching C<-0.0>.
-Configure notes: This function is called 'Perl_signbit' instead of a
-plain 'signbit' because it is easy to imagine a system having a signbit()
+C<Configure> notes: This function is called C<'Perl_signbit'> instead of a
+plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
function or macro that doesn't happen to work with our particular choice
-of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect
+of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
the standard system headers to be happy. Also, this is a no-context
-function (no pTHX_) because Perl_signbit() is usually re-#defined in
-perl.h as a simple macro call to the system's signbit().
-Users should just always call Perl_signbit().
+function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
+F<perl.h> as a simple macro call to the system's C<signbit()>.
+Users should just always call C<Perl_signbit()>.
=cut
*/
int
Perl_signbit(NV x) {
# ifdef Perl_fp_class_nzero
- if (x == 0)
- return Perl_fp_class_nzero(x);
-# endif
+ return Perl_fp_class_nzero(x);
+ /* Try finding the high byte, and assume it's highest bit
+ * is the sign. This assumption is probably wrong somewhere. */
+# elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
+ return (((unsigned char *)&x)[9] & 0x80);
+# elif defined(NV_LITTLE_ENDIAN)
+ /* Note that NVSIZE is sizeof(NV), which would make the below be
+ * wrong if the end bytes are unused, which happens with the x86
+ * 80-bit long doubles, which is why take care of that above. */
+ return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
+# elif defined(NV_BIG_ENDIAN)
+ return (((unsigned char *)&x)[0] & 0x80);
+# else
+ /* This last resort fallback is wrong for the negative zero. */
return (x < 0.0) ? 1 : 0;
+# endif
}
#endif
/*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
* ex: set ts=8 sts=4 sw=4 et:
*/