*/
/*
-=head1 Numeric functions
-
-=cut
This file contains all the stuff needed by perl for manipulating numeric
values, including such things as replacements for the OS's atof() function
converts a string representing a binary number to numeric form.
-On entry C<start> and C<*len> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-invalid character will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is NULL).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
-number may use C<"_"> characters to separate digits.
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
-=for apidoc Amnh||PERL_SCAN_TRAILING
=cut
UV
Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
- STRLEN len = *len_p;
- UV value = 0;
- NV value_nv = 0;
-
- const UV max_div_2 = UV_MAX / 2;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool overflowed = FALSE;
- char bit;
-
PERL_ARGS_ASSERT_GROK_BIN;
- if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
- /* strip off leading b or 0b.
- for compatibility silently suffer "b" and "0b" as valid binary
- numbers. */
- if (len >= 1) {
- if (isALPHA_FOLD_EQ(s[0], 'b')) {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
- s+=2;
- len-=2;
- }
- }
- }
-
- for (; len-- && (bit = *s); s++) {
- if (bit == '0' || bit == '1') {
- /* Write it in this wonky order with a goto to attempt to get the
- compiler to make the common case integer-only loop pretty tight.
- With gcc seems to be much straighter code than old scan_bin. */
- redo:
- if (!overflowed) {
- if (value <= max_div_2) {
- value = (value << 1) | (bit - '0');
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in binary number");
- overflowed = TRUE;
- value_nv = (NV) value;
- }
- value_nv *= 2.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount. */
- value_nv += (NV)(bit - '0');
- continue;
- }
- if (bit == '_' && len && allow_underscores && (bit = s[1])
- && (bit == '0' || bit == '1'))
- {
- --len;
- ++s;
- goto redo;
- }
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal binary digit '%c' ignored", *s);
- break;
- }
-
- if ( ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Binary number > 0b11111111111111111111111111111111 non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
- return value;
- }
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
- if (result)
- *result = value_nv;
- return UV_MAX;
+ return grok_bin(start, len_p, flags, result);
}
/*
converts a string representing a hex number to numeric form.
On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-invalid character will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is C<NULL>).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If
-C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
-number may use C<"_"> characters to separate digits.
+C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
=cut
-Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
+Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
which suppresses any message for non-portable numbers, but which are valid
-on this platform.
+on this platform. But, C<*flags> will have the corresponding flag bit set.
*/
UV
Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
- STRLEN len = *len_p;
- UV value = 0;
- NV value_nv = 0;
- const UV max_div_16 = UV_MAX / 16;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool overflowed = FALSE;
-
PERL_ARGS_ASSERT_GROK_HEX;
- if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
- /* strip off leading x or 0x.
- for compatibility silently suffer "x" and "0x" as valid hex numbers.
- */
- if (len >= 1) {
- if (isALPHA_FOLD_EQ(s[0], 'x')) {
- s++;
- len--;
- }
- else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
- s+=2;
- len-=2;
- }
- }
- }
-
- for (; len-- && *s; s++) {
- if (isXDIGIT(*s)) {
- /* Write it in this wonky order with a goto to attempt to get the
- compiler to make the common case integer-only loop pretty tight.
- With gcc seems to be much straighter code than old scan_hex. */
- redo:
- if (!overflowed) {
- if (value <= max_div_16) {
- value = (value << 4) | XDIGIT_VALUE(*s);
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in hexadecimal number");
- overflowed = TRUE;
- value_nv = (NV) value;
- }
- value_nv *= 16.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount of 16-tuples. */
- value_nv += (NV) XDIGIT_VALUE(*s);
- continue;
- }
- if (*s == '_' && len && allow_underscores && s[1]
- && isXDIGIT(s[1]))
- {
- --len;
- ++s;
- goto redo;
- }
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal hexadecimal digit '%c' ignored", *s);
- break;
- }
-
- if ( ( overflowed && value_nv > 4294967295.0)
-#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
-#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Hexadecimal number > 0xffffffff non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
- return value;
- }
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
- if (result)
- *result = value_nv;
- return UV_MAX;
+ return grok_hex(start, len_p, flags, result);
}
/*
converts a string representing an octal number to numeric form.
-On entry C<start> and C<*len> give the string to scan, C<*flags> gives
-conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
-The scan stops at the end of the string, or the first invalid character.
-Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
-8 or 9 will also trigger a warning.
-On return C<*len> is set to the length of the scanned string,
-and C<*flags> gives output flags.
+On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
+conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
+scan stops at the end of the string, or at just before the first invalid
+character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
+encountering an invalid character (except NUL) will also trigger a warning. On
+return C<*len_p> is set to the length of the scanned string, and C<*flags>
+gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
-and writes the value to C<*result> (or the value is discarded if C<result>
-is C<NULL>).
+and writes an approximation of the correct value into C<*result> (which is an
+NV; or the approximation is discarded if C<result> is NULL).
+
+If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
+digits may be separated from each other by a single underscore; also a single
+leading underscore is accepted.
-If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
-number may use C<"_"> characters to separate digits.
+The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
+this function.
=cut
UV
Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- const char *s = start;
+ PERL_ARGS_ASSERT_GROK_OCT;
+
+ return grok_oct(start, len_p, flags, result);
+}
+
+STATIC void
+S_output_non_portable(pTHX_ const U8 base)
+{
+ /* Display the proper message for a number in the given input base not
+ * fitting in 32 bits */
+ const char * which = (base == 2)
+ ? "Binary number > 0b11111111111111111111111111111111"
+ : (base == 8)
+ ? "Octal number > 037777777777"
+ : "Hexadecimal number > 0xffffffff";
+
+ PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
+
+ /* Also there are listings for the other two. That's because, since they
+ * are the first word, it would be hard for a user to find them there
+ * starting with a %s */
+ /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
+}
+
+UV
+Perl_grok_bin_oct_hex(pTHX_ const char *start,
+ STRLEN *len_p,
+ I32 *flags,
+ NV *result,
+ const unsigned shift, /* 1 for binary; 3 for octal;
+ 4 for hex */
+ const U8 class_bit,
+ const char prefix
+ )
+
+{
+ const char *s0 = start;
+ const char *s;
STRLEN len = *len_p;
+ STRLEN bytes_so_far; /* How many real digits have been processed */
UV value = 0;
NV value_nv = 0;
- const UV max_div_8 = UV_MAX / 8;
- const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
+ const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
+ const UV max_div= UV_MAX / base; /* Value above which, the next digit
+ processed would overflow */
+ const I32 input_flags = *flags;
+ const bool allow_underscores =
+ cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
bool overflowed = FALSE;
- PERL_ARGS_ASSERT_GROK_OCT;
+ /* In overflows, this keeps track of how much to multiply the overflowed NV
+ * by as we continue to parse the remaining digits */
+ NV factor = 0;
+
+ /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
+ * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
+ * find the numeric value of a digit. That requires more instructions than
+ * OCTAL_VALUE would, but gives the same result for the narrowed range of
+ * octal digits; same for binary. If it were ever critical to squeeze more
+ * performance from this, the function could become grok_hex, and a regen
+ * perl script could scan it and write out two edited copies for the other
+ * two functions. That would improve the performance of all three
+ * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
+ * parameters are now passed to this to avoid conditionals. Those could
+ * become declared consts, like:
+ * const U8 base = 16;
+ * const U8 base = 8;
+ * ...
+ */
+
+ PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
+
+ ASSUME(inRANGE(shift, 1, 4) && shift != 2);
+
+ /* Clear output flags; unlikely to find a problem that sets them */
+ *flags = 0;
+
+ if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
+
+ /* strip off leading b or 0b; x or 0x.
+ for compatibility silently suffer "b" and "0b" as valid binary; "x"
+ and "0x" as valid hex numbers. */
+ if (len >= 1) {
+ if (isALPHA_FOLD_EQ(s0[0], prefix)) {
+ s0++;
+ len--;
+ }
+ else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
+ s0+=2;
+ len-=2;
+ }
+ }
+ }
+
+ s = s0; /* s0 potentially advanced from 'start' */
- for (; len-- && *s; s++) {
- if (isOCTAL(*s)) {
+ /* Unroll the loop so that the first 8 digits are branchless except for the
+ * switch. A ninth hex one overflows a 32 bit word. */
+ switch (len) {
+ case 0:
+ return 0;
+ default:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 7:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 6:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 5:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 4:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 3:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 2:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 1:
+ if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+
+ if (LIKELY(len <= 8)) {
+ return value;
+ }
+
+ s++;
+ break;
+ }
+
+ bytes_so_far = s - s0;
+ factor = shift << bytes_so_far;
+ len -= bytes_so_far;
+
+ for (; len--; s++) {
+ if (_generic_isCC(*s, class_bit)) {
/* Write it in this wonky order with a goto to attempt to get the
compiler to make the common case integer-only loop pretty tight.
- */
+ With gcc seems to be much straighter code than old scan_hex.
+ (khw suspects that adding a LIKELY() just above would do the
+ same thing) */
redo:
- if (!overflowed) {
- if (value <= max_div_8) {
- value = (value << 3) | OCTAL_VALUE(*s);
- continue;
- }
- /* Bah. We're just overflowed. */
- /* diag_listed_as: Integer overflow in %s number */
- Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
- "Integer overflow in octal number");
+ if (LIKELY(value <= max_div)) {
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ /* Note XDIGIT_VALUE() is branchless, works on binary
+ * and octal as well, so can be used here, without
+ * slowing those down */
+ factor *= 1 << shift;
+ continue;
+ }
+
+ /* Bah. We are about to overflow. Instead, add the unoverflowed
+ * value to an NV that contains an approximation to the correct
+ * value. Each time through the loop we have increased 'factor' so
+ * that it gives how much the current approximation needs to
+ * effectively be shifted to make room for this new value */
+ value_nv *= factor;
+ value_nv += (NV) value;
+
+ /* Then we keep accumulating digits, until all are parsed. We
+ * start over using the current input value. This will be added to
+ * 'value_nv' eventually, either when all digits are gone, or we
+ * have overflowed this fresh start. */
+ value = XDIGIT_VALUE(*s);
+ factor = 1 << shift;
+
+ if (! overflowed) {
overflowed = TRUE;
- value_nv = (NV) value;
+ if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
+ && ckWARN_d(WARN_OVERFLOW))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ "Integer overflow in %s number",
+ (base == 16) ? "hexadecimal"
+ : (base == 2)
+ ? "binary"
+ : "octal");
+ }
}
- value_nv *= 8.0;
- /* If an NV has not enough bits in its mantissa to
- * represent a UV this summing of small low-order numbers
- * is a waste of time (because the NV cannot preserve
- * the low-order bits anyway): we could just remember when
- * did we overflow and in the end just multiply value_nv by the
- * right amount of 8-tuples. */
- value_nv += (NV) OCTAL_VALUE(*s);
continue;
}
- if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
+
+ if ( *s == '_'
+ && len
+ && allow_underscores
+ && _generic_isCC(s[1], class_bit)
+
+ /* Don't allow a leading underscore if the only-medial bit is
+ * set */
+ && ( LIKELY(s > s0)
+ || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
+ != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
+ {
--len;
++s;
goto redo;
}
- /* Allow \octal to work the DWIM way (that is, stop scanning
- * as soon as non-octal characters are seen, complain only if
- * someone seems to want to use the digits eight and nine. Since we
- * know it is not octal, then if isDIGIT, must be an 8 or 9). */
- if (isDIGIT(*s)) {
- if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
- Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
- "Illegal octal digit '%c' ignored", *s);
+
+ if (*s) {
+ if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
+ && ckWARN(WARN_DIGIT))
+ {
+ if (base != 8) {
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+ "Illegal %s digit '%c' ignored",
+ ((base == 2)
+ ? "binary"
+ : "hexadecimal"),
+ *s);
+ }
+ else if (isDIGIT(*s)) { /* octal base */
+
+ /* Allow \octal to work the DWIM way (that is, stop
+ * scanning as soon as non-octal characters are seen,
+ * complain only if someone seems to want to use the digits
+ * eight and nine. Since we know it is not octal, then if
+ * isDIGIT, must be an 8 or 9). */
+ Perl_warner(aTHX_ packWARN(WARN_DIGIT),
+ "Illegal octal digit '%c' ignored", *s);
+ }
+ }
+
+ if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
+ *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
+ }
}
+
break;
}
- if ( ( overflowed && value_nv > 4294967295.0)
+ *len_p = s - start;
+
+ if (LIKELY(! overflowed)) {
#if UVSIZE > 4
- || (!overflowed && value > 0xffffffff
- && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
+ if ( UNLIKELY(value > 0xffffffff)
+ && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
+ {
+ output_non_portable(base);
+ *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
+ }
#endif
- ) {
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
- "Octal number > 037777777777 non-portable");
- }
- *len_p = s - start;
- if (!overflowed) {
- *flags = 0;
return value;
}
- *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
+
+ /* Overflowed: Calculate the final overflow approximation */
+ value_nv *= factor;
+ value_nv += (NV) value;
+
+ output_non_portable(base);
+
+ *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
+ | PERL_SCAN_SILENT_NON_PORTABLE;
if (result)
*result = value_nv;
return UV_MAX;
one byte past the end of the recognized string. If the recognition fails,
zero is returned, and C<*sp> will not move.
-=for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
-=for apidoc Amn|bool|IS_NUMBER_INFINITY
-=for apidoc Amn|bool|IS_NUMBER_IN_UV
-=for apidoc Amn|bool|IS_NUMBER_NAN
-=for apidoc Amn|bool|IS_NUMBER_NEG
-=for apidoc Amn|bool|IS_NUMBER_NOT_INT
+=for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
+=for apidoc Amnh|bool|IS_NUMBER_INFINITY
+=for apidoc Amnh|bool|IS_NUMBER_IN_UV
+=for apidoc Amnh|bool|IS_NUMBER_NAN
+=for apidoc Amnh|bool|IS_NUMBER_NEG
+=for apidoc Amnh|bool|IS_NUMBER_NOT_INT
=cut
*/
s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
s++;
} else if (odh) {
- while (*s == '0') { /* 1.#INF00 */
+ while (s < send && *s == '0') { /* 1.#INF00 */
s++;
}
}
else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
s++;
flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
- while (*s == '0') { /* 1.#IND00 */
+ while (s < send && *s == '0') { /* 1.#IND00 */
s++;
}
- if (*s) {
+ if (s < send && *s) {
flags |= IS_NUMBER_TRAILING;
}
} else
s++;
flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+ if (s == send) {
+ return flags;
+ }
/* NaN can be followed by various stuff (NaNQ, NaNS), but
* there are also multiple different NaN values, and some
/* "nanq" or "nans" are ok, though generating
* these portably is tricky. */
s++;
+ if (s == send) {
+ return flags;
+ }
}
if (*s == '(') {
/* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
non-numeric text on an otherwise successful I<grok>, setting
C<IS_NUMBER_TRAILING> on the result.
+=for apidoc Amnh||PERL_SCAN_TRAILING
+
=for apidoc grok_number
Identical to C<grok_number_flags()> with C<flags> set to zero.
PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
- while (s < send && isSPACE(*s))
- s++;
- if (s == send) {
- return 0;
- } else if (*s == '-') {
- s++;
- numtype = IS_NUMBER_NEG;
+ if (UNLIKELY(isSPACE(*s))) {
+ s++;
+ while (s < send) {
+ if (LIKELY(! isSPACE(*s))) goto non_space;
+ s++;
+ }
+ return 0;
+ non_space: ;
}
- else if (*s == '+')
- s++;
- if (s == send)
- return 0;
+ /* See if signed. This assumes it is more likely to be unsigned, so
+ * penalizes signed by an extra conditional; rewarding unsigned by one fewer
+ * (because we detect '+' and '-' with a single test and then add a
+ * conditional to determine which) */
+ if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
+
+ /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
+ * 2F = '/'. That is, it is either a sign, or a character that doesn't
+ * belong in a number at all (unless it's a radix character in a weird
+ * locale). Given this, it's far more likely to be a minus than the
+ * others. (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E, (not 40
+ * because can't be a space) 60, 62, 64, 66, 68, 6A, 6C, 6E. Again,
+ * only potentially a weird radix character, or 4E='+', or 60='-') */
+ if (LIKELY(*s == '-')) {
+ s++;
+ numtype = IS_NUMBER_NEG;
+ }
+ else if (LIKELY(*s == '+'))
+ s++;
+ else /* Can't just return failure here, as it could be a weird radix
+ character */
+ goto done_sign;
+
+ if (UNLIKELY(s == send))
+ return 0;
+ done_sign: ;
+ }
/* The first digit (after optional sign): note that might
* also point to "infinity" or "nan", or "1.#INF". */
d = s;
/* next must be digit or the radix separator or beginning of infinity/nan */
- if (isDIGIT(*s)) {
+ if (LIKELY(isDIGIT(*s))) {
/* UVs are at least 32 bits, so the first 9 decimal digits cannot
overflow. */
- UV value = *s - '0';
- /* This construction seems to be more optimiser friendly.
- (without it gcc does the isDIGIT test and the *s - '0' separately)
- With it gcc on arm is managing 6 instructions (6 cycles) per digit.
- In theory the optimiser could deduce how far to unroll the loop
- before checking for overflow. */
- if (++s < send) {
- int digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
+ UV value = *s - '0'; /* Process this first (perhaps only) digit */
+ int digit;
+
+ s++;
+
+ switch(send - s) {
+ default: /* 8 or more remaining characters */
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 7:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 6:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 5:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
+ s++;
+ /* FALLTHROUGH */
+ case 4:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 3:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 2:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 1:
+ digit = *s - '0';
+ if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
+ value = value * 10 + digit;
+ s++;
+ /* FALLTHROUGH */
+ case 0: /* This case means the string consists of just the one
+ digit we already have processed */
+
+ /* If we got here by falling through other than the default: case, we
+ * have processed the whole string, and know it consists entirely of
+ * digits, and can't have overflowed. */
+ if (s >= send) {
+ if (valuep)
+ *valuep = value;
+ return numtype|IS_NUMBER_IN_UV;
+ }
+
+ /* Here, there are extra characters beyond the first 9 digits. Use a
+ * loop to accumulate any remaining digits, until we get a non-digit or
+ * would overflow. Note that leading zeros could cause us to get here
+ * without being close to overflowing.
+ *
+ * (The conditional 's >= send' above could be eliminated by making the
+ * default: in the switch to instead be 'case 8:', and process longer
+ * strings separately by using the loop below. This would penalize
+ * these inputs by the extra instructions needed for looping. That
+ * could be eliminated by copying the unwound code from above to handle
+ * the firt 9 digits of these. khw didn't think this saving of a
+ * single conditional was worth it.) */
+ do {
+ digit = *s - '0';
+ if (! inRANGE(digit, 0, 9)) goto mantissa_done;
+ if ( value < uv_max_div_10
+ || ( value == uv_max_div_10
+ && digit <= uv_max_mod_10))
+ {
value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- digit = *s - '0';
- if (inRANGE(digit, 0, 9)) {
- value = value * 10 + digit;
- if (++s < send) {
- /* Now got 9 digits, so need to check
- each time for overflow. */
- digit = *s - '0';
- while ( inRANGE(digit, 0, 9)
- && (value < uv_max_div_10
- || (value == uv_max_div_10
- && digit <= uv_max_mod_10))) {
- value = value * 10 + digit;
- if (++s < send)
- digit = *s - '0';
- else
- break;
- }
- if (inRANGE(digit, 0, 9)
- && (s < send)) {
- /* value overflowed.
- skip the remaining digits, don't
- worry about setting *valuep. */
- do {
- s++;
- } while (s < send && isDIGIT(*s));
- numtype |=
- IS_NUMBER_GREATER_THAN_UV_MAX;
- goto skip_value;
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
- }
+ s++;
}
- }
- }
- }
- }
+ else { /* value would overflow. skip the remaining digits, don't
+ worry about setting *valuep. */
+ do {
+ s++;
+ } while (s < send && isDIGIT(*s));
+ numtype |=
+ IS_NUMBER_GREATER_THAN_UV_MAX;
+ goto skip_value;
+ }
+ } while (s < send);
+ } /* End switch on input length */
+
+ mantissa_done:
numtype |= IS_NUMBER_IN_UV;
if (valuep)
*valuep = value;
while (s < send && isDIGIT(*s)) /* optional digits after the radix */
s++;
}
- }
+ } /* End of *s is a digit */
else if (GROK_NUMERIC_RADIX(&s, send)) {
numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
/* no digits before the radix means we need digits after it */
return 0;
}
- if (s > d && s < send) {
+ if (LIKELY(s > d) && s < send) {
/* we can have an optional exponent part */
- if (isALPHA_FOLD_EQ(*s, 'e')) {
+ if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
s++;
if (s < send && (*s == '-' || *s == '+'))
s++;
numtype |= IS_NUMBER_NOT_INT;
}
}
- while (s < send && isSPACE(*s))
+
+ while (s < send) {
+ if (LIKELY(! isSPACE(*s))) goto end_space;
s++;
- if (s >= send)
- return numtype;
- if (memEQs(pv, len, "0 but true")) {
+ }
+ return numtype;
+
+ end_space:
+
+ if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
if (valuep)
*valuep = 0;
return IS_NUMBER_IN_UV;
}
+
/* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
- if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
+ if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
/* Really detect inf/nan. Start at d, not s, since the above
* code might have already consumed the "1." or "1". */
const int infnan = Perl_grok_infnan(aTHX_ &d, send);
passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
valid value, it will return TRUE, setting C<*endptr> to the byte following the
final digit of the value. But if there is no constraint at what's looked at,
-all of C<pv> must be valid in order for TRUE to be returned.
+all of C<pv> must be valid in order for TRUE to be returned. C<*endptr> is
+unchanged from its value on input if FALSE is returned;
The only characters this accepts are the decimal digits '0'..'9'.
NV
Perl_my_atof(pTHX_ const char* s)
{
- /* 's' must be NUL terminated */
+
+/*
+=for apidoc my_atof
+
+L<C<atof>(3)>, but properly works with Perl locale handling, accepting a dot
+radix character always, but also the current locale's radix character if and
+only if called from within the lexical scope of a Perl C<use locale> statement.
+
+N.B. C<s> must be NUL terminated.
+
+=cut
+*/
NV x = 0.0;
/* now apply the sign */
if (negative)
result[2] = -result[2];
-#endif /* USE_PERL_ATOF */
*value = result[2];
return (char *)s;
+#else /* USE_PERL_ATOF */
+ /* If you see this error you both don't have strtod (or configured -Ud_strtod or
+ or it's long double/quadmath equivalent) and disabled USE_PERL_ATOF, thus
+ removing any way for perl to convert strings to floating point numbers.
+ */
+# error No mechanism to convert strings to numbers available
+#endif
}
/*
=for apidoc isinfnan
-C<Perl_isinfnan()> is utility function that returns true if the NV
+C<Perl_isinfnan()> is a utility function that returns true if the NV
argument is either an infinity or a C<NaN>, false otherwise. To test
in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
}
/*
-=for apidoc
+=for apidoc isinfnansv
Checks whether the argument would be either an infinity or C<NaN> when used
as a number, but is careful not to trigger non-numeric or uninitialized