/*
=head1 Numeric functions
+=cut
+
This file contains all the stuff needed by perl for manipulating numeric
values, including such things as replacements for the OS's atof() function.
-=cut
-
*/
#include "EXTERN.h"
#include "perl.h"
U32
-Perl_cast_ulong(pTHX_ NV f)
+Perl_cast_ulong(NV f)
{
- PERL_UNUSED_CONTEXT;
if (f < 0.0)
return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
if (f < U32_MAX_P1) {
}
I32
-Perl_cast_i32(pTHX_ NV f)
+Perl_cast_i32(NV f)
{
- PERL_UNUSED_CONTEXT;
if (f < I32_MAX_P1)
return f < I32_MIN ? I32_MIN : (I32) f;
if (f < U32_MAX_P1) {
}
IV
-Perl_cast_iv(pTHX_ NV f)
+Perl_cast_iv(NV f)
{
- PERL_UNUSED_CONTEXT;
if (f < IV_MAX_P1)
return f < IV_MIN ? IV_MIN : (IV) f;
if (f < UV_MAX_P1) {
}
UV
-Perl_cast_uv(pTHX_ NV f)
+Perl_cast_uv(NV f)
{
- PERL_UNUSED_CONTEXT;
if (f < 0.0)
return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
if (f < UV_MAX_P1) {
and I<*flags> gives output flags.
If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
+and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).
The binary number may optionally be prefixed with "0b" or "b" unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
number may use '_' characters to separate digits.
converts a string representing a hex number to numeric form.
-On entry I<start> and I<*len> give the string to scan, I<*flags> gives
+On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives
conversion flags, and I<result> should be NULL or a pointer to an NV.
The scan stops at the end of the string, or the first invalid character.
Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
and I<*flags> gives output flags.
If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
+and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).
The hex number may optionally be prefixed with "0x" or "x" unless
-C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
+C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
number may use '_' characters to separate digits.
UV
Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
{
- dVAR;
const char *s = start;
STRLEN len = *len_p;
UV value = 0;
}
for (; len-- && *s; s++) {
- const char *hexdigit = strchr(PL_hexdigit, *s);
- if (hexdigit) {
+ if (isXDIGIT(*s)) {
/* Write it in this wonky order with a goto to attempt to get the
compiler to make the common case integer-only loop pretty tight.
With gcc seems to be much straighter code than old scan_hex. */
redo:
if (!overflowed) {
if (value <= max_div_16) {
- value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
+ value = (value << 4) | XDIGIT_VALUE(*s);
continue;
}
/* Bah. We're just overflowed. */
* the low-order bits anyway): we could just remember when
* we overflowed and at the end just multiply value_nv by the
* right power of 16. */
- value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
+ value_nv += (NV) XDIGIT_VALUE(*s);
continue;
}
if (*s == '_' && len && allow_underscores && s[1]
- && (hexdigit = strchr(PL_hexdigit, s[1])))
+ && isXDIGIT(s[1]))
{
--len;
++s;
and I<*flags> gives output flags.
If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
-and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
+and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
and writes the value to I<*result> (or the value is discarded if I<result>
is NULL).
PERL_ARGS_ASSERT_GROK_OCT;
for (; len-- && *s; s++) {
- /* gcc 2.95 optimiser not smart enough to figure that this subtraction
- out front allows slicker code. */
- int digit = *s - '0';
- if (digit >= 0 && digit <= 7) {
+ if (isOCTAL(*s)) {
/* Write it in this wonky order with a goto to attempt to get the
compiler to make the common case integer-only loop pretty tight.
*/
redo:
if (!overflowed) {
if (value <= max_div_8) {
- value = (value << 3) | digit;
+ value = (value << 3) | OCTAL_VALUE(*s);
continue;
}
/* Bah. We're just overflowed. */
* the low-order bits anyway): we could just remember when
* we overflowed and at the end just multiply value_nv by the
* right power of 8. */
- value_nv += (NV)digit;
+ value_nv += (NV) OCTAL_VALUE(*s);
continue;
}
- if (digit == ('_' - '0') && len && allow_underscores
- && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
- {
- --len;
- ++s;
- goto redo;
- }
+ if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
+ --len;
+ ++s;
+ goto redo;
+ }
/* Allow \octal to work the DWIM way (that is, stop scanning
* as soon as non-octal characters are seen, complain only if
- * someone seems to want to use the digits eight and nine). */
- if (digit == 8 || digit == 9) {
+ * someone seems to want to use the digits eight and nine. Since we
+ * know it is not octal, then if isDIGIT, must be an 8 or 9). */
+ if (isDIGIT(*s)) {
if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
"Illegal octal digit '%c' ignored", *s);
/*
=for apidoc scan_bin
-For backwards compatibility. Use C<grok_bin> instead.
+For backwards compatibility. Use C<grok_bin> instead.
=for apidoc scan_hex
-For backwards compatibility. Use C<grok_hex> instead.
+For backwards compatibility. Use C<grok_hex> instead.
=for apidoc scan_oct
-For backwards compatibility. Use C<grok_oct> instead.
+For backwards compatibility. Use C<grok_oct> instead.
=cut
*/
Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
{
#ifdef USE_LOCALE_NUMERIC
- dVAR;
-
PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
- if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
- STRLEN len;
- const char * const radix = SvPV(PL_numeric_radix_sv, len);
- if (*sp + len <= send && memEQ(*sp, radix, len)) {
- *sp += len;
- return TRUE;
+ if (IN_LC(LC_NUMERIC)) {
+ DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
+ if (PL_numeric_radix_sv) {
+ STRLEN len;
+ const char * const radix = SvPV(PL_numeric_radix_sv, len);
+ if (*sp + len <= send && memEQ(*sp, radix, len)) {
+ *sp += len;
+ RESTORE_LC_NUMERIC();
+ return TRUE;
+ }
}
+ RESTORE_LC_NUMERIC();
}
/* always try "." if numeric radix didn't match because
* we may have data from different locales mixed */
}
/*
-=for apidoc grok_number
+=for apidoc grok_number_flags
Recognise (or not) a number. The type of the number is returned
(0 if unrecognised), otherwise it is a bit-ORed combination of
IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
-If the value of the number can fit an in UV, it is returned in the *valuep
+If the value of the number can fit in a UV, it is returned in *valuep.
IS_NUMBER_IN_UV will be set to indicate that *valuep is valid. IS_NUMBER_IN_UV
will never be set unless *valuep is valid, but *valuep may have been assigned
to during processing even though IS_NUMBER_IN_UV is not set on return.
absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
number is larger than a UV.
+C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
+non-numeric text on an otherwise successful I<grok>, setting
+C<IS_NUMBER_TRAILING> on the result.
+
+=for apidoc grok_number
+
+Identical to grok_number_flags() with flags set to zero.
+
=cut
*/
int
Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
{ /* Wrapper: forwards to grok_number_flags() with flags set to zero. */
+ PERL_ARGS_ASSERT_GROK_NUMBER;
+
+ return grok_number_flags(pv, len, valuep, 0); /* flags == 0, so PERL_SCAN_TRAILING is off */
+}
+
+int
+Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
+{
const char *s = pv;
const char * const send = pv + len;
const UV max_div_10 = UV_MAX / 10;
int sawinf = 0;
int sawnan = 0;
- PERL_ARGS_ASSERT_GROK_NUMBER;
+ PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
while (s < send && isSPACE(*s))
s++;
numtype = IS_NUMBER_NEG;
}
else if (*s == '+')
- s++;
+ s++;
if (s == send)
return 0;
} else if (s < send) {
/* we can have an optional exponent part */
if (*s == 'e' || *s == 'E') {
- /* The only flag we keep is sign. Blow away any "it's UV" */
- numtype &= IS_NUMBER_NEG;
- numtype |= IS_NUMBER_NOT_INT;
s++;
if (s < send && (*s == '-' || *s == '+'))
s++;
s++;
} while (s < send && isDIGIT(*s));
}
+ else if (flags & PERL_SCAN_TRAILING)
+ return numtype | IS_NUMBER_TRAILING;
else
- return 0;
+ return 0;
+
+ /* The only flag we keep is sign. Blow away any "it's UV" */
+ numtype &= IS_NUMBER_NEG;
+ numtype |= IS_NUMBER_NOT_INT;
}
}
while (s < send && isSPACE(*s))
*valuep = 0;
return IS_NUMBER_IN_UV;
}
+ else if (flags & PERL_SCAN_TRAILING) {
+ return numtype | IS_NUMBER_TRAILING;
+ }
+
return 0;
}
+/*
+=for perlapi
+
+grok_atou is a safer replacement for atoi and strtoul.
+
+atoi has severe problems with illegal inputs, cannot be used
+for incremental parsing, and therefore should be avoided.
+
+atoi and strtoul are also affected by locale settings, which can
+also be seen as a bug (global state controlled by user environment).
+
+grok_atou parses a C-style zero-byte terminated string.
+
+Returns the unsigned value, if a valid one can be parsed.
+
+Only the decimal digits '0'..'9' are accepted.
+
+As opposed to atoi or strtoul:
+- does NOT allow optional leading whitespace
+- does NOT allow negative inputs
+
+Also rejected are:
+- leading plus signs
+- leading zeros (meaning that only "0" is the zero)
+
+Trailing non-digit bytes are allowed if the endptr is non-NULL.
+On return the *endptr will contain the pointer to the first non-digit byte.
+
+If the value overflows, returns Size_t_MAX, and sets the *endptr
+to NULL, unless endptr is NULL.
+
+If the endptr is NULL, the first non-digit byte MUST be
+the zero byte terminating the pv, or zero will be returned.
+
+=cut
+*/
+
+Size_t
+Perl_grok_atou(const char *pv, const char** endptr)
+{
+    const char* s = pv;
+    const char** eptr;
+    const char* end2; /* Receives the end pointer when endptr is NULL. */
+    Size_t val = 0;   /* The return value. */
+
+    PERL_ARGS_ASSERT_GROK_ATOU;
+
+    eptr = endptr ? endptr : &end2;
+
+    if (!isDIGIT(*s)) {
+        /* No progress: empty string, sign, whitespace or other
+         * non-digit at the start means nothing could be parsed. */
+        *eptr = NULL;
+        return 0;
+    }
+
+    if (*s == '0' && isDIGIT(*(s + 1))) {
+        /* Leading zeros are rejected (only "0" is the zero), so "000"
+         * and "000123" are invalid.  They are reported in the same way
+         * as an overflow: Size_t_MAX with *eptr set to NULL. */
+        *eptr = NULL;
+        return Size_t_MAX;
+    }
+
+    /* *s is known to be a digit here, so at least one digit is consumed.
+     * The scan runs to the first non-digit byte; no artificial cap on
+     * the digit count is needed because the overflow test below stops
+     * any run that is too long to be represented. */
+    do {
+        const Size_t digit = (Size_t) (*s - '0');
+
+        /* Would val * 10 + digit exceed Size_t_MAX?  Test this before
+         * multiplying: unsigned arithmetic wraps silently, and the
+         * wrapped result can be larger than the previous value, so a
+         * comparison made after the fact misses some overflows. */
+        if (val > (Size_t_MAX - digit) / 10) {
+            *eptr = NULL;
+            return Size_t_MAX;
+        }
+        val = val * 10 + digit;
+        s++;
+    } while (isDIGIT(*s));
+
+    if (endptr == NULL && *s) {
+        return 0; /* If endptr is NULL, no trailing non-digits allowed. */
+    }
+    *eptr = s; /* Points at the first non-digit byte. */
+    return val;
+}
+
STATIC NV
S_mulexp10(NV value, I32 exponent)
{
* a hammer. Therefore we need to catch potential overflows before
* it's too late. */
-#if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
+#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
STMT_START {
const NV exp_v = log10(value);
if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
if (exponent < 0) {
negative = 1;
exponent = -exponent;
+#ifdef NV_MAX_10_EXP
+ /* for something like 1234 x 10^-309, the action of calculating
+ * the intermediate value 10^309 then returning 1234 / (10^309)
+ * will fail, since 10^309 becomes infinity. In this case try to
+ * refactor it as 123 / (10^308) etc.
+ */
+ while (value && exponent > NV_MAX_10_EXP) {
+ exponent--;
+ value /= 10;
+ }
+#endif
}
for (bit = 1; exponent; bit <<= 1) {
if (exponent & bit) {
{
NV x = 0.0;
#ifdef USE_LOCALE_NUMERIC
- dVAR;
-
PERL_ARGS_ASSERT_MY_ATOF;
- if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
- const char *standard = NULL, *local = NULL;
- bool use_standard_radix;
+ {
+ DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
+ if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) {
+ const char *standard = NULL, *local = NULL;
+ bool use_standard_radix;
- /* Look through the string for the first thing that looks like a
- * decimal point: either the value in the current locale or the
- * standard fallback of '.'. The one which appears earliest in the
- * input string is the one that we should have atof look for. Note that
- * we have to determine this beforehand because on some systems,
- * Perl_atof2 is just a wrapper around the system's atof. */
- standard = strchr(s, '.');
- local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
+ /* Look through the string for the first thing that looks like a
+ * decimal point: either the value in the current locale or the
+ * standard fallback of '.'. The one which appears earliest in the
+ * input string is the one that we should have atof look for. Note
+ * that we have to determine this beforehand because on some
+ * systems, Perl_atof2 is just a wrapper around the system's atof.
+ * */
+ standard = strchr(s, '.');
+ local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
- use_standard_radix = standard && (!local || standard < local);
+ use_standard_radix = standard && (!local || standard < local);
- if (use_standard_radix)
- SET_NUMERIC_STANDARD();
+ if (use_standard_radix)
+ SET_NUMERIC_STANDARD();
- Perl_atof2(s, x);
+ Perl_atof2(s, x);
- if (use_standard_radix)
- SET_NUMERIC_LOCAL();
+ if (use_standard_radix)
+ SET_NUMERIC_LOCAL();
+ }
+ else
+ Perl_atof2(s, x);
+ RESTORE_LC_NUMERIC();
}
- else
- Perl_atof2(s, x);
#else
Perl_atof2(s, x);
#endif
switch (*s) {
case '-':
negative = 1;
- /* fall through */
+ /* FALLTHROUGH */
case '+':
++s;
}
switch (*s) {
case '-':
expnegative = 1;
- /* fall through */
+ /* FALLTHROUGH */
case '+':
++s;
}