3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
19 =head1 Numeric functions
23 This file contains all the stuff needed by perl for manipulating numeric
24 values, including such things as replacements for the OS's atof() function
29 #define PERL_IN_NUMERIC_C
35 S_strtod(pTHX_ const char * const s, char ** e)
37 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
40 STORE_LC_NUMERIC_SET_TO_NEEDED();
44 result = strtoflt128(s, e);
46 # elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \
47 && defined(USE_LONG_DOUBLE)
48 # if defined(__MINGW64_VERSION_MAJOR)
49 /***********************************************
50 We are unable to use strtold because of
51 https://sourceforge.net/p/mingw-w64/bugs/711/
53 https://sourceforge.net/p/mingw-w64/bugs/725/
55 but __mingw_strtold is fine.
56 ***********************************************/
58 result = __mingw_strtold(s, e);
62 result = strtold(s, e);
65 # elif defined(HAS_STRTOD)
67 result = strtod(s, e);
70 # error No strtod() equivalent found
78 #endif /* #ifdef Perl_strtod */
84 This function is equivalent to the libc strtod() function, and is available
85 even on platforms that lack plain strtod(). Its return value is the best
86 available precision depending on platform capabilities and F<Configure>
89 It properly handles the locale radix character, meaning it expects a dot except
90 when called from within the scope of S<C<use locale>>, in which case the radix
91 character should be that specified by the current locale.
93 The synonym Strtod() may be used instead.
100 Perl_my_strtod(const char * const s, char **e)
104 PERL_ARGS_ASSERT_MY_STRTOD;
108 return S_strtod(aTHX_ s, e);
114 char ** end_ptr = NULL;
116 *end_ptr = my_atof2(s, &result);
134 Perl_cast_ulong(NV f)
137 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
138 if (f < U32_MAX_P1) {
140 if (f < U32_MAX_P1_HALF)
142 f -= U32_MAX_P1_HALF;
143 return ((U32) f) | (1 + (U32_MAX >> 1));
148 return f > 0 ? U32_MAX : 0 /* NaN */;
155 return f < I32_MIN ? I32_MIN : (I32) f;
156 if (f < U32_MAX_P1) {
158 if (f < U32_MAX_P1_HALF)
160 f -= U32_MAX_P1_HALF;
161 return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
166 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
173 return f < IV_MIN ? IV_MIN : (IV) f;
176 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
177 if (f < UV_MAX_P1_HALF)
180 return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
185 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
192 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
195 if (f < UV_MAX_P1_HALF)
198 return ((UV) f) | (1 + (UV_MAX >> 1));
203 return f > 0 ? UV_MAX : 0 /* NaN */;
209 converts a string representing a binary number to numeric form.
211 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
213 The scan stops at the end of the string, or the first invalid character.
214 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
215 invalid character will also trigger a warning.
216 On return C<*len> is set to the length of the scanned string,
217 and C<*flags> gives output flags.
219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
220 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
222 and writes the value to C<*result> (or the value is discarded if C<result>
225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If
227 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
228 number may use C<"_"> characters to separate digits.
230 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
231 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
232 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
233 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
234 =for apidoc Amnh||PERL_SCAN_TRAILING
238 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
239 which suppresses any message for non-portable numbers that are still valid
244 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
246 PERL_ARGS_ASSERT_GROK_BIN;
248 return grok_bin(start, len_p, flags, result);
254 converts a string representing a hex number to numeric form.
256 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
257 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
258 The scan stops at the end of the string, or the first invalid character.
259 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
260 invalid character will also trigger a warning.
261 On return C<*len> is set to the length of the scanned string,
262 and C<*flags> gives output flags.
264 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
265 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
266 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
267 and writes the value to C<*result> (or the value is discarded if C<result>
270 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
271 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry. If
272 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
273 number may use C<"_"> characters to separate digits.
277 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
278 which suppresses any message for non-portable numbers, but which are valid
283 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
285 PERL_ARGS_ASSERT_GROK_HEX;
287 return grok_hex(start, len_p, flags, result);
291 Perl_grok_bin_oct_hex(pTHX_ const char *start,
295 const unsigned shift) /* 1 for binary; 3 for octal;
298 const char *s = start;
302 const PERL_UINT_FAST8_T base = 1 << shift;
303 const UV max_div= UV_MAX / base;
304 const PERL_UINT_FAST8_T class_bit = (base == 2)
309 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
310 bool overflowed = FALSE;
312 PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
314 ASSUME(inRANGE(shift, 1, 4) && shift != 2);
316 if (base != 8 && !(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
317 const char prefix = base == 2 ? 'b' : 'x';
319 /* strip off leading b or 0b; x or 0x.
320 for compatibility silently suffer "b" and "0b" as valid binary; "x"
321 and "0x" as valid hex numbers. */
323 if (isALPHA_FOLD_EQ(s[0], prefix)) {
327 else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], prefix))) {
334 for (; len-- && *s; s++) {
335 if (_generic_isCC(*s, class_bit)) {
336 /* Write it in this wonky order with a goto to attempt to get the
337 compiler to make the common case integer-only loop pretty tight.
338 With gcc seems to be much straighter code than old scan_hex. */
341 if (value <= max_div) {
342 value = (value << shift) | XDIGIT_VALUE(*s);
343 /* Note XDIGIT_VALUE() is branchless, works on binary
344 * and octal as well, so can be used here, without
345 * slowing those down */
348 /* Bah. We're just overflowed. */
349 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
350 "Integer overflow in %s number",
351 (base == 16) ? "hexadecimal"
356 value_nv = (NV) value;
359 /* If an NV has not enough bits in its mantissa to
360 * represent a UV this summing of small low-order numbers
361 * is a waste of time (because the NV cannot preserve
362 * the low-order bits anyway): we could just remember when
363 * did we overflow and in the end just multiply value_nv by the
364 * right amount of base-tuples. */
365 value_nv += (NV) XDIGIT_VALUE(*s);
371 && _generic_isCC(s[1], class_bit))
377 if ( ! (*flags & PERL_SCAN_SILENT_ILLDIGIT)
378 && ckWARN(WARN_DIGIT))
381 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
382 "Illegal %s digit '%c' ignored",
388 else if (isDIGIT(*s)) { /* octal base */
390 /* Allow \octal to work the DWIM way (that is, stop scanning as
391 * soon as non-octal characters are seen, complain only if
392 * someone seems to want to use the digits eight and nine.
393 * Since we know it is not octal, then if isDIGIT, must be an 8
395 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
396 "Illegal octal digit '%c' ignored", *s);
402 if ( ( overflowed && value_nv > 4294967295.0)
404 || ( ! overflowed && value > 0xffffffff
405 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
408 const char * which = (base == 2)
409 ? "Binary number > 0b11111111111111111111111111111111"
411 ? "Octal number > 037777777777"
412 : "Hexadecimal number > 0xffffffff";
413 /* Also there are listings for the other two. Since they are the first
414 * word, it would be hard for a user to find them there starting with a
416 /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
417 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
425 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
434 converts a string representing an octal number to numeric form.
436 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
437 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
438 The scan stops at the end of the string, or the first invalid character.
439 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
440 8 or 9 will also trigger a warning.
441 On return C<*len> is set to the length of the scanned string,
442 and C<*flags> gives output flags.
444 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
445 and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
446 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
447 and writes the value to C<*result> (or the value is discarded if C<result>
450 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
451 number may use C<"_"> characters to separate digits.
455 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
456 which suppresses any message for non-portable numbers, but which are valid
461 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
463 PERL_ARGS_ASSERT_GROK_OCT;
465 return grok_oct(start, len_p, flags, result);
471 For backwards compatibility. Use C<grok_bin> instead.
475 For backwards compatibility. Use C<grok_hex> instead.
479 For backwards compatibility. Use C<grok_oct> instead.
485 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
488 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
489 const UV ruv = grok_bin (start, &len, &flags, &rnv);
491 PERL_ARGS_ASSERT_SCAN_BIN;
494 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
498 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
501 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
502 const UV ruv = grok_oct (start, &len, &flags, &rnv);
504 PERL_ARGS_ASSERT_SCAN_OCT;
507 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
511 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
514 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
515 const UV ruv = grok_hex (start, &len, &flags, &rnv);
517 PERL_ARGS_ASSERT_SCAN_HEX;
520 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
524 =for apidoc grok_numeric_radix
526 Scan and skip for a numeric decimal separator (radix).
531 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
533 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
535 #ifdef USE_LOCALE_NUMERIC
537 if (IN_LC(LC_NUMERIC)) {
540 bool matches_radix = FALSE;
541 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
543 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
545 radix = SvPV(PL_numeric_radix_sv, len);
546 radix = savepvn(radix, len);
548 RESTORE_LC_NUMERIC();
550 if (*sp + len <= send) {
551 matches_radix = memEQ(*sp, radix, len);
564 /* always try "." if numeric radix didn't match because
565 * we may have data from different locales mixed */
566 if (*sp < send && **sp == '.') {
575 =for apidoc grok_infnan
577 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
578 or "not a number", and returns one of the following flag combinations:
582 IS_NUMBER_INFINITY | IS_NUMBER_NEG
583 IS_NUMBER_NAN | IS_NUMBER_NEG
586 possibly |-ed with C<IS_NUMBER_TRAILING>.
588 If an infinity or a not-a-number is recognized, C<*sp> will point to
589 one byte past the end of the recognized string. If the recognition fails,
590 zero is returned, and C<*sp> will not move.
592 =for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
593 =for apidoc Amn|bool|IS_NUMBER_INFINITY
594 =for apidoc Amn|bool|IS_NUMBER_IN_UV
595 =for apidoc Amn|bool|IS_NUMBER_NAN
596 =for apidoc Amn|bool|IS_NUMBER_NEG
597 =for apidoc Amn|bool|IS_NUMBER_NOT_INT
603 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
607 #if defined(NV_INF) || defined(NV_NAN)
608 bool odh = FALSE; /* one-dot-hash: 1.#INF */
610 PERL_ARGS_ASSERT_GROK_INFNAN;
613 s++; if (s == send) return 0;
615 else if (*s == '-') {
616 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
617 s++; if (s == send) return 0;
621 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
622 * Let's keep the dot optional. */
623 s++; if (s == send) return 0;
625 s++; if (s == send) return 0;
628 s++; if (s == send) return 0;
634 if (isALPHA_FOLD_EQ(*s, 'I')) {
635 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
637 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
638 s++; if (s == send) return 0;
639 if (isALPHA_FOLD_EQ(*s, 'F')) {
641 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
643 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
644 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
645 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
646 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
647 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
650 while (*s == '0') { /* 1.#INF00 */
654 while (s < send && isSPACE(*s))
656 if (s < send && *s) {
657 flags |= IS_NUMBER_TRAILING;
659 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
661 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
663 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
664 while (*s == '0') { /* 1.#IND00 */
668 flags |= IS_NUMBER_TRAILING;
674 /* Maybe NAN of some sort */
676 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
678 /* XXX do something with the snan/qnan difference */
679 s++; if (s == send) return 0;
682 if (isALPHA_FOLD_EQ(*s, 'N')) {
683 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
684 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
687 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
692 /* NaN can be followed by various stuff (NaNQ, NaNS), but
693 * there are also multiple different NaN values, and some
694 * implementations output the "payload" values,
695 * e.g. NaN123, NAN(abc), while some legacy implementations
696 * have weird stuff like NaN%. */
697 if (isALPHA_FOLD_EQ(*s, 'q') ||
698 isALPHA_FOLD_EQ(*s, 's')) {
699 /* "nanq" or "nans" are ok, though generating
700 * these portably is tricky. */
707 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
711 return flags | IS_NUMBER_TRAILING;
714 while (t < send && *t && *t != ')') {
718 return flags | IS_NUMBER_TRAILING;
723 if (s[0] == '0' && s + 2 < t &&
724 isALPHA_FOLD_EQ(s[1], 'x') &&
727 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
728 nanval = grok_hex(s, &len, &flags, NULL);
729 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
732 nantype = IS_NUMBER_IN_UV;
735 } else if (s[0] == '0' && s + 2 < t &&
736 isALPHA_FOLD_EQ(s[1], 'b') &&
737 (s[2] == '0' || s[2] == '1')) {
739 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
740 nanval = grok_bin(s, &len, &flags, NULL);
741 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
744 nantype = IS_NUMBER_IN_UV;
750 grok_number_flags(s, t - s, &nanval,
752 PERL_SCAN_ALLOW_UNDERSCORES);
753 /* Unfortunately grok_number_flags() doesn't
754 * tell how far we got and the ')' will always
755 * be "trailing", so we need to double-check
756 * whether we had something dubious. */
757 for (u = s; u < t; u++) {
759 flags |= IS_NUMBER_TRAILING;
766 /* XXX Doesn't do octal: nan("0123").
767 * Probably not a big loss. */
769 if ((nantype & IS_NUMBER_NOT_INT) ||
770 !(nantype && IS_NUMBER_IN_UV)) {
771 /* XXX the nanval is currently unused, that is,
772 * not inserted as the NaN payload of the NV.
773 * But the above code already parses the C99
774 * nan(...) format. See below, and see also
775 * the nan() in POSIX.xs.
777 * Certain configuration combinations where
778 * NVSIZE is greater than UVSIZE mean that
779 * a single UV cannot contain all the possible
780 * NaN payload bits. There would need to be
781 * some more generic syntax than "nan($uv)".
783 * Issues to keep in mind:
785 * (1) In most common cases there would
786 * not be an integral number of bytes that
787 * could be set, only a certain number of bits.
788 * For example for the common case of
789 * NVSIZE == UVSIZE == 8 there is room for 52
790 * bits in the payload, but the most significant
791 * bit is commonly reserved for the
792 * signaling/quiet bit, leaving 51 bits.
793 * Furthermore, the C99 nan() is supposed
794 * to generate quiet NaNs, so it is doubtful
795 * whether it should be able to generate
796 * signaling NaNs. For the x86 80-bit doubles
797 * (if building a long double Perl) there would
798 * be 62 bits (s/q bit being the 63rd).
800 * (2) Endianness of the payload bits. If the
801 * payload is specified as an UV, the low-order
802 * bits of the UV are naturally little-endianed
803 * (rightmost) bits of the payload. The endianness
804 * of UVs and NVs can be different. */
808 flags |= IS_NUMBER_TRAILING;
811 /* Looked like nan(...), but no close paren. */
812 flags |= IS_NUMBER_TRAILING;
815 while (s < send && isSPACE(*s))
817 if (s < send && *s) {
818 /* Note that we here implicitly accept (parse as
819 * "nan", but with warnings) also any other weird
820 * trailing stuff for "nan". In the above we just
821 * check that if we got the C99-style "nan(...)",
822 * the "..." looks sane.
823 * If in future we accept more ways of specifying
824 * the nan payload, the accepting would happen around
826 flags |= IS_NUMBER_TRAILING;
835 while (s < send && isSPACE(*s))
839 PERL_UNUSED_ARG(send);
840 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
846 =for apidoc grok_number_flags
848 Recognise (or not) a number. The type of the number is returned
849 (0 if unrecognised), otherwise it is a bit-ORed combination of
850 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
851 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
853 If the value of the number can fit in a UV, it is returned in C<*valuep>.
854 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
855 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
856 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
857 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
858 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
860 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
861 seen (in which case C<*valuep> gives the true value truncated to an integer), and
862 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
863 absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the
864 number is larger than a UV.
866 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
867 non-numeric text on an otherwise successful I<grok>, setting
868 C<IS_NUMBER_TRAILING> on the result.
870 =for apidoc grok_number
872 Identical to C<grok_number_flags()> with C<flags> set to zero.
877 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
879 PERL_ARGS_ASSERT_GROK_NUMBER;
881 return grok_number_flags(pv, len, valuep, 0);
884 static const UV uv_max_div_10 = UV_MAX / 10;
885 static const U8 uv_max_mod_10 = UV_MAX % 10;
888 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
891 const char * const send = pv + len;
895 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
897 while (s < send && isSPACE(*s))
901 } else if (*s == '-') {
903 numtype = IS_NUMBER_NEG;
911 /* The first digit (after optional sign): note that might
912 * also point to "infinity" or "nan", or "1.#INF". */
915 /* next must be digit or the radix separator or beginning of infinity/nan */
917 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
920 /* This construction seems to be more optimiser friendly.
921 (without it gcc does the isDIGIT test and the *s - '0' separately)
922 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
923 In theory the optimiser could deduce how far to unroll the loop
924 before checking for overflow. */
926 int digit = *s - '0';
927 if (inRANGE(digit, 0, 9)) {
928 value = value * 10 + digit;
931 if (inRANGE(digit, 0, 9)) {
932 value = value * 10 + digit;
935 if (inRANGE(digit, 0, 9)) {
936 value = value * 10 + digit;
939 if (inRANGE(digit, 0, 9)) {
940 value = value * 10 + digit;
943 if (inRANGE(digit, 0, 9)) {
944 value = value * 10 + digit;
947 if (inRANGE(digit, 0, 9)) {
948 value = value * 10 + digit;
951 if (inRANGE(digit, 0, 9)) {
952 value = value * 10 + digit;
955 if (inRANGE(digit, 0, 9)) {
956 value = value * 10 + digit;
958 /* Now got 9 digits, so need to check
959 each time for overflow. */
961 while ( inRANGE(digit, 0, 9)
962 && (value < uv_max_div_10
963 || (value == uv_max_div_10
964 && digit <= uv_max_mod_10))) {
965 value = value * 10 + digit;
971 if (inRANGE(digit, 0, 9)
974 skip the remaining digits, don't
975 worry about setting *valuep. */
978 } while (s < send && isDIGIT(*s));
980 IS_NUMBER_GREATER_THAN_UV_MAX;
1000 numtype |= IS_NUMBER_IN_UV;
1005 if (GROK_NUMERIC_RADIX(&s, send)) {
1006 numtype |= IS_NUMBER_NOT_INT;
1007 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
1011 else if (GROK_NUMERIC_RADIX(&s, send)) {
1012 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1013 /* no digits before the radix means we need digits after it */
1014 if (s < send && isDIGIT(*s)) {
1017 } while (s < send && isDIGIT(*s));
1019 /* integer approximation is valid - it's 0. */
1027 if (s > d && s < send) {
1028 /* we can have an optional exponent part */
1029 if (isALPHA_FOLD_EQ(*s, 'e')) {
1031 if (s < send && (*s == '-' || *s == '+'))
1033 if (s < send && isDIGIT(*s)) {
1036 } while (s < send && isDIGIT(*s));
1038 else if (flags & PERL_SCAN_TRAILING)
1039 return numtype | IS_NUMBER_TRAILING;
1043 /* The only flag we keep is sign. Blow away any "it's UV" */
1044 numtype &= IS_NUMBER_NEG;
1045 numtype |= IS_NUMBER_NOT_INT;
1048 while (s < send && isSPACE(*s))
1052 if (memEQs(pv, len, "0 but true")) {
1055 return IS_NUMBER_IN_UV;
1057 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1058 if ((s + 2 < send) && memCHRs("inqs#", toFOLD(*s))) {
1059 /* Really detect inf/nan. Start at d, not s, since the above
1060 * code might have already consumed the "1." or "1". */
1061 const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1062 if ((infnan & IS_NUMBER_INFINITY)) {
1063 return (numtype | infnan); /* Keep sign for infinity. */
1065 else if ((infnan & IS_NUMBER_NAN)) {
1066 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1069 else if (flags & PERL_SCAN_TRAILING) {
1070 return numtype | IS_NUMBER_TRAILING;
1077 =for apidoc grok_atoUV
1079 parse a string, looking for a decimal unsigned integer.
1081 On entry, C<pv> points to the beginning of the string;
1082 C<valptr> points to a UV that will receive the converted value, if found;
1083 C<endptr> is either NULL or points to a variable that points to one byte
1084 beyond the point in C<pv> that this routine should examine.
1085 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1087 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1088 no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
1091 If you constrain the portion of C<pv> that is looked at by this function (by
1092 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1093 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1094 final digit of the value. But if there is no constraint at what's looked at,
1095 all of C<pv> must be valid in order for TRUE to be returned.
1097 The only characters this accepts are the decimal digits '0'..'9'.
1099 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1100 leading whitespace, nor negative inputs. If such features are required, the
1101 calling code needs to explicitly implement those.
1103 Note that this function returns FALSE for inputs that would overflow a UV,
1104 or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
1105 C<01>, C<002>, I<etc>.
1107 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1108 used for incremental parsing, and therefore should be avoided
1109 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1110 seen as a bug (global state controlled by user environment).
1117 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1121 const char* end2; /* Used in case endptr is NULL. */
1122 UV val = 0; /* The parsed value. */
1124 PERL_ARGS_ASSERT_GROK_ATOUV;
1130 end2 = s + strlen(s);
1140 /* Single-digit inputs are quite common. */
1142 if (s < *eptr && isDIGIT(*s)) {
1143 /* Fail on extra leading zeros. */
1146 while (s < *eptr && isDIGIT(*s)) {
1147 /* This could be unrolled like in grok_number(), but
1148 * the expected uses of this are not speed-needy, and
1149 * unlikely to need full 64-bitness. */
1150 const U8 digit = *s++ - '0';
1151 if (val < uv_max_div_10 ||
1152 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1153 val = val * 10 + digit;
1160 if (endptr == NULL) {
1162 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1175 S_mulexp10(NV value, I32 exponent)
1187 /* On OpenVMS VAX we by default use the D_FLOAT double format,
1188 * and that format does not have *easy* capabilities [1] for
1189 * overflowing doubles 'silently' as IEEE fp does. We also need
1190 * to support G_FLOAT on both VAX and Alpha, and though the exponent
1191 * range is much larger than D_FLOAT it still doesn't do silent
1192 * overflow. Therefore we need to detect early whether we would
1193 * overflow (this is the behaviour of the native string-to-float
1194 * conversion routines, and therefore of native applications, too).
1196 * [1] Trying to establish a condition handler to trap floating point
1197 * exceptions is not a good idea. */
1199 /* In UNICOS and in certain Cray models (such as T90) there is no
1200 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1201 * There is something you can do if you are willing to use some
1202 * inline assembler: the instruction is called DFI-- but that will
1203 * disable *all* floating point interrupts, a little bit too large
1204 * a hammer. Therefore we need to catch potential overflows before
1207 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1209 const NV exp_v = log10(value);
1210 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1213 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1215 while (-exponent >= NV_MAX_10_EXP) {
1216 /* combination does not overflow, but 10^(-exponent) does */
1226 exponent = -exponent;
1227 #ifdef NV_MAX_10_EXP
1228 /* for something like 1234 x 10^-309, the action of calculating
1229 * the intermediate value 10^309 then returning 1234 / (10^309)
1230 * will fail, since 10^309 becomes infinity. In this case try to
1231 * refactor it as 123 / (10^308) etc.
1233 while (value && exponent > NV_MAX_10_EXP) {
1241 #if defined(__osf__)
1242 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1243 * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1244 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1245 * but that breaks another set of infnan.t tests. */
1246 # define FP_OVERFLOWS_TO_ZERO
1248 for (bit = 1; exponent; bit <<= 1) {
1249 if (exponent & bit) {
1252 #ifdef FP_OVERFLOWS_TO_ZERO
1255 return value < 0 ? -NV_INF : NV_INF;
1257 return value < 0 ? -FLT_MAX : FLT_MAX;
1260 /* Floating point exceptions are supposed to be turned off,
1261 * but if we're obviously done, don't risk another iteration.
1263 if (exponent == 0) break;
1267 return negative ? value / result : value * result;
1269 #endif /* #ifndef Perl_strtod */
1272 # define ATOF(s, x) my_atof2(s, &x)
1274 # define ATOF(s, x) Perl_atof2(s, x)
1278 Perl_my_atof(pTHX_ const char* s)
1280 /* 's' must be NUL terminated */
1284 PERL_ARGS_ASSERT_MY_ATOF;
1286 #if ! defined(USE_LOCALE_NUMERIC)
1293 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1294 STORE_LC_NUMERIC_SET_TO_NEEDED();
1295 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1300 /* Look through the string for the first thing that looks like a
1301 * decimal point: either the value in the current locale or the
1302 * standard fallback of '.'. The one which appears earliest in the
1303 * input string is the one that we should have atof look for. Note
1304 * that we have to determine this beforehand because on some
1305 * systems, Perl_atof2 is just a wrapper around the system's atof.
1307 const char * const standard_pos = strchr(s, '.');
1308 const char * const local_pos
1309 = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1310 const bool use_standard_radix
1311 = standard_pos && (!local_pos || standard_pos < local_pos);
1313 if (use_standard_radix) {
1314 SET_NUMERIC_STANDARD();
1315 LOCK_LC_NUMERIC_STANDARD();
1320 if (use_standard_radix) {
1321 UNLOCK_LC_NUMERIC_STANDARD();
1322 SET_NUMERIC_UNDERLYING();
1325 RESTORE_LC_NUMERIC();
1333 #if defined(NV_INF) || defined(NV_NAN)
1336 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1338 const char *p0 = negative ? s - 1 : s;
1340 const int infnan = grok_infnan(&p, send);
1341 if (infnan && p != p0) {
1342 /* If we can generate inf/nan directly, let's do so. */
1344 if ((infnan & IS_NUMBER_INFINITY)) {
1345 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1350 if ((infnan & IS_NUMBER_NAN)) {
1356 /* If still here, we didn't have either NV_INF or NV_NAN,
1357 * and can try falling back to native strtod/strtold.
1359 * The native interface might not recognize all the possible
1360 * inf/nan strings Perl recognizes. What we can try
1361 * is to try faking the input. We will try inf/-inf/nan
1362 * as the most promising/portable input. */
1364 const char* fake = "silence compiler warning";
1368 if ((infnan & IS_NUMBER_INFINITY)) {
1369 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1373 if ((infnan & IS_NUMBER_NAN)) {
1377 assert(strNE(fake, "silence compiler warning"));
1378 nv = S_strtod(aTHX_ fake, &endp);
1381 if ((infnan & IS_NUMBER_INFINITY)) {
1386 /* last resort, may generate SIGFPE */
1387 *value = Perl_exp((NV)1e9);
1388 if ((infnan & IS_NUMBER_NEG))
1391 return (char*)p; /* p, not endp */
1395 if ((infnan & IS_NUMBER_NAN)) {
1400 /* last resort, may generate SIGFPE */
1401 *value = Perl_log((NV)-1.0);
1403 return (char*)p; /* p, not endp */
1408 #endif /* #ifdef Perl_strtod */
1413 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1416 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1418 PERL_ARGS_ASSERT_MY_ATOF2;
1419 return my_atof3(orig, value, 0);
1423 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1425 const char* s = orig;
1426 NV result[3] = {0.0, 0.0, 0.0};
1427 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1428 const char* send = s + ((len != 0)
1430 : strlen(orig)); /* one past the last */
1433 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1434 UV accumulator[2] = {0,0}; /* before/after dp */
1435 bool seen_digit = 0;
1436 I32 exp_adjust[2] = {0,0};
1437 I32 exp_acc[2] = {-1, -1};
1438 /* the current exponent adjust for the accumulators */
1443 I32 sig_digits = 0; /* noof significant digits seen so far */
1446 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1447 PERL_ARGS_ASSERT_MY_ATOF3;
1449 /* leading whitespace */
1450 while (s < send && isSPACE(*s))
1468 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1471 /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1472 0b-prefixed binary numbers, which is backward incompatible
1474 if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
1475 (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1480 /* If the length is passed in, the input string isn't NUL-terminated,
1481 * and in it turns out the function below assumes it is; therefore we
1482 * create a copy and NUL-terminate that */
1484 Newx(copy, len + 1, char);
1485 Copy(orig, copy, len, char);
1487 s = copy + (s - orig);
1490 result[2] = S_strtod(aTHX_ s, &endp);
1492 /* If we created a copy, 'endp' is in terms of that. Convert back to
1495 s = (s - copy) + (char *) orig;
1496 endp = (endp - copy) + (char *) orig;
1501 *value = negative ? -result[2] : result[2];
1506 #elif defined(USE_PERL_ATOF)
1508 /* There is no point in processing more significant digits
1509 * than the NV can hold. Note that NV_DIG is a lower-bound value,
1510 * while we need an upper-bound value. We add 2 to account for this;
1511 * since it will have been conservative on both the first and last digit.
1512 * For example a 32-bit mantissa with an exponent of 4 would have
1513 * exact values in the set
1521 * where for the purposes of calculating NV_DIG we would have to discount
1522 * both the first and last digit, since neither can hold all values from
1523 * 0..9; but for calculating the value we must examine those two digits.
1525 #ifdef MAX_SIG_DIG_PLUS
1526 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1527 possible digits in a NV, especially if NVs are not IEEE compliant
1528 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1529 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1531 # define MAX_SIG_DIGITS (NV_DIG+2)
1534 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1535 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1537 #if defined(NV_INF) || defined(NV_NAN)
1540 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1545 /* we accumulate digits into an integer; when this becomes too
1546 * large, we add the total to NV and start again */
1556 /* don't start counting until we see the first significant
1557 * digit, eg the 5 in 0.00005... */
1558 if (!sig_digits && digit == 0)
1561 if (++sig_digits > MAX_SIG_DIGITS) {
1562 /* limits of precision reached */
1564 ++accumulator[seen_dp];
1565 } else if (digit == 5) {
1566 if (old_digit % 2) { /* round to even - Allen */
1567 ++accumulator[seen_dp];
1575 /* skip remaining digits */
1576 while (s < send && isDIGIT(*s)) {
1582 /* warn of loss of precision? */
1585 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1586 /* add accumulator to result and start again */
1587 result[seen_dp] = S_mulexp10(result[seen_dp],
1589 + (NV)accumulator[seen_dp];
1590 accumulator[seen_dp] = 0;
1591 exp_acc[seen_dp] = 0;
1593 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1597 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1599 if (sig_digits > MAX_SIG_DIGITS) {
1600 while (s < send && isDIGIT(*s)) {
1611 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1613 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1616 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1617 bool expnegative = 0;
1627 while (s < send && isDIGIT(*s))
1628 exponent = exponent * 10 + (*s++ - '0');
1630 exponent = -exponent;
1633 /* now apply the exponent */
1636 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1637 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1639 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1642 /* now apply the sign */
1644 result[2] = -result[2];
1645 #endif /* USE_PERL_ATOF */
1651 =for apidoc isinfnan
1653 C<Perl_isinfnan()> is a utility function that returns true if the NV
1654 argument is either an infinity or a C<NaN>, false otherwise. To test
1655 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1657 This is also the logical inverse of Perl_isfinite().
1662 Perl_isinfnan(NV nv)
1664 PERL_UNUSED_ARG(nv);
1677 =for apidoc isinfnansv
1679 Checks whether the argument would be either an infinity or C<NaN> when used
1680 as a number, but is careful not to trigger non-numeric or uninitialized
1681 warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already.
1687 Perl_isinfnansv(pTHX_ SV *sv)
1689 PERL_ARGS_ASSERT_ISINFNANSV;
1693 return Perl_isinfnan(SvNVX(sv));
1698 const char *s = SvPV_nomg_const(sv, len);
1699 return cBOOL(grok_infnan(&s, s+len));
1704 /* C99 has truncl, pre-C99 Solaris had aintl. We can use either with
1705 * copysignl to emulate modfl, which is in some platforms missing or
1707 # if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1709 Perl_my_modfl(long double x, long double *ip)
1712 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1714 # elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1716 Perl_my_modfl(long double x, long double *ip)
1719 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1724 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1725 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1727 Perl_my_frexpl(long double x, int *e) {
1728 *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1729 return (scalbnl(x, -*e));
1734 =for apidoc Perl_signbit
1736 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1739 If F<Configure> detects this system has a C<signbit()> that will work with
1740 our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise,
1741 fall back on this implementation. The main use of this function
1742 is catching C<-0.0>.
1744 C<Configure> notes: This function is called C<'Perl_signbit'> instead of a
1745 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1746 function or macro that doesn't happen to work with our particular choice
1747 of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1748 the standard system headers to be happy. Also, this is a no-context
1749 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1750 F<perl.h> as a simple macro call to the system's C<signbit()>.
1751 Users should just always call C<Perl_signbit()>.
1755 #if !defined(HAS_SIGNBIT)
1757 Perl_signbit(NV x) {
1758 # ifdef Perl_fp_class_nzero
1759 return Perl_fp_class_nzero(x);
1760 /* Try finding the high byte, and assume it's highest bit
1761 * is the sign. This assumption is probably wrong somewhere. */
1762 # elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1763 return (((unsigned char *)&x)[9] & 0x80);
1764 # elif defined(NV_LITTLE_ENDIAN)
1765 /* Note that NVSIZE is sizeof(NV), which would make the below be
1766 * wrong if the end bytes are unused, which happens with the x86
1767 * 80-bit long doubles, which is why take care of that above. */
1768 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1769 # elif defined(NV_BIG_ENDIAN)
1770 return (((unsigned char *)&x)[0] & 0x80);
1772 /* This last resort fallback is wrong for the negative zero. */
1773 return (x < 0.0) ? 1 : 0;
1779 * ex: set ts=8 sts=4 sw=4 et: