numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 =cut
  22
  23 This file contains all the stuff needed by perl for manipulating numeric
  24 values, including such things as replacements for the OS's atof() function
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 #ifdef Perl_strtod
  33
  34 PERL_STATIC_INLINE NV
  35 S_strtod(pTHX_ const char * const s, char ** e)
  36 {
  37     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  38     NV result;
  39
  40     STORE_LC_NUMERIC_SET_TO_NEEDED();
  41
  42 #  ifdef USE_QUADMATH
  43
  44     result = strtoflt128(s, e);
  45
  46 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
  47                              && defined(USE_LONG_DOUBLE)
  48 #    if defined(__MINGW64_VERSION_MAJOR)
  49       /***********************************************
  50        We are unable to use strtold because of
  51         https://sourceforge.net/p/mingw-w64/bugs/711/
  52         &
  53         https://sourceforge.net/p/mingw-w64/bugs/725/
  54
  55        but __mingw_strtold is fine.
  56       ***********************************************/
  57
  58     result = __mingw_strtold(s, e);
  59
  60 #    else
  61
  62     result = strtold(s, e);
  63
  64 #    endif
  65 #  elif defined(HAS_STRTOD)
  66
  67     result = strtod(s, e);
  68
  69 #  else
  70 #    error No strtod() equivalent found
  71 #  endif
  72
  73     RESTORE_LC_NUMERIC();
  74
  75     return result;
  76 }
  77
  78 #endif  /* #ifdef Perl_strtod */
  79
  80 /*
  81
  82 =for apidoc my_strtod
  83
  84 This function is equivalent to the libc strtod() function, and is available
  85 even on platforms that lack plain strtod().  Its return value is the best
  86 available precision depending on platform capabilities and F<Configure>
  87 options.
  88
  89 It properly handles the locale radix character, meaning it expects a dot except
  90 when called from within the scope of S<C<use locale>>, in which case the radix
  91 character should be that specified by the current locale.
  92
  93 The synonym Strtod() may be used instead.
  94
  95 =cut
  96
  97 */
  98
  99 NV
 100 Perl_my_strtod(const char * const s, char **e)
 101 {
 102     dTHX;
 103
 104     PERL_ARGS_ASSERT_MY_STRTOD;
 105
 106 #ifdef Perl_strtod
 107
 108     return S_strtod(aTHX_ s, e);
 109
 110 #else
 111
 112     {
 113         NV result;
 114         char ** end_ptr = NULL;
 115
 116         *end_ptr = my_atof2(s, &result);
 117         if (e) {
 118             *e = *end_ptr;
 119         }
 120
 121         if (! *end_ptr) {
 122             result = 0.0;
 123         }
 124
 125         return result;
 126     }
 127
 128 #endif
 129
 130 }
 131
 132
 133 U32
 134 Perl_cast_ulong(NV f)
 135 {
 136   if (f < 0.0)
 137     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
 138   if (f < U32_MAX_P1) {
 139 #if CASTFLAGS & 2
 140     if (f < U32_MAX_P1_HALF)
 141       return (U32) f;
 142     f -= U32_MAX_P1_HALF;
 143     return ((U32) f) | (1 + (U32_MAX >> 1));
 144 #else
 145     return (U32) f;
 146 #endif
 147   }
 148   return f > 0 ? U32_MAX : 0 /* NaN */;
 149 }
 150
 151 I32
 152 Perl_cast_i32(NV f)
 153 {
 154   if (f < I32_MAX_P1)
 155     return f < I32_MIN ? I32_MIN : (I32) f;
 156   if (f < U32_MAX_P1) {
 157 #if CASTFLAGS & 2
 158     if (f < U32_MAX_P1_HALF)
 159       return (I32)(U32) f;
 160     f -= U32_MAX_P1_HALF;
 161     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
 162 #else
 163     return (I32)(U32) f;
 164 #endif
 165   }
 166   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
 167 }
 168
 169 IV
 170 Perl_cast_iv(NV f)
 171 {
 172   if (f < IV_MAX_P1)
 173     return f < IV_MIN ? IV_MIN : (IV) f;
 174   if (f < UV_MAX_P1) {
 175 #if CASTFLAGS & 2
 176     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
 177     if (f < UV_MAX_P1_HALF)
 178       return (IV)(UV) f;
 179     f -= UV_MAX_P1_HALF;
 180     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
 181 #else
 182     return (IV)(UV) f;
 183 #endif
 184   }
 185   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
 186 }
 187
 188 UV
 189 Perl_cast_uv(NV f)
 190 {
 191   if (f < 0.0)
 192     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
 193   if (f < UV_MAX_P1) {
 194 #if CASTFLAGS & 2
 195     if (f < UV_MAX_P1_HALF)
 196       return (UV) f;
 197     f -= UV_MAX_P1_HALF;
 198     return ((UV) f) | (1 + (UV_MAX >> 1));
 199 #else
 200     return (UV) f;
 201 #endif
 202   }
 203   return f > 0 ? UV_MAX : 0 /* NaN */;
 204 }
 205
 206 /*
 207 =for apidoc grok_bin
 208
 209 converts a string representing a binary number to numeric form.
 210
 211 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 213 The scan stops at the end of the string, or the first invalid character.
 214 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 215 invalid character will also trigger a warning.
 216 On return C<*len> is set to the length of the scanned string,
 217 and C<*flags> gives output flags.
 218
 219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 220 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 222 and writes the value to C<*result> (or the value is discarded if C<result>
 223 is NULL).
 224
 225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
 226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 227 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
 228 number may use C<"_"> characters to separate digits.
 229
 230 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
 231 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
 232 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
 233 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
 234 =for apidoc Amnh||PERL_SCAN_TRAILING
 235
 236 =cut
 237
 238 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 239 which suppresses any message for non-portable numbers that are still valid
 240 on this platform.
 241  */
 242
 243 UV
 244 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 245 {
 246     PERL_ARGS_ASSERT_GROK_BIN;
 247
 248     return grok_bin(start, len_p, flags, result);
 249 }
 250
 251 /*
 252 =for apidoc grok_hex
 253
 254 converts a string representing a hex number to numeric form.
 255
 256 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 257 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 258 The scan stops at the end of the string, or the first invalid character.
 259 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 260 invalid character will also trigger a warning.
 261 On return C<*len> is set to the length of the scanned string,
 262 and C<*flags> gives output flags.
 263
 264 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 265 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 266 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 267 and writes the value to C<*result> (or the value is discarded if C<result>
 268 is C<NULL>).
 269
 270 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
 271 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 272 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
 273 number may use C<"_"> characters to separate digits.
 274
 275 =cut
 276
 277 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 278 which suppresses any message for non-portable numbers, but which are valid
 279 on this platform.
 280  */
 281
 282 UV
 283 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 284 {
 285     PERL_ARGS_ASSERT_GROK_HEX;
 286
 287     return grok_hex(start, len_p, flags, result);
 288 }
 289
 290 UV
 291 Perl_grok_bin_oct_hex(pTHX_ const char *start,
 292                         STRLEN *len_p,
 293                         I32 *flags,
 294                         NV *result,
 295                         const unsigned shift) /* 1 for binary; 3 for octal;
 296                                                  4 for hex */
 297 {
 298     const char *s = start;
 299     STRLEN len = *len_p;
 300     UV value = 0;
 301     NV value_nv = 0;
 302     const PERL_UINT_FAST8_T base = 1 << shift;
 303     const UV max_div= UV_MAX / base;
 304     const PERL_UINT_FAST8_T class_bit = (base == 2)
 305                                         ? _CC_BINDIGIT
 306                                         : (base == 8)
 307                                           ? _CC_OCTDIGIT
 308                                           : _CC_XDIGIT;
 309     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 310     bool overflowed = FALSE;
 311
 312     PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
 313
 314     ASSUME(inRANGE(shift, 1, 4) && shift != 2);
 315
 316     if (base != 8 && !(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 317         const char prefix = base == 2 ? 'b' : 'x';
 318
 319         /* strip off leading b or 0b; x or 0x.
 320            for compatibility silently suffer "b" and "0b" as valid binary; "x"
 321            and "0x" as valid hex numbers. */
 322         if (len >= 1) {
 323             if (isALPHA_FOLD_EQ(s[0], prefix)) {
 324                 s++;
 325                 len--;
 326             }
 327             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], prefix))) {
 328                 s+=2;
 329                 len-=2;
 330             }
 331         }
 332     }
 333
 334     for (; len-- && *s; s++) {
 335         if (_generic_isCC(*s, class_bit)) {
 336             /* Write it in this wonky order with a goto to attempt to get the
 337                compiler to make the common case integer-only loop pretty tight.
 338                With gcc seems to be much straighter code than old scan_hex.  */
 339           redo:
 340             if (!overflowed) {
 341                 if (value <= max_div) {
 342                     value = (value << shift) | XDIGIT_VALUE(*s);
 343                         /* Note XDIGIT_VALUE() is branchless, works on binary
 344                          * and octal as well, so can be used here, without
 345                          * slowing those down */
 346                     continue;
 347                 }
 348                 /* Bah. We're just overflowed.  */
 349                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 350                                        "Integer overflow in %s number",
 351                                        (base == 16) ? "hexadecimal"
 352                                                     : (base == 2)
 353                                                       ? "binary"
 354                                                       : "octal");
 355                 overflowed = TRUE;
 356                 value_nv = (NV) value;
 357             }
 358             value_nv *= base;
 359             /* If an NV has not enough bits in its mantissa to
 360              * represent a UV this summing of small low-order numbers
 361              * is a waste of time (because the NV cannot preserve
 362              * the low-order bits anyway): we could just remember when
 363              * did we overflow and in the end just multiply value_nv by the
 364              * right amount of base-tuples. */
 365             value_nv += (NV) XDIGIT_VALUE(*s);
 366             continue;
 367         }
 368         if (   *s == '_'
 369             && len
 370             && allow_underscores
 371             && _generic_isCC(s[1], class_bit))
 372         {
 373             --len;
 374             ++s;
 375             goto redo;
 376         }
 377         if ( ! (*flags & PERL_SCAN_SILENT_ILLDIGIT)
 378             &&  ckWARN(WARN_DIGIT))
 379         {
 380             if (base != 8) {
 381                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 382                                    "Illegal %s digit '%c' ignored",
 383                                       ((base == 2)
 384                                       ? "binary"
 385                                       : "hexadecimal"),
 386                                     *s);
 387             }
 388             else if (isDIGIT(*s)) { /* octal base */
 389
 390                 /* Allow \octal to work the DWIM way (that is, stop scanning as
 391                  * soon as non-octal characters are seen, complain only if
 392                  * someone seems to want to use the digits eight and nine.
 393                  * Since we know it is not octal, then if isDIGIT, must be an 8
 394                  * or 9). */
 395                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 396                                        "Illegal octal digit '%c' ignored", *s);
 397             }
 398         }
 399         break;
 400     }
 401
 402     if (   ( overflowed && value_nv > 4294967295.0)
 403 #if UVSIZE > 4
 404         || (   !  overflowed && value > 0xffffffff
 405             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 406 #endif
 407     ) {
 408         const char * which = (base == 2)
 409                           ? "Binary number > 0b11111111111111111111111111111111"
 410                           : (base == 8)
 411                             ? "Octal number > 037777777777"
 412                             : "Hexadecimal number > 0xffffffff";
 413         /* Also there are listings for the other two.  Since they are the first
 414          * word, it would be hard for a user to find them there starting with a
 415          * %s. */
 416         /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
 417         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
 418     }
 419
 420     *len_p = s - start;
 421     if (!overflowed) {
 422         *flags = 0;
 423         return value;
 424     }
 425     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 426     if (result)
 427         *result = value_nv;
 428     return UV_MAX;
 429 }
 430
 431 /*
 432 =for apidoc grok_oct
 433
 434 converts a string representing an octal number to numeric form.
 435
 436 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 437 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 438 The scan stops at the end of the string, or the first invalid character.
 439 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 440 8 or 9 will also trigger a warning.
 441 On return C<*len> is set to the length of the scanned string,
 442 and C<*flags> gives output flags.
 443
 444 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 445 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 446 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 447 and writes the value to C<*result> (or the value is discarded if C<result>
 448 is C<NULL>).
 449
 450 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
 451 number may use C<"_"> characters to separate digits.
 452
 453 =cut
 454
 455 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 456 which suppresses any message for non-portable numbers, but which are valid
 457 on this platform.
 458  */
 459
 460 UV
 461 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 462 {
 463     PERL_ARGS_ASSERT_GROK_OCT;
 464
 465     return grok_oct(start, len_p, flags, result);
 466 }
 467
 468 /*
 469 =for apidoc scan_bin
 470
 471 For backwards compatibility.  Use C<grok_bin> instead.
 472
 473 =for apidoc scan_hex
 474
 475 For backwards compatibility.  Use C<grok_hex> instead.
 476
 477 =for apidoc scan_oct
 478
 479 For backwards compatibility.  Use C<grok_oct> instead.
 480
 481 =cut
 482  */
 483
 484 NV
 485 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 486 {
 487     NV rnv;
 488     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 489     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 490
 491     PERL_ARGS_ASSERT_SCAN_BIN;
 492
 493     *retlen = len;
 494     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 495 }
 496
 497 NV
 498 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 499 {
 500     NV rnv;
 501     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 502     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 503
 504     PERL_ARGS_ASSERT_SCAN_OCT;
 505
 506     *retlen = len;
 507     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 508 }
 509
 510 NV
 511 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 512 {
 513     NV rnv;
 514     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 515     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 516
 517     PERL_ARGS_ASSERT_SCAN_HEX;
 518
 519     *retlen = len;
 520     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 521 }
 522
 523 /*
 524 =for apidoc grok_numeric_radix
 525
 526 Scan and skip for a numeric decimal separator (radix).
 527
 528 =cut
 529  */
 530 bool
 531 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 532 {
 533     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 534
 535 #ifdef USE_LOCALE_NUMERIC
 536
 537     if (IN_LC(LC_NUMERIC)) {
 538         STRLEN len;
 539         char * radix;
 540         bool matches_radix = FALSE;
 541         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
 542
 543         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
 544
 545         radix = SvPV(PL_numeric_radix_sv, len);
 546         radix = savepvn(radix, len);
 547
 548         RESTORE_LC_NUMERIC();
 549
 550         if (*sp + len <= send) {
 551             matches_radix = memEQ(*sp, radix, len);
 552         }
 553
 554         Safefree(radix);
 555
 556         if (matches_radix) {
 557             *sp += len;
 558             return TRUE;
 559         }
 560     }
 561
 562 #endif
 563
 564     /* always try "." if numeric radix didn't match because
 565      * we may have data from different locales mixed */
 566     if (*sp < send && **sp == '.') {
 567         ++*sp;
 568         return TRUE;
 569     }
 570
 571     return FALSE;
 572 }
 573
 574 /*
 575 =for apidoc grok_infnan
 576
 577 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
 578 or "not a number", and returns one of the following flag combinations:
 579
 580   IS_NUMBER_INFINITY
 581   IS_NUMBER_NAN
 582   IS_NUMBER_INFINITY | IS_NUMBER_NEG
 583   IS_NUMBER_NAN | IS_NUMBER_NEG
 584   0
 585
 586 possibly |-ed with C<IS_NUMBER_TRAILING>.
 587
 588 If an infinity or a not-a-number is recognized, C<*sp> will point to
 589 one byte past the end of the recognized string.  If the recognition fails,
 590 zero is returned, and C<*sp> will not move.
 591
 592 =for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
 593 =for apidoc Amn|bool|IS_NUMBER_INFINITY
 594 =for apidoc Amn|bool|IS_NUMBER_IN_UV
 595 =for apidoc Amn|bool|IS_NUMBER_NAN
 596 =for apidoc Amn|bool|IS_NUMBER_NEG
 597 =for apidoc Amn|bool|IS_NUMBER_NOT_INT
 598
 599 =cut
 600 */
 601
 602 int
 603 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
 604 {
 605     const char* s = *sp;
 606     int flags = 0;
 607 #if defined(NV_INF) || defined(NV_NAN)
 608     bool odh = FALSE; /* one-dot-hash: 1.#INF */
 609
 610     PERL_ARGS_ASSERT_GROK_INFNAN;
 611
 612     if (*s == '+') {
 613         s++; if (s == send) return 0;
 614     }
 615     else if (*s == '-') {
 616         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
 617         s++; if (s == send) return 0;
 618     }
 619
 620     if (*s == '1') {
 621         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
 622          * Let's keep the dot optional. */
 623         s++; if (s == send) return 0;
 624         if (*s == '.') {
 625             s++; if (s == send) return 0;
 626         }
 627         if (*s == '#') {
 628             s++; if (s == send) return 0;
 629         } else
 630             return 0;
 631         odh = TRUE;
 632     }
 633
 634     if (isALPHA_FOLD_EQ(*s, 'I')) {
 635         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
 636
 637         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 638         s++; if (s == send) return 0;
 639         if (isALPHA_FOLD_EQ(*s, 'F')) {
 640             s++;
 641             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
 642                 int fail =
 643                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
 644                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
 645                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
 646                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
 647                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
 648                 s++;
 649             } else if (odh) {
 650                 while (*s == '0') { /* 1.#INF00 */
 651                     s++;
 652                 }
 653             }
 654             while (s < send && isSPACE(*s))
 655                 s++;
 656             if (s < send && *s) {
 657                 flags |= IS_NUMBER_TRAILING;
 658             }
 659             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 660         }
 661         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
 662             s++;
 663             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 664             while (*s == '0') { /* 1.#IND00 */
 665                 s++;
 666             }
 667             if (*s) {
 668                 flags |= IS_NUMBER_TRAILING;
 669             }
 670         } else
 671             return 0;
 672     }
 673     else {
 674         /* Maybe NAN of some sort */
 675
 676         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
 677             /* snan, qNaN */
 678             /* XXX do something with the snan/qnan difference */
 679             s++; if (s == send) return 0;
 680         }
 681
 682         if (isALPHA_FOLD_EQ(*s, 'N')) {
 683             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
 684             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 685             s++;
 686
 687             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 688             if (s == send) {
 689                 return flags;
 690             }
 691
 692             /* NaN can be followed by various stuff (NaNQ, NaNS), but
 693              * there are also multiple different NaN values, and some
 694              * implementations output the "payload" values,
 695              * e.g. NaN123, NAN(abc), while some legacy implementations
 696              * have weird stuff like NaN%. */
 697             if (isALPHA_FOLD_EQ(*s, 'q') ||
 698                 isALPHA_FOLD_EQ(*s, 's')) {
 699                 /* "nanq" or "nans" are ok, though generating
 700                  * these portably is tricky. */
 701                 s++;
 702                 if (s == send) {
 703                     return flags;
 704                 }
 705             }
 706             if (*s == '(') {
 707                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
 708                 const char *t;
 709                 s++;
 710                 if (s == send) {
 711                     return flags | IS_NUMBER_TRAILING;
 712                 }
 713                 t = s + 1;
 714                 while (t < send && *t && *t != ')') {
 715                     t++;
 716                 }
 717                 if (t == send) {
 718                     return flags | IS_NUMBER_TRAILING;
 719                 }
 720                 if (*t == ')') {
 721                     int nantype;
 722                     UV nanval;
 723                     if (s[0] == '0' && s + 2 < t &&
 724                         isALPHA_FOLD_EQ(s[1], 'x') &&
 725                         isXDIGIT(s[2])) {
 726                         STRLEN len = t - s;
 727                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 728                         nanval = grok_hex(s, &len, &flags, NULL);
 729                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 730                             nantype = 0;
 731                         } else {
 732                             nantype = IS_NUMBER_IN_UV;
 733                         }
 734                         s += len;
 735                     } else if (s[0] == '0' && s + 2 < t &&
 736                                isALPHA_FOLD_EQ(s[1], 'b') &&
 737                                (s[2] == '0' || s[2] == '1')) {
 738                         STRLEN len = t - s;
 739                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 740                         nanval = grok_bin(s, &len, &flags, NULL);
 741                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 742                             nantype = 0;
 743                         } else {
 744                             nantype = IS_NUMBER_IN_UV;
 745                         }
 746                         s += len;
 747                     } else {
 748                         const char *u;
 749                         nantype =
 750                             grok_number_flags(s, t - s, &nanval,
 751                                               PERL_SCAN_TRAILING |
 752                                               PERL_SCAN_ALLOW_UNDERSCORES);
 753                         /* Unfortunately grok_number_flags() doesn't
 754                          * tell how far we got and the ')' will always
 755                          * be "trailing", so we need to double-check
 756                          * whether we had something dubious. */
 757                         for (u = s; u < t; u++) {
 758                             if (!isDIGIT(*u)) {
 759                                 flags |= IS_NUMBER_TRAILING;
 760                                 break;
 761                             }
 762                         }
 763                         s = u;
 764                     }
 765
 766                     /* XXX Doesn't do octal: nan("0123").
 767                      * Probably not a big loss. */
 768
 769                     if ((nantype & IS_NUMBER_NOT_INT) ||
 770                         !(nantype && IS_NUMBER_IN_UV)) {
 771                         /* XXX the nanval is currently unused, that is,
 772                          * not inserted as the NaN payload of the NV.
 773                          * But the above code already parses the C99
 774                          * nan(...)  format.  See below, and see also
 775                          * the nan() in POSIX.xs.
 776                          *
 777                          * Certain configuration combinations where
 778                          * NVSIZE is greater than UVSIZE mean that
 779                          * a single UV cannot contain all the possible
 780                          * NaN payload bits.  There would need to be
 781                          * some more generic syntax than "nan($uv)".
 782                          *
 783                          * Issues to keep in mind:
 784                          *
 785                          * (1) In most common cases there would
 786                          * not be an integral number of bytes that
 787                          * could be set, only a certain number of bits.
 788                          * For example for the common case of
 789                          * NVSIZE == UVSIZE == 8 there is room for 52
 790                          * bits in the payload, but the most significant
 791                          * bit is commonly reserved for the
 792                          * signaling/quiet bit, leaving 51 bits.
 793                          * Furthermore, the C99 nan() is supposed
 794                          * to generate quiet NaNs, so it is doubtful
 795                          * whether it should be able to generate
 796                          * signaling NaNs.  For the x86 80-bit doubles
 797                          * (if building a long double Perl) there would
 798                          * be 62 bits (s/q bit being the 63rd).
 799                          *
 800                          * (2) Endianness of the payload bits. If the
 801                          * payload is specified as an UV, the low-order
 802                          * bits of the UV are naturally little-endianed
 803                          * (rightmost) bits of the payload.  The endianness
 804                          * of UVs and NVs can be different. */
 805                         return 0;
 806                     }
 807                     if (s < t) {
 808                         flags |= IS_NUMBER_TRAILING;
 809                     }
 810                 } else {
 811                     /* Looked like nan(...), but no close paren. */
 812                     flags |= IS_NUMBER_TRAILING;
 813                 }
 814             } else {
 815                 while (s < send && isSPACE(*s))
 816                     s++;
 817                 if (s < send && *s) {
 818                     /* Note that we here implicitly accept (parse as
 819                      * "nan", but with warnings) also any other weird
 820                      * trailing stuff for "nan".  In the above we just
 821                      * check that if we got the C99-style "nan(...)",
 822                      * the "..."  looks sane.
 823                      * If in future we accept more ways of specifying
 824                      * the nan payload, the accepting would happen around
 825                      * here. */
 826                     flags |= IS_NUMBER_TRAILING;
 827                 }
 828             }
 829             s = send;
 830         }
 831         else
 832             return 0;
 833     }
 834
 835     while (s < send && isSPACE(*s))
 836         s++;
 837
 838 #else
 839     PERL_UNUSED_ARG(send);
 840 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
 841     *sp = s;
 842     return flags;
 843 }
 844
 845 /*
 846 =for apidoc grok_number_flags
 847
 848 Recognise (or not) a number.  The type of the number is returned
 849 (0 if unrecognised), otherwise it is a bit-ORed combination of
 850 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
 851 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
 852
 853 If the value of the number can fit in a UV, it is returned in C<*valuep>.
 854 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
 855 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
 856 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
 857 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
 858 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
 859
 860 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
 861 seen (in which case C<*valuep> gives the true value truncated to an integer), and
 862 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
 863 absolute value).  C<IS_NUMBER_IN_UV> is not set if e notation was used or the
 864 number is larger than a UV.
 865
 866 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 867 non-numeric text on an otherwise successful I<grok>, setting
 868 C<IS_NUMBER_TRAILING> on the result.
 869
 870 =for apidoc grok_number
 871
 872 Identical to C<grok_number_flags()> with C<flags> set to zero.
 873
 874 =cut
 875  */
 876 int
 877 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 878 {
 879     PERL_ARGS_ASSERT_GROK_NUMBER;
 880
 881     return grok_number_flags(pv, len, valuep, 0);
 882 }
 883
 884 static const UV uv_max_div_10 = UV_MAX / 10;
 885 static const U8 uv_max_mod_10 = UV_MAX % 10;
 886
 887 int
 888 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 889 {
 890   const char *s = pv;
 891   const char * const send = pv + len;
 892   const char *d;
 893   int numtype = 0;
 894
 895   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 896
 897   while (s < send && isSPACE(*s))
 898     s++;
 899   if (s == send) {
 900     return 0;
 901   } else if (*s == '-') {
 902     s++;
 903     numtype = IS_NUMBER_NEG;
 904   }
 905   else if (*s == '+')
 906     s++;
 907
 908   if (s == send)
 909     return 0;
 910
 911   /* The first digit (after optional sign): note that might
 912    * also point to "infinity" or "nan", or "1.#INF". */
 913   d = s;
 914
 915   /* next must be digit or the radix separator or beginning of infinity/nan */
 916   if (isDIGIT(*s)) {
 917     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 918        overflow.  */
 919     UV value = *s - '0';
 920     /* This construction seems to be more optimiser friendly.
 921        (without it gcc does the isDIGIT test and the *s - '0' separately)
 922        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 923        In theory the optimiser could deduce how far to unroll the loop
 924        before checking for overflow.  */
 925     if (++s < send) {
 926       int digit = *s - '0';
 927       if (inRANGE(digit, 0, 9)) {
 928         value = value * 10 + digit;
 929         if (++s < send) {
 930           digit = *s - '0';
 931           if (inRANGE(digit, 0, 9)) {
 932             value = value * 10 + digit;
 933             if (++s < send) {
 934               digit = *s - '0';
 935               if (inRANGE(digit, 0, 9)) {
 936                 value = value * 10 + digit;
 937                 if (++s < send) {
 938                   digit = *s - '0';
 939                   if (inRANGE(digit, 0, 9)) {
 940                     value = value * 10 + digit;
 941                     if (++s < send) {
 942                       digit = *s - '0';
 943                       if (inRANGE(digit, 0, 9)) {
 944                         value = value * 10 + digit;
 945                         if (++s < send) {
 946                           digit = *s - '0';
 947                           if (inRANGE(digit, 0, 9)) {
 948                             value = value * 10 + digit;
 949                             if (++s < send) {
 950                               digit = *s - '0';
 951                               if (inRANGE(digit, 0, 9)) {
 952                                 value = value * 10 + digit;
 953                                 if (++s < send) {
 954                                   digit = *s - '0';
 955                                   if (inRANGE(digit, 0, 9)) {
 956                                     value = value * 10 + digit;
 957                                     if (++s < send) {
 958                                       /* Now got 9 digits, so need to check
 959                                          each time for overflow.  */
 960                                       digit = *s - '0';
 961                                       while (    inRANGE(digit, 0, 9)
 962                                              && (value < uv_max_div_10
 963                                                  || (value == uv_max_div_10
 964                                                      && digit <= uv_max_mod_10))) {
 965                                         value = value * 10 + digit;
 966                                         if (++s < send)
 967                                           digit = *s - '0';
 968                                         else
 969                                           break;
 970                                       }
 971                                       if (inRANGE(digit, 0, 9)
 972                                           && (s < send)) {
 973                                         /* value overflowed.
 974                                            skip the remaining digits, don't
 975                                            worry about setting *valuep.  */
 976                                         do {
 977                                           s++;
 978                                         } while (s < send && isDIGIT(*s));
 979                                         numtype |=
 980                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 981                                         goto skip_value;
 982                                       }
 983                                     }
 984                                   }
 985                                 }
 986                               }
 987                             }
 988                           }
 989                         }
 990                       }
 991                     }
 992                   }
 993                 }
 994               }
 995             }
 996           }
 997         }
 998       }
 999     }
1000     numtype |= IS_NUMBER_IN_UV;
1001     if (valuep)
1002       *valuep = value;
1003
1004   skip_value:
1005     if (GROK_NUMERIC_RADIX(&s, send)) {
1006       numtype |= IS_NUMBER_NOT_INT;
1007       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
1008         s++;
1009     }
1010   }
1011   else if (GROK_NUMERIC_RADIX(&s, send)) {
1012     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1013     /* no digits before the radix means we need digits after it */
1014     if (s < send && isDIGIT(*s)) {
1015       do {
1016         s++;
1017       } while (s < send && isDIGIT(*s));
1018       if (valuep) {
1019         /* integer approximation is valid - it's 0.  */
1020         *valuep = 0;
1021       }
1022     }
1023     else
1024         return 0;
1025   }
1026
1027   if (s > d && s < send) {
1028     /* we can have an optional exponent part */
1029     if (isALPHA_FOLD_EQ(*s, 'e')) {
1030       s++;
1031       if (s < send && (*s == '-' || *s == '+'))
1032         s++;
1033       if (s < send && isDIGIT(*s)) {
1034         do {
1035           s++;
1036         } while (s < send && isDIGIT(*s));
1037       }
1038       else if (flags & PERL_SCAN_TRAILING)
1039         return numtype | IS_NUMBER_TRAILING;
1040       else
1041         return 0;
1042
1043       /* The only flag we keep is sign.  Blow away any "it's UV"  */
1044       numtype &= IS_NUMBER_NEG;
1045       numtype |= IS_NUMBER_NOT_INT;
1046     }
1047   }
1048   while (s < send && isSPACE(*s))
1049     s++;
1050   if (s >= send)
1051     return numtype;
1052   if (memEQs(pv, len, "0 but true")) {
1053     if (valuep)
1054       *valuep = 0;
1055     return IS_NUMBER_IN_UV;
1056   }
1057   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1058   if ((s + 2 < send) && memCHRs("inqs#", toFOLD(*s))) {
1059       /* Really detect inf/nan. Start at d, not s, since the above
1060        * code might have already consumed the "1." or "1". */
1061       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1062       if ((infnan & IS_NUMBER_INFINITY)) {
1063           return (numtype | infnan); /* Keep sign for infinity. */
1064       }
1065       else if ((infnan & IS_NUMBER_NAN)) {
1066           return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1067       }
1068   }
1069   else if (flags & PERL_SCAN_TRAILING) {
1070     return numtype | IS_NUMBER_TRAILING;
1071   }
1072
1073   return 0;
1074 }
1075
1076 /*
1077 =for apidoc grok_atoUV
1078
1079 parse a string, looking for a decimal unsigned integer.
1080
1081 On entry, C<pv> points to the beginning of the string;
1082 C<valptr> points to a UV that will receive the converted value, if found;
1083 C<endptr> is either NULL or points to a variable that points to one byte
1084 beyond the point in C<pv> that this routine should examine.
1085 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1086
1087 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1088 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1089 value.
1090
1091 If you constrain the portion of C<pv> that is looked at by this function (by
1092 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1093 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1094 final digit of the value.  But if there is no constraint at what's looked at,
1095 all of C<pv> must be valid in order for TRUE to be returned.
1096
1097 The only characters this accepts are the decimal digits '0'..'9'.
1098
1099 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1100 leading whitespace, nor negative inputs.  If such features are required, the
1101 calling code needs to explicitly implement those.
1102
1103 Note that this function returns FALSE for inputs that would overflow a UV,
1104 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1105 C<01>, C<002>, I<etc>.
1106
1107 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1108 used for incremental parsing, and therefore should be avoided
1109 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1110 seen as a bug (global state controlled by user environment).
1111
1112 =cut
1113
1114 */
1115
1116 bool
1117 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1118 {
1119     const char* s = pv;
1120     const char** eptr;
1121     const char* end2; /* Used in case endptr is NULL. */
1122     UV val = 0; /* The parsed value. */
1123
1124     PERL_ARGS_ASSERT_GROK_ATOUV;
1125
1126     if (endptr) {
1127         eptr = endptr;
1128     }
1129     else {
1130         end2 = s + strlen(s);
1131         eptr = &end2;
1132     }
1133
1134     if (   *eptr <= s
1135         || ! isDIGIT(*s))
1136     {
1137         return FALSE;
1138     }
1139
1140     /* Single-digit inputs are quite common. */
1141     val = *s++ - '0';
1142     if (s < *eptr && isDIGIT(*s)) {
1143         /* Fail on extra leading zeros. */
1144         if (val == 0)
1145             return FALSE;
1146         while (s < *eptr && isDIGIT(*s)) {
1147             /* This could be unrolled like in grok_number(), but
1148                 * the expected uses of this are not speed-needy, and
1149                 * unlikely to need full 64-bitness. */
1150             const U8 digit = *s++ - '0';
1151             if (val < uv_max_div_10 ||
1152                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1153                 val = val * 10 + digit;
1154             } else {
1155                 return FALSE;
1156             }
1157         }
1158     }
1159
1160     if (endptr == NULL) {
1161         if (*s) {
1162             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1163         }
1164     }
1165     else {
1166         *endptr = s;
1167     }
1168
1169     *valptr = val;
1170     return TRUE;
1171 }
1172
1173 #ifndef Perl_strtod
1174 STATIC NV
1175 S_mulexp10(NV value, I32 exponent)
1176 {
1177     NV result = 1.0;
1178     NV power = 10.0;
1179     bool negative = 0;
1180     I32 bit;
1181
1182     if (exponent == 0)
1183         return value;
1184     if (value == 0)
1185         return (NV)0;
1186
1187     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1188      * and that format does not have *easy* capabilities [1] for
1189      * overflowing doubles 'silently' as IEEE fp does.  We also need
1190      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1191      * range is much larger than D_FLOAT it still doesn't do silent
1192      * overflow.  Therefore we need to detect early whether we would
1193      * overflow (this is the behaviour of the native string-to-float
1194      * conversion routines, and therefore of native applications, too).
1195      *
1196      * [1] Trying to establish a condition handler to trap floating point
1197      *     exceptions is not a good idea. */
1198
1199     /* In UNICOS and in certain Cray models (such as T90) there is no
1200      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1201      * There is something you can do if you are willing to use some
1202      * inline assembler: the instruction is called DFI-- but that will
1203      * disable *all* floating point interrupts, a little bit too large
1204      * a hammer.  Therefore we need to catch potential overflows before
1205      * it's too late. */
1206
1207 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1208     STMT_START {
1209         const NV exp_v = log10(value);
1210         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1211             return NV_MAX;
1212         if (exponent < 0) {
1213             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1214                 return 0.0;
1215             while (-exponent >= NV_MAX_10_EXP) {
1216                 /* combination does not overflow, but 10^(-exponent) does */
1217                 value /= 10;
1218                 ++exponent;
1219             }
1220         }
1221     } STMT_END;
1222 #endif
1223
1224     if (exponent < 0) {
1225         negative = 1;
1226         exponent = -exponent;
1227 #ifdef NV_MAX_10_EXP
1228         /* for something like 1234 x 10^-309, the action of calculating
1229          * the intermediate value 10^309 then returning 1234 / (10^309)
1230          * will fail, since 10^309 becomes infinity. In this case try to
1231          * refactor it as 123 / (10^308) etc.
1232          */
1233         while (value && exponent > NV_MAX_10_EXP) {
1234             exponent--;
1235             value /= 10;
1236         }
1237         if (value == 0.0)
1238             return value;
1239 #endif
1240     }
1241 #if defined(__osf__)
1242     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1243      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1244      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1245      * but that breaks another set of infnan.t tests. */
1246 #  define FP_OVERFLOWS_TO_ZERO
1247 #endif
1248     for (bit = 1; exponent; bit <<= 1) {
1249         if (exponent & bit) {
1250             exponent ^= bit;
1251             result *= power;
1252 #ifdef FP_OVERFLOWS_TO_ZERO
1253             if (result == 0)
1254 # ifdef NV_INF
1255                 return value < 0 ? -NV_INF : NV_INF;
1256 # else
1257                 return value < 0 ? -FLT_MAX : FLT_MAX;
1258 # endif
1259 #endif
1260             /* Floating point exceptions are supposed to be turned off,
1261              *  but if we're obviously done, don't risk another iteration.
1262              */
1263              if (exponent == 0) break;
1264         }
1265         power *= power;
1266     }
1267     return negative ? value / result : value * result;
1268 }
1269 #endif /* #ifndef Perl_strtod */
1270
1271 #ifdef Perl_strtod
1272 #  define ATOF(s, x) my_atof2(s, &x)
1273 #else
1274 #  define ATOF(s, x) Perl_atof2(s, x)
1275 #endif
1276
1277 NV
1278 Perl_my_atof(pTHX_ const char* s)
1279 {
1280     /* 's' must be NUL terminated */
1281
1282     NV x = 0.0;
1283
1284     PERL_ARGS_ASSERT_MY_ATOF;
1285
1286 #if ! defined(USE_LOCALE_NUMERIC)
1287
1288     ATOF(s, x);
1289
1290 #else
1291
1292     {
1293         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1294         STORE_LC_NUMERIC_SET_TO_NEEDED();
1295         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1296             ATOF(s,x);
1297         }
1298         else {
1299
1300             /* Look through the string for the first thing that looks like a
1301              * decimal point: either the value in the current locale or the
1302              * standard fallback of '.'. The one which appears earliest in the
1303              * input string is the one that we should have atof look for. Note
1304              * that we have to determine this beforehand because on some
1305              * systems, Perl_atof2 is just a wrapper around the system's atof.
1306              * */
1307             const char * const standard_pos = strchr(s, '.');
1308             const char * const local_pos
1309                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1310             const bool use_standard_radix
1311                     = standard_pos && (!local_pos || standard_pos < local_pos);
1312
1313             if (use_standard_radix) {
1314                 SET_NUMERIC_STANDARD();
1315                 LOCK_LC_NUMERIC_STANDARD();
1316             }
1317
1318             ATOF(s,x);
1319
1320             if (use_standard_radix) {
1321                 UNLOCK_LC_NUMERIC_STANDARD();
1322                 SET_NUMERIC_UNDERLYING();
1323             }
1324         }
1325         RESTORE_LC_NUMERIC();
1326     }
1327
1328 #endif
1329
1330     return x;
1331 }
1332
1333 #if defined(NV_INF) || defined(NV_NAN)
1334
1335 static char*
1336 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1337 {
1338     const char *p0 = negative ? s - 1 : s;
1339     const char *p = p0;
1340     const int infnan = grok_infnan(&p, send);
1341     if (infnan && p != p0) {
1342         /* If we can generate inf/nan directly, let's do so. */
1343 #ifdef NV_INF
1344         if ((infnan & IS_NUMBER_INFINITY)) {
1345             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1346             return (char*)p;
1347         }
1348 #endif
1349 #ifdef NV_NAN
1350         if ((infnan & IS_NUMBER_NAN)) {
1351             *value = NV_NAN;
1352             return (char*)p;
1353         }
1354 #endif
1355 #ifdef Perl_strtod
1356         /* If still here, we didn't have either NV_INF or NV_NAN,
1357          * and can try falling back to native strtod/strtold.
1358          *
1359          * The native interface might not recognize all the possible
1360          * inf/nan strings Perl recognizes.  What we can try
1361          * is to try faking the input.  We will try inf/-inf/nan
1362          * as the most promising/portable input. */
1363         {
1364             const char* fake = "silence compiler warning";
1365             char* endp;
1366             NV nv;
1367 #ifdef NV_INF
1368             if ((infnan & IS_NUMBER_INFINITY)) {
1369                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1370             }
1371 #endif
1372 #ifdef NV_NAN
1373             if ((infnan & IS_NUMBER_NAN)) {
1374                 fake = "nan";
1375             }
1376 #endif
1377             assert(strNE(fake, "silence compiler warning"));
1378             nv = S_strtod(aTHX_ fake, &endp);
1379             if (fake != endp) {
1380 #ifdef NV_INF
1381                 if ((infnan & IS_NUMBER_INFINITY)) {
1382 #  ifdef Perl_isinf
1383                     if (Perl_isinf(nv))
1384                         *value = nv;
1385 #  else
1386                     /* last resort, may generate SIGFPE */
1387                     *value = Perl_exp((NV)1e9);
1388                     if ((infnan & IS_NUMBER_NEG))
1389                         *value = -*value;
1390 #  endif
1391                     return (char*)p; /* p, not endp */
1392                 }
1393 #endif
1394 #ifdef NV_NAN
1395                 if ((infnan & IS_NUMBER_NAN)) {
1396 #  ifdef Perl_isnan
1397                     if (Perl_isnan(nv))
1398                         *value = nv;
1399 #  else
1400                     /* last resort, may generate SIGFPE */
1401                     *value = Perl_log((NV)-1.0);
1402 #  endif
1403                     return (char*)p; /* p, not endp */
1404 #endif
1405                 }
1406             }
1407         }
1408 #endif /* #ifdef Perl_strtod */
1409     }
1410     return NULL;
1411 }
1412
1413 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1414
1415 char*
1416 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1417 {
1418     PERL_ARGS_ASSERT_MY_ATOF2;
1419     return my_atof3(orig, value, 0);
1420 }
1421
1422 char*
1423 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1424 {
1425     const char* s = orig;
1426     NV result[3] = {0.0, 0.0, 0.0};
1427 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1428     const char* send = s + ((len != 0)
1429                            ? len
1430                            : strlen(orig)); /* one past the last */
1431     bool negative = 0;
1432 #endif
1433 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1434     UV accumulator[2] = {0,0};  /* before/after dp */
1435     bool seen_digit = 0;
1436     I32 exp_adjust[2] = {0,0};
1437     I32 exp_acc[2] = {-1, -1};
1438     /* the current exponent adjust for the accumulators */
1439     I32 exponent = 0;
1440     I32 seen_dp  = 0;
1441     I32 digit = 0;
1442     I32 old_digit = 0;
1443     I32 sig_digits = 0; /* noof significant digits seen so far */
1444 #endif
1445
1446 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1447     PERL_ARGS_ASSERT_MY_ATOF3;
1448
1449     /* leading whitespace */
1450     while (s < send && isSPACE(*s))
1451         ++s;
1452
1453     /* sign */
1454     switch (*s) {
1455         case '-':
1456             negative = 1;
1457             /* FALLTHROUGH */
1458         case '+':
1459             ++s;
1460     }
1461 #endif
1462
1463 #ifdef Perl_strtod
1464     {
1465         char* endp;
1466         char* copy = NULL;
1467
1468         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1469             return endp;
1470
1471         /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1472            0b-prefixed binary numbers, which is backward incompatible
1473         */
1474         if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
1475             (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1476             *value = 0;
1477             return (char *)s+1;
1478         }
1479
1480         /* If the length is passed in, the input string isn't NUL-terminated,
1481          * and in it turns out the function below assumes it is; therefore we
1482          * create a copy and NUL-terminate that */
1483         if (len) {
1484             Newx(copy, len + 1, char);
1485             Copy(orig, copy, len, char);
1486             copy[len] = '\0';
1487             s = copy + (s - orig);
1488         }
1489
1490         result[2] = S_strtod(aTHX_ s, &endp);
1491
1492         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1493          * the original */
1494         if (copy) {
1495             s = (s - copy) + (char *) orig;
1496             endp = (endp - copy) + (char *) orig;
1497             Safefree(copy);
1498         }
1499
1500         if (s != endp) {
1501             *value = negative ? -result[2] : result[2];
1502             return endp;
1503         }
1504         return NULL;
1505     }
1506 #elif defined(USE_PERL_ATOF)
1507
1508 /* There is no point in processing more significant digits
1509  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1510  * while we need an upper-bound value. We add 2 to account for this;
1511  * since it will have been conservative on both the first and last digit.
1512  * For example a 32-bit mantissa with an exponent of 4 would have
1513  * exact values in the set
1514  *               4
1515  *               8
1516  *              ..
1517  *     17179869172
1518  *     17179869176
1519  *     17179869180
1520  *
1521  * where for the purposes of calculating NV_DIG we would have to discount
1522  * both the first and last digit, since neither can hold all values from
1523  * 0..9; but for calculating the value we must examine those two digits.
1524  */
1525 #ifdef MAX_SIG_DIG_PLUS
1526     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1527        possible digits in a NV, especially if NVs are not IEEE compliant
1528        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1529 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1530 #else
1531 # define MAX_SIG_DIGITS (NV_DIG+2)
1532 #endif
1533
1534 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1535 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1536
1537 #if defined(NV_INF) || defined(NV_NAN)
1538     {
1539         char* endp;
1540         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1541             return endp;
1542     }
1543 #endif
1544
1545     /* we accumulate digits into an integer; when this becomes too
1546      * large, we add the total to NV and start again */
1547
1548     while (s < send) {
1549         if (isDIGIT(*s)) {
1550             seen_digit = 1;
1551             old_digit = digit;
1552             digit = *s++ - '0';
1553             if (seen_dp)
1554                 exp_adjust[1]++;
1555
1556             /* don't start counting until we see the first significant
1557              * digit, eg the 5 in 0.00005... */
1558             if (!sig_digits && digit == 0)
1559                 continue;
1560
1561             if (++sig_digits > MAX_SIG_DIGITS) {
1562                 /* limits of precision reached */
1563                 if (digit > 5) {
1564                     ++accumulator[seen_dp];
1565                 } else if (digit == 5) {
1566                     if (old_digit % 2) { /* round to even - Allen */
1567                         ++accumulator[seen_dp];
1568                     }
1569                 }
1570                 if (seen_dp) {
1571                     exp_adjust[1]--;
1572                 } else {
1573                     exp_adjust[0]++;
1574                 }
1575                 /* skip remaining digits */
1576                 while (s < send && isDIGIT(*s)) {
1577                     ++s;
1578                     if (! seen_dp) {
1579                         exp_adjust[0]++;
1580                     }
1581                 }
1582                 /* warn of loss of precision? */
1583             }
1584             else {
1585                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1586                     /* add accumulator to result and start again */
1587                     result[seen_dp] = S_mulexp10(result[seen_dp],
1588                                                  exp_acc[seen_dp])
1589                         + (NV)accumulator[seen_dp];
1590                     accumulator[seen_dp] = 0;
1591                     exp_acc[seen_dp] = 0;
1592                 }
1593                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1594                 ++exp_acc[seen_dp];
1595             }
1596         }
1597         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1598             seen_dp = 1;
1599             if (sig_digits > MAX_SIG_DIGITS) {
1600                 while (s < send && isDIGIT(*s)) {
1601                     ++s;
1602                 }
1603                 break;
1604             }
1605         }
1606         else {
1607             break;
1608         }
1609     }
1610
1611     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1612     if (seen_dp) {
1613         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1614     }
1615
1616     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1617         bool expnegative = 0;
1618
1619         ++s;
1620         switch (*s) {
1621             case '-':
1622                 expnegative = 1;
1623                 /* FALLTHROUGH */
1624             case '+':
1625                 ++s;
1626         }
1627         while (s < send && isDIGIT(*s))
1628             exponent = exponent * 10 + (*s++ - '0');
1629         if (expnegative)
1630             exponent = -exponent;
1631     }
1632
1633     /* now apply the exponent */
1634
1635     if (seen_dp) {
1636         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1637                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1638     } else {
1639         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1640     }
1641
1642     /* now apply the sign */
1643     if (negative)
1644         result[2] = -result[2];
1645 #endif /* USE_PERL_ATOF */
1646     *value = result[2];
1647     return (char *)s;
1648 }
1649
1650 /*
1651 =for apidoc isinfnan
1652
1653 C<Perl_isinfnan()> is a utility function that returns true if the NV
1654 argument is either an infinity or a C<NaN>, false otherwise.  To test
1655 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1656
1657 This is also the logical inverse of Perl_isfinite().
1658
1659 =cut
1660 */
1661 bool
1662 Perl_isinfnan(NV nv)
1663 {
1664   PERL_UNUSED_ARG(nv);
1665 #ifdef Perl_isinf
1666     if (Perl_isinf(nv))
1667         return TRUE;
1668 #endif
1669 #ifdef Perl_isnan
1670     if (Perl_isnan(nv))
1671         return TRUE;
1672 #endif
1673     return FALSE;
1674 }
1675
1676 /*
1677 =for apidoc isinfnansv
1678
1679 Checks whether the argument would be either an infinity or C<NaN> when used
1680 as a number, but is careful not to trigger non-numeric or uninitialized
1681 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
1682
1683 =cut
1684 */
1685
1686 bool
1687 Perl_isinfnansv(pTHX_ SV *sv)
1688 {
1689     PERL_ARGS_ASSERT_ISINFNANSV;
1690     if (!SvOK(sv))
1691         return FALSE;
1692     if (SvNOKp(sv))
1693         return Perl_isinfnan(SvNVX(sv));
1694     if (SvIOKp(sv))
1695         return FALSE;
1696     {
1697         STRLEN len;
1698         const char *s = SvPV_nomg_const(sv, len);
1699         return cBOOL(grok_infnan(&s, s+len));
1700     }
1701 }
1702
1703 #ifndef HAS_MODFL
1704 /* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
1705  * copysignl to emulate modfl, which is in some platforms missing or
1706  * broken. */
1707 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1708 long double
1709 Perl_my_modfl(long double x, long double *ip)
1710 {
1711     *ip = truncl(x);
1712     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1713 }
1714 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1715 long double
1716 Perl_my_modfl(long double x, long double *ip)
1717 {
1718     *ip = aintl(x);
1719     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1720 }
1721 #  endif
1722 #endif
1723
1724 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1725 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1726 long double
1727 Perl_my_frexpl(long double x, int *e) {
1728     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1729     return (scalbnl(x, -*e));
1730 }
1731 #endif
1732
1733 /*
1734 =for apidoc Perl_signbit
1735
1736 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1737 it is not.
1738
1739 If F<Configure> detects this system has a C<signbit()> that will work with
1740 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1741 fall back on this implementation.  The main use of this function
1742 is catching C<-0.0>.
1743
1744 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1745 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1746 function or macro that doesn't happen to work with our particular choice
1747 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1748 the standard system headers to be happy.  Also, this is a no-context
1749 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1750 F<perl.h> as a simple macro call to the system's C<signbit()>.
1751 Users should just always call C<Perl_signbit()>.
1752
1753 =cut
1754 */
1755 #if !defined(HAS_SIGNBIT)
1756 int
1757 Perl_signbit(NV x) {
1758 #  ifdef Perl_fp_class_nzero
1759     return Perl_fp_class_nzero(x);
1760     /* Try finding the high byte, and assume it's highest bit
1761      * is the sign.  This assumption is probably wrong somewhere. */
1762 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1763     return (((unsigned char *)&x)[9] & 0x80);
1764 #  elif defined(NV_LITTLE_ENDIAN)
1765     /* Note that NVSIZE is sizeof(NV), which would make the below be
1766      * wrong if the end bytes are unused, which happens with the x86
1767      * 80-bit long doubles, which is why take care of that above. */
1768     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1769 #  elif defined(NV_BIG_ENDIAN)
1770     return (((unsigned char *)&x)[0] & 0x80);
1771 #  else
1772     /* This last resort fallback is wrong for the negative zero. */
1773     return (x < 0.0) ? 1 : 0;
1774 #  endif
1775 }
1776 #endif
1777
1778 /*
1779  * ex: set ts=8 sts=4 sw=4 et:
1780  */