numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 =cut
  22
  23 This file contains all the stuff needed by perl for manipulating numeric
  24 values, including such things as replacements for the OS's atof() function
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 #ifdef Perl_strtod
  33
  34 PERL_STATIC_INLINE NV
  35 S_strtod(pTHX_ const char * const s, char ** e)
  36 {
  37     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  38     NV result;
  39
  40     STORE_LC_NUMERIC_SET_TO_NEEDED();
  41
  42 #  ifdef USE_QUADMATH
  43
  44     result = strtoflt128(s, e);
  45
  46 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
  47                              && defined(USE_LONG_DOUBLE)
  48 #    if defined(__MINGW64_VERSION_MAJOR)
  49       /***********************************************
  50        We are unable to use strtold because of
  51         https://sourceforge.net/p/mingw-w64/bugs/711/
  52         &
  53         https://sourceforge.net/p/mingw-w64/bugs/725/
  54
  55        but __mingw_strtold is fine.
  56       ***********************************************/
  57
  58     result = __mingw_strtold(s, e);
  59
  60 #    else
  61
  62     result = strtold(s, e);
  63
  64 #    endif
  65 #  elif defined(HAS_STRTOD)
  66
  67     result = strtod(s, e);
  68
  69 #  endif
  70
  71     RESTORE_LC_NUMERIC();
  72
  73     return result;
  74 }
  75
  76 #endif  /* #ifdef Perl_strtod */
  77
  78 /*
  79
  80 =for apidoc my_strtod
  81
  82 This function is equivalent to the libc strtod() function, and is available
  83 even on platforms that lack plain strtod().  Its return value is the best
  84 available precision depending on platform capabilities and F<Configure>
  85 options.
  86
  87 It properly handles the locale radix character, meaning it expects a dot except
  88 when called from within the scope of S<C<use locale>>, in which case the radix
  89 character should be that specified by the current locale.
  90
  91 The synonym Strod() may be used instead.
  92
  93 =cut
  94
  95 */
  96
  97 NV
  98 my_strtod(const char * const s, char **e)
  99 {
 100     dTHX;
 101
 102     PERL_ARGS_ASSERT_MY_STRTOD;
 103
 104 #ifdef Perl_strtod
 105
 106     return S_strtod(aTHX_ s, e);
 107
 108 #else
 109
 110     {
 111         NV result;
 112         char ** end_ptr = NULL;
 113
 114         *end_ptr = my_atof2(s, &result);
 115         if (e) {
 116             *e = *end_ptr;
 117         }
 118
 119         if (! *end_ptr) {
 120             result = 0.0;
 121         }
 122
 123         return result;
 124     }
 125
 126 #endif
 127
 128 }
 129
 130
 131 U32
 132 Perl_cast_ulong(NV f)
 133 {
 134   if (f < 0.0)
 135     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
 136   if (f < U32_MAX_P1) {
 137 #if CASTFLAGS & 2
 138     if (f < U32_MAX_P1_HALF)
 139       return (U32) f;
 140     f -= U32_MAX_P1_HALF;
 141     return ((U32) f) | (1 + (U32_MAX >> 1));
 142 #else
 143     return (U32) f;
 144 #endif
 145   }
 146   return f > 0 ? U32_MAX : 0 /* NaN */;
 147 }
 148
 149 I32
 150 Perl_cast_i32(NV f)
 151 {
 152   if (f < I32_MAX_P1)
 153     return f < I32_MIN ? I32_MIN : (I32) f;
 154   if (f < U32_MAX_P1) {
 155 #if CASTFLAGS & 2
 156     if (f < U32_MAX_P1_HALF)
 157       return (I32)(U32) f;
 158     f -= U32_MAX_P1_HALF;
 159     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
 160 #else
 161     return (I32)(U32) f;
 162 #endif
 163   }
 164   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
 165 }
 166
 167 IV
 168 Perl_cast_iv(NV f)
 169 {
 170   if (f < IV_MAX_P1)
 171     return f < IV_MIN ? IV_MIN : (IV) f;
 172   if (f < UV_MAX_P1) {
 173 #if CASTFLAGS & 2
 174     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
 175     if (f < UV_MAX_P1_HALF)
 176       return (IV)(UV) f;
 177     f -= UV_MAX_P1_HALF;
 178     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
 179 #else
 180     return (IV)(UV) f;
 181 #endif
 182   }
 183   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
 184 }
 185
 186 UV
 187 Perl_cast_uv(NV f)
 188 {
 189   if (f < 0.0)
 190     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
 191   if (f < UV_MAX_P1) {
 192 #if CASTFLAGS & 2
 193     if (f < UV_MAX_P1_HALF)
 194       return (UV) f;
 195     f -= UV_MAX_P1_HALF;
 196     return ((UV) f) | (1 + (UV_MAX >> 1));
 197 #else
 198     return (UV) f;
 199 #endif
 200   }
 201   return f > 0 ? UV_MAX : 0 /* NaN */;
 202 }
 203
 204 /*
 205 =for apidoc grok_bin
 206
 207 converts a string representing a binary number to numeric form.
 208
 209 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 210 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 211 The scan stops at the end of the string, or the first invalid character.
 212 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 213 invalid character will also trigger a warning.
 214 On return C<*len> is set to the length of the scanned string,
 215 and C<*flags> gives output flags.
 216
 217 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 218 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 219 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 220 and writes the value to C<*result> (or the value is discarded if C<result>
 221 is NULL).
 222
 223 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
 224 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 225 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
 226 number may use C<"_"> characters to separate digits.
 227
 228 =cut
 229
 230 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 231 which suppresses any message for non-portable numbers that are still valid
 232 on this platform.
 233  */
 234
 235 UV
 236 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 237 {
 238     const char *s = start;
 239     STRLEN len = *len_p;
 240     UV value = 0;
 241     NV value_nv = 0;
 242
 243     const UV max_div_2 = UV_MAX / 2;
 244     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 245     bool overflowed = FALSE;
 246     char bit;
 247
 248     PERL_ARGS_ASSERT_GROK_BIN;
 249
 250     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 251         /* strip off leading b or 0b.
 252            for compatibility silently suffer "b" and "0b" as valid binary
 253            numbers. */
 254         if (len >= 1) {
 255             if (isALPHA_FOLD_EQ(s[0], 'b')) {
 256                 s++;
 257                 len--;
 258             }
 259             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
 260                 s+=2;
 261                 len-=2;
 262             }
 263         }
 264     }
 265
 266     for (; len-- && (bit = *s); s++) {
 267         if (bit == '0' || bit == '1') {
 268             /* Write it in this wonky order with a goto to attempt to get the
 269                compiler to make the common case integer-only loop pretty tight.
 270                With gcc seems to be much straighter code than old scan_bin.  */
 271           redo:
 272             if (!overflowed) {
 273                 if (value <= max_div_2) {
 274                     value = (value << 1) | (bit - '0');
 275                     continue;
 276                 }
 277                 /* Bah. We're just overflowed.  */
 278                 /* diag_listed_as: Integer overflow in %s number */
 279                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 280                                  "Integer overflow in binary number");
 281                 overflowed = TRUE;
 282                 value_nv = (NV) value;
 283             }
 284             value_nv *= 2.0;
 285             /* If an NV has not enough bits in its mantissa to
 286              * represent a UV this summing of small low-order numbers
 287              * is a waste of time (because the NV cannot preserve
 288              * the low-order bits anyway): we could just remember when
 289              * did we overflow and in the end just multiply value_nv by the
 290              * right amount. */
 291             value_nv += (NV)(bit - '0');
 292             continue;
 293         }
 294         if (bit == '_' && len && allow_underscores && (bit = s[1])
 295             && (bit == '0' || bit == '1'))
 296             {
 297                 --len;
 298                 ++s;
 299                 goto redo;
 300             }
 301         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 302             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 303                            "Illegal binary digit '%c' ignored", *s);
 304         break;
 305     }
 306
 307     if (   ( overflowed && value_nv > 4294967295.0)
 308 #if UVSIZE > 4
 309         || (!overflowed && value > 0xffffffff
 310             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 311 #endif
 312         ) {
 313         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 314                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 315     }
 316     *len_p = s - start;
 317     if (!overflowed) {
 318         *flags = 0;
 319         return value;
 320     }
 321     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 322     if (result)
 323         *result = value_nv;
 324     return UV_MAX;
 325 }
 326
 327 /*
 328 =for apidoc grok_hex
 329
 330 converts a string representing a hex number to numeric form.
 331
 332 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 333 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 334 The scan stops at the end of the string, or the first invalid character.
 335 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 336 invalid character will also trigger a warning.
 337 On return C<*len> is set to the length of the scanned string,
 338 and C<*flags> gives output flags.
 339
 340 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 341 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 342 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 343 and writes the value to C<*result> (or the value is discarded if C<result>
 344 is C<NULL>).
 345
 346 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
 347 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 348 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
 349 number may use C<"_"> characters to separate digits.
 350
 351 =cut
 352
 353 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 354 which suppresses any message for non-portable numbers, but which are valid
 355 on this platform.
 356  */
 357
 358 UV
 359 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 360 {
 361     const char *s = start;
 362     STRLEN len = *len_p;
 363     UV value = 0;
 364     NV value_nv = 0;
 365     const UV max_div_16 = UV_MAX / 16;
 366     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 367     bool overflowed = FALSE;
 368
 369     PERL_ARGS_ASSERT_GROK_HEX;
 370
 371     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 372         /* strip off leading x or 0x.
 373            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 374         */
 375         if (len >= 1) {
 376             if (isALPHA_FOLD_EQ(s[0], 'x')) {
 377                 s++;
 378                 len--;
 379             }
 380             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
 381                 s+=2;
 382                 len-=2;
 383             }
 384         }
 385     }
 386
 387     for (; len-- && *s; s++) {
 388         if (isXDIGIT(*s)) {
 389             /* Write it in this wonky order with a goto to attempt to get the
 390                compiler to make the common case integer-only loop pretty tight.
 391                With gcc seems to be much straighter code than old scan_hex.  */
 392           redo:
 393             if (!overflowed) {
 394                 if (value <= max_div_16) {
 395                     value = (value << 4) | XDIGIT_VALUE(*s);
 396                     continue;
 397                 }
 398                 /* Bah. We're just overflowed.  */
 399                 /* diag_listed_as: Integer overflow in %s number */
 400                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 401                                  "Integer overflow in hexadecimal number");
 402                 overflowed = TRUE;
 403                 value_nv = (NV) value;
 404             }
 405             value_nv *= 16.0;
 406             /* If an NV has not enough bits in its mantissa to
 407              * represent a UV this summing of small low-order numbers
 408              * is a waste of time (because the NV cannot preserve
 409              * the low-order bits anyway): we could just remember when
 410              * did we overflow and in the end just multiply value_nv by the
 411              * right amount of 16-tuples. */
 412             value_nv += (NV) XDIGIT_VALUE(*s);
 413             continue;
 414         }
 415         if (*s == '_' && len && allow_underscores && s[1]
 416                 && isXDIGIT(s[1]))
 417             {
 418                 --len;
 419                 ++s;
 420                 goto redo;
 421             }
 422         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 423             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 424                         "Illegal hexadecimal digit '%c' ignored", *s);
 425         break;
 426     }
 427
 428     if (   ( overflowed && value_nv > 4294967295.0)
 429 #if UVSIZE > 4
 430         || (!overflowed && value > 0xffffffff
 431             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 432 #endif
 433         ) {
 434         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 435                        "Hexadecimal number > 0xffffffff non-portable");
 436     }
 437     *len_p = s - start;
 438     if (!overflowed) {
 439         *flags = 0;
 440         return value;
 441     }
 442     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 443     if (result)
 444         *result = value_nv;
 445     return UV_MAX;
 446 }
 447
 448 /*
 449 =for apidoc grok_oct
 450
 451 converts a string representing an octal number to numeric form.
 452
 453 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 454 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 455 The scan stops at the end of the string, or the first invalid character.
 456 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 457 8 or 9 will also trigger a warning.
 458 On return C<*len> is set to the length of the scanned string,
 459 and C<*flags> gives output flags.
 460
 461 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 462 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 463 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 464 and writes the value to C<*result> (or the value is discarded if C<result>
 465 is C<NULL>).
 466
 467 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
 468 number may use C<"_"> characters to separate digits.
 469
 470 =cut
 471
 472 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 473 which suppresses any message for non-portable numbers, but which are valid
 474 on this platform.
 475  */
 476
 477 UV
 478 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 479 {
 480     const char *s = start;
 481     STRLEN len = *len_p;
 482     UV value = 0;
 483     NV value_nv = 0;
 484     const UV max_div_8 = UV_MAX / 8;
 485     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 486     bool overflowed = FALSE;
 487
 488     PERL_ARGS_ASSERT_GROK_OCT;
 489
 490     for (; len-- && *s; s++) {
 491         if (isOCTAL(*s)) {
 492             /* Write it in this wonky order with a goto to attempt to get the
 493                compiler to make the common case integer-only loop pretty tight.
 494             */
 495           redo:
 496             if (!overflowed) {
 497                 if (value <= max_div_8) {
 498                     value = (value << 3) | OCTAL_VALUE(*s);
 499                     continue;
 500                 }
 501                 /* Bah. We're just overflowed.  */
 502                 /* diag_listed_as: Integer overflow in %s number */
 503                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 504                                "Integer overflow in octal number");
 505                 overflowed = TRUE;
 506                 value_nv = (NV) value;
 507             }
 508             value_nv *= 8.0;
 509             /* If an NV has not enough bits in its mantissa to
 510              * represent a UV this summing of small low-order numbers
 511              * is a waste of time (because the NV cannot preserve
 512              * the low-order bits anyway): we could just remember when
 513              * did we overflow and in the end just multiply value_nv by the
 514              * right amount of 8-tuples. */
 515             value_nv += (NV) OCTAL_VALUE(*s);
 516             continue;
 517         }
 518         if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
 519             --len;
 520             ++s;
 521             goto redo;
 522         }
 523         /* Allow \octal to work the DWIM way (that is, stop scanning
 524          * as soon as non-octal characters are seen, complain only if
 525          * someone seems to want to use the digits eight and nine.  Since we
 526          * know it is not octal, then if isDIGIT, must be an 8 or 9). */
 527         if (isDIGIT(*s)) {
 528             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 529                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 530                                "Illegal octal digit '%c' ignored", *s);
 531         }
 532         break;
 533     }
 534
 535     if (   ( overflowed && value_nv > 4294967295.0)
 536 #if UVSIZE > 4
 537         || (!overflowed && value > 0xffffffff
 538             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 539 #endif
 540         ) {
 541         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 542                        "Octal number > 037777777777 non-portable");
 543     }
 544     *len_p = s - start;
 545     if (!overflowed) {
 546         *flags = 0;
 547         return value;
 548     }
 549     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 550     if (result)
 551         *result = value_nv;
 552     return UV_MAX;
 553 }
 554
 555 /*
 556 =for apidoc scan_bin
 557
 558 For backwards compatibility.  Use C<grok_bin> instead.
 559
 560 =for apidoc scan_hex
 561
 562 For backwards compatibility.  Use C<grok_hex> instead.
 563
 564 =for apidoc scan_oct
 565
 566 For backwards compatibility.  Use C<grok_oct> instead.
 567
 568 =cut
 569  */
 570
 571 NV
 572 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 573 {
 574     NV rnv;
 575     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 576     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 577
 578     PERL_ARGS_ASSERT_SCAN_BIN;
 579
 580     *retlen = len;
 581     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 582 }
 583
 584 NV
 585 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 586 {
 587     NV rnv;
 588     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 589     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 590
 591     PERL_ARGS_ASSERT_SCAN_OCT;
 592
 593     *retlen = len;
 594     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 595 }
 596
 597 NV
 598 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 599 {
 600     NV rnv;
 601     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 602     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 603
 604     PERL_ARGS_ASSERT_SCAN_HEX;
 605
 606     *retlen = len;
 607     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 608 }
 609
 610 /*
 611 =for apidoc grok_numeric_radix
 612
 613 Scan and skip for a numeric decimal separator (radix).
 614
 615 =cut
 616  */
 617 bool
 618 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 619 {
 620     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 621
 622 #ifdef USE_LOCALE_NUMERIC
 623
 624     if (IN_LC(LC_NUMERIC)) {
 625         STRLEN len;
 626         char * radix;
 627         bool matches_radix = FALSE;
 628         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
 629
 630         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
 631
 632         radix = SvPV(PL_numeric_radix_sv, len);
 633         radix = savepvn(radix, len);
 634
 635         RESTORE_LC_NUMERIC();
 636
 637         if (*sp + len <= send) {
 638             matches_radix = memEQ(*sp, radix, len);
 639         }
 640
 641         Safefree(radix);
 642
 643         if (matches_radix) {
 644             *sp += len;
 645             return TRUE;
 646         }
 647     }
 648
 649 #endif
 650
 651     /* always try "." if numeric radix didn't match because
 652      * we may have data from different locales mixed */
 653     if (*sp < send && **sp == '.') {
 654         ++*sp;
 655         return TRUE;
 656     }
 657
 658     return FALSE;
 659 }
 660
 661 /*
 662 =for apidoc grok_infnan
 663
 664 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
 665 or "not a number", and returns one of the following flag combinations:
 666
 667   IS_NUMBER_INFINITY
 668   IS_NUMBER_NAN
 669   IS_NUMBER_INFINITY | IS_NUMBER_NEG
 670   IS_NUMBER_NAN | IS_NUMBER_NEG
 671   0
 672
 673 possibly |-ed with C<IS_NUMBER_TRAILING>.
 674
 675 If an infinity or a not-a-number is recognized, C<*sp> will point to
 676 one byte past the end of the recognized string.  If the recognition fails,
 677 zero is returned, and C<*sp> will not move.
 678
 679 =cut
 680 */
 681
 682 int
 683 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
 684 {
 685     const char* s = *sp;
 686     int flags = 0;
 687 #if defined(NV_INF) || defined(NV_NAN)
 688     bool odh = FALSE; /* one-dot-hash: 1.#INF */
 689
 690     PERL_ARGS_ASSERT_GROK_INFNAN;
 691
 692     if (*s == '+') {
 693         s++; if (s == send) return 0;
 694     }
 695     else if (*s == '-') {
 696         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
 697         s++; if (s == send) return 0;
 698     }
 699
 700     if (*s == '1') {
 701         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
 702          * Let's keep the dot optional. */
 703         s++; if (s == send) return 0;
 704         if (*s == '.') {
 705             s++; if (s == send) return 0;
 706         }
 707         if (*s == '#') {
 708             s++; if (s == send) return 0;
 709         } else
 710             return 0;
 711         odh = TRUE;
 712     }
 713
 714     if (isALPHA_FOLD_EQ(*s, 'I')) {
 715         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
 716
 717         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 718         s++; if (s == send) return 0;
 719         if (isALPHA_FOLD_EQ(*s, 'F')) {
 720             s++;
 721             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
 722                 int fail =
 723                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
 724                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
 725                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
 726                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
 727                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
 728                 s++;
 729             } else if (odh) {
 730                 while (*s == '0') { /* 1.#INF00 */
 731                     s++;
 732                 }
 733             }
 734             while (s < send && isSPACE(*s))
 735                 s++;
 736             if (s < send && *s) {
 737                 flags |= IS_NUMBER_TRAILING;
 738             }
 739             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 740         }
 741         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
 742             s++;
 743             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 744             while (*s == '0') { /* 1.#IND00 */
 745                 s++;
 746             }
 747             if (*s) {
 748                 flags |= IS_NUMBER_TRAILING;
 749             }
 750         } else
 751             return 0;
 752     }
 753     else {
 754         /* Maybe NAN of some sort */
 755
 756         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
 757             /* snan, qNaN */
 758             /* XXX do something with the snan/qnan difference */
 759             s++; if (s == send) return 0;
 760         }
 761
 762         if (isALPHA_FOLD_EQ(*s, 'N')) {
 763             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
 764             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 765             s++;
 766
 767             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 768
 769             /* NaN can be followed by various stuff (NaNQ, NaNS), but
 770              * there are also multiple different NaN values, and some
 771              * implementations output the "payload" values,
 772              * e.g. NaN123, NAN(abc), while some legacy implementations
 773              * have weird stuff like NaN%. */
 774             if (isALPHA_FOLD_EQ(*s, 'q') ||
 775                 isALPHA_FOLD_EQ(*s, 's')) {
 776                 /* "nanq" or "nans" are ok, though generating
 777                  * these portably is tricky. */
 778                 s++;
 779             }
 780             if (*s == '(') {
 781                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
 782                 const char *t;
 783                 s++;
 784                 if (s == send) {
 785                     return flags | IS_NUMBER_TRAILING;
 786                 }
 787                 t = s + 1;
 788                 while (t < send && *t && *t != ')') {
 789                     t++;
 790                 }
 791                 if (t == send) {
 792                     return flags | IS_NUMBER_TRAILING;
 793                 }
 794                 if (*t == ')') {
 795                     int nantype;
 796                     UV nanval;
 797                     if (s[0] == '0' && s + 2 < t &&
 798                         isALPHA_FOLD_EQ(s[1], 'x') &&
 799                         isXDIGIT(s[2])) {
 800                         STRLEN len = t - s;
 801                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 802                         nanval = grok_hex(s, &len, &flags, NULL);
 803                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 804                             nantype = 0;
 805                         } else {
 806                             nantype = IS_NUMBER_IN_UV;
 807                         }
 808                         s += len;
 809                     } else if (s[0] == '0' && s + 2 < t &&
 810                                isALPHA_FOLD_EQ(s[1], 'b') &&
 811                                (s[2] == '0' || s[2] == '1')) {
 812                         STRLEN len = t - s;
 813                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 814                         nanval = grok_bin(s, &len, &flags, NULL);
 815                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 816                             nantype = 0;
 817                         } else {
 818                             nantype = IS_NUMBER_IN_UV;
 819                         }
 820                         s += len;
 821                     } else {
 822                         const char *u;
 823                         nantype =
 824                             grok_number_flags(s, t - s, &nanval,
 825                                               PERL_SCAN_TRAILING |
 826                                               PERL_SCAN_ALLOW_UNDERSCORES);
 827                         /* Unfortunately grok_number_flags() doesn't
 828                          * tell how far we got and the ')' will always
 829                          * be "trailing", so we need to double-check
 830                          * whether we had something dubious. */
 831                         for (u = s; u < t; u++) {
 832                             if (!isDIGIT(*u)) {
 833                                 flags |= IS_NUMBER_TRAILING;
 834                                 break;
 835                             }
 836                         }
 837                         s = u;
 838                     }
 839
 840                     /* XXX Doesn't do octal: nan("0123").
 841                      * Probably not a big loss. */
 842
 843                     if ((nantype & IS_NUMBER_NOT_INT) ||
 844                         !(nantype && IS_NUMBER_IN_UV)) {
 845                         /* XXX the nanval is currently unused, that is,
 846                          * not inserted as the NaN payload of the NV.
 847                          * But the above code already parses the C99
 848                          * nan(...)  format.  See below, and see also
 849                          * the nan() in POSIX.xs.
 850                          *
 851                          * Certain configuration combinations where
 852                          * NVSIZE is greater than UVSIZE mean that
 853                          * a single UV cannot contain all the possible
 854                          * NaN payload bits.  There would need to be
 855                          * some more generic syntax than "nan($uv)".
 856                          *
 857                          * Issues to keep in mind:
 858                          *
 859                          * (1) In most common cases there would
 860                          * not be an integral number of bytes that
 861                          * could be set, only a certain number of bits.
 862                          * For example for the common case of
 863                          * NVSIZE == UVSIZE == 8 there is room for 52
 864                          * bits in the payload, but the most significant
 865                          * bit is commonly reserved for the
 866                          * signaling/quiet bit, leaving 51 bits.
 867                          * Furthermore, the C99 nan() is supposed
 868                          * to generate quiet NaNs, so it is doubtful
 869                          * whether it should be able to generate
 870                          * signaling NaNs.  For the x86 80-bit doubles
 871                          * (if building a long double Perl) there would
 872                          * be 62 bits (s/q bit being the 63rd).
 873                          *
 874                          * (2) Endianness of the payload bits. If the
 875                          * payload is specified as an UV, the low-order
 876                          * bits of the UV are naturally little-endianed
 877                          * (rightmost) bits of the payload.  The endianness
 878                          * of UVs and NVs can be different. */
 879                         return 0;
 880                     }
 881                     if (s < t) {
 882                         flags |= IS_NUMBER_TRAILING;
 883                     }
 884                 } else {
 885                     /* Looked like nan(...), but no close paren. */
 886                     flags |= IS_NUMBER_TRAILING;
 887                 }
 888             } else {
 889                 while (s < send && isSPACE(*s))
 890                     s++;
 891                 if (s < send && *s) {
 892                     /* Note that we here implicitly accept (parse as
 893                      * "nan", but with warnings) also any other weird
 894                      * trailing stuff for "nan".  In the above we just
 895                      * check that if we got the C99-style "nan(...)",
 896                      * the "..."  looks sane.
 897                      * If in future we accept more ways of specifying
 898                      * the nan payload, the accepting would happen around
 899                      * here. */
 900                     flags |= IS_NUMBER_TRAILING;
 901                 }
 902             }
 903             s = send;
 904         }
 905         else
 906             return 0;
 907     }
 908
 909     while (s < send && isSPACE(*s))
 910         s++;
 911
 912 #else
 913     PERL_UNUSED_ARG(send);
 914 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
 915     *sp = s;
 916     return flags;
 917 }
 918
 919 /*
 920 =for apidoc grok_number_flags
 921
 922 Recognise (or not) a number.  The type of the number is returned
 923 (0 if unrecognised), otherwise it is a bit-ORed combination of
 924 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
 925 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
 926
 927 If the value of the number can fit in a UV, it is returned in C<*valuep>.
 928 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
 929 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
 930 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
 931 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
 932 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
 933
 934 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
 935 seen (in which case C<*valuep> gives the true value truncated to an integer), and
 936 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
 937 absolute value).  C<IS_NUMBER_IN_UV> is not set if e notation was used or the
 938 number is larger than a UV.
 939
 940 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 941 non-numeric text on an otherwise successful I<grok>, setting
 942 C<IS_NUMBER_TRAILING> on the result.
 943
 944 =for apidoc grok_number
 945
 946 Identical to C<grok_number_flags()> with C<flags> set to zero.
 947
 948 =cut
 949  */
 950 int
 951 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 952 {
 953     PERL_ARGS_ASSERT_GROK_NUMBER;
 954
 955     return grok_number_flags(pv, len, valuep, 0);
 956 }
 957
 958 static const UV uv_max_div_10 = UV_MAX / 10;
 959 static const U8 uv_max_mod_10 = UV_MAX % 10;
 960
 961 int
 962 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 963 {
 964   const char *s = pv;
 965   const char * const send = pv + len;
 966   const char *d;
 967   int numtype = 0;
 968
 969   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 970
 971   while (s < send && isSPACE(*s))
 972     s++;
 973   if (s == send) {
 974     return 0;
 975   } else if (*s == '-') {
 976     s++;
 977     numtype = IS_NUMBER_NEG;
 978   }
 979   else if (*s == '+')
 980     s++;
 981
 982   if (s == send)
 983     return 0;
 984
 985   /* The first digit (after optional sign): note that might
 986    * also point to "infinity" or "nan", or "1.#INF". */
 987   d = s;
 988
 989   /* next must be digit or the radix separator or beginning of infinity/nan */
 990   if (isDIGIT(*s)) {
 991     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 992        overflow.  */
 993     UV value = *s - '0';
 994     /* This construction seems to be more optimiser friendly.
 995        (without it gcc does the isDIGIT test and the *s - '0' separately)
 996        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 997        In theory the optimiser could deduce how far to unroll the loop
 998        before checking for overflow.  */
 999     if (++s < send) {
1000       int digit = *s - '0';
1001       if (inRANGE(digit, 0, 9)) {
1002         value = value * 10 + digit;
1003         if (++s < send) {
1004           digit = *s - '0';
1005           if (inRANGE(digit, 0, 9)) {
1006             value = value * 10 + digit;
1007             if (++s < send) {
1008               digit = *s - '0';
1009               if (inRANGE(digit, 0, 9)) {
1010                 value = value * 10 + digit;
1011                 if (++s < send) {
1012                   digit = *s - '0';
1013                   if (inRANGE(digit, 0, 9)) {
1014                     value = value * 10 + digit;
1015                     if (++s < send) {
1016                       digit = *s - '0';
1017                       if (inRANGE(digit, 0, 9)) {
1018                         value = value * 10 + digit;
1019                         if (++s < send) {
1020                           digit = *s - '0';
1021                           if (inRANGE(digit, 0, 9)) {
1022                             value = value * 10 + digit;
1023                             if (++s < send) {
1024                               digit = *s - '0';
1025                               if (inRANGE(digit, 0, 9)) {
1026                                 value = value * 10 + digit;
1027                                 if (++s < send) {
1028                                   digit = *s - '0';
1029                                   if (inRANGE(digit, 0, 9)) {
1030                                     value = value * 10 + digit;
1031                                     if (++s < send) {
1032                                       /* Now got 9 digits, so need to check
1033                                          each time for overflow.  */
1034                                       digit = *s - '0';
1035                                       while (    inRANGE(digit, 0, 9)
1036                                              && (value < uv_max_div_10
1037                                                  || (value == uv_max_div_10
1038                                                      && digit <= uv_max_mod_10))) {
1039                                         value = value * 10 + digit;
1040                                         if (++s < send)
1041                                           digit = *s - '0';
1042                                         else
1043                                           break;
1044                                       }
1045                                       if (inRANGE(digit, 0, 9)
1046                                           && (s < send)) {
1047                                         /* value overflowed.
1048                                            skip the remaining digits, don't
1049                                            worry about setting *valuep.  */
1050                                         do {
1051                                           s++;
1052                                         } while (s < send && isDIGIT(*s));
1053                                         numtype |=
1054                                           IS_NUMBER_GREATER_THAN_UV_MAX;
1055                                         goto skip_value;
1056                                       }
1057                                     }
1058                                   }
1059                                 }
1060                               }
1061                             }
1062                           }
1063                         }
1064                       }
1065                     }
1066                   }
1067                 }
1068               }
1069             }
1070           }
1071         }
1072       }
1073     }
1074     numtype |= IS_NUMBER_IN_UV;
1075     if (valuep)
1076       *valuep = value;
1077
1078   skip_value:
1079     if (GROK_NUMERIC_RADIX(&s, send)) {
1080       numtype |= IS_NUMBER_NOT_INT;
1081       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
1082         s++;
1083     }
1084   }
1085   else if (GROK_NUMERIC_RADIX(&s, send)) {
1086     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1087     /* no digits before the radix means we need digits after it */
1088     if (s < send && isDIGIT(*s)) {
1089       do {
1090         s++;
1091       } while (s < send && isDIGIT(*s));
1092       if (valuep) {
1093         /* integer approximation is valid - it's 0.  */
1094         *valuep = 0;
1095       }
1096     }
1097     else
1098         return 0;
1099   }
1100
1101   if (s > d && s < send) {
1102     /* we can have an optional exponent part */
1103     if (isALPHA_FOLD_EQ(*s, 'e')) {
1104       s++;
1105       if (s < send && (*s == '-' || *s == '+'))
1106         s++;
1107       if (s < send && isDIGIT(*s)) {
1108         do {
1109           s++;
1110         } while (s < send && isDIGIT(*s));
1111       }
1112       else if (flags & PERL_SCAN_TRAILING)
1113         return numtype | IS_NUMBER_TRAILING;
1114       else
1115         return 0;
1116
1117       /* The only flag we keep is sign.  Blow away any "it's UV"  */
1118       numtype &= IS_NUMBER_NEG;
1119       numtype |= IS_NUMBER_NOT_INT;
1120     }
1121   }
1122   while (s < send && isSPACE(*s))
1123     s++;
1124   if (s >= send)
1125     return numtype;
1126   if (memEQs(pv, len, "0 but true")) {
1127     if (valuep)
1128       *valuep = 0;
1129     return IS_NUMBER_IN_UV;
1130   }
1131   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1132   if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
1133       /* Really detect inf/nan. Start at d, not s, since the above
1134        * code might have already consumed the "1." or "1". */
1135       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1136       if ((infnan & IS_NUMBER_INFINITY)) {
1137           return (numtype | infnan); /* Keep sign for infinity. */
1138       }
1139       else if ((infnan & IS_NUMBER_NAN)) {
1140           return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1141       }
1142   }
1143   else if (flags & PERL_SCAN_TRAILING) {
1144     return numtype | IS_NUMBER_TRAILING;
1145   }
1146
1147   return 0;
1148 }
1149
1150 /*
1151 =for apidoc grok_atoUV
1152
1153 parse a string, looking for a decimal unsigned integer.
1154
1155 On entry, C<pv> points to the beginning of the string;
1156 C<valptr> points to a UV that will receive the converted value, if found;
1157 C<endptr> is either NULL or points to a variable that points to one byte
1158 beyond the point in C<pv> that this routine should examine.
1159 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1160
1161 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1162 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1163 value.
1164
1165 If you constrain the portion of C<pv> that is looked at by this function (by
1166 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1167 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1168 final digit of the value.  But if there is no constraint at what's looked at,
1169 all of C<pv> must be valid in order for TRUE to be returned.
1170
1171 The only characters this accepts are the decimal digits '0'..'9'.
1172
1173 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1174 leading whitespace, nor negative inputs.  If such features are required, the
1175 calling code needs to explicitly implement those.
1176
1177 Note that this function returns FALSE for inputs that would overflow a UV,
1178 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1179 C<01>, C<002>, I<etc>.
1180
1181 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1182 used for incremental parsing, and therefore should be avoided
1183 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1184 seen as a bug (global state controlled by user environment).
1185
1186 =cut
1187
1188 */
1189
1190 bool
1191 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1192 {
1193     const char* s = pv;
1194     const char** eptr;
1195     const char* end2; /* Used in case endptr is NULL. */
1196     UV val = 0; /* The parsed value. */
1197
1198     PERL_ARGS_ASSERT_GROK_ATOUV;
1199
1200     if (endptr) {
1201         eptr = endptr;
1202     }
1203     else {
1204         end2 = s + strlen(s);
1205         eptr = &end2;
1206     }
1207
1208     if (   *eptr <= s
1209         || ! isDIGIT(*s))
1210     {
1211         return FALSE;
1212     }
1213
1214     /* Single-digit inputs are quite common. */
1215     val = *s++ - '0';
1216     if (s < *eptr && isDIGIT(*s)) {
1217         /* Fail on extra leading zeros. */
1218         if (val == 0)
1219             return FALSE;
1220         while (s < *eptr && isDIGIT(*s)) {
1221             /* This could be unrolled like in grok_number(), but
1222                 * the expected uses of this are not speed-needy, and
1223                 * unlikely to need full 64-bitness. */
1224             const U8 digit = *s++ - '0';
1225             if (val < uv_max_div_10 ||
1226                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1227                 val = val * 10 + digit;
1228             } else {
1229                 return FALSE;
1230             }
1231         }
1232     }
1233
1234     if (endptr == NULL) {
1235         if (*s) {
1236             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1237         }
1238     }
1239     else {
1240         *endptr = s;
1241     }
1242
1243     *valptr = val;
1244     return TRUE;
1245 }
1246
1247 #ifndef Perl_strtod
1248 STATIC NV
1249 S_mulexp10(NV value, I32 exponent)
1250 {
1251     NV result = 1.0;
1252     NV power = 10.0;
1253     bool negative = 0;
1254     I32 bit;
1255
1256     if (exponent == 0)
1257         return value;
1258     if (value == 0)
1259         return (NV)0;
1260
1261     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1262      * and that format does not have *easy* capabilities [1] for
1263      * overflowing doubles 'silently' as IEEE fp does.  We also need
1264      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1265      * range is much larger than D_FLOAT it still doesn't do silent
1266      * overflow.  Therefore we need to detect early whether we would
1267      * overflow (this is the behaviour of the native string-to-float
1268      * conversion routines, and therefore of native applications, too).
1269      *
1270      * [1] Trying to establish a condition handler to trap floating point
1271      *     exceptions is not a good idea. */
1272
1273     /* In UNICOS and in certain Cray models (such as T90) there is no
1274      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1275      * There is something you can do if you are willing to use some
1276      * inline assembler: the instruction is called DFI-- but that will
1277      * disable *all* floating point interrupts, a little bit too large
1278      * a hammer.  Therefore we need to catch potential overflows before
1279      * it's too late. */
1280
1281 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1282     STMT_START {
1283         const NV exp_v = log10(value);
1284         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1285             return NV_MAX;
1286         if (exponent < 0) {
1287             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1288                 return 0.0;
1289             while (-exponent >= NV_MAX_10_EXP) {
1290                 /* combination does not overflow, but 10^(-exponent) does */
1291                 value /= 10;
1292                 ++exponent;
1293             }
1294         }
1295     } STMT_END;
1296 #endif
1297
1298     if (exponent < 0) {
1299         negative = 1;
1300         exponent = -exponent;
1301 #ifdef NV_MAX_10_EXP
1302         /* for something like 1234 x 10^-309, the action of calculating
1303          * the intermediate value 10^309 then returning 1234 / (10^309)
1304          * will fail, since 10^309 becomes infinity. In this case try to
1305          * refactor it as 123 / (10^308) etc.
1306          */
1307         while (value && exponent > NV_MAX_10_EXP) {
1308             exponent--;
1309             value /= 10;
1310         }
1311         if (value == 0.0)
1312             return value;
1313 #endif
1314     }
1315 #if defined(__osf__)
1316     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1317      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1318      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1319      * but that breaks another set of infnan.t tests. */
1320 #  define FP_OVERFLOWS_TO_ZERO
1321 #endif
1322     for (bit = 1; exponent; bit <<= 1) {
1323         if (exponent & bit) {
1324             exponent ^= bit;
1325             result *= power;
1326 #ifdef FP_OVERFLOWS_TO_ZERO
1327             if (result == 0)
1328 # ifdef NV_INF
1329                 return value < 0 ? -NV_INF : NV_INF;
1330 # else
1331                 return value < 0 ? -FLT_MAX : FLT_MAX;
1332 # endif
1333 #endif
1334             /* Floating point exceptions are supposed to be turned off,
1335              *  but if we're obviously done, don't risk another iteration.
1336              */
1337              if (exponent == 0) break;
1338         }
1339         power *= power;
1340     }
1341     return negative ? value / result : value * result;
1342 }
1343 #endif /* #ifndef Perl_strtod */
1344
1345 #ifdef Perl_strtod
1346 #  define ATOF(s, x) my_atof2(s, &x)
1347 #else
1348 #  define ATOF(s, x) Perl_atof2(s, x)
1349 #endif
1350
1351 NV
1352 Perl_my_atof(pTHX_ const char* s)
1353 {
1354     /* 's' must be NUL terminated */
1355
1356     NV x = 0.0;
1357
1358     PERL_ARGS_ASSERT_MY_ATOF;
1359
1360 #if ! defined(USE_LOCALE_NUMERIC)
1361
1362     ATOF(s, x);
1363
1364 #else
1365
1366     {
1367         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1368         STORE_LC_NUMERIC_SET_TO_NEEDED();
1369         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1370             ATOF(s,x);
1371         }
1372         else {
1373
1374             /* Look through the string for the first thing that looks like a
1375              * decimal point: either the value in the current locale or the
1376              * standard fallback of '.'. The one which appears earliest in the
1377              * input string is the one that we should have atof look for. Note
1378              * that we have to determine this beforehand because on some
1379              * systems, Perl_atof2 is just a wrapper around the system's atof.
1380              * */
1381             const char * const standard_pos = strchr(s, '.');
1382             const char * const local_pos
1383                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1384             const bool use_standard_radix
1385                     = standard_pos && (!local_pos || standard_pos < local_pos);
1386
1387             if (use_standard_radix) {
1388                 SET_NUMERIC_STANDARD();
1389                 LOCK_LC_NUMERIC_STANDARD();
1390             }
1391
1392             ATOF(s,x);
1393
1394             if (use_standard_radix) {
1395                 UNLOCK_LC_NUMERIC_STANDARD();
1396                 SET_NUMERIC_UNDERLYING();
1397             }
1398         }
1399         RESTORE_LC_NUMERIC();
1400     }
1401
1402 #endif
1403
1404     return x;
1405 }
1406
1407 #if defined(NV_INF) || defined(NV_NAN)
1408
1409 #ifdef USING_MSVC6
1410 #  pragma warning(push)
1411 #  pragma warning(disable:4756;disable:4056)
1412 #endif
1413 static char*
1414 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1415 {
1416     const char *p0 = negative ? s - 1 : s;
1417     const char *p = p0;
1418     const int infnan = grok_infnan(&p, send);
1419     if (infnan && p != p0) {
1420         /* If we can generate inf/nan directly, let's do so. */
1421 #ifdef NV_INF
1422         if ((infnan & IS_NUMBER_INFINITY)) {
1423             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1424             return (char*)p;
1425         }
1426 #endif
1427 #ifdef NV_NAN
1428         if ((infnan & IS_NUMBER_NAN)) {
1429             *value = NV_NAN;
1430             return (char*)p;
1431         }
1432 #endif
1433 #ifdef Perl_strtod
1434         /* If still here, we didn't have either NV_INF or NV_NAN,
1435          * and can try falling back to native strtod/strtold.
1436          *
1437          * The native interface might not recognize all the possible
1438          * inf/nan strings Perl recognizes.  What we can try
1439          * is to try faking the input.  We will try inf/-inf/nan
1440          * as the most promising/portable input. */
1441         {
1442             const char* fake = "silence compiler warning";
1443             char* endp;
1444             NV nv;
1445 #ifdef NV_INF
1446             if ((infnan & IS_NUMBER_INFINITY)) {
1447                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1448             }
1449 #endif
1450 #ifdef NV_NAN
1451             if ((infnan & IS_NUMBER_NAN)) {
1452                 fake = "nan";
1453             }
1454 #endif
1455             assert(strNE(fake, "silence compiler warning"));
1456             nv = S_strtod(aTHX_ fake, &endp);
1457             if (fake != endp) {
1458 #ifdef NV_INF
1459                 if ((infnan & IS_NUMBER_INFINITY)) {
1460 #  ifdef Perl_isinf
1461                     if (Perl_isinf(nv))
1462                         *value = nv;
1463 #  else
1464                     /* last resort, may generate SIGFPE */
1465                     *value = Perl_exp((NV)1e9);
1466                     if ((infnan & IS_NUMBER_NEG))
1467                         *value = -*value;
1468 #  endif
1469                     return (char*)p; /* p, not endp */
1470                 }
1471 #endif
1472 #ifdef NV_NAN
1473                 if ((infnan & IS_NUMBER_NAN)) {
1474 #  ifdef Perl_isnan
1475                     if (Perl_isnan(nv))
1476                         *value = nv;
1477 #  else
1478                     /* last resort, may generate SIGFPE */
1479                     *value = Perl_log((NV)-1.0);
1480 #  endif
1481                     return (char*)p; /* p, not endp */
1482 #endif
1483                 }
1484             }
1485         }
1486 #endif /* #ifdef Perl_strtod */
1487     }
1488     return NULL;
1489 }
1490 #ifdef USING_MSVC6
1491 #  pragma warning(pop)
1492 #endif
1493
1494 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1495
1496 char*
1497 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1498 {
1499     PERL_ARGS_ASSERT_MY_ATOF2;
1500     return my_atof3(orig, value, 0);
1501 }
1502
1503 char*
1504 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1505 {
1506     const char* s = orig;
1507     NV result[3] = {0.0, 0.0, 0.0};
1508 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1509     const char* send = s + ((len != 0)
1510                            ? len
1511                            : strlen(orig)); /* one past the last */
1512     bool negative = 0;
1513 #endif
1514 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1515     UV accumulator[2] = {0,0};  /* before/after dp */
1516     bool seen_digit = 0;
1517     I32 exp_adjust[2] = {0,0};
1518     I32 exp_acc[2] = {-1, -1};
1519     /* the current exponent adjust for the accumulators */
1520     I32 exponent = 0;
1521     I32 seen_dp  = 0;
1522     I32 digit = 0;
1523     I32 old_digit = 0;
1524     I32 sig_digits = 0; /* noof significant digits seen so far */
1525 #endif
1526
1527 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1528     PERL_ARGS_ASSERT_MY_ATOF3;
1529
1530     /* leading whitespace */
1531     while (s < send && isSPACE(*s))
1532         ++s;
1533
1534     /* sign */
1535     switch (*s) {
1536         case '-':
1537             negative = 1;
1538             /* FALLTHROUGH */
1539         case '+':
1540             ++s;
1541     }
1542 #endif
1543
1544 #ifdef Perl_strtod
1545     {
1546         char* endp;
1547         char* copy = NULL;
1548
1549         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1550             return endp;
1551
1552         /* If the length is passed in, the input string isn't NUL-terminated,
1553          * and in it turns out the function below assumes it is; therefore we
1554          * create a copy and NUL-terminate that */
1555         if (len) {
1556             Newx(copy, len + 1, char);
1557             Copy(orig, copy, len, char);
1558             copy[len] = '\0';
1559             s = copy + (s - orig);
1560         }
1561
1562         result[2] = S_strtod(aTHX_ s, &endp);
1563
1564         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1565          * the original */
1566         if (copy) {
1567             s = (s - copy) + (char *) orig;
1568             endp = (endp - copy) + (char *) orig;
1569             Safefree(copy);
1570         }
1571
1572         if (s != endp) {
1573             *value = negative ? -result[2] : result[2];
1574             return endp;
1575         }
1576         return NULL;
1577     }
1578 #elif defined(USE_PERL_ATOF)
1579
1580 /* There is no point in processing more significant digits
1581  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1582  * while we need an upper-bound value. We add 2 to account for this;
1583  * since it will have been conservative on both the first and last digit.
1584  * For example a 32-bit mantissa with an exponent of 4 would have
1585  * exact values in the set
1586  *               4
1587  *               8
1588  *              ..
1589  *     17179869172
1590  *     17179869176
1591  *     17179869180
1592  *
1593  * where for the purposes of calculating NV_DIG we would have to discount
1594  * both the first and last digit, since neither can hold all values from
1595  * 0..9; but for calculating the value we must examine those two digits.
1596  */
1597 #ifdef MAX_SIG_DIG_PLUS
1598     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1599        possible digits in a NV, especially if NVs are not IEEE compliant
1600        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1601 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1602 #else
1603 # define MAX_SIG_DIGITS (NV_DIG+2)
1604 #endif
1605
1606 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1607 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1608
1609 #if defined(NV_INF) || defined(NV_NAN)
1610     {
1611         char* endp;
1612         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1613             return endp;
1614     }
1615 #endif
1616
1617     /* we accumulate digits into an integer; when this becomes too
1618      * large, we add the total to NV and start again */
1619
1620     while (s < send) {
1621         if (isDIGIT(*s)) {
1622             seen_digit = 1;
1623             old_digit = digit;
1624             digit = *s++ - '0';
1625             if (seen_dp)
1626                 exp_adjust[1]++;
1627
1628             /* don't start counting until we see the first significant
1629              * digit, eg the 5 in 0.00005... */
1630             if (!sig_digits && digit == 0)
1631                 continue;
1632
1633             if (++sig_digits > MAX_SIG_DIGITS) {
1634                 /* limits of precision reached */
1635                 if (digit > 5) {
1636                     ++accumulator[seen_dp];
1637                 } else if (digit == 5) {
1638                     if (old_digit % 2) { /* round to even - Allen */
1639                         ++accumulator[seen_dp];
1640                     }
1641                 }
1642                 if (seen_dp) {
1643                     exp_adjust[1]--;
1644                 } else {
1645                     exp_adjust[0]++;
1646                 }
1647                 /* skip remaining digits */
1648                 while (s < send && isDIGIT(*s)) {
1649                     ++s;
1650                     if (! seen_dp) {
1651                         exp_adjust[0]++;
1652                     }
1653                 }
1654                 /* warn of loss of precision? */
1655             }
1656             else {
1657                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1658                     /* add accumulator to result and start again */
1659                     result[seen_dp] = S_mulexp10(result[seen_dp],
1660                                                  exp_acc[seen_dp])
1661                         + (NV)accumulator[seen_dp];
1662                     accumulator[seen_dp] = 0;
1663                     exp_acc[seen_dp] = 0;
1664                 }
1665                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1666                 ++exp_acc[seen_dp];
1667             }
1668         }
1669         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1670             seen_dp = 1;
1671             if (sig_digits > MAX_SIG_DIGITS) {
1672                 while (s < send && isDIGIT(*s)) {
1673                     ++s;
1674                 }
1675                 break;
1676             }
1677         }
1678         else {
1679             break;
1680         }
1681     }
1682
1683     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1684     if (seen_dp) {
1685         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1686     }
1687
1688     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1689         bool expnegative = 0;
1690
1691         ++s;
1692         switch (*s) {
1693             case '-':
1694                 expnegative = 1;
1695                 /* FALLTHROUGH */
1696             case '+':
1697                 ++s;
1698         }
1699         while (s < send && isDIGIT(*s))
1700             exponent = exponent * 10 + (*s++ - '0');
1701         if (expnegative)
1702             exponent = -exponent;
1703     }
1704
1705     /* now apply the exponent */
1706
1707     if (seen_dp) {
1708         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1709                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1710     } else {
1711         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1712     }
1713
1714     /* now apply the sign */
1715     if (negative)
1716         result[2] = -result[2];
1717 #endif /* USE_PERL_ATOF */
1718     *value = result[2];
1719     return (char *)s;
1720 }
1721
1722 /*
1723 =for apidoc isinfnan
1724
1725 C<Perl_isinfnan()> is utility function that returns true if the NV
1726 argument is either an infinity or a C<NaN>, false otherwise.  To test
1727 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1728
1729 This is also the logical inverse of Perl_isfinite().
1730
1731 =cut
1732 */
1733 bool
1734 Perl_isinfnan(NV nv)
1735 {
1736   PERL_UNUSED_ARG(nv);
1737 #ifdef Perl_isinf
1738     if (Perl_isinf(nv))
1739         return TRUE;
1740 #endif
1741 #ifdef Perl_isnan
1742     if (Perl_isnan(nv))
1743         return TRUE;
1744 #endif
1745     return FALSE;
1746 }
1747
1748 /*
1749 =for apidoc
1750
1751 Checks whether the argument would be either an infinity or C<NaN> when used
1752 as a number, but is careful not to trigger non-numeric or uninitialized
1753 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
1754
1755 =cut
1756 */
1757
1758 bool
1759 Perl_isinfnansv(pTHX_ SV *sv)
1760 {
1761     PERL_ARGS_ASSERT_ISINFNANSV;
1762     if (!SvOK(sv))
1763         return FALSE;
1764     if (SvNOKp(sv))
1765         return Perl_isinfnan(SvNVX(sv));
1766     if (SvIOKp(sv))
1767         return FALSE;
1768     {
1769         STRLEN len;
1770         const char *s = SvPV_nomg_const(sv, len);
1771         return cBOOL(grok_infnan(&s, s+len));
1772     }
1773 }
1774
1775 #ifndef HAS_MODFL
1776 /* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
1777  * copysignl to emulate modfl, which is in some platforms missing or
1778  * broken. */
1779 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1780 long double
1781 Perl_my_modfl(long double x, long double *ip)
1782 {
1783     *ip = truncl(x);
1784     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1785 }
1786 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1787 long double
1788 Perl_my_modfl(long double x, long double *ip)
1789 {
1790     *ip = aintl(x);
1791     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1792 }
1793 #  endif
1794 #endif
1795
1796 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1797 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1798 long double
1799 Perl_my_frexpl(long double x, int *e) {
1800     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1801     return (scalbnl(x, -*e));
1802 }
1803 #endif
1804
1805 /*
1806 =for apidoc Perl_signbit
1807
1808 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1809 it is not.
1810
1811 If F<Configure> detects this system has a C<signbit()> that will work with
1812 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1813 fall back on this implementation.  The main use of this function
1814 is catching C<-0.0>.
1815
1816 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1817 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1818 function or macro that doesn't happen to work with our particular choice
1819 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1820 the standard system headers to be happy.  Also, this is a no-context
1821 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1822 F<perl.h> as a simple macro call to the system's C<signbit()>.
1823 Users should just always call C<Perl_signbit()>.
1824
1825 =cut
1826 */
1827 #if !defined(HAS_SIGNBIT)
1828 int
1829 Perl_signbit(NV x) {
1830 #  ifdef Perl_fp_class_nzero
1831     return Perl_fp_class_nzero(x);
1832     /* Try finding the high byte, and assume it's highest bit
1833      * is the sign.  This assumption is probably wrong somewhere. */
1834 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1835     return (((unsigned char *)&x)[9] & 0x80);
1836 #  elif defined(NV_LITTLE_ENDIAN)
1837     /* Note that NVSIZE is sizeof(NV), which would make the below be
1838      * wrong if the end bytes are unused, which happens with the x86
1839      * 80-bit long doubles, which is why take care of that above. */
1840     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1841 #  elif defined(NV_BIG_ENDIAN)
1842     return (((unsigned char *)&x)[0] & 0x80);
1843 #  else
1844     /* This last resort fallback is wrong for the negative zero. */
1845     return (x < 0.0) ? 1 : 0;
1846 #  endif
1847 }
1848 #endif
1849
1850 /*
1851  * ex: set ts=8 sts=4 sw=4 et:
1852  */