numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 =cut
  22
  23 This file contains all the stuff needed by perl for manipulating numeric
  24 values, including such things as replacements for the OS's atof() function
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 #ifdef Perl_strtod
  33
  34 PERL_STATIC_INLINE NV
  35 S_strtod(pTHX_ const char * const s, char ** e)
  36 {
  37     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  38     NV result;
  39
  40     STORE_LC_NUMERIC_SET_TO_NEEDED();
  41
  42 #  ifdef USE_QUADMATH
  43
  44     result = strtoflt128(s, e);
  45
  46 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
  47                              && defined(USE_LONG_DOUBLE)
  48 #    if defined(__MINGW64_VERSION_MAJOR)
  49       /***********************************************
  50        We are unable to use strtold because of
  51         https://sourceforge.net/p/mingw-w64/bugs/711/
  52         &
  53         https://sourceforge.net/p/mingw-w64/bugs/725/
  54
  55        but __mingw_strtold is fine.
  56       ***********************************************/
  57
  58     result = __mingw_strtold(s, e);
  59
  60 #    else
  61
  62     result = strtold(s, e);
  63
  64 #    endif
  65 #  elif defined(HAS_STRTOD)
  66
  67     result = strtod(s, e);
  68
  69 #  else
  70 #    error No strtod() equivalent found
  71 #  endif
  72
  73     RESTORE_LC_NUMERIC();
  74
  75     return result;
  76 }
  77
  78 #endif  /* #ifdef Perl_strtod */
  79
  80 /*
  81
  82 =for apidoc my_strtod
  83
  84 This function is equivalent to the libc strtod() function, and is available
  85 even on platforms that lack plain strtod().  Its return value is the best
  86 available precision depending on platform capabilities and F<Configure>
  87 options.
  88
  89 It properly handles the locale radix character, meaning it expects a dot except
  90 when called from within the scope of S<C<use locale>>, in which case the radix
  91 character should be that specified by the current locale.
  92
  93 The synonym Strtod() may be used instead.
  94
  95 =cut
  96
  97 */
  98
  99 NV
 100 Perl_my_strtod(const char * const s, char **e)
 101 {
 102     dTHX;
 103
 104     PERL_ARGS_ASSERT_MY_STRTOD;
 105
 106 #ifdef Perl_strtod
 107
 108     return S_strtod(aTHX_ s, e);
 109
 110 #else
 111
 112     {
 113         NV result;
 114         char ** end_ptr = NULL;
 115
 116         *end_ptr = my_atof2(s, &result);
 117         if (e) {
 118             *e = *end_ptr;
 119         }
 120
 121         if (! *end_ptr) {
 122             result = 0.0;
 123         }
 124
 125         return result;
 126     }
 127
 128 #endif
 129
 130 }
 131
 132
 133 U32
 134 Perl_cast_ulong(NV f)
 135 {
 136   if (f < 0.0)
 137     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
 138   if (f < U32_MAX_P1) {
 139 #if CASTFLAGS & 2
 140     if (f < U32_MAX_P1_HALF)
 141       return (U32) f;
 142     f -= U32_MAX_P1_HALF;
 143     return ((U32) f) | (1 + (U32_MAX >> 1));
 144 #else
 145     return (U32) f;
 146 #endif
 147   }
 148   return f > 0 ? U32_MAX : 0 /* NaN */;
 149 }
 150
 151 I32
 152 Perl_cast_i32(NV f)
 153 {
 154   if (f < I32_MAX_P1)
 155     return f < I32_MIN ? I32_MIN : (I32) f;
 156   if (f < U32_MAX_P1) {
 157 #if CASTFLAGS & 2
 158     if (f < U32_MAX_P1_HALF)
 159       return (I32)(U32) f;
 160     f -= U32_MAX_P1_HALF;
 161     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
 162 #else
 163     return (I32)(U32) f;
 164 #endif
 165   }
 166   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
 167 }
 168
 169 IV
 170 Perl_cast_iv(NV f)
 171 {
 172   if (f < IV_MAX_P1)
 173     return f < IV_MIN ? IV_MIN : (IV) f;
 174   if (f < UV_MAX_P1) {
 175 #if CASTFLAGS & 2
 176     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
 177     if (f < UV_MAX_P1_HALF)
 178       return (IV)(UV) f;
 179     f -= UV_MAX_P1_HALF;
 180     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
 181 #else
 182     return (IV)(UV) f;
 183 #endif
 184   }
 185   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
 186 }
 187
 188 UV
 189 Perl_cast_uv(NV f)
 190 {
 191   if (f < 0.0)
 192     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
 193   if (f < UV_MAX_P1) {
 194 #if CASTFLAGS & 2
 195     if (f < UV_MAX_P1_HALF)
 196       return (UV) f;
 197     f -= UV_MAX_P1_HALF;
 198     return ((UV) f) | (1 + (UV_MAX >> 1));
 199 #else
 200     return (UV) f;
 201 #endif
 202   }
 203   return f > 0 ? UV_MAX : 0 /* NaN */;
 204 }
 205
 206 /*
 207 =for apidoc grok_bin
 208
 209 converts a string representing a binary number to numeric form.
 210
 211 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 213 The scan stops at the end of the string, or the first invalid character.
 214 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 215 invalid character will also trigger a warning.
 216 On return C<*len> is set to the length of the scanned string,
 217 and C<*flags> gives output flags.
 218
 219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 220 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 222 and writes the value to C<*result> (or the value is discarded if C<result>
 223 is NULL).
 224
 225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
 226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 227 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
 228 number may use C<"_"> characters to separate digits.
 229
 230 =cut
 231
 232 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 233 which suppresses any message for non-portable numbers that are still valid
 234 on this platform.
 235  */
 236
 237 UV
 238 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 239 {
 240     const char *s = start;
 241     STRLEN len = *len_p;
 242     UV value = 0;
 243     NV value_nv = 0;
 244
 245     const UV max_div_2 = UV_MAX / 2;
 246     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 247     bool overflowed = FALSE;
 248     char bit;
 249
 250     PERL_ARGS_ASSERT_GROK_BIN;
 251
 252     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 253         /* strip off leading b or 0b.
 254            for compatibility silently suffer "b" and "0b" as valid binary
 255            numbers. */
 256         if (len >= 1) {
 257             if (isALPHA_FOLD_EQ(s[0], 'b')) {
 258                 s++;
 259                 len--;
 260             }
 261             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
 262                 s+=2;
 263                 len-=2;
 264             }
 265         }
 266     }
 267
 268     for (; len-- && (bit = *s); s++) {
 269         if (bit == '0' || bit == '1') {
 270             /* Write it in this wonky order with a goto to attempt to get the
 271                compiler to make the common case integer-only loop pretty tight.
 272                With gcc seems to be much straighter code than old scan_bin.  */
 273           redo:
 274             if (!overflowed) {
 275                 if (value <= max_div_2) {
 276                     value = (value << 1) | (bit - '0');
 277                     continue;
 278                 }
 279                 /* Bah. We're just overflowed.  */
 280                 /* diag_listed_as: Integer overflow in %s number */
 281                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 282                                  "Integer overflow in binary number");
 283                 overflowed = TRUE;
 284                 value_nv = (NV) value;
 285             }
 286             value_nv *= 2.0;
 287             /* If an NV has not enough bits in its mantissa to
 288              * represent a UV this summing of small low-order numbers
 289              * is a waste of time (because the NV cannot preserve
 290              * the low-order bits anyway): we could just remember when
 291              * did we overflow and in the end just multiply value_nv by the
 292              * right amount. */
 293             value_nv += (NV)(bit - '0');
 294             continue;
 295         }
 296         if (bit == '_' && len && allow_underscores && (bit = s[1])
 297             && (bit == '0' || bit == '1'))
 298             {
 299                 --len;
 300                 ++s;
 301                 goto redo;
 302             }
 303         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 304             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 305                            "Illegal binary digit '%c' ignored", *s);
 306         break;
 307     }
 308
 309     if (   ( overflowed && value_nv > 4294967295.0)
 310 #if UVSIZE > 4
 311         || (!overflowed && value > 0xffffffff
 312             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 313 #endif
 314         ) {
 315         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 316                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 317     }
 318     *len_p = s - start;
 319     if (!overflowed) {
 320         *flags = 0;
 321         return value;
 322     }
 323     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 324     if (result)
 325         *result = value_nv;
 326     return UV_MAX;
 327 }
 328
 329 /*
 330 =for apidoc grok_hex
 331
 332 converts a string representing a hex number to numeric form.
 333
 334 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 335 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 336 The scan stops at the end of the string, or the first invalid character.
 337 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 338 invalid character will also trigger a warning.
 339 On return C<*len> is set to the length of the scanned string,
 340 and C<*flags> gives output flags.
 341
 342 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 343 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 344 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 345 and writes the value to C<*result> (or the value is discarded if C<result>
 346 is C<NULL>).
 347
 348 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
 349 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 350 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
 351 number may use C<"_"> characters to separate digits.
 352
 353 =cut
 354
 355 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 356 which suppresses any message for non-portable numbers, but which are valid
 357 on this platform.
 358  */
 359
 360 UV
 361 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 362 {
 363     const char *s = start;
 364     STRLEN len = *len_p;
 365     UV value = 0;
 366     NV value_nv = 0;
 367     const UV max_div_16 = UV_MAX / 16;
 368     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 369     bool overflowed = FALSE;
 370
 371     PERL_ARGS_ASSERT_GROK_HEX;
 372
 373     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 374         /* strip off leading x or 0x.
 375            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 376         */
 377         if (len >= 1) {
 378             if (isALPHA_FOLD_EQ(s[0], 'x')) {
 379                 s++;
 380                 len--;
 381             }
 382             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
 383                 s+=2;
 384                 len-=2;
 385             }
 386         }
 387     }
 388
 389     for (; len-- && *s; s++) {
 390         if (isXDIGIT(*s)) {
 391             /* Write it in this wonky order with a goto to attempt to get the
 392                compiler to make the common case integer-only loop pretty tight.
 393                With gcc seems to be much straighter code than old scan_hex.  */
 394           redo:
 395             if (!overflowed) {
 396                 if (value <= max_div_16) {
 397                     value = (value << 4) | XDIGIT_VALUE(*s);
 398                     continue;
 399                 }
 400                 /* Bah. We're just overflowed.  */
 401                 /* diag_listed_as: Integer overflow in %s number */
 402                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 403                                  "Integer overflow in hexadecimal number");
 404                 overflowed = TRUE;
 405                 value_nv = (NV) value;
 406             }
 407             value_nv *= 16.0;
 408             /* If an NV has not enough bits in its mantissa to
 409              * represent a UV this summing of small low-order numbers
 410              * is a waste of time (because the NV cannot preserve
 411              * the low-order bits anyway): we could just remember when
 412              * did we overflow and in the end just multiply value_nv by the
 413              * right amount of 16-tuples. */
 414             value_nv += (NV) XDIGIT_VALUE(*s);
 415             continue;
 416         }
 417         if (*s == '_' && len && allow_underscores && s[1]
 418                 && isXDIGIT(s[1]))
 419             {
 420                 --len;
 421                 ++s;
 422                 goto redo;
 423             }
 424         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 425             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 426                         "Illegal hexadecimal digit '%c' ignored", *s);
 427         break;
 428     }
 429
 430     if (   ( overflowed && value_nv > 4294967295.0)
 431 #if UVSIZE > 4
 432         || (!overflowed && value > 0xffffffff
 433             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 434 #endif
 435         ) {
 436         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 437                        "Hexadecimal number > 0xffffffff non-portable");
 438     }
 439     *len_p = s - start;
 440     if (!overflowed) {
 441         *flags = 0;
 442         return value;
 443     }
 444     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 445     if (result)
 446         *result = value_nv;
 447     return UV_MAX;
 448 }
 449
 450 /*
 451 =for apidoc grok_oct
 452
 453 converts a string representing an octal number to numeric form.
 454
 455 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 456 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 457 The scan stops at the end of the string, or the first invalid character.
 458 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 459 8 or 9 will also trigger a warning.
 460 On return C<*len> is set to the length of the scanned string,
 461 and C<*flags> gives output flags.
 462
 463 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 464 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 465 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 466 and writes the value to C<*result> (or the value is discarded if C<result>
 467 is C<NULL>).
 468
 469 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
 470 number may use C<"_"> characters to separate digits.
 471
 472 =cut
 473
 474 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 475 which suppresses any message for non-portable numbers, but which are valid
 476 on this platform.
 477  */
 478
 479 UV
 480 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 481 {
 482     const char *s = start;
 483     STRLEN len = *len_p;
 484     UV value = 0;
 485     NV value_nv = 0;
 486     const UV max_div_8 = UV_MAX / 8;
 487     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 488     bool overflowed = FALSE;
 489
 490     PERL_ARGS_ASSERT_GROK_OCT;
 491
 492     for (; len-- && *s; s++) {
 493         if (isOCTAL(*s)) {
 494             /* Write it in this wonky order with a goto to attempt to get the
 495                compiler to make the common case integer-only loop pretty tight.
 496             */
 497           redo:
 498             if (!overflowed) {
 499                 if (value <= max_div_8) {
 500                     value = (value << 3) | OCTAL_VALUE(*s);
 501                     continue;
 502                 }
 503                 /* Bah. We're just overflowed.  */
 504                 /* diag_listed_as: Integer overflow in %s number */
 505                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 506                                "Integer overflow in octal number");
 507                 overflowed = TRUE;
 508                 value_nv = (NV) value;
 509             }
 510             value_nv *= 8.0;
 511             /* If an NV has not enough bits in its mantissa to
 512              * represent a UV this summing of small low-order numbers
 513              * is a waste of time (because the NV cannot preserve
 514              * the low-order bits anyway): we could just remember when
 515              * did we overflow and in the end just multiply value_nv by the
 516              * right amount of 8-tuples. */
 517             value_nv += (NV) OCTAL_VALUE(*s);
 518             continue;
 519         }
 520         if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
 521             --len;
 522             ++s;
 523             goto redo;
 524         }
 525         /* Allow \octal to work the DWIM way (that is, stop scanning
 526          * as soon as non-octal characters are seen, complain only if
 527          * someone seems to want to use the digits eight and nine.  Since we
 528          * know it is not octal, then if isDIGIT, must be an 8 or 9). */
 529         if (isDIGIT(*s)) {
 530             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 531                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 532                                "Illegal octal digit '%c' ignored", *s);
 533         }
 534         break;
 535     }
 536
 537     if (   ( overflowed && value_nv > 4294967295.0)
 538 #if UVSIZE > 4
 539         || (!overflowed && value > 0xffffffff
 540             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 541 #endif
 542         ) {
 543         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 544                        "Octal number > 037777777777 non-portable");
 545     }
 546     *len_p = s - start;
 547     if (!overflowed) {
 548         *flags = 0;
 549         return value;
 550     }
 551     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 552     if (result)
 553         *result = value_nv;
 554     return UV_MAX;
 555 }
 556
 557 /*
 558 =for apidoc scan_bin
 559
 560 For backwards compatibility.  Use C<grok_bin> instead.
 561
 562 =for apidoc scan_hex
 563
 564 For backwards compatibility.  Use C<grok_hex> instead.
 565
 566 =for apidoc scan_oct
 567
 568 For backwards compatibility.  Use C<grok_oct> instead.
 569
 570 =cut
 571  */
 572
 573 NV
 574 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 575 {
 576     NV rnv;
 577     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 578     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 579
 580     PERL_ARGS_ASSERT_SCAN_BIN;
 581
 582     *retlen = len;
 583     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 584 }
 585
 586 NV
 587 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 588 {
 589     NV rnv;
 590     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 591     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 592
 593     PERL_ARGS_ASSERT_SCAN_OCT;
 594
 595     *retlen = len;
 596     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 597 }
 598
 599 NV
 600 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 601 {
 602     NV rnv;
 603     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 604     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 605
 606     PERL_ARGS_ASSERT_SCAN_HEX;
 607
 608     *retlen = len;
 609     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 610 }
 611
 612 /*
 613 =for apidoc grok_numeric_radix
 614
 615 Scan and skip for a numeric decimal separator (radix).
 616
 617 =cut
 618  */
 619 bool
 620 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 621 {
 622     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 623
 624 #ifdef USE_LOCALE_NUMERIC
 625
 626     if (IN_LC(LC_NUMERIC)) {
 627         STRLEN len;
 628         char * radix;
 629         bool matches_radix = FALSE;
 630         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
 631
 632         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
 633
 634         radix = SvPV(PL_numeric_radix_sv, len);
 635         radix = savepvn(radix, len);
 636
 637         RESTORE_LC_NUMERIC();
 638
 639         if (*sp + len <= send) {
 640             matches_radix = memEQ(*sp, radix, len);
 641         }
 642
 643         Safefree(radix);
 644
 645         if (matches_radix) {
 646             *sp += len;
 647             return TRUE;
 648         }
 649     }
 650
 651 #endif
 652
 653     /* always try "." if numeric radix didn't match because
 654      * we may have data from different locales mixed */
 655     if (*sp < send && **sp == '.') {
 656         ++*sp;
 657         return TRUE;
 658     }
 659
 660     return FALSE;
 661 }
 662
 663 /*
 664 =for apidoc grok_infnan
 665
 666 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
 667 or "not a number", and returns one of the following flag combinations:
 668
 669   IS_NUMBER_INFINITY
 670   IS_NUMBER_NAN
 671   IS_NUMBER_INFINITY | IS_NUMBER_NEG
 672   IS_NUMBER_NAN | IS_NUMBER_NEG
 673   0
 674
 675 possibly |-ed with C<IS_NUMBER_TRAILING>.
 676
 677 If an infinity or a not-a-number is recognized, C<*sp> will point to
 678 one byte past the end of the recognized string.  If the recognition fails,
 679 zero is returned, and C<*sp> will not move.
 680
 681 =cut
 682 */
 683
 684 int
 685 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
 686 {
 687     const char* s = *sp;
 688     int flags = 0;
 689 #if defined(NV_INF) || defined(NV_NAN)
 690     bool odh = FALSE; /* one-dot-hash: 1.#INF */
 691
 692     PERL_ARGS_ASSERT_GROK_INFNAN;
 693
 694     if (*s == '+') {
 695         s++; if (s == send) return 0;
 696     }
 697     else if (*s == '-') {
 698         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
 699         s++; if (s == send) return 0;
 700     }
 701
 702     if (*s == '1') {
 703         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
 704          * Let's keep the dot optional. */
 705         s++; if (s == send) return 0;
 706         if (*s == '.') {
 707             s++; if (s == send) return 0;
 708         }
 709         if (*s == '#') {
 710             s++; if (s == send) return 0;
 711         } else
 712             return 0;
 713         odh = TRUE;
 714     }
 715
 716     if (isALPHA_FOLD_EQ(*s, 'I')) {
 717         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
 718
 719         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 720         s++; if (s == send) return 0;
 721         if (isALPHA_FOLD_EQ(*s, 'F')) {
 722             s++;
 723             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
 724                 int fail =
 725                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
 726                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
 727                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
 728                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
 729                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
 730                 s++;
 731             } else if (odh) {
 732                 while (*s == '0') { /* 1.#INF00 */
 733                     s++;
 734                 }
 735             }
 736             while (s < send && isSPACE(*s))
 737                 s++;
 738             if (s < send && *s) {
 739                 flags |= IS_NUMBER_TRAILING;
 740             }
 741             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 742         }
 743         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
 744             s++;
 745             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 746             while (*s == '0') { /* 1.#IND00 */
 747                 s++;
 748             }
 749             if (*s) {
 750                 flags |= IS_NUMBER_TRAILING;
 751             }
 752         } else
 753             return 0;
 754     }
 755     else {
 756         /* Maybe NAN of some sort */
 757
 758         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
 759             /* snan, qNaN */
 760             /* XXX do something with the snan/qnan difference */
 761             s++; if (s == send) return 0;
 762         }
 763
 764         if (isALPHA_FOLD_EQ(*s, 'N')) {
 765             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
 766             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 767             s++;
 768
 769             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 770
 771             /* NaN can be followed by various stuff (NaNQ, NaNS), but
 772              * there are also multiple different NaN values, and some
 773              * implementations output the "payload" values,
 774              * e.g. NaN123, NAN(abc), while some legacy implementations
 775              * have weird stuff like NaN%. */
 776             if (isALPHA_FOLD_EQ(*s, 'q') ||
 777                 isALPHA_FOLD_EQ(*s, 's')) {
 778                 /* "nanq" or "nans" are ok, though generating
 779                  * these portably is tricky. */
 780                 s++;
 781             }
 782             if (*s == '(') {
 783                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
 784                 const char *t;
 785                 s++;
 786                 if (s == send) {
 787                     return flags | IS_NUMBER_TRAILING;
 788                 }
 789                 t = s + 1;
 790                 while (t < send && *t && *t != ')') {
 791                     t++;
 792                 }
 793                 if (t == send) {
 794                     return flags | IS_NUMBER_TRAILING;
 795                 }
 796                 if (*t == ')') {
 797                     int nantype;
 798                     UV nanval;
 799                     if (s[0] == '0' && s + 2 < t &&
 800                         isALPHA_FOLD_EQ(s[1], 'x') &&
 801                         isXDIGIT(s[2])) {
 802                         STRLEN len = t - s;
 803                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 804                         nanval = grok_hex(s, &len, &flags, NULL);
 805                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 806                             nantype = 0;
 807                         } else {
 808                             nantype = IS_NUMBER_IN_UV;
 809                         }
 810                         s += len;
 811                     } else if (s[0] == '0' && s + 2 < t &&
 812                                isALPHA_FOLD_EQ(s[1], 'b') &&
 813                                (s[2] == '0' || s[2] == '1')) {
 814                         STRLEN len = t - s;
 815                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 816                         nanval = grok_bin(s, &len, &flags, NULL);
 817                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 818                             nantype = 0;
 819                         } else {
 820                             nantype = IS_NUMBER_IN_UV;
 821                         }
 822                         s += len;
 823                     } else {
 824                         const char *u;
 825                         nantype =
 826                             grok_number_flags(s, t - s, &nanval,
 827                                               PERL_SCAN_TRAILING |
 828                                               PERL_SCAN_ALLOW_UNDERSCORES);
 829                         /* Unfortunately grok_number_flags() doesn't
 830                          * tell how far we got and the ')' will always
 831                          * be "trailing", so we need to double-check
 832                          * whether we had something dubious. */
 833                         for (u = s; u < t; u++) {
 834                             if (!isDIGIT(*u)) {
 835                                 flags |= IS_NUMBER_TRAILING;
 836                                 break;
 837                             }
 838                         }
 839                         s = u;
 840                     }
 841
 842                     /* XXX Doesn't do octal: nan("0123").
 843                      * Probably not a big loss. */
 844
 845                     if ((nantype & IS_NUMBER_NOT_INT) ||
 846                         !(nantype && IS_NUMBER_IN_UV)) {
 847                         /* XXX the nanval is currently unused, that is,
 848                          * not inserted as the NaN payload of the NV.
 849                          * But the above code already parses the C99
 850                          * nan(...)  format.  See below, and see also
 851                          * the nan() in POSIX.xs.
 852                          *
 853                          * Certain configuration combinations where
 854                          * NVSIZE is greater than UVSIZE mean that
 855                          * a single UV cannot contain all the possible
 856                          * NaN payload bits.  There would need to be
 857                          * some more generic syntax than "nan($uv)".
 858                          *
 859                          * Issues to keep in mind:
 860                          *
 861                          * (1) In most common cases there would
 862                          * not be an integral number of bytes that
 863                          * could be set, only a certain number of bits.
 864                          * For example for the common case of
 865                          * NVSIZE == UVSIZE == 8 there is room for 52
 866                          * bits in the payload, but the most significant
 867                          * bit is commonly reserved for the
 868                          * signaling/quiet bit, leaving 51 bits.
 869                          * Furthermore, the C99 nan() is supposed
 870                          * to generate quiet NaNs, so it is doubtful
 871                          * whether it should be able to generate
 872                          * signaling NaNs.  For the x86 80-bit doubles
 873                          * (if building a long double Perl) there would
 874                          * be 62 bits (s/q bit being the 63rd).
 875                          *
 876                          * (2) Endianness of the payload bits. If the
 877                          * payload is specified as an UV, the low-order
 878                          * bits of the UV are naturally little-endianed
 879                          * (rightmost) bits of the payload.  The endianness
 880                          * of UVs and NVs can be different. */
 881                         return 0;
 882                     }
 883                     if (s < t) {
 884                         flags |= IS_NUMBER_TRAILING;
 885                     }
 886                 } else {
 887                     /* Looked like nan(...), but no close paren. */
 888                     flags |= IS_NUMBER_TRAILING;
 889                 }
 890             } else {
 891                 while (s < send && isSPACE(*s))
 892                     s++;
 893                 if (s < send && *s) {
 894                     /* Note that we here implicitly accept (parse as
 895                      * "nan", but with warnings) also any other weird
 896                      * trailing stuff for "nan".  In the above we just
 897                      * check that if we got the C99-style "nan(...)",
 898                      * the "..."  looks sane.
 899                      * If in future we accept more ways of specifying
 900                      * the nan payload, the accepting would happen around
 901                      * here. */
 902                     flags |= IS_NUMBER_TRAILING;
 903                 }
 904             }
 905             s = send;
 906         }
 907         else
 908             return 0;
 909     }
 910
 911     while (s < send && isSPACE(*s))
 912         s++;
 913
 914 #else
 915     PERL_UNUSED_ARG(send);
 916 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
 917     *sp = s;
 918     return flags;
 919 }
 920
 921 /*
 922 =for apidoc grok_number_flags
 923
 924 Recognise (or not) a number.  The type of the number is returned
 925 (0 if unrecognised), otherwise it is a bit-ORed combination of
 926 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
 927 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
 928
 929 If the value of the number can fit in a UV, it is returned in C<*valuep>.
 930 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
 931 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
 932 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
 933 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
 934 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
 935
 936 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
 937 seen (in which case C<*valuep> gives the true value truncated to an integer), and
 938 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
 939 absolute value).  C<IS_NUMBER_IN_UV> is not set if e notation was used or the
 940 number is larger than a UV.
 941
 942 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 943 non-numeric text on an otherwise successful I<grok>, setting
 944 C<IS_NUMBER_TRAILING> on the result.
 945
 946 =for apidoc grok_number
 947
 948 Identical to C<grok_number_flags()> with C<flags> set to zero.
 949
 950 =cut
 951  */
 952 int
 953 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 954 {
 955     PERL_ARGS_ASSERT_GROK_NUMBER;
 956
 957     return grok_number_flags(pv, len, valuep, 0);
 958 }
 959
 960 static const UV uv_max_div_10 = UV_MAX / 10;
 961 static const U8 uv_max_mod_10 = UV_MAX % 10;
 962
 963 int
 964 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 965 {
 966   const char *s = pv;
 967   const char * const send = pv + len;
 968   const char *d;
 969   int numtype = 0;
 970
 971   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 972
 973   while (s < send && isSPACE(*s))
 974     s++;
 975   if (s == send) {
 976     return 0;
 977   } else if (*s == '-') {
 978     s++;
 979     numtype = IS_NUMBER_NEG;
 980   }
 981   else if (*s == '+')
 982     s++;
 983
 984   if (s == send)
 985     return 0;
 986
 987   /* The first digit (after optional sign): note that might
 988    * also point to "infinity" or "nan", or "1.#INF". */
 989   d = s;
 990
 991   /* next must be digit or the radix separator or beginning of infinity/nan */
 992   if (isDIGIT(*s)) {
 993     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 994        overflow.  */
 995     UV value = *s - '0';
 996     /* This construction seems to be more optimiser friendly.
 997        (without it gcc does the isDIGIT test and the *s - '0' separately)
 998        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 999        In theory the optimiser could deduce how far to unroll the loop
1000        before checking for overflow.  */
1001     if (++s < send) {
1002       int digit = *s - '0';
1003       if (inRANGE(digit, 0, 9)) {
1004         value = value * 10 + digit;
1005         if (++s < send) {
1006           digit = *s - '0';
1007           if (inRANGE(digit, 0, 9)) {
1008             value = value * 10 + digit;
1009             if (++s < send) {
1010               digit = *s - '0';
1011               if (inRANGE(digit, 0, 9)) {
1012                 value = value * 10 + digit;
1013                 if (++s < send) {
1014                   digit = *s - '0';
1015                   if (inRANGE(digit, 0, 9)) {
1016                     value = value * 10 + digit;
1017                     if (++s < send) {
1018                       digit = *s - '0';
1019                       if (inRANGE(digit, 0, 9)) {
1020                         value = value * 10 + digit;
1021                         if (++s < send) {
1022                           digit = *s - '0';
1023                           if (inRANGE(digit, 0, 9)) {
1024                             value = value * 10 + digit;
1025                             if (++s < send) {
1026                               digit = *s - '0';
1027                               if (inRANGE(digit, 0, 9)) {
1028                                 value = value * 10 + digit;
1029                                 if (++s < send) {
1030                                   digit = *s - '0';
1031                                   if (inRANGE(digit, 0, 9)) {
1032                                     value = value * 10 + digit;
1033                                     if (++s < send) {
1034                                       /* Now got 9 digits, so need to check
1035                                          each time for overflow.  */
1036                                       digit = *s - '0';
1037                                       while (    inRANGE(digit, 0, 9)
1038                                              && (value < uv_max_div_10
1039                                                  || (value == uv_max_div_10
1040                                                      && digit <= uv_max_mod_10))) {
1041                                         value = value * 10 + digit;
1042                                         if (++s < send)
1043                                           digit = *s - '0';
1044                                         else
1045                                           break;
1046                                       }
1047                                       if (inRANGE(digit, 0, 9)
1048                                           && (s < send)) {
1049                                         /* value overflowed.
1050                                            skip the remaining digits, don't
1051                                            worry about setting *valuep.  */
1052                                         do {
1053                                           s++;
1054                                         } while (s < send && isDIGIT(*s));
1055                                         numtype |=
1056                                           IS_NUMBER_GREATER_THAN_UV_MAX;
1057                                         goto skip_value;
1058                                       }
1059                                     }
1060                                   }
1061                                 }
1062                               }
1063                             }
1064                           }
1065                         }
1066                       }
1067                     }
1068                   }
1069                 }
1070               }
1071             }
1072           }
1073         }
1074       }
1075     }
1076     numtype |= IS_NUMBER_IN_UV;
1077     if (valuep)
1078       *valuep = value;
1079
1080   skip_value:
1081     if (GROK_NUMERIC_RADIX(&s, send)) {
1082       numtype |= IS_NUMBER_NOT_INT;
1083       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
1084         s++;
1085     }
1086   }
1087   else if (GROK_NUMERIC_RADIX(&s, send)) {
1088     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1089     /* no digits before the radix means we need digits after it */
1090     if (s < send && isDIGIT(*s)) {
1091       do {
1092         s++;
1093       } while (s < send && isDIGIT(*s));
1094       if (valuep) {
1095         /* integer approximation is valid - it's 0.  */
1096         *valuep = 0;
1097       }
1098     }
1099     else
1100         return 0;
1101   }
1102
1103   if (s > d && s < send) {
1104     /* we can have an optional exponent part */
1105     if (isALPHA_FOLD_EQ(*s, 'e')) {
1106       s++;
1107       if (s < send && (*s == '-' || *s == '+'))
1108         s++;
1109       if (s < send && isDIGIT(*s)) {
1110         do {
1111           s++;
1112         } while (s < send && isDIGIT(*s));
1113       }
1114       else if (flags & PERL_SCAN_TRAILING)
1115         return numtype | IS_NUMBER_TRAILING;
1116       else
1117         return 0;
1118
1119       /* The only flag we keep is sign.  Blow away any "it's UV"  */
1120       numtype &= IS_NUMBER_NEG;
1121       numtype |= IS_NUMBER_NOT_INT;
1122     }
1123   }
1124   while (s < send && isSPACE(*s))
1125     s++;
1126   if (s >= send)
1127     return numtype;
1128   if (memEQs(pv, len, "0 but true")) {
1129     if (valuep)
1130       *valuep = 0;
1131     return IS_NUMBER_IN_UV;
1132   }
1133   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1134   if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
1135       /* Really detect inf/nan. Start at d, not s, since the above
1136        * code might have already consumed the "1." or "1". */
1137       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1138       if ((infnan & IS_NUMBER_INFINITY)) {
1139           return (numtype | infnan); /* Keep sign for infinity. */
1140       }
1141       else if ((infnan & IS_NUMBER_NAN)) {
1142           return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1143       }
1144   }
1145   else if (flags & PERL_SCAN_TRAILING) {
1146     return numtype | IS_NUMBER_TRAILING;
1147   }
1148
1149   return 0;
1150 }
1151
1152 /*
1153 =for apidoc grok_atoUV
1154
1155 parse a string, looking for a decimal unsigned integer.
1156
1157 On entry, C<pv> points to the beginning of the string;
1158 C<valptr> points to a UV that will receive the converted value, if found;
1159 C<endptr> is either NULL or points to a variable that points to one byte
1160 beyond the point in C<pv> that this routine should examine.
1161 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1162
1163 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1164 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1165 value.
1166
1167 If you constrain the portion of C<pv> that is looked at by this function (by
1168 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1169 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1170 final digit of the value.  But if there is no constraint at what's looked at,
1171 all of C<pv> must be valid in order for TRUE to be returned.
1172
1173 The only characters this accepts are the decimal digits '0'..'9'.
1174
1175 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1176 leading whitespace, nor negative inputs.  If such features are required, the
1177 calling code needs to explicitly implement those.
1178
1179 Note that this function returns FALSE for inputs that would overflow a UV,
1180 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1181 C<01>, C<002>, I<etc>.
1182
1183 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1184 used for incremental parsing, and therefore should be avoided
1185 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1186 seen as a bug (global state controlled by user environment).
1187
1188 =cut
1189
1190 */
1191
1192 bool
1193 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1194 {
1195     const char* s = pv;
1196     const char** eptr;
1197     const char* end2; /* Used in case endptr is NULL. */
1198     UV val = 0; /* The parsed value. */
1199
1200     PERL_ARGS_ASSERT_GROK_ATOUV;
1201
1202     if (endptr) {
1203         eptr = endptr;
1204     }
1205     else {
1206         end2 = s + strlen(s);
1207         eptr = &end2;
1208     }
1209
1210     if (   *eptr <= s
1211         || ! isDIGIT(*s))
1212     {
1213         return FALSE;
1214     }
1215
1216     /* Single-digit inputs are quite common. */
1217     val = *s++ - '0';
1218     if (s < *eptr && isDIGIT(*s)) {
1219         /* Fail on extra leading zeros. */
1220         if (val == 0)
1221             return FALSE;
1222         while (s < *eptr && isDIGIT(*s)) {
1223             /* This could be unrolled like in grok_number(), but
1224                 * the expected uses of this are not speed-needy, and
1225                 * unlikely to need full 64-bitness. */
1226             const U8 digit = *s++ - '0';
1227             if (val < uv_max_div_10 ||
1228                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1229                 val = val * 10 + digit;
1230             } else {
1231                 return FALSE;
1232             }
1233         }
1234     }
1235
1236     if (endptr == NULL) {
1237         if (*s) {
1238             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1239         }
1240     }
1241     else {
1242         *endptr = s;
1243     }
1244
1245     *valptr = val;
1246     return TRUE;
1247 }
1248
1249 #ifndef Perl_strtod
1250 STATIC NV
1251 S_mulexp10(NV value, I32 exponent)
1252 {
1253     NV result = 1.0;
1254     NV power = 10.0;
1255     bool negative = 0;
1256     I32 bit;
1257
1258     if (exponent == 0)
1259         return value;
1260     if (value == 0)
1261         return (NV)0;
1262
1263     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1264      * and that format does not have *easy* capabilities [1] for
1265      * overflowing doubles 'silently' as IEEE fp does.  We also need
1266      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1267      * range is much larger than D_FLOAT it still doesn't do silent
1268      * overflow.  Therefore we need to detect early whether we would
1269      * overflow (this is the behaviour of the native string-to-float
1270      * conversion routines, and therefore of native applications, too).
1271      *
1272      * [1] Trying to establish a condition handler to trap floating point
1273      *     exceptions is not a good idea. */
1274
1275     /* In UNICOS and in certain Cray models (such as T90) there is no
1276      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1277      * There is something you can do if you are willing to use some
1278      * inline assembler: the instruction is called DFI-- but that will
1279      * disable *all* floating point interrupts, a little bit too large
1280      * a hammer.  Therefore we need to catch potential overflows before
1281      * it's too late. */
1282
1283 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1284     STMT_START {
1285         const NV exp_v = log10(value);
1286         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1287             return NV_MAX;
1288         if (exponent < 0) {
1289             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1290                 return 0.0;
1291             while (-exponent >= NV_MAX_10_EXP) {
1292                 /* combination does not overflow, but 10^(-exponent) does */
1293                 value /= 10;
1294                 ++exponent;
1295             }
1296         }
1297     } STMT_END;
1298 #endif
1299
1300     if (exponent < 0) {
1301         negative = 1;
1302         exponent = -exponent;
1303 #ifdef NV_MAX_10_EXP
1304         /* for something like 1234 x 10^-309, the action of calculating
1305          * the intermediate value 10^309 then returning 1234 / (10^309)
1306          * will fail, since 10^309 becomes infinity. In this case try to
1307          * refactor it as 123 / (10^308) etc.
1308          */
1309         while (value && exponent > NV_MAX_10_EXP) {
1310             exponent--;
1311             value /= 10;
1312         }
1313         if (value == 0.0)
1314             return value;
1315 #endif
1316     }
1317 #if defined(__osf__)
1318     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1319      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1320      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1321      * but that breaks another set of infnan.t tests. */
1322 #  define FP_OVERFLOWS_TO_ZERO
1323 #endif
1324     for (bit = 1; exponent; bit <<= 1) {
1325         if (exponent & bit) {
1326             exponent ^= bit;
1327             result *= power;
1328 #ifdef FP_OVERFLOWS_TO_ZERO
1329             if (result == 0)
1330 # ifdef NV_INF
1331                 return value < 0 ? -NV_INF : NV_INF;
1332 # else
1333                 return value < 0 ? -FLT_MAX : FLT_MAX;
1334 # endif
1335 #endif
1336             /* Floating point exceptions are supposed to be turned off,
1337              *  but if we're obviously done, don't risk another iteration.
1338              */
1339              if (exponent == 0) break;
1340         }
1341         power *= power;
1342     }
1343     return negative ? value / result : value * result;
1344 }
1345 #endif /* #ifndef Perl_strtod */
1346
1347 #ifdef Perl_strtod
1348 #  define ATOF(s, x) my_atof2(s, &x)
1349 #else
1350 #  define ATOF(s, x) Perl_atof2(s, x)
1351 #endif
1352
1353 NV
1354 Perl_my_atof(pTHX_ const char* s)
1355 {
1356     /* 's' must be NUL terminated */
1357
1358     NV x = 0.0;
1359
1360     PERL_ARGS_ASSERT_MY_ATOF;
1361
1362 #if ! defined(USE_LOCALE_NUMERIC)
1363
1364     ATOF(s, x);
1365
1366 #else
1367
1368     {
1369         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1370         STORE_LC_NUMERIC_SET_TO_NEEDED();
1371         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1372             ATOF(s,x);
1373         }
1374         else {
1375
1376             /* Look through the string for the first thing that looks like a
1377              * decimal point: either the value in the current locale or the
1378              * standard fallback of '.'. The one which appears earliest in the
1379              * input string is the one that we should have atof look for. Note
1380              * that we have to determine this beforehand because on some
1381              * systems, Perl_atof2 is just a wrapper around the system's atof.
1382              * */
1383             const char * const standard_pos = strchr(s, '.');
1384             const char * const local_pos
1385                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1386             const bool use_standard_radix
1387                     = standard_pos && (!local_pos || standard_pos < local_pos);
1388
1389             if (use_standard_radix) {
1390                 SET_NUMERIC_STANDARD();
1391                 LOCK_LC_NUMERIC_STANDARD();
1392             }
1393
1394             ATOF(s,x);
1395
1396             if (use_standard_radix) {
1397                 UNLOCK_LC_NUMERIC_STANDARD();
1398                 SET_NUMERIC_UNDERLYING();
1399             }
1400         }
1401         RESTORE_LC_NUMERIC();
1402     }
1403
1404 #endif
1405
1406     return x;
1407 }
1408
1409 #if defined(NV_INF) || defined(NV_NAN)
1410
1411 #ifdef USING_MSVC6
1412 #  pragma warning(push)
1413 #  pragma warning(disable:4756;disable:4056)
1414 #endif
1415 static char*
1416 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1417 {
1418     const char *p0 = negative ? s - 1 : s;
1419     const char *p = p0;
1420     const int infnan = grok_infnan(&p, send);
1421     if (infnan && p != p0) {
1422         /* If we can generate inf/nan directly, let's do so. */
1423 #ifdef NV_INF
1424         if ((infnan & IS_NUMBER_INFINITY)) {
1425             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1426             return (char*)p;
1427         }
1428 #endif
1429 #ifdef NV_NAN
1430         if ((infnan & IS_NUMBER_NAN)) {
1431             *value = NV_NAN;
1432             return (char*)p;
1433         }
1434 #endif
1435 #ifdef Perl_strtod
1436         /* If still here, we didn't have either NV_INF or NV_NAN,
1437          * and can try falling back to native strtod/strtold.
1438          *
1439          * The native interface might not recognize all the possible
1440          * inf/nan strings Perl recognizes.  What we can try
1441          * is to try faking the input.  We will try inf/-inf/nan
1442          * as the most promising/portable input. */
1443         {
1444             const char* fake = "silence compiler warning";
1445             char* endp;
1446             NV nv;
1447 #ifdef NV_INF
1448             if ((infnan & IS_NUMBER_INFINITY)) {
1449                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1450             }
1451 #endif
1452 #ifdef NV_NAN
1453             if ((infnan & IS_NUMBER_NAN)) {
1454                 fake = "nan";
1455             }
1456 #endif
1457             assert(strNE(fake, "silence compiler warning"));
1458             nv = S_strtod(aTHX_ fake, &endp);
1459             if (fake != endp) {
1460 #ifdef NV_INF
1461                 if ((infnan & IS_NUMBER_INFINITY)) {
1462 #  ifdef Perl_isinf
1463                     if (Perl_isinf(nv))
1464                         *value = nv;
1465 #  else
1466                     /* last resort, may generate SIGFPE */
1467                     *value = Perl_exp((NV)1e9);
1468                     if ((infnan & IS_NUMBER_NEG))
1469                         *value = -*value;
1470 #  endif
1471                     return (char*)p; /* p, not endp */
1472                 }
1473 #endif
1474 #ifdef NV_NAN
1475                 if ((infnan & IS_NUMBER_NAN)) {
1476 #  ifdef Perl_isnan
1477                     if (Perl_isnan(nv))
1478                         *value = nv;
1479 #  else
1480                     /* last resort, may generate SIGFPE */
1481                     *value = Perl_log((NV)-1.0);
1482 #  endif
1483                     return (char*)p; /* p, not endp */
1484 #endif
1485                 }
1486             }
1487         }
1488 #endif /* #ifdef Perl_strtod */
1489     }
1490     return NULL;
1491 }
1492 #ifdef USING_MSVC6
1493 #  pragma warning(pop)
1494 #endif
1495
1496 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1497
1498 char*
1499 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1500 {
1501     PERL_ARGS_ASSERT_MY_ATOF2;
1502     return my_atof3(orig, value, 0);
1503 }
1504
1505 char*
1506 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1507 {
1508     const char* s = orig;
1509     NV result[3] = {0.0, 0.0, 0.0};
1510 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1511     const char* send = s + ((len != 0)
1512                            ? len
1513                            : strlen(orig)); /* one past the last */
1514     bool negative = 0;
1515 #endif
1516 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1517     UV accumulator[2] = {0,0};  /* before/after dp */
1518     bool seen_digit = 0;
1519     I32 exp_adjust[2] = {0,0};
1520     I32 exp_acc[2] = {-1, -1};
1521     /* the current exponent adjust for the accumulators */
1522     I32 exponent = 0;
1523     I32 seen_dp  = 0;
1524     I32 digit = 0;
1525     I32 old_digit = 0;
1526     I32 sig_digits = 0; /* noof significant digits seen so far */
1527 #endif
1528
1529 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1530     PERL_ARGS_ASSERT_MY_ATOF3;
1531
1532     /* leading whitespace */
1533     while (s < send && isSPACE(*s))
1534         ++s;
1535
1536     /* sign */
1537     switch (*s) {
1538         case '-':
1539             negative = 1;
1540             /* FALLTHROUGH */
1541         case '+':
1542             ++s;
1543     }
1544 #endif
1545
1546 #ifdef Perl_strtod
1547     {
1548         char* endp;
1549         char* copy = NULL;
1550
1551         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1552             return endp;
1553
1554         /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1555            0b-prefixed binary numbers, which is backward incompatible
1556         */
1557         if ((len == 0 || len >= 2) && *s == '0' &&
1558             (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1559             *value = 0;
1560             return (char *)s+1;
1561         }
1562
1563         /* If the length is passed in, the input string isn't NUL-terminated,
1564          * and in it turns out the function below assumes it is; therefore we
1565          * create a copy and NUL-terminate that */
1566         if (len) {
1567             Newx(copy, len + 1, char);
1568             Copy(orig, copy, len, char);
1569             copy[len] = '\0';
1570             s = copy + (s - orig);
1571         }
1572
1573         result[2] = S_strtod(aTHX_ s, &endp);
1574
1575         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1576          * the original */
1577         if (copy) {
1578             s = (s - copy) + (char *) orig;
1579             endp = (endp - copy) + (char *) orig;
1580             Safefree(copy);
1581         }
1582
1583         if (s != endp) {
1584             *value = negative ? -result[2] : result[2];
1585             return endp;
1586         }
1587         return NULL;
1588     }
1589 #elif defined(USE_PERL_ATOF)
1590
1591 /* There is no point in processing more significant digits
1592  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1593  * while we need an upper-bound value. We add 2 to account for this;
1594  * since it will have been conservative on both the first and last digit.
1595  * For example a 32-bit mantissa with an exponent of 4 would have
1596  * exact values in the set
1597  *               4
1598  *               8
1599  *              ..
1600  *     17179869172
1601  *     17179869176
1602  *     17179869180
1603  *
1604  * where for the purposes of calculating NV_DIG we would have to discount
1605  * both the first and last digit, since neither can hold all values from
1606  * 0..9; but for calculating the value we must examine those two digits.
1607  */
1608 #ifdef MAX_SIG_DIG_PLUS
1609     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1610        possible digits in a NV, especially if NVs are not IEEE compliant
1611        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1612 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1613 #else
1614 # define MAX_SIG_DIGITS (NV_DIG+2)
1615 #endif
1616
1617 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1618 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1619
1620 #if defined(NV_INF) || defined(NV_NAN)
1621     {
1622         char* endp;
1623         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1624             return endp;
1625     }
1626 #endif
1627
1628     /* we accumulate digits into an integer; when this becomes too
1629      * large, we add the total to NV and start again */
1630
1631     while (s < send) {
1632         if (isDIGIT(*s)) {
1633             seen_digit = 1;
1634             old_digit = digit;
1635             digit = *s++ - '0';
1636             if (seen_dp)
1637                 exp_adjust[1]++;
1638
1639             /* don't start counting until we see the first significant
1640              * digit, eg the 5 in 0.00005... */
1641             if (!sig_digits && digit == 0)
1642                 continue;
1643
1644             if (++sig_digits > MAX_SIG_DIGITS) {
1645                 /* limits of precision reached */
1646                 if (digit > 5) {
1647                     ++accumulator[seen_dp];
1648                 } else if (digit == 5) {
1649                     if (old_digit % 2) { /* round to even - Allen */
1650                         ++accumulator[seen_dp];
1651                     }
1652                 }
1653                 if (seen_dp) {
1654                     exp_adjust[1]--;
1655                 } else {
1656                     exp_adjust[0]++;
1657                 }
1658                 /* skip remaining digits */
1659                 while (s < send && isDIGIT(*s)) {
1660                     ++s;
1661                     if (! seen_dp) {
1662                         exp_adjust[0]++;
1663                     }
1664                 }
1665                 /* warn of loss of precision? */
1666             }
1667             else {
1668                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1669                     /* add accumulator to result and start again */
1670                     result[seen_dp] = S_mulexp10(result[seen_dp],
1671                                                  exp_acc[seen_dp])
1672                         + (NV)accumulator[seen_dp];
1673                     accumulator[seen_dp] = 0;
1674                     exp_acc[seen_dp] = 0;
1675                 }
1676                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1677                 ++exp_acc[seen_dp];
1678             }
1679         }
1680         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1681             seen_dp = 1;
1682             if (sig_digits > MAX_SIG_DIGITS) {
1683                 while (s < send && isDIGIT(*s)) {
1684                     ++s;
1685                 }
1686                 break;
1687             }
1688         }
1689         else {
1690             break;
1691         }
1692     }
1693
1694     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1695     if (seen_dp) {
1696         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1697     }
1698
1699     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1700         bool expnegative = 0;
1701
1702         ++s;
1703         switch (*s) {
1704             case '-':
1705                 expnegative = 1;
1706                 /* FALLTHROUGH */
1707             case '+':
1708                 ++s;
1709         }
1710         while (s < send && isDIGIT(*s))
1711             exponent = exponent * 10 + (*s++ - '0');
1712         if (expnegative)
1713             exponent = -exponent;
1714     }
1715
1716     /* now apply the exponent */
1717
1718     if (seen_dp) {
1719         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1720                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1721     } else {
1722         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1723     }
1724
1725     /* now apply the sign */
1726     if (negative)
1727         result[2] = -result[2];
1728 #endif /* USE_PERL_ATOF */
1729     *value = result[2];
1730     return (char *)s;
1731 }
1732
1733 /*
1734 =for apidoc isinfnan
1735
1736 C<Perl_isinfnan()> is utility function that returns true if the NV
1737 argument is either an infinity or a C<NaN>, false otherwise.  To test
1738 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1739
1740 This is also the logical inverse of Perl_isfinite().
1741
1742 =cut
1743 */
1744 bool
1745 Perl_isinfnan(NV nv)
1746 {
1747   PERL_UNUSED_ARG(nv);
1748 #ifdef Perl_isinf
1749     if (Perl_isinf(nv))
1750         return TRUE;
1751 #endif
1752 #ifdef Perl_isnan
1753     if (Perl_isnan(nv))
1754         return TRUE;
1755 #endif
1756     return FALSE;
1757 }
1758
1759 /*
1760 =for apidoc
1761
1762 Checks whether the argument would be either an infinity or C<NaN> when used
1763 as a number, but is careful not to trigger non-numeric or uninitialized
1764 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
1765
1766 =cut
1767 */
1768
1769 bool
1770 Perl_isinfnansv(pTHX_ SV *sv)
1771 {
1772     PERL_ARGS_ASSERT_ISINFNANSV;
1773     if (!SvOK(sv))
1774         return FALSE;
1775     if (SvNOKp(sv))
1776         return Perl_isinfnan(SvNVX(sv));
1777     if (SvIOKp(sv))
1778         return FALSE;
1779     {
1780         STRLEN len;
1781         const char *s = SvPV_nomg_const(sv, len);
1782         return cBOOL(grok_infnan(&s, s+len));
1783     }
1784 }
1785
1786 #ifndef HAS_MODFL
1787 /* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
1788  * copysignl to emulate modfl, which is in some platforms missing or
1789  * broken. */
1790 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1791 long double
1792 Perl_my_modfl(long double x, long double *ip)
1793 {
1794     *ip = truncl(x);
1795     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1796 }
1797 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1798 long double
1799 Perl_my_modfl(long double x, long double *ip)
1800 {
1801     *ip = aintl(x);
1802     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1803 }
1804 #  endif
1805 #endif
1806
1807 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1808 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1809 long double
1810 Perl_my_frexpl(long double x, int *e) {
1811     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1812     return (scalbnl(x, -*e));
1813 }
1814 #endif
1815
1816 /*
1817 =for apidoc Perl_signbit
1818
1819 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1820 it is not.
1821
1822 If F<Configure> detects this system has a C<signbit()> that will work with
1823 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1824 fall back on this implementation.  The main use of this function
1825 is catching C<-0.0>.
1826
1827 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1828 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1829 function or macro that doesn't happen to work with our particular choice
1830 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1831 the standard system headers to be happy.  Also, this is a no-context
1832 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1833 F<perl.h> as a simple macro call to the system's C<signbit()>.
1834 Users should just always call C<Perl_signbit()>.
1835
1836 =cut
1837 */
1838 #if !defined(HAS_SIGNBIT)
1839 int
1840 Perl_signbit(NV x) {
1841 #  ifdef Perl_fp_class_nzero
1842     return Perl_fp_class_nzero(x);
1843     /* Try finding the high byte, and assume it's highest bit
1844      * is the sign.  This assumption is probably wrong somewhere. */
1845 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1846     return (((unsigned char *)&x)[9] & 0x80);
1847 #  elif defined(NV_LITTLE_ENDIAN)
1848     /* Note that NVSIZE is sizeof(NV), which would make the below be
1849      * wrong if the end bytes are unused, which happens with the x86
1850      * 80-bit long doubles, which is why take care of that above. */
1851     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1852 #  elif defined(NV_BIG_ENDIAN)
1853     return (((unsigned char *)&x)[0] & 0x80);
1854 #  else
1855     /* This last resort fallback is wrong for the negative zero. */
1856     return (x < 0.0) ? 1 : 0;
1857 #  endif
1858 }
1859 #endif
1860
1861 /*
1862  * ex: set ts=8 sts=4 sw=4 et:
1863  */