numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 =cut
  22
  23 This file contains all the stuff needed by perl for manipulating numeric
  24 values, including such things as replacements for the OS's atof() function
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 #ifdef Perl_strtod
  33
  34 PERL_STATIC_INLINE NV
  35 S_strtod(pTHX_ const char * const s, char ** e)
  36 {
  37     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  38     NV result;
  39
  40     STORE_LC_NUMERIC_SET_TO_NEEDED();
  41
  42 #  ifdef USE_QUADMATH
  43
  44     result = strtoflt128(s, e);
  45
  46 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
  47                              && defined(USE_LONG_DOUBLE)
  48 #    if defined(__MINGW64_VERSION_MAJOR)
  49       /***********************************************
  50        We are unable to use strtold because of
  51         https://sourceforge.net/p/mingw-w64/bugs/711/
  52         &
  53         https://sourceforge.net/p/mingw-w64/bugs/725/
  54
  55        but __mingw_strtold is fine.
  56       ***********************************************/
  57
  58     result = __mingw_strtold(s, e);
  59
  60 #    else
  61
  62     result = strtold(s, e);
  63
  64 #    endif
  65 #  elif defined(HAS_STRTOD)
  66
  67     result = strtod(s, e);
  68
  69 #  else
  70 #    error No strtod() equivalent found
  71 #  endif
  72
  73     RESTORE_LC_NUMERIC();
  74
  75     return result;
  76 }
  77
  78 #endif  /* #ifdef Perl_strtod */
  79
  80 /*
  81
  82 =for apidoc my_strtod
  83
  84 This function is equivalent to the libc strtod() function, and is available
  85 even on platforms that lack plain strtod().  Its return value is the best
  86 available precision depending on platform capabilities and F<Configure>
  87 options.
  88
  89 It properly handles the locale radix character, meaning it expects a dot except
  90 when called from within the scope of S<C<use locale>>, in which case the radix
  91 character should be that specified by the current locale.
  92
  93 The synonym Strtod() may be used instead.
  94
  95 =cut
  96
  97 */
  98
  99 NV
 100 Perl_my_strtod(const char * const s, char **e)
 101 {
 102     dTHX;
 103
 104     PERL_ARGS_ASSERT_MY_STRTOD;
 105
 106 #ifdef Perl_strtod
 107
 108     return S_strtod(aTHX_ s, e);
 109
 110 #else
 111
 112     {
 113         NV result;
 114         char ** end_ptr = NULL;
 115
 116         *end_ptr = my_atof2(s, &result);
 117         if (e) {
 118             *e = *end_ptr;
 119         }
 120
 121         if (! *end_ptr) {
 122             result = 0.0;
 123         }
 124
 125         return result;
 126     }
 127
 128 #endif
 129
 130 }
 131
 132
 133 U32
 134 Perl_cast_ulong(NV f)
 135 {
 136   if (f < 0.0)
 137     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
 138   if (f < U32_MAX_P1) {
 139 #if CASTFLAGS & 2
 140     if (f < U32_MAX_P1_HALF)
 141       return (U32) f;
 142     f -= U32_MAX_P1_HALF;
 143     return ((U32) f) | (1 + (U32_MAX >> 1));
 144 #else
 145     return (U32) f;
 146 #endif
 147   }
 148   return f > 0 ? U32_MAX : 0 /* NaN */;
 149 }
 150
 151 I32
 152 Perl_cast_i32(NV f)
 153 {
 154   if (f < I32_MAX_P1)
 155     return f < I32_MIN ? I32_MIN : (I32) f;
 156   if (f < U32_MAX_P1) {
 157 #if CASTFLAGS & 2
 158     if (f < U32_MAX_P1_HALF)
 159       return (I32)(U32) f;
 160     f -= U32_MAX_P1_HALF;
 161     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
 162 #else
 163     return (I32)(U32) f;
 164 #endif
 165   }
 166   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
 167 }
 168
 169 IV
 170 Perl_cast_iv(NV f)
 171 {
 172   if (f < IV_MAX_P1)
 173     return f < IV_MIN ? IV_MIN : (IV) f;
 174   if (f < UV_MAX_P1) {
 175 #if CASTFLAGS & 2
 176     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
 177     if (f < UV_MAX_P1_HALF)
 178       return (IV)(UV) f;
 179     f -= UV_MAX_P1_HALF;
 180     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
 181 #else
 182     return (IV)(UV) f;
 183 #endif
 184   }
 185   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
 186 }
 187
 188 UV
 189 Perl_cast_uv(NV f)
 190 {
 191   if (f < 0.0)
 192     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
 193   if (f < UV_MAX_P1) {
 194 #if CASTFLAGS & 2
 195     if (f < UV_MAX_P1_HALF)
 196       return (UV) f;
 197     f -= UV_MAX_P1_HALF;
 198     return ((UV) f) | (1 + (UV_MAX >> 1));
 199 #else
 200     return (UV) f;
 201 #endif
 202   }
 203   return f > 0 ? UV_MAX : 0 /* NaN */;
 204 }
 205
 206 /*
 207 =for apidoc grok_bin
 208
 209 converts a string representing a binary number to numeric form.
 210
 211 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 213 The scan stops at the end of the string, or the first invalid character.
 214 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 215 invalid character will also trigger a warning.
 216 On return C<*len> is set to the length of the scanned string,
 217 and C<*flags> gives output flags.
 218
 219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 220 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 222 and writes the value to C<*result> (or the value is discarded if C<result>
 223 is NULL).
 224
 225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
 226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 227 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
 228 number may use C<"_"> characters to separate digits.
 229
 230 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
 231 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
 232 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
 233 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
 234 =for apidoc Amnh||PERL_SCAN_TRAILING
 235
 236 =cut
 237
 238 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 239 which suppresses any message for non-portable numbers that are still valid
 240 on this platform.
 241  */
 242
 243 UV
 244 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 245 {
 246     const char *s = start;
 247     STRLEN len = *len_p;
 248     UV value = 0;
 249     NV value_nv = 0;
 250
 251     const UV max_div_2 = UV_MAX / 2;
 252     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 253     bool overflowed = FALSE;
 254     char bit;
 255
 256     PERL_ARGS_ASSERT_GROK_BIN;
 257
 258     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 259         /* strip off leading b or 0b.
 260            for compatibility silently suffer "b" and "0b" as valid binary
 261            numbers. */
 262         if (len >= 1) {
 263             if (isALPHA_FOLD_EQ(s[0], 'b')) {
 264                 s++;
 265                 len--;
 266             }
 267             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
 268                 s+=2;
 269                 len-=2;
 270             }
 271         }
 272     }
 273
 274     for (; len-- && (bit = *s); s++) {
 275         if (bit == '0' || bit == '1') {
 276             /* Write it in this wonky order with a goto to attempt to get the
 277                compiler to make the common case integer-only loop pretty tight.
 278                With gcc seems to be much straighter code than old scan_bin.  */
 279           redo:
 280             if (!overflowed) {
 281                 if (value <= max_div_2) {
 282                     value = (value << 1) | (bit - '0');
 283                     continue;
 284                 }
 285                 /* Bah. We're just overflowed.  */
 286                 /* diag_listed_as: Integer overflow in %s number */
 287                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 288                                  "Integer overflow in binary number");
 289                 overflowed = TRUE;
 290                 value_nv = (NV) value;
 291             }
 292             value_nv *= 2.0;
 293             /* If an NV has not enough bits in its mantissa to
 294              * represent a UV this summing of small low-order numbers
 295              * is a waste of time (because the NV cannot preserve
 296              * the low-order bits anyway): we could just remember when
 297              * did we overflow and in the end just multiply value_nv by the
 298              * right amount. */
 299             value_nv += (NV)(bit - '0');
 300             continue;
 301         }
 302         if (bit == '_' && len && allow_underscores && (bit = s[1])
 303             && (bit == '0' || bit == '1'))
 304             {
 305                 --len;
 306                 ++s;
 307                 goto redo;
 308             }
 309         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 310             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 311                            "Illegal binary digit '%c' ignored", *s);
 312         break;
 313     }
 314
 315     if (   ( overflowed && value_nv > 4294967295.0)
 316 #if UVSIZE > 4
 317         || (!overflowed && value > 0xffffffff
 318             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 319 #endif
 320         ) {
 321         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 322                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 323     }
 324     *len_p = s - start;
 325     if (!overflowed) {
 326         *flags = 0;
 327         return value;
 328     }
 329     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 330     if (result)
 331         *result = value_nv;
 332     return UV_MAX;
 333 }
 334
 335 /*
 336 =for apidoc grok_hex
 337
 338 converts a string representing a hex number to numeric form.
 339
 340 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 341 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 342 The scan stops at the end of the string, or the first invalid character.
 343 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 344 invalid character will also trigger a warning.
 345 On return C<*len> is set to the length of the scanned string,
 346 and C<*flags> gives output flags.
 347
 348 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 349 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 350 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 351 and writes the value to C<*result> (or the value is discarded if C<result>
 352 is C<NULL>).
 353
 354 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
 355 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 356 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
 357 number may use C<"_"> characters to separate digits.
 358
 359 =cut
 360
 361 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
 362 which suppresses any message for non-portable numbers, but which are valid
 363 on this platform.
 364  */
 365
 366 UV
 367 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 368 {
 369     const char *s = start;
 370     STRLEN len = *len_p;
 371     UV value = 0;
 372     NV value_nv = 0;
 373     const UV max_div_16 = UV_MAX / 16;
 374     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 375     bool overflowed = FALSE;
 376
 377     PERL_ARGS_ASSERT_GROK_HEX;
 378
 379     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 380         /* strip off leading x or 0x.
 381            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 382         */
 383         if (len >= 1) {
 384             if (isALPHA_FOLD_EQ(s[0], 'x')) {
 385                 s++;
 386                 len--;
 387             }
 388             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
 389                 s+=2;
 390                 len-=2;
 391             }
 392         }
 393     }
 394
 395     for (; len-- && *s; s++) {
 396         if (isXDIGIT(*s)) {
 397             /* Write it in this wonky order with a goto to attempt to get the
 398                compiler to make the common case integer-only loop pretty tight.
 399                With gcc seems to be much straighter code than old scan_hex.  */
 400           redo:
 401             if (!overflowed) {
 402                 if (value <= max_div_16) {
 403                     value = (value << 4) | XDIGIT_VALUE(*s);
 404                     continue;
 405                 }
 406                 /* Bah. We're just overflowed.  */
 407                 /* diag_listed_as: Integer overflow in %s number */
 408                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 409                                  "Integer overflow in hexadecimal number");
 410                 overflowed = TRUE;
 411                 value_nv = (NV) value;
 412             }
 413             value_nv *= 16.0;
 414             /* If an NV has not enough bits in its mantissa to
 415              * represent a UV this summing of small low-order numbers
 416              * is a waste of time (because the NV cannot preserve
 417              * the low-order bits anyway): we could just remember when
 418              * did we overflow and in the end just multiply value_nv by the
 419              * right amount of 16-tuples. */
 420             value_nv += (NV) XDIGIT_VALUE(*s);
 421             continue;
 422         }
 423         if (*s == '_' && len && allow_underscores && s[1]
 424                 && isXDIGIT(s[1]))
 425             {
 426                 --len;
 427                 ++s;
 428                 goto redo;
 429             }
 430         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 431             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 432                         "Illegal hexadecimal digit '%c' ignored", *s);
 433         break;
 434     }
 435
 436     if (   ( overflowed && value_nv > 4294967295.0)
 437 #if UVSIZE > 4
 438         || (!overflowed && value > 0xffffffff
 439             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 440 #endif
 441         ) {
 442         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 443                        "Hexadecimal number > 0xffffffff non-portable");
 444     }
 445     *len_p = s - start;
 446     if (!overflowed) {
 447         *flags = 0;
 448         return value;
 449     }
 450     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 451     if (result)
 452         *result = value_nv;
 453     return UV_MAX;
 454 }
 455
 456 /*
 457 =for apidoc grok_oct
 458
 459 converts a string representing an octal number to numeric form.
 460
 461 On entry C<start> and C<*len> give the string to scan, C<*flags> gives
 462 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 463 The scan stops at the end of the string, or the first invalid character.
 464 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 465 8 or 9 will also trigger a warning.
 466 On return C<*len> is set to the length of the scanned string,
 467 and C<*flags> gives output flags.
 468
 469 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 470 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 471 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 472 and writes the value to C<*result> (or the value is discarded if C<result>
 473 is C<NULL>).
 474
 475 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
 476 number may use C<"_"> characters to separate digits.
 477
 478 =cut
 479
 480 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 481 which suppresses any message for non-portable numbers, but which are valid
 482 on this platform.
 483  */
 484
 485 UV
 486 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 487 {
 488     const char *s = start;
 489     STRLEN len = *len_p;
 490     UV value = 0;
 491     NV value_nv = 0;
 492     const UV max_div_8 = UV_MAX / 8;
 493     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 494     bool overflowed = FALSE;
 495
 496     PERL_ARGS_ASSERT_GROK_OCT;
 497
 498     for (; len-- && *s; s++) {
 499         if (isOCTAL(*s)) {
 500             /* Write it in this wonky order with a goto to attempt to get the
 501                compiler to make the common case integer-only loop pretty tight.
 502             */
 503           redo:
 504             if (!overflowed) {
 505                 if (value <= max_div_8) {
 506                     value = (value << 3) | OCTAL_VALUE(*s);
 507                     continue;
 508                 }
 509                 /* Bah. We're just overflowed.  */
 510                 /* diag_listed_as: Integer overflow in %s number */
 511                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 512                                "Integer overflow in octal number");
 513                 overflowed = TRUE;
 514                 value_nv = (NV) value;
 515             }
 516             value_nv *= 8.0;
 517             /* If an NV has not enough bits in its mantissa to
 518              * represent a UV this summing of small low-order numbers
 519              * is a waste of time (because the NV cannot preserve
 520              * the low-order bits anyway): we could just remember when
 521              * did we overflow and in the end just multiply value_nv by the
 522              * right amount of 8-tuples. */
 523             value_nv += (NV) OCTAL_VALUE(*s);
 524             continue;
 525         }
 526         if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
 527             --len;
 528             ++s;
 529             goto redo;
 530         }
 531         /* Allow \octal to work the DWIM way (that is, stop scanning
 532          * as soon as non-octal characters are seen, complain only if
 533          * someone seems to want to use the digits eight and nine.  Since we
 534          * know it is not octal, then if isDIGIT, must be an 8 or 9). */
 535         if (isDIGIT(*s)) {
 536             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 537                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 538                                "Illegal octal digit '%c' ignored", *s);
 539         }
 540         break;
 541     }
 542
 543     if (   ( overflowed && value_nv > 4294967295.0)
 544 #if UVSIZE > 4
 545         || (!overflowed && value > 0xffffffff
 546             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 547 #endif
 548         ) {
 549         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 550                        "Octal number > 037777777777 non-portable");
 551     }
 552     *len_p = s - start;
 553     if (!overflowed) {
 554         *flags = 0;
 555         return value;
 556     }
 557     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 558     if (result)
 559         *result = value_nv;
 560     return UV_MAX;
 561 }
 562
 563 /*
 564 =for apidoc scan_bin
 565
 566 For backwards compatibility.  Use C<grok_bin> instead.
 567
 568 =for apidoc scan_hex
 569
 570 For backwards compatibility.  Use C<grok_hex> instead.
 571
 572 =for apidoc scan_oct
 573
 574 For backwards compatibility.  Use C<grok_oct> instead.
 575
 576 =cut
 577  */
 578
 579 NV
 580 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 581 {
 582     NV rnv;
 583     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 584     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 585
 586     PERL_ARGS_ASSERT_SCAN_BIN;
 587
 588     *retlen = len;
 589     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 590 }
 591
 592 NV
 593 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 594 {
 595     NV rnv;
 596     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 597     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 598
 599     PERL_ARGS_ASSERT_SCAN_OCT;
 600
 601     *retlen = len;
 602     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 603 }
 604
 605 NV
 606 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 607 {
 608     NV rnv;
 609     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 610     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 611
 612     PERL_ARGS_ASSERT_SCAN_HEX;
 613
 614     *retlen = len;
 615     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 616 }
 617
 618 /*
 619 =for apidoc grok_numeric_radix
 620
 621 Scan and skip for a numeric decimal separator (radix).
 622
 623 =cut
 624  */
 625 bool
 626 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 627 {
 628     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 629
 630 #ifdef USE_LOCALE_NUMERIC
 631
 632     if (IN_LC(LC_NUMERIC)) {
 633         STRLEN len;
 634         char * radix;
 635         bool matches_radix = FALSE;
 636         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
 637
 638         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
 639
 640         radix = SvPV(PL_numeric_radix_sv, len);
 641         radix = savepvn(radix, len);
 642
 643         RESTORE_LC_NUMERIC();
 644
 645         if (*sp + len <= send) {
 646             matches_radix = memEQ(*sp, radix, len);
 647         }
 648
 649         Safefree(radix);
 650
 651         if (matches_radix) {
 652             *sp += len;
 653             return TRUE;
 654         }
 655     }
 656
 657 #endif
 658
 659     /* always try "." if numeric radix didn't match because
 660      * we may have data from different locales mixed */
 661     if (*sp < send && **sp == '.') {
 662         ++*sp;
 663         return TRUE;
 664     }
 665
 666     return FALSE;
 667 }
 668
 669 /*
 670 =for apidoc grok_infnan
 671
 672 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
 673 or "not a number", and returns one of the following flag combinations:
 674
 675   IS_NUMBER_INFINITY
 676   IS_NUMBER_NAN
 677   IS_NUMBER_INFINITY | IS_NUMBER_NEG
 678   IS_NUMBER_NAN | IS_NUMBER_NEG
 679   0
 680
 681 possibly |-ed with C<IS_NUMBER_TRAILING>.
 682
 683 If an infinity or a not-a-number is recognized, C<*sp> will point to
 684 one byte past the end of the recognized string.  If the recognition fails,
 685 zero is returned, and C<*sp> will not move.
 686
 687 =for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
 688 =for apidoc Amn|bool|IS_NUMBER_INFINITY
 689 =for apidoc Amn|bool|IS_NUMBER_IN_UV
 690 =for apidoc Amn|bool|IS_NUMBER_NAN
 691 =for apidoc Amn|bool|IS_NUMBER_NEG
 692 =for apidoc Amn|bool|IS_NUMBER_NOT_INT
 693
 694 =cut
 695 */
 696
 697 int
 698 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
 699 {
 700     const char* s = *sp;
 701     int flags = 0;
 702 #if defined(NV_INF) || defined(NV_NAN)
 703     bool odh = FALSE; /* one-dot-hash: 1.#INF */
 704
 705     PERL_ARGS_ASSERT_GROK_INFNAN;
 706
 707     if (*s == '+') {
 708         s++; if (s == send) return 0;
 709     }
 710     else if (*s == '-') {
 711         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
 712         s++; if (s == send) return 0;
 713     }
 714
 715     if (*s == '1') {
 716         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
 717          * Let's keep the dot optional. */
 718         s++; if (s == send) return 0;
 719         if (*s == '.') {
 720             s++; if (s == send) return 0;
 721         }
 722         if (*s == '#') {
 723             s++; if (s == send) return 0;
 724         } else
 725             return 0;
 726         odh = TRUE;
 727     }
 728
 729     if (isALPHA_FOLD_EQ(*s, 'I')) {
 730         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
 731
 732         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 733         s++; if (s == send) return 0;
 734         if (isALPHA_FOLD_EQ(*s, 'F')) {
 735             s++;
 736             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
 737                 int fail =
 738                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
 739                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
 740                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
 741                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
 742                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
 743                 s++;
 744             } else if (odh) {
 745                 while (*s == '0') { /* 1.#INF00 */
 746                     s++;
 747                 }
 748             }
 749             while (s < send && isSPACE(*s))
 750                 s++;
 751             if (s < send && *s) {
 752                 flags |= IS_NUMBER_TRAILING;
 753             }
 754             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 755         }
 756         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
 757             s++;
 758             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 759             while (*s == '0') { /* 1.#IND00 */
 760                 s++;
 761             }
 762             if (*s) {
 763                 flags |= IS_NUMBER_TRAILING;
 764             }
 765         } else
 766             return 0;
 767     }
 768     else {
 769         /* Maybe NAN of some sort */
 770
 771         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
 772             /* snan, qNaN */
 773             /* XXX do something with the snan/qnan difference */
 774             s++; if (s == send) return 0;
 775         }
 776
 777         if (isALPHA_FOLD_EQ(*s, 'N')) {
 778             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
 779             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 780             s++;
 781
 782             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 783
 784             /* NaN can be followed by various stuff (NaNQ, NaNS), but
 785              * there are also multiple different NaN values, and some
 786              * implementations output the "payload" values,
 787              * e.g. NaN123, NAN(abc), while some legacy implementations
 788              * have weird stuff like NaN%. */
 789             if (isALPHA_FOLD_EQ(*s, 'q') ||
 790                 isALPHA_FOLD_EQ(*s, 's')) {
 791                 /* "nanq" or "nans" are ok, though generating
 792                  * these portably is tricky. */
 793                 s++;
 794             }
 795             if (*s == '(') {
 796                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
 797                 const char *t;
 798                 s++;
 799                 if (s == send) {
 800                     return flags | IS_NUMBER_TRAILING;
 801                 }
 802                 t = s + 1;
 803                 while (t < send && *t && *t != ')') {
 804                     t++;
 805                 }
 806                 if (t == send) {
 807                     return flags | IS_NUMBER_TRAILING;
 808                 }
 809                 if (*t == ')') {
 810                     int nantype;
 811                     UV nanval;
 812                     if (s[0] == '0' && s + 2 < t &&
 813                         isALPHA_FOLD_EQ(s[1], 'x') &&
 814                         isXDIGIT(s[2])) {
 815                         STRLEN len = t - s;
 816                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 817                         nanval = grok_hex(s, &len, &flags, NULL);
 818                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 819                             nantype = 0;
 820                         } else {
 821                             nantype = IS_NUMBER_IN_UV;
 822                         }
 823                         s += len;
 824                     } else if (s[0] == '0' && s + 2 < t &&
 825                                isALPHA_FOLD_EQ(s[1], 'b') &&
 826                                (s[2] == '0' || s[2] == '1')) {
 827                         STRLEN len = t - s;
 828                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 829                         nanval = grok_bin(s, &len, &flags, NULL);
 830                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 831                             nantype = 0;
 832                         } else {
 833                             nantype = IS_NUMBER_IN_UV;
 834                         }
 835                         s += len;
 836                     } else {
 837                         const char *u;
 838                         nantype =
 839                             grok_number_flags(s, t - s, &nanval,
 840                                               PERL_SCAN_TRAILING |
 841                                               PERL_SCAN_ALLOW_UNDERSCORES);
 842                         /* Unfortunately grok_number_flags() doesn't
 843                          * tell how far we got and the ')' will always
 844                          * be "trailing", so we need to double-check
 845                          * whether we had something dubious. */
 846                         for (u = s; u < t; u++) {
 847                             if (!isDIGIT(*u)) {
 848                                 flags |= IS_NUMBER_TRAILING;
 849                                 break;
 850                             }
 851                         }
 852                         s = u;
 853                     }
 854
 855                     /* XXX Doesn't do octal: nan("0123").
 856                      * Probably not a big loss. */
 857
 858                     if ((nantype & IS_NUMBER_NOT_INT) ||
 859                         !(nantype && IS_NUMBER_IN_UV)) {
 860                         /* XXX the nanval is currently unused, that is,
 861                          * not inserted as the NaN payload of the NV.
 862                          * But the above code already parses the C99
 863                          * nan(...)  format.  See below, and see also
 864                          * the nan() in POSIX.xs.
 865                          *
 866                          * Certain configuration combinations where
 867                          * NVSIZE is greater than UVSIZE mean that
 868                          * a single UV cannot contain all the possible
 869                          * NaN payload bits.  There would need to be
 870                          * some more generic syntax than "nan($uv)".
 871                          *
 872                          * Issues to keep in mind:
 873                          *
 874                          * (1) In most common cases there would
 875                          * not be an integral number of bytes that
 876                          * could be set, only a certain number of bits.
 877                          * For example for the common case of
 878                          * NVSIZE == UVSIZE == 8 there is room for 52
 879                          * bits in the payload, but the most significant
 880                          * bit is commonly reserved for the
 881                          * signaling/quiet bit, leaving 51 bits.
 882                          * Furthermore, the C99 nan() is supposed
 883                          * to generate quiet NaNs, so it is doubtful
 884                          * whether it should be able to generate
 885                          * signaling NaNs.  For the x86 80-bit doubles
 886                          * (if building a long double Perl) there would
 887                          * be 62 bits (s/q bit being the 63rd).
 888                          *
 889                          * (2) Endianness of the payload bits. If the
 890                          * payload is specified as an UV, the low-order
 891                          * bits of the UV are naturally little-endianed
 892                          * (rightmost) bits of the payload.  The endianness
 893                          * of UVs and NVs can be different. */
 894                         return 0;
 895                     }
 896                     if (s < t) {
 897                         flags |= IS_NUMBER_TRAILING;
 898                     }
 899                 } else {
 900                     /* Looked like nan(...), but no close paren. */
 901                     flags |= IS_NUMBER_TRAILING;
 902                 }
 903             } else {
 904                 while (s < send && isSPACE(*s))
 905                     s++;
 906                 if (s < send && *s) {
 907                     /* Note that we here implicitly accept (parse as
 908                      * "nan", but with warnings) also any other weird
 909                      * trailing stuff for "nan".  In the above we just
 910                      * check that if we got the C99-style "nan(...)",
 911                      * the "..."  looks sane.
 912                      * If in future we accept more ways of specifying
 913                      * the nan payload, the accepting would happen around
 914                      * here. */
 915                     flags |= IS_NUMBER_TRAILING;
 916                 }
 917             }
 918             s = send;
 919         }
 920         else
 921             return 0;
 922     }
 923
 924     while (s < send && isSPACE(*s))
 925         s++;
 926
 927 #else
 928     PERL_UNUSED_ARG(send);
 929 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
 930     *sp = s;
 931     return flags;
 932 }
 933
 934 /*
 935 =for apidoc grok_number_flags
 936
 937 Recognise (or not) a number.  The type of the number is returned
 938 (0 if unrecognised), otherwise it is a bit-ORed combination of
 939 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
 940 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
 941
 942 If the value of the number can fit in a UV, it is returned in C<*valuep>.
 943 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
 944 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
 945 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
 946 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
 947 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
 948
 949 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
 950 seen (in which case C<*valuep> gives the true value truncated to an integer), and
 951 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
 952 absolute value).  C<IS_NUMBER_IN_UV> is not set if e notation was used or the
 953 number is larger than a UV.
 954
 955 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 956 non-numeric text on an otherwise successful I<grok>, setting
 957 C<IS_NUMBER_TRAILING> on the result.
 958
 959 =for apidoc grok_number
 960
 961 Identical to C<grok_number_flags()> with C<flags> set to zero.
 962
 963 =cut
 964  */
 965 int
 966 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 967 {
 968     PERL_ARGS_ASSERT_GROK_NUMBER;
 969
 970     return grok_number_flags(pv, len, valuep, 0);
 971 }
 972
 973 static const UV uv_max_div_10 = UV_MAX / 10;
 974 static const U8 uv_max_mod_10 = UV_MAX % 10;
 975
 976 int
 977 Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
 978 {
 979   const char *s = pv;
 980   const char * const send = pv + len;
 981   const char *d;
 982   int numtype = 0;
 983
 984   PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
 985
 986   while (s < send && isSPACE(*s))
 987     s++;
 988   if (s == send) {
 989     return 0;
 990   } else if (*s == '-') {
 991     s++;
 992     numtype = IS_NUMBER_NEG;
 993   }
 994   else if (*s == '+')
 995     s++;
 996
 997   if (s == send)
 998     return 0;
 999
1000   /* The first digit (after optional sign): note that might
1001    * also point to "infinity" or "nan", or "1.#INF". */
1002   d = s;
1003
1004   /* next must be digit or the radix separator or beginning of infinity/nan */
1005   if (isDIGIT(*s)) {
1006     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
1007        overflow.  */
1008     UV value = *s - '0';
1009     /* This construction seems to be more optimiser friendly.
1010        (without it gcc does the isDIGIT test and the *s - '0' separately)
1011        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
1012        In theory the optimiser could deduce how far to unroll the loop
1013        before checking for overflow.  */
1014     if (++s < send) {
1015       int digit = *s - '0';
1016       if (inRANGE(digit, 0, 9)) {
1017         value = value * 10 + digit;
1018         if (++s < send) {
1019           digit = *s - '0';
1020           if (inRANGE(digit, 0, 9)) {
1021             value = value * 10 + digit;
1022             if (++s < send) {
1023               digit = *s - '0';
1024               if (inRANGE(digit, 0, 9)) {
1025                 value = value * 10 + digit;
1026                 if (++s < send) {
1027                   digit = *s - '0';
1028                   if (inRANGE(digit, 0, 9)) {
1029                     value = value * 10 + digit;
1030                     if (++s < send) {
1031                       digit = *s - '0';
1032                       if (inRANGE(digit, 0, 9)) {
1033                         value = value * 10 + digit;
1034                         if (++s < send) {
1035                           digit = *s - '0';
1036                           if (inRANGE(digit, 0, 9)) {
1037                             value = value * 10 + digit;
1038                             if (++s < send) {
1039                               digit = *s - '0';
1040                               if (inRANGE(digit, 0, 9)) {
1041                                 value = value * 10 + digit;
1042                                 if (++s < send) {
1043                                   digit = *s - '0';
1044                                   if (inRANGE(digit, 0, 9)) {
1045                                     value = value * 10 + digit;
1046                                     if (++s < send) {
1047                                       /* Now got 9 digits, so need to check
1048                                          each time for overflow.  */
1049                                       digit = *s - '0';
1050                                       while (    inRANGE(digit, 0, 9)
1051                                              && (value < uv_max_div_10
1052                                                  || (value == uv_max_div_10
1053                                                      && digit <= uv_max_mod_10))) {
1054                                         value = value * 10 + digit;
1055                                         if (++s < send)
1056                                           digit = *s - '0';
1057                                         else
1058                                           break;
1059                                       }
1060                                       if (inRANGE(digit, 0, 9)
1061                                           && (s < send)) {
1062                                         /* value overflowed.
1063                                            skip the remaining digits, don't
1064                                            worry about setting *valuep.  */
1065                                         do {
1066                                           s++;
1067                                         } while (s < send && isDIGIT(*s));
1068                                         numtype |=
1069                                           IS_NUMBER_GREATER_THAN_UV_MAX;
1070                                         goto skip_value;
1071                                       }
1072                                     }
1073                                   }
1074                                 }
1075                               }
1076                             }
1077                           }
1078                         }
1079                       }
1080                     }
1081                   }
1082                 }
1083               }
1084             }
1085           }
1086         }
1087       }
1088     }
1089     numtype |= IS_NUMBER_IN_UV;
1090     if (valuep)
1091       *valuep = value;
1092
1093   skip_value:
1094     if (GROK_NUMERIC_RADIX(&s, send)) {
1095       numtype |= IS_NUMBER_NOT_INT;
1096       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
1097         s++;
1098     }
1099   }
1100   else if (GROK_NUMERIC_RADIX(&s, send)) {
1101     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1102     /* no digits before the radix means we need digits after it */
1103     if (s < send && isDIGIT(*s)) {
1104       do {
1105         s++;
1106       } while (s < send && isDIGIT(*s));
1107       if (valuep) {
1108         /* integer approximation is valid - it's 0.  */
1109         *valuep = 0;
1110       }
1111     }
1112     else
1113         return 0;
1114   }
1115
1116   if (s > d && s < send) {
1117     /* we can have an optional exponent part */
1118     if (isALPHA_FOLD_EQ(*s, 'e')) {
1119       s++;
1120       if (s < send && (*s == '-' || *s == '+'))
1121         s++;
1122       if (s < send && isDIGIT(*s)) {
1123         do {
1124           s++;
1125         } while (s < send && isDIGIT(*s));
1126       }
1127       else if (flags & PERL_SCAN_TRAILING)
1128         return numtype | IS_NUMBER_TRAILING;
1129       else
1130         return 0;
1131
1132       /* The only flag we keep is sign.  Blow away any "it's UV"  */
1133       numtype &= IS_NUMBER_NEG;
1134       numtype |= IS_NUMBER_NOT_INT;
1135     }
1136   }
1137   while (s < send && isSPACE(*s))
1138     s++;
1139   if (s >= send)
1140     return numtype;
1141   if (memEQs(pv, len, "0 but true")) {
1142     if (valuep)
1143       *valuep = 0;
1144     return IS_NUMBER_IN_UV;
1145   }
1146   /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
1147   if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) {
1148       /* Really detect inf/nan. Start at d, not s, since the above
1149        * code might have already consumed the "1." or "1". */
1150       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
1151       if ((infnan & IS_NUMBER_INFINITY)) {
1152           return (numtype | infnan); /* Keep sign for infinity. */
1153       }
1154       else if ((infnan & IS_NUMBER_NAN)) {
1155           return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1156       }
1157   }
1158   else if (flags & PERL_SCAN_TRAILING) {
1159     return numtype | IS_NUMBER_TRAILING;
1160   }
1161
1162   return 0;
1163 }
1164
1165 /*
1166 =for apidoc grok_atoUV
1167
1168 parse a string, looking for a decimal unsigned integer.
1169
1170 On entry, C<pv> points to the beginning of the string;
1171 C<valptr> points to a UV that will receive the converted value, if found;
1172 C<endptr> is either NULL or points to a variable that points to one byte
1173 beyond the point in C<pv> that this routine should examine.
1174 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1175
1176 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1177 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1178 value.
1179
1180 If you constrain the portion of C<pv> that is looked at by this function (by
1181 passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1182 valid value, it will return TRUE, setting C<*endptr> to the byte following the
1183 final digit of the value.  But if there is no constraint at what's looked at,
1184 all of C<pv> must be valid in order for TRUE to be returned.
1185
1186 The only characters this accepts are the decimal digits '0'..'9'.
1187
1188 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1189 leading whitespace, nor negative inputs.  If such features are required, the
1190 calling code needs to explicitly implement those.
1191
1192 Note that this function returns FALSE for inputs that would overflow a UV,
1193 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1194 C<01>, C<002>, I<etc>.
1195
1196 Background: C<atoi> has severe problems with illegal inputs, it cannot be
1197 used for incremental parsing, and therefore should be avoided
1198 C<atoi> and C<strtol> are also affected by locale settings, which can also be
1199 seen as a bug (global state controlled by user environment).
1200
1201 =cut
1202
1203 */
1204
1205 bool
1206 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1207 {
1208     const char* s = pv;
1209     const char** eptr;
1210     const char* end2; /* Used in case endptr is NULL. */
1211     UV val = 0; /* The parsed value. */
1212
1213     PERL_ARGS_ASSERT_GROK_ATOUV;
1214
1215     if (endptr) {
1216         eptr = endptr;
1217     }
1218     else {
1219         end2 = s + strlen(s);
1220         eptr = &end2;
1221     }
1222
1223     if (   *eptr <= s
1224         || ! isDIGIT(*s))
1225     {
1226         return FALSE;
1227     }
1228
1229     /* Single-digit inputs are quite common. */
1230     val = *s++ - '0';
1231     if (s < *eptr && isDIGIT(*s)) {
1232         /* Fail on extra leading zeros. */
1233         if (val == 0)
1234             return FALSE;
1235         while (s < *eptr && isDIGIT(*s)) {
1236             /* This could be unrolled like in grok_number(), but
1237                 * the expected uses of this are not speed-needy, and
1238                 * unlikely to need full 64-bitness. */
1239             const U8 digit = *s++ - '0';
1240             if (val < uv_max_div_10 ||
1241                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1242                 val = val * 10 + digit;
1243             } else {
1244                 return FALSE;
1245             }
1246         }
1247     }
1248
1249     if (endptr == NULL) {
1250         if (*s) {
1251             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1252         }
1253     }
1254     else {
1255         *endptr = s;
1256     }
1257
1258     *valptr = val;
1259     return TRUE;
1260 }
1261
1262 #ifndef Perl_strtod
1263 STATIC NV
1264 S_mulexp10(NV value, I32 exponent)
1265 {
1266     NV result = 1.0;
1267     NV power = 10.0;
1268     bool negative = 0;
1269     I32 bit;
1270
1271     if (exponent == 0)
1272         return value;
1273     if (value == 0)
1274         return (NV)0;
1275
1276     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1277      * and that format does not have *easy* capabilities [1] for
1278      * overflowing doubles 'silently' as IEEE fp does.  We also need
1279      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1280      * range is much larger than D_FLOAT it still doesn't do silent
1281      * overflow.  Therefore we need to detect early whether we would
1282      * overflow (this is the behaviour of the native string-to-float
1283      * conversion routines, and therefore of native applications, too).
1284      *
1285      * [1] Trying to establish a condition handler to trap floating point
1286      *     exceptions is not a good idea. */
1287
1288     /* In UNICOS and in certain Cray models (such as T90) there is no
1289      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1290      * There is something you can do if you are willing to use some
1291      * inline assembler: the instruction is called DFI-- but that will
1292      * disable *all* floating point interrupts, a little bit too large
1293      * a hammer.  Therefore we need to catch potential overflows before
1294      * it's too late. */
1295
1296 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1297     STMT_START {
1298         const NV exp_v = log10(value);
1299         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1300             return NV_MAX;
1301         if (exponent < 0) {
1302             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1303                 return 0.0;
1304             while (-exponent >= NV_MAX_10_EXP) {
1305                 /* combination does not overflow, but 10^(-exponent) does */
1306                 value /= 10;
1307                 ++exponent;
1308             }
1309         }
1310     } STMT_END;
1311 #endif
1312
1313     if (exponent < 0) {
1314         negative = 1;
1315         exponent = -exponent;
1316 #ifdef NV_MAX_10_EXP
1317         /* for something like 1234 x 10^-309, the action of calculating
1318          * the intermediate value 10^309 then returning 1234 / (10^309)
1319          * will fail, since 10^309 becomes infinity. In this case try to
1320          * refactor it as 123 / (10^308) etc.
1321          */
1322         while (value && exponent > NV_MAX_10_EXP) {
1323             exponent--;
1324             value /= 10;
1325         }
1326         if (value == 0.0)
1327             return value;
1328 #endif
1329     }
1330 #if defined(__osf__)
1331     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1332      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1333      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1334      * but that breaks another set of infnan.t tests. */
1335 #  define FP_OVERFLOWS_TO_ZERO
1336 #endif
1337     for (bit = 1; exponent; bit <<= 1) {
1338         if (exponent & bit) {
1339             exponent ^= bit;
1340             result *= power;
1341 #ifdef FP_OVERFLOWS_TO_ZERO
1342             if (result == 0)
1343 # ifdef NV_INF
1344                 return value < 0 ? -NV_INF : NV_INF;
1345 # else
1346                 return value < 0 ? -FLT_MAX : FLT_MAX;
1347 # endif
1348 #endif
1349             /* Floating point exceptions are supposed to be turned off,
1350              *  but if we're obviously done, don't risk another iteration.
1351              */
1352              if (exponent == 0) break;
1353         }
1354         power *= power;
1355     }
1356     return negative ? value / result : value * result;
1357 }
1358 #endif /* #ifndef Perl_strtod */
1359
1360 #ifdef Perl_strtod
1361 #  define ATOF(s, x) my_atof2(s, &x)
1362 #else
1363 #  define ATOF(s, x) Perl_atof2(s, x)
1364 #endif
1365
1366 NV
1367 Perl_my_atof(pTHX_ const char* s)
1368 {
1369     /* 's' must be NUL terminated */
1370
1371     NV x = 0.0;
1372
1373     PERL_ARGS_ASSERT_MY_ATOF;
1374
1375 #if ! defined(USE_LOCALE_NUMERIC)
1376
1377     ATOF(s, x);
1378
1379 #else
1380
1381     {
1382         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1383         STORE_LC_NUMERIC_SET_TO_NEEDED();
1384         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1385             ATOF(s,x);
1386         }
1387         else {
1388
1389             /* Look through the string for the first thing that looks like a
1390              * decimal point: either the value in the current locale or the
1391              * standard fallback of '.'. The one which appears earliest in the
1392              * input string is the one that we should have atof look for. Note
1393              * that we have to determine this beforehand because on some
1394              * systems, Perl_atof2 is just a wrapper around the system's atof.
1395              * */
1396             const char * const standard_pos = strchr(s, '.');
1397             const char * const local_pos
1398                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1399             const bool use_standard_radix
1400                     = standard_pos && (!local_pos || standard_pos < local_pos);
1401
1402             if (use_standard_radix) {
1403                 SET_NUMERIC_STANDARD();
1404                 LOCK_LC_NUMERIC_STANDARD();
1405             }
1406
1407             ATOF(s,x);
1408
1409             if (use_standard_radix) {
1410                 UNLOCK_LC_NUMERIC_STANDARD();
1411                 SET_NUMERIC_UNDERLYING();
1412             }
1413         }
1414         RESTORE_LC_NUMERIC();
1415     }
1416
1417 #endif
1418
1419     return x;
1420 }
1421
1422 #if defined(NV_INF) || defined(NV_NAN)
1423
1424 static char*
1425 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1426 {
1427     const char *p0 = negative ? s - 1 : s;
1428     const char *p = p0;
1429     const int infnan = grok_infnan(&p, send);
1430     if (infnan && p != p0) {
1431         /* If we can generate inf/nan directly, let's do so. */
1432 #ifdef NV_INF
1433         if ((infnan & IS_NUMBER_INFINITY)) {
1434             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1435             return (char*)p;
1436         }
1437 #endif
1438 #ifdef NV_NAN
1439         if ((infnan & IS_NUMBER_NAN)) {
1440             *value = NV_NAN;
1441             return (char*)p;
1442         }
1443 #endif
1444 #ifdef Perl_strtod
1445         /* If still here, we didn't have either NV_INF or NV_NAN,
1446          * and can try falling back to native strtod/strtold.
1447          *
1448          * The native interface might not recognize all the possible
1449          * inf/nan strings Perl recognizes.  What we can try
1450          * is to try faking the input.  We will try inf/-inf/nan
1451          * as the most promising/portable input. */
1452         {
1453             const char* fake = "silence compiler warning";
1454             char* endp;
1455             NV nv;
1456 #ifdef NV_INF
1457             if ((infnan & IS_NUMBER_INFINITY)) {
1458                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1459             }
1460 #endif
1461 #ifdef NV_NAN
1462             if ((infnan & IS_NUMBER_NAN)) {
1463                 fake = "nan";
1464             }
1465 #endif
1466             assert(strNE(fake, "silence compiler warning"));
1467             nv = S_strtod(aTHX_ fake, &endp);
1468             if (fake != endp) {
1469 #ifdef NV_INF
1470                 if ((infnan & IS_NUMBER_INFINITY)) {
1471 #  ifdef Perl_isinf
1472                     if (Perl_isinf(nv))
1473                         *value = nv;
1474 #  else
1475                     /* last resort, may generate SIGFPE */
1476                     *value = Perl_exp((NV)1e9);
1477                     if ((infnan & IS_NUMBER_NEG))
1478                         *value = -*value;
1479 #  endif
1480                     return (char*)p; /* p, not endp */
1481                 }
1482 #endif
1483 #ifdef NV_NAN
1484                 if ((infnan & IS_NUMBER_NAN)) {
1485 #  ifdef Perl_isnan
1486                     if (Perl_isnan(nv))
1487                         *value = nv;
1488 #  else
1489                     /* last resort, may generate SIGFPE */
1490                     *value = Perl_log((NV)-1.0);
1491 #  endif
1492                     return (char*)p; /* p, not endp */
1493 #endif
1494                 }
1495             }
1496         }
1497 #endif /* #ifdef Perl_strtod */
1498     }
1499     return NULL;
1500 }
1501
1502 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1503
1504 char*
1505 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1506 {
1507     PERL_ARGS_ASSERT_MY_ATOF2;
1508     return my_atof3(orig, value, 0);
1509 }
1510
1511 char*
1512 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1513 {
1514     const char* s = orig;
1515     NV result[3] = {0.0, 0.0, 0.0};
1516 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1517     const char* send = s + ((len != 0)
1518                            ? len
1519                            : strlen(orig)); /* one past the last */
1520     bool negative = 0;
1521 #endif
1522 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1523     UV accumulator[2] = {0,0};  /* before/after dp */
1524     bool seen_digit = 0;
1525     I32 exp_adjust[2] = {0,0};
1526     I32 exp_acc[2] = {-1, -1};
1527     /* the current exponent adjust for the accumulators */
1528     I32 exponent = 0;
1529     I32 seen_dp  = 0;
1530     I32 digit = 0;
1531     I32 old_digit = 0;
1532     I32 sig_digits = 0; /* noof significant digits seen so far */
1533 #endif
1534
1535 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1536     PERL_ARGS_ASSERT_MY_ATOF3;
1537
1538     /* leading whitespace */
1539     while (s < send && isSPACE(*s))
1540         ++s;
1541
1542     /* sign */
1543     switch (*s) {
1544         case '-':
1545             negative = 1;
1546             /* FALLTHROUGH */
1547         case '+':
1548             ++s;
1549     }
1550 #endif
1551
1552 #ifdef Perl_strtod
1553     {
1554         char* endp;
1555         char* copy = NULL;
1556
1557         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1558             return endp;
1559
1560         /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1561            0b-prefixed binary numbers, which is backward incompatible
1562         */
1563         if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
1564             (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1565             *value = 0;
1566             return (char *)s+1;
1567         }
1568
1569         /* If the length is passed in, the input string isn't NUL-terminated,
1570          * and in it turns out the function below assumes it is; therefore we
1571          * create a copy and NUL-terminate that */
1572         if (len) {
1573             Newx(copy, len + 1, char);
1574             Copy(orig, copy, len, char);
1575             copy[len] = '\0';
1576             s = copy + (s - orig);
1577         }
1578
1579         result[2] = S_strtod(aTHX_ s, &endp);
1580
1581         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1582          * the original */
1583         if (copy) {
1584             s = (s - copy) + (char *) orig;
1585             endp = (endp - copy) + (char *) orig;
1586             Safefree(copy);
1587         }
1588
1589         if (s != endp) {
1590             *value = negative ? -result[2] : result[2];
1591             return endp;
1592         }
1593         return NULL;
1594     }
1595 #elif defined(USE_PERL_ATOF)
1596
1597 /* There is no point in processing more significant digits
1598  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1599  * while we need an upper-bound value. We add 2 to account for this;
1600  * since it will have been conservative on both the first and last digit.
1601  * For example a 32-bit mantissa with an exponent of 4 would have
1602  * exact values in the set
1603  *               4
1604  *               8
1605  *              ..
1606  *     17179869172
1607  *     17179869176
1608  *     17179869180
1609  *
1610  * where for the purposes of calculating NV_DIG we would have to discount
1611  * both the first and last digit, since neither can hold all values from
1612  * 0..9; but for calculating the value we must examine those two digits.
1613  */
1614 #ifdef MAX_SIG_DIG_PLUS
1615     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1616        possible digits in a NV, especially if NVs are not IEEE compliant
1617        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1618 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1619 #else
1620 # define MAX_SIG_DIGITS (NV_DIG+2)
1621 #endif
1622
1623 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1624 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1625
1626 #if defined(NV_INF) || defined(NV_NAN)
1627     {
1628         char* endp;
1629         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1630             return endp;
1631     }
1632 #endif
1633
1634     /* we accumulate digits into an integer; when this becomes too
1635      * large, we add the total to NV and start again */
1636
1637     while (s < send) {
1638         if (isDIGIT(*s)) {
1639             seen_digit = 1;
1640             old_digit = digit;
1641             digit = *s++ - '0';
1642             if (seen_dp)
1643                 exp_adjust[1]++;
1644
1645             /* don't start counting until we see the first significant
1646              * digit, eg the 5 in 0.00005... */
1647             if (!sig_digits && digit == 0)
1648                 continue;
1649
1650             if (++sig_digits > MAX_SIG_DIGITS) {
1651                 /* limits of precision reached */
1652                 if (digit > 5) {
1653                     ++accumulator[seen_dp];
1654                 } else if (digit == 5) {
1655                     if (old_digit % 2) { /* round to even - Allen */
1656                         ++accumulator[seen_dp];
1657                     }
1658                 }
1659                 if (seen_dp) {
1660                     exp_adjust[1]--;
1661                 } else {
1662                     exp_adjust[0]++;
1663                 }
1664                 /* skip remaining digits */
1665                 while (s < send && isDIGIT(*s)) {
1666                     ++s;
1667                     if (! seen_dp) {
1668                         exp_adjust[0]++;
1669                     }
1670                 }
1671                 /* warn of loss of precision? */
1672             }
1673             else {
1674                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1675                     /* add accumulator to result and start again */
1676                     result[seen_dp] = S_mulexp10(result[seen_dp],
1677                                                  exp_acc[seen_dp])
1678                         + (NV)accumulator[seen_dp];
1679                     accumulator[seen_dp] = 0;
1680                     exp_acc[seen_dp] = 0;
1681                 }
1682                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1683                 ++exp_acc[seen_dp];
1684             }
1685         }
1686         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1687             seen_dp = 1;
1688             if (sig_digits > MAX_SIG_DIGITS) {
1689                 while (s < send && isDIGIT(*s)) {
1690                     ++s;
1691                 }
1692                 break;
1693             }
1694         }
1695         else {
1696             break;
1697         }
1698     }
1699
1700     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1701     if (seen_dp) {
1702         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1703     }
1704
1705     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1706         bool expnegative = 0;
1707
1708         ++s;
1709         switch (*s) {
1710             case '-':
1711                 expnegative = 1;
1712                 /* FALLTHROUGH */
1713             case '+':
1714                 ++s;
1715         }
1716         while (s < send && isDIGIT(*s))
1717             exponent = exponent * 10 + (*s++ - '0');
1718         if (expnegative)
1719             exponent = -exponent;
1720     }
1721
1722     /* now apply the exponent */
1723
1724     if (seen_dp) {
1725         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1726                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1727     } else {
1728         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1729     }
1730
1731     /* now apply the sign */
1732     if (negative)
1733         result[2] = -result[2];
1734 #endif /* USE_PERL_ATOF */
1735     *value = result[2];
1736     return (char *)s;
1737 }
1738
1739 /*
1740 =for apidoc isinfnan
1741
1742 C<Perl_isinfnan()> is utility function that returns true if the NV
1743 argument is either an infinity or a C<NaN>, false otherwise.  To test
1744 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1745
1746 This is also the logical inverse of Perl_isfinite().
1747
1748 =cut
1749 */
1750 bool
1751 Perl_isinfnan(NV nv)
1752 {
1753   PERL_UNUSED_ARG(nv);
1754 #ifdef Perl_isinf
1755     if (Perl_isinf(nv))
1756         return TRUE;
1757 #endif
1758 #ifdef Perl_isnan
1759     if (Perl_isnan(nv))
1760         return TRUE;
1761 #endif
1762     return FALSE;
1763 }
1764
1765 /*
1766 =for apidoc
1767
1768 Checks whether the argument would be either an infinity or C<NaN> when used
1769 as a number, but is careful not to trigger non-numeric or uninitialized
1770 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
1771
1772 =cut
1773 */
1774
1775 bool
1776 Perl_isinfnansv(pTHX_ SV *sv)
1777 {
1778     PERL_ARGS_ASSERT_ISINFNANSV;
1779     if (!SvOK(sv))
1780         return FALSE;
1781     if (SvNOKp(sv))
1782         return Perl_isinfnan(SvNVX(sv));
1783     if (SvIOKp(sv))
1784         return FALSE;
1785     {
1786         STRLEN len;
1787         const char *s = SvPV_nomg_const(sv, len);
1788         return cBOOL(grok_infnan(&s, s+len));
1789     }
1790 }
1791
1792 #ifndef HAS_MODFL
1793 /* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
1794  * copysignl to emulate modfl, which is in some platforms missing or
1795  * broken. */
1796 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1797 long double
1798 Perl_my_modfl(long double x, long double *ip)
1799 {
1800     *ip = truncl(x);
1801     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1802 }
1803 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1804 long double
1805 Perl_my_modfl(long double x, long double *ip)
1806 {
1807     *ip = aintl(x);
1808     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1809 }
1810 #  endif
1811 #endif
1812
1813 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1814 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1815 long double
1816 Perl_my_frexpl(long double x, int *e) {
1817     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1818     return (scalbnl(x, -*e));
1819 }
1820 #endif
1821
1822 /*
1823 =for apidoc Perl_signbit
1824
1825 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1826 it is not.
1827
1828 If F<Configure> detects this system has a C<signbit()> that will work with
1829 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1830 fall back on this implementation.  The main use of this function
1831 is catching C<-0.0>.
1832
1833 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1834 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1835 function or macro that doesn't happen to work with our particular choice
1836 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1837 the standard system headers to be happy.  Also, this is a no-context
1838 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1839 F<perl.h> as a simple macro call to the system's C<signbit()>.
1840 Users should just always call C<Perl_signbit()>.
1841
1842 =cut
1843 */
1844 #if !defined(HAS_SIGNBIT)
1845 int
1846 Perl_signbit(NV x) {
1847 #  ifdef Perl_fp_class_nzero
1848     return Perl_fp_class_nzero(x);
1849     /* Try finding the high byte, and assume it's highest bit
1850      * is the sign.  This assumption is probably wrong somewhere. */
1851 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1852     return (((unsigned char *)&x)[9] & 0x80);
1853 #  elif defined(NV_LITTLE_ENDIAN)
1854     /* Note that NVSIZE is sizeof(NV), which would make the below be
1855      * wrong if the end bytes are unused, which happens with the x86
1856      * 80-bit long doubles, which is why take care of that above. */
1857     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1858 #  elif defined(NV_BIG_ENDIAN)
1859     return (((unsigned char *)&x)[0] & 0x80);
1860 #  else
1861     /* This last resort fallback is wrong for the negative zero. */
1862     return (x < 0.0) ? 1 : 0;
1863 #  endif
1864 }
1865 #endif
1866
1867 /*
1868  * ex: set ts=8 sts=4 sw=4 et:
1869  */