numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 =cut
  22
  23 This file contains all the stuff needed by perl for manipulating numeric
  24 values, including such things as replacements for the OS's atof() function.
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 #ifdef Perl_strtod
  33
  34 PERL_STATIC_INLINE NV
  35 S_strtod(pTHX_ const char * const s, char ** e)
  36 {
  37     DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
  38     NV result;
  39
  40     STORE_LC_NUMERIC_SET_TO_NEEDED();
  41
  42 #  ifdef USE_QUADMATH
  43
  44     result = strtoflt128(s, e);
  45
  46 #  elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE)    \
  47                              && defined(USE_LONG_DOUBLE)
  48 #    if defined(__MINGW64_VERSION_MAJOR)
  49       /***********************************************
  50        We are unable to use strtold because of
  51         https://sourceforge.net/p/mingw-w64/bugs/711/
  52         &
  53         https://sourceforge.net/p/mingw-w64/bugs/725/
  54
  55        but __mingw_strtold is fine.
  56       ***********************************************/
  57
  58     result = __mingw_strtold(s, e);
  59
  60 #    else
  61
  62     result = strtold(s, e);
  63
  64 #    endif
  65 #  elif defined(HAS_STRTOD)
  66
  67     result = strtod(s, e);
  68
  69 #  else
  70 #    error No strtod() equivalent found
  71 #  endif
  72
  73     RESTORE_LC_NUMERIC();
  74
  75     return result;
  76 }
  77
  78 #endif  /* #ifdef Perl_strtod */
  79
  80 /*
  81
  82 =for apidoc my_strtod
  83
  84 This function is equivalent to the libc strtod() function, and is available
  85 even on platforms that lack plain strtod().  Its return value is the best
  86 available precision depending on platform capabilities and F<Configure>
  87 options.
  88
  89 It properly handles the locale radix character, meaning it expects a dot except
  90 when called from within the scope of S<C<use locale>>, in which case the radix
  91 character should be that specified by the current locale.
  92
  93 The synonym Strtod() may be used instead.
  94
  95 =cut
  96
  97 */
  98
  99 NV
 100 Perl_my_strtod(const char * const s, char **e)
 101 {
 102     dTHX;
 103
 104     PERL_ARGS_ASSERT_MY_STRTOD;
 105
 106 #ifdef Perl_strtod
 107
 108     return S_strtod(aTHX_ s, e);
 109
 110 #else
 111
 112     {
 113         NV result;
 114         char ** end_ptr = NULL;
 115
 116         *end_ptr = my_atof2(s, &result);
 117         if (e) {
 118             *e = *end_ptr;
 119         }
 120
 121         if (! *end_ptr) {
 122             result = 0.0;
 123         }
 124
 125         return result;
 126     }
 127
 128 #endif
 129
 130 }
 131
 132
 133 U32
 134 Perl_cast_ulong(NV f)
 135 {
 136   if (f < 0.0)
 137     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
 138   if (f < U32_MAX_P1) {
 139 #if CASTFLAGS & 2
 140     if (f < U32_MAX_P1_HALF)
 141       return (U32) f;
 142     f -= U32_MAX_P1_HALF;
 143     return ((U32) f) | (1 + (U32_MAX >> 1));
 144 #else
 145     return (U32) f;
 146 #endif
 147   }
 148   return f > 0 ? U32_MAX : 0 /* NaN */;
 149 }
 150
 151 I32
 152 Perl_cast_i32(NV f)
 153 {
 154   if (f < I32_MAX_P1)
 155     return f < I32_MIN ? I32_MIN : (I32) f;
 156   if (f < U32_MAX_P1) {
 157 #if CASTFLAGS & 2
 158     if (f < U32_MAX_P1_HALF)
 159       return (I32)(U32) f;
 160     f -= U32_MAX_P1_HALF;
 161     return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
 162 #else
 163     return (I32)(U32) f;
 164 #endif
 165   }
 166   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
 167 }
 168
 169 IV
 170 Perl_cast_iv(NV f)
 171 {
 172   if (f < IV_MAX_P1)
 173     return f < IV_MIN ? IV_MIN : (IV) f;
 174   if (f < UV_MAX_P1) {
 175 #if CASTFLAGS & 2
 176     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
 177     if (f < UV_MAX_P1_HALF)
 178       return (IV)(UV) f;
 179     f -= UV_MAX_P1_HALF;
 180     return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
 181 #else
 182     return (IV)(UV) f;
 183 #endif
 184   }
 185   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
 186 }
 187
 188 UV
 189 Perl_cast_uv(NV f)
 190 {
 191   if (f < 0.0)
 192     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
 193   if (f < UV_MAX_P1) {
 194 #if CASTFLAGS & 2
 195     if (f < UV_MAX_P1_HALF)
 196       return (UV) f;
 197     f -= UV_MAX_P1_HALF;
 198     return ((UV) f) | (1 + (UV_MAX >> 1));
 199 #else
 200     return (UV) f;
 201 #endif
 202   }
 203   return f > 0 ? UV_MAX : 0 /* NaN */;
 204 }
 205
 206 /*
 207 =for apidoc grok_bin
 208
 209 converts a string representing a binary number to numeric form.
 210
 211 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 212 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 213 The scan stops at the end of the string, or the first invalid character.
 214 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 215 invalid character will also trigger a warning.
 216 On return C<*len_p> is set to the length of the scanned string,
 217 and C<*flags> gives output flags.
 218
 219 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 220 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_bin>
 221 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 222 and writes the value to C<*result> (or the value is discarded if C<result>
 223 is NULL).
 224
 225 The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
 226 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 227 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the binary
 228 number may use C<"_"> characters to separate digits.
 229
 230 =for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
 231 =for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
 232 =for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
 233 =for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
 234 =for apidoc Amnh||PERL_SCAN_TRAILING
 235
 236 =cut
 237
 238 Not documented yet because it is experimental: C<PERL_SCAN_SILENT_NON_PORTABLE>,
 239 which suppresses any message for non-portable numbers that are still valid
 240 on this platform.
 241  */
 242
 243 UV
 244 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 245 {
 246     const char *s = start;
 247     STRLEN len = *len_p;
 248     UV value = 0;
 249     NV value_nv = 0;
 250
 251     const UV max_div_2 = UV_MAX / 2;
 252     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 253     bool overflowed = FALSE;
 254     char bit;
 255
 256     PERL_ARGS_ASSERT_GROK_BIN;
 257
 258     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 259         /* strip off leading b or 0b.
 260            for compatibility silently suffer "b" and "0b" as valid binary
 261            numbers. */
 262         if (len >= 1) {
 263             if (isALPHA_FOLD_EQ(s[0], 'b')) {
 264                 s++;
 265                 len--;
 266             }
 267             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'b'))) {
 268                 s+=2;
 269                 len-=2;
 270             }
 271         }
 272     }
 273
 274     for (; len-- && (bit = *s); s++) {
 275         if (bit == '0' || bit == '1') {
 276             /* Write it in this wonky order with a goto to attempt to get the
 277                compiler to make the common case integer-only loop pretty tight.
 278                With gcc seems to be much straighter code than old scan_bin.  */
 279           redo:
 280             if (!overflowed) {
 281                 if (value <= max_div_2) {
 282                     value = (value << 1) | (bit - '0');
 283                     continue;
 284                 }
 285                 /* Bah. We're just overflowed.  */
 286                 /* diag_listed_as: Integer overflow in %s number */
 287                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 288                                  "Integer overflow in binary number");
 289                 overflowed = TRUE;
 290                 value_nv = (NV) value;
 291             }
 292             value_nv *= 2.0;
 293             /* If an NV has not enough bits in its mantissa to
 294              * represent a UV this summing of small low-order numbers
 295              * is a waste of time (because the NV cannot preserve
 296              * the low-order bits anyway): we could just remember when
 297              * did we overflow and in the end just multiply value_nv by the
 298              * right amount. */
 299             value_nv += (NV)(bit - '0');
 300             continue;
 301         }
 302         if (bit == '_' && len && allow_underscores && (bit = s[1])
 303             && (bit == '0' || bit == '1'))
 304             {
 305                 --len;
 306                 ++s;
 307                 goto redo;
 308             }
 309         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 310             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 311                            "Illegal binary digit '%c' ignored", *s);
 312         break;
 313     }
 314
 315     if (   ( overflowed && value_nv > 4294967295.0)
 316 #if UVSIZE > 4
 317         || (!overflowed && value > 0xffffffff
 318             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 319 #endif
 320         ) {
 321         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 322                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 323     }
 324     *len_p = s - start;
 325     if (!overflowed) {
 326         *flags = 0;
 327         return value;
 328     }
 329     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 330     if (result)
 331         *result = value_nv;
 332     return UV_MAX;
 333 }
 334
 335 /*
 336 =for apidoc grok_hex
 337
 338 converts a string representing a hex number to numeric form.
 339
 340 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 341 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 342 The scan stops at the end of the string, or the first invalid character.
 343 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 344 invalid character will also trigger a warning.
 345 On return C<*len_p> is set to the length of the scanned string,
 346 and C<*flags> gives output flags.
 347
 348 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 349 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_hex>
 350 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 351 and writes the value to C<*result> (or the value is discarded if C<result>
 352 is C<NULL>).
 353
 354 The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
 355 C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.  If
 356 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the hex
 357 number may use C<"_"> characters to separate digits.
 358
 359 =cut
 360
 361 Not documented yet because it is experimental: C<PERL_SCAN_SILENT_NON_PORTABLE>,
 362 which suppresses any message for non-portable numbers that are still valid
 363 on this platform.
 364  */
 365
 366 UV
 367 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 368 {
 369     const char *s = start;
 370     STRLEN len = *len_p;
 371     UV value = 0;
 372     NV value_nv = 0;
 373     const UV max_div_16 = UV_MAX / 16;
 374     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 375     bool overflowed = FALSE;
 376
 377     PERL_ARGS_ASSERT_GROK_HEX;
 378
 379     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 380         /* strip off leading x or 0x.
 381            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 382         */
 383         if (len >= 1) {
 384             if (isALPHA_FOLD_EQ(s[0], 'x')) {
 385                 s++;
 386                 len--;
 387             }
 388             else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], 'x'))) {
 389                 s+=2;
 390                 len-=2;
 391             }
 392         }
 393     }
 394
 395     for (; len-- && *s; s++) {
 396         if (isXDIGIT(*s)) {
 397             /* Write it in this wonky order with a goto to attempt to get the
 398                compiler to make the common case integer-only loop pretty tight.
 399                With gcc seems to be much straighter code than old scan_hex.  */
 400           redo:
 401             if (!overflowed) {
 402                 if (value <= max_div_16) {
 403                     value = (value << 4) | XDIGIT_VALUE(*s);
 404                     continue;
 405                 }
 406                 /* Bah. We're just overflowed.  */
 407                 /* diag_listed_as: Integer overflow in %s number */
 408                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 409                                  "Integer overflow in hexadecimal number");
 410                 overflowed = TRUE;
 411                 value_nv = (NV) value;
 412             }
 413             value_nv *= 16.0;
 414             /* If an NV has not enough bits in its mantissa to
 415              * represent a UV this summing of small low-order numbers
 416              * is a waste of time (because the NV cannot preserve
 417              * the low-order bits anyway): we could just remember when
 418              * did we overflow and in the end just multiply value_nv by the
 419              * right amount of 16-tuples. */
 420             value_nv += (NV) XDIGIT_VALUE(*s);
 421             continue;
 422         }
 423         if (*s == '_' && len && allow_underscores && s[1]
 424                 && isXDIGIT(s[1]))
 425             {
 426                 --len;
 427                 ++s;
 428                 goto redo;
 429             }
 430         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 431             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 432                         "Illegal hexadecimal digit '%c' ignored", *s);
 433         break;
 434     }
 435
 436     if (   ( overflowed && value_nv > 4294967295.0)
 437 #if UVSIZE > 4
 438         || (!overflowed && value > 0xffffffff
 439             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 440 #endif
 441         ) {
 442         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 443                        "Hexadecimal number > 0xffffffff non-portable");
 444     }
 445     *len_p = s - start;
 446     if (!overflowed) {
 447         *flags = 0;
 448         return value;
 449     }
 450     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 451     if (result)
 452         *result = value_nv;
 453     return UV_MAX;
 454 }
 455
 456 /*
 457 =for apidoc grok_oct
 458
 459 converts a string representing an octal number to numeric form.
 460
 461 On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
 462 conversion flags, and C<result> should be C<NULL> or a pointer to an NV.
 463 The scan stops at the end of the string, or the first invalid character.
 464 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>, encountering an
 465 8 or 9 will also trigger a warning.
 466 On return C<*len_p> is set to the length of the scanned string,
 467 and C<*flags> gives output flags.
 468
 469 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 470 and nothing is written to C<*result>.  If the value is > C<UV_MAX>, C<grok_oct>
 471 returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 472 and writes the value to C<*result> (or the value is discarded if C<result>
 473 is C<NULL>).
 474
 475 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then the octal
 476 number may use C<"_"> characters to separate digits.
 477
 478 =cut
 479
 480 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 481 which suppresses any message for non-portable numbers, but which are valid
 482 on this platform.
 483  */
 484
 485 UV
 486 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 487 {
 488     const char *s = start;
 489     STRLEN len = *len_p;
 490     UV value = 0;
 491     NV value_nv = 0;
 492     const UV max_div_8 = UV_MAX / 8;
 493     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 494     bool overflowed = FALSE;
 495
 496     PERL_ARGS_ASSERT_GROK_OCT;
 497
 498     for (; len-- && *s; s++) {
 499         if (isOCTAL(*s)) {
 500             /* Write it in this wonky order with a goto to attempt to get the
 501                compiler to make the common case integer-only loop pretty tight.
 502             */
 503           redo:
 504             if (!overflowed) {
 505                 if (value <= max_div_8) {
 506                     value = (value << 3) | OCTAL_VALUE(*s);
 507                     continue;
 508                 }
 509                 /* Bah. We're just overflowed.  */
 510                 /* diag_listed_as: Integer overflow in %s number */
 511                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 512                                "Integer overflow in octal number");
 513                 overflowed = TRUE;
 514                 value_nv = (NV) value;
 515             }
 516             value_nv *= 8.0;
 517             /* If an NV has not enough bits in its mantissa to
 518              * represent a UV this summing of small low-order numbers
 519              * is a waste of time (because the NV cannot preserve
 520              * the low-order bits anyway): we could just remember when
 521              * did we overflow and in the end just multiply value_nv by the
 522              * right amount of 8-tuples. */
 523             value_nv += (NV) OCTAL_VALUE(*s);
 524             continue;
 525         }
 526         if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
 527             --len;
 528             ++s;
 529             goto redo;
 530         }
 531         /* Allow \octal to work the DWIM way (that is, stop scanning
 532          * as soon as non-octal characters are seen, complain only if
 533          * someone seems to want to use the digits eight and nine.  Since we
 534          * know it is not octal, then if isDIGIT, must be an 8 or 9). */
 535         if (isDIGIT(*s)) {
 536             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 537                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 538                                "Illegal octal digit '%c' ignored", *s);
 539         }
 540         break;
 541     }
 542
 543     if (   ( overflowed && value_nv > 4294967295.0)
 544 #if UVSIZE > 4
 545         || (!overflowed && value > 0xffffffff
 546             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 547 #endif
 548         ) {
 549         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 550                        "Octal number > 037777777777 non-portable");
 551     }
 552     *len_p = s - start;
 553     if (!overflowed) {
 554         *flags = 0;
 555         return value;
 556     }
 557     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 558     if (result)
 559         *result = value_nv;
 560     return UV_MAX;
 561 }
 562
 563 /*
 564 =for apidoc scan_bin
 565
 566 For backwards compatibility.  Use C<grok_bin> instead.
 567
 568 =for apidoc scan_hex
 569
 570 For backwards compatibility.  Use C<grok_hex> instead.
 571
 572 =for apidoc scan_oct
 573
 574 For backwards compatibility.  Use C<grok_oct> instead.
 575
 576 =cut
 577  */
 578
 579 NV
 580 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 581 {
 582     NV rnv;
 583     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 584     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 585
 586     PERL_ARGS_ASSERT_SCAN_BIN;
 587
 588     *retlen = len;
 589     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 590 }
 591
 592 NV
 593 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 594 {
 595     NV rnv;
 596     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 597     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 598
 599     PERL_ARGS_ASSERT_SCAN_OCT;
 600
 601     *retlen = len;
 602     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 603 }
 604
 605 NV
 606 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 607 {
 608     NV rnv;
 609     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 610     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 611
 612     PERL_ARGS_ASSERT_SCAN_HEX;
 613
 614     *retlen = len;
 615     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 616 }
 617
 618 /*
 619 =for apidoc grok_numeric_radix
 620
 621 Scan and skip for a numeric decimal separator (radix).
 622
 623 =cut
 624  */
 625 bool
 626 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 627 {
 628     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 629
 630 #ifdef USE_LOCALE_NUMERIC
 631
 632     if (IN_LC(LC_NUMERIC)) {
 633         STRLEN len;
 634         char * radix;
 635         bool matches_radix = FALSE;
 636         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
 637
 638         STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
 639
 640         radix = SvPV(PL_numeric_radix_sv, len);
 641         radix = savepvn(radix, len);
 642
 643         RESTORE_LC_NUMERIC();
 644
 645         if (*sp + len <= send) {
 646             matches_radix = memEQ(*sp, radix, len);
 647         }
 648
 649         Safefree(radix);
 650
 651         if (matches_radix) {
 652             *sp += len;
 653             return TRUE;
 654         }
 655     }
 656
 657 #endif
 658
 659     /* always try "." if numeric radix didn't match because
 660      * we may have data from different locales mixed */
 661     if (*sp < send && **sp == '.') {
 662         ++*sp;
 663         return TRUE;
 664     }
 665
 666     return FALSE;
 667 }
 668
 669 /*
 670 =for apidoc grok_infnan
 671
 672 Helper for C<grok_number()>, accepts various ways of spelling "infinity"
 673 or "not a number", and returns one of the following flag combinations:
 674
 675   IS_NUMBER_INFINITY
 676   IS_NUMBER_NAN
 677   IS_NUMBER_INFINITY | IS_NUMBER_NEG
 678   IS_NUMBER_NAN | IS_NUMBER_NEG
 679   0
 680
 681 possibly |-ed with C<IS_NUMBER_TRAILING>.
 682
 683 If an infinity or a not-a-number is recognized, C<*sp> will point to
 684 one byte past the end of the recognized string.  If the recognition fails,
 685 zero is returned, and C<*sp> will not move.
 686
 687 =for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
 688 =for apidoc Amn|bool|IS_NUMBER_INFINITY
 689 =for apidoc Amn|bool|IS_NUMBER_IN_UV
 690 =for apidoc Amn|bool|IS_NUMBER_NAN
 691 =for apidoc Amn|bool|IS_NUMBER_NEG
 692 =for apidoc Amn|bool|IS_NUMBER_NOT_INT
 693
 694 =cut
 695 */
 696
 697 int
 698 Perl_grok_infnan(pTHX_ const char** sp, const char* send)
 699 {
 700     const char* s = *sp;
 701     int flags = 0;
 702 #if defined(NV_INF) || defined(NV_NAN)
 703     bool odh = FALSE; /* one-dot-hash: 1.#INF */
 704
 705     PERL_ARGS_ASSERT_GROK_INFNAN;
 706
 707     if (*s == '+') {
 708         s++; if (s == send) return 0;
 709     }
 710     else if (*s == '-') {
 711         flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
 712         s++; if (s == send) return 0;
 713     }
 714
 715     if (*s == '1') {
 716         /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
 717          * Let's keep the dot optional. */
 718         s++; if (s == send) return 0;
 719         if (*s == '.') {
 720             s++; if (s == send) return 0;
 721         }
 722         if (*s == '#') {
 723             s++; if (s == send) return 0;
 724         } else
 725             return 0;
 726         odh = TRUE;
 727     }
 728
 729     if (isALPHA_FOLD_EQ(*s, 'I')) {
 730         /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
 731
 732         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 733         s++; if (s == send) return 0;
 734         if (isALPHA_FOLD_EQ(*s, 'F')) {
 735             s++;
 736             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
 737                 int fail =
 738                     flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
 739                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
 740                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
 741                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
 742                 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
 743                 s++;
 744             } else if (odh) {
 745                 while (*s == '0') { /* 1.#INF00 */
 746                     s++;
 747                 }
 748             }
 749             while (s < send && isSPACE(*s))
 750                 s++;
 751             if (s < send && *s) {
 752                 flags |= IS_NUMBER_TRAILING;
 753             }
 754             flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 755         }
 756         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
 757             s++;
 758             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 759             while (*s == '0') { /* 1.#IND00 */
 760                 s++;
 761             }
 762             if (*s) {
 763                 flags |= IS_NUMBER_TRAILING;
 764             }
 765         } else
 766             return 0;
 767     }
 768     else {
 769         /* Maybe NAN of some sort */
 770
 771         if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
 772             /* snan, qNaN */
 773             /* XXX do something with the snan/qnan difference */
 774             s++; if (s == send) return 0;
 775         }
 776
 777         if (isALPHA_FOLD_EQ(*s, 'N')) {
 778             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
 779             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
 780             s++;
 781
 782             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 783             if (s == send) {
 784                 return flags;
 785             }
 786
 787             /* NaN can be followed by various stuff (NaNQ, NaNS), but
 788              * there are also multiple different NaN values, and some
 789              * implementations output the "payload" values,
 790              * e.g. NaN123, NAN(abc), while some legacy implementations
 791              * have weird stuff like NaN%. */
 792             if (isALPHA_FOLD_EQ(*s, 'q') ||
 793                 isALPHA_FOLD_EQ(*s, 's')) {
 794                 /* "nanq" or "nans" are ok, though generating
 795                  * these portably is tricky. */
 796                 s++;
 797                 if (s == send) {
 798                     return flags;
 799                 }
 800             }
 801             if (*s == '(') {
 802                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
 803                 const char *t;
 804                 s++;
 805                 if (s == send) {
 806                     return flags | IS_NUMBER_TRAILING;
 807                 }
 808                 t = s + 1;
 809                 while (t < send && *t && *t != ')') {
 810                     t++;
 811                 }
 812                 if (t == send) {
 813                     return flags | IS_NUMBER_TRAILING;
 814                 }
 815                 if (*t == ')') {
 816                     int nantype;
 817                     UV nanval;
 818                     if (s[0] == '0' && s + 2 < t &&
 819                         isALPHA_FOLD_EQ(s[1], 'x') &&
 820                         isXDIGIT(s[2])) {
 821                         STRLEN len = t - s;
 822                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 823                         nanval = grok_hex(s, &len, &flags, NULL);
 824                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 825                             nantype = 0;
 826                         } else {
 827                             nantype = IS_NUMBER_IN_UV;
 828                         }
 829                         s += len;
 830                     } else if (s[0] == '0' && s + 2 < t &&
 831                                isALPHA_FOLD_EQ(s[1], 'b') &&
 832                                (s[2] == '0' || s[2] == '1')) {
 833                         STRLEN len = t - s;
 834                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
 835                         nanval = grok_bin(s, &len, &flags, NULL);
 836                         if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
 837                             nantype = 0;
 838                         } else {
 839                             nantype = IS_NUMBER_IN_UV;
 840                         }
 841                         s += len;
 842                     } else {
 843                         const char *u;
 844                         nantype =
 845                             grok_number_flags(s, t - s, &nanval,
 846                                               PERL_SCAN_TRAILING |
 847                                               PERL_SCAN_ALLOW_UNDERSCORES);
 848                         /* Unfortunately grok_number_flags() doesn't
 849                          * tell how far we got and the ')' will always
 850                          * be "trailing", so we need to double-check
 851                          * whether we had something dubious. */
 852                         for (u = s; u < t; u++) {
 853                             if (!isDIGIT(*u)) {
 854                                 flags |= IS_NUMBER_TRAILING;
 855                                 break;
 856                             }
 857                         }
 858                         s = u;
 859                     }
 860
 861                     /* XXX Doesn't do octal: nan("0123").
 862                      * Probably not a big loss. */
 863
 864                     if ((nantype & IS_NUMBER_NOT_INT) ||
 865                         !(nantype && IS_NUMBER_IN_UV)) {
 866                         /* XXX the nanval is currently unused, that is,
 867                          * not inserted as the NaN payload of the NV.
 868                          * But the above code already parses the C99
 869                          * nan(...)  format.  See below, and see also
 870                          * the nan() in POSIX.xs.
 871                          *
 872                          * Certain configuration combinations where
 873                          * NVSIZE is greater than UVSIZE mean that
 874                          * a single UV cannot contain all the possible
 875                          * NaN payload bits.  There would need to be
 876                          * some more generic syntax than "nan($uv)".
 877                          *
 878                          * Issues to keep in mind:
 879                          *
 880                          * (1) In most common cases there would
 881                          * not be an integral number of bytes that
 882                          * could be set, only a certain number of bits.
 883                          * For example for the common case of
 884                          * NVSIZE == UVSIZE == 8 there is room for 52
 885                          * bits in the payload, but the most significant
 886                          * bit is commonly reserved for the
 887                          * signaling/quiet bit, leaving 51 bits.
 888                          * Furthermore, the C99 nan() is supposed
 889                          * to generate quiet NaNs, so it is doubtful
 890                          * whether it should be able to generate
 891                          * signaling NaNs.  For the x86 80-bit doubles
 892                          * (if building a long double Perl) there would
 893                          * be 62 bits (s/q bit being the 63rd).
 894                          *
 895                          * (2) Endianness of the payload bits. If the
 896                          * payload is specified as an UV, the low-order
 897                          * bits of the UV are naturally little-endianed
 898                          * (rightmost) bits of the payload.  The endianness
 899                          * of UVs and NVs can be different. */
 900                         return 0;
 901                     }
 902                     if (s < t) {
 903                         flags |= IS_NUMBER_TRAILING;
 904                     }
 905                 } else {
 906                     /* Looked like nan(...), but no close paren. */
 907                     flags |= IS_NUMBER_TRAILING;
 908                 }
 909             } else {
 910                 while (s < send && isSPACE(*s))
 911                     s++;
 912                 if (s < send && *s) {
 913                     /* Note that we here implicitly accept (parse as
 914                      * "nan", but with warnings) also any other weird
 915                      * trailing stuff for "nan".  In the above we just
 916                      * check that if we got the C99-style "nan(...)",
 917                      * the "..."  looks sane.
 918                      * If in future we accept more ways of specifying
 919                      * the nan payload, the accepting would happen around
 920                      * here. */
 921                     flags |= IS_NUMBER_TRAILING;
 922                 }
 923             }
 924             s = send;
 925         }
 926         else
 927             return 0;
 928     }
 929
 930     while (s < send && isSPACE(*s))
 931         s++;
 932
 933 #else
 934     PERL_UNUSED_ARG(send);
 935 #endif /* #if defined(NV_INF) || defined(NV_NAN) */
 936     *sp = s;
 937     return flags;
 938 }
 939
 940 /*
 941 =for apidoc grok_number_flags
 942
 943 Recognise (or not) a number.  The type of the number is returned
 944 (0 if unrecognised), otherwise it is a bit-ORed combination of
 945 C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
 946 C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
 947
 948 If the value of the number can fit in a UV, it is returned in C<*valuep>.
 949 C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
 950 will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
 951 to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
 952 If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
 953 C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
 954
 955 C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
 956 seen (in which case C<*valuep> gives the true value truncated to an integer), and
 957 C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
 958 absolute value).  C<IS_NUMBER_IN_UV> is not set if e notation was used or the
 959 number is larger than a UV.
 960
 961 C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
 962 non-numeric text on an otherwise successful I<grok>, setting
 963 C<IS_NUMBER_TRAILING> on the result.
 964
 965 =for apidoc grok_number
 966
 967 Identical to C<grok_number_flags()> with C<flags> set to zero.
 968
 969 =cut
 970  */
 971 int
 972 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 973 {
 974     PERL_ARGS_ASSERT_GROK_NUMBER;
 975
 976     return grok_number_flags(pv, len, valuep, 0);
 977 }
 978
/* Overflow guards for the decimal parsers in this file.  While folding
 * digits into a UV with 'value * 10 + digit', the step cannot overflow if
 * 'value' is below uv_max_div_10, or is equal to it and 'digit' does not
 * exceed uv_max_mod_10. */
static const UV uv_max_div_10 = UV_MAX / 10;
static const U8 uv_max_mod_10 = UV_MAX % 10;
 981
/* Classify the 'len' bytes starting at 'pv' as a number.
 *
 * Returns 0 when the text is not recognised, otherwise a bit-OR of
 * IS_NUMBER_* values.  The scan proceeds in this order:
 *   - leading whitespace and one optional '+' or '-' are skipped
 *     ('-' sets IS_NUMBER_NEG);
 *   - a run of digits is folded into a UV; if it fits, IS_NUMBER_IN_UV is
 *     set and the value stored through 'valuep' (when non-NULL); if it does
 *     not fit, IS_NUMBER_GREATER_THAN_UV_MAX is set instead and '*valuep' is
 *     left alone;
 *   - a radix separator (as recognised by GROK_NUMERIC_RADIX) plus optional
 *     digits sets IS_NUMBER_NOT_INT; with no digits before the radix at
 *     least one digit must follow it, and the stored integer part is 0;
 *   - an exponent ('e'/'E', optional sign, digits) discards every flag but
 *     IS_NUMBER_NEG and sets IS_NUMBER_NOT_INT;
 *   - trailing whitespace is skipped; if that exhausts the input the
 *     accumulated flags are returned;
 *   - otherwise the special string "0 but true", then the inf/nan
 *     spellings (delegated to Perl_grok_infnan), are tried;
 *   - anything else is trailing text: accepted with IS_NUMBER_TRAILING only
 *     when 'flags' contains PERL_SCAN_TRAILING.
 */
int
Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
{
  const char *s = pv;
  const char * const send = pv + len;
  const char *d;
  int numtype = 0;

  PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;

  /* Skip leading whitespace, then consume at most one sign character. */
  while (s < send && isSPACE(*s))
    s++;
  if (s == send) {
    return 0;
  } else if (*s == '-') {
    s++;
    numtype = IS_NUMBER_NEG;
  }
  else if (*s == '+')
    s++;

  /* A sign on its own is not a number. */
  if (s == send)
    return 0;

  /* The first digit (after optional sign): note that might
   * also point to "infinity" or "nan", or "1.#INF". */
  d = s;

  /* next must be digit or the radix separator or beginning of infinity/nan */
  if (isDIGIT(*s)) {
    /* UVs are at least 32 bits, so the first 9 decimal digits cannot
       overflow.  */
    UV value = *s - '0';
    /* This construction seems to be more optimiser friendly.
       (without it gcc does the isDIGIT test and the *s - '0' separately)
       With it gcc on arm is managing 6 instructions (6 cycles) per digit.
       In theory the optimiser could deduce how far to unroll the loop
       before checking for overflow.  */
    if (++s < send) {
      int digit = *s - '0';
      if (inRANGE(digit, 0, 9)) {
        value = value * 10 + digit;
        if (++s < send) {
          digit = *s - '0';
          if (inRANGE(digit, 0, 9)) {
            value = value * 10 + digit;
            if (++s < send) {
              digit = *s - '0';
              if (inRANGE(digit, 0, 9)) {
                value = value * 10 + digit;
                if (++s < send) {
                  digit = *s - '0';
                  if (inRANGE(digit, 0, 9)) {
                    value = value * 10 + digit;
                    if (++s < send) {
                      digit = *s - '0';
                      if (inRANGE(digit, 0, 9)) {
                        value = value * 10 + digit;
                        if (++s < send) {
                          digit = *s - '0';
                          if (inRANGE(digit, 0, 9)) {
                            value = value * 10 + digit;
                            if (++s < send) {
                              digit = *s - '0';
                              if (inRANGE(digit, 0, 9)) {
                                value = value * 10 + digit;
                                if (++s < send) {
                                  digit = *s - '0';
                                  if (inRANGE(digit, 0, 9)) {
                                    value = value * 10 + digit;
                                    if (++s < send) {
                                      /* Now got 9 digits, so need to check
                                         each time for overflow.  */
                                      digit = *s - '0';
                                      while (    inRANGE(digit, 0, 9)
                                             && (value < uv_max_div_10
                                                 || (value == uv_max_div_10
                                                     && digit <= uv_max_mod_10))) {
                                        value = value * 10 + digit;
                                        if (++s < send)
                                          digit = *s - '0';
                                        else
                                          break;
                                      }
                                      /* Still looking at a digit inside the
                                         buffer: the loop above stopped
                                         because the next step would not fit
                                         in a UV. */
                                      if (inRANGE(digit, 0, 9)
                                          && (s < send)) {
                                        /* value overflowed.
                                           skip the remaining digits, don't
                                           worry about setting *valuep.  */
                                        do {
                                          s++;
                                        } while (s < send && isDIGIT(*s));
                                        numtype |=
                                          IS_NUMBER_GREATER_THAN_UV_MAX;
                                        goto skip_value;
                                      }
                                    }
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
    /* Reached only when every digit fitted: publish the integer value. */
    numtype |= IS_NUMBER_IN_UV;
    if (valuep)
      *valuep = value;

  skip_value:
    if (GROK_NUMERIC_RADIX(&s, send)) {
      numtype |= IS_NUMBER_NOT_INT;
      while (s < send && isDIGIT(*s))  /* optional digits after the radix */
        s++;
    }
  }
  else if (GROK_NUMERIC_RADIX(&s, send)) {
    numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
    /* no digits before the radix means we need digits after it */
    if (s < send && isDIGIT(*s)) {
      do {
        s++;
      } while (s < send && isDIGIT(*s));
      if (valuep) {
        /* integer approximation is valid - it's 0.  */
        *valuep = 0;
      }
    }
    else
        return 0;
  }

  /* 's > d' means at least one byte of mantissa was consumed above. */
  if (s > d && s < send) {
    /* we can have an optional exponent part */
    if (isALPHA_FOLD_EQ(*s, 'e')) {
      s++;
      if (s < send && (*s == '-' || *s == '+'))
        s++;
      if (s < send && isDIGIT(*s)) {
        do {
          s++;
        } while (s < send && isDIGIT(*s));
      }
      else if (flags & PERL_SCAN_TRAILING)
        /* 'e' without digits: report the mantissa, flag the rest. */
        return numtype | IS_NUMBER_TRAILING;
      else
        return 0;

      /* The only flag we keep is sign.  Blow away any "it's UV"  */
      numtype &= IS_NUMBER_NEG;
      numtype |= IS_NUMBER_NOT_INT;
    }
  }
  while (s < send && isSPACE(*s))
    s++;
  if (s >= send)
    return numtype;
  /* Special case: the whole input (pv/len, not the remainder at s) is
   * compared against this one literal. */
  if (memEQs(pv, len, "0 but true")) {
    if (valuep)
      *valuep = 0;
    return IS_NUMBER_IN_UV;
  }
  /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
  if ((s + 2 < send) && memCHRs("inqs#", toFOLD(*s))) {
      /* Really detect inf/nan. Start at d, not s, since the above
       * code might have already consumed the "1." or "1". */
      const int infnan = Perl_grok_infnan(aTHX_ &d, send);
      if ((infnan & IS_NUMBER_INFINITY)) {
          return (numtype | infnan); /* Keep sign for infinity. */
      }
      else if ((infnan & IS_NUMBER_NAN)) {
          return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
      }
      /* NOTE(review): when this branch is entered but neither flag comes
       * back, control falls to the final 'return 0' even if
       * PERL_SCAN_TRAILING was requested, because the test below is the
       * 'else' of the enclosing 'if'. */
  }
  else if (flags & PERL_SCAN_TRAILING) {
    return numtype | IS_NUMBER_TRAILING;
  }

  return 0;
}
1170
1171 /*
1172 =for apidoc grok_atoUV
1173
Parse a string, looking for a decimal unsigned integer.
1175
1176 On entry, C<pv> points to the beginning of the string;
1177 C<valptr> points to a UV that will receive the converted value, if found;
1178 C<endptr> is either NULL or points to a variable that points to one byte
1179 beyond the point in C<pv> that this routine should examine.
1180 If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
1181
1182 Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1183 no leading zeros).  Otherwise it returns TRUE, and sets C<*valptr> to that
1184 value.
1185
If you constrain the portion of C<pv> that is looked at by this function (by
passing a non-NULL C<endptr>), and if the initial bytes of that portion form a
valid value, it will return TRUE, setting C<*endptr> to the byte following the
final digit of the value.  But if there is no constraint on what's looked at,
all of C<pv> must be valid in order for TRUE to be returned.
1191
1192 The only characters this accepts are the decimal digits '0'..'9'.
1193
1194 As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1195 leading whitespace, nor negative inputs.  If such features are required, the
1196 calling code needs to explicitly implement those.
1197
1198 Note that this function returns FALSE for inputs that would overflow a UV,
1199 or have leading zeros.  Thus a single C<0> is accepted, but not C<00> nor
1200 C<01>, C<002>, I<etc>.
1201
Background: C<atoi> has severe problems with illegal inputs, it cannot be
used for incremental parsing, and therefore should be avoided.
C<atoi> and C<strtol> are also affected by locale settings, which can also be
seen as a bug (global state controlled by user environment).
1206
1207 =cut
1208
1209 */
1210
1211 bool
1212 Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
1213 {
1214     const char* s = pv;
1215     const char** eptr;
1216     const char* end2; /* Used in case endptr is NULL. */
1217     UV val = 0; /* The parsed value. */
1218
1219     PERL_ARGS_ASSERT_GROK_ATOUV;
1220
1221     if (endptr) {
1222         eptr = endptr;
1223     }
1224     else {
1225         end2 = s + strlen(s);
1226         eptr = &end2;
1227     }
1228
1229     if (   *eptr <= s
1230         || ! isDIGIT(*s))
1231     {
1232         return FALSE;
1233     }
1234
1235     /* Single-digit inputs are quite common. */
1236     val = *s++ - '0';
1237     if (s < *eptr && isDIGIT(*s)) {
1238         /* Fail on extra leading zeros. */
1239         if (val == 0)
1240             return FALSE;
1241         while (s < *eptr && isDIGIT(*s)) {
1242             /* This could be unrolled like in grok_number(), but
1243                 * the expected uses of this are not speed-needy, and
1244                 * unlikely to need full 64-bitness. */
1245             const U8 digit = *s++ - '0';
1246             if (val < uv_max_div_10 ||
1247                 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1248                 val = val * 10 + digit;
1249             } else {
1250                 return FALSE;
1251             }
1252         }
1253     }
1254
1255     if (endptr == NULL) {
1256         if (*s) {
1257             return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1258         }
1259     }
1260     else {
1261         *endptr = s;
1262     }
1263
1264     *valptr = val;
1265     return TRUE;
1266 }
1267
1268 #ifndef Perl_strtod
1269 STATIC NV
1270 S_mulexp10(NV value, I32 exponent)
1271 {
1272     NV result = 1.0;
1273     NV power = 10.0;
1274     bool negative = 0;
1275     I32 bit;
1276
1277     if (exponent == 0)
1278         return value;
1279     if (value == 0)
1280         return (NV)0;
1281
1282     /* On OpenVMS VAX we by default use the D_FLOAT double format,
1283      * and that format does not have *easy* capabilities [1] for
1284      * overflowing doubles 'silently' as IEEE fp does.  We also need
1285      * to support G_FLOAT on both VAX and Alpha, and though the exponent
1286      * range is much larger than D_FLOAT it still doesn't do silent
1287      * overflow.  Therefore we need to detect early whether we would
1288      * overflow (this is the behaviour of the native string-to-float
1289      * conversion routines, and therefore of native applications, too).
1290      *
1291      * [1] Trying to establish a condition handler to trap floating point
1292      *     exceptions is not a good idea. */
1293
1294     /* In UNICOS and in certain Cray models (such as T90) there is no
1295      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1296      * There is something you can do if you are willing to use some
1297      * inline assembler: the instruction is called DFI-- but that will
1298      * disable *all* floating point interrupts, a little bit too large
1299      * a hammer.  Therefore we need to catch potential overflows before
1300      * it's too late. */
1301
1302 #if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
1303     STMT_START {
1304         const NV exp_v = log10(value);
1305         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1306             return NV_MAX;
1307         if (exponent < 0) {
1308             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1309                 return 0.0;
1310             while (-exponent >= NV_MAX_10_EXP) {
1311                 /* combination does not overflow, but 10^(-exponent) does */
1312                 value /= 10;
1313                 ++exponent;
1314             }
1315         }
1316     } STMT_END;
1317 #endif
1318
1319     if (exponent < 0) {
1320         negative = 1;
1321         exponent = -exponent;
1322 #ifdef NV_MAX_10_EXP
1323         /* for something like 1234 x 10^-309, the action of calculating
1324          * the intermediate value 10^309 then returning 1234 / (10^309)
1325          * will fail, since 10^309 becomes infinity. In this case try to
1326          * refactor it as 123 / (10^308) etc.
1327          */
1328         while (value && exponent > NV_MAX_10_EXP) {
1329             exponent--;
1330             value /= 10;
1331         }
1332         if (value == 0.0)
1333             return value;
1334 #endif
1335     }
1336 #if defined(__osf__)
1337     /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1338      * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1339      * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1340      * but that breaks another set of infnan.t tests. */
1341 #  define FP_OVERFLOWS_TO_ZERO
1342 #endif
1343     for (bit = 1; exponent; bit <<= 1) {
1344         if (exponent & bit) {
1345             exponent ^= bit;
1346             result *= power;
1347 #ifdef FP_OVERFLOWS_TO_ZERO
1348             if (result == 0)
1349 # ifdef NV_INF
1350                 return value < 0 ? -NV_INF : NV_INF;
1351 # else
1352                 return value < 0 ? -FLT_MAX : FLT_MAX;
1353 # endif
1354 #endif
1355             /* Floating point exceptions are supposed to be turned off,
1356              *  but if we're obviously done, don't risk another iteration.
1357              */
1358              if (exponent == 0) break;
1359         }
1360         power *= power;
1361     }
1362     return negative ? value / result : value * result;
1363 }
1364 #endif /* #ifndef Perl_strtod */
1365
/* ATOF(s, x): convert the string 's', leaving the result in the NV
 * variable 'x'.  'x' is always passed by name; when Perl_strtod is
 * available its address is taken here for my_atof2().
 * NOTE(review): Perl_atof2 is defined outside this file; presumably it
 * assigns to its second argument directly -- confirm. */
#ifdef Perl_strtod
#  define ATOF(s, x) my_atof2(s, &x)
#else
#  define ATOF(s, x) Perl_atof2(s, x)
#endif
1371
1372 NV
1373 Perl_my_atof(pTHX_ const char* s)
1374 {
1375     /* 's' must be NUL terminated */
1376
1377     NV x = 0.0;
1378
1379     PERL_ARGS_ASSERT_MY_ATOF;
1380
1381 #if ! defined(USE_LOCALE_NUMERIC)
1382
1383     ATOF(s, x);
1384
1385 #else
1386
1387     {
1388         DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1389         STORE_LC_NUMERIC_SET_TO_NEEDED();
1390         if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1391             ATOF(s,x);
1392         }
1393         else {
1394
1395             /* Look through the string for the first thing that looks like a
1396              * decimal point: either the value in the current locale or the
1397              * standard fallback of '.'. The one which appears earliest in the
1398              * input string is the one that we should have atof look for. Note
1399              * that we have to determine this beforehand because on some
1400              * systems, Perl_atof2 is just a wrapper around the system's atof.
1401              * */
1402             const char * const standard_pos = strchr(s, '.');
1403             const char * const local_pos
1404                                   = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1405             const bool use_standard_radix
1406                     = standard_pos && (!local_pos || standard_pos < local_pos);
1407
1408             if (use_standard_radix) {
1409                 SET_NUMERIC_STANDARD();
1410                 LOCK_LC_NUMERIC_STANDARD();
1411             }
1412
1413             ATOF(s,x);
1414
1415             if (use_standard_radix) {
1416                 UNLOCK_LC_NUMERIC_STANDARD();
1417                 SET_NUMERIC_UNDERLYING();
1418             }
1419         }
1420         RESTORE_LC_NUMERIC();
1421     }
1422
1423 #endif
1424
1425     return x;
1426 }
1427
1428 #if defined(NV_INF) || defined(NV_NAN)
1429
1430 static char*
1431 S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
1432 {
1433     const char *p0 = negative ? s - 1 : s;
1434     const char *p = p0;
1435     const int infnan = grok_infnan(&p, send);
1436     if (infnan && p != p0) {
1437         /* If we can generate inf/nan directly, let's do so. */
1438 #ifdef NV_INF
1439         if ((infnan & IS_NUMBER_INFINITY)) {
1440             *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
1441             return (char*)p;
1442         }
1443 #endif
1444 #ifdef NV_NAN
1445         if ((infnan & IS_NUMBER_NAN)) {
1446             *value = NV_NAN;
1447             return (char*)p;
1448         }
1449 #endif
1450 #ifdef Perl_strtod
1451         /* If still here, we didn't have either NV_INF or NV_NAN,
1452          * and can try falling back to native strtod/strtold.
1453          *
1454          * The native interface might not recognize all the possible
1455          * inf/nan strings Perl recognizes.  What we can try
1456          * is to try faking the input.  We will try inf/-inf/nan
1457          * as the most promising/portable input. */
1458         {
1459             const char* fake = "silence compiler warning";
1460             char* endp;
1461             NV nv;
1462 #ifdef NV_INF
1463             if ((infnan & IS_NUMBER_INFINITY)) {
1464                 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1465             }
1466 #endif
1467 #ifdef NV_NAN
1468             if ((infnan & IS_NUMBER_NAN)) {
1469                 fake = "nan";
1470             }
1471 #endif
1472             assert(strNE(fake, "silence compiler warning"));
1473             nv = S_strtod(aTHX_ fake, &endp);
1474             if (fake != endp) {
1475 #ifdef NV_INF
1476                 if ((infnan & IS_NUMBER_INFINITY)) {
1477 #  ifdef Perl_isinf
1478                     if (Perl_isinf(nv))
1479                         *value = nv;
1480 #  else
1481                     /* last resort, may generate SIGFPE */
1482                     *value = Perl_exp((NV)1e9);
1483                     if ((infnan & IS_NUMBER_NEG))
1484                         *value = -*value;
1485 #  endif
1486                     return (char*)p; /* p, not endp */
1487                 }
1488 #endif
1489 #ifdef NV_NAN
1490                 if ((infnan & IS_NUMBER_NAN)) {
1491 #  ifdef Perl_isnan
1492                     if (Perl_isnan(nv))
1493                         *value = nv;
1494 #  else
1495                     /* last resort, may generate SIGFPE */
1496                     *value = Perl_log((NV)-1.0);
1497 #  endif
1498                     return (char*)p; /* p, not endp */
1499 #endif
1500                 }
1501             }
1502         }
1503 #endif /* #ifdef Perl_strtod */
1504     }
1505     return NULL;
1506 }
1507
1508 #endif /* if defined(NV_INF) || defined(NV_NAN) */
1509
1510 char*
1511 Perl_my_atof2(pTHX_ const char* orig, NV* value)
1512 {
1513     PERL_ARGS_ASSERT_MY_ATOF2;
1514     return my_atof3(orig, value, 0);
1515 }
1516
1517 char*
1518 Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
1519 {
1520     const char* s = orig;
1521     NV result[3] = {0.0, 0.0, 0.0};
1522 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1523     const char* send = s + ((len != 0)
1524                            ? len
1525                            : strlen(orig)); /* one past the last */
1526     bool negative = 0;
1527 #endif
1528 #if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
1529     UV accumulator[2] = {0,0};  /* before/after dp */
1530     bool seen_digit = 0;
1531     I32 exp_adjust[2] = {0,0};
1532     I32 exp_acc[2] = {-1, -1};
1533     /* the current exponent adjust for the accumulators */
1534     I32 exponent = 0;
1535     I32 seen_dp  = 0;
1536     I32 digit = 0;
1537     I32 old_digit = 0;
1538     I32 sig_digits = 0; /* noof significant digits seen so far */
1539 #endif
1540
1541 #if defined(USE_PERL_ATOF) || defined(Perl_strtod)
1542     PERL_ARGS_ASSERT_MY_ATOF3;
1543
1544     /* leading whitespace */
1545     while (s < send && isSPACE(*s))
1546         ++s;
1547
1548     /* sign */
1549     switch (*s) {
1550         case '-':
1551             negative = 1;
1552             /* FALLTHROUGH */
1553         case '+':
1554             ++s;
1555     }
1556 #endif
1557
1558 #ifdef Perl_strtod
1559     {
1560         char* endp;
1561         char* copy = NULL;
1562
1563         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1564             return endp;
1565
1566         /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1567            0b-prefixed binary numbers, which is backward incompatible
1568         */
1569         if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
1570             (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1571             *value = 0;
1572             return (char *)s+1;
1573         }
1574
1575         /* If the length is passed in, the input string isn't NUL-terminated,
1576          * and in it turns out the function below assumes it is; therefore we
1577          * create a copy and NUL-terminate that */
1578         if (len) {
1579             Newx(copy, len + 1, char);
1580             Copy(orig, copy, len, char);
1581             copy[len] = '\0';
1582             s = copy + (s - orig);
1583         }
1584
1585         result[2] = S_strtod(aTHX_ s, &endp);
1586
1587         /* If we created a copy, 'endp' is in terms of that.  Convert back to
1588          * the original */
1589         if (copy) {
1590             s = (s - copy) + (char *) orig;
1591             endp = (endp - copy) + (char *) orig;
1592             Safefree(copy);
1593         }
1594
1595         if (s != endp) {
1596             *value = negative ? -result[2] : result[2];
1597             return endp;
1598         }
1599         return NULL;
1600     }
1601 #elif defined(USE_PERL_ATOF)
1602
1603 /* There is no point in processing more significant digits
1604  * than the NV can hold. Note that NV_DIG is a lower-bound value,
1605  * while we need an upper-bound value. We add 2 to account for this;
1606  * since it will have been conservative on both the first and last digit.
1607  * For example a 32-bit mantissa with an exponent of 4 would have
1608  * exact values in the set
1609  *               4
1610  *               8
1611  *              ..
1612  *     17179869172
1613  *     17179869176
1614  *     17179869180
1615  *
1616  * where for the purposes of calculating NV_DIG we would have to discount
1617  * both the first and last digit, since neither can hold all values from
1618  * 0..9; but for calculating the value we must examine those two digits.
1619  */
1620 #ifdef MAX_SIG_DIG_PLUS
1621     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1622        possible digits in a NV, especially if NVs are not IEEE compliant
1623        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1624 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1625 #else
1626 # define MAX_SIG_DIGITS (NV_DIG+2)
1627 #endif
1628
1629 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1630 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
1631
1632 #if defined(NV_INF) || defined(NV_NAN)
1633     {
1634         char* endp;
1635         if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
1636             return endp;
1637     }
1638 #endif
1639
1640     /* we accumulate digits into an integer; when this becomes too
1641      * large, we add the total to NV and start again */
1642
1643     while (s < send) {
1644         if (isDIGIT(*s)) {
1645             seen_digit = 1;
1646             old_digit = digit;
1647             digit = *s++ - '0';
1648             if (seen_dp)
1649                 exp_adjust[1]++;
1650
1651             /* don't start counting until we see the first significant
1652              * digit, eg the 5 in 0.00005... */
1653             if (!sig_digits && digit == 0)
1654                 continue;
1655
1656             if (++sig_digits > MAX_SIG_DIGITS) {
1657                 /* limits of precision reached */
1658                 if (digit > 5) {
1659                     ++accumulator[seen_dp];
1660                 } else if (digit == 5) {
1661                     if (old_digit % 2) { /* round to even - Allen */
1662                         ++accumulator[seen_dp];
1663                     }
1664                 }
1665                 if (seen_dp) {
1666                     exp_adjust[1]--;
1667                 } else {
1668                     exp_adjust[0]++;
1669                 }
1670                 /* skip remaining digits */
1671                 while (s < send && isDIGIT(*s)) {
1672                     ++s;
1673                     if (! seen_dp) {
1674                         exp_adjust[0]++;
1675                     }
1676                 }
1677                 /* warn of loss of precision? */
1678             }
1679             else {
1680                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1681                     /* add accumulator to result and start again */
1682                     result[seen_dp] = S_mulexp10(result[seen_dp],
1683                                                  exp_acc[seen_dp])
1684                         + (NV)accumulator[seen_dp];
1685                     accumulator[seen_dp] = 0;
1686                     exp_acc[seen_dp] = 0;
1687                 }
1688                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1689                 ++exp_acc[seen_dp];
1690             }
1691         }
1692         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1693             seen_dp = 1;
1694             if (sig_digits > MAX_SIG_DIGITS) {
1695                 while (s < send && isDIGIT(*s)) {
1696                     ++s;
1697                 }
1698                 break;
1699             }
1700         }
1701         else {
1702             break;
1703         }
1704     }
1705
1706     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1707     if (seen_dp) {
1708         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1709     }
1710
1711     if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
1712         bool expnegative = 0;
1713
1714         ++s;
1715         switch (*s) {
1716             case '-':
1717                 expnegative = 1;
1718                 /* FALLTHROUGH */
1719             case '+':
1720                 ++s;
1721         }
1722         while (s < send && isDIGIT(*s))
1723             exponent = exponent * 10 + (*s++ - '0');
1724         if (expnegative)
1725             exponent = -exponent;
1726     }
1727
1728     /* now apply the exponent */
1729
1730     if (seen_dp) {
1731         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1732                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1733     } else {
1734         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1735     }
1736
1737     /* now apply the sign */
1738     if (negative)
1739         result[2] = -result[2];
1740 #endif /* USE_PERL_ATOF */
1741     *value = result[2];
1742     return (char *)s;
1743 }
1744
1745 /*
1746 =for apidoc isinfnan
1747
C<Perl_isinfnan()> is a utility function that returns true if the NV
1749 argument is either an infinity or a C<NaN>, false otherwise.  To test
1750 in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
1751
1752 This is also the logical inverse of Perl_isfinite().
1753
1754 =cut
1755 */
1756 bool
1757 Perl_isinfnan(NV nv)
1758 {
1759   PERL_UNUSED_ARG(nv);
1760 #ifdef Perl_isinf
1761     if (Perl_isinf(nv))
1762         return TRUE;
1763 #endif
1764 #ifdef Perl_isnan
1765     if (Perl_isnan(nv))
1766         return TRUE;
1767 #endif
1768     return FALSE;
1769 }
1770
1771 /*
1772 =for apidoc isinfnansv
1773
1774 Checks whether the argument would be either an infinity or C<NaN> when used
1775 as a number, but is careful not to trigger non-numeric or uninitialized
warnings.  It assumes the caller has done C<SvGETMAGIC(sv)> already.
1777
1778 =cut
1779 */
1780
1781 bool
1782 Perl_isinfnansv(pTHX_ SV *sv)
1783 {
1784     PERL_ARGS_ASSERT_ISINFNANSV;
1785     if (!SvOK(sv))
1786         return FALSE;
1787     if (SvNOKp(sv))
1788         return Perl_isinfnan(SvNVX(sv));
1789     if (SvIOKp(sv))
1790         return FALSE;
1791     {
1792         STRLEN len;
1793         const char *s = SvPV_nomg_const(sv, len);
1794         return cBOOL(grok_infnan(&s, s+len));
1795     }
1796 }
1797
1798 #ifndef HAS_MODFL
/* C99 has truncl, pre-C99 Solaris had aintl.  We can use either with
 * copysignl to emulate modfl, which on some platforms is missing or
 * broken. */
1802 #  if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1803 long double
1804 Perl_my_modfl(long double x, long double *ip)
1805 {
1806     *ip = truncl(x);
1807     return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1808 }
1809 #  elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
/* modfl() stand-in built on aintl(): stores the integral part of x in
 * *ip and returns what is left over.  When x equals its integral part
 * the result is a zero carrying the sign of x. */
long double
Perl_my_modfl(long double x, long double *ip)
{
    const long double whole = aintl(x);

    *ip = whole;

    if (x != whole) {
        /* Also taken for NaN, which then propagates through the
         * subtraction. */
        return x - whole;
    }

    return copysignl(0.0L, x);
}
1816 #  endif
1817 #endif
1818
1819 /* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
1820 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1821 long double
1822 Perl_my_frexpl(long double x, int *e) {
1823     *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1824     return (scalbnl(x, -*e));
1825 }
1826 #endif
1827
1828 /*
1829 =for apidoc Perl_signbit
1830
1831 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1832 it is not.
1833
1834 If F<Configure> detects this system has a C<signbit()> that will work with
1835 our NVs, then we just use it via the C<#define> in F<perl.h>.  Otherwise,
1836 fall back on this implementation.  The main use of this function
1837 is catching C<-0.0>.
1838
1839 C<Configure> notes:  This function is called C<'Perl_signbit'> instead of a
1840 plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
1841 function or macro that doesn't happen to work with our particular choice
1842 of NVs.  We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
1843 the standard system headers to be happy.  Also, this is a no-context
1844 function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1845 F<perl.h> as a simple macro call to the system's C<signbit()>.
1846 Users should just always call C<Perl_signbit()>.
1847
1848 =cut
1849 */
1850 #if !defined(HAS_SIGNBIT)
1851 int
1852 Perl_signbit(NV x) {
1853 #  ifdef Perl_fp_class_nzero
1854     return Perl_fp_class_nzero(x);
1855     /* Try finding the high byte, and assume it's highest bit
1856      * is the sign.  This assumption is probably wrong somewhere. */
1857 #  elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1858     return (((unsigned char *)&x)[9] & 0x80);
1859 #  elif defined(NV_LITTLE_ENDIAN)
1860     /* Note that NVSIZE is sizeof(NV), which would make the below be
1861      * wrong if the end bytes are unused, which happens with the x86
1862      * 80-bit long doubles, which is why take care of that above. */
1863     return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1864 #  elif defined(NV_BIG_ENDIAN)
1865     return (((unsigned char *)&x)[0] & 0x80);
1866 #  else
1867     /* This last resort fallback is wrong for the negative zero. */
1868     return (x < 0.0) ? 1 : 0;
1869 #  endif
1870 }
1871 #endif
1872
1873 /*
1874  * ex: set ts=8 sts=4 sw=4 et:
1875  */