numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 This file contains all the stuff needed by perl for manipulating numeric
  22 values, including such things as replacements for the OS's atof() function
  23
  24 =cut
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 U32
  33 Perl_cast_ulong(pTHX_ NV f)
  34 {
  35     PERL_UNUSED_CONTEXT;
  36   if (f < 0.0)
  37     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  38   if (f < U32_MAX_P1) {
  39 #if CASTFLAGS & 2
  40     if (f < U32_MAX_P1_HALF)
  41       return (U32) f;
  42     f -= U32_MAX_P1_HALF;
  43     return ((U32) f) | (1 + U32_MAX >> 1);
  44 #else
  45     return (U32) f;
  46 #endif
  47   }
  48   return f > 0 ? U32_MAX : 0 /* NaN */;
  49 }
  50
  51 I32
  52 Perl_cast_i32(pTHX_ NV f)
  53 {
  54     PERL_UNUSED_CONTEXT;
  55   if (f < I32_MAX_P1)
  56     return f < I32_MIN ? I32_MIN : (I32) f;
  57   if (f < U32_MAX_P1) {
  58 #if CASTFLAGS & 2
  59     if (f < U32_MAX_P1_HALF)
  60       return (I32)(U32) f;
  61     f -= U32_MAX_P1_HALF;
  62     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  63 #else
  64     return (I32)(U32) f;
  65 #endif
  66   }
  67   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  68 }
  69
  70 IV
  71 Perl_cast_iv(pTHX_ NV f)
  72 {
  73     PERL_UNUSED_CONTEXT;
  74   if (f < IV_MAX_P1)
  75     return f < IV_MIN ? IV_MIN : (IV) f;
  76   if (f < UV_MAX_P1) {
  77 #if CASTFLAGS & 2
  78     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  79     if (f < UV_MAX_P1_HALF)
  80       return (IV)(UV) f;
  81     f -= UV_MAX_P1_HALF;
  82     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  83 #else
  84     return (IV)(UV) f;
  85 #endif
  86   }
  87   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  88 }
  89
  90 UV
  91 Perl_cast_uv(pTHX_ NV f)
  92 {
  93     PERL_UNUSED_CONTEXT;
  94   if (f < 0.0)
  95     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  96   if (f < UV_MAX_P1) {
  97 #if CASTFLAGS & 2
  98     if (f < UV_MAX_P1_HALF)
  99       return (UV) f;
 100     f -= UV_MAX_P1_HALF;
 101     return ((UV) f) | (1 + UV_MAX >> 1);
 102 #else
 103     return (UV) f;
 104 #endif
 105   }
 106   return f > 0 ? UV_MAX : 0 /* NaN */;
 107 }
 108
 109 /*
 110 =for apidoc grok_bin
 111
 112 converts a string representing a binary number to numeric form.
 113
 114 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 115 conversion flags, and I<result> should be NULL or a pointer to an NV.
 116 The scan stops at the end of the string, or the first invalid character.
 117 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 118 invalid character will also trigger a warning.
 119 On return I<*len> is set to the length of the scanned string,
 120 and I<*flags> gives output flags.
 121
 122 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 123 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 124 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 125 and writes the value to I<*result> (or the value is discarded if I<result>
 126 is NULL).
 127
 128 The binary number may optionally be prefixed with "0b" or "b" unless
 129 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 130 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 131 number may use '_' characters to separate digits.
 132
 133 =cut
 134
 135 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 136 which suppresses any message for non-portable numbers that are still valid
 137 on this platform.
 138  */
 139
 140 UV
 141 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 142 {
 143     const char *s = start;
 144     STRLEN len = *len_p;
 145     UV value = 0;
 146     NV value_nv = 0;
 147
 148     const UV max_div_2 = UV_MAX / 2;
 149     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 150     bool overflowed = FALSE;
 151     char bit;
 152
 153     PERL_ARGS_ASSERT_GROK_BIN;
 154
 155     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 156         /* strip off leading b or 0b.
 157            for compatibility silently suffer "b" and "0b" as valid binary
 158            numbers. */
 159         if (len >= 1) {
 160             if (s[0] == 'b' || s[0] == 'B') {
 161                 s++;
 162                 len--;
 163             }
 164             else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
 165                 s+=2;
 166                 len-=2;
 167             }
 168         }
 169     }
 170
 171     for (; len-- && (bit = *s); s++) {
 172         if (bit == '0' || bit == '1') {
 173             /* Write it in this wonky order with a goto to attempt to get the
 174                compiler to make the common case integer-only loop pretty tight.
 175                With gcc seems to be much straighter code than old scan_bin.  */
 176           redo:
 177             if (!overflowed) {
 178                 if (value <= max_div_2) {
 179                     value = (value << 1) | (bit - '0');
 180                     continue;
 181                 }
 182                 /* Bah. We're just overflowed.  */
 183                 /* diag_listed_as: Integer overflow in %s number */
 184                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 185                                  "Integer overflow in binary number");
 186                 overflowed = TRUE;
 187                 value_nv = (NV) value;
 188             }
 189             value_nv *= 2.0;
 190             /* If an NV has not enough bits in its mantissa to
 191              * represent a UV this summing of small low-order numbers
 192              * is a waste of time (because the NV cannot preserve
 193              * the low-order bits anyway): we could just remember when
 194              * did we overflow and in the end just multiply value_nv by the
 195              * right amount. */
 196             value_nv += (NV)(bit - '0');
 197             continue;
 198         }
 199         if (bit == '_' && len && allow_underscores && (bit = s[1])
 200             && (bit == '0' || bit == '1'))
 201             {
 202                 --len;
 203                 ++s;
 204                 goto redo;
 205             }
 206         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 207             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 208                            "Illegal binary digit '%c' ignored", *s);
 209         break;
 210     }
 211
 212     if (   ( overflowed && value_nv > 4294967295.0)
 213 #if UVSIZE > 4
 214         || (!overflowed && value > 0xffffffff
 215             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 216 #endif
 217         ) {
 218         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 219                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 220     }
 221     *len_p = s - start;
 222     if (!overflowed) {
 223         *flags = 0;
 224         return value;
 225     }
 226     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 227     if (result)
 228         *result = value_nv;
 229     return UV_MAX;
 230 }
 231
 232 /*
 233 =for apidoc grok_hex
 234
 235 converts a string representing a hex number to numeric form.
 236
 237 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 238 conversion flags, and I<result> should be NULL or a pointer to an NV.
 239 The scan stops at the end of the string, or the first invalid character.
 240 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 241 invalid character will also trigger a warning.
 242 On return I<*len> is set to the length of the scanned string,
 243 and I<*flags> gives output flags.
 244
 245 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 246 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 247 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 248 and writes the value to I<*result> (or the value is discarded if I<result>
 249 is NULL).
 250
 251 The hex number may optionally be prefixed with "0x" or "x" unless
 252 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 253 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 254 number may use '_' characters to separate digits.
 255
 256 =cut
 257
 258 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 259 which suppresses any message for non-portable numbers that are still valid
 260 on this platform.
 261  */
 262
 263 UV
 264 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 265 {
 266     dVAR;
 267     const char *s = start;
 268     STRLEN len = *len_p;
 269     UV value = 0;
 270     NV value_nv = 0;
 271     const UV max_div_16 = UV_MAX / 16;
 272     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 273     bool overflowed = FALSE;
 274
 275     PERL_ARGS_ASSERT_GROK_HEX;
 276
 277     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 278         /* strip off leading x or 0x.
 279            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 280         */
 281         if (len >= 1) {
 282             if (s[0] == 'x' || s[0] == 'X') {
 283                 s++;
 284                 len--;
 285             }
 286             else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
 287                 s+=2;
 288                 len-=2;
 289             }
 290         }
 291     }
 292
 293     for (; len-- && *s; s++) {
 294         const char *hexdigit = strchr(PL_hexdigit, *s);
 295         if (hexdigit) {
 296             /* Write it in this wonky order with a goto to attempt to get the
 297                compiler to make the common case integer-only loop pretty tight.
 298                With gcc seems to be much straighter code than old scan_hex.  */
 299           redo:
 300             if (!overflowed) {
 301                 if (value <= max_div_16) {
 302                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 303                     continue;
 304                 }
 305                 /* Bah. We're just overflowed.  */
 306                 /* diag_listed_as: Integer overflow in %s number */
 307                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 308                                  "Integer overflow in hexadecimal number");
 309                 overflowed = TRUE;
 310                 value_nv = (NV) value;
 311             }
 312             value_nv *= 16.0;
 313             /* If an NV has not enough bits in its mantissa to
 314              * represent a UV this summing of small low-order numbers
 315              * is a waste of time (because the NV cannot preserve
 316              * the low-order bits anyway): we could just remember when
 317              * did we overflow and in the end just multiply value_nv by the
 318              * right amount of 16-tuples. */
 319             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 320             continue;
 321         }
 322         if (*s == '_' && len && allow_underscores && s[1]
 323                 && (hexdigit = strchr(PL_hexdigit, s[1])))
 324             {
 325                 --len;
 326                 ++s;
 327                 goto redo;
 328             }
 329         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 330             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 331                         "Illegal hexadecimal digit '%c' ignored", *s);
 332         break;
 333     }
 334
 335     if (   ( overflowed && value_nv > 4294967295.0)
 336 #if UVSIZE > 4
 337         || (!overflowed && value > 0xffffffff
 338             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 339 #endif
 340         ) {
 341         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 342                        "Hexadecimal number > 0xffffffff non-portable");
 343     }
 344     *len_p = s - start;
 345     if (!overflowed) {
 346         *flags = 0;
 347         return value;
 348     }
 349     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 350     if (result)
 351         *result = value_nv;
 352     return UV_MAX;
 353 }
 354
 355 /*
 356 =for apidoc grok_oct
 357
 358 converts a string representing an octal number to numeric form.
 359
 360 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 361 conversion flags, and I<result> should be NULL or a pointer to an NV.
 362 The scan stops at the end of the string, or the first invalid character.
 363 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 364 8 or 9 will also trigger a warning.
 365 On return I<*len> is set to the length of the scanned string,
 366 and I<*flags> gives output flags.
 367
 368 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 369 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
 370 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 371 and writes the value to I<*result> (or the value is discarded if I<result>
 372 is NULL).
 373
 374 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
 375 number may use '_' characters to separate digits.
 376
 377 =cut
 378
 379 Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
 380 which suppresses any message for non-portable numbers, but which are valid
 381 on this platform.
 382  */
 383
 384 UV
 385 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 386 {
 387     const char *s = start;
 388     STRLEN len = *len_p;
 389     UV value = 0;
 390     NV value_nv = 0;
 391     const UV max_div_8 = UV_MAX / 8;
 392     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 393     bool overflowed = FALSE;
 394
 395     PERL_ARGS_ASSERT_GROK_OCT;
 396
 397     for (; len-- && *s; s++) {
 398          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 399             out front allows slicker code.  */
 400         int digit = *s - '0';
 401         if (digit >= 0 && digit <= 7) {
 402             /* Write it in this wonky order with a goto to attempt to get the
 403                compiler to make the common case integer-only loop pretty tight.
 404             */
 405           redo:
 406             if (!overflowed) {
 407                 if (value <= max_div_8) {
 408                     value = (value << 3) | digit;
 409                     continue;
 410                 }
 411                 /* Bah. We're just overflowed.  */
 412                 /* diag_listed_as: Integer overflow in %s number */
 413                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 414                                "Integer overflow in octal number");
 415                 overflowed = TRUE;
 416                 value_nv = (NV) value;
 417             }
 418             value_nv *= 8.0;
 419             /* If an NV has not enough bits in its mantissa to
 420              * represent a UV this summing of small low-order numbers
 421              * is a waste of time (because the NV cannot preserve
 422              * the low-order bits anyway): we could just remember when
 423              * did we overflow and in the end just multiply value_nv by the
 424              * right amount of 8-tuples. */
 425             value_nv += (NV)digit;
 426             continue;
 427         }
 428         if (digit == ('_' - '0') && len && allow_underscores
 429             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 430             {
 431                 --len;
 432                 ++s;
 433                 goto redo;
 434             }
 435         /* Allow \octal to work the DWIM way (that is, stop scanning
 436          * as soon as non-octal characters are seen, complain only if
 437          * someone seems to want to use the digits eight and nine). */
 438         if (digit == 8 || digit == 9) {
 439             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 440                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 441                                "Illegal octal digit '%c' ignored", *s);
 442         }
 443         break;
 444     }
 445
 446     if (   ( overflowed && value_nv > 4294967295.0)
 447 #if UVSIZE > 4
 448         || (!overflowed && value > 0xffffffff
 449             && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
 450 #endif
 451         ) {
 452         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 453                        "Octal number > 037777777777 non-portable");
 454     }
 455     *len_p = s - start;
 456     if (!overflowed) {
 457         *flags = 0;
 458         return value;
 459     }
 460     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 461     if (result)
 462         *result = value_nv;
 463     return UV_MAX;
 464 }
 465
 466 /*
 467 =for apidoc scan_bin
 468
 469 For backwards compatibility. Use C<grok_bin> instead.
 470
 471 =for apidoc scan_hex
 472
 473 For backwards compatibility. Use C<grok_hex> instead.
 474
 475 =for apidoc scan_oct
 476
 477 For backwards compatibility. Use C<grok_oct> instead.
 478
 479 =cut
 480  */
 481
 482 NV
 483 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 484 {
 485     NV rnv;
 486     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 487     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 488
 489     PERL_ARGS_ASSERT_SCAN_BIN;
 490
 491     *retlen = len;
 492     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 493 }
 494
 495 NV
 496 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 497 {
 498     NV rnv;
 499     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 500     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 501
 502     PERL_ARGS_ASSERT_SCAN_OCT;
 503
 504     *retlen = len;
 505     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 506 }
 507
 508 NV
 509 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 510 {
 511     NV rnv;
 512     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 513     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 514
 515     PERL_ARGS_ASSERT_SCAN_HEX;
 516
 517     *retlen = len;
 518     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 519 }
 520
 521 /*
 522 =for apidoc grok_numeric_radix
 523
 524 Scan and skip for a numeric decimal separator (radix).
 525
 526 =cut
 527  */
 528 bool
 529 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 530 {
 531 #ifdef USE_LOCALE_NUMERIC
 532     dVAR;
 533
 534     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 535
 536     if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
 537         STRLEN len;
 538         const char * const radix = SvPV(PL_numeric_radix_sv, len);
 539         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 540             *sp += len;
 541             return TRUE;
 542         }
 543     }
 544     /* always try "." if numeric radix didn't match because
 545      * we may have data from different locales mixed */
 546 #endif
 547
 548     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 549
 550     if (*sp < send && **sp == '.') {
 551         ++*sp;
 552         return TRUE;
 553     }
 554     return FALSE;
 555 }
 556
 557 /*
 558 =for apidoc grok_number
 559
 560 Recognise (or not) a number.  The type of the number is returned
 561 (0 if unrecognised), otherwise it is a bit-ORed combination of
 562 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 563 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 564
 565 If the value of the number can fit in a UV, it is returned in *valuep.
 566 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
 567 will never be set unless *valuep is valid, but *valuep may have been assigned
 568 to during processing even though IS_NUMBER_IN_UV is not set on return.
 569 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
 570 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 571
 572 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 573 seen (in which case *valuep gives the true value truncated to an integer), and
 574 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 575 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 576 number is larger than a UV.
 577
 578 =cut
 579  */
 580 int
 581 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 582 {
 583   const char *s = pv;
 584   const char * const send = pv + len;
 585   const UV max_div_10 = UV_MAX / 10;
 586   const char max_mod_10 = UV_MAX % 10;
 587   int numtype = 0;
 588   int sawinf = 0;
 589   int sawnan = 0;
 590
 591   PERL_ARGS_ASSERT_GROK_NUMBER;
 592
 593   while (s < send && isSPACE(*s))
 594     s++;
 595   if (s == send) {
 596     return 0;
 597   } else if (*s == '-') {
 598     s++;
 599     numtype = IS_NUMBER_NEG;
 600   }
 601   else if (*s == '+')
 602   s++;
 603
 604   if (s == send)
 605     return 0;
 606
 607   /* next must be digit or the radix separator or beginning of infinity */
 608   if (isDIGIT(*s)) {
 609     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 610        overflow.  */
 611     UV value = *s - '0';
 612     /* This construction seems to be more optimiser friendly.
 613        (without it gcc does the isDIGIT test and the *s - '0' separately)
 614        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 615        In theory the optimiser could deduce how far to unroll the loop
 616        before checking for overflow.  */
 617     if (++s < send) {
 618       int digit = *s - '0';
 619       if (digit >= 0 && digit <= 9) {
 620         value = value * 10 + digit;
 621         if (++s < send) {
 622           digit = *s - '0';
 623           if (digit >= 0 && digit <= 9) {
 624             value = value * 10 + digit;
 625             if (++s < send) {
 626               digit = *s - '0';
 627               if (digit >= 0 && digit <= 9) {
 628                 value = value * 10 + digit;
 629                 if (++s < send) {
 630                   digit = *s - '0';
 631                   if (digit >= 0 && digit <= 9) {
 632                     value = value * 10 + digit;
 633                     if (++s < send) {
 634                       digit = *s - '0';
 635                       if (digit >= 0 && digit <= 9) {
 636                         value = value * 10 + digit;
 637                         if (++s < send) {
 638                           digit = *s - '0';
 639                           if (digit >= 0 && digit <= 9) {
 640                             value = value * 10 + digit;
 641                             if (++s < send) {
 642                               digit = *s - '0';
 643                               if (digit >= 0 && digit <= 9) {
 644                                 value = value * 10 + digit;
 645                                 if (++s < send) {
 646                                   digit = *s - '0';
 647                                   if (digit >= 0 && digit <= 9) {
 648                                     value = value * 10 + digit;
 649                                     if (++s < send) {
 650                                       /* Now got 9 digits, so need to check
 651                                          each time for overflow.  */
 652                                       digit = *s - '0';
 653                                       while (digit >= 0 && digit <= 9
 654                                              && (value < max_div_10
 655                                                  || (value == max_div_10
 656                                                      && digit <= max_mod_10))) {
 657                                         value = value * 10 + digit;
 658                                         if (++s < send)
 659                                           digit = *s - '0';
 660                                         else
 661                                           break;
 662                                       }
 663                                       if (digit >= 0 && digit <= 9
 664                                           && (s < send)) {
 665                                         /* value overflowed.
 666                                            skip the remaining digits, don't
 667                                            worry about setting *valuep.  */
 668                                         do {
 669                                           s++;
 670                                         } while (s < send && isDIGIT(*s));
 671                                         numtype |=
 672                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 673                                         goto skip_value;
 674                                       }
 675                                     }
 676                                   }
 677                                 }
 678                               }
 679                             }
 680                           }
 681                         }
 682                       }
 683                     }
 684                   }
 685                 }
 686               }
 687             }
 688           }
 689         }
 690       }
 691     }
 692     numtype |= IS_NUMBER_IN_UV;
 693     if (valuep)
 694       *valuep = value;
 695
 696   skip_value:
 697     if (GROK_NUMERIC_RADIX(&s, send)) {
 698       numtype |= IS_NUMBER_NOT_INT;
 699       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 700         s++;
 701     }
 702   }
 703   else if (GROK_NUMERIC_RADIX(&s, send)) {
 704     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 705     /* no digits before the radix means we need digits after it */
 706     if (s < send && isDIGIT(*s)) {
 707       do {
 708         s++;
 709       } while (s < send && isDIGIT(*s));
 710       if (valuep) {
 711         /* integer approximation is valid - it's 0.  */
 712         *valuep = 0;
 713       }
 714     }
 715     else
 716       return 0;
 717   } else if (*s == 'I' || *s == 'i') {
 718     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 719     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 720     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 721       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 722       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 723       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 724       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 725       s++;
 726     }
 727     sawinf = 1;
 728   } else if (*s == 'N' || *s == 'n') {
 729     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 730     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 731     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 732     s++;
 733     sawnan = 1;
 734   } else
 735     return 0;
 736
 737   if (sawinf) {
 738     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 739     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 740   } else if (sawnan) {
 741     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 742     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 743   } else if (s < send) {
 744     /* we can have an optional exponent part */
 745     if (*s == 'e' || *s == 'E') {
 746       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 747       numtype &= IS_NUMBER_NEG;
 748       numtype |= IS_NUMBER_NOT_INT;
 749       s++;
 750       if (s < send && (*s == '-' || *s == '+'))
 751         s++;
 752       if (s < send && isDIGIT(*s)) {
 753         do {
 754           s++;
 755         } while (s < send && isDIGIT(*s));
 756       }
 757       else
 758       return 0;
 759     }
 760   }
 761   while (s < send && isSPACE(*s))
 762     s++;
 763   if (s >= send)
 764     return numtype;
 765   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 766     if (valuep)
 767       *valuep = 0;
 768     return IS_NUMBER_IN_UV;
 769   }
 770   return 0;
 771 }
 772
 773 STATIC NV
 774 S_mulexp10(NV value, I32 exponent)
 775 {
 776     NV result = 1.0;
 777     NV power = 10.0;
 778     bool negative = 0;
 779     I32 bit;
 780
 781     if (exponent == 0)
 782         return value;
 783     if (value == 0)
 784         return (NV)0;
 785
 786     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 787      * and that format does not have *easy* capabilities [1] for
 788      * overflowing doubles 'silently' as IEEE fp does.  We also need
 789      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 790      * range is much larger than D_FLOAT it still doesn't do silent
 791      * overflow.  Therefore we need to detect early whether we would
 792      * overflow (this is the behaviour of the native string-to-float
 793      * conversion routines, and therefore of native applications, too).
 794      *
 795      * [1] Trying to establish a condition handler to trap floating point
 796      *     exceptions is not a good idea. */
 797
 798     /* In UNICOS and in certain Cray models (such as T90) there is no
 799      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 800      * There is something you can do if you are willing to use some
 801      * inline assembler: the instruction is called DFI-- but that will
 802      * disable *all* floating point interrupts, a little bit too large
 803      * a hammer.  Therefore we need to catch potential overflows before
 804      * it's too late. */
 805
 806 #if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
 807     STMT_START {
 808         const NV exp_v = log10(value);
 809         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
 810             return NV_MAX;
 811         if (exponent < 0) {
 812             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
 813                 return 0.0;
 814             while (-exponent >= NV_MAX_10_EXP) {
 815                 /* combination does not overflow, but 10^(-exponent) does */
 816                 value /= 10;
 817                 ++exponent;
 818             }
 819         }
 820     } STMT_END;
 821 #endif
 822
 823     if (exponent < 0) {
 824         negative = 1;
 825         exponent = -exponent;
 826     }
 827     for (bit = 1; exponent; bit <<= 1) {
 828         if (exponent & bit) {
 829             exponent ^= bit;
 830             result *= power;
 831             /* Floating point exceptions are supposed to be turned off,
 832              *  but if we're obviously done, don't risk another iteration.
 833              */
 834              if (exponent == 0) break;
 835         }
 836         power *= power;
 837     }
 838     return negative ? value / result : value * result;
 839 }
 840
 841 NV
 842 Perl_my_atof(pTHX_ const char* s)
 843 {
 844     NV x = 0.0;
 845 #ifdef USE_LOCALE_NUMERIC
 846     dVAR;
 847
 848     PERL_ARGS_ASSERT_MY_ATOF;
 849
 850     if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
 851         const char *standard = NULL, *local = NULL;
 852         bool use_standard_radix;
 853
 854         /* Look through the string for the first thing that looks like a
 855          * decimal point: either the value in the current locale or the
 856          * standard fallback of '.'. The one which appears earliest in the
 857          * input string is the one that we should have atof look for. Note that
 858          * we have to determine this beforehand because on some systems,
 859          * Perl_atof2 is just a wrapper around the system's atof. */
 860         standard = strchr(s, '.');
 861         local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
 862
 863         use_standard_radix = standard && (!local || standard < local);
 864
 865         if (use_standard_radix)
 866             SET_NUMERIC_STANDARD();
 867
 868         Perl_atof2(s, x);
 869
 870         if (use_standard_radix)
 871             SET_NUMERIC_LOCAL();
 872     }
 873     else
 874         Perl_atof2(s, x);
 875 #else
 876     Perl_atof2(s, x);
 877 #endif
 878     return x;
 879 }
 880
 881 char*
 882 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 883 {
 884     NV result[3] = {0.0, 0.0, 0.0};
 885     const char* s = orig;
 886 #ifdef USE_PERL_ATOF
 887     UV accumulator[2] = {0,0};  /* before/after dp */
 888     bool negative = 0;
 889     const char* send = s + strlen(orig) - 1;
 890     bool seen_digit = 0;
 891     I32 exp_adjust[2] = {0,0};
 892     I32 exp_acc[2] = {-1, -1};
 893     /* the current exponent adjust for the accumulators */
 894     I32 exponent = 0;
 895     I32 seen_dp  = 0;
 896     I32 digit = 0;
 897     I32 old_digit = 0;
 898     I32 sig_digits = 0; /* noof significant digits seen so far */
 899
 900     PERL_ARGS_ASSERT_MY_ATOF2;
 901
 902 /* There is no point in processing more significant digits
 903  * than the NV can hold. Note that NV_DIG is a lower-bound value,
 904  * while we need an upper-bound value. We add 2 to account for this;
 905  * since it will have been conservative on both the first and last digit.
 906  * For example a 32-bit mantissa with an exponent of 4 would have
 907  * exact values in the set
 908  *               4
 909  *               8
 910  *              ..
 911  *     17179869172
 912  *     17179869176
 913  *     17179869180
 914  *
 915  * where for the purposes of calculating NV_DIG we would have to discount
 916  * both the first and last digit, since neither can hold all values from
 917  * 0..9; but for calculating the value we must examine those two digits.
 918  */
 919 #ifdef MAX_SIG_DIG_PLUS
 920     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
 921        possible digits in a NV, especially if NVs are not IEEE compliant
 922        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
 923 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
 924 #else
 925 # define MAX_SIG_DIGITS (NV_DIG+2)
 926 #endif
 927
 928 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
 929 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
 930
 931     /* leading whitespace */
 932     while (isSPACE(*s))
 933         ++s;
 934
 935     /* sign */
 936     switch (*s) {
 937         case '-':
 938             negative = 1;
 939             /* fall through */
 940         case '+':
 941             ++s;
 942     }
 943
 944     /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
 945
 946 #ifdef HAS_STRTOD
 947     if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
 948         const char *p = negative ? s - 1 : s;
 949         char *endp;
 950         NV rslt;
 951         rslt = strtod(p, &endp);
 952         if (endp != p) {
 953             *value = rslt;
 954             return (char *)endp;
 955         }
 956     }
 957 #endif
 958
 959     /* we accumulate digits into an integer; when this becomes too
 960      * large, we add the total to NV and start again */
 961
 962     while (1) {
 963         if (isDIGIT(*s)) {
 964             seen_digit = 1;
 965             old_digit = digit;
 966             digit = *s++ - '0';
 967             if (seen_dp)
 968                 exp_adjust[1]++;
 969
 970             /* don't start counting until we see the first significant
 971              * digit, eg the 5 in 0.00005... */
 972             if (!sig_digits && digit == 0)
 973                 continue;
 974
 975             if (++sig_digits > MAX_SIG_DIGITS) {
 976                 /* limits of precision reached */
 977                 if (digit > 5) {
 978                     ++accumulator[seen_dp];
 979                 } else if (digit == 5) {
 980                     if (old_digit % 2) { /* round to even - Allen */
 981                         ++accumulator[seen_dp];
 982                     }
 983                 }
 984                 if (seen_dp) {
 985                     exp_adjust[1]--;
 986                 } else {
 987                     exp_adjust[0]++;
 988                 }
 989                 /* skip remaining digits */
 990                 while (isDIGIT(*s)) {
 991                     ++s;
 992                     if (! seen_dp) {
 993                         exp_adjust[0]++;
 994                     }
 995                 }
 996                 /* warn of loss of precision? */
 997             }
 998             else {
 999                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
1000                     /* add accumulator to result and start again */
1001                     result[seen_dp] = S_mulexp10(result[seen_dp],
1002                                                  exp_acc[seen_dp])
1003                         + (NV)accumulator[seen_dp];
1004                     accumulator[seen_dp] = 0;
1005                     exp_acc[seen_dp] = 0;
1006                 }
1007                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1008                 ++exp_acc[seen_dp];
1009             }
1010         }
1011         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1012             seen_dp = 1;
1013             if (sig_digits > MAX_SIG_DIGITS) {
1014                 do {
1015                     ++s;
1016                 } while (isDIGIT(*s));
1017                 break;
1018             }
1019         }
1020         else {
1021             break;
1022         }
1023     }
1024
1025     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1026     if (seen_dp) {
1027         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1028     }
1029
1030     if (seen_digit && (*s == 'e' || *s == 'E')) {
1031         bool expnegative = 0;
1032
1033         ++s;
1034         switch (*s) {
1035             case '-':
1036                 expnegative = 1;
1037                 /* fall through */
1038             case '+':
1039                 ++s;
1040         }
1041         while (isDIGIT(*s))
1042             exponent = exponent * 10 + (*s++ - '0');
1043         if (expnegative)
1044             exponent = -exponent;
1045     }
1046
1047
1048
1049     /* now apply the exponent */
1050
1051     if (seen_dp) {
1052         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1053                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1054     } else {
1055         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1056     }
1057
1058     /* now apply the sign */
1059     if (negative)
1060         result[2] = -result[2];
1061 #endif /* USE_PERL_ATOF */
1062     *value = result[2];
1063     return (char *)s;
1064 }
1065
#if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
/*
 * Perl_my_modfl - stand-in for modfl() on systems that lack it but do
 * provide aintl() and copysignl().
 *
 * Stores aintl(x) through ip and returns what is left of x after taking
 * that part away.  When nothing is left, the returned zero is given the
 * sign of x via copysignl().
 * (aintl() is presumably the platform's truncation routine - TODO confirm.)
 */
long double
Perl_my_modfl(long double x, long double *ip)
{
    const long double whole = aintl(x);

    *ip = whole;
    if (x == whole)
        return copysignl(0.0L, x);
    return x - whole;
}
#endif
1074
1075 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1076 long double
1077 Perl_my_frexpl(long double x, int *e) {
1078         *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1079         return (scalbnl(x, -*e));
1080 }
1081 #endif
1082
1083 /*
1084 =for apidoc Perl_signbit
1085
1086 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1087 it is not.
1088
1089 If Configure detects this system has a signbit() that will work with
1090 our NVs, then we just use it via the #define in perl.h.  Otherwise,
1091 fall back on this implementation.  As a first pass, this gets everything
1092 right except -0.0.  Alas, catching -0.0 is the main use for this function,
1093 so this is not too helpful yet.  Still, at least we have the scaffolding
1094 in place to support other systems, should that prove useful.
1095
1096
1097 Configure notes:  This function is called 'Perl_signbit' instead of a
1098 plain 'signbit' because it is easy to imagine a system having a signbit()
1099 function or macro that doesn't happen to work with our particular choice
1100 of NVs.  We shouldn't just re-#define signbit as Perl_signbit and expect
1101 the standard system headers to be happy.  Also, this is a no-context
1102 function (no pTHX_) because Perl_signbit() is usually re-#defined in
1103 perl.h as a simple macro call to the system's signbit().
1104 Users should just always call Perl_signbit().
1105
1106 =cut
1107 */
1108 #if !defined(HAS_SIGNBIT)
1109 int
1110 Perl_signbit(NV x) {
1111     return (x < 0.0) ? 1 : 0;
1112 }
1113 #endif
1114
1115 /*
1116  * Local variables:
1117  * c-indentation-style: bsd
1118  * c-basic-offset: 4
1119  * indent-tabs-mode: nil
1120  * End:
1121  *
1122  * ex: set ts=8 sts=4 sw=4 et:
1123  */