numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   4  *    2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen,
  13  *  unless wizards count differently to other people."  --Beorn
  14  *
  15  *     [p.115 of _The Hobbit_: "Queer Lodgings"]
  16  */
  17
  18 /*
  19 =head1 Numeric functions
  20
  21 This file contains all the stuff needed by perl for manipulating numeric
  22 values, including such things as replacements for the OS's atof() function
  23
  24 =cut
  25
  26 */
  27
  28 #include "EXTERN.h"
  29 #define PERL_IN_NUMERIC_C
  30 #include "perl.h"
  31
  32 U32
  33 Perl_cast_ulong(pTHX_ NV f)
  34 {
  35     PERL_UNUSED_CONTEXT;
  36   if (f < 0.0)
  37     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  38   if (f < U32_MAX_P1) {
  39 #if CASTFLAGS & 2
  40     if (f < U32_MAX_P1_HALF)
  41       return (U32) f;
  42     f -= U32_MAX_P1_HALF;
  43     return ((U32) f) | (1 + U32_MAX >> 1);
  44 #else
  45     return (U32) f;
  46 #endif
  47   }
  48   return f > 0 ? U32_MAX : 0 /* NaN */;
  49 }
  50
  51 I32
  52 Perl_cast_i32(pTHX_ NV f)
  53 {
  54     PERL_UNUSED_CONTEXT;
  55   if (f < I32_MAX_P1)
  56     return f < I32_MIN ? I32_MIN : (I32) f;
  57   if (f < U32_MAX_P1) {
  58 #if CASTFLAGS & 2
  59     if (f < U32_MAX_P1_HALF)
  60       return (I32)(U32) f;
  61     f -= U32_MAX_P1_HALF;
  62     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  63 #else
  64     return (I32)(U32) f;
  65 #endif
  66   }
  67   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  68 }
  69
  70 IV
  71 Perl_cast_iv(pTHX_ NV f)
  72 {
  73     PERL_UNUSED_CONTEXT;
  74   if (f < IV_MAX_P1)
  75     return f < IV_MIN ? IV_MIN : (IV) f;
  76   if (f < UV_MAX_P1) {
  77 #if CASTFLAGS & 2
  78     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  79     if (f < UV_MAX_P1_HALF)
  80       return (IV)(UV) f;
  81     f -= UV_MAX_P1_HALF;
  82     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  83 #else
  84     return (IV)(UV) f;
  85 #endif
  86   }
  87   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  88 }
  89
  90 UV
  91 Perl_cast_uv(pTHX_ NV f)
  92 {
  93     PERL_UNUSED_CONTEXT;
  94   if (f < 0.0)
  95     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  96   if (f < UV_MAX_P1) {
  97 #if CASTFLAGS & 2
  98     if (f < UV_MAX_P1_HALF)
  99       return (UV) f;
 100     f -= UV_MAX_P1_HALF;
 101     return ((UV) f) | (1 + UV_MAX >> 1);
 102 #else
 103     return (UV) f;
 104 #endif
 105   }
 106   return f > 0 ? UV_MAX : 0 /* NaN */;
 107 }
 108
 109 /*
 110 =for apidoc grok_bin
 111
 112 converts a string representing a binary number to numeric form.
 113
 114 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 115 conversion flags, and I<result> should be NULL or a pointer to an NV.
 116 The scan stops at the end of the string, or the first invalid character.
 117 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 118 invalid character will also trigger a warning.
 119 On return I<*len> is set to the length of the scanned string,
 120 and I<*flags> gives output flags.
 121
 122 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 123 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 124 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 125 and writes the value to I<*result> (or the value is discarded if I<result>
 126 is NULL).
 127
 128 The binary number may optionally be prefixed with "0b" or "b" unless
 129 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 130 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 131 number may use '_' characters to separate digits.
 132
 133 =cut
 134  */
 135
 136 UV
 137 Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 138 {
 139     const char *s = start;
 140     STRLEN len = *len_p;
 141     UV value = 0;
 142     NV value_nv = 0;
 143
 144     const UV max_div_2 = UV_MAX / 2;
 145     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 146     bool overflowed = FALSE;
 147     char bit;
 148
 149     PERL_ARGS_ASSERT_GROK_BIN;
 150
 151     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 152         /* strip off leading b or 0b.
 153            for compatibility silently suffer "b" and "0b" as valid binary
 154            numbers. */
 155         if (len >= 1) {
 156             if (s[0] == 'b' || s[0] == 'B') {
 157                 s++;
 158                 len--;
 159             }
 160             else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
 161                 s+=2;
 162                 len-=2;
 163             }
 164         }
 165     }
 166
 167     for (; len-- && (bit = *s); s++) {
 168         if (bit == '0' || bit == '1') {
 169             /* Write it in this wonky order with a goto to attempt to get the
 170                compiler to make the common case integer-only loop pretty tight.
 171                With gcc seems to be much straighter code than old scan_bin.  */
 172           redo:
 173             if (!overflowed) {
 174                 if (value <= max_div_2) {
 175                     value = (value << 1) | (bit - '0');
 176                     continue;
 177                 }
 178                 /* Bah. We're just overflowed.  */
 179                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 180                                  "Integer overflow in binary number");
 181                 overflowed = TRUE;
 182                 value_nv = (NV) value;
 183             }
 184             value_nv *= 2.0;
 185             /* If an NV has not enough bits in its mantissa to
 186              * represent a UV this summing of small low-order numbers
 187              * is a waste of time (because the NV cannot preserve
 188              * the low-order bits anyway): we could just remember when
 189              * did we overflow and in the end just multiply value_nv by the
 190              * right amount. */
 191             value_nv += (NV)(bit - '0');
 192             continue;
 193         }
 194         if (bit == '_' && len && allow_underscores && (bit = s[1])
 195             && (bit == '0' || bit == '1'))
 196             {
 197                 --len;
 198                 ++s;
 199                 goto redo;
 200             }
 201         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 202             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 203                            "Illegal binary digit '%c' ignored", *s);
 204         break;
 205     }
 206
 207     if (   ( overflowed && value_nv > 4294967295.0)
 208 #if UVSIZE > 4
 209         || (!overflowed && value > 0xffffffff  )
 210 #endif
 211         ) {
 212         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 213                        "Binary number > 0b11111111111111111111111111111111 non-portable");
 214     }
 215     *len_p = s - start;
 216     if (!overflowed) {
 217         *flags = 0;
 218         return value;
 219     }
 220     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 221     if (result)
 222         *result = value_nv;
 223     return UV_MAX;
 224 }
 225
 226 /*
 227 =for apidoc grok_hex
 228
 229 converts a string representing a hex number to numeric form.
 230
 231 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 232 conversion flags, and I<result> should be NULL or a pointer to an NV.
 233 The scan stops at the end of the string, or the first invalid character.
 234 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 235 invalid character will also trigger a warning.
 236 On return I<*len> is set to the length of the scanned string,
 237 and I<*flags> gives output flags.
 238
 239 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 240 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 241 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 242 and writes the value to I<*result> (or the value is discarded if I<result>
 243 is NULL).
 244
 245 The hex number may optionally be prefixed with "0x" or "x" unless
 246 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 247 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 248 number may use '_' characters to separate digits.
 249
 250 =cut
 251  */
 252
 253 UV
 254 Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 255 {
 256     dVAR;
 257     const char *s = start;
 258     STRLEN len = *len_p;
 259     UV value = 0;
 260     NV value_nv = 0;
 261     const UV max_div_16 = UV_MAX / 16;
 262     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 263     bool overflowed = FALSE;
 264
 265     PERL_ARGS_ASSERT_GROK_HEX;
 266
 267     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 268         /* strip off leading x or 0x.
 269            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 270         */
 271         if (len >= 1) {
 272             if (s[0] == 'x' || s[0] == 'X') {
 273                 s++;
 274                 len--;
 275             }
 276             else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
 277                 s+=2;
 278                 len-=2;
 279             }
 280         }
 281     }
 282
 283     for (; len-- && *s; s++) {
 284         const char *hexdigit = strchr(PL_hexdigit, *s);
 285         if (hexdigit) {
 286             /* Write it in this wonky order with a goto to attempt to get the
 287                compiler to make the common case integer-only loop pretty tight.
 288                With gcc seems to be much straighter code than old scan_hex.  */
 289           redo:
 290             if (!overflowed) {
 291                 if (value <= max_div_16) {
 292                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 293                     continue;
 294                 }
 295                 /* Bah. We're just overflowed.  */
 296                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 297                                  "Integer overflow in hexadecimal number");
 298                 overflowed = TRUE;
 299                 value_nv = (NV) value;
 300             }
 301             value_nv *= 16.0;
 302             /* If an NV has not enough bits in its mantissa to
 303              * represent a UV this summing of small low-order numbers
 304              * is a waste of time (because the NV cannot preserve
 305              * the low-order bits anyway): we could just remember when
 306              * did we overflow and in the end just multiply value_nv by the
 307              * right amount of 16-tuples. */
 308             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 309             continue;
 310         }
 311         if (*s == '_' && len && allow_underscores && s[1]
 312                 && (hexdigit = strchr(PL_hexdigit, s[1])))
 313             {
 314                 --len;
 315                 ++s;
 316                 goto redo;
 317             }
 318         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 319             Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 320                         "Illegal hexadecimal digit '%c' ignored", *s);
 321         break;
 322     }
 323
 324     if (   ( overflowed && value_nv > 4294967295.0)
 325 #if UVSIZE > 4
 326         || (!overflowed && value > 0xffffffff  )
 327 #endif
 328         ) {
 329         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 330                        "Hexadecimal number > 0xffffffff non-portable");
 331     }
 332     *len_p = s - start;
 333     if (!overflowed) {
 334         *flags = 0;
 335         return value;
 336     }
 337     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 338     if (result)
 339         *result = value_nv;
 340     return UV_MAX;
 341 }
 342
 343 /*
 344 =for apidoc grok_oct
 345
 346 converts a string representing an octal number to numeric form.
 347
 348 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 349 conversion flags, and I<result> should be NULL or a pointer to an NV.
 350 The scan stops at the end of the string, or the first invalid character.
 351 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 352 8 or 9 will also trigger a warning.
 353 On return I<*len> is set to the length of the scanned string,
 354 and I<*flags> gives output flags.
 355
 356 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 357 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
 358 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 359 and writes the value to I<*result> (or the value is discarded if I<result>
 360 is NULL).
 361
 362 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
 363 number may use '_' characters to separate digits.
 364
 365 =cut
 366  */
 367
 368 UV
 369 Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
 370 {
 371     const char *s = start;
 372     STRLEN len = *len_p;
 373     UV value = 0;
 374     NV value_nv = 0;
 375     const UV max_div_8 = UV_MAX / 8;
 376     const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 377     bool overflowed = FALSE;
 378
 379     PERL_ARGS_ASSERT_GROK_OCT;
 380
 381     for (; len-- && *s; s++) {
 382          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 383             out front allows slicker code.  */
 384         int digit = *s - '0';
 385         if (digit >= 0 && digit <= 7) {
 386             /* Write it in this wonky order with a goto to attempt to get the
 387                compiler to make the common case integer-only loop pretty tight.
 388             */
 389           redo:
 390             if (!overflowed) {
 391                 if (value <= max_div_8) {
 392                     value = (value << 3) | digit;
 393                     continue;
 394                 }
 395                 /* Bah. We're just overflowed.  */
 396                 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
 397                                "Integer overflow in octal number");
 398                 overflowed = TRUE;
 399                 value_nv = (NV) value;
 400             }
 401             value_nv *= 8.0;
 402             /* If an NV has not enough bits in its mantissa to
 403              * represent a UV this summing of small low-order numbers
 404              * is a waste of time (because the NV cannot preserve
 405              * the low-order bits anyway): we could just remember when
 406              * did we overflow and in the end just multiply value_nv by the
 407              * right amount of 8-tuples. */
 408             value_nv += (NV)digit;
 409             continue;
 410         }
 411         if (digit == ('_' - '0') && len && allow_underscores
 412             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 413             {
 414                 --len;
 415                 ++s;
 416                 goto redo;
 417             }
 418         /* Allow \octal to work the DWIM way (that is, stop scanning
 419          * as soon as non-octal characters are seen, complain only if
 420          * someone seems to want to use the digits eight and nine). */
 421         if (digit == 8 || digit == 9) {
 422             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
 423                 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
 424                                "Illegal octal digit '%c' ignored", *s);
 425         }
 426         break;
 427     }
 428
 429     if (   ( overflowed && value_nv > 4294967295.0)
 430 #if UVSIZE > 4
 431         || (!overflowed && value > 0xffffffff  )
 432 #endif
 433         ) {
 434         Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
 435                        "Octal number > 037777777777 non-portable");
 436     }
 437     *len_p = s - start;
 438     if (!overflowed) {
 439         *flags = 0;
 440         return value;
 441     }
 442     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 443     if (result)
 444         *result = value_nv;
 445     return UV_MAX;
 446 }
 447
 448 /*
 449 =for apidoc scan_bin
 450
 451 For backwards compatibility. Use C<grok_bin> instead.
 452
 453 =for apidoc scan_hex
 454
 455 For backwards compatibility. Use C<grok_hex> instead.
 456
 457 =for apidoc scan_oct
 458
 459 For backwards compatibility. Use C<grok_oct> instead.
 460
 461 =cut
 462  */
 463
 464 NV
 465 Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 466 {
 467     NV rnv;
 468     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 469     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 470
 471     PERL_ARGS_ASSERT_SCAN_BIN;
 472
 473     *retlen = len;
 474     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 475 }
 476
 477 NV
 478 Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 479 {
 480     NV rnv;
 481     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 482     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 483
 484     PERL_ARGS_ASSERT_SCAN_OCT;
 485
 486     *retlen = len;
 487     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 488 }
 489
 490 NV
 491 Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
 492 {
 493     NV rnv;
 494     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 495     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 496
 497     PERL_ARGS_ASSERT_SCAN_HEX;
 498
 499     *retlen = len;
 500     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 501 }
 502
 503 /*
 504 =for apidoc grok_numeric_radix
 505
 506 Scan and skip for a numeric decimal separator (radix).
 507
 508 =cut
 509  */
 510 bool
 511 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 512 {
 513 #ifdef USE_LOCALE_NUMERIC
 514     dVAR;
 515
 516     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 517
 518     if (PL_numeric_radix_sv && IN_LOCALE) {
 519         STRLEN len;
 520         const char * const radix = SvPV(PL_numeric_radix_sv, len);
 521         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 522             *sp += len;
 523             return TRUE;
 524         }
 525     }
 526     /* always try "." if numeric radix didn't match because
 527      * we may have data from different locales mixed */
 528 #endif
 529
 530     PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
 531
 532     if (*sp < send && **sp == '.') {
 533         ++*sp;
 534         return TRUE;
 535     }
 536     return FALSE;
 537 }
 538
 539 /*
 540 =for apidoc grok_number
 541
 542 Recognise (or not) a number.  The type of the number is returned
 543 (0 if unrecognised), otherwise it is a bit-ORed combination of
 544 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 545 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 546
 547 If the value of the number can fit an in UV, it is returned in the *valuep
 548 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
 549 will never be set unless *valuep is valid, but *valuep may have been assigned
 550 to during processing even though IS_NUMBER_IN_UV is not set on return.
 551 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
 552 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 553
 554 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 555 seen (in which case *valuep gives the true value truncated to an integer), and
 556 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 557 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 558 number is larger than a UV.
 559
 560 =cut
 561  */
 562 int
 563 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 564 {
 565   const char *s = pv;
 566   const char * const send = pv + len;
 567   const UV max_div_10 = UV_MAX / 10;
 568   const char max_mod_10 = UV_MAX % 10;
 569   int numtype = 0;
 570   int sawinf = 0;
 571   int sawnan = 0;
 572
 573   PERL_ARGS_ASSERT_GROK_NUMBER;
 574
 575   while (s < send && isSPACE(*s))
 576     s++;
 577   if (s == send) {
 578     return 0;
 579   } else if (*s == '-') {
 580     s++;
 581     numtype = IS_NUMBER_NEG;
 582   }
 583   else if (*s == '+')
 584   s++;
 585
 586   if (s == send)
 587     return 0;
 588
 589   /* next must be digit or the radix separator or beginning of infinity */
 590   if (isDIGIT(*s)) {
 591     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 592        overflow.  */
 593     UV value = *s - '0';
 594     /* This construction seems to be more optimiser friendly.
 595        (without it gcc does the isDIGIT test and the *s - '0' separately)
 596        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 597        In theory the optimiser could deduce how far to unroll the loop
 598        before checking for overflow.  */
 599     if (++s < send) {
 600       int digit = *s - '0';
 601       if (digit >= 0 && digit <= 9) {
 602         value = value * 10 + digit;
 603         if (++s < send) {
 604           digit = *s - '0';
 605           if (digit >= 0 && digit <= 9) {
 606             value = value * 10 + digit;
 607             if (++s < send) {
 608               digit = *s - '0';
 609               if (digit >= 0 && digit <= 9) {
 610                 value = value * 10 + digit;
 611                 if (++s < send) {
 612                   digit = *s - '0';
 613                   if (digit >= 0 && digit <= 9) {
 614                     value = value * 10 + digit;
 615                     if (++s < send) {
 616                       digit = *s - '0';
 617                       if (digit >= 0 && digit <= 9) {
 618                         value = value * 10 + digit;
 619                         if (++s < send) {
 620                           digit = *s - '0';
 621                           if (digit >= 0 && digit <= 9) {
 622                             value = value * 10 + digit;
 623                             if (++s < send) {
 624                               digit = *s - '0';
 625                               if (digit >= 0 && digit <= 9) {
 626                                 value = value * 10 + digit;
 627                                 if (++s < send) {
 628                                   digit = *s - '0';
 629                                   if (digit >= 0 && digit <= 9) {
 630                                     value = value * 10 + digit;
 631                                     if (++s < send) {
 632                                       /* Now got 9 digits, so need to check
 633                                          each time for overflow.  */
 634                                       digit = *s - '0';
 635                                       while (digit >= 0 && digit <= 9
 636                                              && (value < max_div_10
 637                                                  || (value == max_div_10
 638                                                      && digit <= max_mod_10))) {
 639                                         value = value * 10 + digit;
 640                                         if (++s < send)
 641                                           digit = *s - '0';
 642                                         else
 643                                           break;
 644                                       }
 645                                       if (digit >= 0 && digit <= 9
 646                                           && (s < send)) {
 647                                         /* value overflowed.
 648                                            skip the remaining digits, don't
 649                                            worry about setting *valuep.  */
 650                                         do {
 651                                           s++;
 652                                         } while (s < send && isDIGIT(*s));
 653                                         numtype |=
 654                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 655                                         goto skip_value;
 656                                       }
 657                                     }
 658                                   }
 659                                 }
 660                               }
 661                             }
 662                           }
 663                         }
 664                       }
 665                     }
 666                   }
 667                 }
 668               }
 669             }
 670           }
 671         }
 672       }
 673     }
 674     numtype |= IS_NUMBER_IN_UV;
 675     if (valuep)
 676       *valuep = value;
 677
 678   skip_value:
 679     if (GROK_NUMERIC_RADIX(&s, send)) {
 680       numtype |= IS_NUMBER_NOT_INT;
 681       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 682         s++;
 683     }
 684   }
 685   else if (GROK_NUMERIC_RADIX(&s, send)) {
 686     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 687     /* no digits before the radix means we need digits after it */
 688     if (s < send && isDIGIT(*s)) {
 689       do {
 690         s++;
 691       } while (s < send && isDIGIT(*s));
 692       if (valuep) {
 693         /* integer approximation is valid - it's 0.  */
 694         *valuep = 0;
 695       }
 696     }
 697     else
 698       return 0;
 699   } else if (*s == 'I' || *s == 'i') {
 700     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 701     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 702     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 703       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 704       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 705       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 706       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 707       s++;
 708     }
 709     sawinf = 1;
 710   } else if (*s == 'N' || *s == 'n') {
 711     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 712     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 713     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 714     s++;
 715     sawnan = 1;
 716   } else
 717     return 0;
 718
 719   if (sawinf) {
 720     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 721     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 722   } else if (sawnan) {
 723     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 724     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 725   } else if (s < send) {
 726     /* we can have an optional exponent part */
 727     if (*s == 'e' || *s == 'E') {
 728       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 729       numtype &= IS_NUMBER_NEG;
 730       numtype |= IS_NUMBER_NOT_INT;
 731       s++;
 732       if (s < send && (*s == '-' || *s == '+'))
 733         s++;
 734       if (s < send && isDIGIT(*s)) {
 735         do {
 736           s++;
 737         } while (s < send && isDIGIT(*s));
 738       }
 739       else
 740       return 0;
 741     }
 742   }
 743   while (s < send && isSPACE(*s))
 744     s++;
 745   if (s >= send)
 746     return numtype;
 747   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 748     if (valuep)
 749       *valuep = 0;
 750     return IS_NUMBER_IN_UV;
 751   }
 752   return 0;
 753 }
 754
 755 STATIC NV
 756 S_mulexp10(NV value, I32 exponent)
 757 {
 758     NV result = 1.0;
 759     NV power = 10.0;
 760     bool negative = 0;
 761     I32 bit;
 762
 763     if (exponent == 0)
 764         return value;
 765     if (value == 0)
 766         return (NV)0;
 767
 768     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 769      * and that format does not have *easy* capabilities [1] for
 770      * overflowing doubles 'silently' as IEEE fp does.  We also need
 771      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 772      * range is much larger than D_FLOAT it still doesn't do silent
 773      * overflow.  Therefore we need to detect early whether we would
 774      * overflow (this is the behaviour of the native string-to-float
 775      * conversion routines, and therefore of native applications, too).
 776      *
 777      * [1] Trying to establish a condition handler to trap floating point
 778      *     exceptions is not a good idea. */
 779
 780     /* In UNICOS and in certain Cray models (such as T90) there is no
 781      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 782      * There is something you can do if you are willing to use some
 783      * inline assembler: the instruction is called DFI-- but that will
 784      * disable *all* floating point interrupts, a little bit too large
 785      * a hammer.  Therefore we need to catch potential overflows before
 786      * it's too late. */
 787
 788 #if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
 789     STMT_START {
 790         const NV exp_v = log10(value);
 791         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
 792             return NV_MAX;
 793         if (exponent < 0) {
 794             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
 795                 return 0.0;
 796             while (-exponent >= NV_MAX_10_EXP) {
 797                 /* combination does not overflow, but 10^(-exponent) does */
 798                 value /= 10;
 799                 ++exponent;
 800             }
 801         }
 802     } STMT_END;
 803 #endif
 804
 805     if (exponent < 0) {
 806         negative = 1;
 807         exponent = -exponent;
 808     }
 809     for (bit = 1; exponent; bit <<= 1) {
 810         if (exponent & bit) {
 811             exponent ^= bit;
 812             result *= power;
 813             /* Floating point exceptions are supposed to be turned off,
 814              *  but if we're obviously done, don't risk another iteration.
 815              */
 816              if (exponent == 0) break;
 817         }
 818         power *= power;
 819     }
 820     return negative ? value / result : value * result;
 821 }
 822
 823 NV
 824 Perl_my_atof(pTHX_ const char* s)
 825 {
 826     NV x = 0.0;
 827 #ifdef USE_LOCALE_NUMERIC
 828     dVAR;
 829
 830     PERL_ARGS_ASSERT_MY_ATOF;
 831
 832     if (PL_numeric_local && IN_LOCALE) {
 833         NV y;
 834
 835         /* Scan the number twice; once using locale and once without;
 836          * choose the larger result (in absolute value). */
 837         Perl_atof2(s, x);
 838         SET_NUMERIC_STANDARD();
 839         Perl_atof2(s, y);
 840         SET_NUMERIC_LOCAL();
 841         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 842             return y;
 843     }
 844     else
 845         Perl_atof2(s, x);
 846 #else
 847     Perl_atof2(s, x);
 848 #endif
 849     return x;
 850 }
 851
 852 char*
 853 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 854 {
 855     NV result[3] = {0.0, 0.0, 0.0};
 856     const char* s = orig;
 857 #ifdef USE_PERL_ATOF
 858     UV accumulator[2] = {0,0};  /* before/after dp */
 859     bool negative = 0;
 860     const char* send = s + strlen(orig) - 1;
 861     bool seen_digit = 0;
 862     I32 exp_adjust[2] = {0,0};
 863     I32 exp_acc[2] = {-1, -1};
 864     /* the current exponent adjust for the accumulators */
 865     I32 exponent = 0;
 866     I32 seen_dp  = 0;
 867     I32 digit = 0;
 868     I32 old_digit = 0;
 869     I32 sig_digits = 0; /* noof significant digits seen so far */
 870
 871     PERL_ARGS_ASSERT_MY_ATOF2;
 872
 873 /* There is no point in processing more significant digits
 874  * than the NV can hold. Note that NV_DIG is a lower-bound value,
 875  * while we need an upper-bound value. We add 2 to account for this;
 876  * since it will have been conservative on both the first and last digit.
 877  * For example a 32-bit mantissa with an exponent of 4 would have
 878  * exact values in the set
 879  *               4
 880  *               8
 881  *              ..
 882  *     17179869172
 883  *     17179869176
 884  *     17179869180
 885  *
 886  * where for the purposes of calculating NV_DIG we would have to discount
 887  * both the first and last digit, since neither can hold all values from
 888  * 0..9; but for calculating the value we must examine those two digits.
 889  */
 890 #ifdef MAX_SIG_DIG_PLUS
 891     /* It is not necessarily the case that adding 2 to NV_DIG gets all the
 892        possible digits in a NV, especially if NVs are not IEEE compliant
 893        (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
 894 # define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
 895 #else
 896 # define MAX_SIG_DIGITS (NV_DIG+2)
 897 #endif
 898
 899 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
 900 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
 901
 902     /* leading whitespace */
 903     while (isSPACE(*s))
 904         ++s;
 905
 906     /* sign */
 907     switch (*s) {
 908         case '-':
 909             negative = 1;
 910             /* fall through */
 911         case '+':
 912             ++s;
 913     }
 914
 915     /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
 916
 917 #ifdef HAS_STRTOD
 918     if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
 919         const char *p = negative ? s - 1 : s;
 920         char *endp;
 921         NV rslt;
 922         rslt = strtod(p, &endp);
 923         if (endp != p) {
 924             *value = rslt;
 925             return (char *)endp;
 926         }
 927     }
 928 #endif
 929
 930     /* we accumulate digits into an integer; when this becomes too
 931      * large, we add the total to NV and start again */
 932
 933     while (1) {
 934         if (isDIGIT(*s)) {
 935             seen_digit = 1;
 936             old_digit = digit;
 937             digit = *s++ - '0';
 938             if (seen_dp)
 939                 exp_adjust[1]++;
 940
 941             /* don't start counting until we see the first significant
 942              * digit, eg the 5 in 0.00005... */
 943             if (!sig_digits && digit == 0)
 944                 continue;
 945
 946             if (++sig_digits > MAX_SIG_DIGITS) {
 947                 /* limits of precision reached */
 948                 if (digit > 5) {
 949                     ++accumulator[seen_dp];
 950                 } else if (digit == 5) {
 951                     if (old_digit % 2) { /* round to even - Allen */
 952                         ++accumulator[seen_dp];
 953                     }
 954                 }
 955                 if (seen_dp) {
 956                     exp_adjust[1]--;
 957                 } else {
 958                     exp_adjust[0]++;
 959                 }
 960                 /* skip remaining digits */
 961                 while (isDIGIT(*s)) {
 962                     ++s;
 963                     if (! seen_dp) {
 964                         exp_adjust[0]++;
 965                     }
 966                 }
 967                 /* warn of loss of precision? */
 968             }
 969             else {
 970                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
 971                     /* add accumulator to result and start again */
 972                     result[seen_dp] = S_mulexp10(result[seen_dp],
 973                                                  exp_acc[seen_dp])
 974                         + (NV)accumulator[seen_dp];
 975                     accumulator[seen_dp] = 0;
 976                     exp_acc[seen_dp] = 0;
 977                 }
 978                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
 979                 ++exp_acc[seen_dp];
 980             }
 981         }
 982         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
 983             seen_dp = 1;
 984             if (sig_digits > MAX_SIG_DIGITS) {
 985                 do {
 986                     ++s;
 987                 } while (isDIGIT(*s));
 988                 break;
 989             }
 990         }
 991         else {
 992             break;
 993         }
 994     }
 995
 996     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
 997     if (seen_dp) {
 998         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
 999     }
1000
1001     if (seen_digit && (*s == 'e' || *s == 'E')) {
1002         bool expnegative = 0;
1003
1004         ++s;
1005         switch (*s) {
1006             case '-':
1007                 expnegative = 1;
1008                 /* fall through */
1009             case '+':
1010                 ++s;
1011         }
1012         while (isDIGIT(*s))
1013             exponent = exponent * 10 + (*s++ - '0');
1014         if (expnegative)
1015             exponent = -exponent;
1016     }
1017
1018
1019
1020     /* now apply the exponent */
1021
1022     if (seen_dp) {
1023         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1024                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1025     } else {
1026         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1027     }
1028
1029     /* now apply the sign */
1030     if (negative)
1031         result[2] = -result[2];
1032 #endif /* USE_PERL_ATOF */
1033     *value = result[2];
1034     return (char *)s;
1035 }
1036
1037 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1038 long double
1039 Perl_my_modfl(long double x, long double *ip)
1040 {
1041         *ip = aintl(x);
1042         return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1043 }
1044 #endif
1045
1046 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1047 long double
1048 Perl_my_frexpl(long double x, int *e) {
1049         *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1050         return (scalbnl(x, -*e));
1051 }
1052 #endif
1053
1054 /*
1055 =for apidoc Perl_signbit
1056
1057 Return a non-zero integer if the sign bit on an NV is set, and 0 if
1058 it is not.
1059
1060 If Configure detects this system has a signbit() that will work with
1061 our NVs, then we just use it via the #define in perl.h.  Otherwise,
1062 fall back on this implementation.  As a first pass, this gets everything
1063 right except -0.0.  Alas, catching -0.0 is the main use for this function,
1064 so this is not too helpful yet.  Still, at least we have the scaffolding
1065 in place to support other systems, should that prove useful.
1066
1067
1068 Configure notes:  This function is called 'Perl_signbit' instead of a
1069 plain 'signbit' because it is easy to imagine a system having a signbit()
1070 function or macro that doesn't happen to work with our particular choice
1071 of NVs.  We shouldn't just re-#define signbit as Perl_signbit and expect
1072 the standard system headers to be happy.  Also, this is a no-context
1073 function (no pTHX_) because Perl_signbit() is usually re-#defined in
1074 perl.h as a simple macro call to the system's signbit().
1075 Users should just always call Perl_signbit().
1076
1077 =cut
1078 */
1079 #if !defined(HAS_SIGNBIT)
1080 int
1081 Perl_signbit(NV x) {
1082     return (x < 0.0) ? 1 : 0;
1083 }
1084 #endif
1085
1086 /*
1087  * Local variables:
1088  * c-indentation-style: bsd
1089  * c-basic-offset: 4
1090  * indent-tabs-mode: t
1091  * End:
1092  *
1093  * ex: set ts=8 sts=4 sw=4 noet:
1094  */