numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999,
   4  *    2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  * "That only makes eleven (plus one mislaid) and not fourteen, unless
  13  * wizards count differently to other people."
  14  */
  15
  16 /*
  17 =head1 Numeric functions
  18
  19 This file contains all the stuff needed by perl for manipulating numeric
  20 values, including such things as replacements for the OS's atof() function
  21
  22 =cut
  23
  24 */
  25
  26 #include "EXTERN.h"
  27 #define PERL_IN_NUMERIC_C
  28 #include "perl.h"
  29
  30 U32
  31 Perl_cast_ulong(pTHX_ NV f)
  32 {
  33   if (f < 0.0)
  34     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  35   if (f < U32_MAX_P1) {
  36 #if CASTFLAGS & 2
  37     if (f < U32_MAX_P1_HALF)
  38       return (U32) f;
  39     f -= U32_MAX_P1_HALF;
  40     return ((U32) f) | (1 + U32_MAX >> 1);
  41 #else
  42     return (U32) f;
  43 #endif
  44   }
  45   return f > 0 ? U32_MAX : 0 /* NaN */;
  46 }
  47
  48 I32
  49 Perl_cast_i32(pTHX_ NV f)
  50 {
  51   if (f < I32_MAX_P1)
  52     return f < I32_MIN ? I32_MIN : (I32) f;
  53   if (f < U32_MAX_P1) {
  54 #if CASTFLAGS & 2
  55     if (f < U32_MAX_P1_HALF)
  56       return (I32)(U32) f;
  57     f -= U32_MAX_P1_HALF;
  58     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  59 #else
  60     return (I32)(U32) f;
  61 #endif
  62   }
  63   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  64 }
  65
  66 IV
  67 Perl_cast_iv(pTHX_ NV f)
  68 {
  69   if (f < IV_MAX_P1)
  70     return f < IV_MIN ? IV_MIN : (IV) f;
  71   if (f < UV_MAX_P1) {
  72 #if CASTFLAGS & 2
  73     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  74     if (f < UV_MAX_P1_HALF)
  75       return (IV)(UV) f;
  76     f -= UV_MAX_P1_HALF;
  77     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  78 #else
  79     return (IV)(UV) f;
  80 #endif
  81   }
  82   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  83 }
  84
  85 UV
  86 Perl_cast_uv(pTHX_ NV f)
  87 {
  88   if (f < 0.0)
  89     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  90   if (f < UV_MAX_P1) {
  91 #if CASTFLAGS & 2
  92     if (f < UV_MAX_P1_HALF)
  93       return (UV) f;
  94     f -= UV_MAX_P1_HALF;
  95     return ((UV) f) | (1 + UV_MAX >> 1);
  96 #else
  97     return (UV) f;
  98 #endif
  99   }
 100   return f > 0 ? UV_MAX : 0 /* NaN */;
 101 }
 102
 103 /*
 104 =for apidoc grok_bin
 105
 106 converts a string representing a binary number to numeric form.
 107
 108 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 109 conversion flags, and I<result> should be NULL or a pointer to an NV.
 110 The scan stops at the end of the string, or the first invalid character.
 111 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 112 invalid character will also trigger a warning.
 113 On return I<*len> is set to the length of the scanned string,
 114 and I<*flags> gives output flags.
 115
 116 If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
 117 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
 118 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 119 and writes the value to I<*result> (or the value is discarded if I<result>
 120 is NULL).
 121
 122 The binary number may optionally be prefixed with "0b" or "b" unless
 123 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 124 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
 125 number may use '_' characters to separate digits.
 126
 127 =cut
 128  */
 129
 130 UV
 131 Perl_grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 132     const char *s = start;
 133     STRLEN len = *len_p;
 134     UV value = 0;
 135     NV value_nv = 0;
 136
 137     const UV max_div_2 = UV_MAX / 2;
 138     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 139     bool overflowed = FALSE;
 140     char bit;
 141
 142     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 143         /* strip off leading b or 0b.
 144            for compatibility silently suffer "b" and "0b" as valid binary
 145            numbers. */
 146         if (len >= 1) {
 147             if (s[0] == 'b') {
 148                 s++;
 149                 len--;
 150             }
 151             else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
 152                 s+=2;
 153                 len-=2;
 154             }
 155         }
 156     }
 157
 158     for (; len-- && (bit = *s); s++) {
 159         if (bit == '0' || bit == '1') {
 160             /* Write it in this wonky order with a goto to attempt to get the
 161                compiler to make the common case integer-only loop pretty tight.
 162                With gcc seems to be much straighter code than old scan_bin.  */
 163           redo:
 164             if (!overflowed) {
 165                 if (value <= max_div_2) {
 166                     value = (value << 1) | (bit - '0');
 167                     continue;
 168                 }
 169                 /* Bah. We're just overflowed.  */
 170                 if (ckWARN_d(WARN_OVERFLOW))
 171                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 172                                 "Integer overflow in binary number");
 173                 overflowed = TRUE;
 174                 value_nv = (NV) value;
 175             }
 176             value_nv *= 2.0;
 177             /* If an NV has not enough bits in its mantissa to
 178              * represent a UV this summing of small low-order numbers
 179              * is a waste of time (because the NV cannot preserve
 180              * the low-order bits anyway): we could just remember when
 181              * did we overflow and in the end just multiply value_nv by the
 182              * right amount. */
 183             value_nv += (NV)(bit - '0');
 184             continue;
 185         }
 186         if (bit == '_' && len && allow_underscores && (bit = s[1])
 187             && (bit == '0' || bit == '1'))
 188             {
 189                 --len;
 190                 ++s;
 191                 goto redo;
 192             }
 193         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 194             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 195                         "Illegal binary digit '%c' ignored", *s);
 196         break;
 197     }
 198
 199     if (   ( overflowed && value_nv > 4294967295.0)
 200 #if UVSIZE > 4
 201         || (!overflowed && value > 0xffffffff  )
 202 #endif
 203         ) {
 204         if (ckWARN(WARN_PORTABLE))
 205             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 206                         "Binary number > 0b11111111111111111111111111111111 non-portable");
 207     }
 208     *len_p = s - start;
 209     if (!overflowed) {
 210         *flags = 0;
 211         return value;
 212     }
 213     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 214     if (result)
 215         *result = value_nv;
 216     return UV_MAX;
 217 }
 218
 219 /*
 220 =for apidoc grok_hex
 221
 222 converts a string representing a hex number to numeric form.
 223
 224 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 225 conversion flags, and I<result> should be NULL or a pointer to an NV.
 226 The scan stops at the end of the string, or the first invalid character.
 227 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 228 invalid character will also trigger a warning.
 229 On return I<*len> is set to the length of the scanned string,
 230 and I<*flags> gives output flags.
 231
 232 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 233 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
 234 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 235 and writes the value to I<*result> (or the value is discarded if I<result>
 236 is NULL).
 237
 238 The hex number may optionally be prefixed with "0x" or "x" unless
 239 C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
 240 C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
 241 number may use '_' characters to separate digits.
 242
 243 =cut
 244  */
 245
 246 UV
 247 Perl_grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 248     const char *s = start;
 249     STRLEN len = *len_p;
 250     UV value = 0;
 251     NV value_nv = 0;
 252
 253     const UV max_div_16 = UV_MAX / 16;
 254     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 255     bool overflowed = FALSE;
 256
 257     if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
 258         /* strip off leading x or 0x.
 259            for compatibility silently suffer "x" and "0x" as valid hex numbers.
 260         */
 261         if (len >= 1) {
 262             if (s[0] == 'x') {
 263                 s++;
 264                 len--;
 265             }
 266             else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
 267                 s+=2;
 268                 len-=2;
 269             }
 270         }
 271     }
 272
 273     for (; len-- && *s; s++) {
 274         const char *hexdigit = strchr(PL_hexdigit, *s);
 275         if (hexdigit) {
 276             /* Write it in this wonky order with a goto to attempt to get the
 277                compiler to make the common case integer-only loop pretty tight.
 278                With gcc seems to be much straighter code than old scan_hex.  */
 279           redo:
 280             if (!overflowed) {
 281                 if (value <= max_div_16) {
 282                     value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
 283                     continue;
 284                 }
 285                 /* Bah. We're just overflowed.  */
 286                 if (ckWARN_d(WARN_OVERFLOW))
 287                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 288                                 "Integer overflow in hexadecimal number");
 289                 overflowed = TRUE;
 290                 value_nv = (NV) value;
 291             }
 292             value_nv *= 16.0;
 293             /* If an NV has not enough bits in its mantissa to
 294              * represent a UV this summing of small low-order numbers
 295              * is a waste of time (because the NV cannot preserve
 296              * the low-order bits anyway): we could just remember when
 297              * did we overflow and in the end just multiply value_nv by the
 298              * right amount of 16-tuples. */
 299             value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
 300             continue;
 301         }
 302         if (*s == '_' && len && allow_underscores && s[1]
 303                 && (hexdigit = strchr(PL_hexdigit, s[1])))
 304             {
 305                 --len;
 306                 ++s;
 307                 goto redo;
 308             }
 309         if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 310             Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 311                         "Illegal hexadecimal digit '%c' ignored", *s);
 312         break;
 313     }
 314
 315     if (   ( overflowed && value_nv > 4294967295.0)
 316 #if UVSIZE > 4
 317         || (!overflowed && value > 0xffffffff  )
 318 #endif
 319         ) {
 320         if (ckWARN(WARN_PORTABLE))
 321             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 322                         "Hexadecimal number > 0xffffffff non-portable");
 323     }
 324     *len_p = s - start;
 325     if (!overflowed) {
 326         *flags = 0;
 327         return value;
 328     }
 329     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 330     if (result)
 331         *result = value_nv;
 332     return UV_MAX;
 333 }
 334
 335 /*
 336 =for apidoc grok_oct
 337
 338 converts a string representing an octal number to numeric form.
 339
 340 On entry I<start> and I<*len> give the string to scan, I<*flags> gives
 341 conversion flags, and I<result> should be NULL or a pointer to an NV.
 342 The scan stops at the end of the string, or the first invalid character.
 343 Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
 344 invalid character will also trigger a warning.
 345 On return I<*len> is set to the length of the scanned string,
 346 and I<*flags> gives output flags.
 347
 348 If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
 349 and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
 350 returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
 351 and writes the value to I<*result> (or the value is discarded if I<result>
 352 is NULL).
 353
 354 If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
 355 number may use '_' characters to separate digits.
 356
 357 =cut
 358  */
 359
 360 UV
 361 Perl_grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result) {
 362     const char *s = start;
 363     STRLEN len = *len_p;
 364     UV value = 0;
 365     NV value_nv = 0;
 366
 367     const UV max_div_8 = UV_MAX / 8;
 368     const bool allow_underscores = (bool)(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
 369     bool overflowed = FALSE;
 370
 371     for (; len-- && *s; s++) {
 372          /* gcc 2.95 optimiser not smart enough to figure that this subtraction
 373             out front allows slicker code.  */
 374         int digit = *s - '0';
 375         if (digit >= 0 && digit <= 7) {
 376             /* Write it in this wonky order with a goto to attempt to get the
 377                compiler to make the common case integer-only loop pretty tight.
 378             */
 379           redo:
 380             if (!overflowed) {
 381                 if (value <= max_div_8) {
 382                     value = (value << 3) | digit;
 383                     continue;
 384                 }
 385                 /* Bah. We're just overflowed.  */
 386                 if (ckWARN_d(WARN_OVERFLOW))
 387                     Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
 388                                 "Integer overflow in octal number");
 389                 overflowed = TRUE;
 390                 value_nv = (NV) value;
 391             }
 392             value_nv *= 8.0;
 393             /* If an NV has not enough bits in its mantissa to
 394              * represent a UV this summing of small low-order numbers
 395              * is a waste of time (because the NV cannot preserve
 396              * the low-order bits anyway): we could just remember when
 397              * did we overflow and in the end just multiply value_nv by the
 398              * right amount of 8-tuples. */
 399             value_nv += (NV)digit;
 400             continue;
 401         }
 402         if (digit == ('_' - '0') && len && allow_underscores
 403             && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
 404             {
 405                 --len;
 406                 ++s;
 407                 goto redo;
 408             }
 409         /* Allow \octal to work the DWIM way (that is, stop scanning
 410          * as soon as non-octal characters are seen, complain only if
 411          * someone seems to want to use the digits eight and nine). */
 412         if (digit == 8 || digit == 9) {
 413             if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT))
 414                 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
 415                             "Illegal octal digit '%c' ignored", *s);
 416         }
 417         break;
 418     }
 419
 420     if (   ( overflowed && value_nv > 4294967295.0)
 421 #if UVSIZE > 4
 422         || (!overflowed && value > 0xffffffff  )
 423 #endif
 424         ) {
 425         if (ckWARN(WARN_PORTABLE))
 426             Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
 427                         "Octal number > 037777777777 non-portable");
 428     }
 429     *len_p = s - start;
 430     if (!overflowed) {
 431         *flags = 0;
 432         return value;
 433     }
 434     *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
 435     if (result)
 436         *result = value_nv;
 437     return UV_MAX;
 438 }
 439
 440 /*
 441 =for apidoc scan_bin
 442
 443 For backwards compatibility. Use C<grok_bin> instead.
 444
 445 =for apidoc scan_hex
 446
 447 For backwards compatibility. Use C<grok_hex> instead.
 448
 449 =for apidoc scan_oct
 450
 451 For backwards compatibility. Use C<grok_oct> instead.
 452
 453 =cut
 454  */
 455
 456 NV
 457 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 458 {
 459     NV rnv;
 460     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 461     const UV ruv = grok_bin (start, &len, &flags, &rnv);
 462
 463     *retlen = len;
 464     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 465 }
 466
 467 NV
 468 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 469 {
 470     NV rnv;
 471     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 472     const UV ruv = grok_oct (start, &len, &flags, &rnv);
 473
 474     *retlen = len;
 475     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 476 }
 477
 478 NV
 479 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 480 {
 481     NV rnv;
 482     I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
 483     const UV ruv = grok_hex (start, &len, &flags, &rnv);
 484
 485     *retlen = len;
 486     return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
 487 }
 488
 489 /*
 490 =for apidoc grok_numeric_radix
 491
 492 Scan and skip for a numeric decimal separator (radix).
 493
 494 =cut
 495  */
 496 bool
 497 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 498 {
 499 #ifdef USE_LOCALE_NUMERIC
 500     if (PL_numeric_radix_sv && IN_LOCALE) {
 501         STRLEN len;
 502         const char * const radix = SvPV(PL_numeric_radix_sv, len);
 503         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 504             *sp += len;
 505             return TRUE;
 506         }
 507     }
 508     /* always try "." if numeric radix didn't match because
 509      * we may have data from different locales mixed */
 510 #endif
 511     if (*sp < send && **sp == '.') {
 512         ++*sp;
 513         return TRUE;
 514     }
 515     return FALSE;
 516 }
 517
 518 /*
 519 =for apidoc grok_number
 520
 521 Recognise (or not) a number.  The type of the number is returned
 522 (0 if unrecognised), otherwise it is a bit-ORed combination of
 523 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
 524 IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
 525
 526 If the value of the number can fit an in UV, it is returned in the *valuep
 527 IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
 528 will never be set unless *valuep is valid, but *valuep may have been assigned
 529 to during processing even though IS_NUMBER_IN_UV is not set on return.
 530 If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
 531 valuep is non-NULL, but no actual assignment (or SEGV) will occur.
 532
 533 IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
 534 seen (in which case *valuep gives the true value truncated to an integer), and
 535 IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
 536 absolute value).  IS_NUMBER_IN_UV is not set if e notation was used or the
 537 number is larger than a UV.
 538
 539 =cut
 540  */
 541 int
 542 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 543 {
 544   const char *s = pv;
 545   const char * const send = pv + len;
 546   const UV max_div_10 = UV_MAX / 10;
 547   const char max_mod_10 = UV_MAX % 10;
 548   int numtype = 0;
 549   int sawinf = 0;
 550   int sawnan = 0;
 551
 552   while (s < send && isSPACE(*s))
 553     s++;
 554   if (s == send) {
 555     return 0;
 556   } else if (*s == '-') {
 557     s++;
 558     numtype = IS_NUMBER_NEG;
 559   }
 560   else if (*s == '+')
 561   s++;
 562
 563   if (s == send)
 564     return 0;
 565
 566   /* next must be digit or the radix separator or beginning of infinity */
 567   if (isDIGIT(*s)) {
 568     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 569        overflow.  */
 570     UV value = *s - '0';
 571     /* This construction seems to be more optimiser friendly.
 572        (without it gcc does the isDIGIT test and the *s - '0' separately)
 573        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 574        In theory the optimiser could deduce how far to unroll the loop
 575        before checking for overflow.  */
 576     if (++s < send) {
 577       int digit = *s - '0';
 578       if (digit >= 0 && digit <= 9) {
 579         value = value * 10 + digit;
 580         if (++s < send) {
 581           digit = *s - '0';
 582           if (digit >= 0 && digit <= 9) {
 583             value = value * 10 + digit;
 584             if (++s < send) {
 585               digit = *s - '0';
 586               if (digit >= 0 && digit <= 9) {
 587                 value = value * 10 + digit;
 588                 if (++s < send) {
 589                   digit = *s - '0';
 590                   if (digit >= 0 && digit <= 9) {
 591                     value = value * 10 + digit;
 592                     if (++s < send) {
 593                       digit = *s - '0';
 594                       if (digit >= 0 && digit <= 9) {
 595                         value = value * 10 + digit;
 596                         if (++s < send) {
 597                           digit = *s - '0';
 598                           if (digit >= 0 && digit <= 9) {
 599                             value = value * 10 + digit;
 600                             if (++s < send) {
 601                               digit = *s - '0';
 602                               if (digit >= 0 && digit <= 9) {
 603                                 value = value * 10 + digit;
 604                                 if (++s < send) {
 605                                   digit = *s - '0';
 606                                   if (digit >= 0 && digit <= 9) {
 607                                     value = value * 10 + digit;
 608                                     if (++s < send) {
 609                                       /* Now got 9 digits, so need to check
 610                                          each time for overflow.  */
 611                                       digit = *s - '0';
 612                                       while (digit >= 0 && digit <= 9
 613                                              && (value < max_div_10
 614                                                  || (value == max_div_10
 615                                                      && digit <= max_mod_10))) {
 616                                         value = value * 10 + digit;
 617                                         if (++s < send)
 618                                           digit = *s - '0';
 619                                         else
 620                                           break;
 621                                       }
 622                                       if (digit >= 0 && digit <= 9
 623                                           && (s < send)) {
 624                                         /* value overflowed.
 625                                            skip the remaining digits, don't
 626                                            worry about setting *valuep.  */
 627                                         do {
 628                                           s++;
 629                                         } while (s < send && isDIGIT(*s));
 630                                         numtype |=
 631                                           IS_NUMBER_GREATER_THAN_UV_MAX;
 632                                         goto skip_value;
 633                                       }
 634                                     }
 635                                   }
 636                                 }
 637                               }
 638                             }
 639                           }
 640                         }
 641                       }
 642                     }
 643                   }
 644                 }
 645               }
 646             }
 647           }
 648         }
 649       }
 650     }
 651     numtype |= IS_NUMBER_IN_UV;
 652     if (valuep)
 653       *valuep = value;
 654
 655   skip_value:
 656     if (GROK_NUMERIC_RADIX(&s, send)) {
 657       numtype |= IS_NUMBER_NOT_INT;
 658       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
 659         s++;
 660     }
 661   }
 662   else if (GROK_NUMERIC_RADIX(&s, send)) {
 663     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
 664     /* no digits before the radix means we need digits after it */
 665     if (s < send && isDIGIT(*s)) {
 666       do {
 667         s++;
 668       } while (s < send && isDIGIT(*s));
 669       if (valuep) {
 670         /* integer approximation is valid - it's 0.  */
 671         *valuep = 0;
 672       }
 673     }
 674     else
 675       return 0;
 676   } else if (*s == 'I' || *s == 'i') {
 677     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 678     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
 679     s++; if (s < send && (*s == 'I' || *s == 'i')) {
 680       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 681       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
 682       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
 683       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
 684       s++;
 685     }
 686     sawinf = 1;
 687   } else if (*s == 'N' || *s == 'n') {
 688     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
 689     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
 690     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
 691     s++;
 692     sawnan = 1;
 693   } else
 694     return 0;
 695
 696   if (sawinf) {
 697     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 698     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 699   } else if (sawnan) {
 700     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 701     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
 702   } else if (s < send) {
 703     /* we can have an optional exponent part */
 704     if (*s == 'e' || *s == 'E') {
 705       /* The only flag we keep is sign.  Blow away any "it's UV"  */
 706       numtype &= IS_NUMBER_NEG;
 707       numtype |= IS_NUMBER_NOT_INT;
 708       s++;
 709       if (s < send && (*s == '-' || *s == '+'))
 710         s++;
 711       if (s < send && isDIGIT(*s)) {
 712         do {
 713           s++;
 714         } while (s < send && isDIGIT(*s));
 715       }
 716       else
 717       return 0;
 718     }
 719   }
 720   while (s < send && isSPACE(*s))
 721     s++;
 722   if (s >= send)
 723     return numtype;
 724   if (len == 10 && memEQ(pv, "0 but true", 10)) {
 725     if (valuep)
 726       *valuep = 0;
 727     return IS_NUMBER_IN_UV;
 728   }
 729   return 0;
 730 }
 731
 732 STATIC NV
 733 S_mulexp10(NV value, I32 exponent)
 734 {
 735     NV result = 1.0;
 736     NV power = 10.0;
 737     bool negative = 0;
 738     I32 bit;
 739
 740     if (exponent == 0)
 741         return value;
 742     if (value == 0)
 743         return (NV)0;
 744
 745     /* On OpenVMS VAX we by default use the D_FLOAT double format,
 746      * and that format does not have *easy* capabilities [1] for
 747      * overflowing doubles 'silently' as IEEE fp does.  We also need
 748      * to support G_FLOAT on both VAX and Alpha, and though the exponent
 749      * range is much larger than D_FLOAT it still doesn't do silent
 750      * overflow.  Therefore we need to detect early whether we would
 751      * overflow (this is the behaviour of the native string-to-float
 752      * conversion routines, and therefore of native applications, too).
 753      *
 754      * [1] Trying to establish a condition handler to trap floating point
 755      *     exceptions is not a good idea. */
 756
 757     /* In UNICOS and in certain Cray models (such as T90) there is no
 758      * IEEE fp, and no way at all from C to catch fp overflows gracefully.
 759      * There is something you can do if you are willing to use some
 760      * inline assembler: the instruction is called DFI-- but that will
 761      * disable *all* floating point interrupts, a little bit too large
 762      * a hammer.  Therefore we need to catch potential overflows before
 763      * it's too late. */
 764
 765 #if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
 766     STMT_START {
 767         const NV exp_v = log10(value);
 768         if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
 769             return NV_MAX;
 770         if (exponent < 0) {
 771             if (-(exponent + exp_v) >= NV_MAX_10_EXP)
 772                 return 0.0;
 773             while (-exponent >= NV_MAX_10_EXP) {
 774                 /* combination does not overflow, but 10^(-exponent) does */
 775                 value /= 10;
 776                 ++exponent;
 777             }
 778         }
 779     } STMT_END;
 780 #endif
 781
 782     if (exponent < 0) {
 783         negative = 1;
 784         exponent = -exponent;
 785     }
 786     for (bit = 1; exponent; bit <<= 1) {
 787         if (exponent & bit) {
 788             exponent ^= bit;
 789             result *= power;
 790             /* Floating point exceptions are supposed to be turned off,
 791              *  but if we're obviously done, don't risk another iteration.
 792              */
 793              if (exponent == 0) break;
 794         }
 795         power *= power;
 796     }
 797     return negative ? value / result : value * result;
 798 }
 799
 800 NV
 801 Perl_my_atof(pTHX_ const char* s)
 802 {
 803     NV x = 0.0;
 804 #ifdef USE_LOCALE_NUMERIC
 805     if (PL_numeric_local && IN_LOCALE) {
 806         NV y;
 807
 808         /* Scan the number twice; once using locale and once without;
 809          * choose the larger result (in absolute value). */
 810         Perl_atof2(s, x);
 811         SET_NUMERIC_STANDARD();
 812         Perl_atof2(s, y);
 813         SET_NUMERIC_LOCAL();
 814         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 815             return y;
 816     }
 817     else
 818         Perl_atof2(s, x);
 819 #else
 820     Perl_atof2(s, x);
 821 #endif
 822     return x;
 823 }
 824
 825 char*
 826 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 827 {
 828     NV result[3] = {0.0, 0.0, 0.0};
 829     const char* s = orig;
 830 #ifdef USE_PERL_ATOF
 831     UV accumulator[2] = {0,0};  /* before/after dp */
 832     bool negative = 0;
 833     const char* send = s + strlen(orig) - 1;
 834     bool seen_digit = 0;
 835     I32 exp_adjust[2] = {0,0};
 836     I32 exp_acc[2] = {-1, -1};
 837     /* the current exponent adjust for the accumulators */
 838     I32 exponent = 0;
 839     I32 seen_dp  = 0;
 840     I32 digit = 0;
 841     I32 old_digit = 0;
 842     I32 sig_digits = 0; /* noof significant digits seen so far */
 843
 844 /* There is no point in processing more significant digits
 845  * than the NV can hold. Note that NV_DIG is a lower-bound value,
 846  * while we need an upper-bound value. We add 2 to account for this;
 847  * since it will have been conservative on both the first and last digit.
 848  * For example a 32-bit mantissa with an exponent of 4 would have
 849  * exact values in the set
 850  *               4
 851  *               8
 852  *              ..
 853  *     17179869172
 854  *     17179869176
 855  *     17179869180
 856  *
 857  * where for the purposes of calculating NV_DIG we would have to discount
 858  * both the first and last digit, since neither can hold all values from
 859  * 0..9; but for calculating the value we must examine those two digits.
 860  */
 861 #define MAX_SIG_DIGITS (NV_DIG+2)
 862
 863 /* the max number we can accumulate in a UV, and still safely do 10*N+9 */
 864 #define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
 865
 866     /* leading whitespace */
 867     while (isSPACE(*s))
 868         ++s;
 869
 870     /* sign */
 871     switch (*s) {
 872         case '-':
 873             negative = 1;
 874             /* fall through */
 875         case '+':
 876             ++s;
 877     }
 878
 879     /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
 880
 881 #ifdef HAS_STRTOD
 882     if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
 883         const char *p = negative ? s - 1 : s;
 884         char *endp;
 885         NV rslt;
 886         rslt = strtod(p, &endp);
 887         if (endp != p) {
 888             *value = rslt;
 889             return (char *)endp;
 890         }
 891     }
 892 #endif
 893
 894     /* we accumulate digits into an integer; when this becomes too
 895      * large, we add the total to NV and start again */
 896
 897     while (1) {
 898         if (isDIGIT(*s)) {
 899             seen_digit = 1;
 900             old_digit = digit;
 901             digit = *s++ - '0';
 902             if (seen_dp)
 903                 exp_adjust[1]++;
 904
 905             /* don't start counting until we see the first significant
 906              * digit, eg the 5 in 0.00005... */
 907             if (!sig_digits && digit == 0)
 908                 continue;
 909
 910             if (++sig_digits > MAX_SIG_DIGITS) {
 911                 /* limits of precision reached */
 912                 if (digit > 5) {
 913                     ++accumulator[seen_dp];
 914                 } else if (digit == 5) {
 915                     if (old_digit % 2) { /* round to even - Allen */
 916                         ++accumulator[seen_dp];
 917                     }
 918                 }
 919                 if (seen_dp) {
 920                     exp_adjust[1]--;
 921                 } else {
 922                     exp_adjust[0]++;
 923                 }
 924                 /* skip remaining digits */
 925                 while (isDIGIT(*s)) {
 926                     ++s;
 927                     if (! seen_dp) {
 928                         exp_adjust[0]++;
 929                     }
 930                 }
 931                 /* warn of loss of precision? */
 932             }
 933             else {
 934                 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
 935                     /* add accumulator to result and start again */
 936                     result[seen_dp] = S_mulexp10(result[seen_dp],
 937                                                  exp_acc[seen_dp])
 938                         + (NV)accumulator[seen_dp];
 939                     accumulator[seen_dp] = 0;
 940                     exp_acc[seen_dp] = 0;
 941                 }
 942                 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
 943                 ++exp_acc[seen_dp];
 944             }
 945         }
 946         else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
 947             seen_dp = 1;
 948             if (sig_digits > MAX_SIG_DIGITS) {
 949                 ++s;
 950                 while (isDIGIT(*s)) {
 951                     ++s;
 952                 }
 953                 break;
 954             }
 955         }
 956         else {
 957             break;
 958         }
 959     }
 960
 961     result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
 962     if (seen_dp) {
 963         result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
 964     }
 965
 966     if (seen_digit && (*s == 'e' || *s == 'E')) {
 967         bool expnegative = 0;
 968
 969         ++s;
 970         switch (*s) {
 971             case '-':
 972                 expnegative = 1;
 973                 /* fall through */
 974             case '+':
 975                 ++s;
 976         }
 977         while (isDIGIT(*s))
 978             exponent = exponent * 10 + (*s++ - '0');
 979         if (expnegative)
 980             exponent = -exponent;
 981     }
 982
 983
 984
 985     /* now apply the exponent */
 986
 987     if (seen_dp) {
 988         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
 989                 + S_mulexp10(result[1],exponent-exp_adjust[1]);
 990     } else {
 991         result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
 992     }
 993
 994     /* now apply the sign */
 995     if (negative)
 996         result[2] = -result[2];
 997 #endif /* USE_PERL_ATOF */
 998     *value = result[2];
 999     return (char *)s;
1000 }
1001
1002 #if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1003 long double
1004 Perl_my_modfl(long double x, long double *ip)
1005 {
1006         *ip = aintl(x);
1007         return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1008 }
1009 #endif
1010
1011 #if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1012 long double
1013 Perl_my_frexpl(long double x, int *e) {
1014         *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1015         return (scalbnl(x, -*e));
1016 }
1017 #endif
1018
1019 /*
1020  * Local variables:
1021  * c-indentation-style: bsd
1022  * c-basic-offset: 4
1023  * indent-tabs-mode: t
1024  * End:
1025  *
1026  * ex: set ts=8 sts=4 sw=4 noet:
1027  */