numeric.c

   1 /*    numeric.c
   2  *
   3  *    Copyright (c) 2001, Larry Wall
   4  *
   5  *    You may distribute under the terms of either the GNU General Public
   6  *    License or the Artistic License, as specified in the README file.
   7  *
   8  */
   9
  10 /*
  11  * "That only makes eleven (plus one mislaid) and not fourteen, unless
  12  * wizards count differently to other people."
  13  */
  14
  15 #include "EXTERN.h"
  16 #define PERL_IN_NUMERIC_C
  17 #include "perl.h"
  18
  19 U32
  20 Perl_cast_ulong(pTHX_ NV f)
  21 {
  22   if (f < 0.0)
  23     return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
  24   if (f < U32_MAX_P1) {
  25 #if CASTFLAGS & 2
  26     if (f < U32_MAX_P1_HALF)
  27       return (U32) f;
  28     f -= U32_MAX_P1_HALF;
  29     return ((U32) f) | (1 + U32_MAX >> 1);
  30 #else
  31     return (U32) f;
  32 #endif
  33   }
  34   return f > 0 ? U32_MAX : 0 /* NaN */;
  35 }
  36
  37 I32
  38 Perl_cast_i32(pTHX_ NV f)
  39 {
  40   if (f < I32_MAX_P1)
  41     return f < I32_MIN ? I32_MIN : (I32) f;
  42   if (f < U32_MAX_P1) {
  43 #if CASTFLAGS & 2
  44     if (f < U32_MAX_P1_HALF)
  45       return (I32)(U32) f;
  46     f -= U32_MAX_P1_HALF;
  47     return (I32)(((U32) f) | (1 + U32_MAX >> 1));
  48 #else
  49     return (I32)(U32) f;
  50 #endif
  51   }
  52   return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
  53 }
  54
  55 IV
  56 Perl_cast_iv(pTHX_ NV f)
  57 {
  58   if (f < IV_MAX_P1)
  59     return f < IV_MIN ? IV_MIN : (IV) f;
  60   if (f < UV_MAX_P1) {
  61 #if CASTFLAGS & 2
  62     /* For future flexibility allowing for sizeof(UV) >= sizeof(IV)  */
  63     if (f < UV_MAX_P1_HALF)
  64       return (IV)(UV) f;
  65     f -= UV_MAX_P1_HALF;
  66     return (IV)(((UV) f) | (1 + UV_MAX >> 1));
  67 #else
  68     return (IV)(UV) f;
  69 #endif
  70   }
  71   return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
  72 }
  73
  74 UV
  75 Perl_cast_uv(pTHX_ NV f)
  76 {
  77   if (f < 0.0)
  78     return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
  79   if (f < UV_MAX_P1) {
  80 #if CASTFLAGS & 2
  81     if (f < UV_MAX_P1_HALF)
  82       return (UV) f;
  83     f -= UV_MAX_P1_HALF;
  84     return ((UV) f) | (1 + UV_MAX >> 1);
  85 #else
  86     return (UV) f;
  87 #endif
  88   }
  89   return f > 0 ? UV_MAX : 0 /* NaN */;
  90 }
  91
  92 #if defined(HUGE_VAL) || (defined(USE_LONG_DOUBLE) && defined(HUGE_VALL))
  93 /*
  94  * This hack is to force load of "huge" support from libm.a
  95  * So it is in perl for (say) POSIX to use.
  96  * Needed for SunOS with Sun's 'acc' for example.
  97  */
  98 NV
  99 Perl_huge(void)
 100 {
 101 #   if defined(USE_LONG_DOUBLE) && defined(HUGE_VALL)
 102     return HUGE_VALL;
 103 #   endif
 104     return HUGE_VAL;
 105 }
 106 #endif
 107
 108 NV
 109 Perl_scan_bin(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 110 {
 111     register char *s = start;
 112     register NV rnv = 0.0;
 113     register UV ruv = 0;
 114     register bool seenb = FALSE;
 115     register bool overflowed = FALSE;
 116
 117     for (; len-- && *s; s++) {
 118         if (!(*s == '0' || *s == '1')) {
 119             if (*s == '_' && len && *retlen
 120                 && (s[1] == '0' || s[1] == '1'))
 121             {
 122                 --len;
 123                 ++s;
 124             }
 125             else if (seenb == FALSE && *s == 'b' && ruv == 0) {
 126                 /* Disallow 0bbb0b0bbb... */
 127                 seenb = TRUE;
 128                 continue;
 129             }
 130             else {
 131                 if (ckWARN(WARN_DIGIT))
 132                     Perl_warner(aTHX_ WARN_DIGIT,
 133                                 "Illegal binary digit '%c' ignored", *s);
 134                 break;
 135             }
 136         }
 137         if (!overflowed) {
 138             register UV xuv = ruv << 1;
 139
 140             if ((xuv >> 1) != ruv) {
 141                 overflowed = TRUE;
 142                 rnv = (NV) ruv;
 143                 if (ckWARN_d(WARN_OVERFLOW))
 144                     Perl_warner(aTHX_ WARN_OVERFLOW,
 145                                 "Integer overflow in binary number");
 146             }
 147             else
 148                 ruv = xuv | (*s - '0');
 149         }
 150         if (overflowed) {
 151             rnv *= 2;
 152             /* If an NV has not enough bits in its mantissa to
 153              * represent an UV this summing of small low-order numbers
 154              * is a waste of time (because the NV cannot preserve
 155              * the low-order bits anyway): we could just remember when
 156              * did we overflow and in the end just multiply rnv by the
 157              * right amount. */
 158             rnv += (*s - '0');
 159         }
 160     }
 161     if (!overflowed)
 162         rnv = (NV) ruv;
 163     if (   ( overflowed && rnv > 4294967295.0)
 164 #if UVSIZE > 4
 165         || (!overflowed && ruv > 0xffffffff  )
 166 #endif
 167         ) {
 168         if (ckWARN(WARN_PORTABLE))
 169             Perl_warner(aTHX_ WARN_PORTABLE,
 170                         "Binary number > 0b11111111111111111111111111111111 non-portable");
 171     }
 172     *retlen = s - start;
 173     return rnv;
 174 }
 175
 176 NV
 177 Perl_scan_oct(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 178 {
 179     register char *s = start;
 180     register NV rnv = 0.0;
 181     register UV ruv = 0;
 182     register bool overflowed = FALSE;
 183
 184     for (; len-- && *s; s++) {
 185         if (!(*s >= '0' && *s <= '7')) {
 186             if (*s == '_' && len && *retlen
 187                 && (s[1] >= '0' && s[1] <= '7'))
 188             {
 189                 --len;
 190                 ++s;
 191             }
 192             else {
 193                 /* Allow \octal to work the DWIM way (that is, stop scanning
 194                  * as soon as non-octal characters are seen, complain only iff
 195                  * someone seems to want to use the digits eight and nine). */
 196                 if (*s == '8' || *s == '9') {
 197                     if (ckWARN(WARN_DIGIT))
 198                         Perl_warner(aTHX_ WARN_DIGIT,
 199                                     "Illegal octal digit '%c' ignored", *s);
 200                 }
 201                 break;
 202             }
 203         }
 204         if (!overflowed) {
 205             register UV xuv = ruv << 3;
 206
 207             if ((xuv >> 3) != ruv) {
 208                 overflowed = TRUE;
 209                 rnv = (NV) ruv;
 210                 if (ckWARN_d(WARN_OVERFLOW))
 211                     Perl_warner(aTHX_ WARN_OVERFLOW,
 212                                 "Integer overflow in octal number");
 213             }
 214             else
 215                 ruv = xuv | (*s - '0');
 216         }
 217         if (overflowed) {
 218             rnv *= 8.0;
 219             /* If an NV has not enough bits in its mantissa to
 220              * represent an UV this summing of small low-order numbers
 221              * is a waste of time (because the NV cannot preserve
 222              * the low-order bits anyway): we could just remember when
 223              * did we overflow and in the end just multiply rnv by the
 224              * right amount of 8-tuples. */
 225             rnv += (NV)(*s - '0');
 226         }
 227     }
 228     if (!overflowed)
 229         rnv = (NV) ruv;
 230     if (   ( overflowed && rnv > 4294967295.0)
 231 #if UVSIZE > 4
 232         || (!overflowed && ruv > 0xffffffff  )
 233 #endif
 234         ) {
 235         if (ckWARN(WARN_PORTABLE))
 236             Perl_warner(aTHX_ WARN_PORTABLE,
 237                         "Octal number > 037777777777 non-portable");
 238     }
 239     *retlen = s - start;
 240     return rnv;
 241 }
 242
 243 NV
 244 Perl_scan_hex(pTHX_ char *start, STRLEN len, STRLEN *retlen)
 245 {
 246     register char *s = start;
 247     register NV rnv = 0.0;
 248     register UV ruv = 0;
 249     register bool overflowed = FALSE;
 250     char *hexdigit;
 251
 252     if (len > 2) {
 253         if (s[0] == 'x') {
 254             s++;
 255             len--;
 256         }
 257         else if (len > 3 && s[0] == '0' && s[1] == 'x') {
 258             s+=2;
 259             len-=2;
 260         }
 261     }
 262
 263     for (; len-- && *s; s++) {
 264         hexdigit = strchr((char *) PL_hexdigit, *s);
 265         if (!hexdigit) {
 266             if (*s == '_' && len && *retlen && s[1]
 267                 && (hexdigit = strchr((char *) PL_hexdigit, s[1])))
 268             {
 269                 --len;
 270                 ++s;
 271             }
 272             else {
 273                 if (ckWARN(WARN_DIGIT))
 274                     Perl_warner(aTHX_ WARN_DIGIT,
 275                                 "Illegal hexadecimal digit '%c' ignored", *s);
 276                 break;
 277             }
 278         }
 279         if (!overflowed) {
 280             register UV xuv = ruv << 4;
 281
 282             if ((xuv >> 4) != ruv) {
 283                 overflowed = TRUE;
 284                 rnv = (NV) ruv;
 285                 if (ckWARN_d(WARN_OVERFLOW))
 286                     Perl_warner(aTHX_ WARN_OVERFLOW,
 287                                 "Integer overflow in hexadecimal number");
 288             }
 289             else
 290                 ruv = xuv | ((hexdigit - PL_hexdigit) & 15);
 291         }
 292         if (overflowed) {
 293             rnv *= 16.0;
 294             /* If an NV has not enough bits in its mantissa to
 295              * represent an UV this summing of small low-order numbers
 296              * is a waste of time (because the NV cannot preserve
 297              * the low-order bits anyway): we could just remember when
 298              * did we overflow and in the end just multiply rnv by the
 299              * right amount of 16-tuples. */
 300             rnv += (NV)((hexdigit - PL_hexdigit) & 15);
 301         }
 302     }
 303     if (!overflowed)
 304         rnv = (NV) ruv;
 305     if (   ( overflowed && rnv > 4294967295.0)
 306 #if UVSIZE > 4
 307         || (!overflowed && ruv > 0xffffffff  )
 308 #endif
 309         ) {
 310         if (ckWARN(WARN_PORTABLE))
 311             Perl_warner(aTHX_ WARN_PORTABLE,
 312                         "Hexadecimal number > 0xffffffff non-portable");
 313     }
 314     *retlen = s - start;
 315     return rnv;
 316 }
 317
 318 /*
 319 =for apidoc grok_numeric_radix
 320
 321 Scan and skip for a numeric decimal separator (radix).
 322
 323 =cut
 324  */
 325 bool
 326 Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
 327 {
 328 #ifdef USE_LOCALE_NUMERIC
 329     if (PL_numeric_radix_sv && IN_LOCALE) {
 330         STRLEN len;
 331         char* radix = SvPV(PL_numeric_radix_sv, len);
 332         if (*sp + len <= send && memEQ(*sp, radix, len)) {
 333             *sp += len;
 334             return TRUE;
 335         }
 336     }
 337     /* always try "." if numeric radix didn't match because
 338      * we may have data from different locales mixed */
 339 #endif
 340     if (*sp < send && **sp == '.') {
 341         ++*sp;
 342         return TRUE;
 343     }
 344     return FALSE;
 345 }
 346
 347 /*
 348 =for apidoc grok_number
 349
 350 Recognise (or not) a number.  The type of the number is returned
 351 (0 if unrecognised), otherwise it is a bit-ORed combination of
 352 IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
  353 IS_NUMBER_NEG, IS_NUMBER_INFINITY (defined in perl.h).  If the value
  354 of the number can fit in a UV, it is returned in *valuep.
 355
 356 =cut
 357  */
 358 int
 359 Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
 360 {
 361     const char *s = pv;
 362     const char *send = pv + len;
 363     const UV max_div_10 = UV_MAX / 10;
 364     const char max_mod_10 = UV_MAX % 10 + '0';
 365     int numtype = 0;
 366     int sawinf = 0;
 367
 368     while (isSPACE(*s))
 369         s++;
 370     if (*s == '-') {
 371         s++;
 372         numtype = IS_NUMBER_NEG;
 373     }
 374     else if (*s == '+')
 375         s++;
 376
 377     /* next must be digit or the radix separator or beginning of infinity */
 378     if (isDIGIT(*s)) {
 379         /* UVs are at least 32 bits, so the first 9 decimal digits cannot
 380            overflow.  */
 381         UV value = *s - '0';
 382         /* This construction seems to be more optimiser friendly.
 383            (without it gcc does the isDIGIT test and the *s - '0' separately)
 384            With it gcc on arm is managing 6 instructions (6 cycles) per digit.
 385            In theory the optimiser could deduce how far to unroll the loop
 386            before checking for overflow.  */
 387         int digit = *++s - '0';
 388         if (digit >= 0 && digit <= 9) {
 389             value = value * 10 + digit;
 390             digit = *++s - '0';
 391             if (digit >= 0 && digit <= 9) {
 392                 value = value * 10 + digit;
 393                 digit = *++s - '0';
 394                 if (digit >= 0 && digit <= 9) {
 395                     value = value * 10 + digit;
 396                     digit = *++s - '0';
 397                     if (digit >= 0 && digit <= 9) {
 398                         value = value * 10 + digit;
 399                         digit = *++s - '0';
 400                         if (digit >= 0 && digit <= 9) {
 401                             value = value * 10 + digit;
 402                             digit = *++s - '0';
 403                             if (digit >= 0 && digit <= 9) {
 404                                 value = value * 10 + digit;
 405                                 digit = *++s - '0';
 406                                 if (digit >= 0 && digit <= 9) {
 407                                     value = value * 10 + digit;
 408                                     digit = *++s - '0';
 409                                     if (digit >= 0 && digit <= 9) {
 410                                         value = value * 10 + digit;
 411                                         /* Now got 9 digits, so need to check
 412                                            each time for overflow.  */
 413                                         digit = *++s - '0';
 414                                         while (digit >= 0 && digit <= 9
 415                                                && (value < max_div_10
 416                                                    || (value == max_div_10
 417                                                        && *s <= max_mod_10))) {
 418                                             value = value * 10 + digit;
 419                                             digit = *++s - '0';
 420                                         }
 421                                         if (digit >= 0 && digit <= 9) {
 422                                             /* value overflowed.
 423                                                skip the remaining digits, don't
 424                                                worry about setting *valuep.  */
 425                                             do {
 426                                                 s++;
 427                                             } while (isDIGIT(*s));
 428                                             numtype |=
 429                                                 IS_NUMBER_GREATER_THAN_UV_MAX;
 430                                             goto skip_value;
 431                                         }
 432                                     }
 433                                 }
 434                             }
 435                         }
 436                     }
 437                 }
 438             }
 439         }
 440         numtype |= IS_NUMBER_IN_UV;
 441         if (valuep)
 442             *valuep = value;
 443
 444       skip_value:
 445         if (GROK_NUMERIC_RADIX(&s, send)) {
 446             numtype |= IS_NUMBER_NOT_INT;
 447             while (isDIGIT(*s))  /* optional digits after the radix */
 448                 s++;
 449         }
 450     }
 451     else if (GROK_NUMERIC_RADIX(&s, send)) {
 452         numtype |= IS_NUMBER_NOT_INT;
 453         /* no digits before the radix means we need digits after it */
 454         if (isDIGIT(*s)) {
 455             do {
 456                 s++;
 457             } while (isDIGIT(*s));
 458             numtype |= IS_NUMBER_IN_UV;
 459             if (valuep) {
 460                 /* integer approximation is valid - it's 0.  */
 461                 *valuep = 0;
 462             }
 463         }
 464         else
 465             return 0;
 466     }
 467     else if (*s == 'I' || *s == 'i') {
 468         s++; if (*s != 'N' && *s != 'n') return 0;
 469         s++; if (*s != 'F' && *s != 'f') return 0;
 470         s++; if (*s == 'I' || *s == 'i') {
 471             s++; if (*s != 'N' && *s != 'n') return 0;
 472             s++; if (*s != 'I' && *s != 'i') return 0;
 473             s++; if (*s != 'T' && *s != 't') return 0;
 474             s++; if (*s != 'Y' && *s != 'y') return 0;
 475             s++;
 476         }
 477         sawinf = 1;
 478     }
 479     else /* Add test for NaN here.  */
 480         return 0;
 481
 482     if (sawinf) {
 483         numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
 484         numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
 485     } else {
 486         /* we can have an optional exponent part */
 487         if (*s == 'e' || *s == 'E') {
 488             /* The only flag we keep is sign.  Blow away any "it's UV"  */
 489             numtype &= IS_NUMBER_NEG;
 490             numtype |= IS_NUMBER_NOT_INT;
 491             s++;
 492             if (*s == '-' || *s == '+')
 493                 s++;
 494             if (isDIGIT(*s)) {
 495                 do {
 496                     s++;
 497                 } while (isDIGIT(*s));
 498             }
 499             else
 500                 return 0;
 501         }
 502     }
 503     while (isSPACE(*s))
 504         s++;
 505     if (s >= send)
 506         return numtype;
 507     if (len == 10 && memEQ(pv, "0 but true", 10)) {
 508         if (valuep)
 509             *valuep = 0;
 510         return IS_NUMBER_IN_UV;
 511     }
 512     return 0;
 513 }
 514
 515 NV
 516 S_mulexp10(NV value, I32 exponent)
 517 {
 518     NV result = 1.0;
 519     NV power = 10.0;
 520     bool negative = 0;
 521     I32 bit;
 522
 523     if (exponent == 0)
 524         return value;
 525     else if (exponent < 0) {
 526         negative = 1;
 527         exponent = -exponent;
 528     }
 529     for (bit = 1; exponent; bit <<= 1) {
 530         if (exponent & bit) {
 531             exponent ^= bit;
 532             result *= power;
 533         }
 534         power *= power;
 535     }
 536     return negative ? value / result : value * result;
 537 }
 538
 539 NV
 540 Perl_my_atof(pTHX_ const char* s)
 541 {
 542     NV x = 0.0;
 543 #ifdef USE_LOCALE_NUMERIC
 544     if (PL_numeric_local && IN_LOCALE) {
 545         NV y;
 546
 547         /* Scan the number twice; once using locale and once without;
 548          * choose the larger result (in absolute value). */
 549         Perl_atof2(aTHX_ s, &x);
 550         SET_NUMERIC_STANDARD();
 551         Perl_atof2(aTHX_ s, &y);
 552         SET_NUMERIC_LOCAL();
 553         if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
 554             return y;
 555     }
 556     else
 557         Perl_atof2(aTHX_ s, &x);
 558 #else
 559     Perl_atof2(aTHX_ s, &x);
 560 #endif
 561     return x;
 562 }
 563
 564 char*
 565 Perl_my_atof2(pTHX_ const char* orig, NV* value)
 566 {
 567     NV result = 0.0;
 568     bool negative = 0;
 569     char* s = (char*)orig;
 570     char* send = s + strlen(orig) - 1;
 571     bool seendigit = 0;
 572     I32 expextra = 0;
 573     I32 exponent = 0;
 574     I32 i;
 575 /* this is arbitrary */
 576 #define PARTLIM 6
 577 /* we want the largest integers we can usefully use */
 578 #if defined(HAS_QUAD) && defined(USE_64_BIT_INT)
 579 #   define PARTSIZE ((int)TYPE_DIGITS(U64)-1)
 580     U64 part[PARTLIM];
 581 #else
 582 #   define PARTSIZE ((int)TYPE_DIGITS(U32)-1)
 583     U32 part[PARTLIM];
 584 #endif
 585     I32 ipart = 0;      /* index into part[] */
 586     I32 offcount;       /* number of digits in least significant part */
 587
 588     /* sign */
 589     switch (*s) {
 590         case '-':
 591             negative = 1;
 592             /* fall through */
 593         case '+':
 594             ++s;
 595     }
 596
 597     part[0] = offcount = 0;
 598     if (isDIGIT(*s)) {
 599         seendigit = 1;  /* get this over with */
 600
 601         /* skip leading zeros */
 602         while (*s == '0')
 603             ++s;
 604     }
 605
 606     /* integer digits */
 607     while (isDIGIT(*s)) {
 608         if (++offcount > PARTSIZE) {
 609             if (++ipart < PARTLIM) {
 610                 part[ipart] = 0;
 611                 offcount = 1;   /* ++0 */
 612             }
 613             else {
 614                 /* limits of precision reached */
 615                 --ipart;
 616                 --offcount;
 617                 if (*s >= '5')
 618                     ++part[ipart];
 619                 while (isDIGIT(*s)) {
 620                     ++expextra;
 621                     ++s;
 622                 }
 623                 /* warn of loss of precision? */
 624                 break;
 625             }
 626         }
 627         part[ipart] = part[ipart] * 10 + (*s++ - '0');
 628     }
 629
 630     /* decimal point */
 631     if (GROK_NUMERIC_RADIX((const char **)&s, send)) {
 632         if (isDIGIT(*s))
 633             seendigit = 1;      /* get this over with */
 634
 635         /* decimal digits */
 636         while (isDIGIT(*s)) {
 637             if (++offcount > PARTSIZE) {
 638                 if (++ipart < PARTLIM) {
 639                     part[ipart] = 0;
 640                     offcount = 1;       /* ++0 */
 641                 }
 642                 else {
 643                     /* limits of precision reached */
 644                     --ipart;
 645                     --offcount;
 646                     if (*s >= '5')
 647                         ++part[ipart];
 648                     while (isDIGIT(*s))
 649                         ++s;
 650                     /* warn of loss of precision? */
 651                     break;
 652                 }
 653             }
 654             --expextra;
 655             part[ipart] = part[ipart] * 10 + (*s++ - '0');
 656         }
 657     }
 658
 659     /* combine components of mantissa */
 660     for (i = 0; i <= ipart; ++i)
 661         result += S_mulexp10((NV)part[ipart - i],
 662                 i ? offcount + (i - 1) * PARTSIZE : 0);
 663
 664     if (seendigit && (*s == 'e' || *s == 'E')) {
 665         bool expnegative = 0;
 666
 667         ++s;
 668         switch (*s) {
 669             case '-':
 670                 expnegative = 1;
 671                 /* fall through */
 672             case '+':
 673                 ++s;
 674         }
 675         while (isDIGIT(*s))
 676             exponent = exponent * 10 + (*s++ - '0');
 677         if (expnegative)
 678             exponent = -exponent;
 679     }
 680
 681     /* now apply the exponent */
 682     exponent += expextra;
 683     result = S_mulexp10(result, exponent);
 684
 685     /* now apply the sign */
 686     if (negative)
 687         result = -result;
 688     *value = result;
 689     return s;
 690 }
 691