This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Use the new utf8 to code point functions
[perl5.git] / numeric.c
... / ...
CommitLineData
1/* numeric.c
2 *
3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
16 */
17
18/*
19=head1 Numeric functions
20
This file contains all the stuff needed by perl for manipulating numeric
values, including such things as replacements for the OS's atof() function.
23
24=cut
25
26*/
27
28#include "EXTERN.h"
29#define PERL_IN_NUMERIC_C
30#include "perl.h"
31
32U32
33Perl_cast_ulong(pTHX_ NV f)
34{
35 PERL_UNUSED_CONTEXT;
36 if (f < 0.0)
37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
38 if (f < U32_MAX_P1) {
39#if CASTFLAGS & 2
40 if (f < U32_MAX_P1_HALF)
41 return (U32) f;
42 f -= U32_MAX_P1_HALF;
43 return ((U32) f) | (1 + U32_MAX >> 1);
44#else
45 return (U32) f;
46#endif
47 }
48 return f > 0 ? U32_MAX : 0 /* NaN */;
49}
50
51I32
52Perl_cast_i32(pTHX_ NV f)
53{
54 PERL_UNUSED_CONTEXT;
55 if (f < I32_MAX_P1)
56 return f < I32_MIN ? I32_MIN : (I32) f;
57 if (f < U32_MAX_P1) {
58#if CASTFLAGS & 2
59 if (f < U32_MAX_P1_HALF)
60 return (I32)(U32) f;
61 f -= U32_MAX_P1_HALF;
62 return (I32)(((U32) f) | (1 + U32_MAX >> 1));
63#else
64 return (I32)(U32) f;
65#endif
66 }
67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
68}
69
70IV
71Perl_cast_iv(pTHX_ NV f)
72{
73 PERL_UNUSED_CONTEXT;
74 if (f < IV_MAX_P1)
75 return f < IV_MIN ? IV_MIN : (IV) f;
76 if (f < UV_MAX_P1) {
77#if CASTFLAGS & 2
78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
79 if (f < UV_MAX_P1_HALF)
80 return (IV)(UV) f;
81 f -= UV_MAX_P1_HALF;
82 return (IV)(((UV) f) | (1 + UV_MAX >> 1));
83#else
84 return (IV)(UV) f;
85#endif
86 }
87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
88}
89
90UV
91Perl_cast_uv(pTHX_ NV f)
92{
93 PERL_UNUSED_CONTEXT;
94 if (f < 0.0)
95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
96 if (f < UV_MAX_P1) {
97#if CASTFLAGS & 2
98 if (f < UV_MAX_P1_HALF)
99 return (UV) f;
100 f -= UV_MAX_P1_HALF;
101 return ((UV) f) | (1 + UV_MAX >> 1);
102#else
103 return (UV) f;
104#endif
105 }
106 return f > 0 ? UV_MAX : 0 /* NaN */;
107}
108
109/*
110=for apidoc grok_bin
111
112converts a string representing a binary number to numeric form.
113
114On entry I<start> and I<*len> give the string to scan, I<*flags> gives
115conversion flags, and I<result> should be NULL or a pointer to an NV.
116The scan stops at the end of the string, or the first invalid character.
117Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
118invalid character will also trigger a warning.
119On return I<*len> is set to the length of the scanned string,
120and I<*flags> gives output flags.
121
122If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
123and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
124returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
125and writes the value to I<*result> (or the value is discarded if I<result>
126is NULL).
127
128The binary number may optionally be prefixed with "0b" or "b" unless
129C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
130C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
131number may use '_' characters to separate digits.
132
133=cut
134
Not documented yet because it is experimental: C<PERL_SCAN_SILENT_NON_PORTABLE>,
which suppresses any message for non-portable numbers that are still valid
on this platform.
138 */
139
140UV
141Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
142{
143 const char *s = start;
144 STRLEN len = *len_p;
145 UV value = 0;
146 NV value_nv = 0;
147
148 const UV max_div_2 = UV_MAX / 2;
149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
150 bool overflowed = FALSE;
151 char bit;
152
153 PERL_ARGS_ASSERT_GROK_BIN;
154
155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
156 /* strip off leading b or 0b.
157 for compatibility silently suffer "b" and "0b" as valid binary
158 numbers. */
159 if (len >= 1) {
160 if (s[0] == 'b' || s[0] == 'B') {
161 s++;
162 len--;
163 }
164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
165 s+=2;
166 len-=2;
167 }
168 }
169 }
170
171 for (; len-- && (bit = *s); s++) {
172 if (bit == '0' || bit == '1') {
173 /* Write it in this wonky order with a goto to attempt to get the
174 compiler to make the common case integer-only loop pretty tight.
175 With gcc seems to be much straighter code than old scan_bin. */
176 redo:
177 if (!overflowed) {
178 if (value <= max_div_2) {
179 value = (value << 1) | (bit - '0');
180 continue;
181 }
182 /* Bah. We're just overflowed. */
183 /* diag_listed_as: Integer overflow in %s number */
184 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
185 "Integer overflow in binary number");
186 overflowed = TRUE;
187 value_nv = (NV) value;
188 }
189 value_nv *= 2.0;
190 /* If an NV has not enough bits in its mantissa to
191 * represent a UV this summing of small low-order numbers
192 * is a waste of time (because the NV cannot preserve
193 * the low-order bits anyway): we could just remember when
194 * did we overflow and in the end just multiply value_nv by the
195 * right amount. */
196 value_nv += (NV)(bit - '0');
197 continue;
198 }
199 if (bit == '_' && len && allow_underscores && (bit = s[1])
200 && (bit == '0' || bit == '1'))
201 {
202 --len;
203 ++s;
204 goto redo;
205 }
206 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
207 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
208 "Illegal binary digit '%c' ignored", *s);
209 break;
210 }
211
212 if ( ( overflowed && value_nv > 4294967295.0)
213#if UVSIZE > 4
214 || (!overflowed && value > 0xffffffff
215 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
216#endif
217 ) {
218 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
219 "Binary number > 0b11111111111111111111111111111111 non-portable");
220 }
221 *len_p = s - start;
222 if (!overflowed) {
223 *flags = 0;
224 return value;
225 }
226 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
227 if (result)
228 *result = value_nv;
229 return UV_MAX;
230}
231
232/*
233=for apidoc grok_hex
234
235converts a string representing a hex number to numeric form.
236
237On entry I<start> and I<*len> give the string to scan, I<*flags> gives
238conversion flags, and I<result> should be NULL or a pointer to an NV.
239The scan stops at the end of the string, or the first invalid character.
240Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
241invalid character will also trigger a warning.
242On return I<*len> is set to the length of the scanned string,
243and I<*flags> gives output flags.
244
245If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
246and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
247returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
248and writes the value to I<*result> (or the value is discarded if I<result>
249is NULL).
250
251The hex number may optionally be prefixed with "0x" or "x" unless
252C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
253C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
254number may use '_' characters to separate digits.
255
256=cut
257
Not documented yet because it is experimental: C<PERL_SCAN_SILENT_NON_PORTABLE>,
which suppresses any message for non-portable numbers that are still valid
on this platform.
261 */
262
263UV
264Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
265{
266 dVAR;
267 const char *s = start;
268 STRLEN len = *len_p;
269 UV value = 0;
270 NV value_nv = 0;
271 const UV max_div_16 = UV_MAX / 16;
272 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
273 bool overflowed = FALSE;
274
275 PERL_ARGS_ASSERT_GROK_HEX;
276
277 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
278 /* strip off leading x or 0x.
279 for compatibility silently suffer "x" and "0x" as valid hex numbers.
280 */
281 if (len >= 1) {
282 if (s[0] == 'x' || s[0] == 'X') {
283 s++;
284 len--;
285 }
286 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
287 s+=2;
288 len-=2;
289 }
290 }
291 }
292
293 for (; len-- && *s; s++) {
294 const char *hexdigit = strchr(PL_hexdigit, *s);
295 if (hexdigit) {
296 /* Write it in this wonky order with a goto to attempt to get the
297 compiler to make the common case integer-only loop pretty tight.
298 With gcc seems to be much straighter code than old scan_hex. */
299 redo:
300 if (!overflowed) {
301 if (value <= max_div_16) {
302 value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
303 continue;
304 }
305 /* Bah. We're just overflowed. */
306 /* diag_listed_as: Integer overflow in %s number */
307 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
308 "Integer overflow in hexadecimal number");
309 overflowed = TRUE;
310 value_nv = (NV) value;
311 }
312 value_nv *= 16.0;
313 /* If an NV has not enough bits in its mantissa to
314 * represent a UV this summing of small low-order numbers
315 * is a waste of time (because the NV cannot preserve
316 * the low-order bits anyway): we could just remember when
317 * did we overflow and in the end just multiply value_nv by the
318 * right amount of 16-tuples. */
319 value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
320 continue;
321 }
322 if (*s == '_' && len && allow_underscores && s[1]
323 && (hexdigit = strchr(PL_hexdigit, s[1])))
324 {
325 --len;
326 ++s;
327 goto redo;
328 }
329 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
330 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
331 "Illegal hexadecimal digit '%c' ignored", *s);
332 break;
333 }
334
335 if ( ( overflowed && value_nv > 4294967295.0)
336#if UVSIZE > 4
337 || (!overflowed && value > 0xffffffff
338 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
339#endif
340 ) {
341 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
342 "Hexadecimal number > 0xffffffff non-portable");
343 }
344 *len_p = s - start;
345 if (!overflowed) {
346 *flags = 0;
347 return value;
348 }
349 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
350 if (result)
351 *result = value_nv;
352 return UV_MAX;
353}
354
355/*
356=for apidoc grok_oct
357
358converts a string representing an octal number to numeric form.
359
360On entry I<start> and I<*len> give the string to scan, I<*flags> gives
361conversion flags, and I<result> should be NULL or a pointer to an NV.
362The scan stops at the end of the string, or the first invalid character.
363Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
3648 or 9 will also trigger a warning.
365On return I<*len> is set to the length of the scanned string,
366and I<*flags> gives output flags.
367
368If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
369and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
370returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
371and writes the value to I<*result> (or the value is discarded if I<result>
372is NULL).
373
374If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
375number may use '_' characters to separate digits.
376
377=cut
378
Not documented yet because it is experimental: C<PERL_SCAN_SILENT_NON_PORTABLE>,
which suppresses any message for non-portable numbers that are still valid
on this platform.
382 */
383
384UV
385Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
386{
387 const char *s = start;
388 STRLEN len = *len_p;
389 UV value = 0;
390 NV value_nv = 0;
391 const UV max_div_8 = UV_MAX / 8;
392 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
393 bool overflowed = FALSE;
394
395 PERL_ARGS_ASSERT_GROK_OCT;
396
397 for (; len-- && *s; s++) {
398 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
399 out front allows slicker code. */
400 int digit = *s - '0';
401 if (digit >= 0 && digit <= 7) {
402 /* Write it in this wonky order with a goto to attempt to get the
403 compiler to make the common case integer-only loop pretty tight.
404 */
405 redo:
406 if (!overflowed) {
407 if (value <= max_div_8) {
408 value = (value << 3) | digit;
409 continue;
410 }
411 /* Bah. We're just overflowed. */
412 /* diag_listed_as: Integer overflow in %s number */
413 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
414 "Integer overflow in octal number");
415 overflowed = TRUE;
416 value_nv = (NV) value;
417 }
418 value_nv *= 8.0;
419 /* If an NV has not enough bits in its mantissa to
420 * represent a UV this summing of small low-order numbers
421 * is a waste of time (because the NV cannot preserve
422 * the low-order bits anyway): we could just remember when
423 * did we overflow and in the end just multiply value_nv by the
424 * right amount of 8-tuples. */
425 value_nv += (NV)digit;
426 continue;
427 }
428 if (digit == ('_' - '0') && len && allow_underscores
429 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
430 {
431 --len;
432 ++s;
433 goto redo;
434 }
435 /* Allow \octal to work the DWIM way (that is, stop scanning
436 * as soon as non-octal characters are seen, complain only if
437 * someone seems to want to use the digits eight and nine). */
438 if (digit == 8 || digit == 9) {
439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
440 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
441 "Illegal octal digit '%c' ignored", *s);
442 }
443 break;
444 }
445
446 if ( ( overflowed && value_nv > 4294967295.0)
447#if UVSIZE > 4
448 || (!overflowed && value > 0xffffffff
449 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
450#endif
451 ) {
452 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
453 "Octal number > 037777777777 non-portable");
454 }
455 *len_p = s - start;
456 if (!overflowed) {
457 *flags = 0;
458 return value;
459 }
460 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
461 if (result)
462 *result = value_nv;
463 return UV_MAX;
464}
465
466/*
467=for apidoc scan_bin
468
469For backwards compatibility. Use C<grok_bin> instead.
470
471=for apidoc scan_hex
472
473For backwards compatibility. Use C<grok_hex> instead.
474
475=for apidoc scan_oct
476
477For backwards compatibility. Use C<grok_oct> instead.
478
479=cut
480 */
481
482NV
483Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
484{
485 NV rnv;
486 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
487 const UV ruv = grok_bin (start, &len, &flags, &rnv);
488
489 PERL_ARGS_ASSERT_SCAN_BIN;
490
491 *retlen = len;
492 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
493}
494
495NV
496Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
497{
498 NV rnv;
499 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
500 const UV ruv = grok_oct (start, &len, &flags, &rnv);
501
502 PERL_ARGS_ASSERT_SCAN_OCT;
503
504 *retlen = len;
505 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
506}
507
508NV
509Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
510{
511 NV rnv;
512 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
513 const UV ruv = grok_hex (start, &len, &flags, &rnv);
514
515 PERL_ARGS_ASSERT_SCAN_HEX;
516
517 *retlen = len;
518 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
519}
520
521/*
522=for apidoc grok_numeric_radix
523
524Scan and skip for a numeric decimal separator (radix).
525
526=cut
527 */
528bool
529Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
530{
531#ifdef USE_LOCALE_NUMERIC
532 dVAR;
533
534 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
535
536 if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
537 STRLEN len;
538 const char * const radix = SvPV(PL_numeric_radix_sv, len);
539 if (*sp + len <= send && memEQ(*sp, radix, len)) {
540 *sp += len;
541 return TRUE;
542 }
543 }
544 /* always try "." if numeric radix didn't match because
545 * we may have data from different locales mixed */
546#endif
547
548 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
549
550 if (*sp < send && **sp == '.') {
551 ++*sp;
552 return TRUE;
553 }
554 return FALSE;
555}
556
557/*
558=for apidoc grok_number
559
560Recognise (or not) a number. The type of the number is returned
561(0 if unrecognised), otherwise it is a bit-ORed combination of
562IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
563IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
564
If the value of the number can fit in a UV, it is returned in *valuep.
IS_NUMBER_IN_UV will be set to indicate that *valuep is valid. IS_NUMBER_IN_UV
will never be set unless *valuep is valid, but *valuep may have been assigned
to during processing even though IS_NUMBER_IN_UV is not set on return.
569If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
570valuep is non-NULL, but no actual assignment (or SEGV) will occur.
571
572IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
573seen (in which case *valuep gives the true value truncated to an integer), and
574IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
575absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
576number is larger than a UV.
577
578=cut
579 */
580int
581Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
582{
583 const char *s = pv;
584 const char * const send = pv + len;
585 const UV max_div_10 = UV_MAX / 10;
586 const char max_mod_10 = UV_MAX % 10;
587 int numtype = 0;
588 int sawinf = 0;
589 int sawnan = 0;
590
591 PERL_ARGS_ASSERT_GROK_NUMBER;
592
593 while (s < send && isSPACE(*s))
594 s++;
595 if (s == send) {
596 return 0;
597 } else if (*s == '-') {
598 s++;
599 numtype = IS_NUMBER_NEG;
600 }
601 else if (*s == '+')
602 s++;
603
604 if (s == send)
605 return 0;
606
607 /* next must be digit or the radix separator or beginning of infinity */
608 if (isDIGIT(*s)) {
609 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
610 overflow. */
611 UV value = *s - '0';
612 /* This construction seems to be more optimiser friendly.
613 (without it gcc does the isDIGIT test and the *s - '0' separately)
614 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
615 In theory the optimiser could deduce how far to unroll the loop
616 before checking for overflow. */
617 if (++s < send) {
618 int digit = *s - '0';
619 if (digit >= 0 && digit <= 9) {
620 value = value * 10 + digit;
621 if (++s < send) {
622 digit = *s - '0';
623 if (digit >= 0 && digit <= 9) {
624 value = value * 10 + digit;
625 if (++s < send) {
626 digit = *s - '0';
627 if (digit >= 0 && digit <= 9) {
628 value = value * 10 + digit;
629 if (++s < send) {
630 digit = *s - '0';
631 if (digit >= 0 && digit <= 9) {
632 value = value * 10 + digit;
633 if (++s < send) {
634 digit = *s - '0';
635 if (digit >= 0 && digit <= 9) {
636 value = value * 10 + digit;
637 if (++s < send) {
638 digit = *s - '0';
639 if (digit >= 0 && digit <= 9) {
640 value = value * 10 + digit;
641 if (++s < send) {
642 digit = *s - '0';
643 if (digit >= 0 && digit <= 9) {
644 value = value * 10 + digit;
645 if (++s < send) {
646 digit = *s - '0';
647 if (digit >= 0 && digit <= 9) {
648 value = value * 10 + digit;
649 if (++s < send) {
650 /* Now got 9 digits, so need to check
651 each time for overflow. */
652 digit = *s - '0';
653 while (digit >= 0 && digit <= 9
654 && (value < max_div_10
655 || (value == max_div_10
656 && digit <= max_mod_10))) {
657 value = value * 10 + digit;
658 if (++s < send)
659 digit = *s - '0';
660 else
661 break;
662 }
663 if (digit >= 0 && digit <= 9
664 && (s < send)) {
665 /* value overflowed.
666 skip the remaining digits, don't
667 worry about setting *valuep. */
668 do {
669 s++;
670 } while (s < send && isDIGIT(*s));
671 numtype |=
672 IS_NUMBER_GREATER_THAN_UV_MAX;
673 goto skip_value;
674 }
675 }
676 }
677 }
678 }
679 }
680 }
681 }
682 }
683 }
684 }
685 }
686 }
687 }
688 }
689 }
690 }
691 }
692 numtype |= IS_NUMBER_IN_UV;
693 if (valuep)
694 *valuep = value;
695
696 skip_value:
697 if (GROK_NUMERIC_RADIX(&s, send)) {
698 numtype |= IS_NUMBER_NOT_INT;
699 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
700 s++;
701 }
702 }
703 else if (GROK_NUMERIC_RADIX(&s, send)) {
704 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
705 /* no digits before the radix means we need digits after it */
706 if (s < send && isDIGIT(*s)) {
707 do {
708 s++;
709 } while (s < send && isDIGIT(*s));
710 if (valuep) {
711 /* integer approximation is valid - it's 0. */
712 *valuep = 0;
713 }
714 }
715 else
716 return 0;
717 } else if (*s == 'I' || *s == 'i') {
718 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
719 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
720 s++; if (s < send && (*s == 'I' || *s == 'i')) {
721 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
722 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
723 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
724 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
725 s++;
726 }
727 sawinf = 1;
728 } else if (*s == 'N' || *s == 'n') {
729 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
730 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
731 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
732 s++;
733 sawnan = 1;
734 } else
735 return 0;
736
737 if (sawinf) {
738 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
739 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
740 } else if (sawnan) {
741 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
742 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
743 } else if (s < send) {
744 /* we can have an optional exponent part */
745 if (*s == 'e' || *s == 'E') {
746 /* The only flag we keep is sign. Blow away any "it's UV" */
747 numtype &= IS_NUMBER_NEG;
748 numtype |= IS_NUMBER_NOT_INT;
749 s++;
750 if (s < send && (*s == '-' || *s == '+'))
751 s++;
752 if (s < send && isDIGIT(*s)) {
753 do {
754 s++;
755 } while (s < send && isDIGIT(*s));
756 }
757 else
758 return 0;
759 }
760 }
761 while (s < send && isSPACE(*s))
762 s++;
763 if (s >= send)
764 return numtype;
765 if (len == 10 && memEQ(pv, "0 but true", 10)) {
766 if (valuep)
767 *valuep = 0;
768 return IS_NUMBER_IN_UV;
769 }
770 return 0;
771}
772
773STATIC NV
774S_mulexp10(NV value, I32 exponent)
775{
776 NV result = 1.0;
777 NV power = 10.0;
778 bool negative = 0;
779 I32 bit;
780
781 if (exponent == 0)
782 return value;
783 if (value == 0)
784 return (NV)0;
785
786 /* On OpenVMS VAX we by default use the D_FLOAT double format,
787 * and that format does not have *easy* capabilities [1] for
788 * overflowing doubles 'silently' as IEEE fp does. We also need
789 * to support G_FLOAT on both VAX and Alpha, and though the exponent
790 * range is much larger than D_FLOAT it still doesn't do silent
791 * overflow. Therefore we need to detect early whether we would
792 * overflow (this is the behaviour of the native string-to-float
793 * conversion routines, and therefore of native applications, too).
794 *
795 * [1] Trying to establish a condition handler to trap floating point
796 * exceptions is not a good idea. */
797
798 /* In UNICOS and in certain Cray models (such as T90) there is no
799 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
800 * There is something you can do if you are willing to use some
801 * inline assembler: the instruction is called DFI-- but that will
802 * disable *all* floating point interrupts, a little bit too large
803 * a hammer. Therefore we need to catch potential overflows before
804 * it's too late. */
805
806#if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
807 STMT_START {
808 const NV exp_v = log10(value);
809 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
810 return NV_MAX;
811 if (exponent < 0) {
812 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
813 return 0.0;
814 while (-exponent >= NV_MAX_10_EXP) {
815 /* combination does not overflow, but 10^(-exponent) does */
816 value /= 10;
817 ++exponent;
818 }
819 }
820 } STMT_END;
821#endif
822
823 if (exponent < 0) {
824 negative = 1;
825 exponent = -exponent;
826 }
827 for (bit = 1; exponent; bit <<= 1) {
828 if (exponent & bit) {
829 exponent ^= bit;
830 result *= power;
831 /* Floating point exceptions are supposed to be turned off,
832 * but if we're obviously done, don't risk another iteration.
833 */
834 if (exponent == 0) break;
835 }
836 power *= power;
837 }
838 return negative ? value / result : value * result;
839}
840
841NV
842Perl_my_atof(pTHX_ const char* s)
843{
844 NV x = 0.0;
845#ifdef USE_LOCALE_NUMERIC
846 dVAR;
847
848 PERL_ARGS_ASSERT_MY_ATOF;
849
850 if (PL_numeric_local && IN_SOME_LOCALE_FORM) {
851 NV y;
852
853 /* Scan the number twice; once using locale and once without;
854 * choose the larger result (in absolute value). */
855 Perl_atof2(s, x);
856 SET_NUMERIC_STANDARD();
857 Perl_atof2(s, y);
858 SET_NUMERIC_LOCAL();
859 if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
860 return y;
861 }
862 else
863 Perl_atof2(s, x);
864#else
865 Perl_atof2(s, x);
866#endif
867 return x;
868}
869
870char*
871Perl_my_atof2(pTHX_ const char* orig, NV* value)
872{
873 NV result[3] = {0.0, 0.0, 0.0};
874 const char* s = orig;
875#ifdef USE_PERL_ATOF
876 UV accumulator[2] = {0,0}; /* before/after dp */
877 bool negative = 0;
878 const char* send = s + strlen(orig) - 1;
879 bool seen_digit = 0;
880 I32 exp_adjust[2] = {0,0};
881 I32 exp_acc[2] = {-1, -1};
882 /* the current exponent adjust for the accumulators */
883 I32 exponent = 0;
884 I32 seen_dp = 0;
885 I32 digit = 0;
886 I32 old_digit = 0;
887 I32 sig_digits = 0; /* noof significant digits seen so far */
888
889 PERL_ARGS_ASSERT_MY_ATOF2;
890
891/* There is no point in processing more significant digits
892 * than the NV can hold. Note that NV_DIG is a lower-bound value,
893 * while we need an upper-bound value. We add 2 to account for this;
894 * since it will have been conservative on both the first and last digit.
895 * For example a 32-bit mantissa with an exponent of 4 would have
896 * exact values in the set
897 * 4
898 * 8
899 * ..
900 * 17179869172
901 * 17179869176
902 * 17179869180
903 *
904 * where for the purposes of calculating NV_DIG we would have to discount
905 * both the first and last digit, since neither can hold all values from
906 * 0..9; but for calculating the value we must examine those two digits.
907 */
908#ifdef MAX_SIG_DIG_PLUS
909 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
910 possible digits in a NV, especially if NVs are not IEEE compliant
911 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
912# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
913#else
914# define MAX_SIG_DIGITS (NV_DIG+2)
915#endif
916
917/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
918#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
919
920 /* leading whitespace */
921 while (isSPACE(*s))
922 ++s;
923
924 /* sign */
925 switch (*s) {
926 case '-':
927 negative = 1;
928 /* fall through */
929 case '+':
930 ++s;
931 }
932
933 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
934
935#ifdef HAS_STRTOD
936 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
937 const char *p = negative ? s - 1 : s;
938 char *endp;
939 NV rslt;
940 rslt = strtod(p, &endp);
941 if (endp != p) {
942 *value = rslt;
943 return (char *)endp;
944 }
945 }
946#endif
947
948 /* we accumulate digits into an integer; when this becomes too
949 * large, we add the total to NV and start again */
950
951 while (1) {
952 if (isDIGIT(*s)) {
953 seen_digit = 1;
954 old_digit = digit;
955 digit = *s++ - '0';
956 if (seen_dp)
957 exp_adjust[1]++;
958
959 /* don't start counting until we see the first significant
960 * digit, eg the 5 in 0.00005... */
961 if (!sig_digits && digit == 0)
962 continue;
963
964 if (++sig_digits > MAX_SIG_DIGITS) {
965 /* limits of precision reached */
966 if (digit > 5) {
967 ++accumulator[seen_dp];
968 } else if (digit == 5) {
969 if (old_digit % 2) { /* round to even - Allen */
970 ++accumulator[seen_dp];
971 }
972 }
973 if (seen_dp) {
974 exp_adjust[1]--;
975 } else {
976 exp_adjust[0]++;
977 }
978 /* skip remaining digits */
979 while (isDIGIT(*s)) {
980 ++s;
981 if (! seen_dp) {
982 exp_adjust[0]++;
983 }
984 }
985 /* warn of loss of precision? */
986 }
987 else {
988 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
989 /* add accumulator to result and start again */
990 result[seen_dp] = S_mulexp10(result[seen_dp],
991 exp_acc[seen_dp])
992 + (NV)accumulator[seen_dp];
993 accumulator[seen_dp] = 0;
994 exp_acc[seen_dp] = 0;
995 }
996 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
997 ++exp_acc[seen_dp];
998 }
999 }
1000 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
1001 seen_dp = 1;
1002 if (sig_digits > MAX_SIG_DIGITS) {
1003 do {
1004 ++s;
1005 } while (isDIGIT(*s));
1006 break;
1007 }
1008 }
1009 else {
1010 break;
1011 }
1012 }
1013
1014 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1015 if (seen_dp) {
1016 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1017 }
1018
1019 if (seen_digit && (*s == 'e' || *s == 'E')) {
1020 bool expnegative = 0;
1021
1022 ++s;
1023 switch (*s) {
1024 case '-':
1025 expnegative = 1;
1026 /* fall through */
1027 case '+':
1028 ++s;
1029 }
1030 while (isDIGIT(*s))
1031 exponent = exponent * 10 + (*s++ - '0');
1032 if (expnegative)
1033 exponent = -exponent;
1034 }
1035
1036
1037
1038 /* now apply the exponent */
1039
1040 if (seen_dp) {
1041 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1042 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1043 } else {
1044 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1045 }
1046
1047 /* now apply the sign */
1048 if (negative)
1049 result[2] = -result[2];
1050#endif /* USE_PERL_ATOF */
1051 *value = result[2];
1052 return (char *)s;
1053}
1054
#if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
/* Substitute for modfl(), compiled only when modfl() is missing but
 * aintl() and copysignl() are available.  Stores aintl(x) in *ip and
 * returns the remainder x - *ip; when x equals *ip the remainder is a
 * zero carrying the sign of x. */
long double
Perl_my_modfl(long double x, long double *ip)
{
    *ip = aintl(x);
    if (x == *ip)
        return copysignl(0.0L, x);
    return x - *ip;
}
#endif
1063
#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
/* Substitute for frexpl(), compiled only when frexpl() is missing but
 * ilogbl() and scalbnl() are available.  Sets *e to 0 for x == 0 and to
 * ilogbl(x) + 1 otherwise, then returns x scaled by 2**(-*e). */
long double
Perl_my_frexpl(long double x, int *e) {
    if (x == 0.0L)
        *e = 0;
    else
        *e = ilogbl(x) + 1;
    return scalbnl(x, -*e);
}
#endif
1071
1072/*
1073=for apidoc Perl_signbit
1074
1075Return a non-zero integer if the sign bit on an NV is set, and 0 if
1076it is not.
1077
1078If Configure detects this system has a signbit() that will work with
1079our NVs, then we just use it via the #define in perl.h. Otherwise,
1080fall back on this implementation. As a first pass, this gets everything
1081right except -0.0. Alas, catching -0.0 is the main use for this function,
1082so this is not too helpful yet. Still, at least we have the scaffolding
1083in place to support other systems, should that prove useful.
1084
1085
1086Configure notes: This function is called 'Perl_signbit' instead of a
1087plain 'signbit' because it is easy to imagine a system having a signbit()
1088function or macro that doesn't happen to work with our particular choice
1089of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect
1090the standard system headers to be happy. Also, this is a no-context
1091function (no pTHX_) because Perl_signbit() is usually re-#defined in
1092perl.h as a simple macro call to the system's signbit().
1093Users should just always call Perl_signbit().
1094
1095=cut
1096*/
1097#if !defined(HAS_SIGNBIT)
1098int
1099Perl_signbit(NV x) {
1100 return (x < 0.0) ? 1 : 0;
1101}
1102#endif
1103
1104/*
1105 * Local variables:
1106 * c-indentation-style: bsd
1107 * c-basic-offset: 4
1108 * indent-tabs-mode: t
1109 * End:
1110 *
1111 * ex: set ts=8 sts=4 sw=4 noet:
1112 */