This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Distinguish C- and perly- literals - PERLY_SNAIL
[perl5.git] / numeric.c
CommitLineData
98994639
HS
1/* numeric.c
2 *
663f364b 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
1129b882 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
98994639
HS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
4ac71550
TC
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
98994639
HS
16 */
17
ccfc67b7 18/*
7fefc6c1 19
166f8a29
DM
20This file contains all the stuff needed by perl for manipulating numeric
21values, including such things as replacements for the OS's atof() function
22
ccfc67b7
JH
23*/
24
98994639
HS
25#include "EXTERN.h"
26#define PERL_IN_NUMERIC_C
27#include "perl.h"
28
9ec8aea5
KW
29#ifdef Perl_strtod
30
31PERL_STATIC_INLINE NV
32S_strtod(pTHX_ const char * const s, char ** e)
33{
34 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
35 NV result;
36
37 STORE_LC_NUMERIC_SET_TO_NEEDED();
38
39# ifdef USE_QUADMATH
40
41 result = strtoflt128(s, e);
42
43# elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \
44 && defined(USE_LONG_DOUBLE)
45# if defined(__MINGW64_VERSION_MAJOR)
46 /***********************************************
47 We are unable to use strtold because of
48 https://sourceforge.net/p/mingw-w64/bugs/711/
49 &
50 https://sourceforge.net/p/mingw-w64/bugs/725/
51
52 but __mingw_strtold is fine.
53 ***********************************************/
54
55 result = __mingw_strtold(s, e);
56
57# else
58
59 result = strtold(s, e);
60
61# endif
62# elif defined(HAS_STRTOD)
63
64 result = strtod(s, e);
65
02fd9d54
KW
66# else
67# error No strtod() equivalent found
9ec8aea5
KW
68# endif
69
70 RESTORE_LC_NUMERIC();
71
72 return result;
73}
74
75#endif /* #ifdef Perl_strtod */
76
77/*
78
79=for apidoc my_strtod
80
81This function is equivalent to the libc strtod() function, and is available
82even on platforms that lack plain strtod(). Its return value is the best
83available precision depending on platform capabilities and F<Configure>
84options.
85
86It properly handles the locale radix character, meaning it expects a dot except
87when called from within the scope of S<C<use locale>>, in which case the radix
88character should be that specified by the current locale.
89
e7a3fd45 90The synonym Strtod() may be used instead.
9ec8aea5
KW
91
92=cut
93
94*/
95
96NV
0f3d8cd4 97Perl_my_strtod(const char * const s, char **e)
9ec8aea5
KW
98{
99 dTHX;
100
101 PERL_ARGS_ASSERT_MY_STRTOD;
102
103#ifdef Perl_strtod
104
105 return S_strtod(aTHX_ s, e);
106
107#else
108
109 {
110 NV result;
111 char ** end_ptr = NULL;
112
113 *end_ptr = my_atof2(s, &result);
114 if (e) {
115 *e = *end_ptr;
116 }
117
118 if (! *end_ptr) {
119 result = 0.0;
120 }
121
122 return result;
123 }
124
125#endif
126
127}
128
129
98994639 130U32
ddeaf645 131Perl_cast_ulong(NV f)
98994639
HS
132{
133 if (f < 0.0)
134 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
135 if (f < U32_MAX_P1) {
136#if CASTFLAGS & 2
137 if (f < U32_MAX_P1_HALF)
138 return (U32) f;
139 f -= U32_MAX_P1_HALF;
071db91b 140 return ((U32) f) | (1 + (U32_MAX >> 1));
98994639
HS
141#else
142 return (U32) f;
143#endif
144 }
145 return f > 0 ? U32_MAX : 0 /* NaN */;
146}
147
148I32
ddeaf645 149Perl_cast_i32(NV f)
98994639
HS
150{
151 if (f < I32_MAX_P1)
152 return f < I32_MIN ? I32_MIN : (I32) f;
153 if (f < U32_MAX_P1) {
154#if CASTFLAGS & 2
155 if (f < U32_MAX_P1_HALF)
156 return (I32)(U32) f;
157 f -= U32_MAX_P1_HALF;
071db91b 158 return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
98994639
HS
159#else
160 return (I32)(U32) f;
161#endif
162 }
163 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
164}
165
166IV
ddeaf645 167Perl_cast_iv(NV f)
98994639
HS
168{
169 if (f < IV_MAX_P1)
170 return f < IV_MIN ? IV_MIN : (IV) f;
171 if (f < UV_MAX_P1) {
172#if CASTFLAGS & 2
173 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
174 if (f < UV_MAX_P1_HALF)
175 return (IV)(UV) f;
176 f -= UV_MAX_P1_HALF;
071db91b 177 return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
98994639
HS
178#else
179 return (IV)(UV) f;
180#endif
181 }
182 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
183}
184
185UV
ddeaf645 186Perl_cast_uv(NV f)
98994639
HS
187{
188 if (f < 0.0)
189 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
190 if (f < UV_MAX_P1) {
191#if CASTFLAGS & 2
192 if (f < UV_MAX_P1_HALF)
193 return (UV) f;
194 f -= UV_MAX_P1_HALF;
071db91b 195 return ((UV) f) | (1 + (UV_MAX >> 1));
98994639
HS
196#else
197 return (UV) f;
198#endif
199 }
200 return f > 0 ? UV_MAX : 0 /* NaN */;
201}
202
53305cf1
NC
203/*
204=for apidoc grok_bin
98994639 205
53305cf1
NC
206converts a string representing a binary number to numeric form.
207
6974a337
KW
208On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
209conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
210scan stops at the end of the string, or at just before the first invalid
211character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
212encountering an invalid character (except NUL) will also trigger a warning. On
213return C<*len_p> is set to the length of the scanned string, and C<*flags>
214gives output flags.
53305cf1 215
7fc63493 216If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
796b6530
KW
217and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
218returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
219and writes an approximation of the correct value into C<*result> (which is an
220NV; or the approximation is discarded if C<result> is NULL).
53305cf1 221
796b6530 222The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
6974a337
KW
223C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
224
225If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
226digits may be separated from each other by a single underscore; also a single
227leading underscore is accepted.
53305cf1 228
ceea512d
KW
229=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
230=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
231=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
232=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
ceea512d 233
53305cf1 234=cut
02470786
KW
235
236Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
237which suppresses any message for non-portable numbers that are still valid
238on this platform.
53305cf1
NC
239 */
240
241UV
7918f24d
NC
242Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
243{
7918f24d
NC
244 PERL_ARGS_ASSERT_GROK_BIN;
245
bcfb98ec 246 return grok_bin(start, len_p, flags, result);
98994639
HS
247}
248
53305cf1
NC
249/*
250=for apidoc grok_hex
251
252converts a string representing a hex number to numeric form.
253
2d7f6611 254On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
6974a337
KW
255conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
256scan stops at the end of the string, or at just before the first invalid
257character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
258encountering an invalid character (except NUL) will also trigger a warning. On
259return C<*len_p> is set to the length of the scanned string, and C<*flags>
260gives output flags.
53305cf1 261
796b6530
KW
262If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
263and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
264returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
265and writes an approximation of the correct value into C<*result> (which is an
266NV; or the approximation is discarded if C<result> is NULL).
53305cf1 267
796b6530 268The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
6974a337
KW
269C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
270
271If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
272digits may be separated from each other by a single underscore; also a single
273leading underscore is accepted.
53305cf1
NC
274
275=cut
02470786 276
2e046c5b 277Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
baf48926 278which suppresses any message for non-portable numbers, but which are valid
3f8c4d74 279on this platform. But, C<*flags> will have the corresponding flag bit set.
53305cf1
NC
280 */
281
282UV
7918f24d
NC
283Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
284{
bcfb98ec
KW
285 PERL_ARGS_ASSERT_GROK_HEX;
286
287 return grok_hex(start, len_p, flags, result);
288}
289
d05c9ddb
KW
290/*
291=for apidoc grok_oct
292
293converts a string representing an octal number to numeric form.
294
6974a337
KW
295On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
296conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
297scan stops at the end of the string, or at just before the first invalid
298character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
299encountering an invalid character (except NUL) will also trigger a warning. On
300return C<*len_p> is set to the length of the scanned string, and C<*flags>
301gives output flags.
d05c9ddb
KW
302
303If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
304and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
305returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
306and writes an approximation of the correct value into C<*result> (which is an
307NV; or the approximation is discarded if C<result> is NULL).
308
309If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
310digits may be separated from each other by a single underscore; also a single
311leading underscore is accepted.
d05c9ddb 312
a3815e44 313The C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
6974a337 314this function.
d05c9ddb
KW
315
316=cut
317
318Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
319which suppresses any message for non-portable numbers, but which are valid
320on this platform.
321 */
322
323UV
324Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
325{
326 PERL_ARGS_ASSERT_GROK_OCT;
327
328 return grok_oct(start, len_p, flags, result);
329}
330
c969ff22
KW
331STATIC void
332S_output_non_portable(pTHX_ const U8 base)
333{
334 /* Display the proper message for a number in the given input base not
335 * fitting in 32 bits */
336 const char * which = (base == 2)
337 ? "Binary number > 0b11111111111111111111111111111111"
338 : (base == 8)
339 ? "Octal number > 037777777777"
340 : "Hexadecimal number > 0xffffffff";
341
342 PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
343
344 /* Also there are listings for the other two. That's because, since they
345 * are the first word, it would be hard for a user to find them there
346 * starting with a %s */
347 /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
348 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
349}
350
bcfb98ec
KW
351UV
352Perl_grok_bin_oct_hex(pTHX_ const char *start,
353 STRLEN *len_p,
354 I32 *flags,
355 NV *result,
c969ff22 356 const unsigned shift, /* 1 for binary; 3 for octal;
bcfb98ec 357 4 for hex */
c969ff22
KW
358 const U8 class_bit,
359 const char prefix
360 )
361
bcfb98ec 362{
c969ff22
KW
363 const char *s0 = start;
364 const char *s;
53305cf1 365 STRLEN len = *len_p;
c969ff22 366 STRLEN bytes_so_far; /* How many real digits have been processed */
53305cf1
NC
367 UV value = 0;
368 NV value_nv = 0;
c969ff22
KW
369 const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
370 const UV max_div= UV_MAX / base; /* Value above which, the next digit
371 processed would overflow */
4b24f703
KW
372 const I32 input_flags = *flags;
373 const bool allow_underscores =
374 cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
c969ff22 375 bool overflowed = FALSE;
cddf31e4
KW
376
377 /* In overflows, this keeps track of how much to multiply the overflowed NV
378 * by as we continue to parse the remaining digits */
efc92487 379 NV factor = 0;
c969ff22
KW
380
381 /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
382 * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
383 * find the numeric value of a digit. That requires more instructions than
384 * OCTAL_VALUE would, but gives the same result for the narrowed range of
385 * octal digits; same for binary. If it were ever critical to squeeze more
386 * performance from this, the function could become grok_hex, and a regen
387 * perl script could scan it and write out two edited copies for the other
388 * two functions. That would improve the performance of all three
389 * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
390 * parameters are now passed to this to avoid conditionals. Those could
391 * become declared consts, like:
392 * const U8 base = 16;
393 * const U8 base = 8;
394 * ...
395 */
98994639 396
bcfb98ec
KW
397 PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
398
399 ASSUME(inRANGE(shift, 1, 4) && shift != 2);
7918f24d 400
4b24f703
KW
401 /* Clear output flags; unlikely to find a problem that sets them */
402 *flags = 0;
403
c969ff22 404 if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
bcfb98ec
KW
405
406 /* strip off leading b or 0b; x or 0x.
407 for compatibility silently suffer "b" and "0b" as valid binary; "x"
408 and "0x" as valid hex numbers. */
a4c04bdc 409 if (len >= 1) {
c969ff22
KW
410 if (isALPHA_FOLD_EQ(s0[0], prefix)) {
411 s0++;
a4c04bdc
NC
412 len--;
413 }
c969ff22
KW
414 else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
415 s0+=2;
a4c04bdc
NC
416 len-=2;
417 }
418 }
98994639
HS
419 }
420
c969ff22
KW
421 s = s0; /* s0 potentially advanced from 'start' */
422
b2fffc9a 423 /* Unroll the loop so that the first 8 digits are branchless except for the
b9abeb0c 424 * switch. A ninth hex one overflows a 32 bit word. */
c969ff22
KW
425 switch (len) {
426 case 0:
427 return 0;
428 default:
c3ee6e27 429 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
430 value = (value << shift) | XDIGIT_VALUE(*s);
431 s++;
432 /* FALLTHROUGH */
b2fffc9a 433 case 7:
c3ee6e27 434 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
b2fffc9a
KW
435 value = (value << shift) | XDIGIT_VALUE(*s);
436 s++;
437 /* FALLTHROUGH */
c969ff22 438 case 6:
c3ee6e27 439 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
440 value = (value << shift) | XDIGIT_VALUE(*s);
441 s++;
442 /* FALLTHROUGH */
443 case 5:
c3ee6e27 444 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
445 value = (value << shift) | XDIGIT_VALUE(*s);
446 s++;
447 /* FALLTHROUGH */
448 case 4:
c3ee6e27 449 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
450 value = (value << shift) | XDIGIT_VALUE(*s);
451 s++;
452 /* FALLTHROUGH */
453 case 3:
c3ee6e27 454 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
455 value = (value << shift) | XDIGIT_VALUE(*s);
456 s++;
457 /* FALLTHROUGH */
458 case 2:
c3ee6e27 459 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
460 value = (value << shift) | XDIGIT_VALUE(*s);
461 s++;
462 /* FALLTHROUGH */
463 case 1:
c3ee6e27 464 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
465 value = (value << shift) | XDIGIT_VALUE(*s);
466
b2fffc9a 467 if (LIKELY(len <= 8)) {
c969ff22
KW
468 return value;
469 }
470
471 s++;
472 break;
473 }
474
475 bytes_so_far = s - s0;
476 factor = shift << bytes_so_far;
477 len -= bytes_so_far;
478
479 for (; len--; s++) {
bcfb98ec 480 if (_generic_isCC(*s, class_bit)) {
53305cf1
NC
481 /* Write it in this wonky order with a goto to attempt to get the
482 compiler to make the common case integer-only loop pretty tight.
35e5392d
KW
483 With gcc seems to be much straighter code than old scan_hex.
484 (khw suspects that adding a LIKELY() just above would do the
485 same thing) */
53305cf1 486 redo:
ebf83045
KW
487 if (LIKELY(value <= max_div)) {
488 value = (value << shift) | XDIGIT_VALUE(*s);
489 /* Note XDIGIT_VALUE() is branchless, works on binary
490 * and octal as well, so can be used here, without
491 * slowing those down */
efc92487 492 factor *= 1 << shift;
ebf83045
KW
493 continue;
494 }
cddf31e4
KW
495
496 /* Bah. We are about to overflow. Instead, add the unoverflowed
497 * value to an NV that contains an approximation to the correct
498 * value. Each time through the loop we have increased 'factor' so
499 * that it gives how much the current approximation needs to
500 * effectively be shifted to make room for this new value */
efc92487 501 value_nv *= factor;
cddf31e4
KW
502 value_nv += (NV) value;
503
504 /* Then we keep accumulating digits, until all are parsed. We
505 * start over using the current input value. This will be added to
506 * 'value_nv' eventually, either when all digits are gone, or we
507 * have overflowed this fresh start. */
508 value = XDIGIT_VALUE(*s);
509 factor = 1 << shift;
510
c969ff22
KW
511 if (! overflowed) {
512 overflowed = TRUE;
99a25d63
KW
513 if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
514 && ckWARN_d(WARN_OVERFLOW))
515 {
516 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
bcfb98ec
KW
517 "Integer overflow in %s number",
518 (base == 16) ? "hexadecimal"
519 : (base == 2)
520 ? "binary"
521 : "octal");
99a25d63 522 }
53305cf1 523 }
53305cf1
NC
524 continue;
525 }
ebf83045 526
bcfb98ec
KW
527 if ( *s == '_'
528 && len
529 && allow_underscores
99a25d63
KW
530 && _generic_isCC(s[1], class_bit)
531
532 /* Don't allow a leading underscore if the only-medial bit is
533 * set */
534 && ( LIKELY(s > s0)
535 || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
536 != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
bcfb98ec
KW
537 {
538 --len;
539 ++s;
540 goto redo;
541 }
ebf83045 542
3f8c4d74
KW
543 if (*s) {
544 if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
17d6187b
KW
545 && ckWARN(WARN_DIGIT))
546 {
547 if (base != 8) {
548 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
549 "Illegal %s digit '%c' ignored",
550 ((base == 2)
551 ? "binary"
552 : "hexadecimal"),
553 *s);
554 }
555 else if (isDIGIT(*s)) { /* octal base */
556
557 /* Allow \octal to work the DWIM way (that is, stop
558 * scanning as soon as non-octal characters are seen,
559 * complain only if someone seems to want to use the digits
560 * eight and nine. Since we know it is not octal, then if
561 * isDIGIT, must be an 8 or 9). */
562 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
bcfb98ec 563 "Illegal octal digit '%c' ignored", *s);
3f8c4d74
KW
564 }
565 }
566
567 if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
568 *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
bcfb98ec
KW
569 }
570 }
ebf83045 571
53305cf1
NC
572 break;
573 }
19c1206d 574
c969ff22 575 *len_p = s - start;
cddf31e4 576
c969ff22 577 if (LIKELY(! overflowed)) {
53305cf1 578#if UVSIZE > 4
c969ff22 579 if ( UNLIKELY(value > 0xffffffff)
4b24f703 580 && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
c969ff22
KW
581 {
582 output_non_portable(base);
3f8c4d74 583 *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
c969ff22 584 }
53305cf1 585#endif
c969ff22 586 return value;
53305cf1 587 }
bcfb98ec 588
c969ff22 589 /* Overflowed: Calculate the final overflow approximation */
efc92487 590 value_nv *= factor;
c969ff22 591 value_nv += (NV) value;
cddf31e4 592
c969ff22 593 output_non_portable(base);
ebf83045 594
3f8c4d74
KW
595 *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
596 | PERL_SCAN_SILENT_NON_PORTABLE;
53305cf1
NC
597 if (result)
598 *result = value_nv;
599 return UV_MAX;
600}
601
602/*
53305cf1
NC
603=for apidoc scan_bin
604
72d33970 605For backwards compatibility. Use C<grok_bin> instead.
53305cf1
NC
606
607=for apidoc scan_hex
608
72d33970 609For backwards compatibility. Use C<grok_hex> instead.
53305cf1
NC
610
611=for apidoc scan_oct
612
72d33970 613For backwards compatibility. Use C<grok_oct> instead.
53305cf1
NC
614
615=cut
616 */
617
618NV
73d840c0 619Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
620{
621 NV rnv;
622 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 623 const UV ruv = grok_bin (start, &len, &flags, &rnv);
53305cf1 624
7918f24d
NC
625 PERL_ARGS_ASSERT_SCAN_BIN;
626
53305cf1
NC
627 *retlen = len;
628 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
629}
630
631NV
73d840c0 632Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
633{
634 NV rnv;
635 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 636 const UV ruv = grok_oct (start, &len, &flags, &rnv);
53305cf1 637
7918f24d
NC
638 PERL_ARGS_ASSERT_SCAN_OCT;
639
53305cf1
NC
640 *retlen = len;
641 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
642}
643
644NV
73d840c0 645Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
646{
647 NV rnv;
648 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 649 const UV ruv = grok_hex (start, &len, &flags, &rnv);
53305cf1 650
7918f24d
NC
651 PERL_ARGS_ASSERT_SCAN_HEX;
652
53305cf1
NC
653 *retlen = len;
654 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
98994639
HS
655}
656
657/*
658=for apidoc grok_numeric_radix
659
660Scan and skip for a numeric decimal separator (radix).
661
662=cut
663 */
664bool
665Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
666{
7918f24d
NC
667 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
668
7ea85fa8
KW
669#ifdef USE_LOCALE_NUMERIC
670
d6ded950 671 if (IN_LC(LC_NUMERIC)) {
f0dafd73
KW
672 STRLEN len;
673 char * radix;
674 bool matches_radix = FALSE;
67d796ae 675 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
f0dafd73 676
a1395eaf 677 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
f0dafd73
KW
678
679 radix = SvPV(PL_numeric_radix_sv, len);
680 radix = savepvn(radix, len);
681
21431899 682 RESTORE_LC_NUMERIC();
f0dafd73
KW
683
684 if (*sp + len <= send) {
685 matches_radix = memEQ(*sp, radix, len);
686 }
687
688 Safefree(radix);
689
690 if (matches_radix) {
691 *sp += len;
692 return TRUE;
693 }
98994639 694 }
f0dafd73 695
98994639 696#endif
7918f24d 697
f0dafd73
KW
698 /* always try "." if numeric radix didn't match because
699 * we may have data from different locales mixed */
98994639
HS
700 if (*sp < send && **sp == '.') {
701 ++*sp;
702 return TRUE;
703 }
f0dafd73 704
98994639
HS
705 return FALSE;
706}
707
569f27e5 708/*
ff4eb398
JH
709=for apidoc grok_infnan
710
796b6530 711Helper for C<grok_number()>, accepts various ways of spelling "infinity"
ff4eb398
JH
712or "not a number", and returns one of the following flag combinations:
713
5962c2f6 714 IS_NUMBER_INFINITY
ff4eb398 715 IS_NUMBER_NAN
5962c2f6 716 IS_NUMBER_INFINITY | IS_NUMBER_NEG
ff4eb398
JH
717 IS_NUMBER_NAN | IS_NUMBER_NEG
718 0
719
796b6530 720possibly |-ed with C<IS_NUMBER_TRAILING>.
b489e20f 721
796b6530 722If an infinity or a not-a-number is recognized, C<*sp> will point to
62bdc035 723one byte past the end of the recognized string. If the recognition fails,
796b6530 724zero is returned, and C<*sp> will not move.
ff4eb398 725
44eb6c28
KW
726=for apidoc Amnh|bool|IS_NUMBER_GREATER_THAN_UV_MAX
727=for apidoc Amnh|bool|IS_NUMBER_INFINITY
728=for apidoc Amnh|bool|IS_NUMBER_IN_UV
729=for apidoc Amnh|bool|IS_NUMBER_NAN
730=for apidoc Amnh|bool|IS_NUMBER_NEG
731=for apidoc Amnh|bool|IS_NUMBER_NOT_INT
ceea512d 732
ff4eb398
JH
733=cut
734*/
735
736int
3823048b 737Perl_grok_infnan(pTHX_ const char** sp, const char* send)
ff4eb398
JH
738{
739 const char* s = *sp;
740 int flags = 0;
a5dc2484 741#if defined(NV_INF) || defined(NV_NAN)
62bdc035 742 bool odh = FALSE; /* one-dot-hash: 1.#INF */
ff4eb398
JH
743
744 PERL_ARGS_ASSERT_GROK_INFNAN;
745
8c12dc63
JH
746 if (*s == '+') {
747 s++; if (s == send) return 0;
748 }
749 else if (*s == '-') {
ff4eb398
JH
750 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
751 s++; if (s == send) return 0;
752 }
753
754 if (*s == '1') {
62bdc035
JH
755 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
756 * Let's keep the dot optional. */
ff4eb398
JH
757 s++; if (s == send) return 0;
758 if (*s == '.') {
759 s++; if (s == send) return 0;
760 }
761 if (*s == '#') {
762 s++; if (s == send) return 0;
763 } else
764 return 0;
e855f543 765 odh = TRUE;
ff4eb398
JH
766 }
767
305b8651 768 if (isALPHA_FOLD_EQ(*s, 'I')) {
62bdc035
JH
769 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
770
305b8651 771 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
ff4eb398 772 s++; if (s == send) return 0;
305b8651 773 if (isALPHA_FOLD_EQ(*s, 'F')) {
ff4eb398 774 s++;
b8974fcb
JH
775 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
776 int fail =
777 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
778 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
779 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
780 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
781 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
3396ed30 782 s++;
b8974fcb 783 } else if (odh) {
bbd86075 784 while (s < send && *s == '0') { /* 1.#INF00 */
b8974fcb
JH
785 s++;
786 }
3396ed30 787 }
b489e20f
JH
788 while (s < send && isSPACE(*s))
789 s++;
790 if (s < send && *s) {
3396ed30 791 flags |= IS_NUMBER_TRAILING;
fae4db12 792 }
ff4eb398
JH
793 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
794 }
e855f543 795 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
ff4eb398
JH
796 s++;
797 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
bbd86075 798 while (s < send && *s == '0') { /* 1.#IND00 */
fae4db12
JH
799 s++;
800 }
bbd86075 801 if (s < send && *s) {
1e9aa12f
JH
802 flags |= IS_NUMBER_TRAILING;
803 }
ff4eb398
JH
804 } else
805 return 0;
ff4eb398
JH
806 }
807 else {
62bdc035 808 /* Maybe NAN of some sort */
3823048b
JH
809
810 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
811 /* snan, qNaN */
812 /* XXX do something with the snan/qnan difference */
813 s++; if (s == send) return 0;
814 }
815
816 if (isALPHA_FOLD_EQ(*s, 'N')) {
817 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
818 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
819 s++;
820
821 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
dca9f615
KW
822 if (s == send) {
823 return flags;
824 }
3823048b
JH
825
826 /* NaN can be followed by various stuff (NaNQ, NaNS), but
827 * there are also multiple different NaN values, and some
828 * implementations output the "payload" values,
829 * e.g. NaN123, NAN(abc), while some legacy implementations
830 * have weird stuff like NaN%. */
831 if (isALPHA_FOLD_EQ(*s, 'q') ||
832 isALPHA_FOLD_EQ(*s, 's')) {
833 /* "nanq" or "nans" are ok, though generating
834 * these portably is tricky. */
835 s++;
81d11450
KW
836 if (s == send) {
837 return flags;
838 }
3823048b
JH
839 }
840 if (*s == '(') {
841 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
842 const char *t;
843 s++;
844 if (s == send) {
845 return flags | IS_NUMBER_TRAILING;
846 }
847 t = s + 1;
848 while (t < send && *t && *t != ')') {
849 t++;
850 }
851 if (t == send) {
852 return flags | IS_NUMBER_TRAILING;
853 }
854 if (*t == ')') {
855 int nantype;
856 UV nanval;
857 if (s[0] == '0' && s + 2 < t &&
858 isALPHA_FOLD_EQ(s[1], 'x') &&
859 isXDIGIT(s[2])) {
860 STRLEN len = t - s;
861 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
862 nanval = grok_hex(s, &len, &flags, NULL);
863 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
864 nantype = 0;
865 } else {
866 nantype = IS_NUMBER_IN_UV;
867 }
868 s += len;
869 } else if (s[0] == '0' && s + 2 < t &&
870 isALPHA_FOLD_EQ(s[1], 'b') &&
871 (s[2] == '0' || s[2] == '1')) {
872 STRLEN len = t - s;
873 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
874 nanval = grok_bin(s, &len, &flags, NULL);
875 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
876 nantype = 0;
877 } else {
878 nantype = IS_NUMBER_IN_UV;
879 }
880 s += len;
881 } else {
882 const char *u;
883 nantype =
884 grok_number_flags(s, t - s, &nanval,
885 PERL_SCAN_TRAILING |
886 PERL_SCAN_ALLOW_UNDERSCORES);
887 /* Unfortunately grok_number_flags() doesn't
888 * tell how far we got and the ')' will always
889 * be "trailing", so we need to double-check
890 * whether we had something dubious. */
891 for (u = s; u < t; u++) {
892 if (!isDIGIT(*u)) {
893 flags |= IS_NUMBER_TRAILING;
894 break;
895 }
896 }
897 s = u;
898 }
899
900 /* XXX Doesn't do octal: nan("0123").
901 * Probably not a big loss. */
902
903 if ((nantype & IS_NUMBER_NOT_INT) ||
904 !(nantype && IS_NUMBER_IN_UV)) {
905 /* XXX the nanval is currently unused, that is,
906 * not inserted as the NaN payload of the NV.
907 * But the above code already parses the C99
908 * nan(...) format. See below, and see also
909 * the nan() in POSIX.xs.
910 *
911 * Certain configuration combinations where
912 * NVSIZE is greater than UVSIZE mean that
913 * a single UV cannot contain all the possible
914 * NaN payload bits. There would need to be
915 * some more generic syntax than "nan($uv)".
916 *
917 * Issues to keep in mind:
918 *
919 * (1) In most common cases there would
920 * not be an integral number of bytes that
921 * could be set, only a certain number of bits.
922 * For example for the common case of
923 * NVSIZE == UVSIZE == 8 there is room for 52
924 * bits in the payload, but the most significant
925 * bit is commonly reserved for the
926 * signaling/quiet bit, leaving 51 bits.
927 * Furthermore, the C99 nan() is supposed
928 * to generate quiet NaNs, so it is doubtful
929 * whether it should be able to generate
930 * signaling NaNs. For the x86 80-bit doubles
931 * (if building a long double Perl) there would
932 * be 62 bits (s/q bit being the 63rd).
933 *
934 * (2) Endianness of the payload bits. If the
935 * payload is specified as an UV, the low-order
936 * bits of the UV are naturally little-endianed
937 * (rightmost) bits of the payload. The endianness
938 * of UVs and NVs can be different. */
939 return 0;
940 }
941 if (s < t) {
942 flags |= IS_NUMBER_TRAILING;
943 }
944 } else {
945 /* Looked like nan(...), but no close paren. */
946 flags |= IS_NUMBER_TRAILING;
947 }
948 } else {
949 while (s < send && isSPACE(*s))
950 s++;
951 if (s < send && *s) {
952 /* Note that we here implicitly accept (parse as
953 * "nan", but with warnings) also any other weird
954 * trailing stuff for "nan". In the above we just
955 * check that if we got the C99-style "nan(...)",
956 * the "..." looks sane.
957 * If in future we accept more ways of specifying
958 * the nan payload, the accepting would happen around
959 * here. */
960 flags |= IS_NUMBER_TRAILING;
961 }
962 }
963 s = send;
964 }
965 else
966 return 0;
ff4eb398
JH
967 }
968
b489e20f
JH
969 while (s < send && isSPACE(*s))
970 s++;
971
a5dc2484
JH
972#else
973 PERL_UNUSED_ARG(send);
974#endif /* #if defined(NV_INF) || defined(NV_NAN) */
a1fe7cea
JH
975 *sp = s;
976 return flags;
ff4eb398
JH
977}
978
13393a5e 979/*
3823048b 980=for apidoc grok_number_flags
13393a5e
JH
981
982Recognise (or not) a number. The type of the number is returned
983(0 if unrecognised), otherwise it is a bit-ORed combination of
796b6530
KW
984C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
985C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
986
987If the value of the number can fit in a UV, it is returned in C<*valuep>.
988C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
989will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
990to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
991If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
992C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
993
994C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
995seen (in which case C<*valuep> gives the true value truncated to an integer), and
996C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
d0f6d176 997absolute value). C<IS_NUMBER_IN_UV> is not set if C<e> notation was used or the
13393a5e
JH
998number is larger than a UV.
999
1000C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
1001non-numeric text on an otherwise successful I<grok>, setting
1002C<IS_NUMBER_TRAILING> on the result.
1003
0b929024
KW
1004=for apidoc Amnh||PERL_SCAN_TRAILING
1005
13393a5e
JH
1006=for apidoc grok_number
1007
796b6530 1008Identical to C<grok_number_flags()> with C<flags> set to zero.
13393a5e
JH
1009
1010=cut
1011 */
1012int
1013Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
1014{
1015 PERL_ARGS_ASSERT_GROK_NUMBER;
1016
1017 return grok_number_flags(pv, len, valuep, 0);
1018}
1019
945b524a
JH
1020static const UV uv_max_div_10 = UV_MAX / 10;
1021static const U8 uv_max_mod_10 = UV_MAX % 10;
1022
3f7602fa 1023int
3823048b 1024Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
3f7602fa 1025{
60939fb8 1026 const char *s = pv;
c4420975 1027 const char * const send = pv + len;
ae776a2c 1028 const char *d;
60939fb8 1029 int numtype = 0;
60939fb8 1030
3823048b 1031 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
7918f24d 1032
129ccace
KW
1033 if (UNLIKELY(isSPACE(*s))) {
1034 s++;
1035 while (s < send) {
1036 if (LIKELY(! isSPACE(*s))) goto non_space;
1037 s++;
1038 }
1039 return 0;
1040 non_space: ;
60939fb8 1041 }
60939fb8 1042
129ccace
KW
1043 /* See if signed. This assumes it is more likely to be unsigned, so
1044 * penalizes signed by an extra conditional; rewarding unsigned by one fewer
1045 * (because we detect '+' and '-' with a single test and then add a
1046 * conditional to determine which) */
1047 if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
1048
1049 /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
1050 * 2F = '/'. That is, it is either a sign, or a character that doesn't
1051 * belong in a number at all (unless it's a radix character in a weird
1052 * locale). Given this, it's far more likely to be a minus than the
1053 * others. (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E, (not 40
17d6187b
KW
1054 * because can't be a space) 60, 62, 64, 66, 68, 6A, 6C, 6E. Again,
1055 * only potentially a weird radix character, or 4E='+', or 60='-') */
129ccace
KW
1056 if (LIKELY(*s == '-')) {
1057 s++;
1058 numtype = IS_NUMBER_NEG;
1059 }
1060 else if (LIKELY(*s == '+'))
1061 s++;
1062 else /* Can't just return failure here, as it could be a weird radix
1063 character */
1064 goto done_sign;
1065
1066 if (UNLIKELY(s == send))
1067 return 0;
1068 done_sign: ;
1069 }
60939fb8 1070
ae776a2c 1071 /* The first digit (after optional sign): note that might
8c12dc63 1072 * also point to "infinity" or "nan", or "1.#INF". */
ae776a2c
JH
1073 d = s;
1074
8c12dc63 1075 /* next must be digit or the radix separator or beginning of infinity/nan */
129ccace 1076 if (LIKELY(isDIGIT(*s))) {
60939fb8
NC
1077 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
1078 overflow. */
129ccace
KW
1079 UV value = *s - '0'; /* Process this first (perhaps only) digit */
1080 int digit;
1081
1082 s++;
1083
1084 switch(send - s) {
1085 default: /* 8 or more remaining characters */
1086 digit = *s - '0';
1087 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1088 value = value * 10 + digit;
1089 s++;
1090 /* FALLTHROUGH */
1091 case 7:
1092 digit = *s - '0';
1093 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1094 value = value * 10 + digit;
1095 s++;
1096 /* FALLTHROUGH */
1097 case 6:
1098 digit = *s - '0';
1099 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1100 value = value * 10 + digit;
1101 s++;
1102 /* FALLTHROUGH */
1103 case 5:
1104 digit = *s - '0';
1105 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1106 value = value * 10 + digit;
1107 s++;
1108 /* FALLTHROUGH */
1109 case 4:
1110 digit = *s - '0';
1111 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1112 value = value * 10 + digit;
1113 s++;
1114 /* FALLTHROUGH */
1115 case 3:
1116 digit = *s - '0';
1117 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1118 value = value * 10 + digit;
1119 s++;
1120 /* FALLTHROUGH */
1121 case 2:
1122 digit = *s - '0';
1123 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1124 value = value * 10 + digit;
1125 s++;
1126 /* FALLTHROUGH */
1127 case 1:
1128 digit = *s - '0';
1129 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
60939fb8 1130 value = value * 10 + digit;
129ccace
KW
1131 s++;
1132 /* FALLTHROUGH */
1133 case 0: /* This case means the string consists of just the one
1134 digit we already have processed */
1135
1136 /* If we got here by falling through other than the default: case, we
1137 * have processed the whole string, and know it consists entirely of
1138 * digits, and can't have overflowed. */
1139 if (s >= send) {
1140 if (valuep)
1141 *valuep = value;
1142 return numtype|IS_NUMBER_IN_UV;
1143 }
1144
1145 /* Here, there are extra characters beyond the first 9 digits. Use a
1146 * loop to accumulate any remaining digits, until we get a non-digit or
1147 * would overflow. Note that leading zeros could cause us to get here
1148 * without being close to overflowing.
1149 *
1150 * (The conditional 's >= send' above could be eliminated by making the
1151 * default: in the switch to instead be 'case 8:', and process longer
1152 * strings separately by using the loop below. This would penalize
1153 * these inputs by the extra instructions needed for looping. That
1154 * could be eliminated by copying the unwound code from above to handle
1155 * the firt 9 digits of these. khw didn't think this saving of a
1156 * single conditional was worth it.) */
1157 do {
1158 digit = *s - '0';
1159 if (! inRANGE(digit, 0, 9)) goto mantissa_done;
1160 if ( value < uv_max_div_10
1161 || ( value == uv_max_div_10
1162 && digit <= uv_max_mod_10))
1163 {
60939fb8 1164 value = value * 10 + digit;
129ccace 1165 s++;
60939fb8 1166 }
129ccace
KW
1167 else { /* value would overflow. skip the remaining digits, don't
1168 worry about setting *valuep. */
1169 do {
1170 s++;
1171 } while (s < send && isDIGIT(*s));
1172 numtype |=
1173 IS_NUMBER_GREATER_THAN_UV_MAX;
1174 goto skip_value;
1175 }
1176 } while (s < send);
1177 } /* End switch on input length */
1178
1179 mantissa_done:
60939fb8
NC
1180 numtype |= IS_NUMBER_IN_UV;
1181 if (valuep)
1182 *valuep = value;
1183
1184 skip_value:
1185 if (GROK_NUMERIC_RADIX(&s, send)) {
1186 numtype |= IS_NUMBER_NOT_INT;
1187 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
1188 s++;
98994639 1189 }
129ccace 1190 } /* End of *s is a digit */
60939fb8
NC
1191 else if (GROK_NUMERIC_RADIX(&s, send)) {
1192 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1193 /* no digits before the radix means we need digits after it */
1194 if (s < send && isDIGIT(*s)) {
1195 do {
1196 s++;
1197 } while (s < send && isDIGIT(*s));
1198 if (valuep) {
1199 /* integer approximation is valid - it's 0. */
1200 *valuep = 0;
1201 }
98994639 1202 }
60939fb8 1203 else
ae776a2c 1204 return 0;
ff4eb398 1205 }
60939fb8 1206
129ccace 1207 if (LIKELY(s > d) && s < send) {
60939fb8 1208 /* we can have an optional exponent part */
129ccace 1209 if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
60939fb8
NC
1210 s++;
1211 if (s < send && (*s == '-' || *s == '+'))
1212 s++;
1213 if (s < send && isDIGIT(*s)) {
1214 do {
1215 s++;
1216 } while (s < send && isDIGIT(*s));
1217 }
3f7602fa
TC
1218 else if (flags & PERL_SCAN_TRAILING)
1219 return numtype | IS_NUMBER_TRAILING;
60939fb8 1220 else
3f7602fa
TC
1221 return 0;
1222
1223 /* The only flag we keep is sign. Blow away any "it's UV" */
1224 numtype &= IS_NUMBER_NEG;
1225 numtype |= IS_NUMBER_NOT_INT;
60939fb8
NC
1226 }
1227 }
129ccace
KW
1228
1229 while (s < send) {
1230 if (LIKELY(! isSPACE(*s))) goto end_space;
60939fb8 1231 s++;
129ccace
KW
1232 }
1233 return numtype;
1234
1235 end_space:
1236
1237 if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
60939fb8
NC
1238 if (valuep)
1239 *valuep = 0;
1240 return IS_NUMBER_IN_UV;
1241 }
129ccace 1242
8c12dc63 1243 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
129ccace 1244 if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
8c12dc63
JH
1245 /* Really detect inf/nan. Start at d, not s, since the above
1246 * code might have already consumed the "1." or "1". */
7eff3d39 1247 const int infnan = Perl_grok_infnan(aTHX_ &d, send);
8c12dc63
JH
1248 if ((infnan & IS_NUMBER_INFINITY)) {
1249 return (numtype | infnan); /* Keep sign for infinity. */
1250 }
1251 else if ((infnan & IS_NUMBER_NAN)) {
1252 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1253 }
1254 }
3f7602fa
TC
1255 else if (flags & PERL_SCAN_TRAILING) {
1256 return numtype | IS_NUMBER_TRAILING;
1257 }
1258
60939fb8 1259 return 0;
98994639
HS
1260}
1261
6313e544 1262/*
5d4a52b5 1263=for apidoc grok_atoUV
6313e544 1264
5d4a52b5 1265parse a string, looking for a decimal unsigned integer.
338aa8b0 1266
5d4a52b5
KW
1267On entry, C<pv> points to the beginning of the string;
1268C<valptr> points to a UV that will receive the converted value, if found;
1269C<endptr> is either NULL or points to a variable that points to one byte
1270beyond the point in C<pv> that this routine should examine.
1271If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
f4379102 1272
5d4a52b5
KW
1273Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1274no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
1275value.
6313e544 1276
5d4a52b5
KW
1277If you constrain the portion of C<pv> that is looked at by this function (by
1278passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1279valid value, it will return TRUE, setting C<*endptr> to the byte following the
1280final digit of the value. But if there is no constraint at what's looked at,
b9abeb0c
KW
1281all of C<pv> must be valid in order for TRUE to be returned. C<*endptr> is
1282unchanged from its value on input if FALSE is returned;
6313e544 1283
5d4a52b5 1284The only characters this accepts are the decimal digits '0'..'9'.
338aa8b0 1285
5d4a52b5
KW
1286As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1287leading whitespace, nor negative inputs. If such features are required, the
1288calling code needs to explicitly implement those.
6313e544 1289
5d4a52b5
KW
1290Note that this function returns FALSE for inputs that would overflow a UV,
1291or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
1292C<01>, C<002>, I<etc>.
1293
1294Background: C<atoi> has severe problems with illegal inputs, it cannot be
d62b8c6a 1295used for incremental parsing, and therefore should be avoided
5d4a52b5 1296C<atoi> and C<strtol> are also affected by locale settings, which can also be
d62b8c6a
JH
1297seen as a bug (global state controlled by user environment).
1298
238217e5
JK
1299=cut
1300
6313e544
JH
1301*/
1302
22ff3130
HS
1303bool
1304Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
6313e544
JH
1305{
1306 const char* s = pv;
1307 const char** eptr;
1308 const char* end2; /* Used in case endptr is NULL. */
22ff3130 1309 UV val = 0; /* The parsed value. */
6313e544 1310
22ff3130 1311 PERL_ARGS_ASSERT_GROK_ATOUV;
6313e544 1312
5d4a52b5
KW
1313 if (endptr) {
1314 eptr = endptr;
1315 }
1316 else {
1317 end2 = s + strlen(s);
1318 eptr = &end2;
1319 }
1320
1321 if ( *eptr <= s
1322 || ! isDIGIT(*s))
1323 {
1324 return FALSE;
1325 }
1326
97d95d46
KW
1327 /* Single-digit inputs are quite common. */
1328 val = *s++ - '0';
1329 if (s < *eptr && isDIGIT(*s)) {
1330 /* Fail on extra leading zeros. */
1331 if (val == 0)
1332 return FALSE;
1333 while (s < *eptr && isDIGIT(*s)) {
1334 /* This could be unrolled like in grok_number(), but
1335 * the expected uses of this are not speed-needy, and
1336 * unlikely to need full 64-bitness. */
1337 const U8 digit = *s++ - '0';
1338 if (val < uv_max_div_10 ||
1339 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1340 val = val * 10 + digit;
1341 } else {
22ff3130 1342 return FALSE;
6313e544
JH
1343 }
1344 }
97d95d46
KW
1345 }
1346
5d4a52b5
KW
1347 if (endptr == NULL) {
1348 if (*s) {
1349 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1350 }
1351 }
1352 else {
1353 *endptr = s;
75feedba 1354 }
97d95d46 1355
22ff3130
HS
1356 *valptr = val;
1357 return TRUE;
6313e544
JH
1358}
1359
ce6f496d 1360#ifndef Perl_strtod
4801ca72 1361STATIC NV
98994639
HS
1362S_mulexp10(NV value, I32 exponent)
1363{
1364 NV result = 1.0;
1365 NV power = 10.0;
1366 bool negative = 0;
1367 I32 bit;
1368
1369 if (exponent == 0)
1370 return value;
659c4b96
DM
1371 if (value == 0)
1372 return (NV)0;
87032ba1 1373
24866caa 1374 /* On OpenVMS VAX we by default use the D_FLOAT double format,
67597c89 1375 * and that format does not have *easy* capabilities [1] for
19c1206d
KW
1376 * overflowing doubles 'silently' as IEEE fp does. We also need
1377 * to support G_FLOAT on both VAX and Alpha, and though the exponent
1378 * range is much larger than D_FLOAT it still doesn't do silent
1379 * overflow. Therefore we need to detect early whether we would
1380 * overflow (this is the behaviour of the native string-to-float
24866caa 1381 * conversion routines, and therefore of native applications, too).
67597c89 1382 *
24866caa
CB
1383 * [1] Trying to establish a condition handler to trap floating point
1384 * exceptions is not a good idea. */
87032ba1
JH
1385
1386 /* In UNICOS and in certain Cray models (such as T90) there is no
1387 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1388 * There is something you can do if you are willing to use some
1389 * inline assembler: the instruction is called DFI-- but that will
1390 * disable *all* floating point interrupts, a little bit too large
1391 * a hammer. Therefore we need to catch potential overflows before
1392 * it's too late. */
353813d9 1393
a7157111 1394#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
353813d9 1395 STMT_START {
c4420975 1396 const NV exp_v = log10(value);
353813d9
HS
1397 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1398 return NV_MAX;
1399 if (exponent < 0) {
1400 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1401 return 0.0;
1402 while (-exponent >= NV_MAX_10_EXP) {
1403 /* combination does not overflow, but 10^(-exponent) does */
1404 value /= 10;
1405 ++exponent;
1406 }
1407 }
1408 } STMT_END;
87032ba1
JH
1409#endif
1410
353813d9
HS
1411 if (exponent < 0) {
1412 negative = 1;
1413 exponent = -exponent;
b27804d8
DM
1414#ifdef NV_MAX_10_EXP
1415 /* for something like 1234 x 10^-309, the action of calculating
1416 * the intermediate value 10^309 then returning 1234 / (10^309)
1417 * will fail, since 10^309 becomes infinity. In this case try to
1418 * refactor it as 123 / (10^308) etc.
1419 */
1420 while (value && exponent > NV_MAX_10_EXP) {
1421 exponent--;
1422 value /= 10;
1423 }
48853916
JH
1424 if (value == 0.0)
1425 return value;
b27804d8 1426#endif
353813d9 1427 }
c62e754c
JH
1428#if defined(__osf__)
1429 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1430 * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1431 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1432 * but that breaks another set of infnan.t tests. */
1433# define FP_OVERFLOWS_TO_ZERO
1434#endif
98994639
HS
1435 for (bit = 1; exponent; bit <<= 1) {
1436 if (exponent & bit) {
1437 exponent ^= bit;
1438 result *= power;
c62e754c
JH
1439#ifdef FP_OVERFLOWS_TO_ZERO
1440 if (result == 0)
a7157111 1441# ifdef NV_INF
c62e754c 1442 return value < 0 ? -NV_INF : NV_INF;
a7157111
JH
1443# else
1444 return value < 0 ? -FLT_MAX : FLT_MAX;
1445# endif
c62e754c 1446#endif
236f0012 1447 /* Floating point exceptions are supposed to be turned off,
19c1206d 1448 * but if we're obviously done, don't risk another iteration.
236f0012
CB
1449 */
1450 if (exponent == 0) break;
98994639
HS
1451 }
1452 power *= power;
1453 }
1454 return negative ? value / result : value * result;
1455}
ce6f496d 1456#endif /* #ifndef Perl_strtod */
98994639 1457
ce6f496d 1458#ifdef Perl_strtod
b93d1309 1459# define ATOF(s, x) my_atof2(s, &x)
f7b64c80 1460#else
b93d1309 1461# define ATOF(s, x) Perl_atof2(s, x)
f7b64c80 1462#endif
b93d1309 1463
98994639
HS
1464NV
1465Perl_my_atof(pTHX_ const char* s)
1466{
b1de2493
KW
1467
1468/*
1469=for apidoc my_atof
1470
1471L<C<atof>(3)>, but properly works with Perl locale handling, accepting a dot
1472radix character always, but also the current locale's radix character if and
1473only if called from within the lexical scope of a Perl C<use locale> statement.
1474
1475N.B. C<s> must be NUL terminated.
1476
1477=cut
1478*/
f720c878 1479
98994639 1480 NV x = 0.0;
9eda1ea6
KW
1481
1482 PERL_ARGS_ASSERT_MY_ATOF;
1483
b93d1309 1484#if ! defined(USE_LOCALE_NUMERIC)
9eda1ea6 1485
b93d1309 1486 ATOF(s, x);
9eda1ea6
KW
1487
1488#else
7918f24d 1489
a2287a13 1490 {
67d796ae
KW
1491 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1492 STORE_LC_NUMERIC_SET_TO_NEEDED();
fdf55d20
KW
1493 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1494 ATOF(s,x);
1495 }
1496 else {
19c1206d 1497
e4850248
KW
1498 /* Look through the string for the first thing that looks like a
1499 * decimal point: either the value in the current locale or the
1500 * standard fallback of '.'. The one which appears earliest in the
1501 * input string is the one that we should have atof look for. Note
1502 * that we have to determine this beforehand because on some
1503 * systems, Perl_atof2 is just a wrapper around the system's atof.
1504 * */
1ae85f6c
KW
1505 const char * const standard_pos = strchr(s, '.');
1506 const char * const local_pos
1507 = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1508 const bool use_standard_radix
1509 = standard_pos && (!local_pos || standard_pos < local_pos);
78787052 1510
665873e9 1511 if (use_standard_radix) {
e4850248 1512 SET_NUMERIC_STANDARD();
665873e9
KW
1513 LOCK_LC_NUMERIC_STANDARD();
1514 }
78787052 1515
b93d1309 1516 ATOF(s,x);
78787052 1517
665873e9
KW
1518 if (use_standard_radix) {
1519 UNLOCK_LC_NUMERIC_STANDARD();
67d796ae 1520 SET_NUMERIC_UNDERLYING();
665873e9 1521 }
e4850248 1522 }
a2287a13
KW
1523 RESTORE_LC_NUMERIC();
1524 }
9eda1ea6 1525
98994639 1526#endif
9eda1ea6 1527
98994639
HS
1528 return x;
1529}
1530
a7157111 1531#if defined(NV_INF) || defined(NV_NAN)
3c81f0b3 1532
829757a4 1533static char*
5563f457 1534S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
829757a4
JH
1535{
1536 const char *p0 = negative ? s - 1 : s;
1537 const char *p = p0;
7eff3d39 1538 const int infnan = grok_infnan(&p, send);
829757a4
JH
1539 if (infnan && p != p0) {
1540 /* If we can generate inf/nan directly, let's do so. */
1541#ifdef NV_INF
1542 if ((infnan & IS_NUMBER_INFINITY)) {
3823048b 1543 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
829757a4
JH
1544 return (char*)p;
1545 }
1546#endif
1547#ifdef NV_NAN
1548 if ((infnan & IS_NUMBER_NAN)) {
3823048b 1549 *value = NV_NAN;
829757a4
JH
1550 return (char*)p;
1551 }
1552#endif
1553#ifdef Perl_strtod
68611e6f 1554 /* If still here, we didn't have either NV_INF or NV_NAN,
829757a4
JH
1555 * and can try falling back to native strtod/strtold.
1556 *
1557 * The native interface might not recognize all the possible
1558 * inf/nan strings Perl recognizes. What we can try
1559 * is to try faking the input. We will try inf/-inf/nan
1560 * as the most promising/portable input. */
1561 {
6d37e916 1562 const char* fake = "silence compiler warning";
829757a4
JH
1563 char* endp;
1564 NV nv;
a7157111 1565#ifdef NV_INF
829757a4
JH
1566 if ((infnan & IS_NUMBER_INFINITY)) {
1567 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1568 }
a7157111
JH
1569#endif
1570#ifdef NV_NAN
1571 if ((infnan & IS_NUMBER_NAN)) {
829757a4
JH
1572 fake = "nan";
1573 }
a7157111 1574#endif
6d37e916 1575 assert(strNE(fake, "silence compiler warning"));
9ec8aea5 1576 nv = S_strtod(aTHX_ fake, &endp);
829757a4 1577 if (fake != endp) {
a7157111 1578#ifdef NV_INF
829757a4 1579 if ((infnan & IS_NUMBER_INFINITY)) {
a7157111 1580# ifdef Perl_isinf
829757a4
JH
1581 if (Perl_isinf(nv))
1582 *value = nv;
a7157111 1583# else
829757a4
JH
1584 /* last resort, may generate SIGFPE */
1585 *value = Perl_exp((NV)1e9);
1586 if ((infnan & IS_NUMBER_NEG))
1587 *value = -*value;
a7157111 1588# endif
829757a4
JH
1589 return (char*)p; /* p, not endp */
1590 }
a7157111
JH
1591#endif
1592#ifdef NV_NAN
1593 if ((infnan & IS_NUMBER_NAN)) {
1594# ifdef Perl_isnan
829757a4
JH
1595 if (Perl_isnan(nv))
1596 *value = nv;
a7157111 1597# else
829757a4
JH
1598 /* last resort, may generate SIGFPE */
1599 *value = Perl_log((NV)-1.0);
a7157111 1600# endif
829757a4 1601 return (char*)p; /* p, not endp */
a7157111 1602#endif
829757a4
JH
1603 }
1604 }
1605 }
1606#endif /* #ifdef Perl_strtod */
1607 }
1608 return NULL;
1609}
1610
a7157111
JH
1611#endif /* if defined(NV_INF) || defined(NV_NAN) */
1612
98994639
HS
1613char*
1614Perl_my_atof2(pTHX_ const char* orig, NV* value)
1615{
6928bedc
KW
1616 PERL_ARGS_ASSERT_MY_ATOF2;
1617 return my_atof3(orig, value, 0);
1618}
1619
1620char*
16411967 1621Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
6928bedc 1622{
e1ec3a88 1623 const char* s = orig;
a4eca1d4 1624 NV result[3] = {0.0, 0.0, 0.0};
ce6f496d 1625#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
6928bedc
KW
1626 const char* send = s + ((len != 0)
1627 ? len
1628 : strlen(orig)); /* one past the last */
a4eca1d4
JH
1629 bool negative = 0;
1630#endif
ce6f496d 1631#if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
a4eca1d4 1632 UV accumulator[2] = {0,0}; /* before/after dp */
8194bf88 1633 bool seen_digit = 0;
20f6aaab
AS
1634 I32 exp_adjust[2] = {0,0};
1635 I32 exp_acc[2] = {-1, -1};
1636 /* the current exponent adjust for the accumulators */
98994639 1637 I32 exponent = 0;
8194bf88 1638 I32 seen_dp = 0;
20f6aaab
AS
1639 I32 digit = 0;
1640 I32 old_digit = 0;
8194bf88 1641 I32 sig_digits = 0; /* noof significant digits seen so far */
a4eca1d4 1642#endif
8194bf88 1643
ce6f496d 1644#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
6928bedc 1645 PERL_ARGS_ASSERT_MY_ATOF3;
7918f24d 1646
a4eca1d4 1647 /* leading whitespace */
6928bedc 1648 while (s < send && isSPACE(*s))
a4eca1d4
JH
1649 ++s;
1650
1651 /* sign */
1652 switch (*s) {
1653 case '-':
1654 negative = 1;
1655 /* FALLTHROUGH */
1656 case '+':
1657 ++s;
1658 }
1659#endif
1660
ce6f496d 1661#ifdef Perl_strtod
a4eca1d4
JH
1662 {
1663 char* endp;
d94e901a
KW
1664 char* copy = NULL;
1665
adc55e02 1666 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
a4eca1d4 1667 return endp;
d94e901a 1668
14d26b44
TC
1669 /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1670 0b-prefixed binary numbers, which is backward incompatible
1671 */
e56dfd96 1672 if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
14d26b44
TC
1673 (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1674 *value = 0;
1675 return (char *)s+1;
1676 }
1677
d94e901a
KW
1678 /* If the length is passed in, the input string isn't NUL-terminated,
1679 * and in it turns out the function below assumes it is; therefore we
1680 * create a copy and NUL-terminate that */
1681 if (len) {
1682 Newx(copy, len + 1, char);
1683 Copy(orig, copy, len, char);
1684 copy[len] = '\0';
1685 s = copy + (s - orig);
1686 }
1687
9ec8aea5 1688 result[2] = S_strtod(aTHX_ s, &endp);
d94e901a
KW
1689
1690 /* If we created a copy, 'endp' is in terms of that. Convert back to
1691 * the original */
1692 if (copy) {
aac39b03 1693 s = (s - copy) + (char *) orig;
d94e901a
KW
1694 endp = (endp - copy) + (char *) orig;
1695 Safefree(copy);
1696 }
1697
a4eca1d4
JH
1698 if (s != endp) {
1699 *value = negative ? -result[2] : result[2];
1700 return endp;
1701 }
1702 return NULL;
1703 }
1704#elif defined(USE_PERL_ATOF)
1705
8194bf88
DM
1706/* There is no point in processing more significant digits
1707 * than the NV can hold. Note that NV_DIG is a lower-bound value,
1708 * while we need an upper-bound value. We add 2 to account for this;
1709 * since it will have been conservative on both the first and last digit.
1710 * For example a 32-bit mantissa with an exponent of 4 would have
1711 * exact values in the set
1712 * 4
1713 * 8
1714 * ..
1715 * 17179869172
1716 * 17179869176
1717 * 17179869180
1718 *
1719 * where for the purposes of calculating NV_DIG we would have to discount
1720 * both the first and last digit, since neither can hold all values from
1721 * 0..9; but for calculating the value we must examine those two digits.
1722 */
ffa277e5
AS
1723#ifdef MAX_SIG_DIG_PLUS
1724 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1725 possible digits in a NV, especially if NVs are not IEEE compliant
1726 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1727# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1728#else
1729# define MAX_SIG_DIGITS (NV_DIG+2)
1730#endif
8194bf88
DM
1731
1732/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1733#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
98994639 1734
a5dc2484 1735#if defined(NV_INF) || defined(NV_NAN)
ae776a2c 1736 {
7eff3d39 1737 char* endp;
5563f457 1738 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
7eff3d39 1739 return endp;
ae776a2c 1740 }
a5dc2484 1741#endif
2b54f59f 1742
8194bf88
DM
1743 /* we accumulate digits into an integer; when this becomes too
1744 * large, we add the total to NV and start again */
98994639 1745
6928bedc 1746 while (s < send) {
8194bf88
DM
1747 if (isDIGIT(*s)) {
1748 seen_digit = 1;
20f6aaab 1749 old_digit = digit;
8194bf88 1750 digit = *s++ - '0';
20f6aaab
AS
1751 if (seen_dp)
1752 exp_adjust[1]++;
98994639 1753
8194bf88
DM
1754 /* don't start counting until we see the first significant
1755 * digit, eg the 5 in 0.00005... */
1756 if (!sig_digits && digit == 0)
1757 continue;
1758
1759 if (++sig_digits > MAX_SIG_DIGITS) {
98994639 1760 /* limits of precision reached */
20f6aaab
AS
1761 if (digit > 5) {
1762 ++accumulator[seen_dp];
1763 } else if (digit == 5) {
1764 if (old_digit % 2) { /* round to even - Allen */
1765 ++accumulator[seen_dp];
1766 }
1767 }
1768 if (seen_dp) {
1769 exp_adjust[1]--;
1770 } else {
1771 exp_adjust[0]++;
1772 }
8194bf88 1773 /* skip remaining digits */
6928bedc 1774 while (s < send && isDIGIT(*s)) {
98994639 1775 ++s;
20f6aaab
AS
1776 if (! seen_dp) {
1777 exp_adjust[0]++;
1778 }
98994639
HS
1779 }
1780 /* warn of loss of precision? */
98994639 1781 }
8194bf88 1782 else {
20f6aaab 1783 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
8194bf88 1784 /* add accumulator to result and start again */
20f6aaab
AS
1785 result[seen_dp] = S_mulexp10(result[seen_dp],
1786 exp_acc[seen_dp])
1787 + (NV)accumulator[seen_dp];
1788 accumulator[seen_dp] = 0;
1789 exp_acc[seen_dp] = 0;
98994639 1790 }
20f6aaab
AS
1791 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1792 ++exp_acc[seen_dp];
98994639 1793 }
8194bf88 1794 }
e1ec3a88 1795 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
8194bf88 1796 seen_dp = 1;
20f6aaab 1797 if (sig_digits > MAX_SIG_DIGITS) {
6928bedc 1798 while (s < send && isDIGIT(*s)) {
20f6aaab 1799 ++s;
9604fbf0 1800 }
20f6aaab
AS
1801 break;
1802 }
8194bf88
DM
1803 }
1804 else {
1805 break;
98994639
HS
1806 }
1807 }
1808
20f6aaab
AS
1809 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1810 if (seen_dp) {
1811 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1812 }
98994639 1813
6928bedc 1814 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
98994639
HS
1815 bool expnegative = 0;
1816
1817 ++s;
1818 switch (*s) {
1819 case '-':
1820 expnegative = 1;
924ba076 1821 /* FALLTHROUGH */
98994639
HS
1822 case '+':
1823 ++s;
1824 }
6928bedc 1825 while (s < send && isDIGIT(*s))
98994639
HS
1826 exponent = exponent * 10 + (*s++ - '0');
1827 if (expnegative)
1828 exponent = -exponent;
1829 }
1830
1831 /* now apply the exponent */
20f6aaab
AS
1832
1833 if (seen_dp) {
1834 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1835 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1836 } else {
1837 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1838 }
98994639
HS
1839
1840 /* now apply the sign */
1841 if (negative)
20f6aaab 1842 result[2] = -result[2];
20f6aaab 1843 *value = result[2];
73d840c0 1844 return (char *)s;
dfee9352
TC
1845#else /* USE_PERL_ATOF */
1846 /* If you see this error you both don't have strtod (or configured -Ud_strtod or
1847 or it's long double/quadmath equivalent) and disabled USE_PERL_ATOF, thus
1848 removing any way for perl to convert strings to floating point numbers.
1849 */
1850# error No mechanism to convert strings to numbers available
1851#endif
98994639
HS
1852}
1853
5d34af89 1854/*
3d9d9213 1855=for apidoc isinfnan
5d34af89 1856
5f4d68d1 1857C<Perl_isinfnan()> is a utility function that returns true if the NV
796b6530
KW
1858argument is either an infinity or a C<NaN>, false otherwise. To test
1859in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
5d34af89 1860
68611e6f
JH
1861This is also the logical inverse of Perl_isfinite().
1862
5d34af89
JH
1863=cut
1864*/
1cd88304
JH
1865bool
1866Perl_isinfnan(NV nv)
1867{
a5dc2484 1868 PERL_UNUSED_ARG(nv);
1cd88304
JH
1869#ifdef Perl_isinf
1870 if (Perl_isinf(nv))
1871 return TRUE;
1872#endif
1873#ifdef Perl_isnan
1874 if (Perl_isnan(nv))
1875 return TRUE;
1876#endif
1877 return FALSE;
1878}
1879
354b74ae 1880/*
af147c81 1881=for apidoc isinfnansv
354b74ae 1882
796b6530 1883Checks whether the argument would be either an infinity or C<NaN> when used
354b74ae 1884as a number, but is careful not to trigger non-numeric or uninitialized
796b6530 1885warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already.
354b74ae
FC
1886
1887=cut
1888*/
1889
1890bool
1891Perl_isinfnansv(pTHX_ SV *sv)
1892{
1893 PERL_ARGS_ASSERT_ISINFNANSV;
1894 if (!SvOK(sv))
1895 return FALSE;
1896 if (SvNOKp(sv))
1897 return Perl_isinfnan(SvNVX(sv));
1898 if (SvIOKp(sv))
1899 return FALSE;
1900 {
1901 STRLEN len;
1902 const char *s = SvPV_nomg_const(sv, len);
3823048b 1903 return cBOOL(grok_infnan(&s, s+len));
354b74ae
FC
1904 }
1905}
1906
d67dac15 1907#ifndef HAS_MODFL
68611e6f
JH
1908/* C99 has truncl, pre-C99 Solaris had aintl. We can use either with
1909 * copysignl to emulate modfl, which is in some platforms missing or
1910 * broken. */
d67dac15
JH
1911# if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1912long double
1913Perl_my_modfl(long double x, long double *ip)
1914{
68611e6f
JH
1915 *ip = truncl(x);
1916 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
d67dac15
JH
1917}
1918# elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
55954f19
JH
1919long double
1920Perl_my_modfl(long double x, long double *ip)
1921{
68611e6f
JH
1922 *ip = aintl(x);
1923 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
55954f19 1924}
d67dac15 1925# endif
55954f19
JH
1926#endif
1927
7b9b7dff 1928/* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
55954f19
JH
1929#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1930long double
1931Perl_my_frexpl(long double x, int *e) {
68611e6f
JH
1932 *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1933 return (scalbnl(x, -*e));
55954f19
JH
1934}
1935#endif
66610fdd
RGS
1936
1937/*
ed140128
AD
1938=for apidoc Perl_signbit
1939
1940Return a non-zero integer if the sign bit on an NV is set, and 0 if
19c1206d 1941it is not.
ed140128 1942
796b6530
KW
1943If F<Configure> detects this system has a C<signbit()> that will work with
1944our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise,
8b7fad81 1945fall back on this implementation. The main use of this function
796b6530 1946is catching C<-0.0>.
ed140128 1947
796b6530
KW
1948C<Configure> notes: This function is called C<'Perl_signbit'> instead of a
1949plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
ed140128 1950function or macro that doesn't happen to work with our particular choice
796b6530 1951of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
ed140128 1952the standard system headers to be happy. Also, this is a no-context
796b6530
KW
1953function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1954F<perl.h> as a simple macro call to the system's C<signbit()>.
1955Users should just always call C<Perl_signbit()>.
ed140128
AD
1956
1957=cut
1958*/
1959#if !defined(HAS_SIGNBIT)
1960int
1961Perl_signbit(NV x) {
8b7fad81 1962# ifdef Perl_fp_class_nzero
406d5545
JH
1963 return Perl_fp_class_nzero(x);
1964 /* Try finding the high byte, and assume it's highest bit
1965 * is the sign. This assumption is probably wrong somewhere. */
572cd850
JH
1966# elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1967 return (((unsigned char *)&x)[9] & 0x80);
1968# elif defined(NV_LITTLE_ENDIAN)
1969 /* Note that NVSIZE is sizeof(NV), which would make the below be
1970 * wrong if the end bytes are unused, which happens with the x86
1971 * 80-bit long doubles, which is why take care of that above. */
1972 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1973# elif defined(NV_BIG_ENDIAN)
1974 return (((unsigned char *)&x)[0] & 0x80);
1975# else
406d5545 1976 /* This last resort fallback is wrong for the negative zero. */
3585840c 1977 return (x < 0.0) ? 1 : 0;
572cd850 1978# endif
ed140128
AD
1979}
1980#endif
1981
1982/*
14d04a33 1983 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1984 */