This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Unicode 13.0 is now official
[perl5.git] / numeric.c
CommitLineData
98994639
HS
1/* numeric.c
2 *
663f364b 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
1129b882 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
98994639
HS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
4ac71550
TC
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
98994639
HS
16 */
17
ccfc67b7
JH
18/*
19=head1 Numeric functions
166f8a29 20
7fefc6c1
KW
21=cut
22
166f8a29
DM
23This file contains all the stuff needed by perl for manipulating numeric
24values, including such things as replacements for the OS's atof() function
25
ccfc67b7
JH
26*/
27
98994639
HS
28#include "EXTERN.h"
29#define PERL_IN_NUMERIC_C
30#include "perl.h"
31
9ec8aea5
KW
32#ifdef Perl_strtod
33
34PERL_STATIC_INLINE NV
35S_strtod(pTHX_ const char * const s, char ** e)
36{
37 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
38 NV result;
39
40 STORE_LC_NUMERIC_SET_TO_NEEDED();
41
42# ifdef USE_QUADMATH
43
44 result = strtoflt128(s, e);
45
46# elif defined(HAS_STRTOLD) && defined(HAS_LONG_DOUBLE) \
47 && defined(USE_LONG_DOUBLE)
48# if defined(__MINGW64_VERSION_MAJOR)
49 /***********************************************
50 We are unable to use strtold because of
51 https://sourceforge.net/p/mingw-w64/bugs/711/
52 &
53 https://sourceforge.net/p/mingw-w64/bugs/725/
54
55 but __mingw_strtold is fine.
56 ***********************************************/
57
58 result = __mingw_strtold(s, e);
59
60# else
61
62 result = strtold(s, e);
63
64# endif
65# elif defined(HAS_STRTOD)
66
67 result = strtod(s, e);
68
02fd9d54
KW
69# else
70# error No strtod() equivalent found
9ec8aea5
KW
71# endif
72
73 RESTORE_LC_NUMERIC();
74
75 return result;
76}
77
78#endif /* #ifdef Perl_strtod */
79
80/*
81
82=for apidoc my_strtod
83
84This function is equivalent to the libc strtod() function, and is available
85even on platforms that lack plain strtod(). Its return value is the best
86available precision depending on platform capabilities and F<Configure>
87options.
88
89It properly handles the locale radix character, meaning it expects a dot except
90when called from within the scope of S<C<use locale>>, in which case the radix
91character should be that specified by the current locale.
92
e7a3fd45 93The synonym Strtod() may be used instead.
9ec8aea5
KW
94
95=cut
96
97*/
98
99NV
0f3d8cd4 100Perl_my_strtod(const char * const s, char **e)
9ec8aea5
KW
101{
102 dTHX;
103
104 PERL_ARGS_ASSERT_MY_STRTOD;
105
106#ifdef Perl_strtod
107
108 return S_strtod(aTHX_ s, e);
109
110#else
111
112 {
113 NV result;
114 char ** end_ptr = NULL;
115
116 *end_ptr = my_atof2(s, &result);
117 if (e) {
118 *e = *end_ptr;
119 }
120
121 if (! *end_ptr) {
122 result = 0.0;
123 }
124
125 return result;
126 }
127
128#endif
129
130}
131
132
98994639 133U32
ddeaf645 134Perl_cast_ulong(NV f)
98994639
HS
135{
136 if (f < 0.0)
137 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
138 if (f < U32_MAX_P1) {
139#if CASTFLAGS & 2
140 if (f < U32_MAX_P1_HALF)
141 return (U32) f;
142 f -= U32_MAX_P1_HALF;
071db91b 143 return ((U32) f) | (1 + (U32_MAX >> 1));
98994639
HS
144#else
145 return (U32) f;
146#endif
147 }
148 return f > 0 ? U32_MAX : 0 /* NaN */;
149}
150
151I32
ddeaf645 152Perl_cast_i32(NV f)
98994639
HS
153{
154 if (f < I32_MAX_P1)
155 return f < I32_MIN ? I32_MIN : (I32) f;
156 if (f < U32_MAX_P1) {
157#if CASTFLAGS & 2
158 if (f < U32_MAX_P1_HALF)
159 return (I32)(U32) f;
160 f -= U32_MAX_P1_HALF;
071db91b 161 return (I32)(((U32) f) | (1 + (U32_MAX >> 1)));
98994639
HS
162#else
163 return (I32)(U32) f;
164#endif
165 }
166 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
167}
168
169IV
ddeaf645 170Perl_cast_iv(NV f)
98994639
HS
171{
172 if (f < IV_MAX_P1)
173 return f < IV_MIN ? IV_MIN : (IV) f;
174 if (f < UV_MAX_P1) {
175#if CASTFLAGS & 2
176 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
177 if (f < UV_MAX_P1_HALF)
178 return (IV)(UV) f;
179 f -= UV_MAX_P1_HALF;
071db91b 180 return (IV)(((UV) f) | (1 + (UV_MAX >> 1)));
98994639
HS
181#else
182 return (IV)(UV) f;
183#endif
184 }
185 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
186}
187
188UV
ddeaf645 189Perl_cast_uv(NV f)
98994639
HS
190{
191 if (f < 0.0)
192 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
193 if (f < UV_MAX_P1) {
194#if CASTFLAGS & 2
195 if (f < UV_MAX_P1_HALF)
196 return (UV) f;
197 f -= UV_MAX_P1_HALF;
071db91b 198 return ((UV) f) | (1 + (UV_MAX >> 1));
98994639
HS
199#else
200 return (UV) f;
201#endif
202 }
203 return f > 0 ? UV_MAX : 0 /* NaN */;
204}
205
53305cf1
NC
206/*
207=for apidoc grok_bin
98994639 208
53305cf1
NC
209converts a string representing a binary number to numeric form.
210
6974a337
KW
211On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
212conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
213scan stops at the end of the string, or at just before the first invalid
214character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
215encountering an invalid character (except NUL) will also trigger a warning. On
216return C<*len_p> is set to the length of the scanned string, and C<*flags>
217gives output flags.
53305cf1 218
7fc63493 219If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
796b6530
KW
220and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_bin>
221returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
222and writes an approximation of the correct value into C<*result> (which is an
223NV; or the approximation is discarded if C<result> is NULL).
53305cf1 224
796b6530 225The binary number may optionally be prefixed with C<"0b"> or C<"b"> unless
6974a337
KW
226C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
227
228If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
229digits may be separated from each other by a single underscore; also a single
230leading underscore is accepted.
53305cf1 231
ceea512d
KW
232=for apidoc Amnh||PERL_SCAN_ALLOW_UNDERSCORES
233=for apidoc Amnh||PERL_SCAN_DISALLOW_PREFIX
234=for apidoc Amnh||PERL_SCAN_GREATER_THAN_UV_MAX
235=for apidoc Amnh||PERL_SCAN_SILENT_ILLDIGIT
ceea512d 236
53305cf1 237=cut
02470786
KW
238
239Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
240which suppresses any message for non-portable numbers that are still valid
241on this platform.
53305cf1
NC
242 */
243
244UV
7918f24d
NC
245Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
246{
7918f24d
NC
247 PERL_ARGS_ASSERT_GROK_BIN;
248
bcfb98ec 249 return grok_bin(start, len_p, flags, result);
98994639
HS
250}
251
53305cf1
NC
252/*
253=for apidoc grok_hex
254
255converts a string representing a hex number to numeric form.
256
2d7f6611 257On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
6974a337
KW
258conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
259scan stops at the end of the string, or at just before the first invalid
260character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
261encountering an invalid character (except NUL) will also trigger a warning. On
262return C<*len_p> is set to the length of the scanned string, and C<*flags>
263gives output flags.
53305cf1 264
796b6530
KW
265If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
266and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_hex>
267returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
268and writes an approximation of the correct value into C<*result> (which is an
269NV; or the approximation is discarded if C<result> is NULL).
53305cf1 270
796b6530 271The hex number may optionally be prefixed with C<"0x"> or C<"x"> unless
6974a337
KW
272C<PERL_SCAN_DISALLOW_PREFIX> is set in C<*flags> on entry.
273
274If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
275digits may be separated from each other by a single underscore; also a single
276leading underscore is accepted.
53305cf1
NC
277
278=cut
02470786 279
2e046c5b 280Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
baf48926 281which suppresses any message for non-portable numbers, but which are valid
3f8c4d74 282on this platform. But, C<*flags> will have the corresponding flag bit set.
53305cf1
NC
283 */
284
285UV
7918f24d
NC
286Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
287{
bcfb98ec
KW
288 PERL_ARGS_ASSERT_GROK_HEX;
289
290 return grok_hex(start, len_p, flags, result);
291}
292
d05c9ddb
KW
293/*
294=for apidoc grok_oct
295
296converts a string representing an octal number to numeric form.
297
6974a337
KW
298On entry C<start> and C<*len_p> give the string to scan, C<*flags> gives
299conversion flags, and C<result> should be C<NULL> or a pointer to an NV. The
300scan stops at the end of the string, or at just before the first invalid
301character. Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in C<*flags>,
302encountering an invalid character (except NUL) will also trigger a warning. On
303return C<*len_p> is set to the length of the scanned string, and C<*flags>
304gives output flags.
d05c9ddb
KW
305
306If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
307and nothing is written to C<*result>. If the value is > C<UV_MAX>, C<grok_oct>
308returns C<UV_MAX>, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
6974a337
KW
309and writes an approximation of the correct value into C<*result> (which is an
310NV; or the approximation is discarded if C<result> is NULL).
311
312If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in C<*flags> then any or all pairs of
313digits may be separated from each other by a single underscore; also a single
314leading underscore is accepted.
d05c9ddb 315
6974a337
KW
316The the C<PERL_SCAN_DISALLOW_PREFIX> flag is always treated as being set for
317this function.
d05c9ddb
KW
318
319=cut
320
321Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
322which suppresses any message for non-portable numbers, but which are valid
323on this platform.
324 */
325
326UV
327Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
328{
329 PERL_ARGS_ASSERT_GROK_OCT;
330
331 return grok_oct(start, len_p, flags, result);
332}
333
c969ff22
KW
334STATIC void
335S_output_non_portable(pTHX_ const U8 base)
336{
337 /* Display the proper message for a number in the given input base not
338 * fitting in 32 bits */
339 const char * which = (base == 2)
340 ? "Binary number > 0b11111111111111111111111111111111"
341 : (base == 8)
342 ? "Octal number > 037777777777"
343 : "Hexadecimal number > 0xffffffff";
344
345 PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
346
347 /* Also there are listings for the other two. That's because, since they
348 * are the first word, it would be hard for a user to find them there
349 * starting with a %s */
350 /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
351 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
352}
353
bcfb98ec
KW
354UV
355Perl_grok_bin_oct_hex(pTHX_ const char *start,
356 STRLEN *len_p,
357 I32 *flags,
358 NV *result,
c969ff22 359 const unsigned shift, /* 1 for binary; 3 for octal;
bcfb98ec 360 4 for hex */
c969ff22
KW
361 const U8 class_bit,
362 const char prefix
363 )
364
bcfb98ec 365{
c969ff22
KW
366 const char *s0 = start;
367 const char *s;
53305cf1 368 STRLEN len = *len_p;
c969ff22 369 STRLEN bytes_so_far; /* How many real digits have been processed */
53305cf1
NC
370 UV value = 0;
371 NV value_nv = 0;
c969ff22
KW
372 const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
373 const UV max_div= UV_MAX / base; /* Value above which, the next digit
374 processed would overflow */
4b24f703
KW
375 const I32 input_flags = *flags;
376 const bool allow_underscores =
377 cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
c969ff22 378 bool overflowed = FALSE;
cddf31e4
KW
379
380 /* In overflows, this keeps track of how much to multiply the overflowed NV
381 * by as we continue to parse the remaining digits */
efc92487 382 NV factor = 0;
c969ff22
KW
383
384 /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
385 * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
386 * find the numeric value of a digit. That requires more instructions than
387 * OCTAL_VALUE would, but gives the same result for the narrowed range of
388 * octal digits; same for binary. If it were ever critical to squeeze more
389 * performance from this, the function could become grok_hex, and a regen
390 * perl script could scan it and write out two edited copies for the other
391 * two functions. That would improve the performance of all three
392 * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
393 * parameters are now passed to this to avoid conditionals. Those could
394 * become declared consts, like:
395 * const U8 base = 16;
396 * const U8 base = 8;
397 * ...
398 */
98994639 399
bcfb98ec
KW
400 PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
401
402 ASSUME(inRANGE(shift, 1, 4) && shift != 2);
7918f24d 403
4b24f703
KW
404 /* Clear output flags; unlikely to find a problem that sets them */
405 *flags = 0;
406
c969ff22 407 if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
bcfb98ec
KW
408
409 /* strip off leading b or 0b; x or 0x.
410 for compatibility silently suffer "b" and "0b" as valid binary; "x"
411 and "0x" as valid hex numbers. */
a4c04bdc 412 if (len >= 1) {
c969ff22
KW
413 if (isALPHA_FOLD_EQ(s0[0], prefix)) {
414 s0++;
a4c04bdc
NC
415 len--;
416 }
c969ff22
KW
417 else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
418 s0+=2;
a4c04bdc
NC
419 len-=2;
420 }
421 }
98994639
HS
422 }
423
c969ff22
KW
424 s = s0; /* s0 potentially advanced from 'start' */
425
b2fffc9a
KW
426 /* Unroll the loop so that the first 8 digits are branchless except for the
427 * switch. A ninth one overflows a 32 bit word. */
c969ff22
KW
428 switch (len) {
429 case 0:
430 return 0;
431 default:
c3ee6e27 432 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
433 value = (value << shift) | XDIGIT_VALUE(*s);
434 s++;
435 /* FALLTHROUGH */
b2fffc9a 436 case 7:
c3ee6e27 437 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
b2fffc9a
KW
438 value = (value << shift) | XDIGIT_VALUE(*s);
439 s++;
440 /* FALLTHROUGH */
c969ff22 441 case 6:
c3ee6e27 442 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
443 value = (value << shift) | XDIGIT_VALUE(*s);
444 s++;
445 /* FALLTHROUGH */
446 case 5:
c3ee6e27 447 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
448 value = (value << shift) | XDIGIT_VALUE(*s);
449 s++;
450 /* FALLTHROUGH */
451 case 4:
c3ee6e27 452 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
453 value = (value << shift) | XDIGIT_VALUE(*s);
454 s++;
455 /* FALLTHROUGH */
456 case 3:
c3ee6e27 457 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
458 value = (value << shift) | XDIGIT_VALUE(*s);
459 s++;
460 /* FALLTHROUGH */
461 case 2:
c3ee6e27 462 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
463 value = (value << shift) | XDIGIT_VALUE(*s);
464 s++;
465 /* FALLTHROUGH */
466 case 1:
c3ee6e27 467 if (UNLIKELY(! _generic_isCC(*s, class_bit))) break;
c969ff22
KW
468 value = (value << shift) | XDIGIT_VALUE(*s);
469
b2fffc9a 470 if (LIKELY(len <= 8)) {
c969ff22
KW
471 return value;
472 }
473
474 s++;
475 break;
476 }
477
478 bytes_so_far = s - s0;
479 factor = shift << bytes_so_far;
480 len -= bytes_so_far;
481
482 for (; len--; s++) {
bcfb98ec 483 if (_generic_isCC(*s, class_bit)) {
53305cf1
NC
484 /* Write it in this wonky order with a goto to attempt to get the
485 compiler to make the common case integer-only loop pretty tight.
35e5392d
KW
486 With gcc seems to be much straighter code than old scan_hex.
487 (khw suspects that adding a LIKELY() just above would do the
488 same thing) */
53305cf1 489 redo:
ebf83045
KW
490 if (LIKELY(value <= max_div)) {
491 value = (value << shift) | XDIGIT_VALUE(*s);
492 /* Note XDIGIT_VALUE() is branchless, works on binary
493 * and octal as well, so can be used here, without
494 * slowing those down */
efc92487 495 factor *= 1 << shift;
ebf83045
KW
496 continue;
497 }
cddf31e4
KW
498
499 /* Bah. We are about to overflow. Instead, add the unoverflowed
500 * value to an NV that contains an approximation to the correct
501 * value. Each time through the loop we have increased 'factor' so
502 * that it gives how much the current approximation needs to
503 * effectively be shifted to make room for this new value */
efc92487 504 value_nv *= factor;
cddf31e4
KW
505 value_nv += (NV) value;
506
507 /* Then we keep accumulating digits, until all are parsed. We
508 * start over using the current input value. This will be added to
509 * 'value_nv' eventually, either when all digits are gone, or we
510 * have overflowed this fresh start. */
511 value = XDIGIT_VALUE(*s);
512 factor = 1 << shift;
513
c969ff22
KW
514 if (! overflowed) {
515 overflowed = TRUE;
99a25d63
KW
516 if ( ! (input_flags & PERL_SCAN_SILENT_OVERFLOW)
517 && ckWARN_d(WARN_OVERFLOW))
518 {
519 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
bcfb98ec
KW
520 "Integer overflow in %s number",
521 (base == 16) ? "hexadecimal"
522 : (base == 2)
523 ? "binary"
524 : "octal");
99a25d63 525 }
53305cf1 526 }
53305cf1
NC
527 continue;
528 }
ebf83045 529
bcfb98ec
KW
530 if ( *s == '_'
531 && len
532 && allow_underscores
99a25d63
KW
533 && _generic_isCC(s[1], class_bit)
534
535 /* Don't allow a leading underscore if the only-medial bit is
536 * set */
537 && ( LIKELY(s > s0)
538 || UNLIKELY((input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)
539 != PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES)))
bcfb98ec
KW
540 {
541 --len;
542 ++s;
543 goto redo;
544 }
ebf83045 545
3f8c4d74
KW
546 if (*s) {
547 if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
17d6187b
KW
548 && ckWARN(WARN_DIGIT))
549 {
550 if (base != 8) {
551 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
552 "Illegal %s digit '%c' ignored",
553 ((base == 2)
554 ? "binary"
555 : "hexadecimal"),
556 *s);
557 }
558 else if (isDIGIT(*s)) { /* octal base */
559
560 /* Allow \octal to work the DWIM way (that is, stop
561 * scanning as soon as non-octal characters are seen,
562 * complain only if someone seems to want to use the digits
563 * eight and nine. Since we know it is not octal, then if
564 * isDIGIT, must be an 8 or 9). */
565 Perl_warner(aTHX_ packWARN(WARN_DIGIT),
bcfb98ec 566 "Illegal octal digit '%c' ignored", *s);
3f8c4d74
KW
567 }
568 }
569
570 if (input_flags & PERL_SCAN_NOTIFY_ILLDIGIT) {
571 *flags |= PERL_SCAN_NOTIFY_ILLDIGIT;
bcfb98ec
KW
572 }
573 }
ebf83045 574
53305cf1
NC
575 break;
576 }
19c1206d 577
c969ff22 578 *len_p = s - start;
cddf31e4 579
c969ff22 580 if (LIKELY(! overflowed)) {
53305cf1 581#if UVSIZE > 4
c969ff22 582 if ( UNLIKELY(value > 0xffffffff)
4b24f703 583 && ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
c969ff22
KW
584 {
585 output_non_portable(base);
3f8c4d74 586 *flags |= PERL_SCAN_SILENT_NON_PORTABLE;
c969ff22 587 }
53305cf1 588#endif
c969ff22 589 return value;
53305cf1 590 }
bcfb98ec 591
c969ff22 592 /* Overflowed: Calculate the final overflow approximation */
efc92487 593 value_nv *= factor;
c969ff22 594 value_nv += (NV) value;
cddf31e4 595
c969ff22 596 output_non_portable(base);
ebf83045 597
3f8c4d74
KW
598 *flags |= PERL_SCAN_GREATER_THAN_UV_MAX
599 | PERL_SCAN_SILENT_NON_PORTABLE;
53305cf1
NC
600 if (result)
601 *result = value_nv;
602 return UV_MAX;
603}
604
605/*
53305cf1
NC
606=for apidoc scan_bin
607
72d33970 608For backwards compatibility. Use C<grok_bin> instead.
53305cf1
NC
609
610=for apidoc scan_hex
611
72d33970 612For backwards compatibility. Use C<grok_hex> instead.
53305cf1
NC
613
614=for apidoc scan_oct
615
72d33970 616For backwards compatibility. Use C<grok_oct> instead.
53305cf1
NC
617
618=cut
619 */
620
621NV
73d840c0 622Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
623{
624 NV rnv;
625 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 626 const UV ruv = grok_bin (start, &len, &flags, &rnv);
53305cf1 627
7918f24d
NC
628 PERL_ARGS_ASSERT_SCAN_BIN;
629
53305cf1
NC
630 *retlen = len;
631 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
632}
633
634NV
73d840c0 635Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
636{
637 NV rnv;
638 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 639 const UV ruv = grok_oct (start, &len, &flags, &rnv);
53305cf1 640
7918f24d
NC
641 PERL_ARGS_ASSERT_SCAN_OCT;
642
53305cf1
NC
643 *retlen = len;
644 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
645}
646
647NV
73d840c0 648Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
649{
650 NV rnv;
651 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 652 const UV ruv = grok_hex (start, &len, &flags, &rnv);
53305cf1 653
7918f24d
NC
654 PERL_ARGS_ASSERT_SCAN_HEX;
655
53305cf1
NC
656 *retlen = len;
657 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
98994639
HS
658}
659
660/*
661=for apidoc grok_numeric_radix
662
663Scan and skip for a numeric decimal separator (radix).
664
665=cut
666 */
667bool
668Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
669{
7918f24d
NC
670 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
671
7ea85fa8
KW
672#ifdef USE_LOCALE_NUMERIC
673
d6ded950 674 if (IN_LC(LC_NUMERIC)) {
f0dafd73
KW
675 STRLEN len;
676 char * radix;
677 bool matches_radix = FALSE;
67d796ae 678 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
f0dafd73 679
a1395eaf 680 STORE_LC_NUMERIC_FORCE_TO_UNDERLYING();
f0dafd73
KW
681
682 radix = SvPV(PL_numeric_radix_sv, len);
683 radix = savepvn(radix, len);
684
21431899 685 RESTORE_LC_NUMERIC();
f0dafd73
KW
686
687 if (*sp + len <= send) {
688 matches_radix = memEQ(*sp, radix, len);
689 }
690
691 Safefree(radix);
692
693 if (matches_radix) {
694 *sp += len;
695 return TRUE;
696 }
98994639 697 }
f0dafd73 698
98994639 699#endif
7918f24d 700
f0dafd73
KW
701 /* always try "." if numeric radix didn't match because
702 * we may have data from different locales mixed */
98994639
HS
703 if (*sp < send && **sp == '.') {
704 ++*sp;
705 return TRUE;
706 }
f0dafd73 707
98994639
HS
708 return FALSE;
709}
710
569f27e5 711/*
ff4eb398
JH
712=for apidoc grok_infnan
713
796b6530 714Helper for C<grok_number()>, accepts various ways of spelling "infinity"
ff4eb398
JH
715or "not a number", and returns one of the following flag combinations:
716
5962c2f6 717 IS_NUMBER_INFINITY
ff4eb398 718 IS_NUMBER_NAN
5962c2f6 719 IS_NUMBER_INFINITY | IS_NUMBER_NEG
ff4eb398
JH
720 IS_NUMBER_NAN | IS_NUMBER_NEG
721 0
722
796b6530 723possibly |-ed with C<IS_NUMBER_TRAILING>.
b489e20f 724
796b6530 725If an infinity or a not-a-number is recognized, C<*sp> will point to
62bdc035 726one byte past the end of the recognized string. If the recognition fails,
796b6530 727zero is returned, and C<*sp> will not move.
ff4eb398 728
ceea512d
KW
729=for apidoc Amn|bool|IS_NUMBER_GREATER_THAN_UV_MAX
730=for apidoc Amn|bool|IS_NUMBER_INFINITY
731=for apidoc Amn|bool|IS_NUMBER_IN_UV
732=for apidoc Amn|bool|IS_NUMBER_NAN
733=for apidoc Amn|bool|IS_NUMBER_NEG
734=for apidoc Amn|bool|IS_NUMBER_NOT_INT
735
ff4eb398
JH
736=cut
737*/
738
739int
3823048b 740Perl_grok_infnan(pTHX_ const char** sp, const char* send)
ff4eb398
JH
741{
742 const char* s = *sp;
743 int flags = 0;
a5dc2484 744#if defined(NV_INF) || defined(NV_NAN)
62bdc035 745 bool odh = FALSE; /* one-dot-hash: 1.#INF */
ff4eb398
JH
746
747 PERL_ARGS_ASSERT_GROK_INFNAN;
748
8c12dc63
JH
749 if (*s == '+') {
750 s++; if (s == send) return 0;
751 }
752 else if (*s == '-') {
ff4eb398
JH
753 flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
754 s++; if (s == send) return 0;
755 }
756
757 if (*s == '1') {
62bdc035
JH
758 /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1.#IND (maybe also 1.#NAN)
759 * Let's keep the dot optional. */
ff4eb398
JH
760 s++; if (s == send) return 0;
761 if (*s == '.') {
762 s++; if (s == send) return 0;
763 }
764 if (*s == '#') {
765 s++; if (s == send) return 0;
766 } else
767 return 0;
e855f543 768 odh = TRUE;
ff4eb398
JH
769 }
770
305b8651 771 if (isALPHA_FOLD_EQ(*s, 'I')) {
62bdc035
JH
772 /* INF or IND (1.#IND is "indeterminate", a certain type of NAN) */
773
305b8651 774 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
ff4eb398 775 s++; if (s == send) return 0;
305b8651 776 if (isALPHA_FOLD_EQ(*s, 'F')) {
ff4eb398 777 s++;
b8974fcb
JH
778 if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
779 int fail =
780 flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
781 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
782 s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
783 s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
784 s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
3396ed30 785 s++;
b8974fcb
JH
786 } else if (odh) {
787 while (*s == '0') { /* 1.#INF00 */
788 s++;
789 }
3396ed30 790 }
b489e20f
JH
791 while (s < send && isSPACE(*s))
792 s++;
793 if (s < send && *s) {
3396ed30 794 flags |= IS_NUMBER_TRAILING;
fae4db12 795 }
ff4eb398
JH
796 flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
797 }
e855f543 798 else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
ff4eb398
JH
799 s++;
800 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
fae4db12
JH
801 while (*s == '0') { /* 1.#IND00 */
802 s++;
803 }
1e9aa12f
JH
804 if (*s) {
805 flags |= IS_NUMBER_TRAILING;
806 }
ff4eb398
JH
807 } else
808 return 0;
ff4eb398
JH
809 }
810 else {
62bdc035 811 /* Maybe NAN of some sort */
3823048b
JH
812
813 if (isALPHA_FOLD_EQ(*s, 'S') || isALPHA_FOLD_EQ(*s, 'Q')) {
814 /* snan, qNaN */
815 /* XXX do something with the snan/qnan difference */
816 s++; if (s == send) return 0;
817 }
818
819 if (isALPHA_FOLD_EQ(*s, 'N')) {
820 s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
821 s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
822 s++;
823
824 flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
dca9f615
KW
825 if (s == send) {
826 return flags;
827 }
3823048b
JH
828
829 /* NaN can be followed by various stuff (NaNQ, NaNS), but
830 * there are also multiple different NaN values, and some
831 * implementations output the "payload" values,
832 * e.g. NaN123, NAN(abc), while some legacy implementations
833 * have weird stuff like NaN%. */
834 if (isALPHA_FOLD_EQ(*s, 'q') ||
835 isALPHA_FOLD_EQ(*s, 's')) {
836 /* "nanq" or "nans" are ok, though generating
837 * these portably is tricky. */
838 s++;
81d11450
KW
839 if (s == send) {
840 return flags;
841 }
3823048b
JH
842 }
843 if (*s == '(') {
844 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
845 const char *t;
846 s++;
847 if (s == send) {
848 return flags | IS_NUMBER_TRAILING;
849 }
850 t = s + 1;
851 while (t < send && *t && *t != ')') {
852 t++;
853 }
854 if (t == send) {
855 return flags | IS_NUMBER_TRAILING;
856 }
857 if (*t == ')') {
858 int nantype;
859 UV nanval;
860 if (s[0] == '0' && s + 2 < t &&
861 isALPHA_FOLD_EQ(s[1], 'x') &&
862 isXDIGIT(s[2])) {
863 STRLEN len = t - s;
864 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
865 nanval = grok_hex(s, &len, &flags, NULL);
866 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
867 nantype = 0;
868 } else {
869 nantype = IS_NUMBER_IN_UV;
870 }
871 s += len;
872 } else if (s[0] == '0' && s + 2 < t &&
873 isALPHA_FOLD_EQ(s[1], 'b') &&
874 (s[2] == '0' || s[2] == '1')) {
875 STRLEN len = t - s;
876 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
877 nanval = grok_bin(s, &len, &flags, NULL);
878 if ((flags & PERL_SCAN_GREATER_THAN_UV_MAX)) {
879 nantype = 0;
880 } else {
881 nantype = IS_NUMBER_IN_UV;
882 }
883 s += len;
884 } else {
885 const char *u;
886 nantype =
887 grok_number_flags(s, t - s, &nanval,
888 PERL_SCAN_TRAILING |
889 PERL_SCAN_ALLOW_UNDERSCORES);
890 /* Unfortunately grok_number_flags() doesn't
891 * tell how far we got and the ')' will always
892 * be "trailing", so we need to double-check
893 * whether we had something dubious. */
894 for (u = s; u < t; u++) {
895 if (!isDIGIT(*u)) {
896 flags |= IS_NUMBER_TRAILING;
897 break;
898 }
899 }
900 s = u;
901 }
902
903 /* XXX Doesn't do octal: nan("0123").
904 * Probably not a big loss. */
905
906 if ((nantype & IS_NUMBER_NOT_INT) ||
907 !(nantype && IS_NUMBER_IN_UV)) {
908 /* XXX the nanval is currently unused, that is,
909 * not inserted as the NaN payload of the NV.
910 * But the above code already parses the C99
911 * nan(...) format. See below, and see also
912 * the nan() in POSIX.xs.
913 *
914 * Certain configuration combinations where
915 * NVSIZE is greater than UVSIZE mean that
916 * a single UV cannot contain all the possible
917 * NaN payload bits. There would need to be
918 * some more generic syntax than "nan($uv)".
919 *
920 * Issues to keep in mind:
921 *
922 * (1) In most common cases there would
923 * not be an integral number of bytes that
924 * could be set, only a certain number of bits.
925 * For example for the common case of
926 * NVSIZE == UVSIZE == 8 there is room for 52
927 * bits in the payload, but the most significant
928 * bit is commonly reserved for the
929 * signaling/quiet bit, leaving 51 bits.
930 * Furthermore, the C99 nan() is supposed
931 * to generate quiet NaNs, so it is doubtful
932 * whether it should be able to generate
933 * signaling NaNs. For the x86 80-bit doubles
934 * (if building a long double Perl) there would
935 * be 62 bits (s/q bit being the 63rd).
936 *
937 * (2) Endianness of the payload bits. If the
938 * payload is specified as an UV, the low-order
939 * bits of the UV are naturally little-endianed
940 * (rightmost) bits of the payload. The endianness
941 * of UVs and NVs can be different. */
942 return 0;
943 }
944 if (s < t) {
945 flags |= IS_NUMBER_TRAILING;
946 }
947 } else {
948 /* Looked like nan(...), but no close paren. */
949 flags |= IS_NUMBER_TRAILING;
950 }
951 } else {
952 while (s < send && isSPACE(*s))
953 s++;
954 if (s < send && *s) {
955 /* Note that we here implicitly accept (parse as
956 * "nan", but with warnings) also any other weird
957 * trailing stuff for "nan". In the above we just
958 * check that if we got the C99-style "nan(...)",
959 * the "..." looks sane.
960 * If in future we accept more ways of specifying
961 * the nan payload, the accepting would happen around
962 * here. */
963 flags |= IS_NUMBER_TRAILING;
964 }
965 }
966 s = send;
967 }
968 else
969 return 0;
ff4eb398
JH
970 }
971
b489e20f
JH
972 while (s < send && isSPACE(*s))
973 s++;
974
a5dc2484
JH
975#else
976 PERL_UNUSED_ARG(send);
977#endif /* #if defined(NV_INF) || defined(NV_NAN) */
a1fe7cea
JH
978 *sp = s;
979 return flags;
ff4eb398
JH
980}
981
13393a5e 982/*
3823048b 983=for apidoc grok_number_flags
13393a5e
JH
984
985Recognise (or not) a number. The type of the number is returned
986(0 if unrecognised), otherwise it is a bit-ORed combination of
796b6530
KW
987C<IS_NUMBER_IN_UV>, C<IS_NUMBER_GREATER_THAN_UV_MAX>, C<IS_NUMBER_NOT_INT>,
988C<IS_NUMBER_NEG>, C<IS_NUMBER_INFINITY>, C<IS_NUMBER_NAN> (defined in perl.h).
989
990If the value of the number can fit in a UV, it is returned in C<*valuep>.
991C<IS_NUMBER_IN_UV> will be set to indicate that C<*valuep> is valid, C<IS_NUMBER_IN_UV>
992will never be set unless C<*valuep> is valid, but C<*valuep> may have been assigned
993to during processing even though C<IS_NUMBER_IN_UV> is not set on return.
994If C<valuep> is C<NULL>, C<IS_NUMBER_IN_UV> will be set for the same cases as when
995C<valuep> is non-C<NULL>, but no actual assignment (or SEGV) will occur.
996
997C<IS_NUMBER_NOT_INT> will be set with C<IS_NUMBER_IN_UV> if trailing decimals were
998seen (in which case C<*valuep> gives the true value truncated to an integer), and
999C<IS_NUMBER_NEG> if the number is negative (in which case C<*valuep> holds the
1000absolute value). C<IS_NUMBER_IN_UV> is not set if e notation was used or the
13393a5e
JH
1001number is larger than a UV.
1002
1003C<flags> allows only C<PERL_SCAN_TRAILING>, which allows for trailing
1004non-numeric text on an otherwise successful I<grok>, setting
1005C<IS_NUMBER_TRAILING> on the result.
1006
0b929024
KW
1007=for apidoc Amnh||PERL_SCAN_TRAILING
1008
13393a5e
JH
1009=for apidoc grok_number
1010
796b6530 1011Identical to C<grok_number_flags()> with C<flags> set to zero.
13393a5e
JH
1012
1013=cut
1014 */
1015int
1016Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
1017{
1018 PERL_ARGS_ASSERT_GROK_NUMBER;
1019
1020 return grok_number_flags(pv, len, valuep, 0);
1021}
1022
945b524a
JH
1023static const UV uv_max_div_10 = UV_MAX / 10;
1024static const U8 uv_max_mod_10 = UV_MAX % 10;
1025
3f7602fa 1026int
3823048b 1027Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
3f7602fa 1028{
60939fb8 1029 const char *s = pv;
c4420975 1030 const char * const send = pv + len;
ae776a2c 1031 const char *d;
60939fb8 1032 int numtype = 0;
60939fb8 1033
3823048b 1034 PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS;
7918f24d 1035
129ccace
KW
1036 if (UNLIKELY(isSPACE(*s))) {
1037 s++;
1038 while (s < send) {
1039 if (LIKELY(! isSPACE(*s))) goto non_space;
1040 s++;
1041 }
1042 return 0;
1043 non_space: ;
60939fb8 1044 }
60939fb8 1045
129ccace
KW
1046 /* See if signed. This assumes it is more likely to be unsigned, so
1047 * penalizes signed by an extra conditional; rewarding unsigned by one fewer
1048 * (because we detect '+' and '-' with a single test and then add a
1049 * conditional to determine which) */
1050 if (UNLIKELY((*s & ~('+' ^ '-')) == ('+' & '-') )) {
1051
1052 /* Here, on ASCII platforms, *s is one of: 0x29 = ')', 2B = '+', 2D = '-',
1053 * 2F = '/'. That is, it is either a sign, or a character that doesn't
1054 * belong in a number at all (unless it's a radix character in a weird
1055 * locale). Given this, it's far more likely to be a minus than the
1056 * others. (On EBCDIC it is one of 42, 44, 46, 48, 4A, 4C, 4E, (not 40
17d6187b
KW
1057 * because can't be a space) 60, 62, 64, 66, 68, 6A, 6C, 6E. Again,
1058 * only potentially a weird radix character, or 4E='+', or 60='-') */
129ccace
KW
1059 if (LIKELY(*s == '-')) {
1060 s++;
1061 numtype = IS_NUMBER_NEG;
1062 }
1063 else if (LIKELY(*s == '+'))
1064 s++;
1065 else /* Can't just return failure here, as it could be a weird radix
1066 character */
1067 goto done_sign;
1068
1069 if (UNLIKELY(s == send))
1070 return 0;
1071 done_sign: ;
1072 }
60939fb8 1073
ae776a2c 1074 /* The first digit (after optional sign): note that might
8c12dc63 1075 * also point to "infinity" or "nan", or "1.#INF". */
ae776a2c
JH
1076 d = s;
1077
8c12dc63 1078 /* next must be digit or the radix separator or beginning of infinity/nan */
129ccace 1079 if (LIKELY(isDIGIT(*s))) {
60939fb8
NC
1080 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
1081 overflow. */
129ccace
KW
1082 UV value = *s - '0'; /* Process this first (perhaps only) digit */
1083 int digit;
1084
1085 s++;
1086
1087 switch(send - s) {
1088 default: /* 8 or more remaining characters */
1089 digit = *s - '0';
1090 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1091 value = value * 10 + digit;
1092 s++;
1093 /* FALLTHROUGH */
1094 case 7:
1095 digit = *s - '0';
1096 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1097 value = value * 10 + digit;
1098 s++;
1099 /* FALLTHROUGH */
1100 case 6:
1101 digit = *s - '0';
1102 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1103 value = value * 10 + digit;
1104 s++;
1105 /* FALLTHROUGH */
1106 case 5:
1107 digit = *s - '0';
1108 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1109 value = value * 10 + digit;
1110 s++;
1111 /* FALLTHROUGH */
1112 case 4:
1113 digit = *s - '0';
1114 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1115 value = value * 10 + digit;
1116 s++;
1117 /* FALLTHROUGH */
1118 case 3:
1119 digit = *s - '0';
1120 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1121 value = value * 10 + digit;
1122 s++;
1123 /* FALLTHROUGH */
1124 case 2:
1125 digit = *s - '0';
1126 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
1127 value = value * 10 + digit;
1128 s++;
1129 /* FALLTHROUGH */
1130 case 1:
1131 digit = *s - '0';
1132 if (UNLIKELY(! inRANGE(digit, 0, 9))) break;
60939fb8 1133 value = value * 10 + digit;
129ccace
KW
1134 s++;
1135 /* FALLTHROUGH */
1136 case 0: /* This case means the string consists of just the one
1137 digit we already have processed */
1138
1139 /* If we got here by falling through other than the default: case, we
1140 * have processed the whole string, and know it consists entirely of
1141 * digits, and can't have overflowed. */
1142 if (s >= send) {
1143 if (valuep)
1144 *valuep = value;
1145 return numtype|IS_NUMBER_IN_UV;
1146 }
1147
1148 /* Here, there are extra characters beyond the first 9 digits. Use a
1149 * loop to accumulate any remaining digits, until we get a non-digit or
1150 * would overflow. Note that leading zeros could cause us to get here
1151 * without being close to overflowing.
1152 *
1153 * (The conditional 's >= send' above could be eliminated by making the
1154 * default: in the switch to instead be 'case 8:', and process longer
1155 * strings separately by using the loop below. This would penalize
1156 * these inputs by the extra instructions needed for looping. That
1157 * could be eliminated by copying the unwound code from above to handle
1158 * the firt 9 digits of these. khw didn't think this saving of a
1159 * single conditional was worth it.) */
1160 do {
1161 digit = *s - '0';
1162 if (! inRANGE(digit, 0, 9)) goto mantissa_done;
1163 if ( value < uv_max_div_10
1164 || ( value == uv_max_div_10
1165 && digit <= uv_max_mod_10))
1166 {
60939fb8 1167 value = value * 10 + digit;
129ccace 1168 s++;
60939fb8 1169 }
129ccace
KW
1170 else { /* value would overflow. skip the remaining digits, don't
1171 worry about setting *valuep. */
1172 do {
1173 s++;
1174 } while (s < send && isDIGIT(*s));
1175 numtype |=
1176 IS_NUMBER_GREATER_THAN_UV_MAX;
1177 goto skip_value;
1178 }
1179 } while (s < send);
1180 } /* End switch on input length */
1181
1182 mantissa_done:
60939fb8
NC
1183 numtype |= IS_NUMBER_IN_UV;
1184 if (valuep)
1185 *valuep = value;
1186
1187 skip_value:
1188 if (GROK_NUMERIC_RADIX(&s, send)) {
1189 numtype |= IS_NUMBER_NOT_INT;
1190 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
1191 s++;
98994639 1192 }
129ccace 1193 } /* End of *s is a digit */
60939fb8
NC
1194 else if (GROK_NUMERIC_RADIX(&s, send)) {
1195 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
1196 /* no digits before the radix means we need digits after it */
1197 if (s < send && isDIGIT(*s)) {
1198 do {
1199 s++;
1200 } while (s < send && isDIGIT(*s));
1201 if (valuep) {
1202 /* integer approximation is valid - it's 0. */
1203 *valuep = 0;
1204 }
98994639 1205 }
60939fb8 1206 else
ae776a2c 1207 return 0;
ff4eb398 1208 }
60939fb8 1209
129ccace 1210 if (LIKELY(s > d) && s < send) {
60939fb8 1211 /* we can have an optional exponent part */
129ccace 1212 if (UNLIKELY(isALPHA_FOLD_EQ(*s, 'e'))) {
60939fb8
NC
1213 s++;
1214 if (s < send && (*s == '-' || *s == '+'))
1215 s++;
1216 if (s < send && isDIGIT(*s)) {
1217 do {
1218 s++;
1219 } while (s < send && isDIGIT(*s));
1220 }
3f7602fa
TC
1221 else if (flags & PERL_SCAN_TRAILING)
1222 return numtype | IS_NUMBER_TRAILING;
60939fb8 1223 else
3f7602fa
TC
1224 return 0;
1225
1226 /* The only flag we keep is sign. Blow away any "it's UV" */
1227 numtype &= IS_NUMBER_NEG;
1228 numtype |= IS_NUMBER_NOT_INT;
60939fb8
NC
1229 }
1230 }
129ccace
KW
1231
1232 while (s < send) {
1233 if (LIKELY(! isSPACE(*s))) goto end_space;
60939fb8 1234 s++;
129ccace
KW
1235 }
1236 return numtype;
1237
1238 end_space:
1239
1240 if (UNLIKELY(memEQs(pv, len, "0 but true"))) {
60939fb8
NC
1241 if (valuep)
1242 *valuep = 0;
1243 return IS_NUMBER_IN_UV;
1244 }
129ccace 1245
8c12dc63 1246 /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */
129ccace 1247 if ((s + 2 < send) && UNLIKELY(memCHRs("inqs#", toFOLD(*s)))) {
8c12dc63
JH
1248 /* Really detect inf/nan. Start at d, not s, since the above
1249 * code might have already consumed the "1." or "1". */
7eff3d39 1250 const int infnan = Perl_grok_infnan(aTHX_ &d, send);
8c12dc63
JH
1251 if ((infnan & IS_NUMBER_INFINITY)) {
1252 return (numtype | infnan); /* Keep sign for infinity. */
1253 }
1254 else if ((infnan & IS_NUMBER_NAN)) {
1255 return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */
1256 }
1257 }
3f7602fa
TC
1258 else if (flags & PERL_SCAN_TRAILING) {
1259 return numtype | IS_NUMBER_TRAILING;
1260 }
1261
60939fb8 1262 return 0;
98994639
HS
1263}
1264
6313e544 1265/*
5d4a52b5 1266=for apidoc grok_atoUV
6313e544 1267
5d4a52b5 1268parse a string, looking for a decimal unsigned integer.
338aa8b0 1269
5d4a52b5
KW
1270On entry, C<pv> points to the beginning of the string;
1271C<valptr> points to a UV that will receive the converted value, if found;
1272C<endptr> is either NULL or points to a variable that points to one byte
1273beyond the point in C<pv> that this routine should examine.
1274If C<endptr> is NULL, C<pv> is assumed to be NUL-terminated.
f4379102 1275
5d4a52b5
KW
1276Returns FALSE if C<pv> doesn't represent a valid unsigned integer value (with
1277no leading zeros). Otherwise it returns TRUE, and sets C<*valptr> to that
1278value.
6313e544 1279
5d4a52b5
KW
1280If you constrain the portion of C<pv> that is looked at by this function (by
1281passing a non-NULL C<endptr>), and if the intial bytes of that portion form a
1282valid value, it will return TRUE, setting C<*endptr> to the byte following the
1283final digit of the value. But if there is no constraint at what's looked at,
1284all of C<pv> must be valid in order for TRUE to be returned.
6313e544 1285
5d4a52b5 1286The only characters this accepts are the decimal digits '0'..'9'.
338aa8b0 1287
5d4a52b5
KW
1288As opposed to L<atoi(3)> or L<strtol(3)>, C<grok_atoUV> does NOT allow optional
1289leading whitespace, nor negative inputs. If such features are required, the
1290calling code needs to explicitly implement those.
6313e544 1291
5d4a52b5
KW
1292Note that this function returns FALSE for inputs that would overflow a UV,
1293or have leading zeros. Thus a single C<0> is accepted, but not C<00> nor
1294C<01>, C<002>, I<etc>.
1295
1296Background: C<atoi> has severe problems with illegal inputs, it cannot be
d62b8c6a 1297used for incremental parsing, and therefore should be avoided
5d4a52b5 1298C<atoi> and C<strtol> are also affected by locale settings, which can also be
d62b8c6a
JH
1299seen as a bug (global state controlled by user environment).
1300
238217e5
JK
1301=cut
1302
6313e544
JH
1303*/
1304
22ff3130
HS
1305bool
1306Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
6313e544
JH
1307{
1308 const char* s = pv;
1309 const char** eptr;
1310 const char* end2; /* Used in case endptr is NULL. */
22ff3130 1311 UV val = 0; /* The parsed value. */
6313e544 1312
22ff3130 1313 PERL_ARGS_ASSERT_GROK_ATOUV;
6313e544 1314
5d4a52b5
KW
1315 if (endptr) {
1316 eptr = endptr;
1317 }
1318 else {
1319 end2 = s + strlen(s);
1320 eptr = &end2;
1321 }
1322
1323 if ( *eptr <= s
1324 || ! isDIGIT(*s))
1325 {
1326 return FALSE;
1327 }
1328
97d95d46
KW
1329 /* Single-digit inputs are quite common. */
1330 val = *s++ - '0';
1331 if (s < *eptr && isDIGIT(*s)) {
1332 /* Fail on extra leading zeros. */
1333 if (val == 0)
1334 return FALSE;
1335 while (s < *eptr && isDIGIT(*s)) {
1336 /* This could be unrolled like in grok_number(), but
1337 * the expected uses of this are not speed-needy, and
1338 * unlikely to need full 64-bitness. */
1339 const U8 digit = *s++ - '0';
1340 if (val < uv_max_div_10 ||
1341 (val == uv_max_div_10 && digit <= uv_max_mod_10)) {
1342 val = val * 10 + digit;
1343 } else {
22ff3130 1344 return FALSE;
6313e544
JH
1345 }
1346 }
97d95d46
KW
1347 }
1348
5d4a52b5
KW
1349 if (endptr == NULL) {
1350 if (*s) {
1351 return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
1352 }
1353 }
1354 else {
1355 *endptr = s;
75feedba 1356 }
97d95d46 1357
22ff3130
HS
1358 *valptr = val;
1359 return TRUE;
6313e544
JH
1360}
1361
ce6f496d 1362#ifndef Perl_strtod
4801ca72 1363STATIC NV
98994639
HS
1364S_mulexp10(NV value, I32 exponent)
1365{
1366 NV result = 1.0;
1367 NV power = 10.0;
1368 bool negative = 0;
1369 I32 bit;
1370
1371 if (exponent == 0)
1372 return value;
659c4b96
DM
1373 if (value == 0)
1374 return (NV)0;
87032ba1 1375
24866caa 1376 /* On OpenVMS VAX we by default use the D_FLOAT double format,
67597c89 1377 * and that format does not have *easy* capabilities [1] for
19c1206d
KW
1378 * overflowing doubles 'silently' as IEEE fp does. We also need
1379 * to support G_FLOAT on both VAX and Alpha, and though the exponent
1380 * range is much larger than D_FLOAT it still doesn't do silent
1381 * overflow. Therefore we need to detect early whether we would
1382 * overflow (this is the behaviour of the native string-to-float
24866caa 1383 * conversion routines, and therefore of native applications, too).
67597c89 1384 *
24866caa
CB
1385 * [1] Trying to establish a condition handler to trap floating point
1386 * exceptions is not a good idea. */
87032ba1
JH
1387
1388 /* In UNICOS and in certain Cray models (such as T90) there is no
1389 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
1390 * There is something you can do if you are willing to use some
1391 * inline assembler: the instruction is called DFI-- but that will
1392 * disable *all* floating point interrupts, a little bit too large
1393 * a hammer. Therefore we need to catch potential overflows before
1394 * it's too late. */
353813d9 1395
a7157111 1396#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS) || defined(DOUBLE_IS_VAX_FLOAT)) && defined(NV_MAX_10_EXP)
353813d9 1397 STMT_START {
c4420975 1398 const NV exp_v = log10(value);
353813d9
HS
1399 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
1400 return NV_MAX;
1401 if (exponent < 0) {
1402 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
1403 return 0.0;
1404 while (-exponent >= NV_MAX_10_EXP) {
1405 /* combination does not overflow, but 10^(-exponent) does */
1406 value /= 10;
1407 ++exponent;
1408 }
1409 }
1410 } STMT_END;
87032ba1
JH
1411#endif
1412
353813d9
HS
1413 if (exponent < 0) {
1414 negative = 1;
1415 exponent = -exponent;
b27804d8
DM
1416#ifdef NV_MAX_10_EXP
1417 /* for something like 1234 x 10^-309, the action of calculating
1418 * the intermediate value 10^309 then returning 1234 / (10^309)
1419 * will fail, since 10^309 becomes infinity. In this case try to
1420 * refactor it as 123 / (10^308) etc.
1421 */
1422 while (value && exponent > NV_MAX_10_EXP) {
1423 exponent--;
1424 value /= 10;
1425 }
48853916
JH
1426 if (value == 0.0)
1427 return value;
b27804d8 1428#endif
353813d9 1429 }
c62e754c
JH
1430#if defined(__osf__)
1431 /* Even with cc -ieee + ieee_set_fp_control(IEEE_TRAP_ENABLE_INV)
1432 * Tru64 fp behavior on inf/nan is somewhat broken. Another way
1433 * to do this would be ieee_set_fp_control(IEEE_TRAP_ENABLE_OVF)
1434 * but that breaks another set of infnan.t tests. */
1435# define FP_OVERFLOWS_TO_ZERO
1436#endif
98994639
HS
1437 for (bit = 1; exponent; bit <<= 1) {
1438 if (exponent & bit) {
1439 exponent ^= bit;
1440 result *= power;
c62e754c
JH
1441#ifdef FP_OVERFLOWS_TO_ZERO
1442 if (result == 0)
a7157111 1443# ifdef NV_INF
c62e754c 1444 return value < 0 ? -NV_INF : NV_INF;
a7157111
JH
1445# else
1446 return value < 0 ? -FLT_MAX : FLT_MAX;
1447# endif
c62e754c 1448#endif
236f0012 1449 /* Floating point exceptions are supposed to be turned off,
19c1206d 1450 * but if we're obviously done, don't risk another iteration.
236f0012
CB
1451 */
1452 if (exponent == 0) break;
98994639
HS
1453 }
1454 power *= power;
1455 }
1456 return negative ? value / result : value * result;
1457}
ce6f496d 1458#endif /* #ifndef Perl_strtod */
98994639 1459
ce6f496d 1460#ifdef Perl_strtod
b93d1309 1461# define ATOF(s, x) my_atof2(s, &x)
f7b64c80 1462#else
b93d1309 1463# define ATOF(s, x) Perl_atof2(s, x)
f7b64c80 1464#endif
b93d1309 1465
98994639
HS
1466NV
1467Perl_my_atof(pTHX_ const char* s)
1468{
f720c878
KW
1469 /* 's' must be NUL terminated */
1470
98994639 1471 NV x = 0.0;
9eda1ea6
KW
1472
1473 PERL_ARGS_ASSERT_MY_ATOF;
1474
b93d1309 1475#if ! defined(USE_LOCALE_NUMERIC)
9eda1ea6 1476
b93d1309 1477 ATOF(s, x);
9eda1ea6
KW
1478
1479#else
7918f24d 1480
a2287a13 1481 {
67d796ae
KW
1482 DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
1483 STORE_LC_NUMERIC_SET_TO_NEEDED();
fdf55d20
KW
1484 if (! (PL_numeric_radix_sv && IN_LC(LC_NUMERIC))) {
1485 ATOF(s,x);
1486 }
1487 else {
19c1206d 1488
e4850248
KW
1489 /* Look through the string for the first thing that looks like a
1490 * decimal point: either the value in the current locale or the
1491 * standard fallback of '.'. The one which appears earliest in the
1492 * input string is the one that we should have atof look for. Note
1493 * that we have to determine this beforehand because on some
1494 * systems, Perl_atof2 is just a wrapper around the system's atof.
1495 * */
1ae85f6c
KW
1496 const char * const standard_pos = strchr(s, '.');
1497 const char * const local_pos
1498 = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
1499 const bool use_standard_radix
1500 = standard_pos && (!local_pos || standard_pos < local_pos);
78787052 1501
665873e9 1502 if (use_standard_radix) {
e4850248 1503 SET_NUMERIC_STANDARD();
665873e9
KW
1504 LOCK_LC_NUMERIC_STANDARD();
1505 }
78787052 1506
b93d1309 1507 ATOF(s,x);
78787052 1508
665873e9
KW
1509 if (use_standard_radix) {
1510 UNLOCK_LC_NUMERIC_STANDARD();
67d796ae 1511 SET_NUMERIC_UNDERLYING();
665873e9 1512 }
e4850248 1513 }
a2287a13
KW
1514 RESTORE_LC_NUMERIC();
1515 }
9eda1ea6 1516
98994639 1517#endif
9eda1ea6 1518
98994639
HS
1519 return x;
1520}
1521
a7157111 1522#if defined(NV_INF) || defined(NV_NAN)
3c81f0b3 1523
829757a4 1524static char*
5563f457 1525S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value)
829757a4
JH
1526{
1527 const char *p0 = negative ? s - 1 : s;
1528 const char *p = p0;
7eff3d39 1529 const int infnan = grok_infnan(&p, send);
829757a4
JH
1530 if (infnan && p != p0) {
1531 /* If we can generate inf/nan directly, let's do so. */
1532#ifdef NV_INF
1533 if ((infnan & IS_NUMBER_INFINITY)) {
3823048b 1534 *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF;
829757a4
JH
1535 return (char*)p;
1536 }
1537#endif
1538#ifdef NV_NAN
1539 if ((infnan & IS_NUMBER_NAN)) {
3823048b 1540 *value = NV_NAN;
829757a4
JH
1541 return (char*)p;
1542 }
1543#endif
1544#ifdef Perl_strtod
68611e6f 1545 /* If still here, we didn't have either NV_INF or NV_NAN,
829757a4
JH
1546 * and can try falling back to native strtod/strtold.
1547 *
1548 * The native interface might not recognize all the possible
1549 * inf/nan strings Perl recognizes. What we can try
1550 * is to try faking the input. We will try inf/-inf/nan
1551 * as the most promising/portable input. */
1552 {
6d37e916 1553 const char* fake = "silence compiler warning";
829757a4
JH
1554 char* endp;
1555 NV nv;
a7157111 1556#ifdef NV_INF
829757a4
JH
1557 if ((infnan & IS_NUMBER_INFINITY)) {
1558 fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf";
1559 }
a7157111
JH
1560#endif
1561#ifdef NV_NAN
1562 if ((infnan & IS_NUMBER_NAN)) {
829757a4
JH
1563 fake = "nan";
1564 }
a7157111 1565#endif
6d37e916 1566 assert(strNE(fake, "silence compiler warning"));
9ec8aea5 1567 nv = S_strtod(aTHX_ fake, &endp);
829757a4 1568 if (fake != endp) {
a7157111 1569#ifdef NV_INF
829757a4 1570 if ((infnan & IS_NUMBER_INFINITY)) {
a7157111 1571# ifdef Perl_isinf
829757a4
JH
1572 if (Perl_isinf(nv))
1573 *value = nv;
a7157111 1574# else
829757a4
JH
1575 /* last resort, may generate SIGFPE */
1576 *value = Perl_exp((NV)1e9);
1577 if ((infnan & IS_NUMBER_NEG))
1578 *value = -*value;
a7157111 1579# endif
829757a4
JH
1580 return (char*)p; /* p, not endp */
1581 }
a7157111
JH
1582#endif
1583#ifdef NV_NAN
1584 if ((infnan & IS_NUMBER_NAN)) {
1585# ifdef Perl_isnan
829757a4
JH
1586 if (Perl_isnan(nv))
1587 *value = nv;
a7157111 1588# else
829757a4
JH
1589 /* last resort, may generate SIGFPE */
1590 *value = Perl_log((NV)-1.0);
a7157111 1591# endif
829757a4 1592 return (char*)p; /* p, not endp */
a7157111 1593#endif
829757a4
JH
1594 }
1595 }
1596 }
1597#endif /* #ifdef Perl_strtod */
1598 }
1599 return NULL;
1600}
1601
a7157111
JH
1602#endif /* if defined(NV_INF) || defined(NV_NAN) */
1603
98994639
HS
1604char*
1605Perl_my_atof2(pTHX_ const char* orig, NV* value)
1606{
6928bedc
KW
1607 PERL_ARGS_ASSERT_MY_ATOF2;
1608 return my_atof3(orig, value, 0);
1609}
1610
1611char*
16411967 1612Perl_my_atof3(pTHX_ const char* orig, NV* value, const STRLEN len)
6928bedc 1613{
e1ec3a88 1614 const char* s = orig;
a4eca1d4 1615 NV result[3] = {0.0, 0.0, 0.0};
ce6f496d 1616#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
6928bedc
KW
1617 const char* send = s + ((len != 0)
1618 ? len
1619 : strlen(orig)); /* one past the last */
a4eca1d4
JH
1620 bool negative = 0;
1621#endif
ce6f496d 1622#if defined(USE_PERL_ATOF) && !defined(Perl_strtod)
a4eca1d4 1623 UV accumulator[2] = {0,0}; /* before/after dp */
8194bf88 1624 bool seen_digit = 0;
20f6aaab
AS
1625 I32 exp_adjust[2] = {0,0};
1626 I32 exp_acc[2] = {-1, -1};
1627 /* the current exponent adjust for the accumulators */
98994639 1628 I32 exponent = 0;
8194bf88 1629 I32 seen_dp = 0;
20f6aaab
AS
1630 I32 digit = 0;
1631 I32 old_digit = 0;
8194bf88 1632 I32 sig_digits = 0; /* noof significant digits seen so far */
a4eca1d4 1633#endif
8194bf88 1634
ce6f496d 1635#if defined(USE_PERL_ATOF) || defined(Perl_strtod)
6928bedc 1636 PERL_ARGS_ASSERT_MY_ATOF3;
7918f24d 1637
a4eca1d4 1638 /* leading whitespace */
6928bedc 1639 while (s < send && isSPACE(*s))
a4eca1d4
JH
1640 ++s;
1641
1642 /* sign */
1643 switch (*s) {
1644 case '-':
1645 negative = 1;
1646 /* FALLTHROUGH */
1647 case '+':
1648 ++s;
1649 }
1650#endif
1651
ce6f496d 1652#ifdef Perl_strtod
a4eca1d4
JH
1653 {
1654 char* endp;
d94e901a
KW
1655 char* copy = NULL;
1656
adc55e02 1657 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
a4eca1d4 1658 return endp;
d94e901a 1659
14d26b44
TC
1660 /* strtold() accepts 0x-prefixed hex and in POSIX implementations,
1661 0b-prefixed binary numbers, which is backward incompatible
1662 */
e56dfd96 1663 if ((len == 0 || len - (s-orig) >= 2) && *s == '0' &&
14d26b44
TC
1664 (isALPHA_FOLD_EQ(s[1], 'x') || isALPHA_FOLD_EQ(s[1], 'b'))) {
1665 *value = 0;
1666 return (char *)s+1;
1667 }
1668
d94e901a
KW
1669 /* If the length is passed in, the input string isn't NUL-terminated,
1670 * and in it turns out the function below assumes it is; therefore we
1671 * create a copy and NUL-terminate that */
1672 if (len) {
1673 Newx(copy, len + 1, char);
1674 Copy(orig, copy, len, char);
1675 copy[len] = '\0';
1676 s = copy + (s - orig);
1677 }
1678
9ec8aea5 1679 result[2] = S_strtod(aTHX_ s, &endp);
d94e901a
KW
1680
1681 /* If we created a copy, 'endp' is in terms of that. Convert back to
1682 * the original */
1683 if (copy) {
aac39b03 1684 s = (s - copy) + (char *) orig;
d94e901a
KW
1685 endp = (endp - copy) + (char *) orig;
1686 Safefree(copy);
1687 }
1688
a4eca1d4
JH
1689 if (s != endp) {
1690 *value = negative ? -result[2] : result[2];
1691 return endp;
1692 }
1693 return NULL;
1694 }
1695#elif defined(USE_PERL_ATOF)
1696
8194bf88
DM
1697/* There is no point in processing more significant digits
1698 * than the NV can hold. Note that NV_DIG is a lower-bound value,
1699 * while we need an upper-bound value. We add 2 to account for this;
1700 * since it will have been conservative on both the first and last digit.
1701 * For example a 32-bit mantissa with an exponent of 4 would have
1702 * exact values in the set
1703 * 4
1704 * 8
1705 * ..
1706 * 17179869172
1707 * 17179869176
1708 * 17179869180
1709 *
1710 * where for the purposes of calculating NV_DIG we would have to discount
1711 * both the first and last digit, since neither can hold all values from
1712 * 0..9; but for calculating the value we must examine those two digits.
1713 */
ffa277e5
AS
1714#ifdef MAX_SIG_DIG_PLUS
1715 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
1716 possible digits in a NV, especially if NVs are not IEEE compliant
1717 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
1718# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
1719#else
1720# define MAX_SIG_DIGITS (NV_DIG+2)
1721#endif
8194bf88
DM
1722
1723/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
1724#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
98994639 1725
a5dc2484 1726#if defined(NV_INF) || defined(NV_NAN)
ae776a2c 1727 {
7eff3d39 1728 char* endp;
5563f457 1729 if ((endp = S_my_atof_infnan(aTHX_ s, negative, send, value)))
7eff3d39 1730 return endp;
ae776a2c 1731 }
a5dc2484 1732#endif
2b54f59f 1733
8194bf88
DM
1734 /* we accumulate digits into an integer; when this becomes too
1735 * large, we add the total to NV and start again */
98994639 1736
6928bedc 1737 while (s < send) {
8194bf88
DM
1738 if (isDIGIT(*s)) {
1739 seen_digit = 1;
20f6aaab 1740 old_digit = digit;
8194bf88 1741 digit = *s++ - '0';
20f6aaab
AS
1742 if (seen_dp)
1743 exp_adjust[1]++;
98994639 1744
8194bf88
DM
1745 /* don't start counting until we see the first significant
1746 * digit, eg the 5 in 0.00005... */
1747 if (!sig_digits && digit == 0)
1748 continue;
1749
1750 if (++sig_digits > MAX_SIG_DIGITS) {
98994639 1751 /* limits of precision reached */
20f6aaab
AS
1752 if (digit > 5) {
1753 ++accumulator[seen_dp];
1754 } else if (digit == 5) {
1755 if (old_digit % 2) { /* round to even - Allen */
1756 ++accumulator[seen_dp];
1757 }
1758 }
1759 if (seen_dp) {
1760 exp_adjust[1]--;
1761 } else {
1762 exp_adjust[0]++;
1763 }
8194bf88 1764 /* skip remaining digits */
6928bedc 1765 while (s < send && isDIGIT(*s)) {
98994639 1766 ++s;
20f6aaab
AS
1767 if (! seen_dp) {
1768 exp_adjust[0]++;
1769 }
98994639
HS
1770 }
1771 /* warn of loss of precision? */
98994639 1772 }
8194bf88 1773 else {
20f6aaab 1774 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
8194bf88 1775 /* add accumulator to result and start again */
20f6aaab
AS
1776 result[seen_dp] = S_mulexp10(result[seen_dp],
1777 exp_acc[seen_dp])
1778 + (NV)accumulator[seen_dp];
1779 accumulator[seen_dp] = 0;
1780 exp_acc[seen_dp] = 0;
98994639 1781 }
20f6aaab
AS
1782 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1783 ++exp_acc[seen_dp];
98994639 1784 }
8194bf88 1785 }
e1ec3a88 1786 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
8194bf88 1787 seen_dp = 1;
20f6aaab 1788 if (sig_digits > MAX_SIG_DIGITS) {
6928bedc 1789 while (s < send && isDIGIT(*s)) {
20f6aaab 1790 ++s;
9604fbf0 1791 }
20f6aaab
AS
1792 break;
1793 }
8194bf88
DM
1794 }
1795 else {
1796 break;
98994639
HS
1797 }
1798 }
1799
20f6aaab
AS
1800 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1801 if (seen_dp) {
1802 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1803 }
98994639 1804
6928bedc 1805 if (s < send && seen_digit && (isALPHA_FOLD_EQ(*s, 'e'))) {
98994639
HS
1806 bool expnegative = 0;
1807
1808 ++s;
1809 switch (*s) {
1810 case '-':
1811 expnegative = 1;
924ba076 1812 /* FALLTHROUGH */
98994639
HS
1813 case '+':
1814 ++s;
1815 }
6928bedc 1816 while (s < send && isDIGIT(*s))
98994639
HS
1817 exponent = exponent * 10 + (*s++ - '0');
1818 if (expnegative)
1819 exponent = -exponent;
1820 }
1821
1822 /* now apply the exponent */
20f6aaab
AS
1823
1824 if (seen_dp) {
1825 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1826 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1827 } else {
1828 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1829 }
98994639
HS
1830
1831 /* now apply the sign */
1832 if (negative)
20f6aaab 1833 result[2] = -result[2];
a36244b7 1834#endif /* USE_PERL_ATOF */
20f6aaab 1835 *value = result[2];
73d840c0 1836 return (char *)s;
98994639
HS
1837}
1838
5d34af89 1839/*
3d9d9213 1840=for apidoc isinfnan
5d34af89 1841
5f4d68d1 1842C<Perl_isinfnan()> is a utility function that returns true if the NV
796b6530
KW
1843argument is either an infinity or a C<NaN>, false otherwise. To test
1844in more detail, use C<Perl_isinf()> and C<Perl_isnan()>.
5d34af89 1845
68611e6f
JH
1846This is also the logical inverse of Perl_isfinite().
1847
5d34af89
JH
1848=cut
1849*/
1cd88304
JH
1850bool
1851Perl_isinfnan(NV nv)
1852{
a5dc2484 1853 PERL_UNUSED_ARG(nv);
1cd88304
JH
1854#ifdef Perl_isinf
1855 if (Perl_isinf(nv))
1856 return TRUE;
1857#endif
1858#ifdef Perl_isnan
1859 if (Perl_isnan(nv))
1860 return TRUE;
1861#endif
1862 return FALSE;
1863}
1864
354b74ae 1865/*
af147c81 1866=for apidoc isinfnansv
354b74ae 1867
796b6530 1868Checks whether the argument would be either an infinity or C<NaN> when used
354b74ae 1869as a number, but is careful not to trigger non-numeric or uninitialized
796b6530 1870warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already.
354b74ae
FC
1871
1872=cut
1873*/
1874
1875bool
1876Perl_isinfnansv(pTHX_ SV *sv)
1877{
1878 PERL_ARGS_ASSERT_ISINFNANSV;
1879 if (!SvOK(sv))
1880 return FALSE;
1881 if (SvNOKp(sv))
1882 return Perl_isinfnan(SvNVX(sv));
1883 if (SvIOKp(sv))
1884 return FALSE;
1885 {
1886 STRLEN len;
1887 const char *s = SvPV_nomg_const(sv, len);
3823048b 1888 return cBOOL(grok_infnan(&s, s+len));
354b74ae
FC
1889 }
1890}
1891
d67dac15 1892#ifndef HAS_MODFL
68611e6f
JH
1893/* C99 has truncl, pre-C99 Solaris had aintl. We can use either with
1894 * copysignl to emulate modfl, which is in some platforms missing or
1895 * broken. */
d67dac15
JH
1896# if defined(HAS_TRUNCL) && defined(HAS_COPYSIGNL)
1897long double
1898Perl_my_modfl(long double x, long double *ip)
1899{
68611e6f
JH
1900 *ip = truncl(x);
1901 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
d67dac15
JH
1902}
1903# elif defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
55954f19
JH
1904long double
1905Perl_my_modfl(long double x, long double *ip)
1906{
68611e6f
JH
1907 *ip = aintl(x);
1908 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
55954f19 1909}
d67dac15 1910# endif
55954f19
JH
1911#endif
1912
7b9b7dff 1913/* Similarly, with ilogbl and scalbnl we can emulate frexpl. */
55954f19
JH
1914#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1915long double
1916Perl_my_frexpl(long double x, int *e) {
68611e6f
JH
1917 *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1918 return (scalbnl(x, -*e));
55954f19
JH
1919}
1920#endif
66610fdd
RGS
1921
1922/*
ed140128
AD
1923=for apidoc Perl_signbit
1924
1925Return a non-zero integer if the sign bit on an NV is set, and 0 if
19c1206d 1926it is not.
ed140128 1927
796b6530
KW
1928If F<Configure> detects this system has a C<signbit()> that will work with
1929our NVs, then we just use it via the C<#define> in F<perl.h>. Otherwise,
8b7fad81 1930fall back on this implementation. The main use of this function
796b6530 1931is catching C<-0.0>.
ed140128 1932
796b6530
KW
1933C<Configure> notes: This function is called C<'Perl_signbit'> instead of a
1934plain C<'signbit'> because it is easy to imagine a system having a C<signbit()>
ed140128 1935function or macro that doesn't happen to work with our particular choice
796b6530 1936of NVs. We shouldn't just re-C<#define> C<signbit> as C<Perl_signbit> and expect
ed140128 1937the standard system headers to be happy. Also, this is a no-context
796b6530
KW
1938function (no C<pTHX_>) because C<Perl_signbit()> is usually re-C<#defined> in
1939F<perl.h> as a simple macro call to the system's C<signbit()>.
1940Users should just always call C<Perl_signbit()>.
ed140128
AD
1941
1942=cut
1943*/
1944#if !defined(HAS_SIGNBIT)
1945int
1946Perl_signbit(NV x) {
8b7fad81 1947# ifdef Perl_fp_class_nzero
406d5545
JH
1948 return Perl_fp_class_nzero(x);
1949 /* Try finding the high byte, and assume it's highest bit
1950 * is the sign. This assumption is probably wrong somewhere. */
572cd850
JH
1951# elif defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN
1952 return (((unsigned char *)&x)[9] & 0x80);
1953# elif defined(NV_LITTLE_ENDIAN)
1954 /* Note that NVSIZE is sizeof(NV), which would make the below be
1955 * wrong if the end bytes are unused, which happens with the x86
1956 * 80-bit long doubles, which is why take care of that above. */
1957 return (((unsigned char *)&x)[NVSIZE - 1] & 0x80);
1958# elif defined(NV_BIG_ENDIAN)
1959 return (((unsigned char *)&x)[0] & 0x80);
1960# else
406d5545 1961 /* This last resort fallback is wrong for the negative zero. */
3585840c 1962 return (x < 0.0) ? 1 : 0;
572cd850 1963# endif
ed140128
AD
1964}
1965#endif
1966
1967/*
14d04a33 1968 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1969 */