This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
cmpVERSION.pl: Check lib/*.pl, too
[perl5.git] / numeric.c
CommitLineData
98994639
HS
1/* numeric.c
2 *
663f364b 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
1129b882 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
98994639
HS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
4ac71550
TC
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
98994639
HS
16 */
17
ccfc67b7
JH
18/*
19=head1 Numeric functions
166f8a29
DM
20
21This file contains all the stuff needed by perl for manipulating numeric
22values, including such things as replacements for the OS's atof() function
23
24=cut
25
ccfc67b7
JH
26*/
27
98994639
HS
28#include "EXTERN.h"
29#define PERL_IN_NUMERIC_C
30#include "perl.h"
31
32U32
33Perl_cast_ulong(pTHX_ NV f)
34{
96a5add6 35 PERL_UNUSED_CONTEXT;
98994639
HS
36 if (f < 0.0)
37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
38 if (f < U32_MAX_P1) {
39#if CASTFLAGS & 2
40 if (f < U32_MAX_P1_HALF)
41 return (U32) f;
42 f -= U32_MAX_P1_HALF;
43 return ((U32) f) | (1 + U32_MAX >> 1);
44#else
45 return (U32) f;
46#endif
47 }
48 return f > 0 ? U32_MAX : 0 /* NaN */;
49}
50
51I32
52Perl_cast_i32(pTHX_ NV f)
53{
96a5add6 54 PERL_UNUSED_CONTEXT;
98994639
HS
55 if (f < I32_MAX_P1)
56 return f < I32_MIN ? I32_MIN : (I32) f;
57 if (f < U32_MAX_P1) {
58#if CASTFLAGS & 2
59 if (f < U32_MAX_P1_HALF)
60 return (I32)(U32) f;
61 f -= U32_MAX_P1_HALF;
62 return (I32)(((U32) f) | (1 + U32_MAX >> 1));
63#else
64 return (I32)(U32) f;
65#endif
66 }
67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
68}
69
70IV
71Perl_cast_iv(pTHX_ NV f)
72{
96a5add6 73 PERL_UNUSED_CONTEXT;
98994639
HS
74 if (f < IV_MAX_P1)
75 return f < IV_MIN ? IV_MIN : (IV) f;
76 if (f < UV_MAX_P1) {
77#if CASTFLAGS & 2
78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
79 if (f < UV_MAX_P1_HALF)
80 return (IV)(UV) f;
81 f -= UV_MAX_P1_HALF;
82 return (IV)(((UV) f) | (1 + UV_MAX >> 1));
83#else
84 return (IV)(UV) f;
85#endif
86 }
87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
88}
89
90UV
91Perl_cast_uv(pTHX_ NV f)
92{
96a5add6 93 PERL_UNUSED_CONTEXT;
98994639
HS
94 if (f < 0.0)
95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
96 if (f < UV_MAX_P1) {
97#if CASTFLAGS & 2
98 if (f < UV_MAX_P1_HALF)
99 return (UV) f;
100 f -= UV_MAX_P1_HALF;
101 return ((UV) f) | (1 + UV_MAX >> 1);
102#else
103 return (UV) f;
104#endif
105 }
106 return f > 0 ? UV_MAX : 0 /* NaN */;
107}
108
53305cf1
NC
109/*
110=for apidoc grok_bin
98994639 111
53305cf1
NC
112converts a string representing a binary number to numeric form.
113
114On entry I<start> and I<*len> give the string to scan, I<*flags> gives
115conversion flags, and I<result> should be NULL or a pointer to an NV.
116The scan stops at the end of the string, or the first invalid character.
7b667b5f
MHM
117Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
118invalid character will also trigger a warning.
119On return I<*len> is set to the length of the scanned string,
120and I<*flags> gives output flags.
53305cf1 121
7fc63493 122If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
53305cf1
NC
123and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
124returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
125and writes the value to I<*result> (or the value is discarded if I<result>
126is NULL).
127
7b667b5f 128The binary number may optionally be prefixed with "0b" or "b" unless
a4c04bdc
NC
129C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
130C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
53305cf1
NC
131number may use '_' characters to separate digits.
132
133=cut
02470786
KW
134
135Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
136which suppresses any message for non-portable numbers that are still valid
137on this platform.
53305cf1
NC
138 */
139
140UV
7918f24d
NC
141Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
142{
53305cf1
NC
143 const char *s = start;
144 STRLEN len = *len_p;
145 UV value = 0;
146 NV value_nv = 0;
147
148 const UV max_div_2 = UV_MAX / 2;
f2338a2e 149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 150 bool overflowed = FALSE;
7fc63493 151 char bit;
53305cf1 152
7918f24d
NC
153 PERL_ARGS_ASSERT_GROK_BIN;
154
a4c04bdc
NC
155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
156 /* strip off leading b or 0b.
157 for compatibility silently suffer "b" and "0b" as valid binary
158 numbers. */
159 if (len >= 1) {
a674e8db 160 if (s[0] == 'b' || s[0] == 'B') {
a4c04bdc
NC
161 s++;
162 len--;
163 }
a674e8db 164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
a4c04bdc
NC
165 s+=2;
166 len-=2;
167 }
168 }
53305cf1
NC
169 }
170
7fc63493 171 for (; len-- && (bit = *s); s++) {
53305cf1
NC
172 if (bit == '0' || bit == '1') {
173 /* Write it in this wonky order with a goto to attempt to get the
174 compiler to make the common case integer-only loop pretty tight.
175 With gcc seems to be much straighter code than old scan_bin. */
176 redo:
177 if (!overflowed) {
178 if (value <= max_div_2) {
179 value = (value << 1) | (bit - '0');
180 continue;
181 }
182 /* Bah. We're just overflowed. */
9b387841
NC
183 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
184 "Integer overflow in binary number");
53305cf1
NC
185 overflowed = TRUE;
186 value_nv = (NV) value;
187 }
188 value_nv *= 2.0;
98994639 189 /* If an NV has not enough bits in its mantissa to
d1be9408 190 * represent a UV this summing of small low-order numbers
98994639
HS
191 * is a waste of time (because the NV cannot preserve
192 * the low-order bits anyway): we could just remember when
53305cf1 193 * did we overflow and in the end just multiply value_nv by the
98994639 194 * right amount. */
53305cf1
NC
195 value_nv += (NV)(bit - '0');
196 continue;
197 }
198 if (bit == '_' && len && allow_underscores && (bit = s[1])
199 && (bit == '0' || bit == '1'))
98994639
HS
200 {
201 --len;
202 ++s;
53305cf1 203 goto redo;
98994639 204 }
a2a5de95
NC
205 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
206 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
207 "Illegal binary digit '%c' ignored", *s);
53305cf1 208 break;
98994639 209 }
53305cf1
NC
210
211 if ( ( overflowed && value_nv > 4294967295.0)
98994639 212#if UVSIZE > 4
02470786
KW
213 || (!overflowed && value > 0xffffffff
214 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
215#endif
216 ) {
a2a5de95
NC
217 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
218 "Binary number > 0b11111111111111111111111111111111 non-portable");
53305cf1
NC
219 }
220 *len_p = s - start;
221 if (!overflowed) {
222 *flags = 0;
223 return value;
98994639 224 }
53305cf1
NC
225 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
226 if (result)
227 *result = value_nv;
228 return UV_MAX;
98994639
HS
229}
230
53305cf1
NC
231/*
232=for apidoc grok_hex
233
234converts a string representing a hex number to numeric form.
235
236On entry I<start> and I<*len> give the string to scan, I<*flags> gives
237conversion flags, and I<result> should be NULL or a pointer to an NV.
7b667b5f
MHM
238The scan stops at the end of the string, or the first invalid character.
239Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
240invalid character will also trigger a warning.
241On return I<*len> is set to the length of the scanned string,
242and I<*flags> gives output flags.
53305cf1
NC
243
244If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
245and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
246returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
247and writes the value to I<*result> (or the value is discarded if I<result>
248is NULL).
249
d1be9408 250The hex number may optionally be prefixed with "0x" or "x" unless
a4c04bdc
NC
251C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
252C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
53305cf1
NC
253number may use '_' characters to separate digits.
254
255=cut
02470786
KW
256
257Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
258which suppresses any message for non-portable numbers that are still valid
259on this platform.
53305cf1
NC
260 */
261
262UV
7918f24d
NC
263Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
264{
27da23d5 265 dVAR;
53305cf1
NC
266 const char *s = start;
267 STRLEN len = *len_p;
268 UV value = 0;
269 NV value_nv = 0;
53305cf1 270 const UV max_div_16 = UV_MAX / 16;
f2338a2e 271 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 272 bool overflowed = FALSE;
98994639 273
7918f24d
NC
274 PERL_ARGS_ASSERT_GROK_HEX;
275
a4c04bdc
NC
276 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
277 /* strip off leading x or 0x.
278 for compatibility silently suffer "x" and "0x" as valid hex numbers.
279 */
280 if (len >= 1) {
a674e8db 281 if (s[0] == 'x' || s[0] == 'X') {
a4c04bdc
NC
282 s++;
283 len--;
284 }
a674e8db 285 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
a4c04bdc
NC
286 s+=2;
287 len-=2;
288 }
289 }
98994639
HS
290 }
291
292 for (; len-- && *s; s++) {
a3b680e6 293 const char *hexdigit = strchr(PL_hexdigit, *s);
53305cf1
NC
294 if (hexdigit) {
295 /* Write it in this wonky order with a goto to attempt to get the
296 compiler to make the common case integer-only loop pretty tight.
297 With gcc seems to be much straighter code than old scan_hex. */
298 redo:
299 if (!overflowed) {
300 if (value <= max_div_16) {
301 value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
302 continue;
303 }
304 /* Bah. We're just overflowed. */
9b387841
NC
305 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
306 "Integer overflow in hexadecimal number");
53305cf1
NC
307 overflowed = TRUE;
308 value_nv = (NV) value;
309 }
310 value_nv *= 16.0;
311 /* If an NV has not enough bits in its mantissa to
d1be9408 312 * represent a UV this summing of small low-order numbers
53305cf1
NC
313 * is a waste of time (because the NV cannot preserve
314 * the low-order bits anyway): we could just remember when
315 * did we overflow and in the end just multiply value_nv by the
316 * right amount of 16-tuples. */
317 value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
318 continue;
319 }
320 if (*s == '_' && len && allow_underscores && s[1]
e1ec3a88 321 && (hexdigit = strchr(PL_hexdigit, s[1])))
98994639
HS
322 {
323 --len;
324 ++s;
53305cf1 325 goto redo;
98994639 326 }
a2a5de95
NC
327 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
328 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
53305cf1
NC
329 "Illegal hexadecimal digit '%c' ignored", *s);
330 break;
331 }
332
333 if ( ( overflowed && value_nv > 4294967295.0)
334#if UVSIZE > 4
02470786
KW
335 || (!overflowed && value > 0xffffffff
336 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
53305cf1
NC
337#endif
338 ) {
a2a5de95
NC
339 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
340 "Hexadecimal number > 0xffffffff non-portable");
53305cf1
NC
341 }
342 *len_p = s - start;
343 if (!overflowed) {
344 *flags = 0;
345 return value;
346 }
347 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
348 if (result)
349 *result = value_nv;
350 return UV_MAX;
351}
352
353/*
354=for apidoc grok_oct
355
7b667b5f
MHM
356converts a string representing an octal number to numeric form.
357
358On entry I<start> and I<*len> give the string to scan, I<*flags> gives
359conversion flags, and I<result> should be NULL or a pointer to an NV.
360The scan stops at the end of the string, or the first invalid character.
361Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
154bd527 3628 or 9 will also trigger a warning.
7b667b5f
MHM
363On return I<*len> is set to the length of the scanned string,
364and I<*flags> gives output flags.
365
366If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
367and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
368returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
369and writes the value to I<*result> (or the value is discarded if I<result>
370is NULL).
371
372If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
373number may use '_' characters to separate digits.
53305cf1
NC
374
375=cut
02470786
KW
376
377Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE
378which suppresses any message for non-portable numbers that are still valid
379on this platform.
53305cf1
NC
380 */
381
382UV
7918f24d
NC
383Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
384{
53305cf1
NC
385 const char *s = start;
386 STRLEN len = *len_p;
387 UV value = 0;
388 NV value_nv = 0;
53305cf1 389 const UV max_div_8 = UV_MAX / 8;
f2338a2e 390 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1
NC
391 bool overflowed = FALSE;
392
7918f24d
NC
393 PERL_ARGS_ASSERT_GROK_OCT;
394
53305cf1
NC
395 for (; len-- && *s; s++) {
396 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
397 out front allows slicker code. */
398 int digit = *s - '0';
399 if (digit >= 0 && digit <= 7) {
400 /* Write it in this wonky order with a goto to attempt to get the
401 compiler to make the common case integer-only loop pretty tight.
402 */
403 redo:
404 if (!overflowed) {
405 if (value <= max_div_8) {
406 value = (value << 3) | digit;
407 continue;
408 }
409 /* Bah. We're just overflowed. */
9b387841
NC
410 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
411 "Integer overflow in octal number");
53305cf1
NC
412 overflowed = TRUE;
413 value_nv = (NV) value;
414 }
415 value_nv *= 8.0;
98994639 416 /* If an NV has not enough bits in its mantissa to
d1be9408 417 * represent a UV this summing of small low-order numbers
98994639
HS
418 * is a waste of time (because the NV cannot preserve
419 * the low-order bits anyway): we could just remember when
53305cf1
NC
420 * did we overflow and in the end just multiply value_nv by the
421 * right amount of 8-tuples. */
422 value_nv += (NV)digit;
423 continue;
424 }
425 if (digit == ('_' - '0') && len && allow_underscores
426 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
427 {
428 --len;
429 ++s;
430 goto redo;
431 }
432 /* Allow \octal to work the DWIM way (that is, stop scanning
7b667b5f 433 * as soon as non-octal characters are seen, complain only if
53305cf1
NC
434 * someone seems to want to use the digits eight and nine). */
435 if (digit == 8 || digit == 9) {
a2a5de95
NC
436 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
437 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
438 "Illegal octal digit '%c' ignored", *s);
53305cf1
NC
439 }
440 break;
98994639 441 }
53305cf1
NC
442
443 if ( ( overflowed && value_nv > 4294967295.0)
98994639 444#if UVSIZE > 4
02470786
KW
445 || (!overflowed && value > 0xffffffff
446 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
447#endif
448 ) {
a2a5de95
NC
449 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
450 "Octal number > 037777777777 non-portable");
53305cf1
NC
451 }
452 *len_p = s - start;
453 if (!overflowed) {
454 *flags = 0;
455 return value;
98994639 456 }
53305cf1
NC
457 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
458 if (result)
459 *result = value_nv;
460 return UV_MAX;
461}
462
463/*
464=for apidoc scan_bin
465
466For backwards compatibility. Use C<grok_bin> instead.
467
468=for apidoc scan_hex
469
470For backwards compatibility. Use C<grok_hex> instead.
471
472=for apidoc scan_oct
473
474For backwards compatibility. Use C<grok_oct> instead.
475
476=cut
477 */
478
479NV
73d840c0 480Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
481{
482 NV rnv;
483 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 484 const UV ruv = grok_bin (start, &len, &flags, &rnv);
53305cf1 485
7918f24d
NC
486 PERL_ARGS_ASSERT_SCAN_BIN;
487
53305cf1
NC
488 *retlen = len;
489 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
490}
491
492NV
73d840c0 493Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
494{
495 NV rnv;
496 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 497 const UV ruv = grok_oct (start, &len, &flags, &rnv);
53305cf1 498
7918f24d
NC
499 PERL_ARGS_ASSERT_SCAN_OCT;
500
53305cf1
NC
501 *retlen = len;
502 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
503}
504
505NV
73d840c0 506Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
507{
508 NV rnv;
509 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 510 const UV ruv = grok_hex (start, &len, &flags, &rnv);
53305cf1 511
7918f24d
NC
512 PERL_ARGS_ASSERT_SCAN_HEX;
513
53305cf1
NC
514 *retlen = len;
515 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
98994639
HS
516}
517
518/*
519=for apidoc grok_numeric_radix
520
521Scan and skip for a numeric decimal separator (radix).
522
523=cut
524 */
525bool
526Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
527{
528#ifdef USE_LOCALE_NUMERIC
97aff369 529 dVAR;
7918f24d
NC
530
531 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
532
98994639
HS
533 if (PL_numeric_radix_sv && IN_LOCALE) {
534 STRLEN len;
c4420975 535 const char * const radix = SvPV(PL_numeric_radix_sv, len);
98994639
HS
536 if (*sp + len <= send && memEQ(*sp, radix, len)) {
537 *sp += len;
538 return TRUE;
539 }
540 }
541 /* always try "." if numeric radix didn't match because
542 * we may have data from different locales mixed */
543#endif
7918f24d
NC
544
545 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
546
98994639
HS
547 if (*sp < send && **sp == '.') {
548 ++*sp;
549 return TRUE;
550 }
551 return FALSE;
552}
553
554/*
555=for apidoc grok_number
556
557Recognise (or not) a number. The type of the number is returned
558(0 if unrecognised), otherwise it is a bit-ORed combination of
559IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
aa8b85de 560IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
60939fb8
NC
561
562If the value of the number can fit an in UV, it is returned in the *valuep
563IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
564will never be set unless *valuep is valid, but *valuep may have been assigned
565to during processing even though IS_NUMBER_IN_UV is not set on return.
566If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
567valuep is non-NULL, but no actual assignment (or SEGV) will occur.
568
569IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
570seen (in which case *valuep gives the true value truncated to an integer), and
571IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
572absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
573number is larger than a UV.
98994639
HS
574
575=cut
576 */
577int
578Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
579{
60939fb8 580 const char *s = pv;
c4420975 581 const char * const send = pv + len;
60939fb8
NC
582 const UV max_div_10 = UV_MAX / 10;
583 const char max_mod_10 = UV_MAX % 10;
584 int numtype = 0;
585 int sawinf = 0;
aa8b85de 586 int sawnan = 0;
60939fb8 587
7918f24d
NC
588 PERL_ARGS_ASSERT_GROK_NUMBER;
589
60939fb8
NC
590 while (s < send && isSPACE(*s))
591 s++;
592 if (s == send) {
593 return 0;
594 } else if (*s == '-') {
595 s++;
596 numtype = IS_NUMBER_NEG;
597 }
598 else if (*s == '+')
599 s++;
600
601 if (s == send)
602 return 0;
603
604 /* next must be digit or the radix separator or beginning of infinity */
605 if (isDIGIT(*s)) {
606 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
607 overflow. */
608 UV value = *s - '0';
609 /* This construction seems to be more optimiser friendly.
610 (without it gcc does the isDIGIT test and the *s - '0' separately)
611 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
612 In theory the optimiser could deduce how far to unroll the loop
613 before checking for overflow. */
58bb9ec3
NC
614 if (++s < send) {
615 int digit = *s - '0';
60939fb8
NC
616 if (digit >= 0 && digit <= 9) {
617 value = value * 10 + digit;
58bb9ec3
NC
618 if (++s < send) {
619 digit = *s - '0';
60939fb8
NC
620 if (digit >= 0 && digit <= 9) {
621 value = value * 10 + digit;
58bb9ec3
NC
622 if (++s < send) {
623 digit = *s - '0';
60939fb8
NC
624 if (digit >= 0 && digit <= 9) {
625 value = value * 10 + digit;
58bb9ec3
NC
626 if (++s < send) {
627 digit = *s - '0';
60939fb8
NC
628 if (digit >= 0 && digit <= 9) {
629 value = value * 10 + digit;
58bb9ec3
NC
630 if (++s < send) {
631 digit = *s - '0';
60939fb8
NC
632 if (digit >= 0 && digit <= 9) {
633 value = value * 10 + digit;
58bb9ec3
NC
634 if (++s < send) {
635 digit = *s - '0';
60939fb8
NC
636 if (digit >= 0 && digit <= 9) {
637 value = value * 10 + digit;
58bb9ec3
NC
638 if (++s < send) {
639 digit = *s - '0';
60939fb8
NC
640 if (digit >= 0 && digit <= 9) {
641 value = value * 10 + digit;
58bb9ec3
NC
642 if (++s < send) {
643 digit = *s - '0';
60939fb8
NC
644 if (digit >= 0 && digit <= 9) {
645 value = value * 10 + digit;
58bb9ec3 646 if (++s < send) {
60939fb8
NC
647 /* Now got 9 digits, so need to check
648 each time for overflow. */
58bb9ec3 649 digit = *s - '0';
60939fb8
NC
650 while (digit >= 0 && digit <= 9
651 && (value < max_div_10
652 || (value == max_div_10
653 && digit <= max_mod_10))) {
654 value = value * 10 + digit;
58bb9ec3
NC
655 if (++s < send)
656 digit = *s - '0';
60939fb8
NC
657 else
658 break;
659 }
660 if (digit >= 0 && digit <= 9
51bd16da 661 && (s < send)) {
60939fb8
NC
662 /* value overflowed.
663 skip the remaining digits, don't
664 worry about setting *valuep. */
665 do {
666 s++;
667 } while (s < send && isDIGIT(*s));
668 numtype |=
669 IS_NUMBER_GREATER_THAN_UV_MAX;
670 goto skip_value;
671 }
672 }
673 }
98994639 674 }
60939fb8
NC
675 }
676 }
677 }
678 }
679 }
680 }
681 }
682 }
683 }
684 }
685 }
98994639 686 }
60939fb8 687 }
98994639 688 }
60939fb8
NC
689 numtype |= IS_NUMBER_IN_UV;
690 if (valuep)
691 *valuep = value;
692
693 skip_value:
694 if (GROK_NUMERIC_RADIX(&s, send)) {
695 numtype |= IS_NUMBER_NOT_INT;
696 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
697 s++;
98994639 698 }
60939fb8
NC
699 }
700 else if (GROK_NUMERIC_RADIX(&s, send)) {
701 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
702 /* no digits before the radix means we need digits after it */
703 if (s < send && isDIGIT(*s)) {
704 do {
705 s++;
706 } while (s < send && isDIGIT(*s));
707 if (valuep) {
708 /* integer approximation is valid - it's 0. */
709 *valuep = 0;
710 }
98994639 711 }
60939fb8
NC
712 else
713 return 0;
714 } else if (*s == 'I' || *s == 'i') {
715 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
716 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
717 s++; if (s < send && (*s == 'I' || *s == 'i')) {
718 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
719 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
720 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
721 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
722 s++;
98994639 723 }
60939fb8 724 sawinf = 1;
aa8b85de
JH
725 } else if (*s == 'N' || *s == 'n') {
726 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
727 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
728 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
729 s++;
730 sawnan = 1;
731 } else
98994639 732 return 0;
60939fb8
NC
733
734 if (sawinf) {
735 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
736 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
aa8b85de
JH
737 } else if (sawnan) {
738 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
739 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
60939fb8
NC
740 } else if (s < send) {
741 /* we can have an optional exponent part */
742 if (*s == 'e' || *s == 'E') {
743 /* The only flag we keep is sign. Blow away any "it's UV" */
744 numtype &= IS_NUMBER_NEG;
745 numtype |= IS_NUMBER_NOT_INT;
746 s++;
747 if (s < send && (*s == '-' || *s == '+'))
748 s++;
749 if (s < send && isDIGIT(*s)) {
750 do {
751 s++;
752 } while (s < send && isDIGIT(*s));
753 }
754 else
755 return 0;
756 }
757 }
758 while (s < send && isSPACE(*s))
759 s++;
760 if (s >= send)
aa8b85de 761 return numtype;
60939fb8
NC
762 if (len == 10 && memEQ(pv, "0 but true", 10)) {
763 if (valuep)
764 *valuep = 0;
765 return IS_NUMBER_IN_UV;
766 }
767 return 0;
98994639
HS
768}
769
4801ca72 770STATIC NV
98994639
HS
771S_mulexp10(NV value, I32 exponent)
772{
773 NV result = 1.0;
774 NV power = 10.0;
775 bool negative = 0;
776 I32 bit;
777
778 if (exponent == 0)
779 return value;
20f6aaab 780 if (value == 0)
66a1b24b 781 return (NV)0;
87032ba1 782
24866caa 783 /* On OpenVMS VAX we by default use the D_FLOAT double format,
67597c89 784 * and that format does not have *easy* capabilities [1] for
24866caa
CB
785 * overflowing doubles 'silently' as IEEE fp does. We also need
786 * to support G_FLOAT on both VAX and Alpha, and though the exponent
787 * range is much larger than D_FLOAT it still doesn't do silent
788 * overflow. Therefore we need to detect early whether we would
789 * overflow (this is the behaviour of the native string-to-float
790 * conversion routines, and therefore of native applications, too).
67597c89 791 *
24866caa
CB
792 * [1] Trying to establish a condition handler to trap floating point
793 * exceptions is not a good idea. */
87032ba1
JH
794
795 /* In UNICOS and in certain Cray models (such as T90) there is no
796 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
797 * There is something you can do if you are willing to use some
798 * inline assembler: the instruction is called DFI-- but that will
799 * disable *all* floating point interrupts, a little bit too large
800 * a hammer. Therefore we need to catch potential overflows before
801 * it's too late. */
353813d9
HS
802
803#if ((defined(VMS) && !defined(__IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
804 STMT_START {
c4420975 805 const NV exp_v = log10(value);
353813d9
HS
806 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
807 return NV_MAX;
808 if (exponent < 0) {
809 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
810 return 0.0;
811 while (-exponent >= NV_MAX_10_EXP) {
812 /* combination does not overflow, but 10^(-exponent) does */
813 value /= 10;
814 ++exponent;
815 }
816 }
817 } STMT_END;
87032ba1
JH
818#endif
819
353813d9
HS
820 if (exponent < 0) {
821 negative = 1;
822 exponent = -exponent;
823 }
98994639
HS
824 for (bit = 1; exponent; bit <<= 1) {
825 if (exponent & bit) {
826 exponent ^= bit;
827 result *= power;
236f0012
CB
828 /* Floating point exceptions are supposed to be turned off,
829 * but if we're obviously done, don't risk another iteration.
830 */
831 if (exponent == 0) break;
98994639
HS
832 }
833 power *= power;
834 }
835 return negative ? value / result : value * result;
836}
837
838NV
839Perl_my_atof(pTHX_ const char* s)
840{
841 NV x = 0.0;
842#ifdef USE_LOCALE_NUMERIC
97aff369 843 dVAR;
7918f24d
NC
844
845 PERL_ARGS_ASSERT_MY_ATOF;
846
98994639
HS
847 if (PL_numeric_local && IN_LOCALE) {
848 NV y;
849
850 /* Scan the number twice; once using locale and once without;
851 * choose the larger result (in absolute value). */
a36244b7 852 Perl_atof2(s, x);
98994639 853 SET_NUMERIC_STANDARD();
a36244b7 854 Perl_atof2(s, y);
98994639
HS
855 SET_NUMERIC_LOCAL();
856 if ((y < 0.0 && y < x) || (y > 0.0 && y > x))
857 return y;
858 }
859 else
a36244b7 860 Perl_atof2(s, x);
98994639 861#else
a36244b7 862 Perl_atof2(s, x);
98994639
HS
863#endif
864 return x;
865}
866
867char*
868Perl_my_atof2(pTHX_ const char* orig, NV* value)
869{
20f6aaab 870 NV result[3] = {0.0, 0.0, 0.0};
e1ec3a88 871 const char* s = orig;
a36244b7 872#ifdef USE_PERL_ATOF
20f6aaab 873 UV accumulator[2] = {0,0}; /* before/after dp */
a36244b7 874 bool negative = 0;
e1ec3a88 875 const char* send = s + strlen(orig) - 1;
8194bf88 876 bool seen_digit = 0;
20f6aaab
AS
877 I32 exp_adjust[2] = {0,0};
878 I32 exp_acc[2] = {-1, -1};
879 /* the current exponent adjust for the accumulators */
98994639 880 I32 exponent = 0;
8194bf88 881 I32 seen_dp = 0;
20f6aaab
AS
882 I32 digit = 0;
883 I32 old_digit = 0;
8194bf88
DM
884 I32 sig_digits = 0; /* noof significant digits seen so far */
885
7918f24d
NC
886 PERL_ARGS_ASSERT_MY_ATOF2;
887
8194bf88
DM
888/* There is no point in processing more significant digits
889 * than the NV can hold. Note that NV_DIG is a lower-bound value,
890 * while we need an upper-bound value. We add 2 to account for this;
891 * since it will have been conservative on both the first and last digit.
892 * For example a 32-bit mantissa with an exponent of 4 would have
893 * exact values in the set
894 * 4
895 * 8
896 * ..
897 * 17179869172
898 * 17179869176
899 * 17179869180
900 *
901 * where for the purposes of calculating NV_DIG we would have to discount
902 * both the first and last digit, since neither can hold all values from
903 * 0..9; but for calculating the value we must examine those two digits.
904 */
ffa277e5
AS
905#ifdef MAX_SIG_DIG_PLUS
906 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
907 possible digits in a NV, especially if NVs are not IEEE compliant
908 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
909# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
910#else
911# define MAX_SIG_DIGITS (NV_DIG+2)
912#endif
8194bf88
DM
913
914/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
915#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
98994639 916
96a05aee
HS
917 /* leading whitespace */
918 while (isSPACE(*s))
919 ++s;
920
98994639
HS
921 /* sign */
922 switch (*s) {
923 case '-':
924 negative = 1;
925 /* fall through */
926 case '+':
927 ++s;
928 }
929
2b54f59f
YST
930 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
931
932#ifdef HAS_STRTOD
933 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
c042ae3a 934 const char *p = negative ? s - 1 : s;
2b54f59f
YST
935 char *endp;
936 NV rslt;
937 rslt = strtod(p, &endp);
938 if (endp != p) {
939 *value = rslt;
940 return (char *)endp;
941 }
942 }
943#endif
944
8194bf88
DM
945 /* we accumulate digits into an integer; when this becomes too
946 * large, we add the total to NV and start again */
98994639 947
8194bf88
DM
948 while (1) {
949 if (isDIGIT(*s)) {
950 seen_digit = 1;
20f6aaab 951 old_digit = digit;
8194bf88 952 digit = *s++ - '0';
20f6aaab
AS
953 if (seen_dp)
954 exp_adjust[1]++;
98994639 955
8194bf88
DM
956 /* don't start counting until we see the first significant
957 * digit, eg the 5 in 0.00005... */
958 if (!sig_digits && digit == 0)
959 continue;
960
961 if (++sig_digits > MAX_SIG_DIGITS) {
98994639 962 /* limits of precision reached */
20f6aaab
AS
963 if (digit > 5) {
964 ++accumulator[seen_dp];
965 } else if (digit == 5) {
966 if (old_digit % 2) { /* round to even - Allen */
967 ++accumulator[seen_dp];
968 }
969 }
970 if (seen_dp) {
971 exp_adjust[1]--;
972 } else {
973 exp_adjust[0]++;
974 }
8194bf88 975 /* skip remaining digits */
98994639 976 while (isDIGIT(*s)) {
98994639 977 ++s;
20f6aaab
AS
978 if (! seen_dp) {
979 exp_adjust[0]++;
980 }
98994639
HS
981 }
982 /* warn of loss of precision? */
98994639 983 }
8194bf88 984 else {
20f6aaab 985 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
8194bf88 986 /* add accumulator to result and start again */
20f6aaab
AS
987 result[seen_dp] = S_mulexp10(result[seen_dp],
988 exp_acc[seen_dp])
989 + (NV)accumulator[seen_dp];
990 accumulator[seen_dp] = 0;
991 exp_acc[seen_dp] = 0;
98994639 992 }
20f6aaab
AS
993 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
994 ++exp_acc[seen_dp];
98994639 995 }
8194bf88 996 }
e1ec3a88 997 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
8194bf88 998 seen_dp = 1;
20f6aaab 999 if (sig_digits > MAX_SIG_DIGITS) {
c86f7df5 1000 do {
20f6aaab 1001 ++s;
c86f7df5 1002 } while (isDIGIT(*s));
20f6aaab
AS
1003 break;
1004 }
8194bf88
DM
1005 }
1006 else {
1007 break;
98994639
HS
1008 }
1009 }
1010
20f6aaab
AS
1011 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1012 if (seen_dp) {
1013 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1014 }
98994639 1015
8194bf88 1016 if (seen_digit && (*s == 'e' || *s == 'E')) {
98994639
HS
1017 bool expnegative = 0;
1018
1019 ++s;
1020 switch (*s) {
1021 case '-':
1022 expnegative = 1;
1023 /* fall through */
1024 case '+':
1025 ++s;
1026 }
1027 while (isDIGIT(*s))
1028 exponent = exponent * 10 + (*s++ - '0');
1029 if (expnegative)
1030 exponent = -exponent;
1031 }
1032
20f6aaab
AS
1033
1034
98994639 1035 /* now apply the exponent */
20f6aaab
AS
1036
1037 if (seen_dp) {
1038 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1039 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1040 } else {
1041 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1042 }
98994639
HS
1043
1044 /* now apply the sign */
1045 if (negative)
20f6aaab 1046 result[2] = -result[2];
a36244b7 1047#endif /* USE_PERL_ATOF */
20f6aaab 1048 *value = result[2];
73d840c0 1049 return (char *)s;
98994639
HS
1050}
1051
55954f19
JH
1052#if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
1053long double
1054Perl_my_modfl(long double x, long double *ip)
1055{
1056 *ip = aintl(x);
1057 return (x == *ip ? copysignl(0.0L, x) : x - *ip);
1058}
1059#endif
1060
1061#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1062long double
1063Perl_my_frexpl(long double x, int *e) {
1064 *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1065 return (scalbnl(x, -*e));
1066}
1067#endif
66610fdd
RGS
1068
1069/*
ed140128
AD
1070=for apidoc Perl_signbit
1071
1072Return a non-zero integer if the sign bit on an NV is set, and 0 if
1073it is not.
1074
1075If Configure detects this system has a signbit() that will work with
1076our NVs, then we just use it via the #define in perl.h. Otherwise,
1077fall back on this implementation. As a first pass, this gets everything
1078right except -0.0. Alas, catching -0.0 is the main use for this function,
1079so this is not too helpful yet. Still, at least we have the scaffolding
1080in place to support other systems, should that prove useful.
1081
1082
1083Configure notes: This function is called 'Perl_signbit' instead of a
1084plain 'signbit' because it is easy to imagine a system having a signbit()
1085function or macro that doesn't happen to work with our particular choice
1086of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect
1087the standard system headers to be happy. Also, this is a no-context
1088function (no pTHX_) because Perl_signbit() is usually re-#defined in
1089perl.h as a simple macro call to the system's signbit().
1090Users should just always call Perl_signbit().
1091
1092=cut
1093*/
1094#if !defined(HAS_SIGNBIT)
1095int
1096Perl_signbit(NV x) {
1097 return (x < 0.0) ? 1 : 0;
1098}
1099#endif
1100
1101/*
66610fdd
RGS
1102 * Local variables:
1103 * c-indentation-style: bsd
1104 * c-basic-offset: 4
1105 * indent-tabs-mode: t
1106 * End:
1107 *
37442d52
RGS
1108 * ex: set ts=8 sts=4 sw=4 noet:
1109 */