This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regen/unicode_constants.pl: White-space only
[perl5.git] / numeric.c
CommitLineData
98994639
HS
1/* numeric.c
2 *
663f364b 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
1129b882 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
98994639
HS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
4ac71550
TC
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
98994639
HS
16 */
17
ccfc67b7
JH
18/*
19=head1 Numeric functions
166f8a29
DM
20
21This file contains all the stuff needed by perl for manipulating numeric
22values, including such things as replacements for the OS's atof() function
23
24=cut
25
ccfc67b7
JH
26*/
27
98994639
HS
28#include "EXTERN.h"
29#define PERL_IN_NUMERIC_C
30#include "perl.h"
31
32U32
33Perl_cast_ulong(pTHX_ NV f)
34{
96a5add6 35 PERL_UNUSED_CONTEXT;
98994639
HS
36 if (f < 0.0)
37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
38 if (f < U32_MAX_P1) {
39#if CASTFLAGS & 2
40 if (f < U32_MAX_P1_HALF)
41 return (U32) f;
42 f -= U32_MAX_P1_HALF;
43 return ((U32) f) | (1 + U32_MAX >> 1);
44#else
45 return (U32) f;
46#endif
47 }
48 return f > 0 ? U32_MAX : 0 /* NaN */;
49}
50
51I32
52Perl_cast_i32(pTHX_ NV f)
53{
96a5add6 54 PERL_UNUSED_CONTEXT;
98994639
HS
55 if (f < I32_MAX_P1)
56 return f < I32_MIN ? I32_MIN : (I32) f;
57 if (f < U32_MAX_P1) {
58#if CASTFLAGS & 2
59 if (f < U32_MAX_P1_HALF)
60 return (I32)(U32) f;
61 f -= U32_MAX_P1_HALF;
62 return (I32)(((U32) f) | (1 + U32_MAX >> 1));
63#else
64 return (I32)(U32) f;
65#endif
66 }
67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
68}
69
70IV
71Perl_cast_iv(pTHX_ NV f)
72{
96a5add6 73 PERL_UNUSED_CONTEXT;
98994639
HS
74 if (f < IV_MAX_P1)
75 return f < IV_MIN ? IV_MIN : (IV) f;
76 if (f < UV_MAX_P1) {
77#if CASTFLAGS & 2
78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
79 if (f < UV_MAX_P1_HALF)
80 return (IV)(UV) f;
81 f -= UV_MAX_P1_HALF;
82 return (IV)(((UV) f) | (1 + UV_MAX >> 1));
83#else
84 return (IV)(UV) f;
85#endif
86 }
87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
88}
89
90UV
91Perl_cast_uv(pTHX_ NV f)
92{
96a5add6 93 PERL_UNUSED_CONTEXT;
98994639
HS
94 if (f < 0.0)
95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
96 if (f < UV_MAX_P1) {
97#if CASTFLAGS & 2
98 if (f < UV_MAX_P1_HALF)
99 return (UV) f;
100 f -= UV_MAX_P1_HALF;
101 return ((UV) f) | (1 + UV_MAX >> 1);
102#else
103 return (UV) f;
104#endif
105 }
106 return f > 0 ? UV_MAX : 0 /* NaN */;
107}
108
53305cf1
NC
109/*
110=for apidoc grok_bin
98994639 111
53305cf1
NC
112converts a string representing a binary number to numeric form.
113
114On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives
115conversion flags, and I<result> should be NULL or a pointer to an NV.
116The scan stops at the end of the string, or the first invalid character.
7b667b5f
MHM
117Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
118invalid character will also trigger a warning.
119On return I<*len_p> is set to the length of the scanned string,
120and I<*flags> gives output flags.
53305cf1 121
7fc63493 122If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
72d33970 123and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
53305cf1
NC
124returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
125and writes the value to I<*result> (or the value is discarded if I<result>
126is NULL).
127
7b667b5f 128The binary number may optionally be prefixed with "0b" or "b" unless
72d33970 129C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
a4c04bdc 130C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
53305cf1
NC
131number may use '_' characters to separate digits.
132
133=cut
02470786
KW
134
135Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
136which suppresses any message for non-portable numbers that are still valid
137on this platform.
53305cf1
NC
138 */
139
140UV
7918f24d
NC
141Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
142{
53305cf1
NC
143 const char *s = start;
144 STRLEN len = *len_p;
145 UV value = 0;
146 NV value_nv = 0;
147
148 const UV max_div_2 = UV_MAX / 2;
f2338a2e 149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 150 bool overflowed = FALSE;
7fc63493 151 char bit;
53305cf1 152
7918f24d
NC
153 PERL_ARGS_ASSERT_GROK_BIN;
154
a4c04bdc
NC
155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
156 /* strip off leading b or 0b.
157 for compatibility silently suffer "b" and "0b" as valid binary
158 numbers. */
159 if (len >= 1) {
a674e8db 160 if (s[0] == 'b' || s[0] == 'B') {
a4c04bdc
NC
161 s++;
162 len--;
163 }
a674e8db 164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
a4c04bdc
NC
165 s+=2;
166 len-=2;
167 }
168 }
53305cf1
NC
169 }
170
7fc63493 171 for (; len-- && (bit = *s); s++) {
53305cf1
NC
172 if (bit == '0' || bit == '1') {
173 /* Write it in this wonky order with a goto to attempt to get the
174 compiler to make the common case integer-only loop pretty tight.
175 With gcc seems to be much straighter code than old scan_bin. */
176 redo:
177 if (!overflowed) {
178 if (value <= max_div_2) {
179 value = (value << 1) | (bit - '0');
180 continue;
181 }
182 /* Bah. We're just overflowed. */
dcbac5bb 183 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
184 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
185 "Integer overflow in binary number");
53305cf1
NC
186 overflowed = TRUE;
187 value_nv = (NV) value;
188 }
189 value_nv *= 2.0;
98994639 190 /* If an NV has not enough bits in its mantissa to
d1be9408 191 * represent a UV this summing of small low-order numbers
98994639
HS
192 * is a waste of time (because the NV cannot preserve
193 * the low-order bits anyway): we could just remember when
53305cf1 194 * did we overflow and in the end just multiply value_nv by the
98994639 195 * right amount. */
53305cf1
NC
196 value_nv += (NV)(bit - '0');
197 continue;
198 }
199 if (bit == '_' && len && allow_underscores && (bit = s[1])
200 && (bit == '0' || bit == '1'))
98994639
HS
201 {
202 --len;
203 ++s;
53305cf1 204 goto redo;
98994639 205 }
a2a5de95
NC
206 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
207 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
208 "Illegal binary digit '%c' ignored", *s);
53305cf1 209 break;
98994639 210 }
53305cf1
NC
211
212 if ( ( overflowed && value_nv > 4294967295.0)
98994639 213#if UVSIZE > 4
02470786
KW
214 || (!overflowed && value > 0xffffffff
215 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
216#endif
217 ) {
a2a5de95
NC
218 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
219 "Binary number > 0b11111111111111111111111111111111 non-portable");
53305cf1
NC
220 }
221 *len_p = s - start;
222 if (!overflowed) {
223 *flags = 0;
224 return value;
98994639 225 }
53305cf1
NC
226 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
227 if (result)
228 *result = value_nv;
229 return UV_MAX;
98994639
HS
230}
231
53305cf1
NC
232/*
233=for apidoc grok_hex
234
235converts a string representing a hex number to numeric form.
236
c2da02fc 237On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives
53305cf1 238conversion flags, and I<result> should be NULL or a pointer to an NV.
7b667b5f
MHM
239The scan stops at the end of the string, or the first invalid character.
240Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
241invalid character will also trigger a warning.
242On return I<*len_p> is set to the length of the scanned string,
243and I<*flags> gives output flags.
53305cf1
NC
244
245If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
72d33970 246and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
53305cf1
NC
247returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
248and writes the value to I<*result> (or the value is discarded if I<result>
249is NULL).
250
d1be9408 251The hex number may optionally be prefixed with "0x" or "x" unless
72d33970 252C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
a4c04bdc 253C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
53305cf1
NC
254number may use '_' characters to separate digits.
255
256=cut
02470786
KW
257
258Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
259which suppresses any message for non-portable numbers that are still valid
260on this platform.
53305cf1
NC
261 */
262
263UV
7918f24d
NC
264Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
265{
27da23d5 266 dVAR;
53305cf1
NC
267 const char *s = start;
268 STRLEN len = *len_p;
269 UV value = 0;
270 NV value_nv = 0;
53305cf1 271 const UV max_div_16 = UV_MAX / 16;
f2338a2e 272 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 273 bool overflowed = FALSE;
98994639 274
7918f24d
NC
275 PERL_ARGS_ASSERT_GROK_HEX;
276
a4c04bdc
NC
277 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
278 /* strip off leading x or 0x.
279 for compatibility silently suffer "x" and "0x" as valid hex numbers.
280 */
281 if (len >= 1) {
a674e8db 282 if (s[0] == 'x' || s[0] == 'X') {
a4c04bdc
NC
283 s++;
284 len--;
285 }
a674e8db 286 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
a4c04bdc
NC
287 s+=2;
288 len-=2;
289 }
290 }
98994639
HS
291 }
292
293 for (; len-- && *s; s++) {
626ef089 294 if (isXDIGIT(*s)) {
53305cf1
NC
295 /* Write it in this wonky order with a goto to attempt to get the
296 compiler to make the common case integer-only loop pretty tight.
297 With gcc seems to be much straighter code than old scan_hex. */
298 redo:
299 if (!overflowed) {
300 if (value <= max_div_16) {
626ef089 301 value = (value << 4) | XDIGIT_VALUE(*s);
53305cf1
NC
302 continue;
303 }
304 /* Bah. We're just overflowed. */
dcbac5bb 305 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
306 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
307 "Integer overflow in hexadecimal number");
53305cf1
NC
308 overflowed = TRUE;
309 value_nv = (NV) value;
310 }
311 value_nv *= 16.0;
312 /* If an NV has not enough bits in its mantissa to
d1be9408 313 * represent a UV this summing of small low-order numbers
53305cf1
NC
314 * is a waste of time (because the NV cannot preserve
315 * the low-order bits anyway): we could just remember when
316 * did we overflow and in the end just multiply value_nv by the
317 * right amount of 16-tuples. */
626ef089 318 value_nv += (NV) XDIGIT_VALUE(*s);
53305cf1
NC
319 continue;
320 }
321 if (*s == '_' && len && allow_underscores && s[1]
626ef089 322 && isXDIGIT(s[1]))
98994639
HS
323 {
324 --len;
325 ++s;
53305cf1 326 goto redo;
98994639 327 }
a2a5de95
NC
328 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
329 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
53305cf1
NC
330 "Illegal hexadecimal digit '%c' ignored", *s);
331 break;
332 }
333
334 if ( ( overflowed && value_nv > 4294967295.0)
335#if UVSIZE > 4
02470786
KW
336 || (!overflowed && value > 0xffffffff
337 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
53305cf1
NC
338#endif
339 ) {
a2a5de95
NC
340 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
341 "Hexadecimal number > 0xffffffff non-portable");
53305cf1
NC
342 }
343 *len_p = s - start;
344 if (!overflowed) {
345 *flags = 0;
346 return value;
347 }
348 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
349 if (result)
350 *result = value_nv;
351 return UV_MAX;
352}
353
354/*
355=for apidoc grok_oct
356
7b667b5f
MHM
357converts a string representing an octal number to numeric form.
358
359On entry I<start> and I<*len_p> give the string to scan, I<*flags> gives
360conversion flags, and I<result> should be NULL or a pointer to an NV.
361The scan stops at the end of the string, or the first invalid character.
362Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
154bd527 3638 or 9 will also trigger a warning.
7b667b5f
MHM
364On return I<*len_p> is set to the length of the scanned string,
365and I<*flags> gives output flags.
366
367If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
72d33970 368and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
7b667b5f
MHM
369returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
370and writes the value to I<*result> (or the value is discarded if I<result>
371is NULL).
372
373If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
374number may use '_' characters to separate digits.
53305cf1
NC
375
376=cut
02470786 377
333ae27c
KW
378Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
379which suppresses any message for non-portable numbers, but which are valid
02470786 380on this platform.
53305cf1
NC
381 */
382
383UV
7918f24d
NC
384Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
385{
53305cf1
NC
386 const char *s = start;
387 STRLEN len = *len_p;
388 UV value = 0;
389 NV value_nv = 0;
53305cf1 390 const UV max_div_8 = UV_MAX / 8;
f2338a2e 391 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1
NC
392 bool overflowed = FALSE;
393
7918f24d
NC
394 PERL_ARGS_ASSERT_GROK_OCT;
395
53305cf1 396 for (; len-- && *s; s++) {
626ef089 397 if (isOCTAL(*s)) {
53305cf1
NC
398 /* Write it in this wonky order with a goto to attempt to get the
399 compiler to make the common case integer-only loop pretty tight.
400 */
401 redo:
402 if (!overflowed) {
403 if (value <= max_div_8) {
626ef089 404 value = (value << 3) | OCTAL_VALUE(*s);
53305cf1
NC
405 continue;
406 }
407 /* Bah. We're just overflowed. */
dcbac5bb 408 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
409 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
410 "Integer overflow in octal number");
53305cf1
NC
411 overflowed = TRUE;
412 value_nv = (NV) value;
413 }
414 value_nv *= 8.0;
98994639 415 /* If an NV has not enough bits in its mantissa to
d1be9408 416 * represent a UV this summing of small low-order numbers
98994639
HS
417 * is a waste of time (because the NV cannot preserve
418 * the low-order bits anyway): we could just remember when
53305cf1
NC
419 * did we overflow and in the end just multiply value_nv by the
420 * right amount of 8-tuples. */
626ef089 421 value_nv += (NV) OCTAL_VALUE(*s);
53305cf1
NC
422 continue;
423 }
626ef089
KW
424 if (*s == '_' && len && allow_underscores && isOCTAL(s[1])) {
425 --len;
426 ++s;
427 goto redo;
428 }
53305cf1 429 /* Allow \octal to work the DWIM way (that is, stop scanning
7b667b5f 430 * as soon as non-octal characters are seen, complain only if
626ef089
KW
431 * someone seems to want to use the digits eight and nine. Since we
432 * know it is not octal, then if isDIGIT, must be an 8 or 9). */
433 if (isDIGIT(*s)) {
a2a5de95
NC
434 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
435 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
436 "Illegal octal digit '%c' ignored", *s);
53305cf1
NC
437 }
438 break;
98994639 439 }
53305cf1
NC
440
441 if ( ( overflowed && value_nv > 4294967295.0)
98994639 442#if UVSIZE > 4
02470786
KW
443 || (!overflowed && value > 0xffffffff
444 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
445#endif
446 ) {
a2a5de95
NC
447 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
448 "Octal number > 037777777777 non-portable");
53305cf1
NC
449 }
450 *len_p = s - start;
451 if (!overflowed) {
452 *flags = 0;
453 return value;
98994639 454 }
53305cf1
NC
455 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
456 if (result)
457 *result = value_nv;
458 return UV_MAX;
459}
460
461/*
462=for apidoc scan_bin
463
72d33970 464For backwards compatibility. Use C<grok_bin> instead.
53305cf1
NC
465
466=for apidoc scan_hex
467
72d33970 468For backwards compatibility. Use C<grok_hex> instead.
53305cf1
NC
469
470=for apidoc scan_oct
471
72d33970 472For backwards compatibility. Use C<grok_oct> instead.
53305cf1
NC
473
474=cut
475 */
476
477NV
73d840c0 478Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
479{
480 NV rnv;
481 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 482 const UV ruv = grok_bin (start, &len, &flags, &rnv);
53305cf1 483
7918f24d
NC
484 PERL_ARGS_ASSERT_SCAN_BIN;
485
53305cf1
NC
486 *retlen = len;
487 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
488}
489
490NV
73d840c0 491Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
492{
493 NV rnv;
494 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 495 const UV ruv = grok_oct (start, &len, &flags, &rnv);
53305cf1 496
7918f24d
NC
497 PERL_ARGS_ASSERT_SCAN_OCT;
498
53305cf1
NC
499 *retlen = len;
500 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
501}
502
503NV
73d840c0 504Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
505{
506 NV rnv;
507 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 508 const UV ruv = grok_hex (start, &len, &flags, &rnv);
53305cf1 509
7918f24d
NC
510 PERL_ARGS_ASSERT_SCAN_HEX;
511
53305cf1
NC
512 *retlen = len;
513 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
98994639
HS
514}
515
516/*
517=for apidoc grok_numeric_radix
518
519Scan and skip for a numeric decimal separator (radix).
520
521=cut
522 */
523bool
524Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
525{
526#ifdef USE_LOCALE_NUMERIC
97aff369 527 dVAR;
7918f24d
NC
528
529 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
530
66cbab2c 531 if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
98994639 532 STRLEN len;
c4420975 533 const char * const radix = SvPV(PL_numeric_radix_sv, len);
98994639
HS
534 if (*sp + len <= send && memEQ(*sp, radix, len)) {
535 *sp += len;
536 return TRUE;
537 }
538 }
539 /* always try "." if numeric radix didn't match because
540 * we may have data from different locales mixed */
541#endif
7918f24d
NC
542
543 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
544
98994639
HS
545 if (*sp < send && **sp == '.') {
546 ++*sp;
547 return TRUE;
548 }
549 return FALSE;
550}
551
552/*
553=for apidoc grok_number
554
555Recognise (or not) a number. The type of the number is returned
556(0 if unrecognised), otherwise it is a bit-ORed combination of
557IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
aa8b85de 558IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
60939fb8 559
cd164854 560If the value of the number can fit in a UV, it is returned in *valuep.
60939fb8
NC
561IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
562will never be set unless *valuep is valid, but *valuep may have been assigned
563to during processing even though IS_NUMBER_IN_UV is not set on return.
564If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
565valuep is non-NULL, but no actual assignment (or SEGV) will occur.
566
567IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
568seen (in which case *valuep gives the true value truncated to an integer), and
569IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
570absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
571number is larger than a UV.
98994639
HS
572
573=cut
574 */
575int
576Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
577{
60939fb8 578 const char *s = pv;
c4420975 579 const char * const send = pv + len;
60939fb8
NC
580 const UV max_div_10 = UV_MAX / 10;
581 const char max_mod_10 = UV_MAX % 10;
582 int numtype = 0;
583 int sawinf = 0;
aa8b85de 584 int sawnan = 0;
60939fb8 585
7918f24d
NC
586 PERL_ARGS_ASSERT_GROK_NUMBER;
587
60939fb8
NC
588 while (s < send && isSPACE(*s))
589 s++;
590 if (s == send) {
591 return 0;
592 } else if (*s == '-') {
593 s++;
594 numtype = IS_NUMBER_NEG;
595 }
596 else if (*s == '+')
aa42a541 597 s++;
60939fb8
NC
598
599 if (s == send)
600 return 0;
601
602 /* next must be digit or the radix separator or beginning of infinity */
603 if (isDIGIT(*s)) {
604 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
605 overflow. */
606 UV value = *s - '0';
607 /* This construction seems to be more optimiser friendly.
608 (without it gcc does the isDIGIT test and the *s - '0' separately)
609 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
610 In theory the optimiser could deduce how far to unroll the loop
611 before checking for overflow. */
58bb9ec3
NC
612 if (++s < send) {
613 int digit = *s - '0';
60939fb8
NC
614 if (digit >= 0 && digit <= 9) {
615 value = value * 10 + digit;
58bb9ec3
NC
616 if (++s < send) {
617 digit = *s - '0';
60939fb8
NC
618 if (digit >= 0 && digit <= 9) {
619 value = value * 10 + digit;
58bb9ec3
NC
620 if (++s < send) {
621 digit = *s - '0';
60939fb8
NC
622 if (digit >= 0 && digit <= 9) {
623 value = value * 10 + digit;
58bb9ec3
NC
624 if (++s < send) {
625 digit = *s - '0';
60939fb8
NC
626 if (digit >= 0 && digit <= 9) {
627 value = value * 10 + digit;
58bb9ec3
NC
628 if (++s < send) {
629 digit = *s - '0';
60939fb8
NC
630 if (digit >= 0 && digit <= 9) {
631 value = value * 10 + digit;
58bb9ec3
NC
632 if (++s < send) {
633 digit = *s - '0';
60939fb8
NC
634 if (digit >= 0 && digit <= 9) {
635 value = value * 10 + digit;
58bb9ec3
NC
636 if (++s < send) {
637 digit = *s - '0';
60939fb8
NC
638 if (digit >= 0 && digit <= 9) {
639 value = value * 10 + digit;
58bb9ec3
NC
640 if (++s < send) {
641 digit = *s - '0';
60939fb8
NC
642 if (digit >= 0 && digit <= 9) {
643 value = value * 10 + digit;
58bb9ec3 644 if (++s < send) {
60939fb8
NC
645 /* Now got 9 digits, so need to check
646 each time for overflow. */
58bb9ec3 647 digit = *s - '0';
60939fb8
NC
648 while (digit >= 0 && digit <= 9
649 && (value < max_div_10
650 || (value == max_div_10
651 && digit <= max_mod_10))) {
652 value = value * 10 + digit;
58bb9ec3
NC
653 if (++s < send)
654 digit = *s - '0';
60939fb8
NC
655 else
656 break;
657 }
658 if (digit >= 0 && digit <= 9
51bd16da 659 && (s < send)) {
60939fb8
NC
660 /* value overflowed.
661 skip the remaining digits, don't
662 worry about setting *valuep. */
663 do {
664 s++;
665 } while (s < send && isDIGIT(*s));
666 numtype |=
667 IS_NUMBER_GREATER_THAN_UV_MAX;
668 goto skip_value;
669 }
670 }
671 }
98994639 672 }
60939fb8
NC
673 }
674 }
675 }
676 }
677 }
678 }
679 }
680 }
681 }
682 }
683 }
98994639 684 }
60939fb8 685 }
98994639 686 }
60939fb8
NC
687 numtype |= IS_NUMBER_IN_UV;
688 if (valuep)
689 *valuep = value;
690
691 skip_value:
692 if (GROK_NUMERIC_RADIX(&s, send)) {
693 numtype |= IS_NUMBER_NOT_INT;
694 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
695 s++;
98994639 696 }
60939fb8
NC
697 }
698 else if (GROK_NUMERIC_RADIX(&s, send)) {
699 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
700 /* no digits before the radix means we need digits after it */
701 if (s < send && isDIGIT(*s)) {
702 do {
703 s++;
704 } while (s < send && isDIGIT(*s));
705 if (valuep) {
706 /* integer approximation is valid - it's 0. */
707 *valuep = 0;
708 }
98994639 709 }
60939fb8
NC
710 else
711 return 0;
712 } else if (*s == 'I' || *s == 'i') {
713 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
714 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
715 s++; if (s < send && (*s == 'I' || *s == 'i')) {
716 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
717 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
718 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
719 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
720 s++;
98994639 721 }
60939fb8 722 sawinf = 1;
aa8b85de
JH
723 } else if (*s == 'N' || *s == 'n') {
724 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
725 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
726 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
727 s++;
728 sawnan = 1;
729 } else
98994639 730 return 0;
60939fb8
NC
731
732 if (sawinf) {
733 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
734 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
aa8b85de
JH
735 } else if (sawnan) {
736 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
737 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
60939fb8
NC
738 } else if (s < send) {
739 /* we can have an optional exponent part */
740 if (*s == 'e' || *s == 'E') {
741 /* The only flag we keep is sign. Blow away any "it's UV" */
742 numtype &= IS_NUMBER_NEG;
743 numtype |= IS_NUMBER_NOT_INT;
744 s++;
745 if (s < send && (*s == '-' || *s == '+'))
746 s++;
747 if (s < send && isDIGIT(*s)) {
748 do {
749 s++;
750 } while (s < send && isDIGIT(*s));
751 }
752 else
753 return 0;
754 }
755 }
756 while (s < send && isSPACE(*s))
757 s++;
758 if (s >= send)
aa8b85de 759 return numtype;
60939fb8
NC
760 if (len == 10 && memEQ(pv, "0 but true", 10)) {
761 if (valuep)
762 *valuep = 0;
763 return IS_NUMBER_IN_UV;
764 }
765 return 0;
98994639
HS
766}
767
4801ca72 768STATIC NV
98994639
HS
769S_mulexp10(NV value, I32 exponent)
770{
771 NV result = 1.0;
772 NV power = 10.0;
773 bool negative = 0;
774 I32 bit;
775
776 if (exponent == 0)
777 return value;
659c4b96
DM
778 if (value == 0)
779 return (NV)0;
87032ba1 780
24866caa 781 /* On OpenVMS VAX we by default use the D_FLOAT double format,
67597c89 782 * and that format does not have *easy* capabilities [1] for
24866caa
CB
783 * overflowing doubles 'silently' as IEEE fp does. We also need
784 * to support G_FLOAT on both VAX and Alpha, and though the exponent
785 * range is much larger than D_FLOAT it still doesn't do silent
786 * overflow. Therefore we need to detect early whether we would
787 * overflow (this is the behaviour of the native string-to-float
788 * conversion routines, and therefore of native applications, too).
67597c89 789 *
24866caa
CB
790 * [1] Trying to establish a condition handler to trap floating point
791 * exceptions is not a good idea. */
87032ba1
JH
792
793 /* In UNICOS and in certain Cray models (such as T90) there is no
794 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
795 * There is something you can do if you are willing to use some
796 * inline assembler: the instruction is called DFI-- but that will
797 * disable *all* floating point interrupts, a little bit too large
798 * a hammer. Therefore we need to catch potential overflows before
799 * it's too late. */
353813d9 800
85bba25f 801#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
353813d9 802 STMT_START {
c4420975 803 const NV exp_v = log10(value);
353813d9
HS
804 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
805 return NV_MAX;
806 if (exponent < 0) {
807 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
808 return 0.0;
809 while (-exponent >= NV_MAX_10_EXP) {
810 /* combination does not overflow, but 10^(-exponent) does */
811 value /= 10;
812 ++exponent;
813 }
814 }
815 } STMT_END;
87032ba1
JH
816#endif
817
353813d9
HS
818 if (exponent < 0) {
819 negative = 1;
820 exponent = -exponent;
b27804d8
DM
821#ifdef NV_MAX_10_EXP
822 /* for something like 1234 x 10^-309, the action of calculating
823 * the intermediate value 10^309 then returning 1234 / (10^309)
824 * will fail, since 10^309 becomes infinity. In this case try to
825 * refactor it as 123 / (10^308) etc.
826 */
827 while (value && exponent > NV_MAX_10_EXP) {
828 exponent--;
829 value /= 10;
830 }
831#endif
353813d9 832 }
98994639
HS
833 for (bit = 1; exponent; bit <<= 1) {
834 if (exponent & bit) {
835 exponent ^= bit;
836 result *= power;
236f0012
CB
837 /* Floating point exceptions are supposed to be turned off,
838 * but if we're obviously done, don't risk another iteration.
839 */
840 if (exponent == 0) break;
98994639
HS
841 }
842 power *= power;
843 }
844 return negative ? value / result : value * result;
845}
846
847NV
848Perl_my_atof(pTHX_ const char* s)
849{
850 NV x = 0.0;
851#ifdef USE_LOCALE_NUMERIC
97aff369 852 dVAR;
7918f24d
NC
853
854 PERL_ARGS_ASSERT_MY_ATOF;
855
a2287a13
KW
856 {
857 DECLARE_STORE_LC_NUMERIC_SET_TO_NEEDED();
e4850248
KW
858 if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
859 const char *standard = NULL, *local = NULL;
860 bool use_standard_radix;
98994639 861
e4850248
KW
862 /* Look through the string for the first thing that looks like a
863 * decimal point: either the value in the current locale or the
864 * standard fallback of '.'. The one which appears earliest in the
865 * input string is the one that we should have atof look for. Note
866 * that we have to determine this beforehand because on some
867 * systems, Perl_atof2 is just a wrapper around the system's atof.
868 * */
869 standard = strchr(s, '.');
870 local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
78787052 871
e4850248 872 use_standard_radix = standard && (!local || standard < local);
78787052 873
e4850248
KW
874 if (use_standard_radix)
875 SET_NUMERIC_STANDARD();
78787052 876
e4850248 877 Perl_atof2(s, x);
78787052 878
e4850248
KW
879 if (use_standard_radix)
880 SET_NUMERIC_LOCAL();
881 }
882 else
883 Perl_atof2(s, x);
a2287a13
KW
884 RESTORE_LC_NUMERIC();
885 }
98994639 886#else
a36244b7 887 Perl_atof2(s, x);
98994639
HS
888#endif
889 return x;
890}
891
892char*
893Perl_my_atof2(pTHX_ const char* orig, NV* value)
894{
20f6aaab 895 NV result[3] = {0.0, 0.0, 0.0};
e1ec3a88 896 const char* s = orig;
a36244b7 897#ifdef USE_PERL_ATOF
20f6aaab 898 UV accumulator[2] = {0,0}; /* before/after dp */
a36244b7 899 bool negative = 0;
e1ec3a88 900 const char* send = s + strlen(orig) - 1;
8194bf88 901 bool seen_digit = 0;
20f6aaab
AS
902 I32 exp_adjust[2] = {0,0};
903 I32 exp_acc[2] = {-1, -1};
904 /* the current exponent adjust for the accumulators */
98994639 905 I32 exponent = 0;
8194bf88 906 I32 seen_dp = 0;
20f6aaab
AS
907 I32 digit = 0;
908 I32 old_digit = 0;
8194bf88
DM
909 I32 sig_digits = 0; /* noof significant digits seen so far */
910
7918f24d
NC
911 PERL_ARGS_ASSERT_MY_ATOF2;
912
8194bf88
DM
913/* There is no point in processing more significant digits
914 * than the NV can hold. Note that NV_DIG is a lower-bound value,
915 * while we need an upper-bound value. We add 2 to account for this;
916 * since it will have been conservative on both the first and last digit.
917 * For example a 32-bit mantissa with an exponent of 4 would have
918 * exact values in the set
919 * 4
920 * 8
921 * ..
922 * 17179869172
923 * 17179869176
924 * 17179869180
925 *
926 * where for the purposes of calculating NV_DIG we would have to discount
927 * both the first and last digit, since neither can hold all values from
928 * 0..9; but for calculating the value we must examine those two digits.
929 */
ffa277e5
AS
930#ifdef MAX_SIG_DIG_PLUS
931 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
932 possible digits in a NV, especially if NVs are not IEEE compliant
933 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
934# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
935#else
936# define MAX_SIG_DIGITS (NV_DIG+2)
937#endif
8194bf88
DM
938
939/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
940#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
98994639 941
96a05aee
HS
942 /* leading whitespace */
943 while (isSPACE(*s))
944 ++s;
945
98994639
HS
946 /* sign */
947 switch (*s) {
948 case '-':
949 negative = 1;
924ba076 950 /* FALLTHROUGH */
98994639
HS
951 case '+':
952 ++s;
953 }
954
2b54f59f
YST
955 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
956
957#ifdef HAS_STRTOD
958 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
c042ae3a 959 const char *p = negative ? s - 1 : s;
2b54f59f
YST
960 char *endp;
961 NV rslt;
962 rslt = strtod(p, &endp);
963 if (endp != p) {
964 *value = rslt;
965 return (char *)endp;
966 }
967 }
968#endif
969
8194bf88
DM
970 /* we accumulate digits into an integer; when this becomes too
971 * large, we add the total to NV and start again */
98994639 972
8194bf88
DM
973 while (1) {
974 if (isDIGIT(*s)) {
975 seen_digit = 1;
20f6aaab 976 old_digit = digit;
8194bf88 977 digit = *s++ - '0';
20f6aaab
AS
978 if (seen_dp)
979 exp_adjust[1]++;
98994639 980
8194bf88
DM
981 /* don't start counting until we see the first significant
982 * digit, eg the 5 in 0.00005... */
983 if (!sig_digits && digit == 0)
984 continue;
985
986 if (++sig_digits > MAX_SIG_DIGITS) {
98994639 987 /* limits of precision reached */
20f6aaab
AS
988 if (digit > 5) {
989 ++accumulator[seen_dp];
990 } else if (digit == 5) {
991 if (old_digit % 2) { /* round to even - Allen */
992 ++accumulator[seen_dp];
993 }
994 }
995 if (seen_dp) {
996 exp_adjust[1]--;
997 } else {
998 exp_adjust[0]++;
999 }
8194bf88 1000 /* skip remaining digits */
98994639 1001 while (isDIGIT(*s)) {
98994639 1002 ++s;
20f6aaab
AS
1003 if (! seen_dp) {
1004 exp_adjust[0]++;
1005 }
98994639
HS
1006 }
1007 /* warn of loss of precision? */
98994639 1008 }
8194bf88 1009 else {
20f6aaab 1010 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
8194bf88 1011 /* add accumulator to result and start again */
20f6aaab
AS
1012 result[seen_dp] = S_mulexp10(result[seen_dp],
1013 exp_acc[seen_dp])
1014 + (NV)accumulator[seen_dp];
1015 accumulator[seen_dp] = 0;
1016 exp_acc[seen_dp] = 0;
98994639 1017 }
20f6aaab
AS
1018 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1019 ++exp_acc[seen_dp];
98994639 1020 }
8194bf88 1021 }
e1ec3a88 1022 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
8194bf88 1023 seen_dp = 1;
20f6aaab 1024 if (sig_digits > MAX_SIG_DIGITS) {
c86f7df5 1025 do {
20f6aaab 1026 ++s;
c86f7df5 1027 } while (isDIGIT(*s));
20f6aaab
AS
1028 break;
1029 }
8194bf88
DM
1030 }
1031 else {
1032 break;
98994639
HS
1033 }
1034 }
1035
20f6aaab
AS
1036 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1037 if (seen_dp) {
1038 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1039 }
98994639 1040
8194bf88 1041 if (seen_digit && (*s == 'e' || *s == 'E')) {
98994639
HS
1042 bool expnegative = 0;
1043
1044 ++s;
1045 switch (*s) {
1046 case '-':
1047 expnegative = 1;
924ba076 1048 /* FALLTHROUGH */
98994639
HS
1049 case '+':
1050 ++s;
1051 }
1052 while (isDIGIT(*s))
1053 exponent = exponent * 10 + (*s++ - '0');
1054 if (expnegative)
1055 exponent = -exponent;
1056 }
1057
20f6aaab
AS
1058
1059
98994639 1060 /* now apply the exponent */
20f6aaab
AS
1061
1062 if (seen_dp) {
1063 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1064 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1065 } else {
1066 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1067 }
98994639
HS
1068
1069 /* now apply the sign */
1070 if (negative)
20f6aaab 1071 result[2] = -result[2];
a36244b7 1072#endif /* USE_PERL_ATOF */
20f6aaab 1073 *value = result[2];
73d840c0 1074 return (char *)s;
98994639
HS
1075}
1076
55954f19
JH
#if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
/*
 * Perl_my_modfl - substitute for modfl() on systems that lack it but do
 * provide aintl() and copysignl().
 *
 * x:  the value to split.
 * ip: receives aintl(x) (presumably x truncated to an integral value;
 *     aintl() is a platform routine not defined in this file).
 *
 * Returns x minus the value stored in *ip.  When x already equals that
 * value, a zero carrying the sign of x is returned instead, so that the
 * sign of x is not lost.
 */
long double
Perl_my_modfl(long double x, long double *ip)
{
    const long double whole = aintl(x);

    *ip = whole;

    if (x == whole) {
        /* nothing left over: hand back a zero with x's sign */
        return copysignl(0.0L, x);
    }

    return x - whole;
}
#endif
1085
#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
/*
 * Perl_my_frexpl - substitute for frexpl() on systems that lack it but do
 * provide ilogbl() and scalbnl().
 *
 * x: the value to decompose.
 * e: receives the binary exponent, ilogbl(x) + 1; it is set to 0 when x
 *    is zero, NaN or infinite.
 *
 * Returns x scaled by 2**(-*e).  Zero, NaN and infinities are returned
 * unchanged.
 */
long double
Perl_my_frexpl(long double x, int *e) {
    /* x - x is 0 for every finite x and NaN for an infinity or a NaN, so
     * the second test picks out the non-finite values.  For those,
     * ilogbl() yields an extreme sentinel (INT_MAX for an infinity), and
     * adding 1 to it, or negating the result, would overflow a signed
     * int.  frexp() leaves the stored exponent unspecified in these
     * cases, so 0 is as good a choice as any. */
    if (x == 0.0L || !(x - x == 0.0L)) {
        *e = 0;
        return x;
    }

    *e = ilogbl(x) + 1;
    return scalbnl(x, -*e);
}
#endif
66610fdd
RGS
1093
1094/*
ed140128
AD
1095=for apidoc Perl_signbit
1096
1097Return a non-zero integer if the sign bit on an NV is set, and 0 if
1098it is not.
1099
1100If Configure detects this system has a signbit() that will work with
1101our NVs, then we just use it via the #define in perl.h. Otherwise,
1102fall back on this implementation. As a first pass, this gets everything
1103right except -0.0. Alas, catching -0.0 is the main use for this function,
1104so this is not too helpful yet. Still, at least we have the scaffolding
1105in place to support other systems, should that prove useful.
1106
1107
1108Configure notes: This function is called 'Perl_signbit' instead of a
1109plain 'signbit' because it is easy to imagine a system having a signbit()
1110function or macro that doesn't happen to work with our particular choice
1111of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect
1112the standard system headers to be happy. Also, this is a no-context
1113function (no pTHX_) because Perl_signbit() is usually re-#defined in
1114perl.h as a simple macro call to the system's signbit().
1115Users should just always call Perl_signbit().
1116
1117=cut
1118*/
1119#if !defined(HAS_SIGNBIT)
1120int
1121Perl_signbit(NV x) {
1122 return (x < 0.0) ? 1 : 0;
1123}
1124#endif
1125
1126/*
66610fdd
RGS
1127 * Local variables:
1128 * c-indentation-style: bsd
1129 * c-basic-offset: 4
14d04a33 1130 * indent-tabs-mode: nil
66610fdd
RGS
1131 * End:
1132 *
14d04a33 1133 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1134 */