This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: Remove unnecessary flag setting
[perl5.git] / numeric.c
CommitLineData
98994639
HS
1/* numeric.c
2 *
663f364b 3 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
1129b882 4 * 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
98994639
HS
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11/*
4ac71550
TC
12 * "That only makes eleven (plus one mislaid) and not fourteen,
13 * unless wizards count differently to other people." --Beorn
14 *
15 * [p.115 of _The Hobbit_: "Queer Lodgings"]
98994639
HS
16 */
17
ccfc67b7
JH
18/*
19=head1 Numeric functions
166f8a29
DM
20
21This file contains all the stuff needed by perl for manipulating numeric
22values, including such things as replacements for the OS's atof() function
23
24=cut
25
ccfc67b7
JH
26*/
27
98994639
HS
28#include "EXTERN.h"
29#define PERL_IN_NUMERIC_C
30#include "perl.h"
31
32U32
33Perl_cast_ulong(pTHX_ NV f)
34{
96a5add6 35 PERL_UNUSED_CONTEXT;
98994639
HS
36 if (f < 0.0)
37 return f < I32_MIN ? (U32) I32_MIN : (U32)(I32) f;
38 if (f < U32_MAX_P1) {
39#if CASTFLAGS & 2
40 if (f < U32_MAX_P1_HALF)
41 return (U32) f;
42 f -= U32_MAX_P1_HALF;
43 return ((U32) f) | (1 + U32_MAX >> 1);
44#else
45 return (U32) f;
46#endif
47 }
48 return f > 0 ? U32_MAX : 0 /* NaN */;
49}
50
51I32
52Perl_cast_i32(pTHX_ NV f)
53{
96a5add6 54 PERL_UNUSED_CONTEXT;
98994639
HS
55 if (f < I32_MAX_P1)
56 return f < I32_MIN ? I32_MIN : (I32) f;
57 if (f < U32_MAX_P1) {
58#if CASTFLAGS & 2
59 if (f < U32_MAX_P1_HALF)
60 return (I32)(U32) f;
61 f -= U32_MAX_P1_HALF;
62 return (I32)(((U32) f) | (1 + U32_MAX >> 1));
63#else
64 return (I32)(U32) f;
65#endif
66 }
67 return f > 0 ? (I32)U32_MAX : 0 /* NaN */;
68}
69
70IV
71Perl_cast_iv(pTHX_ NV f)
72{
96a5add6 73 PERL_UNUSED_CONTEXT;
98994639
HS
74 if (f < IV_MAX_P1)
75 return f < IV_MIN ? IV_MIN : (IV) f;
76 if (f < UV_MAX_P1) {
77#if CASTFLAGS & 2
78 /* For future flexibility allowing for sizeof(UV) >= sizeof(IV) */
79 if (f < UV_MAX_P1_HALF)
80 return (IV)(UV) f;
81 f -= UV_MAX_P1_HALF;
82 return (IV)(((UV) f) | (1 + UV_MAX >> 1));
83#else
84 return (IV)(UV) f;
85#endif
86 }
87 return f > 0 ? (IV)UV_MAX : 0 /* NaN */;
88}
89
90UV
91Perl_cast_uv(pTHX_ NV f)
92{
96a5add6 93 PERL_UNUSED_CONTEXT;
98994639
HS
94 if (f < 0.0)
95 return f < IV_MIN ? (UV) IV_MIN : (UV)(IV) f;
96 if (f < UV_MAX_P1) {
97#if CASTFLAGS & 2
98 if (f < UV_MAX_P1_HALF)
99 return (UV) f;
100 f -= UV_MAX_P1_HALF;
101 return ((UV) f) | (1 + UV_MAX >> 1);
102#else
103 return (UV) f;
104#endif
105 }
106 return f > 0 ? UV_MAX : 0 /* NaN */;
107}
108
53305cf1
NC
109/*
110=for apidoc grok_bin
98994639 111
53305cf1
NC
112converts a string representing a binary number to numeric form.
113
114On entry I<start> and I<*len> give the string to scan, I<*flags> gives
115conversion flags, and I<result> should be NULL or a pointer to an NV.
116The scan stops at the end of the string, or the first invalid character.
7b667b5f
MHM
117Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
118invalid character will also trigger a warning.
119On return I<*len> is set to the length of the scanned string,
120and I<*flags> gives output flags.
53305cf1 121
7fc63493 122If the value is <= C<UV_MAX> it is returned as a UV, the output flags are clear,
53305cf1
NC
123and nothing is written to I<*result>. If the value is > UV_MAX C<grok_bin>
124returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
125and writes the value to I<*result> (or the value is discarded if I<result>
126is NULL).
127
7b667b5f 128The binary number may optionally be prefixed with "0b" or "b" unless
a4c04bdc
NC
129C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
130C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the binary
53305cf1
NC
131number may use '_' characters to separate digits.
132
133=cut
02470786
KW
134
135Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
136which suppresses any message for non-portable numbers that are still valid
137on this platform.
53305cf1
NC
138 */
139
140UV
7918f24d
NC
141Perl_grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
142{
53305cf1
NC
143 const char *s = start;
144 STRLEN len = *len_p;
145 UV value = 0;
146 NV value_nv = 0;
147
148 const UV max_div_2 = UV_MAX / 2;
f2338a2e 149 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 150 bool overflowed = FALSE;
7fc63493 151 char bit;
53305cf1 152
7918f24d
NC
153 PERL_ARGS_ASSERT_GROK_BIN;
154
a4c04bdc
NC
155 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
156 /* strip off leading b or 0b.
157 for compatibility silently suffer "b" and "0b" as valid binary
158 numbers. */
159 if (len >= 1) {
a674e8db 160 if (s[0] == 'b' || s[0] == 'B') {
a4c04bdc
NC
161 s++;
162 len--;
163 }
a674e8db 164 else if (len >= 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
a4c04bdc
NC
165 s+=2;
166 len-=2;
167 }
168 }
53305cf1
NC
169 }
170
7fc63493 171 for (; len-- && (bit = *s); s++) {
53305cf1
NC
172 if (bit == '0' || bit == '1') {
173 /* Write it in this wonky order with a goto to attempt to get the
174 compiler to make the common case integer-only loop pretty tight.
175 With gcc seems to be much straighter code than old scan_bin. */
176 redo:
177 if (!overflowed) {
178 if (value <= max_div_2) {
179 value = (value << 1) | (bit - '0');
180 continue;
181 }
182 /* Bah. We're just overflowed. */
dcbac5bb 183 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
184 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
185 "Integer overflow in binary number");
53305cf1
NC
186 overflowed = TRUE;
187 value_nv = (NV) value;
188 }
189 value_nv *= 2.0;
98994639 190 /* If an NV has not enough bits in its mantissa to
d1be9408 191 * represent a UV this summing of small low-order numbers
98994639
HS
192 * is a waste of time (because the NV cannot preserve
193 * the low-order bits anyway): we could just remember when
53305cf1 194 * did we overflow and in the end just multiply value_nv by the
98994639 195 * right amount. */
53305cf1
NC
196 value_nv += (NV)(bit - '0');
197 continue;
198 }
199 if (bit == '_' && len && allow_underscores && (bit = s[1])
200 && (bit == '0' || bit == '1'))
98994639
HS
201 {
202 --len;
203 ++s;
53305cf1 204 goto redo;
98994639 205 }
a2a5de95
NC
206 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
207 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
208 "Illegal binary digit '%c' ignored", *s);
53305cf1 209 break;
98994639 210 }
53305cf1
NC
211
212 if ( ( overflowed && value_nv > 4294967295.0)
98994639 213#if UVSIZE > 4
02470786
KW
214 || (!overflowed && value > 0xffffffff
215 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
216#endif
217 ) {
a2a5de95
NC
218 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
219 "Binary number > 0b11111111111111111111111111111111 non-portable");
53305cf1
NC
220 }
221 *len_p = s - start;
222 if (!overflowed) {
223 *flags = 0;
224 return value;
98994639 225 }
53305cf1
NC
226 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
227 if (result)
228 *result = value_nv;
229 return UV_MAX;
98994639
HS
230}
231
53305cf1
NC
232/*
233=for apidoc grok_hex
234
235converts a string representing a hex number to numeric form.
236
237On entry I<start> and I<*len> give the string to scan, I<*flags> gives
238conversion flags, and I<result> should be NULL or a pointer to an NV.
7b667b5f
MHM
239The scan stops at the end of the string, or the first invalid character.
240Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
241invalid character will also trigger a warning.
242On return I<*len> is set to the length of the scanned string,
243and I<*flags> gives output flags.
53305cf1
NC
244
245If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
246and nothing is written to I<*result>. If the value is > UV_MAX C<grok_hex>
247returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
248and writes the value to I<*result> (or the value is discarded if I<result>
249is NULL).
250
d1be9408 251The hex number may optionally be prefixed with "0x" or "x" unless
a4c04bdc
NC
252C<PERL_SCAN_DISALLOW_PREFIX> is set in I<*flags> on entry. If
253C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the hex
53305cf1
NC
254number may use '_' characters to separate digits.
255
256=cut
02470786
KW
257
258Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
259which suppresses any message for non-portable numbers that are still valid
260on this platform.
53305cf1
NC
261 */
262
263UV
7918f24d
NC
264Perl_grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
265{
27da23d5 266 dVAR;
53305cf1
NC
267 const char *s = start;
268 STRLEN len = *len_p;
269 UV value = 0;
270 NV value_nv = 0;
53305cf1 271 const UV max_div_16 = UV_MAX / 16;
f2338a2e 272 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1 273 bool overflowed = FALSE;
98994639 274
7918f24d
NC
275 PERL_ARGS_ASSERT_GROK_HEX;
276
a4c04bdc
NC
277 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
278 /* strip off leading x or 0x.
279 for compatibility silently suffer "x" and "0x" as valid hex numbers.
280 */
281 if (len >= 1) {
a674e8db 282 if (s[0] == 'x' || s[0] == 'X') {
a4c04bdc
NC
283 s++;
284 len--;
285 }
a674e8db 286 else if (len >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
a4c04bdc
NC
287 s+=2;
288 len-=2;
289 }
290 }
98994639
HS
291 }
292
293 for (; len-- && *s; s++) {
a3b680e6 294 const char *hexdigit = strchr(PL_hexdigit, *s);
53305cf1
NC
295 if (hexdigit) {
296 /* Write it in this wonky order with a goto to attempt to get the
297 compiler to make the common case integer-only loop pretty tight.
298 With gcc seems to be much straighter code than old scan_hex. */
299 redo:
300 if (!overflowed) {
301 if (value <= max_div_16) {
302 value = (value << 4) | ((hexdigit - PL_hexdigit) & 15);
303 continue;
304 }
305 /* Bah. We're just overflowed. */
dcbac5bb 306 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
307 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
308 "Integer overflow in hexadecimal number");
53305cf1
NC
309 overflowed = TRUE;
310 value_nv = (NV) value;
311 }
312 value_nv *= 16.0;
313 /* If an NV has not enough bits in its mantissa to
d1be9408 314 * represent a UV this summing of small low-order numbers
53305cf1
NC
315 * is a waste of time (because the NV cannot preserve
316 * the low-order bits anyway): we could just remember when
317 * did we overflow and in the end just multiply value_nv by the
318 * right amount of 16-tuples. */
319 value_nv += (NV)((hexdigit - PL_hexdigit) & 15);
320 continue;
321 }
322 if (*s == '_' && len && allow_underscores && s[1]
e1ec3a88 323 && (hexdigit = strchr(PL_hexdigit, s[1])))
98994639
HS
324 {
325 --len;
326 ++s;
53305cf1 327 goto redo;
98994639 328 }
a2a5de95
NC
329 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
330 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
53305cf1
NC
331 "Illegal hexadecimal digit '%c' ignored", *s);
332 break;
333 }
334
335 if ( ( overflowed && value_nv > 4294967295.0)
336#if UVSIZE > 4
02470786
KW
337 || (!overflowed && value > 0xffffffff
338 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
53305cf1
NC
339#endif
340 ) {
a2a5de95
NC
341 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
342 "Hexadecimal number > 0xffffffff non-portable");
53305cf1
NC
343 }
344 *len_p = s - start;
345 if (!overflowed) {
346 *flags = 0;
347 return value;
348 }
349 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
350 if (result)
351 *result = value_nv;
352 return UV_MAX;
353}
354
355/*
356=for apidoc grok_oct
357
7b667b5f
MHM
358converts a string representing an octal number to numeric form.
359
360On entry I<start> and I<*len> give the string to scan, I<*flags> gives
361conversion flags, and I<result> should be NULL or a pointer to an NV.
362The scan stops at the end of the string, or the first invalid character.
363Unless C<PERL_SCAN_SILENT_ILLDIGIT> is set in I<*flags>, encountering an
154bd527 3648 or 9 will also trigger a warning.
7b667b5f
MHM
365On return I<*len> is set to the length of the scanned string,
366and I<*flags> gives output flags.
367
368If the value is <= UV_MAX it is returned as a UV, the output flags are clear,
369and nothing is written to I<*result>. If the value is > UV_MAX C<grok_oct>
370returns UV_MAX, sets C<PERL_SCAN_GREATER_THAN_UV_MAX> in the output flags,
371and writes the value to I<*result> (or the value is discarded if I<result>
372is NULL).
373
374If C<PERL_SCAN_ALLOW_UNDERSCORES> is set in I<*flags> then the octal
375number may use '_' characters to separate digits.
53305cf1
NC
376
377=cut
02470786 378
333ae27c
KW
379Not documented yet because experimental is C<PERL_SCAN_SILENT_NON_PORTABLE>
380which suppresses any message for non-portable numbers, but which are valid
02470786 381on this platform.
53305cf1
NC
382 */
383
384UV
7918f24d
NC
385Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
386{
53305cf1
NC
387 const char *s = start;
388 STRLEN len = *len_p;
389 UV value = 0;
390 NV value_nv = 0;
53305cf1 391 const UV max_div_8 = UV_MAX / 8;
f2338a2e 392 const bool allow_underscores = cBOOL(*flags & PERL_SCAN_ALLOW_UNDERSCORES);
53305cf1
NC
393 bool overflowed = FALSE;
394
7918f24d
NC
395 PERL_ARGS_ASSERT_GROK_OCT;
396
53305cf1
NC
397 for (; len-- && *s; s++) {
398 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
399 out front allows slicker code. */
400 int digit = *s - '0';
401 if (digit >= 0 && digit <= 7) {
402 /* Write it in this wonky order with a goto to attempt to get the
403 compiler to make the common case integer-only loop pretty tight.
404 */
405 redo:
406 if (!overflowed) {
407 if (value <= max_div_8) {
408 value = (value << 3) | digit;
409 continue;
410 }
411 /* Bah. We're just overflowed. */
dcbac5bb 412 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
413 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
414 "Integer overflow in octal number");
53305cf1
NC
415 overflowed = TRUE;
416 value_nv = (NV) value;
417 }
418 value_nv *= 8.0;
98994639 419 /* If an NV has not enough bits in its mantissa to
d1be9408 420 * represent a UV this summing of small low-order numbers
98994639
HS
421 * is a waste of time (because the NV cannot preserve
422 * the low-order bits anyway): we could just remember when
53305cf1
NC
423 * did we overflow and in the end just multiply value_nv by the
424 * right amount of 8-tuples. */
425 value_nv += (NV)digit;
426 continue;
427 }
428 if (digit == ('_' - '0') && len && allow_underscores
429 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
430 {
431 --len;
432 ++s;
433 goto redo;
434 }
435 /* Allow \octal to work the DWIM way (that is, stop scanning
7b667b5f 436 * as soon as non-octal characters are seen, complain only if
53305cf1
NC
437 * someone seems to want to use the digits eight and nine). */
438 if (digit == 8 || digit == 9) {
a2a5de95
NC
439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
440 Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
441 "Illegal octal digit '%c' ignored", *s);
53305cf1
NC
442 }
443 break;
98994639 444 }
53305cf1
NC
445
446 if ( ( overflowed && value_nv > 4294967295.0)
98994639 447#if UVSIZE > 4
02470786
KW
448 || (!overflowed && value > 0xffffffff
449 && ! (*flags & PERL_SCAN_SILENT_NON_PORTABLE))
98994639
HS
450#endif
451 ) {
a2a5de95
NC
452 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
453 "Octal number > 037777777777 non-portable");
53305cf1
NC
454 }
455 *len_p = s - start;
456 if (!overflowed) {
457 *flags = 0;
458 return value;
98994639 459 }
53305cf1
NC
460 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
461 if (result)
462 *result = value_nv;
463 return UV_MAX;
464}
465
466/*
467=for apidoc scan_bin
468
469For backwards compatibility. Use C<grok_bin> instead.
470
471=for apidoc scan_hex
472
473For backwards compatibility. Use C<grok_hex> instead.
474
475=for apidoc scan_oct
476
477For backwards compatibility. Use C<grok_oct> instead.
478
479=cut
480 */
481
482NV
73d840c0 483Perl_scan_bin(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
484{
485 NV rnv;
486 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 487 const UV ruv = grok_bin (start, &len, &flags, &rnv);
53305cf1 488
7918f24d
NC
489 PERL_ARGS_ASSERT_SCAN_BIN;
490
53305cf1
NC
491 *retlen = len;
492 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
493}
494
495NV
73d840c0 496Perl_scan_oct(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
497{
498 NV rnv;
499 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 500 const UV ruv = grok_oct (start, &len, &flags, &rnv);
53305cf1 501
7918f24d
NC
502 PERL_ARGS_ASSERT_SCAN_OCT;
503
53305cf1
NC
504 *retlen = len;
505 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
506}
507
508NV
73d840c0 509Perl_scan_hex(pTHX_ const char *start, STRLEN len, STRLEN *retlen)
53305cf1
NC
510{
511 NV rnv;
512 I32 flags = *retlen ? PERL_SCAN_ALLOW_UNDERSCORES : 0;
73d840c0 513 const UV ruv = grok_hex (start, &len, &flags, &rnv);
53305cf1 514
7918f24d
NC
515 PERL_ARGS_ASSERT_SCAN_HEX;
516
53305cf1
NC
517 *retlen = len;
518 return (flags & PERL_SCAN_GREATER_THAN_UV_MAX) ? rnv : (NV)ruv;
98994639
HS
519}
520
521/*
522=for apidoc grok_numeric_radix
523
524Scan and skip for a numeric decimal separator (radix).
525
526=cut
527 */
528bool
529Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
530{
531#ifdef USE_LOCALE_NUMERIC
97aff369 532 dVAR;
7918f24d
NC
533
534 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
535
66cbab2c 536 if (PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
98994639 537 STRLEN len;
c4420975 538 const char * const radix = SvPV(PL_numeric_radix_sv, len);
98994639
HS
539 if (*sp + len <= send && memEQ(*sp, radix, len)) {
540 *sp += len;
541 return TRUE;
542 }
543 }
544 /* always try "." if numeric radix didn't match because
545 * we may have data from different locales mixed */
546#endif
7918f24d
NC
547
548 PERL_ARGS_ASSERT_GROK_NUMERIC_RADIX;
549
98994639
HS
550 if (*sp < send && **sp == '.') {
551 ++*sp;
552 return TRUE;
553 }
554 return FALSE;
555}
556
557/*
558=for apidoc grok_number
559
560Recognise (or not) a number. The type of the number is returned
561(0 if unrecognised), otherwise it is a bit-ORed combination of
562IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT,
aa8b85de 563IS_NUMBER_NEG, IS_NUMBER_INFINITY, IS_NUMBER_NAN (defined in perl.h).
60939fb8
NC
564
565If the value of the number can fit in a UV, it is returned in *valuep.
566IS_NUMBER_IN_UV will be set to indicate that *valuep is valid, IS_NUMBER_IN_UV
567will never be set unless *valuep is valid, but *valuep may have been assigned
568to during processing even though IS_NUMBER_IN_UV is not set on return.
569If valuep is NULL, IS_NUMBER_IN_UV will be set for the same cases as when
570valuep is non-NULL, but no actual assignment (or SEGV) will occur.
571
572IS_NUMBER_NOT_INT will be set with IS_NUMBER_IN_UV if trailing decimals were
573seen (in which case *valuep gives the true value truncated to an integer), and
574IS_NUMBER_NEG if the number is negative (in which case *valuep holds the
575absolute value). IS_NUMBER_IN_UV is not set if e notation was used or the
576number is larger than a UV.
98994639
HS
577
578=cut
579 */
580int
581Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
582{
60939fb8 583 const char *s = pv;
c4420975 584 const char * const send = pv + len;
60939fb8
NC
585 const UV max_div_10 = UV_MAX / 10;
586 const char max_mod_10 = UV_MAX % 10;
587 int numtype = 0;
588 int sawinf = 0;
aa8b85de 589 int sawnan = 0;
60939fb8 590
7918f24d
NC
591 PERL_ARGS_ASSERT_GROK_NUMBER;
592
60939fb8
NC
593 while (s < send && isSPACE(*s))
594 s++;
595 if (s == send) {
596 return 0;
597 } else if (*s == '-') {
598 s++;
599 numtype = IS_NUMBER_NEG;
600 }
601 else if (*s == '+')
602 s++;
603
604 if (s == send)
605 return 0;
606
607 /* next must be digit or the radix separator or beginning of infinity */
608 if (isDIGIT(*s)) {
609 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
610 overflow. */
611 UV value = *s - '0';
612 /* This construction seems to be more optimiser friendly.
613 (without it gcc does the isDIGIT test and the *s - '0' separately)
614 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
615 In theory the optimiser could deduce how far to unroll the loop
616 before checking for overflow. */
58bb9ec3
NC
617 if (++s < send) {
618 int digit = *s - '0';
60939fb8
NC
619 if (digit >= 0 && digit <= 9) {
620 value = value * 10 + digit;
58bb9ec3
NC
621 if (++s < send) {
622 digit = *s - '0';
60939fb8
NC
623 if (digit >= 0 && digit <= 9) {
624 value = value * 10 + digit;
58bb9ec3
NC
625 if (++s < send) {
626 digit = *s - '0';
60939fb8
NC
627 if (digit >= 0 && digit <= 9) {
628 value = value * 10 + digit;
58bb9ec3
NC
629 if (++s < send) {
630 digit = *s - '0';
60939fb8
NC
631 if (digit >= 0 && digit <= 9) {
632 value = value * 10 + digit;
58bb9ec3
NC
633 if (++s < send) {
634 digit = *s - '0';
60939fb8
NC
635 if (digit >= 0 && digit <= 9) {
636 value = value * 10 + digit;
58bb9ec3
NC
637 if (++s < send) {
638 digit = *s - '0';
60939fb8
NC
639 if (digit >= 0 && digit <= 9) {
640 value = value * 10 + digit;
58bb9ec3
NC
641 if (++s < send) {
642 digit = *s - '0';
60939fb8
NC
643 if (digit >= 0 && digit <= 9) {
644 value = value * 10 + digit;
58bb9ec3
NC
645 if (++s < send) {
646 digit = *s - '0';
60939fb8
NC
647 if (digit >= 0 && digit <= 9) {
648 value = value * 10 + digit;
58bb9ec3 649 if (++s < send) {
60939fb8
NC
650 /* Now got 9 digits, so need to check
651 each time for overflow. */
58bb9ec3 652 digit = *s - '0';
60939fb8
NC
653 while (digit >= 0 && digit <= 9
654 && (value < max_div_10
655 || (value == max_div_10
656 && digit <= max_mod_10))) {
657 value = value * 10 + digit;
58bb9ec3
NC
658 if (++s < send)
659 digit = *s - '0';
60939fb8
NC
660 else
661 break;
662 }
663 if (digit >= 0 && digit <= 9
51bd16da 664 && (s < send)) {
60939fb8
NC
665 /* value overflowed.
666 skip the remaining digits, don't
667 worry about setting *valuep. */
668 do {
669 s++;
670 } while (s < send && isDIGIT(*s));
671 numtype |=
672 IS_NUMBER_GREATER_THAN_UV_MAX;
673 goto skip_value;
674 }
675 }
676 }
98994639 677 }
60939fb8
NC
678 }
679 }
680 }
681 }
682 }
683 }
684 }
685 }
686 }
687 }
688 }
98994639 689 }
60939fb8 690 }
98994639 691 }
60939fb8
NC
692 numtype |= IS_NUMBER_IN_UV;
693 if (valuep)
694 *valuep = value;
695
696 skip_value:
697 if (GROK_NUMERIC_RADIX(&s, send)) {
698 numtype |= IS_NUMBER_NOT_INT;
699 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
700 s++;
98994639 701 }
60939fb8
NC
702 }
703 else if (GROK_NUMERIC_RADIX(&s, send)) {
704 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
705 /* no digits before the radix means we need digits after it */
706 if (s < send && isDIGIT(*s)) {
707 do {
708 s++;
709 } while (s < send && isDIGIT(*s));
710 if (valuep) {
711 /* integer approximation is valid - it's 0. */
712 *valuep = 0;
713 }
98994639 714 }
60939fb8
NC
715 else
716 return 0;
717 } else if (*s == 'I' || *s == 'i') {
718 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
719 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
720 s++; if (s < send && (*s == 'I' || *s == 'i')) {
721 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
722 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
723 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
724 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
725 s++;
98994639 726 }
60939fb8 727 sawinf = 1;
aa8b85de
JH
728 } else if (*s == 'N' || *s == 'n') {
729 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
730 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
731 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
732 s++;
733 sawnan = 1;
734 } else
98994639 735 return 0;
60939fb8
NC
736
737 if (sawinf) {
738 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
739 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
aa8b85de
JH
740 } else if (sawnan) {
741 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
742 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
60939fb8
NC
743 } else if (s < send) {
744 /* we can have an optional exponent part */
745 if (*s == 'e' || *s == 'E') {
746 /* The only flag we keep is sign. Blow away any "it's UV" */
747 numtype &= IS_NUMBER_NEG;
748 numtype |= IS_NUMBER_NOT_INT;
749 s++;
750 if (s < send && (*s == '-' || *s == '+'))
751 s++;
752 if (s < send && isDIGIT(*s)) {
753 do {
754 s++;
755 } while (s < send && isDIGIT(*s));
756 }
757 else
758 return 0;
759 }
760 }
761 while (s < send && isSPACE(*s))
762 s++;
763 if (s >= send)
aa8b85de 764 return numtype;
60939fb8
NC
765 if (len == 10 && memEQ(pv, "0 but true", 10)) {
766 if (valuep)
767 *valuep = 0;
768 return IS_NUMBER_IN_UV;
769 }
770 return 0;
98994639
HS
771}
772
4801ca72 773STATIC NV
98994639
HS
774S_mulexp10(NV value, I32 exponent)
775{
776 NV result = 1.0;
777 NV power = 10.0;
778 bool negative = 0;
779 I32 bit;
780
781 if (exponent == 0)
782 return value;
20f6aaab 783 if (value == 0)
66a1b24b 784 return (NV)0;
87032ba1 785
24866caa 786 /* On OpenVMS VAX we by default use the D_FLOAT double format,
67597c89 787 * and that format does not have *easy* capabilities [1] for
24866caa
CB
788 * overflowing doubles 'silently' as IEEE fp does. We also need
789 * to support G_FLOAT on both VAX and Alpha, and though the exponent
790 * range is much larger than D_FLOAT it still doesn't do silent
791 * overflow. Therefore we need to detect early whether we would
792 * overflow (this is the behaviour of the native string-to-float
793 * conversion routines, and therefore of native applications, too).
67597c89 794 *
24866caa
CB
795 * [1] Trying to establish a condition handler to trap floating point
796 * exceptions is not a good idea. */
87032ba1
JH
797
798 /* In UNICOS and in certain Cray models (such as T90) there is no
799 * IEEE fp, and no way at all from C to catch fp overflows gracefully.
800 * There is something you can do if you are willing to use some
801 * inline assembler: the instruction is called DFI-- but that will
802 * disable *all* floating point interrupts, a little bit too large
803 * a hammer. Therefore we need to catch potential overflows before
804 * it's too late. */
353813d9 805
85bba25f 806#if ((defined(VMS) && !defined(_IEEE_FP)) || defined(_UNICOS)) && defined(NV_MAX_10_EXP)
353813d9 807 STMT_START {
c4420975 808 const NV exp_v = log10(value);
353813d9
HS
809 if (exponent >= NV_MAX_10_EXP || exponent + exp_v >= NV_MAX_10_EXP)
810 return NV_MAX;
811 if (exponent < 0) {
812 if (-(exponent + exp_v) >= NV_MAX_10_EXP)
813 return 0.0;
814 while (-exponent >= NV_MAX_10_EXP) {
815 /* combination does not overflow, but 10^(-exponent) does */
816 value /= 10;
817 ++exponent;
818 }
819 }
820 } STMT_END;
87032ba1
JH
821#endif
822
353813d9
HS
823 if (exponent < 0) {
824 negative = 1;
825 exponent = -exponent;
826 }
98994639
HS
827 for (bit = 1; exponent; bit <<= 1) {
828 if (exponent & bit) {
829 exponent ^= bit;
830 result *= power;
236f0012
CB
831 /* Floating point exceptions are supposed to be turned off,
832 * but if we're obviously done, don't risk another iteration.
833 */
834 if (exponent == 0) break;
98994639
HS
835 }
836 power *= power;
837 }
838 return negative ? value / result : value * result;
839}
840
841NV
842Perl_my_atof(pTHX_ const char* s)
843{
844 NV x = 0.0;
845#ifdef USE_LOCALE_NUMERIC
97aff369 846 dVAR;
7918f24d
NC
847
848 PERL_ARGS_ASSERT_MY_ATOF;
849
78787052 850 if (PL_numeric_local && PL_numeric_radix_sv && IN_SOME_LOCALE_FORM) {
07b2a6c8 851 const char *standard = NULL, *local = NULL;
78787052 852 bool use_standard_radix;
98994639 853
f1c982a2
JL
854 /* Look through the string for the first thing that looks like a
855 * decimal point: either the value in the current locale or the
856 * standard fallback of '.'. The one which appears earliest in the
857 * input string is the one that we should have atof look for. Note that
858 * we have to determine this beforehand because on some systems,
859 * Perl_atof2 is just a wrapper around the system's atof. */
78787052
JL
860 standard = strchr(s, '.');
861 local = strstr(s, SvPV_nolen(PL_numeric_radix_sv));
862
863 use_standard_radix = standard && (!local || standard < local);
864
865 if (use_standard_radix)
866 SET_NUMERIC_STANDARD();
867
868 Perl_atof2(s, x);
869
870 if (use_standard_radix)
871 SET_NUMERIC_LOCAL();
98994639
HS
872 }
873 else
a36244b7 874 Perl_atof2(s, x);
98994639 875#else
a36244b7 876 Perl_atof2(s, x);
98994639
HS
877#endif
878 return x;
879}
880
881char*
882Perl_my_atof2(pTHX_ const char* orig, NV* value)
883{
20f6aaab 884 NV result[3] = {0.0, 0.0, 0.0};
e1ec3a88 885 const char* s = orig;
a36244b7 886#ifdef USE_PERL_ATOF
20f6aaab 887 UV accumulator[2] = {0,0}; /* before/after dp */
a36244b7 888 bool negative = 0;
e1ec3a88 889 const char* send = s + strlen(orig) - 1;
8194bf88 890 bool seen_digit = 0;
20f6aaab
AS
891 I32 exp_adjust[2] = {0,0};
892 I32 exp_acc[2] = {-1, -1};
893 /* the current exponent adjust for the accumulators */
98994639 894 I32 exponent = 0;
8194bf88 895 I32 seen_dp = 0;
20f6aaab
AS
896 I32 digit = 0;
897 I32 old_digit = 0;
8194bf88
DM
898 I32 sig_digits = 0; /* noof significant digits seen so far */
899
7918f24d
NC
900 PERL_ARGS_ASSERT_MY_ATOF2;
901
8194bf88
DM
902/* There is no point in processing more significant digits
903 * than the NV can hold. Note that NV_DIG is a lower-bound value,
904 * while we need an upper-bound value. We add 2 to account for this;
905 * since it will have been conservative on both the first and last digit.
906 * For example a 32-bit mantissa with an exponent of 4 would have
907 * exact values in the set
908 * 4
909 * 8
910 * ..
911 * 17179869172
912 * 17179869176
913 * 17179869180
914 *
915 * where for the purposes of calculating NV_DIG we would have to discount
916 * both the first and last digit, since neither can hold all values from
917 * 0..9; but for calculating the value we must examine those two digits.
918 */
ffa277e5
AS
919#ifdef MAX_SIG_DIG_PLUS
920 /* It is not necessarily the case that adding 2 to NV_DIG gets all the
921 possible digits in a NV, especially if NVs are not IEEE compliant
922 (e.g., long doubles on IRIX) - Allen <allens@cpan.org> */
923# define MAX_SIG_DIGITS (NV_DIG+MAX_SIG_DIG_PLUS)
924#else
925# define MAX_SIG_DIGITS (NV_DIG+2)
926#endif
8194bf88
DM
927
928/* the max number we can accumulate in a UV, and still safely do 10*N+9 */
929#define MAX_ACCUMULATE ( (UV) ((UV_MAX - 9)/10))
98994639 930
96a05aee
HS
931 /* leading whitespace */
932 while (isSPACE(*s))
933 ++s;
934
98994639
HS
935 /* sign */
936 switch (*s) {
937 case '-':
938 negative = 1;
939 /* fall through */
940 case '+':
941 ++s;
942 }
943
2b54f59f
YST
944 /* punt to strtod for NaN/Inf; if no support for it there, tough luck */
945
946#ifdef HAS_STRTOD
947 if (*s == 'n' || *s == 'N' || *s == 'i' || *s == 'I') {
c042ae3a 948 const char *p = negative ? s - 1 : s;
2b54f59f
YST
949 char *endp;
950 NV rslt;
951 rslt = strtod(p, &endp);
952 if (endp != p) {
953 *value = rslt;
954 return (char *)endp;
955 }
956 }
957#endif
958
8194bf88
DM
959 /* we accumulate digits into an integer; when this becomes too
960 * large, we add the total to NV and start again */
98994639 961
8194bf88
DM
962 while (1) {
963 if (isDIGIT(*s)) {
964 seen_digit = 1;
20f6aaab 965 old_digit = digit;
8194bf88 966 digit = *s++ - '0';
20f6aaab
AS
967 if (seen_dp)
968 exp_adjust[1]++;
98994639 969
8194bf88
DM
970 /* don't start counting until we see the first significant
971 * digit, eg the 5 in 0.00005... */
972 if (!sig_digits && digit == 0)
973 continue;
974
975 if (++sig_digits > MAX_SIG_DIGITS) {
98994639 976 /* limits of precision reached */
20f6aaab
AS
977 if (digit > 5) {
978 ++accumulator[seen_dp];
979 } else if (digit == 5) {
980 if (old_digit % 2) { /* round to even - Allen */
981 ++accumulator[seen_dp];
982 }
983 }
984 if (seen_dp) {
985 exp_adjust[1]--;
986 } else {
987 exp_adjust[0]++;
988 }
8194bf88 989 /* skip remaining digits */
98994639 990 while (isDIGIT(*s)) {
98994639 991 ++s;
20f6aaab
AS
992 if (! seen_dp) {
993 exp_adjust[0]++;
994 }
98994639
HS
995 }
996 /* warn of loss of precision? */
98994639 997 }
8194bf88 998 else {
20f6aaab 999 if (accumulator[seen_dp] > MAX_ACCUMULATE) {
8194bf88 1000 /* add accumulator to result and start again */
20f6aaab
AS
1001 result[seen_dp] = S_mulexp10(result[seen_dp],
1002 exp_acc[seen_dp])
1003 + (NV)accumulator[seen_dp];
1004 accumulator[seen_dp] = 0;
1005 exp_acc[seen_dp] = 0;
98994639 1006 }
20f6aaab
AS
1007 accumulator[seen_dp] = accumulator[seen_dp] * 10 + digit;
1008 ++exp_acc[seen_dp];
98994639 1009 }
8194bf88 1010 }
e1ec3a88 1011 else if (!seen_dp && GROK_NUMERIC_RADIX(&s, send)) {
8194bf88 1012 seen_dp = 1;
20f6aaab 1013 if (sig_digits > MAX_SIG_DIGITS) {
c86f7df5 1014 do {
20f6aaab 1015 ++s;
c86f7df5 1016 } while (isDIGIT(*s));
20f6aaab
AS
1017 break;
1018 }
8194bf88
DM
1019 }
1020 else {
1021 break;
98994639
HS
1022 }
1023 }
1024
20f6aaab
AS
1025 result[0] = S_mulexp10(result[0], exp_acc[0]) + (NV)accumulator[0];
1026 if (seen_dp) {
1027 result[1] = S_mulexp10(result[1], exp_acc[1]) + (NV)accumulator[1];
1028 }
98994639 1029
8194bf88 1030 if (seen_digit && (*s == 'e' || *s == 'E')) {
98994639
HS
1031 bool expnegative = 0;
1032
1033 ++s;
1034 switch (*s) {
1035 case '-':
1036 expnegative = 1;
1037 /* fall through */
1038 case '+':
1039 ++s;
1040 }
1041 while (isDIGIT(*s))
1042 exponent = exponent * 10 + (*s++ - '0');
1043 if (expnegative)
1044 exponent = -exponent;
1045 }
1046
20f6aaab
AS
1047
1048
98994639 1049 /* now apply the exponent */
20f6aaab
AS
1050
1051 if (seen_dp) {
1052 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0])
1053 + S_mulexp10(result[1],exponent-exp_adjust[1]);
1054 } else {
1055 result[2] = S_mulexp10(result[0],exponent+exp_adjust[0]);
1056 }
98994639
HS
1057
1058 /* now apply the sign */
1059 if (negative)
20f6aaab 1060 result[2] = -result[2];
a36244b7 1061#endif /* USE_PERL_ATOF */
20f6aaab 1062 *value = result[2];
73d840c0 1063 return (char *)s;
98994639
HS
1064}
1065
55954f19
JH
1066#if ! defined(HAS_MODFL) && defined(HAS_AINTL) && defined(HAS_COPYSIGNL)
/*
 * Perl_my_modfl - stand-in for modfl(), compiled only when the platform
 * lacks modfl() but provides aintl() and copysignl().
 *
 * The value aintl(x) returns (presumably x truncated to an integral
 * value -- confirm against the platform's aintl() documentation) is
 * stored through ip, and the remainder of x is returned.  When x is equal
 * to that stored value the result is a zero carrying the sign of x rather
 * than the plain difference.
 */
long double
Perl_my_modfl(long double x, long double *ip)
{
    const long double whole = aintl(x);

    *ip = whole;
    if (x == whole)
        return copysignl(0.0L, x);
    return x - whole;
}
1073#endif
1074
1075#if ! defined(HAS_FREXPL) && defined(HAS_ILOGBL) && defined(HAS_SCALBNL)
1076long double
1077Perl_my_frexpl(long double x, int *e) {
1078 *e = x == 0.0L ? 0 : ilogbl(x) + 1;
1079 return (scalbnl(x, -*e));
1080}
1081#endif
66610fdd
RGS
1082
1083/*
ed140128
AD
1084=for apidoc Perl_signbit
1085
1086Return a non-zero integer if the sign bit on an NV is set, and 0 if
1087it is not.
1088
1089If Configure detects this system has a signbit() that will work with
1090our NVs, then we just use it via the #define in perl.h. Otherwise,
1091fall back on this implementation. As a first pass, this gets everything
1092right except -0.0. Alas, catching -0.0 is the main use for this function,
1093so this is not too helpful yet. Still, at least we have the scaffolding
1094in place to support other systems, should that prove useful.
1095
1096
1097Configure notes: This function is called 'Perl_signbit' instead of a
1098plain 'signbit' because it is easy to imagine a system having a signbit()
1099function or macro that doesn't happen to work with our particular choice
1100of NVs. We shouldn't just re-#define signbit as Perl_signbit and expect
1101the standard system headers to be happy. Also, this is a no-context
1102function (no pTHX_) because Perl_signbit() is usually re-#defined in
1103perl.h as a simple macro call to the system's signbit().
1104Users should just always call Perl_signbit().
1105
1106=cut
1107*/
1108#if !defined(HAS_SIGNBIT)
1109int
1110Perl_signbit(NV x) {
1111 return (x < 0.0) ? 1 : 0;
1112}
1113#endif
1114
1115/*
66610fdd
RGS
1116 * Local variables:
1117 * c-indentation-style: bsd
1118 * c-basic-offset: 4
14d04a33 1119 * indent-tabs-mode: nil
66610fdd
RGS
1120 * End:
1121 *
14d04a33 1122 * ex: set ts=8 sts=4 sw=4 et:
37442d52 1123 */