1 ################################################################################
3 ## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
4 ## Version 2.x, Copyright (C) 2001, Paul Marquess.
5 ## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
7 ## This program is free software; you can redistribute it and/or
8 ## modify it under the same terms as Perl itself.
10 ################################################################################
23 __UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
24 __UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
25 __UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
26 __UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
28 __UNDEFINED__ IS_NUMBER_IN_UV 0x01
29 __UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
30 __UNDEFINED__ IS_NUMBER_NOT_INT 0x04
31 __UNDEFINED__ IS_NUMBER_NEG 0x08
32 __UNDEFINED__ IS_NUMBER_INFINITY 0x10
33 __UNDEFINED__ IS_NUMBER_NAN 0x20
35 __UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
37 __UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
38 __UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
39 __UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
40 __UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
42 #ifndef grok_numeric_radix
43 #if { NEED grok_numeric_radix }
45 grok_numeric_radix(pTHX_ const char **sp, const char *send)
47 #ifdef USE_LOCALE_NUMERIC
48 #ifdef PL_numeric_radix_sv
49 if (PL_numeric_radix_sv && IN_LOCALE) {
51 char* radix = SvPV(PL_numeric_radix_sv, len);
52 if (*sp + len <= send && memEQ(*sp, radix, len)) {
58 /* older perls don't have PL_numeric_radix_sv so the radix
59 * must manually be requested from locale.h
62 dTHR; /* needed for older threaded perls */
63 struct lconv *lc = localeconv();
64 char *radix = lc->decimal_point;
65 if (radix && IN_LOCALE) {
66 STRLEN len = strlen(radix);
67 if (*sp + len <= send && memEQ(*sp, radix, len)) {
73 #endif /* USE_LOCALE_NUMERIC */
74 /* always try "." if numeric radix didn't match because
75 * we may have data from different locales mixed */
76 if (*sp < send && **sp == '.') {
86 #if { NEED grok_number }
88 grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
91 const char *send = pv + len;
92 const UV max_div_10 = UV_MAX / 10;
93 const char max_mod_10 = UV_MAX % 10;
98 while (s < send && isSPACE(*s))
102 } else if (*s == '-') {
104 numtype = IS_NUMBER_NEG;
112 /* next must be digit or the radix separator or beginning of infinity */
114 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
117 /* This construction seems to be more optimiser friendly.
118 (without it gcc does the isDIGIT test and the *s - '0' separately)
119 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
120 In theory the optimiser could deduce how far to unroll the loop
121 before checking for overflow. */
123 int digit = *s - '0';
124 if (digit >= 0 && digit <= 9) {
125 value = value * 10 + digit;
128 if (digit >= 0 && digit <= 9) {
129 value = value * 10 + digit;
132 if (digit >= 0 && digit <= 9) {
133 value = value * 10 + digit;
136 if (digit >= 0 && digit <= 9) {
137 value = value * 10 + digit;
140 if (digit >= 0 && digit <= 9) {
141 value = value * 10 + digit;
144 if (digit >= 0 && digit <= 9) {
145 value = value * 10 + digit;
148 if (digit >= 0 && digit <= 9) {
149 value = value * 10 + digit;
152 if (digit >= 0 && digit <= 9) {
153 value = value * 10 + digit;
155 /* Now got 9 digits, so need to check
156 each time for overflow. */
158 while (digit >= 0 && digit <= 9
159 && (value < max_div_10
160 || (value == max_div_10
161 && digit <= max_mod_10))) {
162 value = value * 10 + digit;
168 if (digit >= 0 && digit <= 9
171 skip the remaining digits, don't
172 worry about setting *valuep. */
175 } while (s < send && isDIGIT(*s));
177 IS_NUMBER_GREATER_THAN_UV_MAX;
197 numtype |= IS_NUMBER_IN_UV;
202 if (GROK_NUMERIC_RADIX(&s, send)) {
203 numtype |= IS_NUMBER_NOT_INT;
204 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
208 else if (GROK_NUMERIC_RADIX(&s, send)) {
209 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
210 /* no digits before the radix means we need digits after it */
211 if (s < send && isDIGIT(*s)) {
214 } while (s < send && isDIGIT(*s));
216 /* integer approximation is valid - it's 0. */
222 } else if (*s == 'I' || *s == 'i') {
223 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
224 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
225 s++; if (s < send && (*s == 'I' || *s == 'i')) {
226 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
227 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
228 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
229 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
233 } else if (*s == 'N' || *s == 'n') {
234 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
235 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
236 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
243 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
244 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
246 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
247 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
248 } else if (s < send) {
249 /* we can have an optional exponent part */
250 if (*s == 'e' || *s == 'E') {
251 /* The only flag we keep is sign. Blow away any "it's UV" */
252 numtype &= IS_NUMBER_NEG;
253 numtype |= IS_NUMBER_NOT_INT;
255 if (s < send && (*s == '-' || *s == '+'))
257 if (s < send && isDIGIT(*s)) {
260 } while (s < send && isDIGIT(*s));
266 while (s < send && isSPACE(*s))
270 if (len == 10 && memEQ(pv, "0 but true", 10)) {
273 return IS_NUMBER_IN_UV;
281 * The grok_* routines have been modified to use warn() instead of
282 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
283 * which is why the stack variable has been renamed to 'xdigit'.
287 #if { NEED grok_bin }
289 grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
291 const char *s = start;
296 const UV max_div_2 = UV_MAX / 2;
297 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
298 bool overflowed = FALSE;
300 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
301 /* strip off leading b or 0b.
302 for compatibility silently suffer "b" and "0b" as valid binary
309 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
316 for (; len-- && *s; s++) {
318 if (bit == '0' || bit == '1') {
319 /* Write it in this wonky order with a goto to attempt to get the
320 compiler to make the common case integer-only loop pretty tight.
321 With gcc seems to be much straighter code than old scan_bin. */
324 if (value <= max_div_2) {
325 value = (value << 1) | (bit - '0');
328 /* Bah. We're just overflowed. */
329 warn("Integer overflow in binary number");
331 value_nv = (NV) value;
334 /* If an NV has not enough bits in its mantissa to
335 * represent a UV this summing of small low-order numbers
336 * is a waste of time (because the NV cannot preserve
337 * the low-order bits anyway): we could just remember when
338 * did we overflow and in the end just multiply value_nv by the
340 value_nv += (NV)(bit - '0');
343 if (bit == '_' && len && allow_underscores && (bit = s[1])
344 && (bit == '0' || bit == '1'))
350 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
351 warn("Illegal binary digit '%c' ignored", *s);
355 if ( ( overflowed && value_nv > 4294967295.0)
357 || (!overflowed && value > 0xffffffff )
360 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
367 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
376 #if { NEED grok_hex }
378 grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
380 const char *s = start;
385 const UV max_div_16 = UV_MAX / 16;
386 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
387 bool overflowed = FALSE;
390 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
391 /* strip off leading x or 0x.
392 for compatibility silently suffer "x" and "0x" as valid hex numbers.
399 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
406 for (; len-- && *s; s++) {
407 xdigit = strchr((char *) PL_hexdigit, *s);
409 /* Write it in this wonky order with a goto to attempt to get the
410 compiler to make the common case integer-only loop pretty tight.
411 With gcc seems to be much straighter code than old scan_hex. */
414 if (value <= max_div_16) {
415 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
418 warn("Integer overflow in hexadecimal number");
420 value_nv = (NV) value;
423 /* If an NV has not enough bits in its mantissa to
424 * represent a UV this summing of small low-order numbers
425 * is a waste of time (because the NV cannot preserve
426 * the low-order bits anyway): we could just remember when
427 * did we overflow and in the end just multiply value_nv by the
428 * right amount of 16-tuples. */
429 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
432 if (*s == '_' && len && allow_underscores && s[1]
433 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
440 warn("Illegal hexadecimal digit '%c' ignored", *s);
444 if ( ( overflowed && value_nv > 4294967295.0)
446 || (!overflowed && value > 0xffffffff )
449 warn("Hexadecimal number > 0xffffffff non-portable");
456 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
465 #if { NEED grok_oct }
467 grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
469 const char *s = start;
474 const UV max_div_8 = UV_MAX / 8;
475 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
476 bool overflowed = FALSE;
478 for (; len-- && *s; s++) {
479 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
480 out front allows slicker code. */
481 int digit = *s - '0';
482 if (digit >= 0 && digit <= 7) {
483 /* Write it in this wonky order with a goto to attempt to get the
484 compiler to make the common case integer-only loop pretty tight.
488 if (value <= max_div_8) {
489 value = (value << 3) | digit;
492 /* Bah. We're just overflowed. */
493 warn("Integer overflow in octal number");
495 value_nv = (NV) value;
498 /* If an NV has not enough bits in its mantissa to
499 * represent a UV this summing of small low-order numbers
500 * is a waste of time (because the NV cannot preserve
501 * the low-order bits anyway): we could just remember when
502 * did we overflow and in the end just multiply value_nv by the
503 * right amount of 8-tuples. */
504 value_nv += (NV)digit;
507 if (digit == ('_' - '0') && len && allow_underscores
508 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
514 /* Allow \octal to work the DWIM way (that is, stop scanning
515 * as soon as non-octal characters are seen, complain only iff
516 * someone seems to want to use the digits eight and nine). */
517 if (digit == 8 || digit == 9) {
518 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
519 warn("Illegal octal digit '%c' ignored", *s);
524 if ( ( overflowed && value_nv > 4294967295.0)
526 || (!overflowed && value > 0xffffffff )
529 warn("Octal number > 037777777777 non-portable");
536 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
546 #define NEED_grok_number
547 #define NEED_grok_numeric_radix
548 #define NEED_grok_bin
549 #define NEED_grok_hex
550 #define NEED_grok_oct
561 pv = SvPV(string, len);
562 if (!grok_number(pv, len, &RETVAL))
575 pv = SvPV(string, len);
576 RETVAL = grok_bin(pv, &len, &flags, NULL);
588 pv = SvPV(string, len);
589 RETVAL = grok_hex(pv, &len, &flags, NULL);
601 pv = SvPV(string, len);
602 RETVAL = grok_oct(pv, &len, &flags, NULL);
607 Perl_grok_number(string)
613 pv = SvPV(string, len);
614 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
620 Perl_grok_bin(string)
627 pv = SvPV(string, len);
628 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
633 Perl_grok_hex(string)
640 pv = SvPV(string, len);
641 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
646 Perl_grok_oct(string)
653 pv = SvPV(string, len);
654 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
660 is(&Devel::PPPort::grok_number("42"), 42);
661 ok(!defined(&Devel::PPPort::grok_number("A")));
662 is(&Devel::PPPort::grok_bin("10000001"), 129);
663 is(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
664 is(&Devel::PPPort::grok_oct("377"), 255);
666 is(&Devel::PPPort::Perl_grok_number("42"), 42);
667 ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
668 is(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
669 is(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
670 is(&Devel::PPPort::Perl_grok_oct("377"), 255);