This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
parts/inc/uv: Add Replace: pragma
[perl5.git] / dist / Devel-PPPort / parts / inc / grok
CommitLineData
adfe19db
MHM
1################################################################################
2##
b2049988 3## Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
adfe19db
MHM
4## Version 2.x, Copyright (C) 2001, Paul Marquess.
5## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
6##
7## This program is free software; you can redistribute it and/or
8## modify it under the same terms as Perl itself.
9##
10################################################################################
11
12=provides
13
14grok_hex
15grok_oct
16grok_bin
17grok_numeric_radix
18grok_number
19__UNDEFINED__
20
21=implementation
22
23__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
24__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
25__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
26__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
27
28__UNDEFINED__ IS_NUMBER_IN_UV 0x01
29__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
30__UNDEFINED__ IS_NUMBER_NOT_INT 0x04
31__UNDEFINED__ IS_NUMBER_NEG 0x08
32__UNDEFINED__ IS_NUMBER_INFINITY 0x10
33__UNDEFINED__ IS_NUMBER_NAN 0x20
34
adfe19db
MHM
35__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
36
37__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
38__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
39__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
40__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
41
42#ifndef grok_numeric_radix
43#if { NEED grok_numeric_radix }
44bool
45grok_numeric_radix(pTHX_ const char **sp, const char *send)
46{
47#ifdef USE_LOCALE_NUMERIC
48#ifdef PL_numeric_radix_sv
4a582685 49 if (PL_numeric_radix_sv && IN_LOCALE) {
adfe19db
MHM
50 STRLEN len;
51 char* radix = SvPV(PL_numeric_radix_sv, len);
52 if (*sp + len <= send && memEQ(*sp, radix, len)) {
53 *sp += len;
4a582685 54 return TRUE;
adfe19db
MHM
55 }
56 }
57#else
58 /* older perls don't have PL_numeric_radix_sv so the radix
59 * must manually be requested from locale.h
60 */
61#include <locale.h>
62 dTHR; /* needed for older threaded perls */
63 struct lconv *lc = localeconv();
64 char *radix = lc->decimal_point;
4a582685 65 if (radix && IN_LOCALE) {
adfe19db
MHM
66 STRLEN len = strlen(radix);
67 if (*sp + len <= send && memEQ(*sp, radix, len)) {
68 *sp += len;
4a582685 69 return TRUE;
adfe19db
MHM
70 }
71 }
0d0f8426 72#endif
adfe19db
MHM
73#endif /* USE_LOCALE_NUMERIC */
74 /* always try "." if numeric radix didn't match because
75 * we may have data from different locales mixed */
76 if (*sp < send && **sp == '.') {
77 ++*sp;
78 return TRUE;
79 }
80 return FALSE;
81}
82#endif
83#endif
84
adfe19db
MHM
85#ifndef grok_number
86#if { NEED grok_number }
87int
88grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
89{
90 const char *s = pv;
91 const char *send = pv + len;
92 const UV max_div_10 = UV_MAX / 10;
93 const char max_mod_10 = UV_MAX % 10;
94 int numtype = 0;
95 int sawinf = 0;
96 int sawnan = 0;
97
98 while (s < send && isSPACE(*s))
99 s++;
100 if (s == send) {
101 return 0;
102 } else if (*s == '-') {
103 s++;
104 numtype = IS_NUMBER_NEG;
105 }
106 else if (*s == '+')
107 s++;
108
109 if (s == send)
110 return 0;
111
112 /* next must be digit or the radix separator or beginning of infinity */
113 if (isDIGIT(*s)) {
114 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
115 overflow. */
116 UV value = *s - '0';
117 /* This construction seems to be more optimiser friendly.
118 (without it gcc does the isDIGIT test and the *s - '0' separately)
119 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
120 In theory the optimiser could deduce how far to unroll the loop
121 before checking for overflow. */
122 if (++s < send) {
123 int digit = *s - '0';
124 if (digit >= 0 && digit <= 9) {
125 value = value * 10 + digit;
126 if (++s < send) {
127 digit = *s - '0';
128 if (digit >= 0 && digit <= 9) {
129 value = value * 10 + digit;
130 if (++s < send) {
131 digit = *s - '0';
132 if (digit >= 0 && digit <= 9) {
133 value = value * 10 + digit;
b2049988 134 if (++s < send) {
adfe19db
MHM
135 digit = *s - '0';
136 if (digit >= 0 && digit <= 9) {
137 value = value * 10 + digit;
138 if (++s < send) {
139 digit = *s - '0';
140 if (digit >= 0 && digit <= 9) {
141 value = value * 10 + digit;
142 if (++s < send) {
143 digit = *s - '0';
144 if (digit >= 0 && digit <= 9) {
145 value = value * 10 + digit;
146 if (++s < send) {
147 digit = *s - '0';
148 if (digit >= 0 && digit <= 9) {
149 value = value * 10 + digit;
150 if (++s < send) {
151 digit = *s - '0';
152 if (digit >= 0 && digit <= 9) {
153 value = value * 10 + digit;
154 if (++s < send) {
155 /* Now got 9 digits, so need to check
156 each time for overflow. */
157 digit = *s - '0';
158 while (digit >= 0 && digit <= 9
159 && (value < max_div_10
160 || (value == max_div_10
161 && digit <= max_mod_10))) {
162 value = value * 10 + digit;
163 if (++s < send)
164 digit = *s - '0';
165 else
166 break;
167 }
168 if (digit >= 0 && digit <= 9
169 && (s < send)) {
170 /* value overflowed.
171 skip the remaining digits, don't
172 worry about setting *valuep. */
173 do {
174 s++;
175 } while (s < send && isDIGIT(*s));
176 numtype |=
177 IS_NUMBER_GREATER_THAN_UV_MAX;
178 goto skip_value;
179 }
180 }
181 }
b2049988 182 }
adfe19db
MHM
183 }
184 }
185 }
186 }
187 }
188 }
189 }
190 }
191 }
192 }
193 }
b2049988 194 }
adfe19db
MHM
195 }
196 }
197 numtype |= IS_NUMBER_IN_UV;
198 if (valuep)
199 *valuep = value;
200
201 skip_value:
202 if (GROK_NUMERIC_RADIX(&s, send)) {
203 numtype |= IS_NUMBER_NOT_INT;
204 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
205 s++;
206 }
207 }
208 else if (GROK_NUMERIC_RADIX(&s, send)) {
209 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
210 /* no digits before the radix means we need digits after it */
211 if (s < send && isDIGIT(*s)) {
212 do {
213 s++;
214 } while (s < send && isDIGIT(*s));
215 if (valuep) {
216 /* integer approximation is valid - it's 0. */
217 *valuep = 0;
218 }
219 }
220 else
221 return 0;
222 } else if (*s == 'I' || *s == 'i') {
223 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
224 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
225 s++; if (s < send && (*s == 'I' || *s == 'i')) {
226 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
227 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
228 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
229 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
230 s++;
231 }
232 sawinf = 1;
233 } else if (*s == 'N' || *s == 'n') {
234 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
235 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
236 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
237 s++;
238 sawnan = 1;
239 } else
240 return 0;
241
242 if (sawinf) {
243 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
244 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
245 } else if (sawnan) {
246 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
247 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
248 } else if (s < send) {
249 /* we can have an optional exponent part */
250 if (*s == 'e' || *s == 'E') {
251 /* The only flag we keep is sign. Blow away any "it's UV" */
252 numtype &= IS_NUMBER_NEG;
253 numtype |= IS_NUMBER_NOT_INT;
254 s++;
255 if (s < send && (*s == '-' || *s == '+'))
256 s++;
257 if (s < send && isDIGIT(*s)) {
258 do {
259 s++;
260 } while (s < send && isDIGIT(*s));
261 }
262 else
263 return 0;
264 }
265 }
266 while (s < send && isSPACE(*s))
267 s++;
268 if (s >= send)
269 return numtype;
270 if (len == 10 && memEQ(pv, "0 but true", 10)) {
271 if (valuep)
272 *valuep = 0;
273 return IS_NUMBER_IN_UV;
274 }
275 return 0;
276}
277#endif
278#endif
279
280/*
281 * The grok_* routines have been modified to use warn() instead of
282 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
283 * which is why the stack variable has been renamed to 'xdigit'.
284 */
285
286#ifndef grok_bin
287#if { NEED grok_bin }
288UV
aab9a3b6 289grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db
MHM
290{
291 const char *s = start;
292 STRLEN len = *len_p;
293 UV value = 0;
294 NV value_nv = 0;
295
296 const UV max_div_2 = UV_MAX / 2;
297 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
298 bool overflowed = FALSE;
299
300 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
301 /* strip off leading b or 0b.
302 for compatibility silently suffer "b" and "0b" as valid binary
303 numbers. */
304 if (len >= 1) {
305 if (s[0] == 'b') {
306 s++;
307 len--;
308 }
309 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
310 s+=2;
311 len-=2;
312 }
313 }
314 }
315
316 for (; len-- && *s; s++) {
317 char bit = *s;
318 if (bit == '0' || bit == '1') {
319 /* Write it in this wonky order with a goto to attempt to get the
320 compiler to make the common case integer-only loop pretty tight.
321 With gcc seems to be much straighter code than old scan_bin. */
322 redo:
323 if (!overflowed) {
324 if (value <= max_div_2) {
325 value = (value << 1) | (bit - '0');
326 continue;
327 }
328 /* Bah. We're just overflowed. */
329 warn("Integer overflow in binary number");
330 overflowed = TRUE;
331 value_nv = (NV) value;
332 }
333 value_nv *= 2.0;
b2049988
MHM
334 /* If an NV has not enough bits in its mantissa to
335 * represent a UV this summing of small low-order numbers
336 * is a waste of time (because the NV cannot preserve
337 * the low-order bits anyway): we could just remember when
338 * did we overflow and in the end just multiply value_nv by the
339 * right amount. */
adfe19db
MHM
340 value_nv += (NV)(bit - '0');
341 continue;
342 }
343 if (bit == '_' && len && allow_underscores && (bit = s[1])
344 && (bit == '0' || bit == '1'))
b2049988
MHM
345 {
346 --len;
347 ++s;
adfe19db 348 goto redo;
b2049988 349 }
adfe19db
MHM
350 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
351 warn("Illegal binary digit '%c' ignored", *s);
352 break;
353 }
4a582685 354
adfe19db
MHM
355 if ( ( overflowed && value_nv > 4294967295.0)
356#if UVSIZE > 4
b2049988 357 || (!overflowed && value > 0xffffffff )
adfe19db 358#endif
b2049988
MHM
359 ) {
360 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
adfe19db
MHM
361 }
362 *len_p = s - start;
363 if (!overflowed) {
364 *flags = 0;
365 return value;
366 }
367 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
368 if (result)
369 *result = value_nv;
370 return UV_MAX;
371}
372#endif
373#endif
374
375#ifndef grok_hex
376#if { NEED grok_hex }
377UV
aab9a3b6 378grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db
MHM
379{
380 const char *s = start;
381 STRLEN len = *len_p;
382 UV value = 0;
383 NV value_nv = 0;
384
385 const UV max_div_16 = UV_MAX / 16;
386 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
387 bool overflowed = FALSE;
388 const char *xdigit;
389
390 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
391 /* strip off leading x or 0x.
392 for compatibility silently suffer "x" and "0x" as valid hex numbers.
393 */
394 if (len >= 1) {
395 if (s[0] == 'x') {
396 s++;
397 len--;
398 }
399 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
400 s+=2;
401 len-=2;
402 }
403 }
404 }
405
406 for (; len-- && *s; s++) {
b2049988 407 xdigit = strchr((char *) PL_hexdigit, *s);
adfe19db
MHM
408 if (xdigit) {
409 /* Write it in this wonky order with a goto to attempt to get the
410 compiler to make the common case integer-only loop pretty tight.
411 With gcc seems to be much straighter code than old scan_hex. */
412 redo:
413 if (!overflowed) {
414 if (value <= max_div_16) {
415 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
416 continue;
417 }
418 warn("Integer overflow in hexadecimal number");
419 overflowed = TRUE;
420 value_nv = (NV) value;
421 }
422 value_nv *= 16.0;
b2049988
MHM
423 /* If an NV has not enough bits in its mantissa to
424 * represent a UV this summing of small low-order numbers
425 * is a waste of time (because the NV cannot preserve
426 * the low-order bits anyway): we could just remember when
427 * did we overflow and in the end just multiply value_nv by the
428 * right amount of 16-tuples. */
adfe19db
MHM
429 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
430 continue;
431 }
432 if (*s == '_' && len && allow_underscores && s[1]
b2049988
MHM
433 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
434 {
435 --len;
436 ++s;
adfe19db 437 goto redo;
b2049988 438 }
adfe19db
MHM
439 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
440 warn("Illegal hexadecimal digit '%c' ignored", *s);
441 break;
442 }
4a582685 443
adfe19db
MHM
444 if ( ( overflowed && value_nv > 4294967295.0)
445#if UVSIZE > 4
b2049988 446 || (!overflowed && value > 0xffffffff )
adfe19db 447#endif
b2049988
MHM
448 ) {
449 warn("Hexadecimal number > 0xffffffff non-portable");
adfe19db
MHM
450 }
451 *len_p = s - start;
452 if (!overflowed) {
453 *flags = 0;
454 return value;
455 }
456 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
457 if (result)
458 *result = value_nv;
459 return UV_MAX;
460}
461#endif
462#endif
463
464#ifndef grok_oct
465#if { NEED grok_oct }
466UV
aab9a3b6 467grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
adfe19db
MHM
468{
469 const char *s = start;
470 STRLEN len = *len_p;
471 UV value = 0;
472 NV value_nv = 0;
473
474 const UV max_div_8 = UV_MAX / 8;
475 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
476 bool overflowed = FALSE;
477
478 for (; len-- && *s; s++) {
479 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
480 out front allows slicker code. */
481 int digit = *s - '0';
482 if (digit >= 0 && digit <= 7) {
483 /* Write it in this wonky order with a goto to attempt to get the
484 compiler to make the common case integer-only loop pretty tight.
485 */
486 redo:
487 if (!overflowed) {
488 if (value <= max_div_8) {
489 value = (value << 3) | digit;
490 continue;
491 }
492 /* Bah. We're just overflowed. */
493 warn("Integer overflow in octal number");
494 overflowed = TRUE;
495 value_nv = (NV) value;
496 }
497 value_nv *= 8.0;
b2049988
MHM
498 /* If an NV has not enough bits in its mantissa to
499 * represent a UV this summing of small low-order numbers
500 * is a waste of time (because the NV cannot preserve
501 * the low-order bits anyway): we could just remember when
502 * did we overflow and in the end just multiply value_nv by the
503 * right amount of 8-tuples. */
adfe19db
MHM
504 value_nv += (NV)digit;
505 continue;
506 }
507 if (digit == ('_' - '0') && len && allow_underscores
508 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
b2049988
MHM
509 {
510 --len;
511 ++s;
adfe19db 512 goto redo;
b2049988 513 }
adfe19db
MHM
514 /* Allow \octal to work the DWIM way (that is, stop scanning
515 * as soon as non-octal characters are seen, complain only iff
516 * someone seems to want to use the digits eight and nine). */
517 if (digit == 8 || digit == 9) {
518 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
519 warn("Illegal octal digit '%c' ignored", *s);
520 }
521 break;
522 }
4a582685 523
adfe19db
MHM
524 if ( ( overflowed && value_nv > 4294967295.0)
525#if UVSIZE > 4
b2049988 526 || (!overflowed && value > 0xffffffff )
adfe19db 527#endif
b2049988
MHM
528 ) {
529 warn("Octal number > 037777777777 non-portable");
adfe19db
MHM
530 }
531 *len_p = s - start;
532 if (!overflowed) {
533 *flags = 0;
534 return value;
535 }
536 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
537 if (result)
538 *result = value_nv;
539 return UV_MAX;
540}
541#endif
542#endif
543
544=xsinit
545
546#define NEED_grok_number
547#define NEED_grok_numeric_radix
548#define NEED_grok_bin
549#define NEED_grok_hex
550#define NEED_grok_oct
551
552=xsubs
553
554UV
555grok_number(string)
b2049988
MHM
556 SV *string
557 PREINIT:
558 const char *pv;
559 STRLEN len;
560 CODE:
561 pv = SvPV(string, len);
562 if (!grok_number(pv, len, &RETVAL))
563 XSRETURN_UNDEF;
564 OUTPUT:
565 RETVAL
adfe19db
MHM
566
567UV
568grok_bin(string)
b2049988
MHM
569 SV *string
570 PREINIT:
571 char *pv;
ea4b7f32 572 I32 flags = 0;
b2049988
MHM
573 STRLEN len;
574 CODE:
575 pv = SvPV(string, len);
576 RETVAL = grok_bin(pv, &len, &flags, NULL);
577 OUTPUT:
578 RETVAL
adfe19db
MHM
579
580UV
581grok_hex(string)
b2049988
MHM
582 SV *string
583 PREINIT:
584 char *pv;
ea4b7f32 585 I32 flags = 0;
b2049988
MHM
586 STRLEN len;
587 CODE:
588 pv = SvPV(string, len);
589 RETVAL = grok_hex(pv, &len, &flags, NULL);
590 OUTPUT:
591 RETVAL
adfe19db
MHM
592
593UV
594grok_oct(string)
b2049988
MHM
595 SV *string
596 PREINIT:
597 char *pv;
ea4b7f32 598 I32 flags = 0;
b2049988
MHM
599 STRLEN len;
600 CODE:
601 pv = SvPV(string, len);
602 RETVAL = grok_oct(pv, &len, &flags, NULL);
603 OUTPUT:
604 RETVAL
adfe19db
MHM
605
606UV
607Perl_grok_number(string)
b2049988
MHM
608 SV *string
609 PREINIT:
610 const char *pv;
611 STRLEN len;
612 CODE:
613 pv = SvPV(string, len);
614 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
615 XSRETURN_UNDEF;
616 OUTPUT:
617 RETVAL
adfe19db
MHM
618
619UV
620Perl_grok_bin(string)
b2049988
MHM
621 SV *string
622 PREINIT:
623 char *pv;
ea4b7f32 624 I32 flags = 0;
b2049988
MHM
625 STRLEN len;
626 CODE:
627 pv = SvPV(string, len);
628 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
629 OUTPUT:
630 RETVAL
adfe19db
MHM
631
632UV
633Perl_grok_hex(string)
b2049988
MHM
634 SV *string
635 PREINIT:
636 char *pv;
ea4b7f32 637 I32 flags = 0;
b2049988
MHM
638 STRLEN len;
639 CODE:
640 pv = SvPV(string, len);
641 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
642 OUTPUT:
643 RETVAL
adfe19db
MHM
644
645UV
646Perl_grok_oct(string)
b2049988
MHM
647 SV *string
648 PREINIT:
649 char *pv;
ea4b7f32 650 I32 flags = 0;
b2049988
MHM
651 STRLEN len;
652 CODE:
653 pv = SvPV(string, len);
654 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
655 OUTPUT:
656 RETVAL
adfe19db
MHM
657
658=tests plan => 10
659
660ok(&Devel::PPPort::grok_number("42"), 42);
661ok(!defined(&Devel::PPPort::grok_number("A")));
662ok(&Devel::PPPort::grok_bin("10000001"), 129);
663ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
664ok(&Devel::PPPort::grok_oct("377"), 255);
665
666ok(&Devel::PPPort::Perl_grok_number("42"), 42);
667ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
668ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
669ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
670ok(&Devel::PPPort::Perl_grok_oct("377"), 255);