This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Devel::PPPort 3.00_03.
[perl5.git] / ext / Devel / PPPort / parts / inc / grok
CommitLineData
adfe19db
MHM
1################################################################################
2##
3## $Revision: 6 $
4## $Author: mhx $
5## $Date: 2004/08/13 12:45:54 +0200 $
6##
7################################################################################
8##
9## Version 3.x, Copyright (C) 2004, Marcus Holland-Moritz.
10## Version 2.x, Copyright (C) 2001, Paul Marquess.
11## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
12##
13## This program is free software; you can redistribute it and/or
14## modify it under the same terms as Perl itself.
15##
16################################################################################
17
18=provides
19
20grok_hex
21grok_oct
22grok_bin
23grok_numeric_radix
24grok_number
25__UNDEFINED__
26
27=implementation
28
29__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)
30__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)
31__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)
32__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
33
34__UNDEFINED__ IS_NUMBER_IN_UV 0x01
35__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02
36__UNDEFINED__ IS_NUMBER_NOT_INT 0x04
37__UNDEFINED__ IS_NUMBER_NEG 0x08
38__UNDEFINED__ IS_NUMBER_INFINITY 0x10
39__UNDEFINED__ IS_NUMBER_NAN 0x20
40
41/* GROK_NUMERIC_RADIX depends on grok_numeric_radix */
42__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
43
44__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02
45__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04
46__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01
47__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02
48
49#ifndef grok_numeric_radix
50#if { NEED grok_numeric_radix }
51bool
52grok_numeric_radix(pTHX_ const char **sp, const char *send)
53{
54#ifdef USE_LOCALE_NUMERIC
55#ifdef PL_numeric_radix_sv
56 if (PL_numeric_radix_sv && IN_LOCALE) {
57 STRLEN len;
58 char* radix = SvPV(PL_numeric_radix_sv, len);
59 if (*sp + len <= send && memEQ(*sp, radix, len)) {
60 *sp += len;
61 return TRUE;
62 }
63 }
64#else
65 /* older perls don't have PL_numeric_radix_sv so the radix
66 * must manually be requested from locale.h
67 */
68#include <locale.h>
69 dTHR; /* needed for older threaded perls */
70 struct lconv *lc = localeconv();
71 char *radix = lc->decimal_point;
72 if (radix && IN_LOCALE) {
73 STRLEN len = strlen(radix);
74 if (*sp + len <= send && memEQ(*sp, radix, len)) {
75 *sp += len;
76 return TRUE;
77 }
78 }
79#endif /* PERL_VERSION */
80#endif /* USE_LOCALE_NUMERIC */
81 /* always try "." if numeric radix didn't match because
82 * we may have data from different locales mixed */
83 if (*sp < send && **sp == '.') {
84 ++*sp;
85 return TRUE;
86 }
87 return FALSE;
88}
89#endif
90#endif
91
92/* grok_number depends on grok_numeric_radix */
93
94#ifndef grok_number
95#if { NEED grok_number }
96int
97grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
98{
99 const char *s = pv;
100 const char *send = pv + len;
101 const UV max_div_10 = UV_MAX / 10;
102 const char max_mod_10 = UV_MAX % 10;
103 int numtype = 0;
104 int sawinf = 0;
105 int sawnan = 0;
106
107 while (s < send && isSPACE(*s))
108 s++;
109 if (s == send) {
110 return 0;
111 } else if (*s == '-') {
112 s++;
113 numtype = IS_NUMBER_NEG;
114 }
115 else if (*s == '+')
116 s++;
117
118 if (s == send)
119 return 0;
120
121 /* next must be digit or the radix separator or beginning of infinity */
122 if (isDIGIT(*s)) {
123 /* UVs are at least 32 bits, so the first 9 decimal digits cannot
124 overflow. */
125 UV value = *s - '0';
126 /* This construction seems to be more optimiser friendly.
127 (without it gcc does the isDIGIT test and the *s - '0' separately)
128 With it gcc on arm is managing 6 instructions (6 cycles) per digit.
129 In theory the optimiser could deduce how far to unroll the loop
130 before checking for overflow. */
131 if (++s < send) {
132 int digit = *s - '0';
133 if (digit >= 0 && digit <= 9) {
134 value = value * 10 + digit;
135 if (++s < send) {
136 digit = *s - '0';
137 if (digit >= 0 && digit <= 9) {
138 value = value * 10 + digit;
139 if (++s < send) {
140 digit = *s - '0';
141 if (digit >= 0 && digit <= 9) {
142 value = value * 10 + digit;
143 if (++s < send) {
144 digit = *s - '0';
145 if (digit >= 0 && digit <= 9) {
146 value = value * 10 + digit;
147 if (++s < send) {
148 digit = *s - '0';
149 if (digit >= 0 && digit <= 9) {
150 value = value * 10 + digit;
151 if (++s < send) {
152 digit = *s - '0';
153 if (digit >= 0 && digit <= 9) {
154 value = value * 10 + digit;
155 if (++s < send) {
156 digit = *s - '0';
157 if (digit >= 0 && digit <= 9) {
158 value = value * 10 + digit;
159 if (++s < send) {
160 digit = *s - '0';
161 if (digit >= 0 && digit <= 9) {
162 value = value * 10 + digit;
163 if (++s < send) {
164 /* Now got 9 digits, so need to check
165 each time for overflow. */
166 digit = *s - '0';
167 while (digit >= 0 && digit <= 9
168 && (value < max_div_10
169 || (value == max_div_10
170 && digit <= max_mod_10))) {
171 value = value * 10 + digit;
172 if (++s < send)
173 digit = *s - '0';
174 else
175 break;
176 }
177 if (digit >= 0 && digit <= 9
178 && (s < send)) {
179 /* value overflowed.
180 skip the remaining digits, don't
181 worry about setting *valuep. */
182 do {
183 s++;
184 } while (s < send && isDIGIT(*s));
185 numtype |=
186 IS_NUMBER_GREATER_THAN_UV_MAX;
187 goto skip_value;
188 }
189 }
190 }
191 }
192 }
193 }
194 }
195 }
196 }
197 }
198 }
199 }
200 }
201 }
202 }
203 }
204 }
205 }
206 numtype |= IS_NUMBER_IN_UV;
207 if (valuep)
208 *valuep = value;
209
210 skip_value:
211 if (GROK_NUMERIC_RADIX(&s, send)) {
212 numtype |= IS_NUMBER_NOT_INT;
213 while (s < send && isDIGIT(*s)) /* optional digits after the radix */
214 s++;
215 }
216 }
217 else if (GROK_NUMERIC_RADIX(&s, send)) {
218 numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
219 /* no digits before the radix means we need digits after it */
220 if (s < send && isDIGIT(*s)) {
221 do {
222 s++;
223 } while (s < send && isDIGIT(*s));
224 if (valuep) {
225 /* integer approximation is valid - it's 0. */
226 *valuep = 0;
227 }
228 }
229 else
230 return 0;
231 } else if (*s == 'I' || *s == 'i') {
232 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
233 s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
234 s++; if (s < send && (*s == 'I' || *s == 'i')) {
235 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
236 s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
237 s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
238 s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
239 s++;
240 }
241 sawinf = 1;
242 } else if (*s == 'N' || *s == 'n') {
243 /* XXX TODO: There are signaling NaNs and quiet NaNs. */
244 s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
245 s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
246 s++;
247 sawnan = 1;
248 } else
249 return 0;
250
251 if (sawinf) {
252 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
253 numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
254 } else if (sawnan) {
255 numtype &= IS_NUMBER_NEG; /* Keep track of sign */
256 numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
257 } else if (s < send) {
258 /* we can have an optional exponent part */
259 if (*s == 'e' || *s == 'E') {
260 /* The only flag we keep is sign. Blow away any "it's UV" */
261 numtype &= IS_NUMBER_NEG;
262 numtype |= IS_NUMBER_NOT_INT;
263 s++;
264 if (s < send && (*s == '-' || *s == '+'))
265 s++;
266 if (s < send && isDIGIT(*s)) {
267 do {
268 s++;
269 } while (s < send && isDIGIT(*s));
270 }
271 else
272 return 0;
273 }
274 }
275 while (s < send && isSPACE(*s))
276 s++;
277 if (s >= send)
278 return numtype;
279 if (len == 10 && memEQ(pv, "0 but true", 10)) {
280 if (valuep)
281 *valuep = 0;
282 return IS_NUMBER_IN_UV;
283 }
284 return 0;
285}
286#endif
287#endif
288
289/*
290 * The grok_* routines have been modified to use warn() instead of
291 * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit,
292 * which is why the stack variable has been renamed to 'xdigit'.
293 */
294
295#ifndef grok_bin
296#if { NEED grok_bin }
297UV
298grok_bin(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
299{
300 const char *s = start;
301 STRLEN len = *len_p;
302 UV value = 0;
303 NV value_nv = 0;
304
305 const UV max_div_2 = UV_MAX / 2;
306 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
307 bool overflowed = FALSE;
308
309 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
310 /* strip off leading b or 0b.
311 for compatibility silently suffer "b" and "0b" as valid binary
312 numbers. */
313 if (len >= 1) {
314 if (s[0] == 'b') {
315 s++;
316 len--;
317 }
318 else if (len >= 2 && s[0] == '0' && s[1] == 'b') {
319 s+=2;
320 len-=2;
321 }
322 }
323 }
324
325 for (; len-- && *s; s++) {
326 char bit = *s;
327 if (bit == '0' || bit == '1') {
328 /* Write it in this wonky order with a goto to attempt to get the
329 compiler to make the common case integer-only loop pretty tight.
330 With gcc seems to be much straighter code than old scan_bin. */
331 redo:
332 if (!overflowed) {
333 if (value <= max_div_2) {
334 value = (value << 1) | (bit - '0');
335 continue;
336 }
337 /* Bah. We're just overflowed. */
338 warn("Integer overflow in binary number");
339 overflowed = TRUE;
340 value_nv = (NV) value;
341 }
342 value_nv *= 2.0;
343 /* If an NV has not enough bits in its mantissa to
344 * represent a UV this summing of small low-order numbers
345 * is a waste of time (because the NV cannot preserve
346 * the low-order bits anyway): we could just remember when
347 * did we overflow and in the end just multiply value_nv by the
348 * right amount. */
349 value_nv += (NV)(bit - '0');
350 continue;
351 }
352 if (bit == '_' && len && allow_underscores && (bit = s[1])
353 && (bit == '0' || bit == '1'))
354 {
355 --len;
356 ++s;
357 goto redo;
358 }
359 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
360 warn("Illegal binary digit '%c' ignored", *s);
361 break;
362 }
363
364 if ( ( overflowed && value_nv > 4294967295.0)
365#if UVSIZE > 4
366 || (!overflowed && value > 0xffffffff )
367#endif
368 ) {
369 warn("Binary number > 0b11111111111111111111111111111111 non-portable");
370 }
371 *len_p = s - start;
372 if (!overflowed) {
373 *flags = 0;
374 return value;
375 }
376 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
377 if (result)
378 *result = value_nv;
379 return UV_MAX;
380}
381#endif
382#endif
383
384#ifndef grok_hex
385#if { NEED grok_hex }
386UV
387grok_hex(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
388{
389 const char *s = start;
390 STRLEN len = *len_p;
391 UV value = 0;
392 NV value_nv = 0;
393
394 const UV max_div_16 = UV_MAX / 16;
395 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
396 bool overflowed = FALSE;
397 const char *xdigit;
398
399 if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) {
400 /* strip off leading x or 0x.
401 for compatibility silently suffer "x" and "0x" as valid hex numbers.
402 */
403 if (len >= 1) {
404 if (s[0] == 'x') {
405 s++;
406 len--;
407 }
408 else if (len >= 2 && s[0] == '0' && s[1] == 'x') {
409 s+=2;
410 len-=2;
411 }
412 }
413 }
414
415 for (; len-- && *s; s++) {
416 xdigit = strchr((char *) PL_hexdigit, *s);
417 if (xdigit) {
418 /* Write it in this wonky order with a goto to attempt to get the
419 compiler to make the common case integer-only loop pretty tight.
420 With gcc seems to be much straighter code than old scan_hex. */
421 redo:
422 if (!overflowed) {
423 if (value <= max_div_16) {
424 value = (value << 4) | ((xdigit - PL_hexdigit) & 15);
425 continue;
426 }
427 warn("Integer overflow in hexadecimal number");
428 overflowed = TRUE;
429 value_nv = (NV) value;
430 }
431 value_nv *= 16.0;
432 /* If an NV has not enough bits in its mantissa to
433 * represent a UV this summing of small low-order numbers
434 * is a waste of time (because the NV cannot preserve
435 * the low-order bits anyway): we could just remember when
436 * did we overflow and in the end just multiply value_nv by the
437 * right amount of 16-tuples. */
438 value_nv += (NV)((xdigit - PL_hexdigit) & 15);
439 continue;
440 }
441 if (*s == '_' && len && allow_underscores && s[1]
442 && (xdigit = strchr((char *) PL_hexdigit, s[1])))
443 {
444 --len;
445 ++s;
446 goto redo;
447 }
448 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
449 warn("Illegal hexadecimal digit '%c' ignored", *s);
450 break;
451 }
452
453 if ( ( overflowed && value_nv > 4294967295.0)
454#if UVSIZE > 4
455 || (!overflowed && value > 0xffffffff )
456#endif
457 ) {
458 warn("Hexadecimal number > 0xffffffff non-portable");
459 }
460 *len_p = s - start;
461 if (!overflowed) {
462 *flags = 0;
463 return value;
464 }
465 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
466 if (result)
467 *result = value_nv;
468 return UV_MAX;
469}
470#endif
471#endif
472
473#ifndef grok_oct
474#if { NEED grok_oct }
475UV
476grok_oct(pTHX_ char *start, STRLEN *len_p, I32 *flags, NV *result)
477{
478 const char *s = start;
479 STRLEN len = *len_p;
480 UV value = 0;
481 NV value_nv = 0;
482
483 const UV max_div_8 = UV_MAX / 8;
484 bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES;
485 bool overflowed = FALSE;
486
487 for (; len-- && *s; s++) {
488 /* gcc 2.95 optimiser not smart enough to figure that this subtraction
489 out front allows slicker code. */
490 int digit = *s - '0';
491 if (digit >= 0 && digit <= 7) {
492 /* Write it in this wonky order with a goto to attempt to get the
493 compiler to make the common case integer-only loop pretty tight.
494 */
495 redo:
496 if (!overflowed) {
497 if (value <= max_div_8) {
498 value = (value << 3) | digit;
499 continue;
500 }
501 /* Bah. We're just overflowed. */
502 warn("Integer overflow in octal number");
503 overflowed = TRUE;
504 value_nv = (NV) value;
505 }
506 value_nv *= 8.0;
507 /* If an NV has not enough bits in its mantissa to
508 * represent a UV this summing of small low-order numbers
509 * is a waste of time (because the NV cannot preserve
510 * the low-order bits anyway): we could just remember when
511 * did we overflow and in the end just multiply value_nv by the
512 * right amount of 8-tuples. */
513 value_nv += (NV)digit;
514 continue;
515 }
516 if (digit == ('_' - '0') && len && allow_underscores
517 && (digit = s[1] - '0') && (digit >= 0 && digit <= 7))
518 {
519 --len;
520 ++s;
521 goto redo;
522 }
523 /* Allow \octal to work the DWIM way (that is, stop scanning
524 * as soon as non-octal characters are seen, complain only iff
525 * someone seems to want to use the digits eight and nine). */
526 if (digit == 8 || digit == 9) {
527 if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT))
528 warn("Illegal octal digit '%c' ignored", *s);
529 }
530 break;
531 }
532
533 if ( ( overflowed && value_nv > 4294967295.0)
534#if UVSIZE > 4
535 || (!overflowed && value > 0xffffffff )
536#endif
537 ) {
538 warn("Octal number > 037777777777 non-portable");
539 }
540 *len_p = s - start;
541 if (!overflowed) {
542 *flags = 0;
543 return value;
544 }
545 *flags = PERL_SCAN_GREATER_THAN_UV_MAX;
546 if (result)
547 *result = value_nv;
548 return UV_MAX;
549}
550#endif
551#endif
552
553=xsinit
554
555#define NEED_grok_number
556#define NEED_grok_numeric_radix
557#define NEED_grok_bin
558#define NEED_grok_hex
559#define NEED_grok_oct
560
561=xsubs
562
563UV
564grok_number(string)
565 SV *string
566 PREINIT:
567 const char *pv;
568 STRLEN len;
569 CODE:
570 pv = SvPV(string, len);
571 if (!grok_number(pv, len, &RETVAL))
572 XSRETURN_UNDEF;
573 OUTPUT:
574 RETVAL
575
576UV
577grok_bin(string)
578 SV *string
579 PREINIT:
580 char *pv;
581 I32 flags;
582 STRLEN len;
583 CODE:
584 pv = SvPV(string, len);
585 RETVAL = grok_bin(pv, &len, &flags, NULL);
586 OUTPUT:
587 RETVAL
588
589UV
590grok_hex(string)
591 SV *string
592 PREINIT:
593 char *pv;
594 I32 flags;
595 STRLEN len;
596 CODE:
597 pv = SvPV(string, len);
598 RETVAL = grok_hex(pv, &len, &flags, NULL);
599 OUTPUT:
600 RETVAL
601
602UV
603grok_oct(string)
604 SV *string
605 PREINIT:
606 char *pv;
607 I32 flags;
608 STRLEN len;
609 CODE:
610 pv = SvPV(string, len);
611 RETVAL = grok_oct(pv, &len, &flags, NULL);
612 OUTPUT:
613 RETVAL
614
615UV
616Perl_grok_number(string)
617 SV *string
618 PREINIT:
619 const char *pv;
620 STRLEN len;
621 CODE:
622 pv = SvPV(string, len);
623 if (!Perl_grok_number(aTHX_ pv, len, &RETVAL))
624 XSRETURN_UNDEF;
625 OUTPUT:
626 RETVAL
627
628UV
629Perl_grok_bin(string)
630 SV *string
631 PREINIT:
632 char *pv;
633 I32 flags;
634 STRLEN len;
635 CODE:
636 pv = SvPV(string, len);
637 RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL);
638 OUTPUT:
639 RETVAL
640
641UV
642Perl_grok_hex(string)
643 SV *string
644 PREINIT:
645 char *pv;
646 I32 flags;
647 STRLEN len;
648 CODE:
649 pv = SvPV(string, len);
650 RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL);
651 OUTPUT:
652 RETVAL
653
654UV
655Perl_grok_oct(string)
656 SV *string
657 PREINIT:
658 char *pv;
659 I32 flags;
660 STRLEN len;
661 CODE:
662 pv = SvPV(string, len);
663 RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL);
664 OUTPUT:
665 RETVAL
666
667=tests plan => 10
668
669ok(&Devel::PPPort::grok_number("42"), 42);
670ok(!defined(&Devel::PPPort::grok_number("A")));
671ok(&Devel::PPPort::grok_bin("10000001"), 129);
672ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef);
673ok(&Devel::PPPort::grok_oct("377"), 255);
674
675ok(&Devel::PPPort::Perl_grok_number("42"), 42);
676ok(!defined(&Devel::PPPort::Perl_grok_number("A")));
677ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129);
678ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef);
679ok(&Devel::PPPort::Perl_grok_oct("377"), 255);
680