From: Jarkko Hietaniemi Date: Sun, 24 Aug 2014 02:49:04 +0000 (-0400) Subject: More robust inf/nan recognition and generation. X-Git-Tag: v5.21.4~562 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/8c12dc63d1f47b22b69bbcfe5c21dcadb14c5397?hp=b2bea0a9cc3975a6d0df1ebfa82cd24df518b9b0 More robust inf/nan recognition and generation. Drop INFNAN_PEEK, premature optimization and hard to get right (it basically imitates unrolled first half of grok_infnan). Just keep grok_infan fast. (There is one spot in grok_number_flags() where we peek at the next byte to avoid wasted work.) If falling back (from not having NV_INF/NV_NAN) to the native strtod (or similar), fake the input based on the grok_infnan result. Add last-resort ways to generate inf/nan. Recognize explicit unary plus, like "+Inf", and "INFINITE". In tests use cmp_ok(), fix typos, add tests. --- diff --git a/numeric.c b/numeric.c index daaec06..355980a 100644 --- a/numeric.c +++ b/numeric.c @@ -586,16 +586,6 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) return grok_number_flags(pv, len, valuep, 0); } -/* Peek ahead to see whether this could be Inf/NaN/qNaN/snan/1.#INF */ -#define INFNAN_PEEK(s, send) \ - (s < send && \ - ((isALPHA_FOLD_EQ(*s, 'I') || isALPHA_FOLD_EQ(*s, 'N')) || \ - ((s + 4) < send && \ - (isALPHA_FOLD_EQ(*s, 'Q') || isALPHA_FOLD_EQ(*s, 'S')) && \ - isALPHA_FOLD_EQ(s[1], 'N')) || \ - ((s + 5) < send && \ - (*s == '1' && ((s[1] == '.' && s[2] == '#') || s[1] == '#'))))) - /* =for apidoc grok_infnan @@ -623,7 +613,10 @@ Perl_grok_infnan(const char** sp, const char* send) PERL_ARGS_ASSERT_GROK_INFNAN; - if (*s == '-') { + if (*s == '+') { + s++; if (s == send) return 0; + } + else if (*s == '-') { flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */ s++; if (s == send) return 0; } @@ -650,8 +643,11 @@ Perl_grok_infnan(const char** sp, const char* send) s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return 0; s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return 0; - /* XXX maybe also grok "infinite"? */ - s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return 0; + s++; if (s == send || + /* allow either Infinity or Infinite */ + (isALPHA_FOLD_NE(*s, 'Y') && + isALPHA_FOLD_NE(*s, 'E'))) + return 0; s++; } else if (*s) return 0; @@ -681,10 +677,11 @@ Perl_grok_infnan(const char** sp, const char* send) flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; - /* NaN can be followed by various stuff since there are - * multiple different NaN values, and some implementations - * output the "payload" values, e.g. NaN123, NAN(abc), - * some implementation just have weird stuff like NaN%. */ + /* NaN can be followed by various stuff (NaNQ, NaNS), but + * there are also multiple different NaN values, and some + * implementations output the "payload" values, + * e.g. NaN123, NAN(abc), while some implementations just + * have weird stuff like NaN%. */ s = send; } else @@ -707,8 +704,6 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) const char * const send = pv + len; const char *d; int numtype = 0; - int sawinf = 0; - int sawnan = 0; PERL_ARGS_ASSERT_GROK_NUMBER_FLAGS; @@ -727,10 +722,10 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) return 0; /* The first digit (after optional sign): note that might - * also point to "infinity" or "nan". */ + * also point to "infinity" or "nan", or "1.#INF". */ d = s; - /* next must be digit or the radix separator or beginning of infinity */ + /* next must be digit or the radix separator or beginning of infinity/nan */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ @@ -841,30 +836,8 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) else return 0; } - else { - if (INFNAN_PEEK(d, send)) { - int infnan = Perl_grok_infnan(&d, send); - if ((infnan & IS_NUMBER_INFINITY)) { - numtype |= infnan; - sawinf = 1; - } - else if ((infnan & IS_NUMBER_NAN)) { - numtype |= infnan; - sawnan = 1; - } - else - return 0; - s = d; - } - } - if (sawinf) { - /* Keep the sign for infinity. */ - numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; - } else if (sawnan) { - numtype &= IS_NUMBER_NEG; /* Clear sign for nan. */ - numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; - } else if (s < send) { + if (s < send) { /* we can have an optional exponent part */ if (isALPHA_FOLD_EQ(*s, 'e')) { s++; @@ -894,6 +867,18 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) *valuep = 0; return IS_NUMBER_IN_UV; } + /* We could be e.g. at "Inf" or "NaN", or at the "#" of "1.#INF". */ + if ((s + 2 < send) && strchr("inqs#", toFOLD(*s))) { + /* Really detect inf/nan. Start at d, not s, since the above + * code might have already consumed the "1." or "1". */ + int infnan = Perl_grok_infnan(&d, send); + if ((infnan & IS_NUMBER_INFINITY)) { + return (numtype | infnan); /* Keep sign for infinity. */ + } + else if ((infnan & IS_NUMBER_NAN)) { + return (numtype | infnan) & ~IS_NUMBER_NEG; /* Clear sign for nan. */ + } + } else if (flags & PERL_SCAN_TRAILING) { return numtype | IS_NUMBER_TRAILING; } @@ -1174,30 +1159,68 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value) { const char *p0 = negative ? s - 1 : s; const char *p = p0; -#if defined(NV_INF) && defined(NV_NAN) - int infnan_flags = grok_infnan(&p, send); - if (infnan_flags && p != p0) { - if ((infnan_flags & IS_NUMBER_INFINITY)) { - *value = (infnan_flags & IS_NUMBER_NEG) ? -NV_INF: NV_INF; + int infnan = grok_infnan(&p, send); + if (infnan && p != p0) { + /* If we can generate inf/nan directly, let's do so. */ +#ifdef NV_INF + if ((infnan & IS_NUMBER_INFINITY)) { + *value = (infnan & IS_NUMBER_NEG) ? -NV_INF: NV_INF; return (char*)p; } - else if ((infnan_flags & IS_NUMBER_NAN)) { +#endif +#ifdef NV_NAN + if ((infnan & IS_NUMBER_NAN)) { *value = NV_NAN; return (char*)p; } - } -#elif defined(HAS_STRTOD) - if (INFNAN_PEEK(s, send)) { - /* The native strtod() may not get all the possible - * inf/nan strings INFNAN_PEEK() recognizes. */ - char* endp; - NV nv = Perl_strtod(p, &endp); - if (p != endp) { - *value = nv; - return endp; +#endif +#ifdef Perl_strtod + /* If still here, we didn't have either NV_INF or INV_NAN, + * and can try falling back to native strtod/strtold. + * + * The native interface might not recognize all the possible + * inf/nan strings Perl recognizes. What we can try + * is to try faking the input. We will try inf/-inf/nan + * as the most promising/portable input. */ + { + const char* fake = NULL; + char* endp; + NV nv; + if ((infnan & IS_NUMBER_INFINITY)) { + fake = ((infnan & IS_NUMBER_NEG)) ? "-inf" : "inf"; + } + else if ((infnan & IS_NUMBER_NAN)) { + fake = "nan"; + } + assert(fake); + nv = Perl_strtod(fake, &endp); + if (fake != endp) { + if ((infnan & IS_NUMBER_INFINITY)) { +#ifdef Perl_isinf + if (Perl_isinf(nv)) + *value = nv; +#else + /* last resort, may generate SIGFPE */ + *value = Perl_exp((NV)1e9); + if ((infnan & IS_NUMBER_NEG)) + *value = -*value; +#endif + return (char*)p; /* p, not endp */ + } + else if ((infnan & IS_NUMBER_NAN)) { +#ifdef Perl_isnan + if (Perl_isnan(nv)) + *value = nv; +#else + /* last resort, may generate SIGFPE */ + *value = Perl_log((NV)-1.0); +#endif + return (char*)p; /* p, not endp */ + } + } } +#endif /* #ifdef Perl_strtod */ } -#endif } /* we accumulate digits into an integer; when this becomes too diff --git a/t/op/infnan.t b/t/op/infnan.t index dccd888..acd7a34 100644 --- a/t/op/infnan.t +++ b/t/op/infnan.t @@ -1,4 +1,4 @@ -#!./perl +#!./perl -w BEGIN { chdir 't' if -d 't'; @@ -6,21 +6,25 @@ BEGIN { require './test.pl'; } +use strict; + my $PInf = "Inf" + 0; my $NInf = "-Inf" + 0; my $NaN = "NaN" + 0; -my @PInf = ("Inf", "inf", "INF", "Infinity", "INFINITY", +my @PInf = ("Inf", "inf", "INF", "+Inf", + "Infinity", "INFINITE", "1.#INF", "1#INF"); -my @NInf = map { "-$_" } @PInf; +my @NInf = map { "-$_" } grep { ! /^\+/ } @PInf; my @NaN = ("NAN", "nan", "qnan", "SNAN", "NanQ", "NANS", - "1.#QNAN", "1#SNAN", "1.#NAN", "1#IND", + "1.#QNAN", "+1#SNAN", "-1.#NAN", "1#IND", "NaN123", "NAN(123)", "nan%", "nanonano"); # RIP, Robin Williams. -my $inf_tests = 6 + 6 * @PInf + 5; -my $nan_tests = 5 + 2 * @NaN + 3; +my $inf_tests = 9 + 3 * @PInf + 3 * @NInf + 5; +my $nan_tests = 7 + 2 * @NaN + 3; + my $infnan_tests = 4; plan tests => $inf_tests + $nan_tests + $infnan_tests; @@ -29,30 +33,34 @@ my $has_inf; my $has_nan; SKIP: { - if ($PInf == 1 && $NINf == 1) { + if ($PInf == 1 && $NInf == 1) { skip $inf_tests, "no infinity found"; } $has_inf = 1; - ok($PInf > 0, "positive infinity"); - ok($NInf < 0, "negative infinity"); + cmp_ok($PInf, '>', 0, "positive infinity"); + cmp_ok($NInf, '<', 0, "negative infinity"); + + cmp_ok($PInf, '>', $NInf, "positive > negative"); + cmp_ok($NInf, '==', -$PInf, "negative == -positive"); + cmp_ok(-$NInf, '==', $PInf, "--negative == positive"); is($PInf, "Inf", "$PInf value stringifies as Inf"); - is($NInf, "-Inf", "$PInf value stringifies as -Inf"); + is($NInf, "-Inf", "$NInf value stringifies as -Inf"); is(sprintf("%g", $PInf), "Inf", "$PInf sprintf %g is Inf"); is(sprintf("%a", $PInf), "Inf", "$PInf sprintf %a is Inf"); for my $i (@PInf) { - is($i + 0, $PInf, "$i is +Inf"); - ok($i > 0, "$i is positive"); + cmp_ok($i + 0 , '==', $PInf, "$i is +Inf"); + cmp_ok($i, '>', 0, "$i is positive"); is("@{[$i+0]}", "Inf", "$i value stringifies as Inf"); } for my $i (@NInf) { - is($i + 0, $NInf, "$i is -Inf"); - ok($i < 0, "$i is negative"); + cmp_ok($i + 0, '==', $NInf, "$i is -Inf"); + cmp_ok($i, '<', 0, "$i is negative"); is("@{[$i+0]}", "-Inf", "$i value stringifies as -Inf"); } @@ -62,7 +70,7 @@ SKIP: { is(1/$PInf, 0, "one per +Inf is zero"); is(1/$NInf, 0, "one per -Inf is zero"); - is(9**9**9, $PInf, "9**9**9 is +Inf"); + is(9**9**9, $PInf, "9**9**9 is Inf"); } SKIP: { @@ -72,20 +80,23 @@ SKIP: { $has_nan = 1; - ok($NaN != $NaN, "nan is not nan numerically"); - ok($NaN eq $NaN, "nan is nan stringifically"); + cmp_ok($NaN, '!=', $NaN, "NaN is NaN numerically (by not being NaN)"); + ok($NaN eq $NaN, "NaN is NaN stringifically"); is("$NaN", "NaN", "$NaN value stringies as NaN"); + is("+NaN" + 0, "NaN", "+NaN is NaN"); + is("-NaN" + 0, "NaN", "-NaN is NaN"); + is(sprintf("%g", $NaN), "NaN", "$NaN sprintf %g is NaN"); - is(sprintf("%a", $NaN), "NaN", "$NaN sprintf %a is Inf"); + is(sprintf("%a", $NaN), "NaN", "$NaN sprintf %a is NaN"); for my $i (@NaN) { - cmp_ok($i + 0, '!=', $i + 0, "$i is nan"); + cmp_ok($i + 0, '!=', $i + 0, "$i is NaN numerically (by not being NaN)"); is("@{[$i+0]}", "NaN", "$i value stringifies as NaN"); } - # is() okay with $NaN because eq is used. + # is() okay with $NaN because it uses eq. is($NaN * 0, $NaN, "NaN times zero is NaN"); is($NaN * 2, $NaN, "NaN times two is NaN"); @@ -94,12 +105,12 @@ SKIP: { SKIP: { unless ($has_inf && $has_nan) { - skip $infnan_tests, "no both inf and nan"; + skip $infnan_tests, "no both Inf and Nan"; } - # is() okay with $NaN because eq is used. - is($PInf * 0, $NaN, "inf times zero is nan"); - is($PInf * $NaN, $NaN, "inf times nan is nan"); - is($PInf + $NaN, $NaN, "inf plus nan is nan"); - is($PInf - $PInf, $NaN, "inf minus inf is nan"); + # is() okay with $NaN because it uses eq. + is($PInf * 0, $NaN, "Inf times zero is NaN"); + is($PInf * $NaN, $NaN, "Inf times NaN is NaN"); + is($PInf + $NaN, $NaN, "Inf plus NaN is NaN"); + is($PInf - $PInf, $NaN, "Inf minus inf is NaN"); }