X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/c0de32e24cb35f30ec0d37406b19fcc377911df7..2bb3725dfe9edb3ec056dbc691826cda14d85a48:/sv.c diff --git a/sv.c b/sv.c index afd4376..92c34a8 100644 --- a/sv.c +++ b/sv.c @@ -10563,6 +10563,280 @@ Perl_sv_vcatpvfn(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, sv_vcatpvfn_flags(sv, pat, patlen, args, svargs, svmax, maybe_tainted, SV_GMAGIC|SV_SMAGIC); } +/* vhex will contain the values (0..15) of the hex digits ("nybbles" + * of 4 bits); 1 for the implicit 1, and at most 128 bits of mantissa, + * four bits per xdigit. */ +#define VHEX_SIZE (1+128/4) + +/* If we do not have a known long double format, (including not using + * long doubles, or long doubles being equal to doubles) then we will + * fall back to the ldexp/frexp route, with which we can retrieve at + * most as many bits as our widest unsigned integer type is. We try + * to get a 64-bit unsigned integer even if we are not having 64-bit + * UV. */ +#if defined(HAS_QUAD) && defined(Uquad_t) +# define MANTISSATYPE Uquad_t +# define MANTISSASIZE 8 +#else +# define MANTISSATYPE UV /* May lose precision if UVSIZE is not 8. */ +# define MANTISSASIZE UVSIZE +#endif + +/* S_hextract() is a helper for Perl_sv_vcatpvfn_flags, for extracting + * the hexadecimal values (for %a/%A). The nv is the NV where the value + * are being extracted from (either directly from the long double in-memory + * presentation, or from the uquad computed via frexp+ldexp). frexp also + * is used to update the exponent. vhex is the pointer to the beginning + * of the output buffer (of VHEX_SIZE). + * + * The tricky part is that S_hextract() needs to be called twice: + * the first time with vend as NULL, and the second time with vend as + * the pointer returned by the first call. What happens is that on + * the first round the output size is computed, and the intended + * extraction sanity checked. On the second round the actual output + * (the extraction of the hexadecimal values) takes place. + * Sanity failures cause fatal failures during both rounds. */ +STATIC U8* +S_hextract(pTHX_ const NV nv, int* exponent, U8* vhex, U8* vend) +{ + U8* v = vhex; + int ix; + int ixmin = 0, ixmax = 0; + + /* XXX Inf/NaN/denormal handling in the HEXTRACT_IMPLICIT_BIT, + * and elsewhere. */ + + /* These macros are just to reduce typos, they have multiple + * repetitions below, but usually only one (or sometimes two) + * of them is really being used. */ + /* HEXTRACT_OUTPUT() extracts the high nybble first. */ +#define HEXTRACT_OUTPUT_HI(ix) (*v++ = nvp[ix] >> 4) +#define HEXTRACT_OUTPUT_LO(ix) (*v++ = nvp[ix] & 0xF) +#define HEXTRACT_OUTPUT(ix) \ + STMT_START { \ + HEXTRACT_OUTPUT_HI(ix); \ + HEXTRACT_OUTPUT_LO(ix); \ + } STMT_END +#define HEXTRACT_COUNT(ix, c) \ + STMT_START { \ + v += c; \ + if (ix < ixmin) \ + ixmin = ix; \ + else if (ix > ixmax) \ + ixmax = ix; \ + } STMT_END +#define HEXTRACT_IMPLICIT_BIT() \ + if (exponent) { \ + if (vend) \ + *v++ = 1; \ + else \ + v++; \ + } + + /* First see if we are using long doubles. */ +#if NVSIZE > DOUBLESIZE && LONG_DOUBLEKIND != LONG_DOUBLE_IS_DOUBLE + const U8* nvp = (const U8*)(&nv); +# define HEXTRACTSIZE NVSIZE + (void)Perl_frexp(PERL_ABS(nv), exponent); +# if LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN + /* Used in e.g. VMS and HP-UX IA-64, e.g. -0.1L: + * 9a 99 99 99 99 99 99 99 99 99 99 99 99 99 fb 3f */ + /* The bytes 13..0 are the mantissa/fraction, + * the 15,14 are the sign+exponent. */ + HEXTRACT_IMPLICIT_BIT(); + for (ix = 13; ix >= 0; ix--) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + *exponent -= 4; +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_BIG_ENDIAN + /* Used in e.g. Solaris Sparc and HP-UX PA-RISC, e.g. -0.1L: + * bf fb 99 99 99 99 99 99 99 99 99 99 99 99 99 9a */ + /* The bytes 2..15 are the mantissa/fraction, + * the 0,1 are the sign+exponent. */ + HEXTRACT_IMPLICIT_BIT(); + for (ix = 2; ix <= 15; ix++) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + *exponent -= 4; +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN + /* x86 80-bit "extended precision", 64 bits of mantissa / fraction / + * significand, 15 bits of exponent, 1 bit of sign. NVSIZE can + * be either 12 (ILP32, Solaris x86) or 16 (LP64, Linux and OS X), + * meaning that 2 or 6 bytes are empty padding. */ + /* The bytes 7..0 are the mantissa/fraction */ + /* There explicitly is *no* implicit bit in this case. */ + for (ix = 7; ix >= 0; ix--) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + *exponent -= 4; +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_BIG_ENDIAN + /* The last 8 bytes are the mantissa/fraction. + * (does this format ever happen?) */ + /* There explicitly is *no* implicit bit in this case. */ + for (ix = LONGDBLSIZE - 8; ix < LONGDBLSIZE; ix++) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + *exponent -= 4; +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN + /* Where is this used? + * 9a 99 99 99 99 99 59 bc 9a 99 99 99 99 99 b9 3f */ + HEXTRACT_IMPLICIT_BIT(); + if (vend) + HEXTRACT_OUTPUT_LO(14); + else + HEXTRACT_COUNT(14, 1); + for (ix = 13; ix >= 8; ix--) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + /* XXX not extracting from the second double -- see the discussion + * below for the big endian double double. */ +# if 0 + if (vend) + HEXTRACT_OUTPUT_LO(6); + else + HEXTRACT_COUNT(6, 1); + for (ix = 5; ix >= 0; ix--) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } +# endif + (*exponent)--; +# elif LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BIG_ENDIAN + /* Used in e.g. PPC/Power (AIX) and MIPS. + * + * The mantissa bits are in two separate stretches, e.g. for -0.1L: + * 3f b9 99 99 99 99 99 9a bc 59 99 99 99 99 99 9a + */ + HEXTRACT_IMPLICIT_BIT(); + if (vend) + HEXTRACT_OUTPUT_LO(1); + else + HEXTRACT_COUNT(1, 1); + for (ix = 2; ix < 8; ix++) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } + /* XXX not extracting the second double mantissa bits- this is not + * right nor ideal (we effectively reduce the output format to + * that of a "single double", only 53 bits), but we do not know + * exactly how to do the extraction correctly so that it matches + * the semantics of, say, the IEEE quadruple float. */ +# if 0 + if (vend) + HEXTRACT_OUTPUT_LO(9); + else + HEXTRACT_COUNT(9, 1); + for (ix = 10; ix < 16; ix++) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } +# endif + (*exponent)--; +# else + Perl_croak(aTHX_ + "Hexadecimal float: unsupported long double format"); +# endif +#else + /* If not using long doubles (or if the long double format is + * known but not yet supported), try to retrieve the mantissa bits + * via frexp+ldexp. */ + + NV norm = Perl_frexp(PERL_ABS(nv), exponent); + /* Theoretically we have all the bytes [0, MANTISSASIZE-1] to + * inspect; but in practice we don't want the leading nybbles that + * are zero. With the common IEEE 754 value for NV_MANT_DIG being + * 53, we want the limit byte to be (int)((53-1)/8) == 6. + * + * Note that this is _not_ inspecting the in-memory format of the + * nv (as opposed to the long double method), but instead the UV + * retrieved with the frexp+ldexp invocation. */ +# if MANTISSASIZE * 8 > NV_MANT_DIG + MANTISSATYPE mantissa = Perl_ldexp(norm, NV_MANT_DIG); + int limit_byte = (NV_MANT_DIG - 1) / 8; +# else + /* There will be low-order precision loss. Try to salvage as many + * bits as possible. Will truncate, not round. */ + MANTISSATYPE mantissa = + Perl_ldexp(norm, + /* The highest possible shift by two that fits in the + * mantissa and is aligned (by four) the same was as + * NV_MANT_DIG. */ + MANTISSASIZE * 8 - (4 - NV_MANT_DIG % 4)); + int limit_byte = MANTISSASIZE - 1; +# endif + const U8* nvp = (const U8*)(&mantissa); +# define HEXTRACTSIZE MANTISSASIZE + /* We make here the wild assumption that the endianness of doubles + * is similar to the endianness of integers, and that there is no + * middle-endianness. This may come back to haunt us (the rumor + * has it that ARM can be quite haunted). + * + * We generate 4-bit xdigits (nybble/nibble) instead of 8-bit + * bytes, since we might need to handle printf precision, and also + * insert the radix. + */ +# if BYTEORDER == 0x12345678 || BYTEORDER == 0x1234 || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_IEEE_754_128_BIT_LITTLE_ENDIAN || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_X86_80_BIT_LITTLE_ENDIAN || \ + LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_LITTLE_ENDIAN + /* Little endian. */ + for (ix = limit_byte; ix >= 0; ix--) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } +# else + /* Big endian. */ + for (ix = MANTISSASIZE - 1 - limit_byte; ix < MANTISSASIZE; ix++) { + if (vend) + HEXTRACT_OUTPUT(ix); + else + HEXTRACT_COUNT(ix, 2); + } +# endif + /* If there are not enough bits in MANTISSATYPE, we couldn't get + * all of them, issue a warning. + * + * Note that NV_PRESERVES_UV_BITS would not help here, it is the + * wrong way around. */ +# if NV_MANT_DIG > MANTISSASIZE * 8 + Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Hexadecimal float: precision loss"); +# endif +#endif + /* Croak for various reasons: if the output pointer escaped the + * output buffer, if the extraction index escaped the extraction + * buffer, or if the ending output pointer didn't match the + * previously computed value. */ + if (v <= vhex || v - vhex >= VHEX_SIZE || + ixmin < 0 || ixmax >= HEXTRACTSIZE || + (vend && v != vend)) + Perl_croak(aTHX_ "Hexadecimal float: internal error"); + return v; +} + void Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN patlen, va_list *const args, SV **const svargs, const I32 svmax, bool *const maybe_tainted, @@ -10585,6 +10859,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* large enough for "%#.#f" --chip */ /* what about long double NVs? --jhi */ bool no_redundant_warning = FALSE; /* did we use any explicit format parameter index? */ + bool hexfp = FALSE; DECLARATION_FOR_STORE_LC_NUMERIC_SET_TO_NEEDED; @@ -11376,6 +11651,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p case 'e': case 'E': case 'f': case 'g': case 'G': + case 'a': case 'A': if (vectorize) goto unknown; @@ -11428,14 +11704,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p /* nv * 0 will be NaN for NaN, +Inf and -Inf, and 0 for anything else. frexp() has some unspecified behaviour for those three */ if (c != 'e' && c != 'E' && (nv * 0) == 0) { - i = PERL_INT_MIN; - /* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this - will cast our (long double) to (double) */ - (void)Perl_frexp(nv, &i); - if (i == PERL_INT_MIN) - Perl_die(aTHX_ "panic: frexp"); - if (i > 0) - need = BIT_DIGITS(i); + i = PERL_INT_MIN; + /* FIXME: if HAS_LONG_DOUBLE but not USE_LONG_DOUBLE this + will cast our (long double) to (double) */ + (void)Perl_frexp(nv, &i); + if (i == PERL_INT_MIN) + Perl_die(aTHX_ "panic: frexp"); + hexfp = (c == 'a' || c == 'A'); + if (UNLIKELY(hexfp)) { + /* Hexadecimal floating point: this size + * computation probably overshoots, but that is + * better than undershooting. */ + need += + (nv < 0) + /* possible unary minus */ + 2 + /* "0x" */ + 1 + /* the very unlikely carry */ + 1 + /* "1" */ + 1 + /* "." */ + /* We want one byte per each 4 bits in the + * mantissa. This works out to about 0.83 + * bytes per NV decimal digit (of 4 bits): + * (NV_DIG * log(10)/log(2)) / 4, + * we overestimate by using 5/6 (0.8333...) */ + ((NV_DIG * 5) / 6 + 1) + + 2 + /* "p+" */ + (i >= 0 ? BIT_DIGITS(i) : 1 + BIT_DIGITS(-i)) + + 1; /* \0 */ +#ifdef USE_LOCALE_NUMERIC + STORE_LC_NUMERIC_SET_TO_NEEDED(); + if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) + need += SvLEN(PL_numeric_radix_sv); + RESTORE_LC_NUMERIC(); +#endif + } + else if (i > 0) { + need = BIT_DIGITS(i); + } /* if i < 0, the number of digits is hard to predict. */ } need += has_precis ? precis : 6; /* known default */ @@ -11533,7 +11837,175 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p break; } } - { + + if (UNLIKELY(hexfp)) { + /* Hexadecimal floating point. */ + char* p = PL_efloatbuf; + U8 vhex[VHEX_SIZE]; + U8* v = vhex; /* working pointer to vhex */ + U8* vend; /* pointer to one beyond last digit of vhex */ + U8* vfnz = NULL; /* first non-zero */ + const bool lower = (c == 'a'); + /* At output the values of vhex (up to vend) will + * be mapped through the xdig to get the actual + * human-readable xdigits. */ + const char* xdig = PL_hexdigit; + int zerotail = 0; /* how many extra zeros to append */ + int exponent = 0; /* exponent of the floating point input */ + + /* XXX: denormals, NaN, Inf. + * + * For example with denormals, (assuming the vanilla + * 64-bit double): the exponent is zero. 1xp-1074 is + * the smallest denormal and the smallest double, it + * should be output as 0x0.0000000000001p-1022 to + * match its internal structure. */ + + vend = S_hextract(aTHX_ nv, &exponent, vhex, NULL); + S_hextract(aTHX_ nv, &exponent, vhex, vend); + + if (nv < 0) + *p++ = '-'; + else if (plus) + *p++ = plus; + *p++ = '0'; + if (lower) { + *p++ = 'x'; + } + else { + *p++ = 'X'; + xdig += 16; /* Use uppercase hex. */ + } + + /* Find the first non-zero xdigit. */ + for (v = vhex; v < vend; v++) { + if (*v) { + vfnz = v; + break; + } + } + + if (vfnz) { + U8* vlnz = NULL; /* The last non-zero. */ + + /* Find the last non-zero xdigit. */ + for (v = vend - 1; v >= vhex; v--) { + if (*v) { + vlnz = v; + break; + } + } + +#if NVSIZE == DOUBLESIZE + /* For long doubles S_hextract() took care of this. */ + exponent--; +#endif + + if (precis > 0) { + v = vhex + precis + 1; + if (v < vend) { + /* Round away from zero: if the tail + * beyond the precis xdigits is equal to + * or greater than 0x8000... */ + bool round = *v > 0x8; + if (!round && *v == 0x8) { + for (v++; v < vend; v++) { + if (*v) { + round = TRUE; + break; + } + } + } + if (round) { + for (v = vhex + precis; v >= vhex; v--) { + if (*v < 0xF) { + (*v)++; + break; + } + *v = 0; + if (v == vhex) { + /* If the carry goes all the way to + * the front, we need to output + * a single '1'. This goes against + * the "xdigit and then radix" + * but since this is "cannot happen" + * category, that is probably good. */ + *p++ = xdig[1]; + } + } + } + /* The new effective "last non zero". */ + vlnz = vhex + precis; + } + else { + zerotail = precis - (vlnz - vhex); + } + } + + v = vhex; + *p++ = xdig[*v++]; + + /* The radix is always output after the first + * non-zero xdigit, or if alt. */ + if (vfnz < vlnz || alt) { +#ifndef USE_LOCALE_NUMERIC + *p++ = '.'; +#else + STORE_LC_NUMERIC_SET_TO_NEEDED(); + if (PL_numeric_radix_sv && IN_LC(LC_NUMERIC)) { + STRLEN n; + const char* r = SvPV(PL_numeric_radix_sv, n); + Copy(r, p, n, char); + p += n; + } + else { + *p++ = '.'; + } + RESTORE_LC_NUMERIC(); +#endif + } + + while (v <= vlnz) + *p++ = xdig[*v++]; + + while (zerotail--) + *p++ = '0'; + } + else { + *p++ = '0'; + exponent = 0; + } + + elen = p - PL_efloatbuf; + elen += my_snprintf(p, PL_efloatsize - elen, + "%c%+d", lower ? 'p' : 'P', + exponent); + + if (elen < width) { + if (left) { + /* Pad the back with spaces. */ + memset(PL_efloatbuf + elen, ' ', width - elen); + } + else if (fill == '0') { + /* Insert the zeros between the "0x" and + * the digits, otherwise we end up with + * "0000xHHH..." */ + STRLEN nzero = width - elen; + char* zerox = PL_efloatbuf + 2; + Move(zerox, zerox + nzero, elen - 2, char); + memset(zerox, fill, nzero); + } + else { + /* Move it to the right. */ + Move(PL_efloatbuf, PL_efloatbuf + width - elen, + elen, char); + /* Pad the front with spaces. */ + memset(PL_efloatbuf, ' ', width - elen); + } + elen = width; + } + } + else { char *ptr = ebuf + sizeof ebuf; *--ptr = '\0'; *--ptr = c; @@ -11577,14 +12049,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p * that is safe to use, even though it's not literal */ GCC_DIAG_IGNORE(-Wformat-nonliteral); #if defined(HAS_LONG_DOUBLE) - elen = ((intsize == 'q') - ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, nv) - : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)nv)); + elen = ((intsize == 'q') + ? my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, nv) + : my_snprintf(PL_efloatbuf, PL_efloatsize, ptr, (double)nv)); #else - elen = my_sprintf(PL_efloatbuf, ptr, nv); + elen = my_sprintf(PL_efloatbuf, ptr, nv); #endif GCC_DIAG_RESTORE; } + float_converted: eptr = PL_efloatbuf;