X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/bee81d3656aea4bc40e5dc8a9b070a57a2bfafe3..95609a20fdb72a97909c16d79689341efc52a3d7:/pp_pack.c diff --git a/pp_pack.c b/pp_pack.c index 70c8487..7033649 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -112,21 +112,17 @@ typedef union { /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). --jhi Feb 1999 */ -#if U16SIZE > SIZE16 || U32SIZE > SIZE32 -# if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ -# define OFF16(p) ((char*)(p)) -# define OFF32(p) ((char*)(p)) -# else -# if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ -# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) -# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) -# else - ++++ bad cray byte order -# endif -# endif -#else +#if U16SIZE <= SIZE16 && U32SIZE <= SIZE32 # define OFF16(p) ((char *) (p)) # define OFF32(p) ((char *) (p)) +#elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ +# define OFF16(p) ((char*)(p)) +# define OFF32(p) ((char*)(p)) +#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ +# define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) +# define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) +#else +# error "bad cray byte order" #endif #define PUSH16(utf8, cur, p, needs_swap) \ @@ -195,7 +191,7 @@ S_mul128(pTHX_ SV *sv, U8 m) PERL_ARGS_ASSERT_MUL128; - if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ + if (! 
memBEGINs(s, len, "0000")) { /* need to grow sv */ SV * const tmpNew = newSVpvs("0000000000"); sv_catsv(tmpNew, sv); @@ -237,7 +233,7 @@ S_mul128(pTHX_ SV *sv, U8 m) #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ #define PACK_SIZE_MASK 0x3F -#include "packsizetables.c" +#include "packsizetables.inc" static void S_reverse_copy(const char *src, char *dest, STRLEN len) @@ -251,12 +247,15 @@ STATIC U8 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) { STRLEN retlen; - UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, + UV val; + + if (*s >= end) { + goto croak; + } + val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); - /* We try to process malformed UTF-8 as much as possible (preferably with - warnings), but these two mean we make no progress in the string and - might enter an infinite loop */ - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) + croak: Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", (int) TYPE_NO_MODIFIERS(datumtype)); if (val >= 0x100) { @@ -290,7 +289,7 @@ S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_ for (;buf_len > 0; buf_len--) { if (from >= end) return FALSE; val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); - if (retlen == (STRLEN) -1 || retlen == 0) { + if (retlen == (STRLEN) -1) { from += UTF8SKIP(from); bad |= 1; } else from += retlen; @@ -327,8 +326,8 @@ S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_ } STATIC char * -S_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { - PERL_ARGS_ASSERT_BYTES_TO_UNI; +S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { + PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8; if (UNLIKELY(needs_swap)) { const U8 *p = start + len; @@ -348,7 +347,7 @@ S_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) #define PUSH_BYTES(utf8, cur, buf, len, 
needs_swap) \ STMT_START { \ if (UNLIKELY(utf8)) \ - (cur) = S_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \ + (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \ else { \ if (UNLIKELY(needs_swap)) \ S_reverse_copy((char *)(buf), cur, len); \ @@ -386,7 +385,7 @@ STMT_START { \ STMT_START { \ if (utf8) { \ const U8 au8 = (byte); \ - (s) = S_bytes_to_utf8(&au8, 1, (s), 0); \ + (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\ } else *(U8 *)(s)++ = (byte); \ } STMT_END @@ -396,7 +395,7 @@ STMT_START { \ STRLEN retlen; \ if (str >= end) break; \ val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ - if (retlen == (STRLEN) -1 || retlen == 0) { \ + if (retlen == (STRLEN) -1) { \ *cur = '\0'; \ Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ } \ @@ -791,20 +790,20 @@ first_symbol(const char *pat, const char *patend) { =for apidoc unpackstring -The engine implementing the unpack() Perl function. +The engine implementing the C<unpack()> Perl function. -Using the template pat..patend, this function unpacks the string -s..strend into a number of mortal SVs, which it pushes onto the perl -argument (@_) stack (so you will need to issue a C<PUTBACK> before and +Using the template C<pat..patend>, this function unpacks the string +C<s..strend> into a number of mortal SVs, which it pushes onto the perl +argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and C<SPAGAIN> after the call to this function). It returns the number of pushed elements. -The strend and patend pointers should point to the byte following the last -character of each string. +The C<strend> and C<patend> pointers should point to the byte following the +last character of each string. Although this function returns its values on the perl argument stack, it doesn't take any parameters from that stack (and thus in particular -there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for +there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for example). 
=cut */ @@ -1073,9 +1072,14 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c /* 'A' strips both nulls and spaces */ const char *ptr; if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { - for (ptr = s+len-1; ptr >= s; ptr--) - if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && - !isSPACE_utf8(ptr)) break; + for (ptr = s+len-1; ptr >= s; ptr--) { + if ( *ptr != 0 + && !UTF8_IS_CONTINUATION(*ptr) + && !isSPACE_utf8_safe(ptr, strend)) + { + break; + } + } if (ptr >= s) ptr += UTF8SKIP(ptr); else ptr++; if (ptr > s+len) @@ -1220,7 +1224,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c STRLEN retlen; aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; } @@ -1243,7 +1247,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c STRLEN retlen; const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, ckWARN(WARN_UTF8) ? 
0 : UTF8_ALLOW_ANY); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; if (!checksum) @@ -1305,7 +1309,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c strend - s, &retlen, UTF8_ALLOW_DEFAULT)); - if (retlen == (STRLEN) -1 || retlen == 0) + if (retlen == (STRLEN) -1) Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); s += retlen; } @@ -1573,7 +1577,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c U8 ch; ch = SHIFT_BYTE(utf8, s, strend, datumtype); auv = (auv << 7) | (ch & 0x7f); - /* UTF8_IS_XXXXX not right here - using constant 0x80 */ + /* UTF8_IS_XXXXX not right here because this is a BER, not + * UTF-8 format - using constant 0x80 */ if (ch < 0x80) { bytes = 0; mPUSHu(auv); @@ -1584,7 +1589,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (++bytes >= sizeof(UV)) { /* promote to string */ const char *t; - sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); + sv = Perl_newSVpvf(aTHX_ "%.*" UVuf, + (int)TYPE_DIGITS(UV), auv); while (s < strend) { ch = SHIFT_BYTE(utf8, s, strend, datumtype); sv = mul128(sv, (U8)(ch & 0x7f)); @@ -1713,41 +1719,41 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c * first failure, and don't report where/what that is, so again we * can ignore UTF8ness */ - while (s < strend && *s != ' ' && ISUUCHAR(*s)) { - I32 a, b, c, d; - char hunk[3]; - - len = PL_uudmap[*(U8*)s++] & 077; - while (len > 0) { - if (s < strend && ISUUCHAR(*s)) - a = PL_uudmap[*(U8*)s++] & 077; - else - a = 0; - if (s < strend && ISUUCHAR(*s)) - b = PL_uudmap[*(U8*)s++] & 077; - else - b = 0; - if (s < strend && ISUUCHAR(*s)) - c = PL_uudmap[*(U8*)s++] & 077; - else - c = 0; - if (s < strend && ISUUCHAR(*s)) - d = PL_uudmap[*(U8*)s++] & 077; - else - d = 0; - hunk[0] = (char)((a << 2) | (b >> 4)); - hunk[1] = (char)((b 
<< 4) | (c >> 2)); - hunk[2] = (char)((c << 6) | d); - if (!checksum) - sv_catpvn(sv, hunk, (len > 3) ? 3 : len); - len -= 3; - } - if (*s == '\n') - s++; - else /* possible checksum byte */ - if (s + 1 < strend && s[1] == '\n') - s += 2; - } + while (s < strend && *s != ' ' && ISUUCHAR(*s)) { + I32 a, b, c, d; + char hunk[3]; + + len = PL_uudmap[*(U8*)s++] & 077; + while (len > 0) { + if (s < strend && ISUUCHAR(*s)) + a = PL_uudmap[*(U8*)s++] & 077; + else + a = 0; + if (s < strend && ISUUCHAR(*s)) + b = PL_uudmap[*(U8*)s++] & 077; + else + b = 0; + if (s < strend && ISUUCHAR(*s)) + c = PL_uudmap[*(U8*)s++] & 077; + else + c = 0; + if (s < strend && ISUUCHAR(*s)) + d = PL_uudmap[*(U8*)s++] & 077; + else + d = 0; + hunk[0] = (char)((a << 2) | (b >> 4)); + hunk[1] = (char)((b << 4) | (c >> 2)); + hunk[2] = (char)((c << 6) | d); + if (!checksum) + sv_catpvn(sv, hunk, (len > 3) ? 3 : len); + len -= 3; + } + if (*s == '\n') + s++; + else /* possible checksum byte */ + if (s + 1 < strend && s[1] == '\n') + s += 2; + } if (!checksum) XPUSHs(sv); break; @@ -1766,7 +1772,18 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c } while (cdouble < 0.0) cdouble += anv; - cdouble = Perl_modf(cdouble / anv, &trouble) * anv; + cdouble = Perl_modf(cdouble / anv, &trouble); +#ifdef LONGDOUBLE_DOUBLEDOUBLE + /* Workaround for powerpc doubledouble modfl bug: + * close to 1.0L and -1.0L cdouble is 0, and trouble + * is cdouble / anv. */ + if (trouble != Perl_ceil(trouble)) { + cdouble = trouble; + if (cdouble > 1.0L) cdouble -= 1.0L; + if (cdouble < -1.0L) cdouble += 1.0L; + } +#endif + cdouble *= anv; sv = newSVnv(cdouble); } else { @@ -1814,7 +1831,7 @@ PP(pp_unpack) { dSP; dPOPPOPssrl; - I32 gimme = GIMME_V; + U8 gimme = GIMME_V; STRLEN llen; STRLEN rlen; const char *pat = SvPV_const(left, llen); @@ -1936,7 +1953,7 @@ S_div128(pTHX_ SV *pnum, bool *done) /* =for apidoc packlist -The engine implementing pack() Perl function. 
+The engine implementing C<pack()> Perl function. =cut */ @@ -2051,9 +2068,9 @@ S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype) const I32 c = TYPE_NO_MODIFIERS(datumtype); const NV nv = SvNV_nomg(sv); if (c == 'w') - Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv); + Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv); else - Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c); + Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c); } return sv; } @@ -2094,7 +2111,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) char *cur = start + SvCUR(cat); bool needs_swap; -#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) +#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no) #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no) switch (howlen) { @@ -2343,7 +2360,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen, datumtype | TYPE_IS_PACK)) Perl_croak(aTHX_ "panic: predicted utf8 length not available, " - "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf, + "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf, (int)datumtype, aptr, end, cur, (UV)fromlen); cur += fromlen; len -= fromlen; @@ -2476,7 +2493,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (howlen == e_star) len = fromlen; field_len = (len+1)/2; GROWING(utf8, cat, start, cur, field_len); - if (!utf8 && len > (I32)fromlen) len = fromlen; + if (!utf8_source && len > (I32)fromlen) len = fromlen; bits = 0; l = 0; if (datumtype == 'H') @@ -2569,17 +2586,14 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (in_bytes) auv = auv % 0x100; if (utf8) { W_utf8: - if (cur > end) { + if (cur >= end) { *cur = '\0'; SvCUR_set(cat, cur - start); GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) 
uvchr_to_utf8_flags((U8 *) cur, - auv, - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); } else { if (auv >= 0x100) { if (!SvUTF8(cat)) { @@ -2630,9 +2644,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) auv = SvUV_no_inf(fromstr, datumtype); if (utf8) { U8 buffer[UTF8_MAXLEN], *endb; - endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0); if (cur+(endb-buffer)*UTF8_EXPAND >= end) { *cur = '\0'; SvCUR_set(cat, cur - start); @@ -2640,7 +2652,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) len+(endb-buffer)*UTF8_EXPAND); end = start+SvLEN(cat); } - cur = S_bytes_to_utf8(buffer, endb-buffer, cur, 0); + cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0); } else { if (cur >= end) { *cur = '\0'; @@ -2648,9 +2660,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) GROWING(0, cat, start, cur, len+UTF8_MAXLEN); end = start+SvLEN(cat)-UTF8_MAXLEN; } - cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv), - warn_utf8 ? - 0 : UNICODE_ALLOW_ANY); + cur = (char *) uvchr_to_utf8_flags((U8 *) cur, + UNI_TO_NATIVE(auv), + 0); } } break; @@ -2662,7 +2674,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) NV anv; fromstr = NEXTFROM; anv = SvNV(fromstr); -# if defined(VMS) && !defined(_IEEE_FP) +# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT) /* IEEE fp overflow shenanigans are unavailable on VAX and optional * on Alpha; fake it if we don't have them. 
*/ @@ -2672,10 +2684,17 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) afloat = -FLT_MAX; else afloat = (float)anv; # else +# if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + if(Perl_isnan(anv)) + afloat = (float)NV_NAN; + else +# endif +# ifdef NV_INF /* a simple cast to float is undefined if outside * the range of values that can be represented */ afloat = (float)(anv > FLT_MAX ? NV_INF : anv < -FLT_MAX ? -NV_INF : anv); +# endif # endif PUSH_VAR(utf8, cur, afloat, needs_swap); } @@ -2686,7 +2705,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) NV anv; fromstr = NEXTFROM; anv = SvNV(fromstr); -# if defined(VMS) && !defined(_IEEE_FP) +# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT) /* IEEE fp overflow shenanigans are unavailable on VAX and optional * on Alpha; fake it if we don't have them. */ @@ -2709,6 +2728,12 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) #ifdef __GNUC__ /* to work round a gcc/x86 bug; don't use SvNV */ anv.nv = sv_2nv(fromstr); +# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \ + && LONG_DOUBLESIZE > 10 + /* GCC sometimes overwrites the padding in the + assignment above */ + Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8); +# endif #else anv.nv = SvNV(fromstr); #endif @@ -2726,6 +2751,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) # ifdef __GNUC__ /* to work round a gcc/x86 bug; don't use SvNV */ aldouble.ld = (long double)sv_2nv(fromstr); +# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10 + /* GCC sometimes overwrites the padding in the + assignment above */ + Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8); +# endif # else aldouble.ld = (long double)SvNV(fromstr); # endif @@ -3012,7 +3042,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) * of pack() (and all copies of the result) are * gone. 
*/ - if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) && + if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1) + || (SvPADTMP(fromstr) && !SvREADONLY(fromstr)))) { Perl_ck_warner(aTHX_ packWARN(WARN_PACK), "Attempt to pack pointer to temporary value"); @@ -3094,7 +3125,7 @@ PP(pp_pack) const char *patend = pat + fromlen; MARK++; - sv_setpvs(cat, ""); + SvPVCLEAR(cat); SvUTF8_off(cat); packlist(cat, pat, patend, MARK, SP + 1); @@ -3106,11 +3137,5 @@ PP(pp_pack) } /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */