X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/e68aed92f594456b596d9b3d8c0fba00406fe76d..05732f970020f06b089f24626fd76d527286eea9:/pp_pack.c diff --git a/pp_pack.c b/pp_pack.c index 60462eb..cf1074e 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -142,7 +142,7 @@ typedef union { # error "Unsupported byteorder" /* Need to add code here to re-instate mixed endian support. NEEDS_SWAP would need to hold a flag indicating which action to - take, and S_reverse_copy and the code in uni_to_bytes would need + take, and S_reverse_copy and the code in S_utf8_to_bytes would need logic adding to deal with any mixed-endian transformations needed. */ #endif @@ -151,7 +151,7 @@ typedef union { #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \ STMT_START { \ if (UNLIKELY(utf8)) { \ - if (!uni_to_bytes(aTHX_ &s, strend, \ + if (!S_utf8_to_bytes(aTHX_ &s, strend, \ (char *) (buf), len, datumtype)) break; \ } else { \ if (UNLIKELY(needs_swap)) \ @@ -216,16 +216,8 @@ S_mul128(pTHX_ SV *sv, U8 m) /* Explosives and implosives. */ -#if 'I' == 73 && 'J' == 74 -/* On an ASCII/ISO kind of system */ -#define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a') -#else -/* - Some other sort of character set - use memchr() so we don't match - the null byte. - */ -#define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') -#endif +#define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \ + && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a')) /* type modifiers */ #define TYPE_IS_SHRIEKING 0x100 @@ -256,7 +248,7 @@ S_reverse_copy(const char *src, char *dest, STRLEN len) } STATIC U8 -uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) +utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) { STRLEN retlen; UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, @@ -278,11 +270,11 @@ uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) } #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ - uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ + utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ *(U8 *)(s)++) STATIC bool -uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) +S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) { UV val; STRLEN retlen; @@ -334,24 +326,9 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len return TRUE; } -STATIC bool -next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) -{ - STRLEN retlen; - const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY); - if (val >= 0x100 || !ISUUCHAR(val) || - retlen == (STRLEN) -1 || retlen == 0) { - *out = 0; - return FALSE; - } - *out = PL_uudmap[val] & 077; - *s += retlen; - return TRUE; -} - STATIC char * -S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { - PERL_ARGS_ASSERT_BYTES_TO_UNI; +S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { + PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8; if (UNLIKELY(needs_swap)) { const U8 *p = start + len; @@ -371,7 +348,7 @@ S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \ STMT_START { \ if (UNLIKELY(utf8)) \ - (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap); \ + (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \ else { \ if (UNLIKELY(needs_swap)) \ S_reverse_copy((char *)(buf), cur, len); \ @@ -409,7 +386,7 @@ STMT_START { \ STMT_START { \ if (utf8) { \ const U8 au8 = (byte); \ - (s) = S_bytes_to_uni(&au8, 1, (s), 0); \ + (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\ } else *(U8 *)(s)++ = (byte); \ } STMT_END @@ -1128,7 +1105,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (checksum) { if (utf8) while (len >= 8 && s < strend) { - cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)]; + cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)]; len -= 8; } else @@ -1163,7 +1140,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (len & 7) bits >>= 1; else if (utf8) { if (s >= strend) break; - bits = uni_to_byte(aTHX_ &s, strend, datumtype); + bits = utf8_to_byte(aTHX_ &s, strend, datumtype); } else bits = *(U8 *) s++; *str++ = bits & 1 ? '1' : '0'; } @@ -1174,7 +1151,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (len & 7) bits <<= 1; else if (utf8) { if (s >= strend) break; - bits = uni_to_byte(aTHX_ &s, strend, datumtype); + bits = utf8_to_byte(aTHX_ &s, strend, datumtype); } else bits = *(U8 *) s++; *str++ = bits & 0x80 ? '1' : '0'; } @@ -1202,7 +1179,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (len & 1) bits >>= 4; else if (utf8) { if (s >= strend) break; - bits = uni_to_byte(aTHX_ &s, strend, datumtype); + bits = utf8_to_byte(aTHX_ &s, strend, datumtype); } else bits = * (U8 *) s++; if (!checksum) *str++ = PL_hexdigit[bits & 15]; @@ -1214,7 +1191,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (len & 1) bits <<= 4; else if (utf8) { if (s >= strend) break; - bits = uni_to_byte(aTHX_ &s, strend, datumtype); + bits = utf8_to_byte(aTHX_ &s, strend, datumtype); } else bits = *(U8 *) s++; if (!checksum) *str++ = PL_hexdigit[(bits >> 4) & 15]; @@ -1312,11 +1289,11 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c STRLEN len; /* Bug: warns about bad utf8 even if we are short on bytes and will break out of the loop */ - if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, + if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 'U')) break; len = UTF8SKIP(result); - if (!uni_to_bytes(aTHX_ &ptr, strend, + if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) &result[1], len-1, 'U')) break; auv = NATIVE_TO_UNI(utf8n_to_uvchr(result, len, @@ -1729,72 +1706,48 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c sv = sv_2mortal(newSV(l)); if (l) SvPOK_on(sv); } - if (utf8) { - while (next_uni_uu(aTHX_ &s, strend, &len)) { - I32 a, b, c, d; - char hunk[3]; - - while (len > 0) { - next_uni_uu(aTHX_ &s, strend, &a); - next_uni_uu(aTHX_ &s, strend, &b); - next_uni_uu(aTHX_ &s, strend, &c); - next_uni_uu(aTHX_ &s, strend, &d); - hunk[0] = (char)((a << 2) | (b >> 4)); - hunk[1] = (char)((b << 4) | (c >> 2)); - hunk[2] = (char)((c << 6) | d); - if (!checksum) - sv_catpvn(sv, hunk, (len > 3) ? 3 : len); - len -= 3; - } - if (s < strend) { - if (*s == '\n') { - s++; - } - else { - /* possible checksum byte */ - const char *skip = s+UTF8SKIP(s); - if (skip < strend && *skip == '\n') - s = skip+1; - } - } - } - } else { - while (s < strend && *s > ' ' && ISUUCHAR(*s)) { - I32 a, b, c, d; - char hunk[3]; - - len = PL_uudmap[*(U8*)s++] & 077; - while (len > 0) { - if (s < strend && ISUUCHAR(*s)) - a = PL_uudmap[*(U8*)s++] & 077; - else - a = 0; - if (s < strend && ISUUCHAR(*s)) - b = PL_uudmap[*(U8*)s++] & 077; - else - b = 0; - if (s < strend && ISUUCHAR(*s)) - c = PL_uudmap[*(U8*)s++] & 077; - else - c = 0; - if (s < strend && ISUUCHAR(*s)) - d = PL_uudmap[*(U8*)s++] & 077; - else - d = 0; - hunk[0] = (char)((a << 2) | (b >> 4)); - hunk[1] = (char)((b << 4) | (c >> 2)); - hunk[2] = (char)((c << 6) | d); - if (!checksum) - sv_catpvn(sv, hunk, (len > 3) ? 3 : len); - len -= 3; - } - if (*s == '\n') - s++; - else /* possible checksum byte */ - if (s + 1 < strend && s[1] == '\n') - s += 2; - } - } + + /* Note that all legal uuencoded strings are ASCII printables, so + * have the same representation under UTF-8 vs not. This means we + * can ignore UTF8ness on legal input. For illegal we stop at the + * first failure, and don't report where/what that is, so again we + * can ignore UTF8ness */ + + while (s < strend && *s != ' ' && ISUUCHAR(*s)) { + I32 a, b, c, d; + char hunk[3]; + + len = PL_uudmap[*(U8*)s++] & 077; + while (len > 0) { + if (s < strend && ISUUCHAR(*s)) + a = PL_uudmap[*(U8*)s++] & 077; + else + a = 0; + if (s < strend && ISUUCHAR(*s)) + b = PL_uudmap[*(U8*)s++] & 077; + else + b = 0; + if (s < strend && ISUUCHAR(*s)) + c = PL_uudmap[*(U8*)s++] & 077; + else + c = 0; + if (s < strend && ISUUCHAR(*s)) + d = PL_uudmap[*(U8*)s++] & 077; + else + d = 0; + hunk[0] = (char)((a << 2) | (b >> 4)); + hunk[1] = (char)((b << 4) | (c >> 2)); + hunk[2] = (char)((c << 6) | d); + if (!checksum) + sv_catpvn(sv, hunk, (len > 3) ? 3 : len); + len -= 3; + } + if (*s == '\n') + s++; + else /* possible checksum byte */ + if (s + 1 < strend && s[1] == '\n') + s += 2; + } if (!checksum) XPUSHs(sv); break; @@ -2141,7 +2094,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) char *cur = start + SvCUR(cat); bool needs_swap; -#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) +#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no) #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no) switch (howlen) { @@ -2387,7 +2340,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (datumtype == 'Z') len++; } GROWING(0, cat, start, cur, len); - if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen, + if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen, datumtype | TYPE_IS_PACK)) Perl_croak(aTHX_ "panic: predicted utf8 length not available, " "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf, @@ -2687,7 +2640,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) len+(endb-buffer)*UTF8_EXPAND); end = start+SvLEN(cat); } - cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0); + cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0); } else { if (cur >= end) { *cur = '\0'; @@ -2719,6 +2672,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) afloat = -FLT_MAX; else afloat = (float)anv; # else +#if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) + if(Perl_isnan(anv)) + afloat = (float)NV_NAN; + else +#endif /* a simple cast to float is undefined if outside * the range of values that can be represented */ afloat = (float)(anv > FLT_MAX ? NV_INF : @@ -2756,6 +2714,12 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) #ifdef __GNUC__ /* to work round a gcc/x86 bug; don't use SvNV */ anv.nv = sv_2nv(fromstr); +# if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \ + && LONG_DOUBLESIZE > 10 + /* GCC sometimes overwrites the padding in the + assignment above */ + Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8); +# endif #else anv.nv = SvNV(fromstr); #endif @@ -2773,6 +2737,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) # ifdef __GNUC__ /* to work round a gcc/x86 bug; don't use SvNV */ aldouble.ld = (long double)sv_2nv(fromstr); +# if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10 + /* GCC sometimes overwrites the padding in the + assignment above */ + Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8); +# endif # else aldouble.ld = (long double)SvNV(fromstr); # endif @@ -3102,7 +3071,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) todo = fromlen; if (from_utf8) { char buffer[64]; - if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo, + if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo, 'u' | TYPE_IS_PACK)) { *cur = '\0'; SvCUR_set(cat, cur - start); @@ -3153,11 +3122,5 @@ PP(pp_pack) } /* - * Local variables: - * c-indentation-style: bsd - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * * ex: set ts=8 sts=4 sw=4 et: */