X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ce399ba62db9cda174a31da7c5310c71b8a9adc4..cfff97972660a620adf7d1f4616e4a60f0c7dd32:/pp_pack.c?ds=sidebyside diff --git a/pp_pack.c b/pp_pack.c index 6e11eb2..76e6315 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -26,7 +26,6 @@ * other pp*.c files for the rest of the pp_ functions. */ - #include "EXTERN.h" #define PERL_IN_PP_PACK_C #include "perl.h" @@ -62,7 +61,7 @@ typedef struct tempsym { (symptr)->grpend = NULL; \ (symptr)->code = 0; \ (symptr)->length = 0; \ - (symptr)->howlen = 0; \ + (symptr)->howlen = e_no_len; \ (symptr)->level = 0; \ (symptr)->flags = (f); \ (symptr)->strbeg = 0; \ @@ -381,7 +380,7 @@ STATIC const packprops_t packprops[512] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, + /* C */ sizeof(unsigned char), #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) /* D */ LONG_DOUBLESIZE, #else @@ -532,7 +531,7 @@ STATIC const packprops_t packprops[512] = { /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, + /* C */ sizeof(unsigned char), #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) /* D */ LONG_DOUBLESIZE, #else @@ -702,34 +701,27 @@ next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) return TRUE; } -STATIC void -bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) { - U8 buffer[UTF8_MAXLEN]; +STATIC char * +S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) { const U8 * const end = start + len; - char *d = *dest; + while (start < end) { - const int length = - uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer; - switch(length) { - case 1: - *d++ = buffer[0]; - break; - case 2: - *d++ = buffer[0]; - *d++ = buffer[1]; - break; - default: - Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes", - *start, length); + const UV uv = NATIVE_TO_ASCII(*start); + if (UNI_IS_INVARIANT(uv)) + *dest++ = (char)(U8)UTF_TO_NATIVE(uv); + else { + *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv); + *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv); } start++; } - *dest = d; + return dest; } #define PUSH_BYTES(utf8, cur, buf, len) \ STMT_START { \ - if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \ + if (utf8) \ + (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \ else { \ Copy(buf, cur, len, char); \ (cur) += (len); \ @@ -764,7 +756,7 @@ STMT_START { \ STMT_START { \ if (utf8) { \ const U8 au8 = (byte); \ - bytes_to_uni(aTHX_ &au8, 1, &(s)); \ + (s) = bytes_to_uni(&au8, 1, (s)); \ } else *(U8 *)(s)++ = (byte); \ } STMT_END @@ -783,7 +775,7 @@ STMT_START { \ static const char *_action( const tempsym_t* symptr ) { - return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"; + return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); } /* Returns the sizeof() struct described by pat */ @@ -1186,8 +1178,7 @@ Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, cons return unpack_rec(&sym, s, s, strend, NULL ); } -STATIC -I32 +STATIC I32 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) { dVAR; dSP; @@ -1570,10 +1561,29 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c XPUSHs(sv); break; } + case 'C': + if (len == 0) { + if (explicit_length) + /* Switch to "character" mode */ + utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; + break; + } + /* FALL THROUGH */ case 'c': - while (len-- > 0) { - int aint = SHIFT_BYTE(utf8, s, strend, datumtype); - if (aint >= 128) /* fake up signed chars */ + while (len-- > 0 && s < strend) { + int aint; + if (utf8) + { + STRLEN retlen; + aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, + ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); + if (retlen == (STRLEN) -1 || retlen == 0) + Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); + s += retlen; + } + else + aint = *(U8 *)(s)++; + if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ aint -= 256; if (!checksum) PUSHs(sv_2mortal(newSViv((IV)aint))); @@ -1583,18 +1593,9 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c cuv += aint; } break; - case 'C': case 'W': W_checksum: - if (len == 0) { - if (explicit_length && datumtype == 'C') - /* Switch to "character" mode */ - utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; - break; - } - if (datumtype == 'C' ? - (symptr->flags & FLAG_DO_UTF8) && - !(symptr->flags & FLAG_WAS_UTF8) : utf8) { + if (utf8) { while (len-- > 0 && s < strend) { STRLEN retlen; const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, @@ -1978,7 +1979,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (++bytes >= sizeof(UV)) { /* promote to string */ const char *t; - sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv); + sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); while (s < strend) { ch = SHIFT_BYTE(utf8, s, strend, datumtype); sv = mul128(sv, (U8)(ch & 0x7f)); @@ -2090,22 +2091,6 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c break; #endif case 'u': - /* MKS: - * Initialise the decode mapping. By using a table driven - * algorithm, the code will be character-set independent - * (and just as fast as doing character arithmetic) - */ - if (PL_uudmap['M'] == 0) { - size_t i; - - for (i = 0; i < sizeof(PL_uuemap); ++i) - PL_uudmap[(U8)PL_uuemap[i]] = i; - /* - * Because ' ' and '`' map to the same value, - * we need to decode them both the same. - */ - PL_uudmap[' '] = 0; - } { const STRLEN l = (STRLEN) (strend - s) * 3 / 4; sv = sv_2mortal(newSV(l)); @@ -2114,9 +2099,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (utf8) { while (next_uni_uu(aTHX_ &s, strend, &len)) { I32 a, b, c, d; - char hunk[4]; + char hunk[3]; - hunk[3] = '\0'; while (len > 0) { next_uni_uu(aTHX_ &s, strend, &a); next_uni_uu(aTHX_ &s, strend, &b); @@ -2143,9 +2127,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c } else { while (s < strend && *s > ' ' && ISUUCHAR(*s)) { I32 a, b, c, d; - char hunk[4]; + char hunk[3]; - hunk[3] = '\0'; len = PL_uudmap[*(U8*)s++] & 077; while (len > 0) { if (s < strend && ISUUCHAR(*s)) @@ -2359,28 +2342,6 @@ S_div128(pTHX_ SV *pnum, bool *done) } /* -=for apidoc pack_cat - -The engine implementing pack() Perl function. Note: parameters next_in_list and -flags are not used. This call should not be used; use packlist instead. - -=cut -*/ - -void -Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags) -{ - tempsym_t sym; - PERL_UNUSED_ARG(next_in_list); - PERL_UNUSED_ARG(flags); - - TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); - - (void)pack_rec( cat, &sym, beglist, endlist ); -} - - -/* =for apidoc packlist The engine implementing pack() Perl function. @@ -2392,14 +2353,13 @@ void Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist ) { dVAR; - STRLEN no_len; tempsym_t sym; TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); /* We're going to do changes through SvPVX(cat). Make sure it's valid. Also make sure any UTF8 flag is loaded */ - SvPV_force(cat, no_len); + SvPV_force_nolen(cat); if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; @@ -2660,6 +2620,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (savsym.howlen == e_star && beglist == endlist) break; /* No way to continue */ } + items = endlist - beglist; lookahead.flags = symptr->flags & ~group_modifiers; goto no_change; } @@ -2978,7 +2939,6 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; break; } - GROWING(0, cat, start, cur, len); while (len-- > 0) { IV aiv; fromstr = NEXTFROM; @@ -2987,12 +2947,12 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) ckWARN(WARN_PACK)) Perl_warner(aTHX_ packWARN(WARN_PACK), "Character in 'C' format wrapped in pack"); - *cur++ = (char)(aiv & 0xff); + PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); } break; case 'W': { char *end; - U8 in_bytes = IN_BYTES; + U8 in_bytes = (U8)IN_BYTES; end = start+SvLEN(cat)-1; if (utf8) end -= UTF8_MAXLEN-1; @@ -3075,7 +3035,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) len+(endb-buffer)*UTF8_EXPAND); end = start+SvLEN(cat); } - bytes_to_uni(aTHX_ buffer, endb-buffer, &cur); + cur = bytes_to_uni(buffer, endb-buffer, cur); } else { if (cur >= end) { *cur = '\0';