X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0bd48802b00657d8da2da444a1c97fb25cb7d562..01c6f5f43bced0924a07064299094b1a8851d0a1:/pp_pack.c diff --git a/pp_pack.c b/pp_pack.c index 093e601..51b42d9 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -1,7 +1,7 @@ /* pp_pack.c * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others + * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -62,7 +62,7 @@ typedef struct tempsym { (symptr)->grpend = NULL; \ (symptr)->code = 0; \ (symptr)->length = 0; \ - (symptr)->howlen = 0; \ + (symptr)->howlen = e_no_len; \ (symptr)->level = 0; \ (symptr)->flags = (f); \ (symptr)->strbeg = 0; \ @@ -179,7 +179,7 @@ S_mul128(pTHX_ SV *sv, U8 m) char *t; if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ - SV * const tmpNew = newSVpvn("0000000000", 10); + SV * const tmpNew = newSVpvs("0000000000"); sv_catsv(tmpNew, sv); SvREFCNT_dec(sv); /* free old sv */ @@ -322,6 +322,11 @@ S_mul128(pTHX_ SV *sv, U8 m) # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void) # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char) # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char) +# elif PTRSIZE == IVSIZE +# define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void) +# define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void) +# define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char) +# define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char) # else # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer) # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer) @@ -684,6 +689,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len STATIC bool next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) { + dVAR; STRLEN retlen; const UV val = 
utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY); if (val >= 0x100 || !ISUUCHAR(val) || @@ -696,34 +702,27 @@ next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) return TRUE; } -STATIC void -bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) { - U8 buffer[UTF8_MAXLEN]; +STATIC char * +S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) { const U8 * const end = start + len; - char *d = *dest; + while (start < end) { - const int length = - uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer; - switch(length) { - case 1: - *d++ = buffer[0]; - break; - case 2: - *d++ = buffer[0]; - *d++ = buffer[1]; - break; - default: - Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes", - *start, length); + const UV uv = NATIVE_TO_ASCII(*start); + if (UNI_IS_INVARIANT(uv)) + *dest++ = (char)(U8)UTF_TO_NATIVE(uv); + else { + *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv); + *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv); } start++; } - *dest = d; + return dest; } #define PUSH_BYTES(utf8, cur, buf, len) \ STMT_START { \ - if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \ + if (utf8) \ + (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \ else { \ Copy(buf, cur, len, char); \ (cur) += (len); \ @@ -758,7 +757,7 @@ STMT_START { \ STMT_START { \ if (utf8) { \ const U8 au8 = (byte); \ - bytes_to_uni(aTHX_ &au8, 1, &(s)); \ + (s) = bytes_to_uni(&au8, 1, (s)); \ } else *(U8 *)(s)++ = (byte); \ } STMT_END @@ -777,7 +776,7 @@ STMT_START { \ static const char *_action( const tempsym_t* symptr ) { - return ( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"; + return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); } /* Returns the sizeof() struct described by pat */ @@ -1148,41 +1147,6 @@ first_symbol(const char *pat, const char *patend) { } /* -=for apidoc unpack_str - -The engine implementing unpack() Perl function. Note: parameters strbeg, new_s -and ocnt are not used. This call should not be used, use unpackstring instead. 
- -=cut */ - -I32 -Perl_unpack_str(pTHX_ const char *pat, const char *patend, const char *s, const char *strbeg, const char *strend, char **new_s, I32 ocnt, U32 flags) -{ - tempsym_t sym; - PERL_UNUSED_ARG(strbeg); - PERL_UNUSED_ARG(new_s); - PERL_UNUSED_ARG(ocnt); - - if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; - else if (need_utf8(pat, patend)) { - /* We probably should try to avoid this in case a scalar context call - wouldn't get to the "U0" */ - STRLEN len = strend - s; - s = (char *) bytes_to_utf8((U8 *) s, &len); - SAVEFREEPV(s); - strend = s + len; - flags |= FLAG_DO_UTF8; - } - - if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) - flags |= FLAG_PARSE_UTF8; - - TEMPSYM_INIT(&sym, pat, patend, flags); - - return unpack_rec(&sym, s, s, strend, NULL ); -} - -/* =for apidoc unpackstring The engine implementing unpack() Perl function. C<unpackstring> puts the @@ -1531,7 +1495,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c break; } - sv = sv_2mortal(NEWSV(35, len ? len : 1)); + sv = sv_2mortal(newSV(len ? len : 1)); SvPOK_on(sv); str = SvPVX(sv); if (datumtype == 'b') { @@ -1568,7 +1532,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c /* Preliminary length estimate, acceptable for utf8 too */ if (howlen == e_star || len > (strend - s) * 2) len = (strend - s) * 2; - sv = sv_2mortal(NEWSV(35, len ? len : 1)); + sv = sv_2mortal(newSV(len ? 
len : 1)); SvPOK_on(sv); str = SvPVX(sv); if (datumtype == 'h') { @@ -2007,7 +1971,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c if (++bytes >= sizeof(UV)) { /* promote to string */ const char *t; - sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv); + sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); while (s < strend) { ch = SHIFT_BYTE(utf8, s, strend, datumtype); sv = mul128(sv, (U8)(ch & 0x7f)); @@ -2124,7 +2088,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c * algorithm, the code will be character-set independent * (and just as fast as doing character arithmetic) */ - if (PL_uudmap['M'] == 0) { + if (PL_uudmap[(U8)'M'] == 0) { size_t i; for (i = 0; i < sizeof(PL_uuemap); ++i) @@ -2133,11 +2097,11 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c * Because ' ' and '`' map to the same value, * we need to decode them both the same. */ - PL_uudmap[' '] = 0; + PL_uudmap[(U8)' '] = 0; } { const STRLEN l = (STRLEN) (strend - s) * 3 / 4; - sv = sv_2mortal(NEWSV(42, l)); + sv = sv_2mortal(newSV(l)); if (l) SvPOK_on(sv); } if (utf8) { @@ -2269,6 +2233,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c PP(pp_unpack) { + dVAR; dSP; dPOPPOPssrl; I32 gimme = GIMME_V; @@ -2387,28 +2352,6 @@ S_div128(pTHX_ SV *pnum, bool *done) } /* -=for apidoc pack_cat - -The engine implementing pack() Perl function. Note: parameters next_in_list and -flags are not used. This call should not be used; use packlist instead. 
- -=cut -*/ - -void -Perl_pack_cat(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags) -{ - tempsym_t sym; - PERL_UNUSED_ARG(next_in_list); - PERL_UNUSED_ARG(flags); - - TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); - - (void)pack_rec( cat, &sym, beglist, endlist ); -} - - -/* =for apidoc packlist The engine implementing pack() Perl function. @@ -2419,14 +2362,14 @@ The engine implementing pack() Perl function. void Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist ) { - STRLEN no_len; + dVAR; tempsym_t sym; TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); /* We're going to do changes through SvPVX(cat). Make sure it's valid. Also make sure any UTF8 flag is loaded */ - SvPV_force(cat, no_len); + SvPV_force_nolen(cat); if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; @@ -2515,6 +2458,7 @@ STATIC SV ** S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) { + dVAR; tempsym_t lookahead; I32 items = endlist - beglist; bool found = next_symbol(symptr); @@ -2532,7 +2476,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) SV *fromstr; STRLEN fromlen; I32 len; - SV *lengthcode = Nullsv; + SV *lengthcode = NULL; I32 datumtype = symptr->code; howlen_t howlen = symptr->howlen; char *start = SvPVX(cat); @@ -2570,9 +2514,20 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (strchr("aAZ", lookahead.code)) { if (lookahead.howlen == e_number) count = lookahead.length; else { - if (items > 0) + if (items > 0) { + if (SvGAMAGIC(*beglist)) { + /* Avoid reading the active data more than once + by copying it to a temporary. */ + STRLEN len; + const char *const pv = SvPV_const(*beglist, len); + SV *const temp = sv_2mortal(newSVpvn(pv, len)); + if (SvUTF8(*beglist)) + SvUTF8_on(temp); + *beglist = temp; + } count = DO_UTF8(*beglist) ? 
sv_len_utf8(*beglist) : sv_len(*beglist); + } else count = 0; if (lookahead.code == 'Z') count++; } @@ -2675,6 +2630,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (savsym.howlen == e_star && beglist == endlist) break; /* No way to continue */ } + items = endlist - beglist; lookahead.flags = symptr->flags & ~group_modifiers; goto no_change; } @@ -2859,7 +2815,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (datumtype == 'B') while (l++ < len) { if (utf8_source) { - UV val; + UV val = 0; NEXT_UNI_VAL(val, cur, str, end, utf8_flags); bits |= val & 1; } else bits |= *str++ & 1; @@ -2873,7 +2829,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) /* datumtype == 'b' */ while (l++ < len) { if (utf8_source) { - UV val; + UV val = 0; NEXT_UNI_VAL(val, cur, str, end, utf8_flags); if (val & 1) bits |= 0x80; } else if (*str++ & 1) @@ -2928,7 +2884,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) if (datumtype == 'H') while (l++ < len) { if (utf8_source) { - UV val; + UV val = 0; NEXT_UNI_VAL(val, cur, str, end, utf8_flags); if (val < 256 && isALPHA(val)) bits |= (val + 9) & 0xf; @@ -2947,7 +2903,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) else while (l++ < len) { if (utf8_source) { - UV val; + UV val = 0; NEXT_UNI_VAL(val, cur, str, end, utf8_flags); if (val < 256 && isALPHA(val)) bits |= ((val + 9) & 0xf) << 4; @@ -3007,7 +2963,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) break; case 'W': { char *end; - U8 in_bytes = IN_BYTES; + U8 in_bytes = (U8)IN_BYTES; end = start+SvLEN(cat)-1; if (utf8) end -= UTF8_MAXLEN-1; @@ -3090,7 +3046,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) len+(endb-buffer)*UTF8_EXPAND); end = start+SvLEN(cat); } - bytes_to_uni(aTHX_ buffer, endb-buffer, &cur); + cur = bytes_to_uni(buffer, endb-buffer, cur); } else { if (cur >= 
end) { *cur = '\0'; @@ -3599,7 +3555,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) PP(pp_pack) { - dSP; dMARK; dORIGMARK; dTARGET; + dVAR; dSP; dMARK; dORIGMARK; dTARGET; register SV *cat = TARG; STRLEN fromlen; SV *pat_sv = *++MARK;