This is a live mirror of the Perl 5 development currently hosted at
https://github.com/perl/perl5
https://perl5.git.perl.org
/
perl5.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Tests for the .pmc functionality.
[perl5.git]
/
pp_pack.c
diff --git
a/pp_pack.c
b/pp_pack.c
index
61af698
..
76e6315
100644
(file)
--- a/
pp_pack.c
+++ b/
pp_pack.c
@@
-26,7
+26,6
@@
* other pp*.c files for the rest of the pp_ functions.
*/
* other pp*.c files for the rest of the pp_ functions.
*/
-
#include "EXTERN.h"
#define PERL_IN_PP_PACK_C
#include "perl.h"
#include "EXTERN.h"
#define PERL_IN_PP_PACK_C
#include "perl.h"
@@
-62,7
+61,7
@@
typedef struct tempsym {
(symptr)->grpend = NULL; \
(symptr)->code = 0; \
(symptr)->length = 0; \
(symptr)->grpend = NULL; \
(symptr)->code = 0; \
(symptr)->length = 0; \
- (symptr)->howlen =
0;
\
+ (symptr)->howlen =
e_no_len;
\
(symptr)->level = 0; \
(symptr)->flags = (f); \
(symptr)->strbeg = 0; \
(symptr)->level = 0; \
(symptr)->flags = (f); \
(symptr)->strbeg = 0; \
@@
-381,7
+380,7
@@
STATIC const packprops_t packprops[512] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0,
- /* C */ sizeof(unsigned char)
| PACK_SIZE_UNPREDICTABLE
,
+ /* C */ sizeof(unsigned char),
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
@@
-532,7
+531,7
@@
STATIC const packprops_t packprops[512] = {
/* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- /* C */ sizeof(unsigned char)
| PACK_SIZE_UNPREDICTABLE
,
+ /* C */ sizeof(unsigned char),
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
/* D */ LONG_DOUBLESIZE,
#else
@@
-702,34
+701,27
@@
next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
return TRUE;
}
return TRUE;
}
-STATIC void
-bytes_to_uni(pTHX_ const U8 *start, STRLEN len, char **dest) {
- U8 buffer[UTF8_MAXLEN];
+STATIC char *
+S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
const U8 * const end = start + len;
const U8 * const end = start + len;
- char *d = *dest;
+
while (start < end) {
while (start < end) {
- const int length =
- uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
- switch(length) {
- case 1:
- *d++ = buffer[0];
- break;
- case 2:
- *d++ = buffer[0];
- *d++ = buffer[1];
- break;
- default:
- Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
- *start, length);
+ const UV uv = NATIVE_TO_ASCII(*start);
+ if (UNI_IS_INVARIANT(uv))
+ *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
+ else {
+ *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
+ *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
}
start++;
}
}
start++;
}
-
*dest = d
;
+
return dest
;
}
#define PUSH_BYTES(utf8, cur, buf, len) \
STMT_START { \
}
#define PUSH_BYTES(utf8, cur, buf, len) \
STMT_START { \
- if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur)); \
+ if (utf8) \
+ (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \
else { \
Copy(buf, cur, len, char); \
(cur) += (len); \
else { \
Copy(buf, cur, len, char); \
(cur) += (len); \
@@
-764,7
+756,7
@@
STMT_START { \
STMT_START { \
if (utf8) { \
const U8 au8 = (byte); \
STMT_START { \
if (utf8) { \
const U8 au8 = (byte); \
-
bytes_to_uni(aTHX_ &au8, 1, &(s));
\
+
(s) = bytes_to_uni(&au8, 1, (s));
\
} else *(U8 *)(s)++ = (byte); \
} STMT_END
} else *(U8 *)(s)++ = (byte); \
} STMT_END
@@
-783,7
+775,7
@@
STMT_START { \
static const char *_action( const tempsym_t* symptr )
{
static const char *_action( const tempsym_t* symptr )
{
- return (
symptr->flags & FLAG_PACK ) ? "pack" : "unpack"
;
+ return (
const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack")
;
}
/* Returns the sizeof() struct described by pat */
}
/* Returns the sizeof() struct described by pat */
@@
-1186,8
+1178,7
@@
Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, cons
return unpack_rec(&sym, s, s, strend, NULL );
}
return unpack_rec(&sym, s, s, strend, NULL );
}
-STATIC
-I32
+STATIC I32
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
{
dVAR; dSP;
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
{
dVAR; dSP;
@@
-1570,10
+1561,29
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
XPUSHs(sv);
break;
}
XPUSHs(sv);
break;
}
+ case 'C':
+ if (len == 0) {
+ if (explicit_length)
+ /* Switch to "character" mode */
+ utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
+ break;
+ }
+ /* FALL THROUGH */
case 'c':
case 'c':
- while (len-- > 0) {
- int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
- if (aint >= 128) /* fake up signed chars */
+ while (len-- > 0 && s < strend) {
+ int aint;
+ if (utf8)
+ {
+ STRLEN retlen;
+ aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
+ ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+ if (retlen == (STRLEN) -1 || retlen == 0)
+ Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+ s += retlen;
+ }
+ else
+ aint = *(U8 *)(s)++;
+ if (aint >= 128 && datumtype != 'C') /* fake up signed chars */
aint -= 256;
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)aint)));
aint -= 256;
if (!checksum)
PUSHs(sv_2mortal(newSViv((IV)aint)));
@@
-1583,18
+1593,9
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
cuv += aint;
}
break;
cuv += aint;
}
break;
- case 'C':
case 'W':
W_checksum:
case 'W':
W_checksum:
- if (len == 0) {
- if (explicit_length && datumtype == 'C')
- /* Switch to "character" mode */
- utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
- break;
- }
- if (datumtype == 'C' ?
- (symptr->flags & FLAG_DO_UTF8) &&
- !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
+ if (utf8) {
while (len-- > 0 && s < strend) {
STRLEN retlen;
const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
while (len-- > 0 && s < strend) {
STRLEN retlen;
const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
@@
-1978,7
+1979,7
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
if (++bytes >= sizeof(UV)) { /* promote to string */
const char *t;
if (++bytes >= sizeof(UV)) { /* promote to string */
const char *t;
- sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
+ sv = Perl_newSVpvf(aTHX_ "%.*"UV
u
f, (int)TYPE_DIGITS(UV), auv);
while (s < strend) {
ch = SHIFT_BYTE(utf8, s, strend, datumtype);
sv = mul128(sv, (U8)(ch & 0x7f));
while (s < strend) {
ch = SHIFT_BYTE(utf8, s, strend, datumtype);
sv = mul128(sv, (U8)(ch & 0x7f));
@@
-2090,22
+2091,6
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
break;
#endif
case 'u':
break;
#endif
case 'u':
- /* MKS:
- * Initialise the decode mapping. By using a table driven
- * algorithm, the code will be character-set independent
- * (and just as fast as doing character arithmetic)
- */
- if (PL_uudmap['M'] == 0) {
- size_t i;
-
- for (i = 0; i < sizeof(PL_uuemap); ++i)
- PL_uudmap[(U8)PL_uuemap[i]] = i;
- /*
- * Because ' ' and '`' map to the same value,
- * we need to decode them both the same.
- */
- PL_uudmap[' '] = 0;
- }
{
const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
sv = sv_2mortal(newSV(l));
{
const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
sv = sv_2mortal(newSV(l));
@@
-2114,9
+2099,8
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
if (utf8) {
while (next_uni_uu(aTHX_ &s, strend, &len)) {
I32 a, b, c, d;
if (utf8) {
while (next_uni_uu(aTHX_ &s, strend, &len)) {
I32 a, b, c, d;
- char hunk[
4
];
+ char hunk[
3
];
- hunk[3] = '\0';
while (len > 0) {
next_uni_uu(aTHX_ &s, strend, &a);
next_uni_uu(aTHX_ &s, strend, &b);
while (len > 0) {
next_uni_uu(aTHX_ &s, strend, &a);
next_uni_uu(aTHX_ &s, strend, &b);
@@
-2143,9
+2127,8
@@
S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
} else {
while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
I32 a, b, c, d;
} else {
while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
I32 a, b, c, d;
- char hunk[
4
];
+ char hunk[
3
];
- hunk[3] = '\0';
len = PL_uudmap[*(U8*)s++] & 077;
while (len > 0) {
if (s < strend && ISUUCHAR(*s))
len = PL_uudmap[*(U8*)s++] & 077;
while (len > 0) {
if (s < strend && ISUUCHAR(*s))
@@
-2370,14
+2353,13
@@
void
Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
{
dVAR;
Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
{
dVAR;
- STRLEN no_len;
tempsym_t sym;
TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
/* We're going to do changes through SvPVX(cat). Make sure it's valid.
Also make sure any UTF8 flag is loaded */
tempsym_t sym;
TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
/* We're going to do changes through SvPVX(cat). Make sure it's valid.
Also make sure any UTF8 flag is loaded */
- SvPV_force
(cat, no_len
);
+ SvPV_force
_nolen(cat
);
if (DO_UTF8(cat))
sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
if (DO_UTF8(cat))
sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
@@
-2638,6
+2620,7
@@
S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
if (savsym.howlen == e_star && beglist == endlist)
break; /* No way to continue */
}
if (savsym.howlen == e_star && beglist == endlist)
break; /* No way to continue */
}
+ items = endlist - beglist;
lookahead.flags = symptr->flags & ~group_modifiers;
goto no_change;
}
lookahead.flags = symptr->flags & ~group_modifiers;
goto no_change;
}
@@
-2956,7
+2939,6
@@
S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
break;
}
utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
break;
}
- GROWING(0, cat, start, cur, len);
while (len-- > 0) {
IV aiv;
fromstr = NEXTFROM;
while (len-- > 0) {
IV aiv;
fromstr = NEXTFROM;
@@
-2965,12
+2947,12
@@
S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
ckWARN(WARN_PACK))
Perl_warner(aTHX_ packWARN(WARN_PACK),
"Character in 'C' format wrapped in pack");
ckWARN(WARN_PACK))
Perl_warner(aTHX_ packWARN(WARN_PACK),
"Character in 'C' format wrapped in pack");
-
*cur++ = (char)(aiv & 0xff
);
+
PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)
);
}
break;
case 'W': {
char *end;
}
break;
case 'W': {
char *end;
- U8 in_bytes = IN_BYTES;
+ U8 in_bytes =
(U8)
IN_BYTES;
end = start+SvLEN(cat)-1;
if (utf8) end -= UTF8_MAXLEN-1;
end = start+SvLEN(cat)-1;
if (utf8) end -= UTF8_MAXLEN-1;
@@
-3053,7
+3035,7
@@
S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
len+(endb-buffer)*UTF8_EXPAND);
end = start+SvLEN(cat);
}
len+(endb-buffer)*UTF8_EXPAND);
end = start+SvLEN(cat);
}
-
bytes_to_uni(aTHX_ buffer, endb-buffer, &
cur);
+
cur = bytes_to_uni(buffer, endb-buffer,
cur);
} else {
if (cur >= end) {
*cur = '\0';
} else {
if (cur >= end) {
*cur = '\0';