This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Refactoring to Sv*_set() macros - patch #5
[perl5.git] / pp_pack.c
index b9d8be5..5ee841b 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
 #define PERL_IN_PP_PACK_C
 #include "perl.h"
 
+#if PERL_VERSION >= 9
+# define PERL_PACK_CAN_BYTEORDER
+# define PERL_PACK_CAN_SHRIEKSIGN
+#endif
+
+#ifndef CHAR_BIT
+# define CHAR_BIT      8
+#endif
+/* Maximum number of bytes to which a byte can grow due to upgrade */
+#define UTF8_EXPAND    2
+
 /*
  * Offset for integer pack/unpack.
  *
 
 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
-#    define OFF16(p)   (char*)(p)
-#    define OFF32(p)   (char*)(p)
+#    define OFF16(p)   ((char*)(p))
+#    define OFF32(p)   ((char*)(p))
 #  else
 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
 #      define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16))
 #      define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32))
 #    else
-       }}}} bad cray byte order
+       ++++ bad cray byte order
 #    endif
 #  endif
-#  define COPY16(s,p)  (*(p) = 0, Copy(s, OFF16(p), SIZE16, char))
-#  define COPY32(s,p)  (*(p) = 0, Copy(s, OFF32(p), SIZE32, char))
-#  define COPYNN(s,p,n) (*(p) = 0, Copy(s, (char *)(p), n, char))
-#  define CAT16(sv,p)  sv_catpvn(sv, OFF16(p), SIZE16)
-#  define CAT32(sv,p)  sv_catpvn(sv, OFF32(p), SIZE32)
 #else
-#  define COPY16(s,p)  Copy(s, p, SIZE16, char)
-#  define COPY32(s,p)  Copy(s, p, SIZE32, char)
-#  define COPYNN(s,p,n) Copy(s, (char *)(p), n, char)
-#  define CAT16(sv,p)  sv_catpvn(sv, (char*)(p), SIZE16)
-#  define CAT32(sv,p)  sv_catpvn(sv, (char*)(p), SIZE32)
+#  define OFF16(p)     ((char *) (p))
+#  define OFF32(p)     ((char *) (p))
 #endif
 
+/*
+ * SHIFT16: read SIZE16 bytes from the input cursor s into the buffer at
+ * OFF16(p).  When utf8 is true the bytes come from uni_to_bytes(), which
+ * advances s itself on success; otherwise they are Copy()'d and s is
+ * advanced by SIZE16.
+ *
+ * Only to be used inside a loop (see the break): a failed uni_to_bytes()
+ * is meant to end the caller's loop.
+ * NOTE(review): if STMT_START/STMT_END expand to do { } while (0), the
+ * break leaves only that wrapper and not the caller's loop -- confirm
+ * against the definitions in perl.h.
+ */
+#define SHIFT16(utf8, s, strend, p, datumtype) STMT_START {            \
+    if (utf8) {                                                                \
+       if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break;      \
+    } else {                                                           \
+       Copy(s, OFF16(p), SIZE16, char);                                \
+       (s) += SIZE16;                                                  \
+    }                                                                  \
+} STMT_END
+
+/*
+ * SHIFT32: read SIZE32 bytes from the input cursor s into the buffer at
+ * OFF32(p).  When utf8 is true the bytes come from uni_to_bytes(), which
+ * advances s itself on success; otherwise they are Copy()'d and s is
+ * advanced by SIZE32.
+ *
+ * Only to be used inside a loop (see the break): a failed uni_to_bytes()
+ * is meant to end the caller's loop.
+ * NOTE(review): if STMT_START/STMT_END expand to do { } while (0), the
+ * break leaves only that wrapper and not the caller's loop -- confirm
+ * against the definitions in perl.h.
+ */
+#define SHIFT32(utf8, s, strend, p, datumtype) STMT_START {            \
+    if (utf8) {                                                                \
+       if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break;      \
+    } else {                                                           \
+       Copy(s, OFF32(p), SIZE32, char);                                \
+       (s) += SIZE32;                                                  \
+    }                                                                  \
+} STMT_END
+
+/* Append the SIZE16 (resp. SIZE32) bytes found at OFF16(p) (resp. OFF32(p))
+   to the output cursor cur through PUSH_BYTES, which handles the utf8 case */
+#define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
+#define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
+
+/* Only to be used inside a loop (see the break) */
+#define SHIFT_VAR(utf8, s, strend, var, datumtype)     \
+STMT_START {                                           \
+    if (utf8) {                                                \
+        if (!uni_to_bytes(aTHX_ &s, strend,            \
+            (char *) &var, sizeof(var), datumtype)) break;\
+    } else {                                           \
+        Copy(s, (char *) &var, sizeof(var), char);     \
+        s += sizeof(var);                              \
+    }                                                  \
+} STMT_END
+
+/* Append the raw bytes of the object var (sizeof(var) of them) to the output
+   cursor aptr through PUSH_BYTES */
+#define PUSH_VAR(utf8, aptr, var)      \
+       PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
+
 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
 #define MAX_SUB_TEMPLATE_LEVEL 100
 
 /* flags (note that type modifiers can also be used as flags!) */
+#define FLAG_WAS_UTF8        0x40
+#define FLAG_PARSE_UTF8       0x20     /* Parse as utf8 */
 #define FLAG_UNPACK_ONLY_ONE  0x10
-#define FLAG_UNPACK_DO_UTF8   0x08
+#define FLAG_DO_UTF8          0x08     /* The underlying string is utf8 */
 #define FLAG_SLASH            0x04
 #define FLAG_COMMA            0x02
 #define FLAG_PACK             0x01
@@ -134,15 +177,41 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define TYPE_IS_SHRIEKING      0x100
 #define TYPE_IS_BIG_ENDIAN     0x200
 #define TYPE_IS_LITTLE_ENDIAN  0x400
+#define TYPE_IS_PACK           0x800
 #define TYPE_ENDIANNESS_MASK   (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
-#define TYPE_ENDIANNESS(t)     ((t) & TYPE_ENDIANNESS_MASK)
-#define TYPE_NO_ENDIANNESS(t)  ((t) & ~TYPE_ENDIANNESS_MASK)
 #define TYPE_MODIFIERS(t)      ((t) & ~0xFF)
 #define TYPE_NO_MODIFIERS(t)   ((t) & 0xFF)
 
-#define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
+#else
+# define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
+#endif
+
+#ifndef PERL_PACK_CAN_BYTEORDER
+/* Put "can't" first because it is shorter  */
+# define TYPE_ENDIANNESS(t)    0
+# define TYPE_NO_ENDIANNESS(t) (t)
 
-#define DO_BO_UNPACK(var, type)                                               \
+# define ENDIANNESS_ALLOWED_TYPES   ""
+
+# define DO_BO_UNPACK(var, type)
+# define DO_BO_PACK(var, type)
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
+# define DO_BO_UNPACK_N(var, type)
+# define DO_BO_PACK_N(var, type)
+# define DO_BO_UNPACK_P(var)
+# define DO_BO_PACK_P(var)
+
+#else /* PERL_PACK_CAN_BYTEORDER */
+
+# define TYPE_ENDIANNESS(t)    ((t) & TYPE_ENDIANNESS_MASK)
+# define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK)
+
+# define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
+
+# define DO_BO_UNPACK(var, type)                                              \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
@@ -151,7 +220,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define DO_BO_PACK(var, type)                                                 \
+# define DO_BO_PACK(var, type)                                                \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
@@ -160,35 +229,35 @@ S_mul128(pTHX_ SV *sv, U8 m)
           }                                                                   \
         } STMT_END
 
-#define DO_BO_UNPACK_PTR(var, type, pre_cast)                                 \
+# define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
-              var = (void *) my_betoh ## type ((pre_cast) var);               \
+              var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
               break;                                                          \
             case TYPE_IS_LITTLE_ENDIAN:                                       \
-              var = (void *) my_letoh ## type ((pre_cast) var);               \
+              var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
               break;                                                          \
             default:                                                          \
               break;                                                          \
           }                                                                   \
         } STMT_END
 
-#define DO_BO_PACK_PTR(var, type, pre_cast)                                   \
+# define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
             case TYPE_IS_BIG_ENDIAN:                                          \
-              var = (void *) my_htobe ## type ((pre_cast) var);               \
+              var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
               break;                                                          \
             case TYPE_IS_LITTLE_ENDIAN:                                       \
-              var = (void *) my_htole ## type ((pre_cast) var);               \
+              var = (post_cast *) my_htole ## type ((pre_cast) var);          \
               break;                                                          \
             default:                                                          \
               break;                                                          \
           }                                                                   \
         } STMT_END
 
-#define BO_CANT_DOIT(action, type)                                            \
+# define BO_CANT_DOIT(action, type)                                           \
         STMT_START {                                                          \
           switch (TYPE_ENDIANNESS(datumtype)) {                               \
              case TYPE_IS_BIG_ENDIAN:                                         \
@@ -204,20 +273,24 @@ S_mul128(pTHX_ SV *sv, U8 m)
            }                                                                  \
          } STMT_END
 
-#if PTRSIZE == INTSIZE
-# define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, i, int)
-# define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, i, int)
-#elif PTRSIZE == LONGSIZE
-# define DO_BO_UNPACK_P(var)   DO_BO_UNPACK_PTR(var, l, long)
-# define DO_BO_PACK_P(var)     DO_BO_PACK_PTR(var, l, long)
-#else
-# define DO_BO_UNPACK_P(var)   BO_CANT_DOIT(unpack, pointer)
-# define DO_BO_PACK_P(var)     BO_CANT_DOIT(pack, pointer)
-#endif
+# if PTRSIZE == INTSIZE
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, i, int, void)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, i, int, void)
+#  define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char)
+#  define DO_BO_PACK_PC(var)   DO_BO_PACK_PTR(var, i, int, char)
+# elif PTRSIZE == LONGSIZE
+#  define DO_BO_UNPACK_P(var)  DO_BO_UNPACK_PTR(var, l, long, void)
+#  define DO_BO_PACK_P(var)    DO_BO_PACK_PTR(var, l, long, void)
+#  define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char)
+#  define DO_BO_PACK_PC(var)   DO_BO_PACK_PTR(var, l, long, char)
+# else
+#  define DO_BO_UNPACK_P(var)  BO_CANT_DOIT(unpack, pointer)
+#  define DO_BO_PACK_P(var)    BO_CANT_DOIT(pack, pointer)
+# endif
 
-#if defined(my_htolen) && defined(my_letohn) && \
+# if defined(my_htolen) && defined(my_letohn) && \
     defined(my_htoben) && defined(my_betohn)
-# define DO_BO_UNPACK_N(var, type)                                            \
+#  define DO_BO_UNPACK_N(var, type)                                           \
          STMT_START {                                                         \
            switch (TYPE_ENDIANNESS(datumtype)) {                              \
              case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
@@ -226,7 +299,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
            }                                                                  \
          } STMT_END
 
-# define DO_BO_PACK_N(var, type)                                              \
+#  define DO_BO_PACK_N(var, type)                                             \
          STMT_START {                                                         \
            switch (TYPE_ENDIANNESS(datumtype)) {                              \
              case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
@@ -234,193 +307,537 @@ S_mul128(pTHX_ SV *sv, U8 m)
              default: break;                                                  \
            }                                                                  \
          } STMT_END
-#else
-# define DO_BO_UNPACK_N(var, type)     BO_CANT_DOIT(unpack, type)
-# define DO_BO_PACK_N(var, type)       BO_CANT_DOIT(pack, type)
-#endif
+# else
+#  define DO_BO_UNPACK_N(var, type)    BO_CANT_DOIT(unpack, type)
+#  define DO_BO_PACK_N(var, type)      BO_CANT_DOIT(pack, type)
+# endif
 
-/* Returns the sizeof() struct described by pat */
-STATIC I32
-S_measure_struct(pTHX_ register tempsym_t* symptr)
-{
-    register I32 len = 0;
-    register I32 total = 0;
-    int star;
+#endif /* PERL_PACK_CAN_BYTEORDER */
 
-    register int size;
+#define PACK_SIZE_CANNOT_CSUM          0x80
+#define PACK_SIZE_UNPREDICTABLE                0x40    /* Not a fixed size element */
+#define PACK_SIZE_MASK                 0x3F
 
-    while (next_symbol(symptr)) {
+/* These tables are regenerated by genpacksizetables.pl (and then hand pasted
+   in).  You're unlikely ever to need to regenerate them.  */
 
-        switch( symptr->howlen ){
-        case e_no_len:
-       case e_number:
-           len = symptr->length;
-           break;
-        case e_star:
-           Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
-                       symptr->flags & FLAG_PACK ? "pack" : "unpack" );
-            break;
-        }
+#if TYPE_IS_SHRIEKING != 0x100
+   ++++shriek offset should be 256
+#endif
 
-        /* endianness doesn't influence the size of a type */
-       switch(TYPE_NO_ENDIANNESS(symptr->code)) {
-       default:
-            Perl_croak(aTHX_ "Invalid type '%c' in %s",
-                       (int)TYPE_NO_MODIFIERS(symptr->code),
-                       symptr->flags & FLAG_PACK ? "pack" : "unpack" );
-       case '@':
-       case '/':
-       case 'U':                       /* XXXX Is it correct? */
-       case 'w':
-       case 'u':
-           Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
-                       (int)symptr->code,
-                       symptr->flags & FLAG_PACK ? "pack" : "unpack" );
-       case '%':
-           size = 0;
-           break;
-       case '(':
-       {
-            tempsym_t savsym = *symptr;
-           symptr->patptr = savsym.grpbeg;
-            symptr->patend = savsym.grpend;
-           /* XXXX Theoretically, we need to measure many times at different
-              positions, since the subexpression may contain
-              alignment commands, but be not of aligned length.
-              Need to detect this and croak().  */
-           size = measure_struct(symptr);
-            *symptr = savsym;
-           break;
-       }
-       case 'X' | TYPE_IS_SHRIEKING:
-           /* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS. */
-           if (!len)                   /* Avoid division by 0 */
-               len = 1;
-           len = total % len;          /* Assumed: the start is aligned. */
-           /* FALL THROUGH */
-       case 'X':
-           size = -1;
-           if (total < len)
-               Perl_croak(aTHX_ "'X' outside of string in %s",
-                          symptr->flags & FLAG_PACK ? "pack" : "unpack" );
-           break;
-       case 'x' | TYPE_IS_SHRIEKING:
-           if (!len)                   /* Avoid division by 0 */
-               len = 1;
-           star = total % len;         /* Assumed: the start is aligned. */
-           if (star)                   /* Other portable ways? */
-               len = len - star;
-           else
-               len = 0;
-           /* FALL THROUGH */
-       case 'x':
-       case 'A':
-       case 'Z':
-       case 'a':
-       case 'c':
-       case 'C':
-           size = 1;
-           break;
-       case 'B':
-       case 'b':
-           len = (len + 7)/8;
-           size = 1;
-           break;
-       case 'H':
-       case 'h':
-           len = (len + 1)/2;
-           size = 1;
-           break;
-       case 's' | TYPE_IS_SHRIEKING:
-#if SHORTSIZE != SIZE16
-           size = sizeof(short);
-           break;
+typedef U8 packprops_t;
+#if 'J'-'I' == 1
+/* ASCII */
+const packprops_t packprops[512] = {
+    /* normal */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0,
+    /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+    /* D */ LONG_DOUBLESIZE,
 #else
-            /* FALL THROUGH */
+    0,
 #endif
-       case 's':
-           size = SIZE16;
-           break;
-       case 'S' | TYPE_IS_SHRIEKING:
-#if SHORTSIZE != SIZE16
-           size = sizeof(unsigned short);
-           break;
+    0,
+    /* F */ NVSIZE,
+    0, 0,
+    /* I */ sizeof(unsigned int),
+    /* J */ UVSIZE,
+    0,
+    /* L */ SIZE32,
+    0,
+    /* N */ SIZE32,
+    0, 0,
+#if defined(HAS_QUAD)
+    /* Q */ sizeof(Uquad_t),
 #else
-            /* FALL THROUGH */
+    0,
 #endif
-       case 'v' | TYPE_IS_SHRIEKING:
-       case 'n' | TYPE_IS_SHRIEKING:
-       case 'v':
-       case 'n':
-       case 'S':
-           size = SIZE16;
-           break;
-       case 'i' | TYPE_IS_SHRIEKING:
-       case 'i':
-           size = sizeof(int);
-           break;
-       case 'I' | TYPE_IS_SHRIEKING:
-       case 'I':
-           size = sizeof(unsigned int);
-           break;
-       case 'j':
-           size = IVSIZE;
-           break;
-       case 'J':
-           size = UVSIZE;
-           break;
-       case 'l' | TYPE_IS_SHRIEKING:
-#if LONGSIZE != SIZE32
-           size = sizeof(long);
-            break;
+    0,
+    /* S */ SIZE16,
+    0,
+    /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
+    /* V */ SIZE32,
+    /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* c */ sizeof(char),
+    /* d */ sizeof(double),
+    0,
+    /* f */ sizeof(float),
+    0, 0,
+    /* i */ sizeof(int),
+    /* j */ IVSIZE,
+    0,
+    /* l */ SIZE32,
+    0,
+    /* n */ SIZE16,
+    0,
+    /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
+#if defined(HAS_QUAD)
+    /* q */ sizeof(Quad_t),
 #else
-            /* FALL THROUGH */
+    0,
 #endif
-       case 'l':
-           size = SIZE32;
-           break;
-       case 'L' | TYPE_IS_SHRIEKING:
-#if LONGSIZE != SIZE32
-           size = sizeof(unsigned long);
-           break;
+    0,
+    /* s */ SIZE16,
+    0, 0,
+    /* v */ SIZE16,
+    /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0,
+    /* shrieking */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* I */ sizeof(unsigned int),
+    0, 0,
+    /* L */ sizeof(unsigned long),
+    0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* N */ SIZE32,
 #else
-            /* FALL THROUGH */
+    0,
 #endif
-       case 'V' | TYPE_IS_SHRIEKING:
-       case 'N' | TYPE_IS_SHRIEKING:
-       case 'V':
-       case 'N':
-       case 'L':
-           size = SIZE32;
-           break;
-       case 'P':
-           len = 1;
-           /* FALL THROUGH */
-       case 'p':
-           size = sizeof(char*);
-           break;
-#ifdef HAS_QUAD
-       case 'q':
-           size = sizeof(Quad_t);
-           break;
-       case 'Q':
-           size = sizeof(Uquad_t);
-           break;
+    0, 0, 0, 0,
+    /* S */ sizeof(unsigned short),
+    0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* V */ SIZE32,
+#else
+    0,
 #endif
-       case 'f':
-           size = sizeof(float);
-           break;
-       case 'd':
-           size = sizeof(double);
-           break;
-       case 'F':
-           size = NVSIZE;
-           break;
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0,
+    /* i */ sizeof(int),
+    0, 0,
+    /* l */ sizeof(long),
+    0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* n */ SIZE16,
+#else
+    0,
+#endif
+    0, 0, 0, 0,
+    /* s */ sizeof(short),
+    0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* v */ SIZE16,
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+#else
+/* EBCDIC (or bust) */
+const packprops_t packprops[512] = {
+    /* normal */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0,
+    /* c */ sizeof(char),
+    /* d */ sizeof(double),
+    0,
+    /* f */ sizeof(float),
+    0, 0,
+    /* i */ sizeof(int),
+    0, 0, 0, 0, 0, 0, 0,
+    /* j */ IVSIZE,
+    0,
+    /* l */ SIZE32,
+    0,
+    /* n */ SIZE16,
+    0,
+    /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
+#if defined(HAS_QUAD)
+    /* q */ sizeof(Quad_t),
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* s */ SIZE16,
+    0, 0,
+    /* v */ SIZE16,
+    /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* C */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-       case 'D':
-           size = LONG_DOUBLESIZE;
+    /* D */ LONG_DOUBLESIZE,
+#else
+    0,
+#endif
+    0,
+    /* F */ NVSIZE,
+    0, 0,
+    /* I */ sizeof(unsigned int),
+    0, 0, 0, 0, 0, 0, 0,
+    /* J */ UVSIZE,
+    0,
+    /* L */ SIZE32,
+    0,
+    /* N */ SIZE32,
+    0, 0,
+#if defined(HAS_QUAD)
+    /* Q */ sizeof(Uquad_t),
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* S */ SIZE16,
+    0,
+    /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
+    /* V */ SIZE32,
+    /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* shrieking */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* i */ sizeof(int),
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* l */ sizeof(long),
+    0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* n */ SIZE16,
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* s */ sizeof(short),
+    0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* v */ SIZE16,
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0,
+    /* I */ sizeof(unsigned int),
+    0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* L */ sizeof(unsigned long),
+    0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* N */ SIZE32,
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    /* S */ sizeof(unsigned short),
+    0, 0,
+#if defined(PERL_PACK_CAN_SHRIEKSIGN)
+    /* V */ SIZE32,
+#else
+    0,
+#endif
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+#endif
+
+/*
+ * Decode the single UTF-8 encoded character at *s (bounded by end), advance
+ * *s past it and return its value as a byte.
+ *
+ * A value of 0x100 or more is reduced to its low 8 bits, with a warning in
+ * the unpack category when that category is enabled.  datumtype is only
+ * used to name the format letter in messages.
+ */
+STATIC U8
+uni_to_byte(pTHX_ char **s, const char *end, I32 datumtype)
+{
+    STRLEN retlen;
+    const U32 decode_flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+    UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, decode_flags);
+
+    /* We try to process malformed UTF-8 as much as possible (preferably with
+       warnings), but a length of -1 or 0 means we would make no progress in
+       the string and might enter an infinite loop */
+    if (retlen == 0 || retlen == (STRLEN) -1)
+       Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
+                  (int) TYPE_NO_MODIFIERS(datumtype));
+
+    if (val >= 0x100) {
+       if (ckWARN(WARN_UNPACK)) {
+           Perl_warner(aTHX_ packWARN(WARN_UNPACK),
+                       "Character in '%c' format wrapped in unpack",
+                       (int) TYPE_NO_MODIFIERS(datumtype));
+       }
+       /* Keep only the low byte of an out-of-range character */
+       val &= 0xff;
+    }
+
+    *s += retlen;
+    return (U8)val;
+}
+
+/* Expression yielding the next input byte and advancing s: in the utf8 case
+   one character is decoded by uni_to_byte(), otherwise the byte at s is read
+   and s is post-incremented */
+#define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
+       uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
+       *(U8 *)(s)++)
+
+/*
+ * Decode buf_len characters from the UTF-8 string at *s (bounded by end) and
+ * store the low 8 bits of each one in buf.
+ *
+ * Returns FALSE, leaving *s untouched, when the input runs out before
+ * buf_len characters have been read (buf may already be partly written).
+ * Otherwise returns TRUE and moves *s past the consumed input.
+ *
+ * Malformed sequences are stepped over with UTF8SKIP() and afterwards the
+ * fragment is walked again without UTF8_CHECK_ONLY so warnings can be
+ * raised.  Characters of 0x100 or more are truncated to 8 bits and reported
+ * once per call, under the pack or unpack warning category depending on
+ * whether TYPE_IS_PACK is set in datumtype.
+ */
+STATIC bool
+uni_to_bytes(pTHX_ char **s, char *end, char *buf, int buf_len, I32 datumtype)
+{
+    UV val;
+    STRLEN retlen;
+    char *from = *s;
+    int bad = 0;    /* bit 1: malformed sequence seen, bit 2: value wrapped */
+    U32 flags = ckWARN(WARN_UTF8) ?
+       UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
+    for (;buf_len > 0; buf_len--) {
+       if (from >= end) return FALSE;
+       val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
+       if (retlen == (STRLEN) -1 || retlen == 0) {
+           from += UTF8SKIP(from);
+           bad |= 1;
+       } else from += retlen;
+       if (val >= 0x100) {
+           bad |= 2;
+           val &= 0xff;
+       }
+       *(U8 *)buf++ = (U8)val;
+    }
+    /* We have enough characters for the buffer. Did we have problems ? */
+    if (bad) {
+       if (bad & 1) {
+           /* Rewalk the string fragment while warning */
+           char *ptr;
+           flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+           for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
+               if (ptr >= end) break;
+               utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
+           }
+           /* Skipping a malformed tail may have stepped past the end */
+           if (from > end) from = end;
+       }
+       /* Gate the warning on the same category it is issued under; testing
+          WARN_UNPACK unconditionally consulted the wrong switch for pack */
+       if ((bad & 2) &&
+           ((datumtype & TYPE_IS_PACK) ?
+            ckWARN(WARN_PACK) : ckWARN(WARN_UNPACK)))
+           Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
+                                      WARN_PACK : WARN_UNPACK),
+                       "Character(s) in '%c' format wrapped in %s",
+                       (int) TYPE_NO_MODIFIERS(datumtype),
+                       datumtype & TYPE_IS_PACK ? "pack" : "unpack");
+    }
+    *s = from;
+    return TRUE;
+}
+
+/*
+ * Read one UTF-8 encoded uuencode character at *s (bounded by end).
+ *
+ * On success stores its 6-bit value (looked up in PL_uudmap) in *out,
+ * advances *s and returns TRUE.  If the sequence does not decode, or the
+ * character is 0x100 or more, or it fails ISUUCHAR(), *out is set to 0, *s
+ * is left alone and FALSE is returned.
+ */
+STATIC bool
+next_uni_uu(pTHX_ char **s, const char *end, I32 *out)
+{
+    STRLEN retlen;
+    const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
+    const bool decoded = retlen != (STRLEN) -1 && retlen != 0;
+
+    if (decoded && val < 0x100 && ISUUCHAR(val)) {
+       *out = PL_uudmap[val] & 077;
+       *s += retlen;
+       return TRUE;
+    }
+
+    *out = 0;
+    return FALSE;
+}
+
+/*
+ * Write the UTF-8 encoding of each of the len bytes at start to *dest and
+ * leave *dest pointing just past the output.  Each byte is expected to
+ * encode to one or two bytes; any other length is reported as a Perl bug
+ * via croak.  The caller supplies the room at *dest.
+ */
+STATIC void
+bytes_to_uni(pTHX_ U8 *start, STRLEN len, char **dest)
+{
+    U8 buffer[UTF8_MAXLEN];
+    U8 * const stop = start + len;
+    char *out = *dest;
+
+    for (; start < stop; start++) {
+       const int length =
+           uvuni_to_utf8_flags(buffer, NATIVE_TO_UNI(*start), 0) - buffer;
+
+       if (length != 1 && length != 2)
+           Perl_croak(aTHX_ "Perl bug: value %d UTF-8 expands to %d bytes",
+                      *start, length);
+
+       *out++ = buffer[0];
+       if (length == 2)
+           *out++ = buffer[1];
+    }
+    *dest = out;
+}
+
+#define PUSH_BYTES(utf8, cur, buf, len)                                \
+STMT_START {                                                   \
+    if (utf8) bytes_to_uni(aTHX_ (U8 *) buf, len, &(cur));     \
+    else {                                                     \
+       Copy(buf, cur, len, char);                              \
+       (cur) += (len);                                         \
+    }                                                          \
+} STMT_END
+
+/*
+ * GROWING: make room for in_len more bytes (times UTF8_EXPAND when utf8 is
+ * true) in the buffer of cat.  If cur plus that amount reaches
+ * start + SvLEN(cat), sv_exp_grow() is called and start and cur are
+ * recomputed, cur becoming start + SvCUR(cat).
+ * NOTE(review): unlike PUSH_GROWING_BYTES this does not store cur - start
+ * into SvCUR before growing, so presumably callers keep SvCUR(cat) up to
+ * date themselves -- verify at the call sites.
+ */
+#define GROWING(utf8, cat, start, cur, in_len) \
+STMT_START {                                   \
+    STRLEN glen = (in_len);                    \
+    if (utf8) glen *= UTF8_EXPAND;             \
+    if ((cur) + glen >= (start) + SvLEN(cat)) {        \
+       (start) = sv_exp_grow(aTHX_ cat, glen); \
+       (cur) = (start) + SvCUR(cat);           \
+    }                                          \
+} STMT_END
+
+#define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
+STMT_START {                                   \
+    STRLEN glen = (in_len);                    \
+    STRLEN gl = glen;                          \
+    if (utf8) gl *= UTF8_EXPAND;               \
+    if ((cur) + gl >= (start) + SvLEN(cat)) {  \
+        *cur = '\0';                           \
+        SvCUR_set((cat), (cur) - (start));     \
+       (start) = sv_exp_grow(aTHX_ cat, gl);   \
+       (cur) = (start) + SvCUR(cat);           \
+    }                                          \
+    PUSH_BYTES(utf8, cur, buf, glen);          \
+} STMT_END
+
+/*
+ * PUSH_BYTE: append the single value byte at the output cursor s and advance
+ * s.  In the utf8 case the value is first narrowed to a U8 temporary and
+ * then written in UTF-8 encoded form by bytes_to_uni(); otherwise it is
+ * stored directly.  No room check is made here.
+ */
+#define PUSH_BYTE(utf8, s, byte)               \
+STMT_START {                                   \
+    if (utf8) {                                        \
+       U8 au8 = (byte);                        \
+       bytes_to_uni(aTHX_ &au8, 1, &(s));      \
+    } else *(U8 *)(s)++ = (byte);              \
+} STMT_END
+
+/* Only to be used inside a loop (see the break) */
+#define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)           \
+STMT_START {                                                   \
+    STRLEN retlen;                                             \
+    if (str >= end) break;                                     \
+    val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);    \
+    if (retlen == (STRLEN) -1 || retlen == 0) {                        \
+       *cur = '\0';                                            \
+       Perl_croak(aTHX_ "Malformed UTF-8 string in pack");     \
+    }                                                          \
+    str += retlen;                                             \
+} STMT_END
+
+/* Returns the sizeof() struct described by pat */
+STATIC I32
+S_measure_struct(pTHX_ tempsym_t* symptr)
+{
+    I32 total = 0;
+
+    while (next_symbol(symptr)) {
+       I32 len;
+       int star, size;
+
+        switch (symptr->howlen) {
+         case e_star:
+           Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
+                       symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+            break;
+         default:
+           /* e_no_len and e_number */
+           len = symptr->length;
            break;
+        }
+
+       size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
+       if (!size) {
+           /* endianness doesn't influence the size of a type */
+           switch(TYPE_NO_ENDIANNESS(symptr->code)) {
+           default:
+               Perl_croak(aTHX_ "Invalid type '%c' in %s",
+                          (int)TYPE_NO_MODIFIERS(symptr->code),
+                          symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           case '.' | TYPE_IS_SHRIEKING:
+           case '@' | TYPE_IS_SHRIEKING:
 #endif
+           case '@':
+           case '.':
+           case '/':
+           case 'U':                   /* XXXX Is it correct? */
+           case 'w':
+           case 'u':
+               Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
+                          (int) TYPE_NO_MODIFIERS(symptr->code),
+                          symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+           case '%':
+               size = 0;
+               break;
+           case '(':
+           {
+               tempsym_t savsym = *symptr;
+               symptr->patptr = savsym.grpbeg;
+               symptr->patend = savsym.grpend;
+               /* XXXX Theoretically, we need to measure many times at
+                  different positions, since the subexpression may contain
+                  alignment commands, but be not of aligned length.
+                  Need to detect this and croak().  */
+               size = measure_struct(symptr);
+               *symptr = savsym;
+               break;
+           }
+           case 'X' | TYPE_IS_SHRIEKING:
+               /* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
+                */
+               if (!len)               /* Avoid division by 0 */
+                   len = 1;
+               len = total % len;      /* Assumed: the start is aligned. */
+               /* FALL THROUGH */
+           case 'X':
+               size = -1;
+               if (total < len)
+                   Perl_croak(aTHX_ "'X' outside of string in %s",
+                              symptr->flags & FLAG_PACK ? "pack" : "unpack" );
+               break;
+           case 'x' | TYPE_IS_SHRIEKING:
+               if (!len)               /* Avoid division by 0 */
+                   len = 1;
+               star = total % len;     /* Assumed: the start is aligned. */
+               if (star)               /* Other portable ways? */
+                   len = len - star;
+               else
+                   len = 0;
+               /* FALL THROUGH */
+           case 'x':
+           case 'A':
+           case 'Z':
+           case 'a':
+               size = 1;
+               break;
+           case 'B':
+           case 'b':
+               len = (len + 7)/8;
+               size = 1;
+               break;
+           case 'H':
+           case 'h':
+               len = (len + 1)/2;
+               size = 1;
+               break;
+
+           case 'P':
+               len = 1;
+               size = sizeof(char*);
+               break;
+           }
        }
        total += len * size;
     }
@@ -459,7 +876,7 @@ S_group_end(pTHX_ register char *patptr, register char *patend, char ender)
 /* Convert unsigned decimal number to binary.
  * Expects a pointer to the first digit and address of length variable
  * Advances char pointer to 1st non-digit char and returns number
- */ 
+ */
 STATIC char *
 S_get_num(pTHX_ register char *patptr, I32 *lenptr )
 {
@@ -477,10 +894,11 @@ S_get_num(pTHX_ register char *patptr, I32 *lenptr )
  * locates next template code and count
  */
 STATIC bool
-S_next_symbol(pTHX_ register tempsym_t* symptr )
+S_next_symbol(pTHX_ tempsym_t* symptr )
 {
-  register char* patptr = symptr->patptr; 
-  register char* patend = symptr->patend; 
+  char* patptr = symptr->patptr;
+  char* patend = symptr->patend;
+  const char *allowed = "";
 
   symptr->flags &= ~FLAG_SLASH;
 
@@ -494,7 +912,7 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
       if (patptr < patend)
        patptr++;
     } else {
-      /* We should have found a template code */ 
+      /* We should have found a template code */
       I32 code = *patptr++ & 0xFF;
       U32 inherited_modifiers = 0;
 
@@ -507,9 +925,9 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
         }
        continue;
       }
-      
+
       /* for '(', skip to ')' */
-      if (code == '(') {  
+      if (code == '(') {
         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
           Perl_croak(aTHX_ "()-group starts with a count in %s",
                      symptr->flags & FLAG_PACK ? "pack" : "unpack" );
@@ -528,13 +946,13 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
 
       /* look for modifiers */
       while (patptr < patend) {
-        const char *allowed;
         I32 modifier = 0;
         switch (*patptr) {
           case '!':
             modifier = TYPE_IS_SHRIEKING;
-            allowed = "sSiIlLxXnNvV";
+            allowed = SHRIEKING_ALLOWED_TYPES;
             break;
+#ifdef PERL_PACK_CAN_BYTEORDER
           case '>':
             modifier = TYPE_IS_BIG_ENDIAN;
             allowed = ENDIANNESS_ALLOWED_TYPES;
@@ -543,6 +961,7 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
             modifier = TYPE_IS_LITTLE_ENDIAN;
             allowed = ENDIANNESS_ALLOWED_TYPES;
             break;
+#endif /* PERL_PACK_CAN_BYTEORDER */
           default:
             break;
         }
@@ -578,7 +997,7 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
       /* inherit modifiers */
       code |= inherited_modifiers;
 
-      /* look for count and/or / */ 
+      /* look for count and/or / */
       if (patptr < patend) {
        if (isDIGIT(*patptr)) {
          patptr = get_num( patptr, &symptr->length );
@@ -589,7 +1008,7 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
           symptr->howlen = e_star;
 
         } else if (*patptr == '[') {
-          char* lenptr = ++patptr;            
+          char* lenptr = ++patptr;
           symptr->howlen = e_number;
           patptr = group_end( patptr, patend, ']' ) + 1;
           /* what kind of [] is it? */
@@ -639,15 +1058,53 @@ S_next_symbol(pTHX_ register tempsym_t* symptr )
       }
 
       symptr->code = code;
-      symptr->patptr = patptr; 
+      symptr->patptr = patptr;
       return TRUE;
     }
   }
-  symptr->patptr = patptr; 
+  symptr->patptr = patptr;
   return FALSE;
 }
 
 /*
+   There is no way to cleanly handle the case where we should process the
+   string per byte in its upgraded form while it's really in downgraded form
+   (e.g. estimates like strend-s as an upper bound for the number of
+   characters left wouldn't work). So if we foresee the need of this
+   (pattern starts with U or contains U0), we want to work on the encoded
+   version of the string. Users are advised to upgrade their packed string
+   themselves if they need to do a lot of unpacks like this on it.
+*/
+/* TRUE if the template [pat, patend) starts with 'U' or contains "U0"; '#'
+ * comments are skipped and pat[1] is only read while it is inside the template */
+STATIC bool
+need_utf8(const char *pat, const char *patend)
+{
+    bool first = TRUE;
+    for (; pat < patend; pat++) {
+       if (pat[0] == '#') {
+           /* Comment: resume at its newline; no newline means nothing is left */
+           pat = (const char *) memchr(pat + 1, '\n', patend - (pat + 1));
+           if (!pat) return FALSE;
+       } else if (pat[0] != 'U') first = FALSE;
+       else if (first || (pat + 1 < patend && pat[1] == '0')) return TRUE;
+    }
+    return FALSE;
+}
+
+/* First char of [pat, patend) not inside a '#'-to-newline comment, else 0 */
+STATIC char
+first_symbol(const char *pat, const char *patend) {
+    const char *cur = pat;
+    for (; cur < patend && *cur == '#'; cur++) {
+       /* hop onto the newline ending this comment; the loop steps past it */
+       cur = (const char *) memchr(cur + 1, '\n', patend - (cur + 1));
+       if (!cur) return 0;
+    }
+    return cur < patend ? *cur : 0;
+}
+
+/*
 =for apidoc unpack_str
 
 The engine implementing unpack() Perl function. Note: parameters strbeg, new_s
@@ -656,9 +1113,24 @@ and ocnt are not used. This call should not be used, use unpackstring instead.
 =cut */
 
 I32
-Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
+Perl_unpack_str(pTHX_ char *pat, char *patend, char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
 {
     tempsym_t sym = { 0 };
+
+    if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
+    else if (need_utf8(pat, patend)) {
+       /* We probably should try to avoid this in case a scalar context call
+          wouldn't get to the "U0" */
+       STRLEN len = strend - s;
+       s = (char *) bytes_to_utf8((U8 *) s, &len);
+       SAVEFREEPV(s);
+       strend = s + len;
+       flags |= FLAG_DO_UTF8;
+    }
+
+    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
+       flags |= FLAG_PARSE_UTF8;
+
     sym.patptr = pat;
     sym.patend = patend;
     sym.flags  = flags;
@@ -676,9 +1148,24 @@ Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
 =cut */
 
 I32
-Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char *strend, U32 flags)
+Perl_unpackstring(pTHX_ char *pat, char *patend, char *s, char *strend, U32 flags)
 {
     tempsym_t sym = { 0 };
+
+    if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
+    else if (need_utf8(pat, patend)) {
+       /* We probably should try to avoid this in case a scalar context call
+          wouldn't get to the "U0" */
+       STRLEN len = strend - s;
+       s = (char *) bytes_to_utf8((U8 *) s, &len);
+       SAVEFREEPV(s);
+       strend = s + len;
+       flags |= FLAG_DO_UTF8;
+    }
+
+    if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
+       flags |= FLAG_PARSE_UTF8;
+
     sym.patptr = pat;
     sym.patend = patend;
     sym.flags  = flags;
@@ -688,76 +1175,64 @@ Perl_unpackstring(pTHX_ char *pat, register char *patend, register char *s, char
 
 STATIC
 I32
-S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, char *strend, char **new_s )
+S_unpack_rec(pTHX_ tempsym_t* symptr, char *s, char *strbeg, char *strend, char **new_s )
 {
     dSP;
-    I32 datumtype;
-    register I32 len = 0;
-    register I32 bits = 0;
-    register char *str;
     SV *sv;
     I32 start_sp_offset = SP - PL_stack_base;
     howlen_t howlen;
 
-    /* These must not be in registers: */
-    I16 ai16;
-    U16 au16;
-    I32 ai32;
-    U32 au32;
-#ifdef HAS_QUAD
-    Quad_t aquad;
-    Uquad_t auquad;
-#endif
-#if SHORTSIZE != SIZE16
-    short ashort;
-    unsigned short aushort;
-#endif
-    int aint;
-    unsigned int auint;
-    long along;
-#if LONGSIZE != SIZE32
-    unsigned long aulong;
-#endif
-    char *aptr;
-    float afloat;
-    double adouble;
-#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-    long double aldouble;
-#endif
-    IV aiv;
-    UV auv;
-    NV anv;
-
     I32 checksum = 0;
     UV cuv = 0;
     NV cdouble = 0.0;
-    const int bits_in_uv = 8 * sizeof(cuv);
-    char* strrelbeg = s;
+    const int bits_in_uv = CHAR_BIT * sizeof(cuv);
     bool beyond = FALSE;
+    bool explicit_length;
     bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
+    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+    symptr->strbeg = s - strbeg;
 
     while (next_symbol(symptr)) {
-        datumtype = symptr->code;
+       packprops_t props;
+       I32 len, ai32;
+        I32 datumtype = symptr->code;
        /* do first one only unless in list context
-          / is implemented by unpacking the count, then poping it from the
+          / is implemented by unpacking the count, then popping it from the
           stack, so must check that we're not in the middle of a /  */
         if ( unpack_only_one
             && (SP - PL_stack_base == start_sp_offset + 1)
             && (datumtype != '/') )   /* XXX can this be omitted */
             break;
 
-        switch( howlen = symptr->howlen ){
-        case e_no_len:
-       case e_number:
-           len = symptr->length;
+        switch (howlen = symptr->howlen) {
+         case e_star:
+           len = strend - strbeg;      /* long enough */
            break;
-        case e_star:
-           len = strend - strbeg;      /* long enough */          
+         default:
+           /* e_no_len and e_number */
+           len = symptr->length;
            break;
         }
 
+        explicit_length = TRUE;
       redo_switch:
         beyond = s >= strend;
+
+       props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
+       if (props) {
+           /* props nonzero means we can process this letter. */
+           long size = props & PACK_SIZE_MASK;
+           long howmany = (strend - s) / size;
+           if (len > howmany)
+               len = howmany;
+
+           if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
+               if (len && unpack_only_one) len = 1;
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
+           }
+       }
+
        switch(TYPE_NO_ENDIANNESS(datumtype)) {
        default:
            Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
@@ -772,54 +1247,135 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case '(':
        {
-           char *ss = s;               /* Move from register */
             tempsym_t savsym = *symptr;
            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
            symptr->flags |= group_modifiers;
             symptr->patend = savsym.grpend;
+           symptr->previous = &savsym;
             symptr->level++;
            PUTBACK;
            while (len--) {
                symptr->patptr = savsym.grpbeg;
-               unpack_rec(symptr, ss, strbeg, strend, &ss );
-                if (ss == strend && savsym.howlen == e_star)
+               if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
+               else      symptr->flags &= ~FLAG_PARSE_UTF8;
+               unpack_rec(symptr, s, strbeg, strend, &s);
+                if (s == strend && savsym.howlen == e_star)
                    break; /* No way to continue */
            }
            SPAGAIN;
-           s = ss;
-           symptr->flags &= ~group_modifiers;
-            savsym.flags = symptr->flags;
+            savsym.flags = symptr->flags & ~group_modifiers;
             *symptr = savsym;
            break;
        }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '.' | TYPE_IS_SHRIEKING:
+#endif
+       case '.': {
+           char *from;
+           SV *sv;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           bool u8 = utf8;
+#endif
+           if (howlen == e_star) from = strbeg;
+           else if (len <= 0) from = s;
+           else {
+               tempsym_t *group = symptr;
+
+               while (--len && group) group = group->previous;
+               from = group ? strbeg + group->strbeg : strbeg;
+           }
+           sv = from <= s ?
+               newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
+               newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
+           XPUSHs(sv_2mortal(sv));
+           break;
+       }
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '@' | TYPE_IS_SHRIEKING:
+#endif
        case '@':
-           if (len > strend - strrelbeg)
-               Perl_croak(aTHX_ "'@' outside of string in unpack");
-           s = strrelbeg + len;
+           s = strbeg + symptr->strbeg;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           if (utf8)
+#endif
+           {
+               while (len > 0) {
+                   if (s >= strend)
+                       Perl_croak(aTHX_ "'@' outside of string in unpack");
+                   s += UTF8SKIP(s);
+                   len--;
+               }
+               if (s > strend)
+                   Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
+           } else {
+               if (strend-s < len)
+                   Perl_croak(aTHX_ "'@' outside of string in unpack");
+               s += len;
+           }
            break;
        case 'X' | TYPE_IS_SHRIEKING:
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           len = (s - strbeg) % len;
+           if (utf8) {
+               char *hop, *last;
+               I32 l = len;
+               hop = last = strbeg;
+               while (hop < s) {
+                   hop += UTF8SKIP(hop);
+                   if (--l == 0) {
+                       last = hop;
+                       l = len;
+                   }
+               }
+               if (last > s)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+               s = last;
+               break;
+           }
+           len = (s - strbeg) % len;
            /* FALL THROUGH */
        case 'X':
-           if (len > s - strbeg)
-               Perl_croak(aTHX_ "'X' outside of string in unpack" );
-           s -= len;
+           if (utf8) {
+               while (len > 0) {
+                   if (s <= strbeg)
+                       Perl_croak(aTHX_ "'X' outside of string in unpack");
+                   while (--s, UTF8_IS_CONTINUATION(*s)) {
+                       if (s <= strbeg)
+                           Perl_croak(aTHX_ "'X' outside of string in unpack");
+                   }
+                   len--;
+               }
+           } else {
+               if (len > s - strbeg)
+                   Perl_croak(aTHX_ "'X' outside of string in unpack" );
+               s -= len;
+           }
            break;
        case 'x' | TYPE_IS_SHRIEKING:
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           aint = (s - strbeg) % len;
-           if (aint)                   /* Other portable ways? */
-               len = len - aint;
-           else
-               len = 0;
+           if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
+           else      ai32 = (s - strbeg)                         % len;
+           if (ai32 == 0) break;
+           len -= ai32;
            /* FALL THROUGH */
        case 'x':
-           if (len > strend - s)
-               Perl_croak(aTHX_ "'x' outside of string in unpack");
-           s += len;
+           if (utf8) {
+               while (len>0) {
+                   if (s >= strend)
+                       Perl_croak(aTHX_ "'x' outside of string in unpack");
+                   s += UTF8SKIP(s);
+                   len--;
+               }
+           } else {
+               if (len > strend - s)
+                   Perl_croak(aTHX_ "'x' outside of string in unpack");
+               s += len;
+           }
            break;
        case '/':
            Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
@@ -827,39 +1383,71 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'A':
        case 'Z':
        case 'a':
-           if (len > strend - s)
-               len = strend - s;
-           if (checksum)
-               goto uchar_checksum;
-           sv = NEWSV(35, len);
-           sv_setpvn(sv, s, len);
-           if (len > 0 && (datumtype == 'A' || datumtype == 'Z')) {
-               aptr = s;       /* borrow register */
-               if (datumtype == 'Z') { /* 'Z' strips stuff after first null */
-                   s = SvPVX(sv);
-                   while (*s)
-                       s++;
-                   if (howlen == e_star) /* exact for 'Z*' */
-                       len = s - SvPVX(sv) + 1;
+           if (checksum) {
+               /* Preliminary length estimate is assumed done in 'W' */
+               if (len > strend - s) len = strend - s;
+               goto W_checksum;
+           }
+           if (utf8) {
+               I32 l;
+               char *hop;
+               for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
+                   if (hop >= strend) {
+                       if (hop > strend)
+                           Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                       break;
+                   }
                }
-               else {          /* 'A' strips both nulls and spaces */
-                   s = SvPVX(sv) + len - 1;
-                   while (s >= SvPVX(sv) && (!*s || isSPACE(*s)))
-                       s--;
-                   *++s = '\0';
+               if (hop > strend)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+               len = hop - s;
+           } else if (len > strend - s)
+               len = strend - s;
+
+           if (datumtype == 'Z') {
+               /* 'Z' strips stuff after first null */
+               char *ptr, *end;
+               end = s + len;
+               for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
+               sv = newSVpvn(s, ptr-s);
+               if (howlen == e_star) /* exact for 'Z*' */
+                   len = ptr-s + (ptr != strend ? 1 : 0);
+           } else if (datumtype == 'A') {
+               /* 'A' strips both nulls and spaces */
+               char *ptr;
+               if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
+                   for (ptr = s+len-1; ptr >= s; ptr--)
+                       if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
+                           !is_utf8_space((U8 *) ptr)) break;
+                   if (ptr >= s) ptr += UTF8SKIP(ptr);
+                   else ptr++;
+                   if (ptr > s+len)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+               } else {
+                   for (ptr = s+len-1; ptr >= s; ptr--)
+                       if (*ptr != 0 && !isSPACE(*ptr)) break;
+                   ptr++;
                }
-               SvCUR_set(sv, s - SvPVX(sv));
-               s = aptr;       /* unborrow register */
+               sv = newSVpvn(s, ptr-s);
+           } else sv = newSVpvn(s, len);
+
+           if (utf8) {
+               SvUTF8_on(sv);
+               /* Undo any upgrade done due to need_utf8() */
+               if (!(symptr->flags & FLAG_WAS_UTF8))
+                   sv_utf8_downgrade(sv, 0);
            }
-           s += len;
            XPUSHs(sv_2mortal(sv));
+           s += len;
            break;
        case 'B':
-       case 'b':
+       case 'b': {
+           char *str;
            if (howlen == e_star || len > (strend - s) * 8)
                len = (strend - s) * 8;
            if (checksum) {
                if (!PL_bitcount) {
+                   int bits;
                    Newz(601, PL_bitcount, 256, char);
                    for (bits = 1; bits < 256; bits++) {
                        if (bits & 1)   PL_bitcount[bits]++;
@@ -872,103 +1460,108 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                        if (bits & 128) PL_bitcount[bits]++;
                    }
                }
-               while (len >= 8) {
-                   cuv += PL_bitcount[*(unsigned char*)s++];
-                   len -= 8;
-               }
-               if (len) {
-                   bits = *s;
-                   if (datumtype == 'b') {
+               if (utf8)
+                   while (len >= 8 && s < strend) {
+                       cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
+                       len -= 8;
+                   }
+               else
+                   while (len >= 8) {
+                       cuv += PL_bitcount[*(U8 *)s++];
+                       len -= 8;
+                   }
+               if (len && s < strend) {
+                   U8 bits;
+                   bits = SHIFT_BYTE(utf8, s, strend, datumtype);
+                   if (datumtype == 'b')
                        while (len-- > 0) {
                            if (bits & 1) cuv++;
                            bits >>= 1;
                        }
-                   }
-                   else {
+                   else
                        while (len-- > 0) {
-                           if (bits & 128) cuv++;
+                           if (bits & 0x80) cuv++;
                            bits <<= 1;
                        }
-                   }
                }
                break;
            }
-           sv = NEWSV(35, len + 1);
-           SvCUR_set(sv, len);
+
+           sv = sv_2mortal(NEWSV(35, len ? len : 1));
            SvPOK_on(sv);
            str = SvPVX(sv);
            if (datumtype == 'b') {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 7)                /*SUPPRESS 595*/
-                       bits >>= 1;
-                   else
-                       bits = *s++;
-                   *str++ = '0' + (bits & 1);
+               U8 bits = 0;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 7) bits >>= 1;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
+                   *str++ = bits & 1 ? '1' : '0';
                }
-           }
-           else {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 7)
-                       bits <<= 1;
-                   else
-                       bits = *s++;
-                   *str++ = '0' + ((bits & 128) != 0);
+           } else {
+               U8 bits = 0;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 7) bits <<= 1;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
+                   *str++ = bits & 0x80 ? '1' : '0';
                }
            }
            *str = '\0';
-           XPUSHs(sv_2mortal(sv));
+           SvCUR_set(sv, str - SvPVX(sv));
+           XPUSHs(sv);
            break;
+       }
        case 'H':
-       case 'h':
+       case 'h': {
+           char *str;
+           /* Preliminary length estimate, acceptable for utf8 too */
            if (howlen == e_star || len > (strend - s) * 2)
                len = (strend - s) * 2;
-           sv = NEWSV(35, len + 1);
-           SvCUR_set(sv, len);
+           sv = sv_2mortal(NEWSV(35, len ? len : 1));
            SvPOK_on(sv);
            str = SvPVX(sv);
            if (datumtype == 'h') {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 1)
-                       bits >>= 4;
-                   else
-                       bits = *s++;
+               U8 bits = 0;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 1) bits >>= 4;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = * (U8 *) s++;
                    *str++ = PL_hexdigit[bits & 15];
                }
-           }
-           else {
-               aint = len;
-               for (len = 0; len < aint; len++) {
-                   if (len & 1)
-                       bits <<= 4;
-                   else
-                       bits = *s++;
+           } else {
+               U8 bits = 0;
+               ai32 = len;
+               for (len = 0; len < ai32; len++) {
+                   if (len & 1) bits <<= 4;
+                   else if (utf8) {
+                       if (s >= strend) break;
+                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                   } else bits = *(U8 *) s++;
                    *str++ = PL_hexdigit[(bits >> 4) & 15];
                }
            }
            *str = '\0';
-           XPUSHs(sv_2mortal(sv));
+           SvCUR_set(sv, str - SvPVX(sv));
+           XPUSHs(sv);
            break;
+       }
        case 'c':
-           if (len > strend - s)
-               len = strend - s;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               aint = *s++;
+               int aint = SHIFT_BYTE(utf8, s, strend, datumtype);
                if (aint >= 128)        /* fake up signed chars */
                    aint -= 256;
-               if (!checksum) {
-                   sv = NEWSV(36, 0);
-                   sv_setiv(sv, (IV)aint);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)aint)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aint;
                else
@@ -976,84 +1569,99 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            }
            break;
        case 'C':
-       unpack_C: /* unpack U will jump here if not UTF-8 */
+       case 'W':
+         W_checksum:
             if (len == 0) {
-                symptr->flags &= ~FLAG_UNPACK_DO_UTF8;
+                if (explicit_length && datumtype == 'C')
+                   /* Switch to "character" mode */
+                   utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
                break;
            }
-           if (len > strend - s)
-               len = strend - s;
-           if (checksum) {
-             uchar_checksum:
-               while (len-- > 0) {
-                   auint = *s++ & 255;
-                   cuv += auint;
+           if (datumtype == 'C' ?
+                (symptr->flags & FLAG_DO_UTF8) &&
+               !(symptr->flags & FLAG_WAS_UTF8) : utf8) {
+               while (len-- > 0 && s < strend) {
+                   UV val;
+                   STRLEN retlen;
+                   val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
+                                        ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
+                   if (retlen == (STRLEN) -1 || retlen == 0)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                   s += retlen;
+                   if (!checksum)
+                       PUSHs(sv_2mortal(newSVuv((UV) val)));
+                   else if (checksum > bits_in_uv)
+                       cdouble += (NV) val;
+                   else
+                       cuv += val;
                }
-           }
-           else {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
+           } else if (!checksum)
                while (len-- > 0) {
-                   auint = *s++ & 255;
-                   sv = NEWSV(37, 0);
-                   sv_setiv(sv, (IV)auint);
-                   PUSHs(sv_2mortal(sv));
-               }
+                   U8 ch = *(U8 *) s++;
+                   PUSHs(sv_2mortal(newSVuv((UV) ch)));
            }
+           else if (checksum > bits_in_uv)
+               while (len-- > 0) cdouble += (NV) *(U8 *) s++;
+           else
+               while (len-- > 0) cuv += *(U8 *) s++;
            break;
        case 'U':
            if (len == 0) {
-                symptr->flags |= FLAG_UNPACK_DO_UTF8;
+                if (explicit_length) {
+                   /* Switch to "bytes in UTF-8" mode */
+                   if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
+                   else
+                       /* Should be impossible due to the need_utf8() test */
+                       Perl_croak(aTHX_ "U0 mode on a byte string");
+               }
                break;
            }
-           if ((symptr->flags & FLAG_UNPACK_DO_UTF8) == 0)
-                goto unpack_C;
-           if (len > strend - s)
-               len = strend - s;
+           if (len > strend - s) len = strend - s;
            if (!checksum) {
-               if (len && unpack_only_one)
-                    len = 1;
+               if (len && unpack_only_one) len = 1;
                EXTEND(SP, len);
                EXTEND_MORTAL(len);
            }
            while (len-- > 0 && s < strend) {
-               STRLEN alen;
-               auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV));
-               along = alen;
-               s += along;
-               if (!checksum) {
-                   sv = NEWSV(37, 0);
-                   sv_setuv(sv, (UV)auint);
-                   PUSHs(sv_2mortal(sv));
+               STRLEN retlen;
+               UV auv;
+               if (utf8) {
+                   U8 result[UTF8_MAXLEN];
+                   char *ptr;
+                   STRLEN len;
+                   ptr = s;
+                   /* Bug: warns about bad utf8 even if we are short on bytes
+                      and will break out of the loop */
+                   if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
+                                     'U'))
+                       break;
+                   len = UTF8SKIP(result);
+                   if (!uni_to_bytes(aTHX_ &ptr, strend,
+                                     (char *) &result[1], len-1, 'U')) break;
+                   auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
+                   s = ptr;
+               } else {
+                   auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
+                   if (retlen == (STRLEN) -1 || retlen == 0)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
+                   s += retlen;
                }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV) auv)));
                else if (checksum > bits_in_uv)
-                   cdouble += (NV)auint;
+                   cdouble += (NV) auv;
                else
-                   cuv += auint;
+                   cuv += auv;
            }
            break;
        case 's' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
-           along = (strend - s) / sizeof(short);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPYNN(s, &ashort, sizeof(short));
+               short ashort;
+               SHIFT_VAR(utf8, s, strend, ashort, datumtype);
                DO_BO_UNPACK(ashort, s);
-               s += sizeof(short);
-               if (!checksum) {
-                   sv = NEWSV(38, 0);
-                   sv_setiv(sv, (IV)ashort);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)ashort)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ashort;
                else
@@ -1064,28 +1672,20 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            /* Fallthrough! */
 #endif
        case 's':
-           along = (strend - s) / SIZE16;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY16(s, &ai16);
+               I16 ai16;
+
+#if U16SIZE > SIZE16
+               ai16 = 0;
+#endif
+               SHIFT16(utf8, s, strend, &ai16, datumtype);
                DO_BO_UNPACK(ai16, 16);
 #if U16SIZE > SIZE16
                if (ai16 > 32767)
                    ai16 -= 65536;
 #endif
-               s += SIZE16;
-               if (!checksum) {
-                   sv = NEWSV(38, 0);
-                   sv_setiv(sv, (IV)ai16);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)ai16)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai16;
                else
@@ -1094,24 +1694,12 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'S' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
-           along = (strend - s) / sizeof(unsigned short);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-               if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPYNN(s, &aushort, sizeof(unsigned short));
+               unsigned short aushort;
+               SHIFT_VAR(utf8, s, strend, aushort, datumtype);
                DO_BO_UNPACK(aushort, s);
-               s += sizeof(unsigned short);
-               if (!checksum) {
-                   sv = NEWSV(39, 0);
-                   sv_setiv(sv, (UV)aushort);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV) aushort)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aushort;
                else
@@ -1124,19 +1712,13 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'v':
        case 'n':
        case 'S':
-           along = (strend - s) / SIZE16;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY16(s, &au16);
+               U16 au16;
+#if U16SIZE > SIZE16
+               au16 = 0;
+#endif
+               SHIFT16(utf8, s, strend, &au16, datumtype);
                DO_BO_UNPACK(au16, 16);
-               s += SIZE16;
 #ifdef HAS_NTOHS
                if (datumtype == 'n')
                    au16 = PerlSock_ntohs(au16);
@@ -1145,95 +1727,48 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                if (datumtype == 'v')
                    au16 = vtohs(au16);
 #endif
-               if (!checksum) {
-                   sv = NEWSV(39, 0);
-                   sv_setiv(sv, (UV)au16);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV)au16)));
                else if (checksum > bits_in_uv)
-                   cdouble += (NV)au16;
+                   cdouble += (NV) au16;
                else
                    cuv += au16;
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'v' | TYPE_IS_SHRIEKING:
        case 'n' | TYPE_IS_SHRIEKING:
-           along = (strend - s) / SIZE16;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY16(s, &ai16);
-               s += SIZE16;
-#ifdef HAS_NTOHS
+               I16 ai16;
+# if U16SIZE > SIZE16
+               ai16 = 0;
+# endif
+               SHIFT16(utf8, s, strend, &ai16, datumtype);
+# ifdef HAS_NTOHS
                if (datumtype == ('n' | TYPE_IS_SHRIEKING))
-                   ai16 = (I16)PerlSock_ntohs((U16)ai16);
-#endif
-#ifdef HAS_VTOHS
+                   ai16 = (I16) PerlSock_ntohs((U16) ai16);
+# endif /* HAS_NTOHS */
+# ifdef HAS_VTOHS
                if (datumtype == ('v' | TYPE_IS_SHRIEKING))
-                   ai16 = (I16)vtohs((U16)ai16);
-#endif
-               if (!checksum) {
-                   sv = NEWSV(39, 0);
-                   sv_setiv(sv, (IV)ai16);
-                   PUSHs(sv_2mortal(sv));
-               }
+                   ai16 = (I16) vtohs((U16) ai16);
+# endif /* HAS_VTOHS */
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)ai16)));
                else if (checksum > bits_in_uv)
-                   cdouble += (NV)ai16;
+                   cdouble += (NV) ai16;
                else
                    cuv += ai16;
            }
            break;
+#endif /* PERL_PACK_CAN_SHRIEKSIGN */
        case 'i':
        case 'i' | TYPE_IS_SHRIEKING:
-           along = (strend - s) / sizeof(int);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &aint, 1, int);
+               int aint;
+               SHIFT_VAR(utf8, s, strend, aint, datumtype);
                DO_BO_UNPACK(aint, i);
-               s += sizeof(int);
-               if (!checksum) {
-                   sv = NEWSV(40, 0);
-#ifdef __osf__
-                    /* Without the dummy below unpack("i", pack("i",-1))
-                     * return 0xFFffFFff instead of -1 for Digital Unix V4.0
-                     * cc with optimization turned on.
-                    *
-                    * The bug was detected in
-                    * DEC C V5.8-009 on Digital UNIX V4.0 (Rev. 1091) (V4.0E)
-                    * with optimization (-O4) turned on.
-                    * DEC C V5.2-040 on Digital UNIX V4.0 (Rev. 564) (V4.0B)
-                    * does not have this problem even with -O4.
-                    *
-                    * This bug was reported as DECC_BUGS 1431
-                    * and tracked internally as GEM_BUGS 7775.
-                    *
-                    * The bug is fixed in
-                    * Tru64 UNIX V5.0:      Compaq C V6.1-006 or later
-                    * UNIX V4.0F support:   DEC C V5.9-006 or later
-                    * UNIX V4.0E support:   DEC C V5.8-011 or later
-                    * and also in DTK.
-                    *
-                    * See also few lines later for the same bug.
-                    */
-                    (aint) ?
-                       sv_setiv(sv, (IV)aint) :
-#endif
-                       sv_setiv(sv, (IV)aint);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)aint)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aint;
                else
@@ -1242,31 +1777,12 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'I':
        case 'I' | TYPE_IS_SHRIEKING:
-           along = (strend - s) / sizeof(unsigned int);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &auint, 1, unsigned int);
+               unsigned int auint;
+               SHIFT_VAR(utf8, s, strend, auint, datumtype);
                DO_BO_UNPACK(auint, i);
-               s += sizeof(unsigned int);
-               if (!checksum) {
-                   sv = NEWSV(41, 0);
-#ifdef __osf__
-                    /* Without the dummy below unpack("I", pack("I",0xFFFFFFFF))
-                     * returns 1.84467440737096e+19 instead of 0xFFFFFFFF.
-                    * See details few lines earlier. */
-                    (auint) ?
-                       sv_setuv(sv, (UV)auint) :
-#endif
-                   sv_setuv(sv, (UV)auint);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV)auint)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auint;
                else
@@ -1274,30 +1790,20 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            }
            break;
        case 'j':
-           along = (strend - s) / IVSIZE;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &aiv, 1, IV);
+               IV aiv;
+               SHIFT_VAR(utf8, s, strend, aiv, datumtype);
 #if IVSIZE == INTSIZE
                DO_BO_UNPACK(aiv, i);
 #elif IVSIZE == LONGSIZE
                DO_BO_UNPACK(aiv, l);
 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
                DO_BO_UNPACK(aiv, 64);
+#else
+               Perl_croak(aTHX_ "'j' not supported on this platform");
 #endif
-               s += IVSIZE;
-               if (!checksum) {
-                   sv = NEWSV(40, 0);
-                   sv_setiv(sv, aiv);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv(aiv)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aiv;
                else
@@ -1305,30 +1811,20 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            }
            break;
        case 'J':
-           along = (strend - s) / UVSIZE;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &auv, 1, UV);
-#if UVSIZE == INTSIZE
+               UV auv;
+               SHIFT_VAR(utf8, s, strend, auv, datumtype);
+#if IVSIZE == INTSIZE
                DO_BO_UNPACK(auv, i);
-#elif UVSIZE == LONGSIZE
+#elif IVSIZE == LONGSIZE
                DO_BO_UNPACK(auv, l);
-#elif defined(HAS_QUAD) && UVSIZE == U64SIZE
+#elif defined(HAS_QUAD) && IVSIZE == U64SIZE
                DO_BO_UNPACK(auv, 64);
+#else
+               Perl_croak(aTHX_ "'J' not supported on this platform");
 #endif
-               s += UVSIZE;
-               if (!checksum) {
-                   sv = NEWSV(41, 0);
-                   sv_setuv(sv, auv);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv(auv)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auv;
                else
@@ -1337,24 +1833,12 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'l' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
-           along = (strend - s) / sizeof(long);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPYNN(s, &along, sizeof(long));
+               long along;
+               SHIFT_VAR(utf8, s, strend, along, datumtype);
                DO_BO_UNPACK(along, l);
-               s += sizeof(long);
-               if (!checksum) {
-                   sv = NEWSV(42, 0);
-                   sv_setiv(sv, (IV)along);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)along)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)along;
                else
@@ -1365,28 +1849,18 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            /* Fallthrough! */
 #endif
        case 'l':
-           along = (strend - s) / SIZE32;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY32(s, &ai32);
+               I32 ai32;
+#if U32SIZE > SIZE32
+               ai32 = 0;
+#endif
+               SHIFT32(utf8, s, strend, &ai32, datumtype);
                DO_BO_UNPACK(ai32, 32);
 #if U32SIZE > SIZE32
-               if (ai32 > 2147483647)
-                   ai32 -= 4294967296;
+               if (ai32 > 2147483647) ai32 -= 4294967296;
 #endif
-               s += SIZE32;
-               if (!checksum) {
-                   sv = NEWSV(42, 0);
-                   sv_setiv(sv, (IV)ai32);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)ai32)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai32;
                else
@@ -1395,24 +1869,12 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            break;
        case 'L' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
-           along = (strend - s) / sizeof(unsigned long);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPYNN(s, &aulong, sizeof(unsigned long));
+               unsigned long aulong;
+               SHIFT_VAR(utf8, s, strend, aulong, datumtype);
                DO_BO_UNPACK(aulong, l);
-               s += sizeof(unsigned long);
-               if (!checksum) {
-                   sv = NEWSV(43, 0);
-                   sv_setuv(sv, (UV)aulong);
-                   PUSHs(sv_2mortal(sv));
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV)aulong)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aulong;
                else
@@ -1425,19 +1887,13 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
        case 'V':
        case 'N':
        case 'L':
-           along = (strend - s) / SIZE32;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY32(s, &au32);
+               U32 au32;
+#if U32SIZE > SIZE32
+               au32 = 0;
+#endif
+               SHIFT32(utf8, s, strend, &au32, datumtype);
                DO_BO_UNPACK(au32, 32);
-               s += SIZE32;
 #ifdef HAS_NTOHL
                if (datumtype == 'N')
                    au32 = PerlSock_ntohl(au32);
@@ -1446,98 +1902,75 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                if (datumtype == 'V')
                    au32 = vtohl(au32);
 #endif
-                if (!checksum) {
-                    sv = NEWSV(43, 0);
-                    sv_setuv(sv, (UV)au32);
-                    PUSHs(sv_2mortal(sv));
-                }
-                else if (checksum > bits_in_uv)
-                    cdouble += (NV)au32;
-                else
-                    cuv += au32;
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVuv((UV)au32)));
+               else if (checksum > bits_in_uv)
+                   cdouble += (NV)au32;
+               else
+                   cuv += au32;
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'V' | TYPE_IS_SHRIEKING:
        case 'N' | TYPE_IS_SHRIEKING:
-           along = (strend - s) / SIZE32;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               COPY32(s, &ai32);
-               s += SIZE32;
-#ifdef HAS_NTOHL
+               I32 ai32;
+# if U32SIZE > SIZE32
+               ai32 = 0;
+# endif
+               SHIFT32(utf8, s, strend, &ai32, datumtype);
+# ifdef HAS_NTOHL
                if (datumtype == ('N' | TYPE_IS_SHRIEKING))
                    ai32 = (I32)PerlSock_ntohl((U32)ai32);
-#endif
-#ifdef HAS_VTOHL
+# endif
+# ifdef HAS_VTOHL
                if (datumtype == ('V' | TYPE_IS_SHRIEKING))
                    ai32 = (I32)vtohl((U32)ai32);
-#endif
-               if (!checksum) {
-                   sv = NEWSV(43, 0);
-                   sv_setiv(sv, (IV)ai32);
-                   PUSHs(sv_2mortal(sv));
-               }
+# endif
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSViv((IV)ai32)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)ai32;
                else
                    cuv += ai32;
            }
            break;
+#endif /* PERL_PACK_CAN_SHRIEKSIGN */
        case 'p':
-           along = (strend - s) / sizeof(char*);
-           if (len > along)
-               len = along;
-           EXTEND(SP, len);
-           EXTEND_MORTAL(len);
            while (len-- > 0) {
-               if (sizeof(char*) > strend - s)
-                   break;
-               else {
-                   Copy(s, &aptr, 1, char*);
-                   DO_BO_UNPACK_P(aptr);
-                   s += sizeof(char*);
-               }
-               sv = NEWSV(44, 0);
-               if (aptr)
-                   sv_setpv(sv, aptr);
-               PUSHs(sv_2mortal(sv));
+               char *aptr;
+               SHIFT_VAR(utf8, s, strend, aptr, datumtype);
+               DO_BO_UNPACK_PC(aptr);
+               /* newSVpv generates undef if aptr is NULL */
+               PUSHs(sv_2mortal(newSVpv(aptr, 0)));
            }
            break;
        case 'w':
-            if (len && unpack_only_one)
-                len = 1;
-           EXTEND(SP, len);
-           EXTEND_MORTAL(len);
            {
                UV auv = 0;
                U32 bytes = 0;
-               
-               while ((len > 0) && (s < strend)) {
-                   auv = (auv << 7) | (*s & 0x7f);
+
+               while (len > 0 && s < strend) {
+                   U8 ch;
+                   ch = SHIFT_BYTE(utf8, s, strend, datumtype);
+                   auv = (auv << 7) | (ch & 0x7f);
                    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
-                   if ((U8)(*s++) < 0x80) {
+                   if (ch < 0x80) {
                        bytes = 0;
-                       sv = NEWSV(40, 0);
-                       sv_setuv(sv, auv);
-                       PUSHs(sv_2mortal(sv));
+                       PUSHs(sv_2mortal(newSVuv(auv)));
                        len--;
                        auv = 0;
+                       continue;
                    }
-                   else if (++bytes >= sizeof(UV)) {   /* promote to string */
+                   if (++bytes >= sizeof(UV)) {        /* promote to string */
                        char *t;
                        STRLEN n_a;
 
                        sv = Perl_newSVpvf(aTHX_ "%.*"UVf, (int)TYPE_DIGITS(UV), auv);
                        while (s < strend) {
-                           sv = mul128(sv, (U8)(*s & 0x7f));
-                           if (!(*s++ & 0x80)) {
+                           ch = SHIFT_BYTE(utf8, s, strend, datumtype);
+                           sv = mul128(sv, (U8)(ch & 0x7f));
+                           if (!(ch & 0x80)) {
                                bytes = 0;
                                break;
                            }
@@ -1559,47 +1992,23 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            if (symptr->howlen == e_star)
                Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
            EXTEND(SP, 1);
-           if (sizeof(char*) > strend - s)
-               break;
-           else {
-               Copy(s, &aptr, 1, char*);
-               DO_BO_UNPACK_P(aptr);
-               s += sizeof(char*);
+           if (sizeof(char*) <= strend - s) {
+               char *aptr;
+               SHIFT_VAR(utf8, s, strend, aptr, datumtype);
+               DO_BO_UNPACK_PC(aptr);
+               /* newSVpvn generates undef if aptr is NULL */
+               PUSHs(sv_2mortal(newSVpvn(aptr, len)));
            }
-           sv = NEWSV(44, 0);
-           if (aptr)
-               sv_setpvn(sv, aptr, len);
-           PUSHs(sv_2mortal(sv));
            break;
 #ifdef HAS_QUAD
        case 'q':
-           along = (strend - s) / sizeof(Quad_t);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-                EXTEND(SP, len);
-                EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               if (s + sizeof(Quad_t) > strend) {
-                   /* Surely this should never happen? NWC  */
-                   aquad = 0;
-               }
-               else {
-                   Copy(s, &aquad, 1, Quad_t);
-                   DO_BO_UNPACK(aquad, 64);
-                   s += sizeof(Quad_t);
-               }
-               if (!checksum) {
-                    sv = NEWSV(42, 0);
-                    if (aquad >= IV_MIN && aquad <= IV_MAX)
-                       sv_setiv(sv, (IV)aquad);
-                    else
-                        sv_setnv(sv, (NV)aquad);
-                    PUSHs(sv_2mortal(sv));
-                }
+               Quad_t aquad;
+               SHIFT_VAR(utf8, s, strend, aquad, datumtype);
+               DO_BO_UNPACK(aquad, 64);
+               if (!checksum)
+                    PUSHs(sv_2mortal(aquad >= IV_MIN && aquad <= IV_MAX ?
+                                    newSViv((IV)aquad) : newSVnv((NV)aquad)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aquad;
                else
@@ -1607,133 +2016,64 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
            }
            break;
        case 'Q':
-           along = (strend - s) / sizeof(Uquad_t);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-                EXTEND(SP, len);
-                EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               if (s + sizeof(Uquad_t) > strend)
-                   auquad = 0;
-               else {
-                   Copy(s, &auquad, 1, Uquad_t);
-                   DO_BO_UNPACK(auquad, 64);
-                   s += sizeof(Uquad_t);
-               }
-               if (!checksum) {
-                   sv = NEWSV(43, 0);
-                   if (auquad <= UV_MAX)
-                       sv_setuv(sv, (UV)auquad);
-                   else
-                       sv_setnv(sv, (NV)auquad);
-                   PUSHs(sv_2mortal(sv));
-               }
+               Uquad_t auquad;
+               SHIFT_VAR(utf8, s, strend, auquad, datumtype);
+               DO_BO_UNPACK(auquad, 64);
+               if (!checksum)
+                   PUSHs(sv_2mortal(auquad <= UV_MAX ?
+                                    newSVuv((UV)auquad):newSVnv((NV)auquad)));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auquad;
                else
                    cuv += auquad;
            }
            break;
-#endif
+#endif /* HAS_QUAD */
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
-           along = (strend - s) / sizeof(float);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &afloat, 1, float);
+               float afloat;
+               SHIFT_VAR(utf8, s, strend, afloat, datumtype);
                DO_BO_UNPACK_N(afloat, float);
-               s += sizeof(float);
-               if (!checksum) {
-                   sv = NEWSV(47, 0);
-                   sv_setnv(sv, (NV)afloat);
-                   PUSHs(sv_2mortal(sv));
-               }
-               else {
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)afloat)));
+               else
                    cdouble += afloat;
-               }
            }
            break;
        case 'd':
-           along = (strend - s) / sizeof(double);
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &adouble, 1, double);
+               double adouble;
+               SHIFT_VAR(utf8, s, strend, adouble, datumtype);
                DO_BO_UNPACK_N(adouble, double);
-               s += sizeof(double);
-               if (!checksum) {
-                   sv = NEWSV(48, 0);
-                   sv_setnv(sv, (NV)adouble);
-                   PUSHs(sv_2mortal(sv));
-               }
-               else {
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)adouble)));
+               else
                    cdouble += adouble;
-               }
            }
            break;
        case 'F':
-           along = (strend - s) / NVSIZE;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &anv, 1, NV);
+               NV anv;
+               SHIFT_VAR(utf8, s, strend, anv, datumtype);
                DO_BO_UNPACK_N(anv, NV);
-               s += NVSIZE;
-               if (!checksum) {
-                   sv = NEWSV(48, 0);
-                   sv_setnv(sv, anv);
-                   PUSHs(sv_2mortal(sv));
-               }
-               else {
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv(anv)));
+               else
                    cdouble += anv;
-               }
            }
            break;
 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
        case 'D':
-           along = (strend - s) / LONG_DOUBLESIZE;
-           if (len > along)
-               len = along;
-           if (!checksum) {
-                if (len && unpack_only_one)
-                    len = 1;
-               EXTEND(SP, len);
-               EXTEND_MORTAL(len);
-           }
            while (len-- > 0) {
-               Copy(s, &aldouble, 1, long double);
+               long double aldouble;
+               SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
                DO_BO_UNPACK_N(aldouble, long double);
-               s += LONG_DOUBLESIZE;
-               if (!checksum) {
-                   sv = NEWSV(48, 0);
-                   sv_setnv(sv, (NV)aldouble);
-                   PUSHs(sv_2mortal(sv));
-               }
-               else {cdouble += aldouble;
-               }
+               if (!checksum)
+                   PUSHs(sv_2mortal(newSVnv((NV)aldouble)));
+               else
+                   cdouble += aldouble;
            }
            break;
 #endif
@@ -1754,78 +2094,105 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                  */
                 PL_uudmap[' '] = 0;
             }
-
-           along = (strend - s) * 3 / 4;
-           sv = NEWSV(42, along);
-           if (along)
-               SvPOK_on(sv);
-           while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
-               I32 a, b, c, d;
-               char hunk[4];
-
-               hunk[3] = '\0';
-               len = PL_uudmap[*(U8*)s++] & 077;
-               while (len > 0) {
-                   if (s < strend && ISUUCHAR(*s))
-                       a = PL_uudmap[*(U8*)s++] & 077;
-                   else
-                       a = 0;
-                   if (s < strend && ISUUCHAR(*s))
-                       b = PL_uudmap[*(U8*)s++] & 077;
-                   else
-                       b = 0;
-                   if (s < strend && ISUUCHAR(*s))
-                       c = PL_uudmap[*(U8*)s++] & 077;
-                   else
-                       c = 0;
-                   if (s < strend && ISUUCHAR(*s))
-                       d = PL_uudmap[*(U8*)s++] & 077;
-                   else
-                       d = 0;
-                   hunk[0] = (char)((a << 2) | (b >> 4));
-                   hunk[1] = (char)((b << 4) | (c >> 2));
-                   hunk[2] = (char)((c << 6) | d);
-                   sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
-                   len -= 3;
+           {
+               STRLEN l = (STRLEN) (strend - s) * 3 / 4;
+               sv = sv_2mortal(NEWSV(42, l));
+               if (l) SvPOK_on(sv);
+           }
+           if (utf8) {
+               while (next_uni_uu(aTHX_ &s, strend, &len)) {
+                   I32 a, b, c, d;
+                   char hunk[4];
+
+                   hunk[3] = '\0';
+                   while (len > 0) {
+                       next_uni_uu(aTHX_ &s, strend, &a);
+                       next_uni_uu(aTHX_ &s, strend, &b);
+                       next_uni_uu(aTHX_ &s, strend, &c);
+                       next_uni_uu(aTHX_ &s, strend, &d);
+                       hunk[0] = (char)((a << 2) | (b >> 4));
+                       hunk[1] = (char)((b << 4) | (c >> 2));
+                       hunk[2] = (char)((c << 6) | d);
+                       sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
+                       len -= 3;
+                   }
+                   if (s < strend) {
+                       if (*s == '\n') s++;
+                       else {
+                           /* possible checksum byte */
+                           char *skip = s+UTF8SKIP(s);
+                           if (skip < strend && *skip == '\n') s = skip+1;
+                       }
+                   }
+               }
+           } else {
+               while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
+                   I32 a, b, c, d;
+                   char hunk[4];
+
+                   hunk[3] = '\0';
+                   len = PL_uudmap[*(U8*)s++] & 077;
+                   while (len > 0) {
+                       if (s < strend && ISUUCHAR(*s))
+                           a = PL_uudmap[*(U8*)s++] & 077;
+                       else
+                           a = 0;
+                       if (s < strend && ISUUCHAR(*s))
+                           b = PL_uudmap[*(U8*)s++] & 077;
+                       else
+                           b = 0;
+                       if (s < strend && ISUUCHAR(*s))
+                           c = PL_uudmap[*(U8*)s++] & 077;
+                       else
+                           c = 0;
+                       if (s < strend && ISUUCHAR(*s))
+                           d = PL_uudmap[*(U8*)s++] & 077;
+                       else
+                           d = 0;
+                       hunk[0] = (char)((a << 2) | (b >> 4));
+                       hunk[1] = (char)((b << 4) | (c >> 2));
+                       hunk[2] = (char)((c << 6) | d);
+                       sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
+                       len -= 3;
+                   }
+                   if (*s == '\n')
+                       s++;
+                   else        /* possible checksum byte */
+                       if (s + 1 < strend && s[1] == '\n')
+                           s += 2;
                }
-               if (*s == '\n')
-                   s++;
-               else    /* possible checksum byte */
-                   if (s + 1 < strend && s[1] == '\n')
-                       s += 2;
            }
-           XPUSHs(sv_2mortal(sv));
+           XPUSHs(sv);
            break;
        }
 
        if (checksum) {
-           sv = NEWSV(42, 0);
            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
              (checksum > bits_in_uv &&
-              strchr("csSiIlLnNUvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
-               NV trouble;
+              strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
+               NV trouble, anv;
 
-                adouble = (NV) (1 << (checksum & 15));
+                anv = (NV) (1 << (checksum & 15));
                while (checksum >= 16) {
                    checksum -= 16;
-                   adouble *= 65536.0;
+                   anv *= 65536.0;
                }
                while (cdouble < 0.0)
-                   cdouble += adouble;
-               cdouble = Perl_modf(cdouble / adouble, &trouble) * adouble;
-               sv_setnv(sv, cdouble);
+                   cdouble += anv;
+               cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
+               sv = newSVnv(cdouble);
            }
            else {
                if (checksum < bits_in_uv) {
                    UV mask = ((UV)1 << checksum) - 1;
                    cuv &= mask;
                }
-               sv_setuv(sv, cuv);
+               sv = newSVuv(cuv);
            }
            XPUSHs(sv_2mortal(sv));
            checksum = 0;
        }
-    
+
         if (symptr->flags & FLAG_SLASH){
             if (SP - PL_stack_base - start_sp_offset <= 0)
                 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
@@ -1845,6 +2212,7 @@ S_unpack_rec(pTHX_ register tempsym_t* symptr, register char *s, char *strbeg, c
                Perl_croak(aTHX_ "Code missing after '/' in unpack" );
             }
             datumtype = symptr->code;
+            explicit_length = FALSE;
            goto redo_switch;
         }
     }
@@ -1862,23 +2230,16 @@ PP(pp_unpack)
     I32 gimme = GIMME_V;
     STRLEN llen;
     STRLEN rlen;
-    register char *pat = SvPV(left, llen);
-#ifdef PACKED_IS_OCTETS
-    /* Packed side is assumed to be octets - so force downgrade if it
-       has been UTF-8 encoded by accident
-     */
-    register char *s = SvPVbyte(right, rlen);
-#else
-    register char *s = SvPV(right, rlen);
-#endif
+    char *pat = SvPV(left,  llen);
+    char *s   = SvPV(right, rlen);
     char *strend = s + rlen;
-    register char *patend = pat + llen;
-    register I32 cnt;
+    char *patend = pat + llen;
+    I32 cnt;
 
     PUTBACK;
     cnt = unpackstring(pat, patend, s, strend,
                     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
-                    | (DO_UTF8(right) ? FLAG_UNPACK_DO_UTF8 : 0));
+                    | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
 
     SPAGAIN;
     if ( !cnt && gimme == G_SCALAR )
@@ -1886,32 +2247,27 @@ PP(pp_unpack)
     RETURN;
 }
 
-STATIC void
-S_doencodes(pTHX_ register SV *sv, register char *s, register I32 len)
+STATIC U8 *
+doencodes(U8 *h, char *s, I32 len) /* encode len bytes at s as one PL_uuemap line written at h; no bounds check on h here, so the caller presumably sizes the buffer */
 {
-    char hunk[5];
-
-    *hunk = PL_uuemap[len];
-    sv_catpvn(sv, hunk, 1);
-    hunk[4] = '\0';
+    *h++ = PL_uuemap[len]; /* the line starts with the encoded byte count */
     while (len > 2) {
-       hunk[0] = PL_uuemap[(077 & (*s >> 2))];
-       hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017)))];
-       hunk[2] = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
-       hunk[3] = PL_uuemap[(077 & (s[2] & 077))];
-       sv_catpvn(sv, hunk, 4);
+       *h++ = PL_uuemap[(077 & (s[0] >> 2))]; /* each 3 input bytes become 4 six-bit indexes into PL_uuemap */
+       *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
+       *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
+       *h++ = PL_uuemap[(077 & (s[2] & 077))];
        s += 3;
        len -= 3;
     }
     if (len > 0) {
        char r = (len > 1 ? s[1] : '\0');
-       hunk[0] = PL_uuemap[(077 & (*s >> 2))];
-       hunk[1] = PL_uuemap[(077 & (((*s << 4) & 060) | ((r >> 4) & 017)))];
-       hunk[2] = PL_uuemap[(077 & ((r << 2) & 074))];
-       hunk[3] = PL_uuemap[0];
-       sv_catpvn(sv, hunk, 4);
+       *h++ = PL_uuemap[(077 & (s[0] >> 2))]; /* 1 or 2 leftover bytes: a missing second byte is taken as '\0' (see r) */
+       *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
+       *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
+       *h++ = PL_uuemap[0]; /* fourth character of the final group is always the encoding of 0 */
     }
-    sv_catpvn(sv, "\n", 1);
+    *h++ = '\n'; /* terminate the line; no trailing '\0' is written */
+    return h; /* points one past the last byte written */
 }
 
 STATIC SV *
@@ -2024,494 +2380,847 @@ The engine implementing pack() Perl function.
 void
 Perl_packlist(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist )
 {
+    STRLEN no_len; /* receives the length from SvPV_force below; not read afterwards */
     tempsym_t sym = { 0 };
+
     sym.patptr = pat;
     sym.patend = patend;
     sym.flags  = FLAG_PACK;
 
+    /* We're going to do changes through SvPVX(cat). Make sure it's valid.
+       Also make sure any UTF8 flag is loaded */
+    SvPV_force(cat, no_len);
+    if (DO_UTF8(cat)) sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; /* cat is already UTF-8: pack_rec starts with both UTF-8 flags set */
+
     (void)pack_rec( cat, &sym, beglist, endlist );
 }
 
+/* like sv_utf8_upgrade, but also repoint the group start markers (the strbeg offsets of sym_ptr and of every group on its ->previous chain) */
+STATIC void
+marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
+    STRLEN len;
+    tempsym_t *group;
+    char *from_ptr, *to_start, *to_ptr, **marks, **m, *from_start, *from_end;
+
+    if (SvUTF8(sv)) return; /* already flagged UTF-8: nothing to do */
+
+    from_start = SvPVX(sv);
+    from_end = from_start + SvCUR(sv);
+    for (from_ptr = from_start; from_ptr < from_end; from_ptr++) /* stop at the first byte that is not invariant */
+       if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
+    if (from_ptr == from_end) {
+       /* Simple case: no character needs to be changed */
+       SvUTF8_on(sv);
+       return;
+    }
+
+    len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; /* worst case: every remaining byte grows to UTF8_EXPAND bytes, plus the unchanged prefix and the final '\0' */
+    New('U', to_start, len, char);
+    Copy(from_start, to_start, from_ptr-from_start, char); /* the invariant prefix is copied verbatim */
+    to_ptr = to_start + (from_ptr-from_start);
+
+    New('U', marks, sym_ptr->level+2, char *); /* slots 0..level for the groups, plus one sentinel slot */
+    for (group=sym_ptr; group; group = group->previous)
+       marks[group->level] = from_start + group->strbeg; /* turn each group's strbeg offset into a pointer into the old buffer */
+    marks[sym_ptr->level+1] = from_end+1; /* sentinel beyond the end: never below or equal to from_ptr in the loops that follow */
+    for (m = marks; *m < from_ptr; m++) /* marks inside the unchanged prefix keep their offset; NOTE(review): this single forward walk of m presumably relies on marks being non-decreasing with level -- confirm */
+       *m = to_start + (*m-from_start);
+
+    for (;from_ptr < from_end; from_ptr++) {
+       while (*m == from_ptr) *m++ = to_ptr; /* repoint every mark that sits on this source byte */
+       to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); /* append this byte's UTF-8 encoding */
+    }
+    *to_ptr = 0;
+
+    while (*m == from_ptr) *m++ = to_ptr; /* marks sitting exactly at the end of the old string */
+    if (m != marks + sym_ptr->level+1) { /* all real marks must be consumed, leaving m on the sentinel slot */
+       Safefree(marks);
+       Safefree(to_start);
+       Perl_croak(aTHX_ "Assertion: marks beyond string end");
+    }
+    for (group=sym_ptr; group; group = group->previous)
+       group->strbeg = marks[group->level] - to_start; /* store the marks back as offsets into the new buffer */
+    Safefree(marks);
+
+    if (SvOOK(sv)) { /* undo the OOK offset: from_start is stepped back by SvIVX(sv) before it is freed below */
+       if (SvIVX(sv)) {
+           SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
+           from_start -= SvIVX(sv);
+           SvIV_set(sv, 0);
+       }
+       SvFLAGS(sv) &= ~SVf_OOK;
+    }
+    if (SvLEN(sv) != 0) /* NOTE(review): SvLEN == 0 presumably means sv does not own the old buffer -- confirm */
+       Safefree(from_start);
+    SvPV_set(sv, to_start); /* install the new buffer: pointer, used length, allocated size, UTF-8 flag */
+    SvCUR_set(sv, to_ptr - to_start);
+    SvLEN_set(sv, len);
+    SvUTF8_on(sv);
+}
+
+/* Exponential string grower. Makes string extension effectively O(n).
+   needed says how many extra bytes we need (not counting the final '\0').
+   Only grows the string if there is an actual lack of space.
+   Returns SvPVX(sv) when nothing had to grow, otherwise the result of SvGROW. */
+STATIC char *
+sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
+    STRLEN cur = SvCUR(sv);
+    STRLEN len = SvLEN(sv);
+    STRLEN extend;
+    if (len - cur > needed) return SvPVX(sv); /* strictly more than needed bytes free, so the '\0' fits too; NOTE(review): assumes SvLEN(sv) >= SvCUR(sv), otherwise a presumably unsigned STRLEN difference would wrap -- confirm */
+    extend = needed > len ? needed : len; /* add at least len again, so the allocation at least doubles */
+    return SvGROW(sv, len+extend+1); /* the +1 leaves room for the final '\0' */
+}
 
 STATIC
 SV **
-S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV **endlist )
+S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 {
-    register I32 items;
-    STRLEN fromlen;
-    register I32 len = 0;
-    SV *fromstr;
-    /*SUPPRESS 442*/
-    static char null10[] = {0,0,0,0,0,0,0,0,0,0};
-    static char *space10 = "          ";
-    bool found;
-
-    /* These must not be in registers: */
-    char achar;
-    I16 ai16;
-    U16 au16;
-    I32 ai32;
-    U32 au32;
-#ifdef HAS_QUAD
-    Quad_t aquad;
-    Uquad_t auquad;
-#endif
-#if SHORTSIZE != SIZE16
-    short ashort;
-    unsigned short aushort;
-#endif
-    int aint;
-    unsigned int auint;
-#if LONGSIZE != SIZE32
-    long along;
-    unsigned long aulong;
-#endif
-    char *aptr;
-    float afloat;
-    double adouble;
-#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-    long double aldouble;
-#endif
-    IV aiv;
-    UV auv;
-    NV anv;
-
-    int strrelbeg = SvCUR(cat);
     tempsym_t lookahead;
-
-    items = endlist - beglist;
-    found = next_symbol( symptr );
-
-#ifndef PACKED_IS_OCTETS
-    if (symptr->level == 0 && found && symptr->code == 'U' ){
-       SvUTF8_on(cat);
+    I32 items  = endlist - beglist;
+    bool found = next_symbol(symptr);
+    bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
+
+    if (symptr->level == 0 && found && symptr->code == 'U') {
+       marked_upgrade(aTHX_ cat, symptr);
+       symptr->flags |= FLAG_DO_UTF8;
+       utf8 = 0;
     }
-#endif
+    symptr->strbeg = SvCUR(cat);
 
     while (found) {
+       SV *fromstr;
+       STRLEN fromlen;
+       I32 len;
        SV *lengthcode = Nullsv;
-#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
-
         I32 datumtype = symptr->code;
-        howlen_t howlen;
+        howlen_t howlen = symptr->howlen;
+       char *start = SvPVX(cat);
+       char *cur   = start + SvCUR(cat);
 
-        switch( howlen = symptr->howlen ){
-        case e_no_len:
-       case e_number:
-           len = symptr->length;
+#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
+
+        switch (howlen) {
+         case e_star:
+           len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
+               0 : items;
            break;
-        case e_star:
-           len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 0 : items; 
+         default:
+           /* e_no_len and e_number */
+           len = symptr->length;
            break;
         }
 
+       if (len) {
+           packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
+
+           if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
+               /* We can process this letter. */
+               STRLEN size = props & PACK_SIZE_MASK;
+               GROWING(utf8, cat, start, cur, (STRLEN) len * size);
+           }
+        }
+
         /* Look ahead for next symbol. Do we have code/code? */
         lookahead = *symptr;
         found = next_symbol(&lookahead);
-       if ( symptr->flags & FLAG_SLASH ) {
-           if (found){
-               if ( 0 == strchr( "aAZ", lookahead.code ) ||
-                     e_star != lookahead.howlen )
-                   Perl_croak(aTHX_ "'/' must be followed by 'a*', 'A*' or 'Z*' in pack");
-               lengthcode = sv_2mortal(newSViv(sv_len(items > 0
-                                                  ? *beglist : &PL_sv_no)
-                                           + (lookahead.code == 'Z' ? 1 : 0)));
+       if (symptr->flags & FLAG_SLASH) {
+           IV count;
+           if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
+           if (strchr("aAZ", lookahead.code)) {
+               if (lookahead.howlen == e_number) count = lookahead.length;
+               else {
+                   if (items > 0)
+                       count = DO_UTF8(*beglist) ?
+                           sv_len_utf8(*beglist) : sv_len(*beglist);
+                   else count = 0;
+                   if (lookahead.code == 'Z') count++;
+               }
            } else {
-               Perl_croak(aTHX_ "Code missing after '/' in pack");
-            }
+               if (lookahead.howlen == e_number && lookahead.length < items)
+                   count = lookahead.length;
+               else count = items;
+           }
+           lookahead.howlen = e_number;
+           lookahead.length = count;
+           lengthcode = sv_2mortal(newSViv(count));
        }
 
+       /* Code inside the switch must take care to properly update
+          cat (CUR length and '\0' termination) if it updated *cur and
+          doesn't simply leave using break */
        switch(TYPE_NO_ENDIANNESS(datumtype)) {
        default:
-           Perl_croak(aTHX_ "Invalid type '%c' in pack", (int)TYPE_NO_MODIFIERS(datumtype));
+           Perl_croak(aTHX_ "Invalid type '%c' in pack",
+                      (int) TYPE_NO_MODIFIERS(datumtype));
        case '%':
            Perl_croak(aTHX_ "'%%' may not be used in pack");
+       {
+           char *from;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '.' | TYPE_IS_SHRIEKING:
+#endif
+       case '.':
+           if (howlen == e_star) from = start;
+           else if (len == 0) from = cur;
+           else {
+               tempsym_t *group = symptr;
+
+               while (--len && group) group = group->previous;
+               from = group ? start + group->strbeg : start;
+           }
+           fromstr = NEXTFROM;
+           len = SvIV(fromstr);
+           goto resize;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+       case '@' | TYPE_IS_SHRIEKING:
+#endif
        case '@':
-           len += strrelbeg - SvCUR(cat);
-           if (len > 0)
-               goto grow;
-           len = -len;
-           if (len > 0)
+           from = start + symptr->strbeg;
+         resize:
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
+           if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
+#else /* PERL_PACK_CAN_SHRIEKSIGN */
+           if (utf8)
+#endif
+               if (len >= 0) {
+                   while (len && from < cur) {
+                       from += UTF8SKIP(from);
+                       len--;
+                   }
+                   if (from > cur)
+                       Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+                   if (len) {
+                       /* Here we know from == cur */
+                     grow:
+                       GROWING(0, cat, start, cur, len);
+                       Zero(cur, len, char);
+                       cur += len;
+                   } else if (from < cur) {
+                       len = cur - from;
+                       goto shrink;
+                   } else goto no_change;
+               } else {
+                   cur = from;
+                   len = -len;
+                   goto utf8_shrink;
+               }
+           else {
+               len -= cur - from;
+               if (len > 0) goto grow;
+               if (len == 0) goto no_change;
+               len = -len;
                goto shrink;
+           }
            break;
-       case '(':
-       {
+       }
+       case '(': {
             tempsym_t savsym = *symptr;
            U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
            symptr->flags |= group_modifiers;
             symptr->patend = savsym.grpend;
             symptr->level++;
+           symptr->previous = &lookahead;
            while (len--) {
+               U32 was_utf8;
+               if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
+               else      symptr->flags &= ~FLAG_PARSE_UTF8;
+               was_utf8 = SvUTF8(cat);
                symptr->patptr = savsym.grpbeg;
-               beglist = pack_rec(cat, symptr, beglist, endlist );
+               beglist = pack_rec(cat, symptr, beglist, endlist);
+               if (SvUTF8(cat) != was_utf8)
+                   /* This had better be an upgrade while in utf8==0 mode */
+                   utf8 = 1;
+
                if (savsym.howlen == e_star && beglist == endlist)
                    break;              /* No way to continue */
            }
-           symptr->flags &= ~group_modifiers;
-            lookahead.flags = symptr->flags;
-            *symptr = savsym;
-           break;
+           lookahead.flags  = symptr->flags & ~group_modifiers;
+           goto no_change;
        }
        case 'X' | TYPE_IS_SHRIEKING:
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           len = (SvCUR(cat)) % len;
+           if (utf8) {
+               char *hop, *last;
+               I32 l = len;
+               hop = last = start;
+               while (hop < cur) {
+                   hop += UTF8SKIP(hop);
+                   if (--l == 0) {
+                       last = hop;
+                       l = len;
+                   }
+               }
+               if (last > cur)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+               cur = last;
+               break;
+           }
+           len = (cur-start) % len;
            /* FALL THROUGH */
        case 'X':
-         shrink:
-           if ((I32)SvCUR(cat) < len)
-               Perl_croak(aTHX_ "'X' outside of string in pack");
-           SvCUR(cat) -= len;
-           *SvEND(cat) = '\0';
-           break;
-       case 'x' | TYPE_IS_SHRIEKING:
+           if (utf8) {
+               if (len < 1) goto no_change;
+             utf8_shrink:
+               while (len > 0) {
+                   if (cur <= start)
+                       Perl_croak(aTHX_ "'%c' outside of string in pack",
+                                  (int) TYPE_NO_MODIFIERS(datumtype));
+                   while (--cur, UTF8_IS_CONTINUATION(*cur)) {
+                       if (cur <= start)
+                           Perl_croak(aTHX_ "'%c' outside of string in pack",
+                                      (int) TYPE_NO_MODIFIERS(datumtype));
+                   }
+                   len--;
+               }
+           } else {
+             shrink:
+               if (cur - start < len)
+                   Perl_croak(aTHX_ "'%c' outside of string in pack",
+                              (int) TYPE_NO_MODIFIERS(datumtype));
+               cur -= len;
+           }
+           if (cur < start+symptr->strbeg) {
+               /* Make sure group starts don't point into the void */
+               tempsym_t *group;
+               STRLEN length = cur-start;
+               for (group = symptr;
+                    group && length < group->strbeg;
+                    group = group->previous) group->strbeg = length;
+               lookahead.strbeg = length;
+           }
+           break;
+       case 'x' | TYPE_IS_SHRIEKING: {
+           I32 ai32;
            if (!len)                   /* Avoid division by 0 */
                len = 1;
-           aint = (SvCUR(cat)) % len;
-           if (aint)                   /* Other portable ways? */
-               len = len - aint;
-           else
-               len = 0;
-           /* FALL THROUGH */
-
+           if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
+           else      ai32 = (cur - start) % len;
+           if (ai32 == 0) goto no_change;
+           len -= ai32;
+       }
+       /* FALL THROUGH */
        case 'x':
-         grow:
-           while (len >= 10) {
-               sv_catpvn(cat, null10, 10);
-               len -= 10;
-           }
-           sv_catpvn(cat, null10, len);
-           break;
+           goto grow;
        case 'A':
        case 'Z':
-       case 'a':
+       case 'a': {
+           char *aptr;
+
            fromstr = NEXTFROM;
            aptr = SvPV(fromstr, fromlen);
-           if (howlen == e_star) {   
-               len = fromlen;
-               if (datumtype == 'Z')
-                   ++len;
-           }
-           if ((I32)fromlen >= len) {
-               sv_catpvn(cat, aptr, len);
-               if (datumtype == 'Z')
-                   *(SvEND(cat)-1) = '\0';
-           }
-           else {
-               sv_catpvn(cat, aptr, fromlen);
+           if (DO_UTF8(fromstr)) {
+               char *end, *s;
+
+               if (!utf8 && !SvUTF8(cat)) {
+                   marked_upgrade(aTHX_ cat, symptr);
+                   lookahead.flags |= FLAG_DO_UTF8;
+                   lookahead.strbeg = symptr->strbeg;
+                   utf8 = 1;
+                   start = SvPVX(cat);
+                   cur = start + SvCUR(cat);
+               }
+               if (howlen == e_star) {
+                   if (utf8) goto string_copy;
+                   len = fromlen+1;
+               }
+               s = aptr;
+               end = aptr + fromlen;
+               fromlen = datumtype == 'Z' ? len-1 : len;
+               while ((I32) fromlen > 0 && s < end) {
+                   s += UTF8SKIP(s);
+                   fromlen--;
+               }
+               if (s > end)
+                   Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
+               if (utf8) {
+                   len = fromlen;
+                   if (datumtype == 'Z') len++;
+                   fromlen = s-aptr;
+                   len += fromlen;
+
+                   goto string_copy;
+               }
+               fromlen = len - fromlen;
+               if (datumtype == 'Z') fromlen--;
+               if (howlen == e_star) {
+                   len = fromlen;
+                   if (datumtype == 'Z') len++;
+               }
+               GROWING(0, cat, start, cur, len);
+               if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
+                                 datumtype | TYPE_IS_PACK))
+                   Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
+               cur += fromlen;
                len -= fromlen;
-               if (datumtype == 'A') {
-                   while (len >= 10) {
-                       sv_catpvn(cat, space10, 10);
-                       len -= 10;
-                   }
-                   sv_catpvn(cat, space10, len);
+           } else if (utf8) {
+               if (howlen == e_star) {
+                   len = fromlen;
+                   if (datumtype == 'Z') len++;
                }
-               else {
-                   while (len >= 10) {
-                       sv_catpvn(cat, null10, 10);
-                       len -= 10;
-                   }
-                   sv_catpvn(cat, null10, len);
+               if (len <= (I32) fromlen) {
+                   fromlen = len;
+                   if (datumtype == 'Z' && fromlen > 0) fromlen--;
+               }
+               /* assumes a byte expands to at most UTF8_EXPAND bytes on
+                  upgrade, so:
+                  expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
+               GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
+               len -= fromlen;
+               while (fromlen > 0) {
+                   cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
+                   aptr++;
+                   fromlen--;
+               }
+           } else {
+             string_copy:
+               if (howlen == e_star) {
+                   len = fromlen;
+                   if (datumtype == 'Z') len++;
                }
+               if (len <= (I32) fromlen) {
+                   fromlen = len;
+                   if (datumtype == 'Z' && fromlen > 0) fromlen--;
+               }
+               GROWING(0, cat, start, cur, len);
+               Copy(aptr, cur, fromlen, char);
+               cur += fromlen;
+               len -= fromlen;
            }
+           memset(cur, datumtype == 'A' ? ' ' : '\0', len);
+           cur += len;
            break;
+       }
        case 'B':
-       case 'b':
-           {
-               register char *str;
-               I32 saveitems;
+       case 'b': {
+           char *str, *end;
+           I32 l, field_len;
+           U8 bits;
+           bool utf8_source;
+           U32 utf8_flags;
 
-               fromstr = NEXTFROM;
-               saveitems = items;
-               str = SvPV(fromstr, fromlen);
-               if (howlen == e_star)
-                   len = fromlen;
-               aint = SvCUR(cat);
-               SvCUR(cat) += (len+7)/8;
-               SvGROW(cat, SvCUR(cat) + 1);
-               aptr = SvPVX(cat) + aint;
-               if (len > (I32)fromlen)
-                   len = fromlen;
-               aint = len;
-               items = 0;
-               if (datumtype == 'B') {
-                   for (len = 0; len++ < aint;) {
-                       items |= *str++ & 1;
-                       if (len & 7)
-                           items <<= 1;
-                       else {
-                           *aptr++ = items & 0xff;
-                           items = 0;
-                       }
+           fromstr = NEXTFROM;
+           str = SvPV(fromstr, fromlen);
+           end = str + fromlen;
+           if (DO_UTF8(fromstr)) {
+               utf8_source = TRUE;
+               utf8_flags  = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+           } else {
+               utf8_source = FALSE;
+               utf8_flags  = 0; /* Unused, but keep compilers happy */
+           }
+           if (howlen == e_star) len = fromlen;
+           field_len = (len+7)/8;
+           GROWING(utf8, cat, start, cur, field_len);
+           if (len > (I32)fromlen) len = fromlen;
+           bits = 0;
+           l = 0;
+           if (datumtype == 'B')
+               while (l++ < len) {
+                   if (utf8_source) {
+                       UV val;
+                       NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+                       bits |= val & 1;
+                   } else bits |= *str++ & 1;
+                   if (l & 7) bits <<= 1;
+                   else {
+                       PUSH_BYTE(utf8, cur, bits);
+                       bits = 0;
                    }
                }
-               else {
-                   for (len = 0; len++ < aint;) {
-                       if (*str++ & 1)
-                           items |= 128;
-                       if (len & 7)
-                           items >>= 1;
-                       else {
-                           *aptr++ = items & 0xff;
-                           items = 0;
-                       }
+           else
+               /* datumtype == 'b' */
+               while (l++ < len) {
+                   if (utf8_source) {
+                       UV val;
+                       NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+                       if (val & 1) bits |= 0x80;
+                   } else if (*str++ & 1)
+                       bits |= 0x80;
+                   if (l & 7) bits >>= 1;
+                   else {
+                       PUSH_BYTE(utf8, cur, bits);
+                       bits = 0;
                    }
                }
-               if (aint & 7) {
-                   if (datumtype == 'B')
-                       items <<= 7 - (aint & 7);
-                   else
-                       items >>= 7 - (aint & 7);
-                   *aptr++ = items & 0xff;
-               }
-               str = SvPVX(cat) + SvCUR(cat);
-               while (aptr <= str)
-                   *aptr++ = '\0';
-
-               items = saveitems;
+           l--;
+           if (l & 7) {
+               if (datumtype == 'B')
+                   bits <<= 7 - (l & 7);
+               else
+                   bits >>= 7 - (l & 7);
+               PUSH_BYTE(utf8, cur, bits);
+               l += 7;
            }
+           /* Determine how many chars are left in the requested field */
+           l /= 8;
+           if (howlen == e_star) field_len = 0;
+           else field_len -= l;
+           Zero(cur, field_len, char);
+           cur += field_len;
            break;
+       }
        case 'H':
-       case 'h':
-           {
-               register char *str;
-               I32 saveitems;
+       case 'h': {
+           char *str, *end;
+           I32 l, field_len;
+           U8 bits;
+           bool utf8_source;
+           U32 utf8_flags;
 
-               fromstr = NEXTFROM;
-               saveitems = items;
-               str = SvPV(fromstr, fromlen);
-               if (howlen == e_star)
-                   len = fromlen;
-               aint = SvCUR(cat);
-               SvCUR(cat) += (len+1)/2;
-               SvGROW(cat, SvCUR(cat) + 1);
-               aptr = SvPVX(cat) + aint;
-               if (len > (I32)fromlen)
-                   len = fromlen;
-               aint = len;
-               items = 0;
-               if (datumtype == 'H') {
-                   for (len = 0; len++ < aint;) {
-                       if (isALPHA(*str))
-                           items |= ((*str++ & 15) + 9) & 15;
+           fromstr = NEXTFROM;
+           str = SvPV(fromstr, fromlen);
+           end = str + fromlen;
+           if (DO_UTF8(fromstr)) {
+               utf8_source = TRUE;
+               utf8_flags  = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
+           } else {
+               utf8_source = FALSE;
+               utf8_flags  = 0; /* Unused, but keep compilers happy */
+           }
+           if (howlen == e_star) len = fromlen;
+           field_len = (len+1)/2;
+           GROWING(utf8, cat, start, cur, field_len);
+           if (!utf8 && len > (I32)fromlen) len = fromlen;
+           bits = 0;
+           l = 0;
+           if (datumtype == 'H')
+               while (l++ < len) {
+                   if (utf8_source) {
+                       UV val;
+                       NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+                       if (val < 256 && isALPHA(val))
+                           bits |= (val + 9) & 0xf;
                        else
-                           items |= *str++ & 15;
-                       if (len & 1)
-                           items <<= 4;
-                       else {
-                           *aptr++ = items & 0xff;
-                           items = 0;
-                       }
+                           bits |= val & 0xf;
+                   } else if (isALPHA(*str))
+                       bits |= (*str++ + 9) & 0xf;
+                   else
+                       bits |= *str++ & 0xf;
+                   if (l & 1) bits <<= 4;
+                   else {
+                       PUSH_BYTE(utf8, cur, bits);
+                       bits = 0;
                    }
                }
-               else {
-                   for (len = 0; len++ < aint;) {
-                       if (isALPHA(*str))
-                           items |= (((*str++ & 15) + 9) & 15) << 4;
+           else
+               while (l++ < len) {
+                   if (utf8_source) {
+                       UV val;
+                       NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
+                       if (val < 256 && isALPHA(val))
+                           bits |= ((val + 9) & 0xf) << 4;
                        else
-                           items |= (*str++ & 15) << 4;
-                       if (len & 1)
-                           items >>= 4;
-                       else {
-                           *aptr++ = items & 0xff;
-                           items = 0;
-                       }
+                           bits |= (val & 0xf) << 4;
+                   } else if (isALPHA(*str))
+                       bits |= ((*str++ + 9) & 0xf) << 4;
+                   else
+                       bits |= (*str++ & 0xf) << 4;
+                   if (l & 1) bits >>= 4;
+                   else {
+                       PUSH_BYTE(utf8, cur, bits);
+                       bits = 0;
                    }
                }
-               if (aint & 1)
-                   *aptr++ = items & 0xff;
-               str = SvPVX(cat) + SvCUR(cat);
-               while (aptr <= str)
-                   *aptr++ = '\0';
-
-               items = saveitems;
+           l--;
+           if (l & 1) {
+               PUSH_BYTE(utf8, cur, bits);
+               l++;
+           }
+           /* Determine how many chars are left in the requested field */
+           l /= 2;
+           if (howlen == e_star) field_len = 0;
+           else field_len -= l;
+           Zero(cur, field_len, char);
+           cur += field_len;
+           break;
+       }
+       case 'c':
+           while (len-- > 0) {
+               IV aiv;
+               fromstr = NEXTFROM;
+               aiv = SvIV(fromstr);
+               if ((-128 > aiv || aiv > 127) &&
+                   ckWARN(WARN_PACK))
+                   Perl_warner(aTHX_ packWARN(WARN_PACK),
+                               "Character in 'c' format wrapped in pack");
+               PUSH_BYTE(utf8, cur, aiv & 0xff);
            }
            break;
        case 'C':
-       case 'c':
+           if (len == 0) {
+               utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
+               break;
+           }
+           GROWING(0, cat, start, cur, len);
+           while (len-- > 0) {
+               IV aiv;
+               fromstr = NEXTFROM;
+               aiv = SvIV(fromstr);
+               if ((0 > aiv || aiv > 0xff) &&
+                   ckWARN(WARN_PACK))
+                   Perl_warner(aTHX_ packWARN(WARN_PACK),
+                               "Character in 'C' format wrapped in pack");
+               *cur++ = aiv & 0xff;
+           }
+           break;
+       case 'W': {
+           char *end;
+           U8 in_bytes = IN_BYTES;
+
+           end = start+SvLEN(cat)-1;
+           if (utf8) end -= UTF8_MAXLEN-1;
            while (len-- > 0) {
+               UV auv;
                fromstr = NEXTFROM;
-               switch (TYPE_NO_MODIFIERS(datumtype)) {
-               case 'C':
-                   aint = SvIV(fromstr);
-                   if ((aint < 0 || aint > 255) &&
-                       ckWARN(WARN_PACK))
-                       Perl_warner(aTHX_ packWARN(WARN_PACK),
-                                   "Character in 'C' format wrapped in pack");
-                   achar = aint & 255;
-                   sv_catpvn(cat, &achar, sizeof(char));
-                   break;
-               case 'c':
-                   aint = SvIV(fromstr);
-                   if ((aint < -128 || aint > 127) &&
-                       ckWARN(WARN_PACK))
-                       Perl_warner(aTHX_ packWARN(WARN_PACK),
-                                   "Character in 'c' format wrapped in pack" );
-                   achar = aint & 255;
-                   sv_catpvn(cat, &achar, sizeof(char));
-                   break;
+               auv = SvUV(fromstr);
+               if (in_bytes) auv = auv % 0x100;
+               if (utf8) {
+                 W_utf8:
+                   if (cur > end) {
+                       *cur = '\0';
+                       SvCUR_set(cat, cur - start);
+
+                       GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
+                       end = start+SvLEN(cat)-UTF8_MAXLEN;
+                   }
+                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
+                                                      NATIVE_TO_UNI(auv),
+                                                      ckWARN(WARN_UTF8) ?
+                                                      0 : UNICODE_ALLOW_ANY);
+               } else {
+                   if (auv >= 0x100) {
+                       if (!SvUTF8(cat)) {
+                           *cur = '\0';
+                           SvCUR_set(cat, cur - start);
+                           marked_upgrade(aTHX_ cat, symptr);
+                           lookahead.flags |= FLAG_DO_UTF8;
+                           lookahead.strbeg = symptr->strbeg;
+                           utf8 = 1;
+                           start = SvPVX(cat);
+                           cur = start + SvCUR(cat);
+                           end = start+SvLEN(cat)-UTF8_MAXLEN;
+                           goto W_utf8;
+                       }
+                       if (ckWARN(WARN_PACK))
+                           Perl_warner(aTHX_ packWARN(WARN_PACK),
+                                       "Character in 'W' format wrapped in pack");
+                       auv &= 0xff;
+                   }
+                   if (cur >= end) {
+                       *cur = '\0';
+                       SvCUR_set(cat, cur - start);
+                       GROWING(0, cat, start, cur, len+1);
+                       end = start+SvLEN(cat)-1;
+                   }
+                   *(U8 *) cur++ = (U8)auv;
                }
            }
            break;
-       case 'U':
+       }
+       case 'U': {
+           char *end;
+
+           if (len == 0) {
+               if (!(symptr->flags & FLAG_DO_UTF8)) {
+                   marked_upgrade(aTHX_ cat, symptr);
+                   lookahead.flags |= FLAG_DO_UTF8;
+                   lookahead.strbeg = symptr->strbeg;
+               }
+               utf8 = 0;
+               goto no_change;
+           }
+
+           end = start+SvLEN(cat);
+           if (!utf8) end -= UTF8_MAXLEN;
            while (len-- > 0) {
+               UV auv;
                fromstr = NEXTFROM;
-               auint = UNI_TO_NATIVE(SvUV(fromstr));
-               SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
-               SvCUR_set(cat,
-                         (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
-                                                    auint,
-                                                    ckWARN(WARN_UTF8) ?
-                                                    0 : UNICODE_ALLOW_ANY)
-                         - SvPVX(cat));
+               auv = SvUV(fromstr);
+               if (utf8) {
+                   U8 buffer[UTF8_MAXLEN], *endb;
+                   endb = uvuni_to_utf8_flags(buffer, auv,
+                                              ckWARN(WARN_UTF8) ?
+                                              0 : UNICODE_ALLOW_ANY);
+                   if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
+                       *cur = '\0';
+                       SvCUR_set(cat, cur - start);
+                       GROWING(0, cat, start, cur,
+                               len+(endb-buffer)*UTF8_EXPAND);
+                       end = start+SvLEN(cat);
+                   }
+                   bytes_to_uni(aTHX_ buffer, endb-buffer, &cur);
+               } else {
+                   if (cur >= end) {
+                       *cur = '\0';
+                       SvCUR_set(cat, cur - start);
+                       GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
+                       end = start+SvLEN(cat)-UTF8_MAXLEN;
+                   }
+                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
+                                                      ckWARN(WARN_UTF8) ?
+                                                      0 : UNICODE_ALLOW_ANY);
+               }
            }
-           *SvEND(cat) = '\0';
            break;
+       }
        /* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
        case 'f':
            while (len-- > 0) {
+               float afloat;
+               NV anv;
                fromstr = NEXTFROM;
+               anv = SvNV(fromstr);
 #ifdef __VOS__
-/* VOS does not automatically map a floating-point overflow
-   during conversion from double to float into infinity, so we
-   do it by hand.  This code should either be generalized for
-   any OS that needs it, or removed if and when VOS implements
-   posix-976 (suggestion to support mapping to infinity).
-   Paul.Green@stratus.com 02-04-02.  */
-               if (SvNV(fromstr) > FLT_MAX)
-                    afloat = _float_constants[0];   /* single prec. inf. */
-               else if (SvNV(fromstr) < -FLT_MAX)
-                    afloat = _float_constants[0];   /* single prec. inf. */
-               else afloat = (float)SvNV(fromstr);
-#else
+               /* VOS does not automatically map a floating-point overflow
+                  during conversion from double to float into infinity, so we
+                  do it by hand.  This code should either be generalized for
+                  any OS that needs it, or removed if and when VOS implements
+                  posix-976 (suggestion to support mapping to infinity).
+                  Paul.Green@stratus.com 02-04-02.  */
+               if (anv > FLT_MAX)
+                   afloat = _float_constants[0];   /* single prec. inf. */
+               else if (anv < -FLT_MAX)
+                   afloat = _float_constants[0];   /* single prec. inf. */
+               else afloat = (float) anv;
+#else /* __VOS__ */
 # if defined(VMS) && !defined(__IEEE_FP)
-/* IEEE fp overflow shenanigans are unavailable on VAX and optional
- * on Alpha; fake it if we don't have them.
- */
-               if (SvNV(fromstr) > FLT_MAX)
-                    afloat = FLT_MAX;
-               else if (SvNV(fromstr) < -FLT_MAX)
-                    afloat = -FLT_MAX;
-               else afloat = (float)SvNV(fromstr);
+               /* IEEE fp overflow shenanigans are unavailable on VAX and optional
               * on Alpha; fake it if we don't have them.
               */
+               if (anv > FLT_MAX)
+                   afloat = FLT_MAX;
+               else if (anv < -FLT_MAX)
+                   afloat = -FLT_MAX;
+               else afloat = (float)anv;
 # else
-               afloat = (float)SvNV(fromstr);
+               afloat = (float)anv;
 # endif
-#endif
+#endif /* __VOS__ */
                DO_BO_PACK_N(afloat, float);
-               sv_catpvn(cat, (char *)&afloat, sizeof (float));
+               PUSH_VAR(utf8, cur, afloat);
            }
            break;
        case 'd':
            while (len-- > 0) {
+               double adouble;
+               NV anv;
                fromstr = NEXTFROM;
+               anv = SvNV(fromstr);
 #ifdef __VOS__
-/* VOS does not automatically map a floating-point overflow
-   during conversion from long double to double into infinity,
-   so we do it by hand.  This code should either be generalized
-   for any OS that needs it, or removed if and when VOS
-   implements posix-976 (suggestion to support mapping to
-   infinity).  Paul.Green@stratus.com 02-04-02.  */
-               if (SvNV(fromstr) > DBL_MAX)
-                    adouble = _double_constants[0];   /* double prec. inf. */
-               else if (SvNV(fromstr) < -DBL_MAX)
-                    adouble = _double_constants[0];   /* double prec. inf. */
-               else adouble = (double)SvNV(fromstr);
-#else
+               /* VOS does not automatically map a floating-point overflow
+                  during conversion from long double to double into infinity,
+                  so we do it by hand.  This code should either be generalized
+                  for any OS that needs it, or removed if and when VOS
+                  implements posix-976 (suggestion to support mapping to
+                  infinity).  Paul.Green@stratus.com 02-04-02.  */
+               if (anv > DBL_MAX)
+                   adouble = _double_constants[0];   /* double prec. inf. */
+               else if (anv < -DBL_MAX)
+                   adouble = _double_constants[0];   /* double prec. inf. */
+               else adouble = (double) anv;
+#else /* __VOS__ */
 # if defined(VMS) && !defined(__IEEE_FP)
-/* IEEE fp overflow shenanigans are unavailable on VAX and optional
- * on Alpha; fake it if we don't have them.
- */
-               if (SvNV(fromstr) > DBL_MAX)
-                    adouble = DBL_MAX;
-               else if (SvNV(fromstr) < -DBL_MAX)
-                    adouble = -DBL_MAX;
-               else adouble = (double)SvNV(fromstr);
+               /* IEEE fp overflow shenanigans are unavailable on VAX and optional
               * on Alpha; fake it if we don't have them.
               */
+               if (anv > DBL_MAX)
+                   adouble = DBL_MAX;
+               else if (anv < -DBL_MAX)
+                   adouble = -DBL_MAX;
+               else adouble = (double)anv;
 # else
-               adouble = (double)SvNV(fromstr);
+               adouble = (double)anv;
 # endif
-#endif
+#endif /* __VOS__ */
                DO_BO_PACK_N(adouble, double);
-               sv_catpvn(cat, (char *)&adouble, sizeof (double));
+               PUSH_VAR(utf8, cur, adouble);
            }
            break;
-       case 'F':
+       case 'F': {
+           NV anv;
            Zero(&anv, 1, NV); /* can be long double with unused bits */
            while (len-- > 0) {
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
                DO_BO_PACK_N(anv, NV);
-               sv_catpvn(cat, (char *)&anv, NVSIZE);
+               PUSH_VAR(utf8, cur, anv);
            }
            break;
+       }
 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
-       case 'D':
+       case 'D': {
+           long double aldouble;
            /* long doubles can have unused bits, which may be nonzero */
            Zero(&aldouble, 1, long double);
            while (len-- > 0) {
                fromstr = NEXTFROM;
                aldouble = (long double)SvNV(fromstr);
                DO_BO_PACK_N(aldouble, long double);
-               sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE);
+               PUSH_VAR(utf8, cur, aldouble);
            }
            break;
+       }
 #endif
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'n' | TYPE_IS_SHRIEKING:
+#endif
        case 'n':
            while (len-- > 0) {
+               I16 ai16;
                fromstr = NEXTFROM;
                ai16 = (I16)SvIV(fromstr);
 #ifdef HAS_HTONS
                ai16 = PerlSock_htons(ai16);
 #endif
-               CAT16(cat, &ai16);
+               PUSH16(utf8, cur, &ai16);
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'v' | TYPE_IS_SHRIEKING:
+#endif
        case 'v':
            while (len-- > 0) {
+               I16 ai16;
                fromstr = NEXTFROM;
                ai16 = (I16)SvIV(fromstr);
 #ifdef HAS_HTOVS
                ai16 = htovs(ai16);
 #endif
-               CAT16(cat, &ai16);
+               PUSH16(utf8, cur, &ai16);
            }
            break;
         case 'S' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
-           {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   aushort = SvUV(fromstr);
-                   DO_BO_PACK(aushort, s);
-                   sv_catpvn(cat, (char *)&aushort, sizeof(unsigned short));
-               }
-            }
+           while (len-- > 0) {
+               unsigned short aushort;
+               fromstr = NEXTFROM;
+               aushort = SvUV(fromstr);
+               DO_BO_PACK(aushort, s);
+               PUSH_VAR(utf8, cur, aushort);
+           }
             break;
 #else
             /* Fall through! */
 #endif
        case 'S':
-            {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   au16 = (U16)SvUV(fromstr);
-                   DO_BO_PACK(au16, 16);
-                   CAT16(cat, &au16);
-               }
-
+           while (len-- > 0) {
+               U16 au16;
+               fromstr = NEXTFROM;
+               au16 = (U16)SvUV(fromstr);
+               DO_BO_PACK(au16, 16);
+               PUSH16(utf8, cur, &au16);
            }
            break;
        case 's' | TYPE_IS_SHRIEKING:
 #if SHORTSIZE != SIZE16
-           {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   ashort = SvIV(fromstr);
-                   DO_BO_PACK(ashort, s);
-                   sv_catpvn(cat, (char *)&ashort, sizeof(short));
-               }
+           while (len-- > 0) {
+               short ashort;
+               fromstr = NEXTFROM;
+               ashort = SvIV(fromstr);
+               DO_BO_PACK(ashort, s);
+               PUSH_VAR(utf8, cur, ashort);
            }
             break;
 #else
@@ -2519,23 +3228,26 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
 #endif
        case 's':
            while (len-- > 0) {
+               I16 ai16;
                fromstr = NEXTFROM;
                ai16 = (I16)SvIV(fromstr);
                DO_BO_PACK(ai16, 16);
-               CAT16(cat, &ai16);
+               PUSH16(utf8, cur, &ai16);
            }
            break;
        case 'I':
        case 'I' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
+               unsigned int auint;
                fromstr = NEXTFROM;
                auint = SvUV(fromstr);
                DO_BO_PACK(auint, i);
-               sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
+               PUSH_VAR(utf8, cur, auint);
            }
            break;
        case 'j':
            while (len-- > 0) {
+               IV aiv;
                fromstr = NEXTFROM;
                aiv = SvIV(fromstr);
 #if IVSIZE == INTSIZE
@@ -2544,12 +3256,15 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                DO_BO_PACK(aiv, l);
 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
                DO_BO_PACK(aiv, 64);
+#else
+               Perl_croak(aTHX_ "'j' not supported on this platform");
 #endif
-               sv_catpvn(cat, (char*)&aiv, IVSIZE);
+               PUSH_VAR(utf8, cur, aiv);
            }
            break;
        case 'J':
            while (len-- > 0) {
+               UV auv;
                fromstr = NEXTFROM;
                auv = SvUV(fromstr);
 #if UVSIZE == INTSIZE
@@ -2558,26 +3273,31 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                DO_BO_PACK(auv, l);
 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
                DO_BO_PACK(auv, 64);
+#else
+               Perl_croak(aTHX_ "'J' not supported on this platform");
 #endif
-               sv_catpvn(cat, (char*)&auv, UVSIZE);
+               PUSH_VAR(utf8, cur, auv);
            }
            break;
        case 'w':
             while (len-- > 0) {
+               NV anv;
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
 
-               if (anv < 0)
+               if (anv < 0) {
+                   *cur = '\0';
+                   SvCUR_set(cat, cur - start);
                    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
+               }
 
                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
                    any negative IVs will have already been got by the croak()
                    above. IOK is untrue for fractions, so we test them
                    against UV_MAX_P1.  */
-               if (SvIOK(fromstr) || anv < UV_MAX_P1)
-               {
-                   char   buf[(sizeof(UV)*8)/7+1];
+               if (SvIOK(fromstr) || anv < UV_MAX_P1) {
+                   char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
                    char  *in = buf + sizeof(buf);
                    UV     auv = SvUV(fromstr);
 
@@ -2586,29 +3306,10 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                        auv >>= 7;
                    } while (auv);
                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
-                   sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
-               }
-               else if (SvPOKp(fromstr)) {  /* decimal string arithmetics */
-                   char           *from, *result, *in;
-                   SV             *norm;
-                   STRLEN          len;
-                   bool            done;
-
-                   /* Copy string and check for compliance */
-                   from = SvPV(fromstr, len);
-                   if ((norm = is_an_int(from, len)) == NULL)
-                       Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
-
-                   New('w', result, len, char);
-                   in = result + len;
-                   done = FALSE;
-                   while (!done)
-                       *--in = div128(norm, &done) | 0x80;
-                   result[len - 1] &= 0x7F; /* clear continue bit */
-                   sv_catpvn(cat, in, (result + len) - in);
-                   Safefree(result);
-                   SvREFCNT_dec(norm); /* free norm */
-                }
+                   PUSH_GROWING_BYTES(utf8, cat, start, cur,
+                                      in, (buf + sizeof(buf)) - in);
+               } else if (SvPOKp(fromstr))
+                   goto w_string;
                else if (SvNOKp(fromstr)) {
                    /* 10**NV_MAX_10_EXP is the largest power of 10
                       so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
@@ -2621,10 +3322,10 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                       floating-point value.
                    */
 #ifdef NV_MAX_10_EXP
-/*                 char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
+                   /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
                    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
 #else
-/*                 char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
+                   /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
                    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
 #endif
                    char  *in = buf + sizeof(buf);
@@ -2638,14 +3339,15 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                        anv = next;
                    } while (anv > 0);
                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
-                   sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
-               }
-               else {
+                   PUSH_GROWING_BYTES(utf8, cat, start, cur,
+                                      in, (buf + sizeof(buf)) - in);
+               } else {
                    char           *from, *result, *in;
                    SV             *norm;
                    STRLEN          len;
                    bool            done;
 
+                 w_string:
                    /* Copy string and check for compliance */
                    from = SvPV(fromstr, len);
                    if ((norm = is_an_int(from, len)) == NULL)
@@ -2654,79 +3356,83 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                    New('w', result, len, char);
                    in = result + len;
                    done = FALSE;
-                   while (!done)
-                       *--in = div128(norm, &done) | 0x80;
+                   while (!done) *--in = div128(norm, &done) | 0x80;
                    result[len - 1] &= 0x7F; /* clear continue bit */
-                   sv_catpvn(cat, in, (result + len) - in);
+                   PUSH_GROWING_BYTES(utf8, cat, start, cur,
+                                      in, (result + len) - in);
                    Safefree(result);
                    SvREFCNT_dec(norm); /* free norm */
-               }
+               }
            }
             break;
        case 'i':
        case 'i' | TYPE_IS_SHRIEKING:
            while (len-- > 0) {
+               int aint;
                fromstr = NEXTFROM;
                aint = SvIV(fromstr);
                DO_BO_PACK(aint, i);
-               sv_catpvn(cat, (char*)&aint, sizeof(int));
+               PUSH_VAR(utf8, cur, aint);
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'N' | TYPE_IS_SHRIEKING:
+#endif
        case 'N':
            while (len-- > 0) {
+               U32 au32;
                fromstr = NEXTFROM;
                au32 = SvUV(fromstr);
 #ifdef HAS_HTONL
                au32 = PerlSock_htonl(au32);
 #endif
-               CAT32(cat, &au32);
+               PUSH32(utf8, cur, &au32);
            }
            break;
+#ifdef PERL_PACK_CAN_SHRIEKSIGN
        case 'V' | TYPE_IS_SHRIEKING:
+#endif
        case 'V':
            while (len-- > 0) {
+               U32 au32;
                fromstr = NEXTFROM;
                au32 = SvUV(fromstr);
 #ifdef HAS_HTOVL
                au32 = htovl(au32);
 #endif
-               CAT32(cat, &au32);
+               PUSH32(utf8, cur, &au32);
            }
            break;
        case 'L' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
-           {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   aulong = SvUV(fromstr);
-                   DO_BO_PACK(aulong, l);
-                   sv_catpvn(cat, (char *)&aulong, sizeof(unsigned long));
-               }
+           while (len-- > 0) {
+               unsigned long aulong;
+               fromstr = NEXTFROM;
+               aulong = SvUV(fromstr);
+               DO_BO_PACK(aulong, l);
+               PUSH_VAR(utf8, cur, aulong);
            }
            break;
 #else
             /* Fall though! */
 #endif
        case 'L':
-            {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   au32 = SvUV(fromstr);
-                   DO_BO_PACK(au32, 32);
-                   CAT32(cat, &au32);
-               }
+           while (len-- > 0) {
+               U32 au32;
+               fromstr = NEXTFROM;
+               au32 = SvUV(fromstr);
+               DO_BO_PACK(au32, 32);
+               PUSH32(utf8, cur, &au32);
            }
            break;
        case 'l' | TYPE_IS_SHRIEKING:
 #if LONGSIZE != SIZE32
-           {
-               while (len-- > 0) {
-                   fromstr = NEXTFROM;
-                   along = SvIV(fromstr);
-                   DO_BO_PACK(along, l);
-                   sv_catpvn(cat, (char *)&along, sizeof(long));
-               }
+           while (len-- > 0) {
+               long along;
+               fromstr = NEXTFROM;
+               along = SvIV(fromstr);
+               DO_BO_PACK(along, l);
+               PUSH_VAR(utf8, cur, along);
            }
            break;
 #else
@@ -2734,38 +3440,44 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
 #endif
        case 'l':
             while (len-- > 0) {
+               I32 ai32;
                fromstr = NEXTFROM;
                ai32 = SvIV(fromstr);
                DO_BO_PACK(ai32, 32);
-               CAT32(cat, &ai32);
+               PUSH32(utf8, cur, &ai32);
            }
            break;
 #ifdef HAS_QUAD
        case 'Q':
            while (len-- > 0) {
+               Uquad_t auquad;
                fromstr = NEXTFROM;
-               auquad = (Uquad_t)SvUV(fromstr);
+               auquad = (Uquad_t) SvUV(fromstr);
                DO_BO_PACK(auquad, 64);
-               sv_catpvn(cat, (char*)&auquad, sizeof(Uquad_t));
+               PUSH_VAR(utf8, cur, auquad);
            }
            break;
        case 'q':
            while (len-- > 0) {
+               Quad_t aquad;
                fromstr = NEXTFROM;
                aquad = (Quad_t)SvIV(fromstr);
                DO_BO_PACK(aquad, 64);
-               sv_catpvn(cat, (char*)&aquad, sizeof(Quad_t));
+               PUSH_VAR(utf8, cur, aquad);
            }
            break;
-#endif
+#endif /* HAS_QUAD */
        case 'P':
            len = 1;            /* assume SV is correct length */
+           GROWING(utf8, cat, start, cur, sizeof(char *));
            /* Fall through! */
        case 'p':
            while (len-- > 0) {
+               char *aptr;
+
                fromstr = NEXTFROM;
-               if (fromstr == &PL_sv_undef)
-                   aptr = NULL;
+               SvGETMAGIC(fromstr);
+               if (!SvOK(fromstr)) aptr = NULL;
                else {
                    STRLEN n_a;
                    /* XXX better yet, could spirit away the string to
@@ -2773,43 +3485,71 @@ S_pack_rec(pTHX_ SV *cat, register tempsym_t* symptr, register SV **beglist, SV
                     * of pack() (and all copies of the result) are
                     * gone.
                     */
-                   if (ckWARN(WARN_PACK) && (SvTEMP(fromstr)
-                                               || (SvPADTMP(fromstr)
-                                                   && !SvREADONLY(fromstr))))
-                   {
+                   if (ckWARN(WARN_PACK) &&
+                       (SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
+                                            !SvREADONLY(fromstr)))) {
                        Perl_warner(aTHX_ packWARN(WARN_PACK),
-                               "Attempt to pack pointer to temporary value");
+                                   "Attempt to pack pointer to temporary value");
                    }
                    if (SvPOK(fromstr) || SvNIOK(fromstr))
-                       aptr = SvPV(fromstr,n_a);
+                       aptr = SvPV_flags(fromstr, n_a, 0);
                    else
-                       aptr = SvPV_force(fromstr,n_a);
+                       aptr = SvPV_force_flags(fromstr, n_a, 0);
                }
-               DO_BO_PACK_P(aptr);
-               sv_catpvn(cat, (char*)&aptr, sizeof(char*));
+               DO_BO_PACK_PC(aptr);
+               PUSH_VAR(utf8, cur, aptr);
            }
            break;
-       case 'u':
+       case 'u': {
+           char *aptr, *aend;
+           bool from_utf8;
+
            fromstr = NEXTFROM;
+           if (len <= 2) len = 45;
+           else len = len / 3 * 3;
+           if (len >= 64) {
+               Perl_warner(aTHX_ packWARN(WARN_PACK),
+                           "Field too wide in 'u' format in pack");
+               len = 63;
+           }
            aptr = SvPV(fromstr, fromlen);
-           SvGROW(cat, fromlen * 4 / 3);
-           if (len <= 2)
-               len = 45;
-           else
-               len = len / 3 * 3;
+           from_utf8 = DO_UTF8(fromstr);
+           if (from_utf8) {
+               aend = aptr + fromlen;
+               fromlen = sv_len_utf8(fromstr);
+           } else aend = NULL; /* Unused, but keep compilers happy */
+           GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
            while (fromlen > 0) {
+               U8 *end;
                I32 todo;
+               U8 hunk[1+63/3*4+1];
 
                if ((I32)fromlen > len)
                    todo = len;
                else
                    todo = fromlen;
-               doencodes(cat, aptr, todo);
+               if (from_utf8) {
+                   char buffer[64];
+                   if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
+                                     'u' | TYPE_IS_PACK)) {
+                       *cur = '\0';
+                       SvCUR_set(cat, cur - start);
+                       Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
+                   }
+                   end = doencodes(hunk, buffer, todo);
+               } else {
+                   end = doencodes(hunk, aptr, todo);
+                   aptr += todo;
+               }
+               PUSH_BYTES(utf8, cur, hunk, end-hunk);
                fromlen -= todo;
-               aptr += todo;
            }
            break;
        }
+       }
+       *cur = '\0';
+       SvCUR_set(cat, cur - start);
+      no_change:
        *symptr = lookahead;
     }
     return beglist;
@@ -2827,6 +3567,7 @@ PP(pp_pack)
 
     MARK++;
     sv_setpvn(cat, "", 0);
+    SvUTF8_off(cat);
 
     packlist(cat, pat, patend, MARK, SP + 1);
 
@@ -2843,5 +3584,5 @@ PP(pp_pack)
  * indent-tabs-mode: t
  * End:
  *
- * vim: expandtab shiftwidth=4:
+ * vim: shiftwidth=4:
 */