This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Tie::File 0.93, from mjd.
[perl5.git] / pp_pack.c
index 03bf3c8..452a2b0 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -7,6 +7,14 @@
  *
  */
 
+/*
+ * He still hopefully carried some of his gear in his pack: a small tinder-box,
+ * two small shallow pans, the smaller fitting into the larger; inside them a
+ * wooden spoon, a short two-pronged fork and some skewers were stowed; and
+ * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
+ * some salt.
+ */
+
 #include "EXTERN.h"
 #define PERL_IN_PP_PACK_C
 #include "perl.h"
@@ -115,32 +123,308 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
 #endif
 
+#define UNPACK_ONLY_ONE        0x1
+#define UNPACK_DO_UTF8 0x2
 
-PP(pp_unpack)
+STATIC char *
+S_group_end(pTHX_ register char *pat, register char *patend, char ender)
 {
-    dSP;
-    dPOPPOPssrl;
-    I32 start_sp_offset = SP - PL_stack_base;
-    I32 gimme = GIMME_V;
-    SV *sv;
-    STRLEN llen;
-    STRLEN rlen;
-    register char *pat = SvPV(left, llen);
-#ifdef PACKED_IS_OCTETS
-    /* Packed side is assumed to be octets - so force downgrade if it
-       has been UTF-8 encoded by accident
-     */
-    register char *s = SvPVbyte(right, rlen);
+    while (pat < patend) {
+       char c = *pat++;
+
+       if (isSPACE(c))
+           continue;
+       else if (c == ender)
+           return --pat;
+       else if (c == '#') {
+           while (pat < patend && *pat != '\n')
+               pat++;
+           continue;
+       } else if (c == '(')
+           pat = group_end(pat, patend, ')') + 1;
+       else if (c == '[')
+           pat = group_end(pat, patend, ']') + 1;
+    }
+    Perl_croak(aTHX_ "No group ending character `%c' found", ender);
+    return 0;
+}
+
+#define TYPE_IS_SHRIEKING      0x100
+
+/* Returns the sizeof() struct described by pat */
+STATIC I32
+S_measure_struct(pTHX_ char *pat, register char *patend)
+{
+    I32 datumtype;
+    register I32 len;
+    register I32 total = 0;
+    int commas = 0;
+    int star;          /* 1 if count is *, -1 if no count given, -2 for / */
+#ifdef PERL_NATINT_PACK
+    int natint;                /* native integer */
+    int unatint;       /* unsigned native integer */
+#endif
+    char buf[2];
+    register int size;
+
+    while ((pat = next_symbol(pat, patend)) < patend) {
+       datumtype = *pat++ & 0xFF;
+#ifdef PERL_NATINT_PACK
+       natint = 0;
+#endif
+       if (*pat == '!') {
+           static const char *natstr = "sSiIlLxX";
+
+           if (strchr(natstr, datumtype)) {
+               if (datumtype == 'x' || datumtype == 'X') {
+                   datumtype |= TYPE_IS_SHRIEKING;
+               } else {                /* XXXX Should be redone similarly! */
+#ifdef PERL_NATINT_PACK
+                   natint = 1;
+#endif
+               }
+               pat++;
+           }
+           else
+               Perl_croak(aTHX_ "'!' allowed only after types %s", natstr);
+       }
+       len = find_count(&pat, patend, &star);
+       if (star > 0)                   /*  */
+               Perl_croak(aTHX_ "%s not allowed in length fields", "count *");
+       else if (star < 0)              /* No explicit len */
+               len = datumtype != '@';
+
+       switch(datumtype) {
+       default:
+           Perl_croak(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype);
+       case '@':
+       case '/':
+       case 'U':                       /* XXXX Is it correct? */
+       case 'w':
+       case 'u':
+           buf[0] = datumtype;
+           buf[1] = 0;
+           Perl_croak(aTHX_ "%s not allowed in length fields", buf);
+       case ',': /* grandfather in commas but with a warning */
+           if (commas++ == 0 && ckWARN(WARN_UNPACK))
+               Perl_warner(aTHX_ packWARN(WARN_UNPACK),
+                           "Invalid type in unpack: '%c'", (int)datumtype);
+           /* FALL THROUGH */
+       case '%':
+           size = 0;
+           break;
+       case '(':
+       {
+           char *beg = pat, *end;
+
+           if (star >= 0)
+               Perl_croak(aTHX_ "()-group starts with a count");
+           end = group_end(beg, patend, ')');
+           pat = end + 1;
+           len = find_count(&pat, patend, &star);
+           if (star < 0)               /* No count */
+               len = 1;
+           else if (star > 0)  /* Star */
+               Perl_croak(aTHX_ "%s not allowed in length fields", "count *");
+           /* XXXX Theoretically, we need to measure many times at different
+              positions, since the subexpression may contain
+              alignment commands, but be not of aligned length.
+              Need to detect this and croak().  */
+           size = measure_struct(beg, end);
+           break;
+       }
+       case 'X' | TYPE_IS_SHRIEKING:
+           /* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS. */
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           len = total % len;          /* Assumed: the start is aligned. */
+           /* FALL THROUGH */
+       case 'X':
+           size = -1;
+           if (total < len)
+               Perl_croak(aTHX_ "X outside of string");
+           break;
+       case 'x' | TYPE_IS_SHRIEKING:
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           star = total % len;         /* Assumed: the start is aligned. */
+           if (star)                   /* Other portable ways? */
+               len = len - star;
+           else
+               len = 0;
+           /* FALL THROUGH */
+       case 'x':
+       case 'A':
+       case 'Z':
+       case 'a':
+       case 'c':
+       case 'C':
+           size = 1;
+           break;
+       case 'B':
+       case 'b':
+           len = (len + 7)/8;
+           size = 1;
+           break;
+       case 'H':
+       case 'h':
+           len = (len + 1)/2;
+           size = 1;
+           break;
+       case 's':
+#if SHORTSIZE == SIZE16
+           size = SIZE16;
 #else
-    register char *s = SvPV(right, rlen);
+           size = (natint ? sizeof(short) : SIZE16);
 #endif
-    char *strend = s + rlen;
-    char *strbeg = s;
-    register char *patend = pat + llen;
+           break;
+       case 'v':
+       case 'n':
+       case 'S':
+#if SHORTSIZE == SIZE16
+           size = SIZE16;
+#else
+           unatint = natint && datumtype == 'S';
+           size = (unatint ? sizeof(unsigned short) : SIZE16);
+#endif
+           break;
+       case 'i':
+           size = sizeof(int);
+           break;
+       case 'I':
+           size = sizeof(unsigned int);
+           break;
+       case 'j':
+           size = IVSIZE;
+           break;
+       case 'J':
+           size = UVSIZE;
+           break;
+       case 'l':
+#if LONGSIZE == SIZE32
+           size = SIZE32;
+#else
+           size = (natint ? sizeof(long) : SIZE32);
+#endif
+           break;
+       case 'V':
+       case 'N':
+       case 'L':
+#if LONGSIZE == SIZE32
+           size = SIZE32;
+#else
+           unatint = natint && datumtype == 'L';
+           size = (unatint ? sizeof(unsigned long) : SIZE32);
+#endif
+           break;
+       case 'P':
+           len = 1;
+           /* FALL THROUGH */
+       case 'p':
+           size = sizeof(char*);
+           break;
+#ifdef HAS_QUAD
+       case 'q':
+           size = sizeof(Quad_t);
+           break;
+       case 'Q':
+           size = sizeof(Uquad_t);
+           break;
+#endif
+       case 'f':
+           size = sizeof(float);
+           break;
+       case 'd':
+           size = sizeof(double);
+           break;
+       case 'F':
+           size = NVSIZE;
+           break;
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+       case 'D':
+           size = LONG_DOUBLESIZE;
+           break;
+#endif
+       }
+       total += len * size;
+    }
+    return total;
+}
+
+/* Returns -1 on no count or on star */
+STATIC I32
+S_find_count(pTHX_ char **ppat, register char *patend, int *star)
+{
+    char *pat = *ppat;
+    I32 len;
+
+    *star = 0;
+    if (pat >= patend)
+       len = 1;
+    else if (*pat == '*') {
+       pat++;
+       *star = 1;
+       len = -1;
+    }
+    else if (isDIGIT(*pat)) {
+       len = *pat++ - '0';
+       while (isDIGIT(*pat)) {
+           len = (len * 10) + (*pat++ - '0');
+           if (len < 0)                /* 50% chance of catching... */
+               Perl_croak(aTHX_ "Repeat count in pack/unpack overflows");
+       }
+    }
+    else if (*pat == '[') {
+       char *end = group_end(++pat, patend, ']');
+
+       len = 0;
+       *ppat = end + 1;
+       if (isDIGIT(*pat))
+           return find_count(&pat, end, star);
+       return measure_struct(pat, end);
+    }
+    else
+       len = *star = -1;
+    *ppat = pat;
+    return len;
+}
+
+STATIC char *
+S_next_symbol(pTHX_ register char *pat, register char *patend)
+{
+    while (pat < patend) {
+       if (isSPACE(*pat))
+           pat++;
+       else if (*pat == '#') {
+           pat++;
+           while (pat < patend && *pat != '\n')
+               pat++;
+           if (pat < patend)
+               pat++;
+       }
+       else
+           return pat;
+    }
+    return pat;
+}
+
+/*
+=for apidoc unpack_str
+
+The engine implementing unpack() Perl function.
+
+=cut */
+
+I32
+Perl_unpack_str(pTHX_ char *pat, register char *patend, register char *s, char *strbeg, char *strend, char **new_s, I32 ocnt, U32 flags)
+{
+    dSP;
     I32 datumtype;
     register I32 len;
     register I32 bits = 0;
     register char *str;
+    SV *sv;
+    I32 start_sp_offset = SP - PL_stack_base;
 
     /* These must not be in registers: */
     short ashort;
@@ -159,103 +443,140 @@ PP(pp_unpack)
     float afloat;
     double adouble;
     I32 checksum = 0;
-    UV culong = 0;
+    UV cuv = 0;
     NV cdouble = 0.0;
-    const int bits_in_uv = 8 * sizeof(culong);
+    const int bits_in_uv = 8 * sizeof(cuv);
     int commas = 0;
-    int star;
+    int star;          /* 1 if count is *, -1 if no count given, -2 for / */
 #ifdef PERL_NATINT_PACK
     int natint;                /* native integer */
     int unatint;       /* unsigned native integer */
 #endif
-    bool do_utf8 = DO_UTF8(right);
+    IV aiv;
+    UV auv;
+    NV anv;
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+    long double aldouble;
+#endif
+    bool do_utf8 = flags & UNPACK_DO_UTF8;
 
-    while (pat < patend) {
-      reparse:
+    while ((pat = next_symbol(pat, patend)) < patend) {
        datumtype = *pat++ & 0xFF;
 #ifdef PERL_NATINT_PACK
        natint = 0;
 #endif
-       if (isSPACE(datumtype))
-           continue;
-       if (datumtype == '#') {
-           while (pat < patend && *pat != '\n')
-               pat++;
-           continue;
-       }
+       /* do first one only unless in list context
+          / is implemented by unpacking the count, then poping it from the
+          stack, so must check that we're not in the middle of a /  */
+        if ( (flags & UNPACK_ONLY_ONE)
+            && (SP - PL_stack_base == start_sp_offset + 1)
+            && (datumtype != '/') )
+            break;
        if (*pat == '!') {
-           char *natstr = "sSiIlL";
+           static const char natstr[] = "sSiIlLxX";
 
            if (strchr(natstr, datumtype)) {
+               if (datumtype == 'x' || datumtype == 'X') {
+                   datumtype |= TYPE_IS_SHRIEKING;
+               } else {                /* XXXX Should be redone similarly! */
 #ifdef PERL_NATINT_PACK
-               natint = 1;
+                   natint = 1;
 #endif
+               }
                pat++;
            }
            else
-               DIE(aTHX_ "'!' allowed only after types %s", natstr);
+               Perl_croak(aTHX_ "'!' allowed only after types %s", natstr);
        }
-       star = 0;
-       if (pat >= patend)
-           len = 1;
-       else if (*pat == '*') {
-           len = strend - strbeg;      /* long enough */
-           pat++;
-           star = 1;
-       }
-       else if (isDIGIT(*pat)) {
-           len = *pat++ - '0';
-           while (isDIGIT(*pat)) {
-               len = (len * 10) + (*pat++ - '0');
-               if (len < 0)
-                   DIE(aTHX_ "Repeat count in unpack overflows");
-           }
-       }
-       else
-           len = (datumtype != '@');
+       len = find_count(&pat, patend, &star);
+       if (star > 0)
+               len = strend - strbeg;  /* long enough */
+       else if (star < 0)              /* No explicit len */
+               len = datumtype != '@';
+
       redo_switch:
        switch(datumtype) {
        default:
-           DIE(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype);
+           Perl_croak(aTHX_ "Invalid type in unpack: '%c'", (int)datumtype);
        case ',': /* grandfather in commas but with a warning */
            if (commas++ == 0 && ckWARN(WARN_UNPACK))
-               Perl_warner(aTHX_ WARN_UNPACK,
+               Perl_warner(aTHX_ packWARN(WARN_UNPACK),
                            "Invalid type in unpack: '%c'", (int)datumtype);
            break;
        case '%':
-           if (len == 1 && pat[-1] != '1')
-               len = 16;
+           if (len == 1 && pat[-1] != '1' && pat[-1] != ']')
+               len = 16;               /* len is not specified */
            checksum = len;
-           culong = 0;
+           cuv = 0;
            cdouble = 0;
-           if (pat < patend)
-               goto reparse;
+           continue;
            break;
+       case '(':
+       {
+           char *beg = pat;
+           char *ss = s;               /* Move from register */
+
+           if (star >= 0)
+               Perl_croak(aTHX_ "()-group starts with a count");
+           aptr = group_end(beg, patend, ')');
+           pat = aptr + 1;
+           if (star != -2) {
+               len = find_count(&pat, patend, &star);
+               if (star < 0)           /* No count */
+                   len = 1;
+               else if (star > 0)      /* Star */
+                   len = strend - strbeg; /* long enough? */
+           }
+           PUTBACK;
+           while (len--) {
+               unpack_str(beg, aptr, ss, strbeg, strend, &ss,
+                          ocnt + SP - PL_stack_base - start_sp_offset, flags);
+               if (star > 0 && ss == strend)
+                   break;              /* No way to continue */
+           }
+           SPAGAIN;
+           s = ss;
+           break;
+       }
        case '@':
            if (len > strend - strbeg)
-               DIE(aTHX_ "@ outside of string");
+               Perl_croak(aTHX_ "@ outside of string");
            s = strbeg + len;
            break;
+       case 'X' | TYPE_IS_SHRIEKING:
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           len = (s - strbeg) % len;
+           /* FALL THROUGH */
        case 'X':
            if (len > s - strbeg)
-               DIE(aTHX_ "X outside of string");
+               Perl_croak(aTHX_ "X outside of string");
            s -= len;
            break;
+       case 'x' | TYPE_IS_SHRIEKING:
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           aint = (s - strbeg) % len;
+           if (aint)                   /* Other portable ways? */
+               len = len - aint;
+           else
+               len = 0;
+           /* FALL THROUGH */
        case 'x':
            if (len > strend - s)
-               DIE(aTHX_ "x outside of string");
+               Perl_croak(aTHX_ "x outside of string");
            s += len;
            break;
        case '/':
-           if (start_sp_offset >= SP - PL_stack_base)
-               DIE(aTHX_ "/ must follow a numeric type");
+           if (ocnt + SP - PL_stack_base - start_sp_offset <= 0)
+               Perl_croak(aTHX_ "/ must follow a numeric type");
            datumtype = *pat++;
            if (*pat == '*')
                pat++;          /* ignore '*' for compatibility with pack */
            if (isDIGIT(*pat))
-               DIE(aTHX_ "/ cannot take a count" );
+               Perl_croak(aTHX_ "/ cannot take a count" );
            len = POPi;
-           star = 0;
+           star = -2;
            goto redo_switch;
        case 'A':
        case 'Z':
@@ -272,7 +593,7 @@ PP(pp_unpack)
                    s = SvPVX(sv);
                    while (*s)
                        s++;
-                   if (star) /* exact for 'Z*' */
+                   if (star > 0) /* exact for 'Z*' */
                        len = s - SvPVX(sv) + 1;
                }
                else {          /* 'A' strips both nulls and spaces */
@@ -289,7 +610,7 @@ PP(pp_unpack)
            break;
        case 'B':
        case 'b':
-           if (star || len > (strend - s) * 8)
+           if (star > 0 || len > (strend - s) * 8)
                len = (strend - s) * 8;
            if (checksum) {
                if (!PL_bitcount) {
@@ -306,20 +627,20 @@ PP(pp_unpack)
                    }
                }
                while (len >= 8) {
-                   culong += PL_bitcount[*(unsigned char*)s++];
+                   cuv += PL_bitcount[*(unsigned char*)s++];
                    len -= 8;
                }
                if (len) {
                    bits = *s;
                    if (datumtype == 'b') {
                        while (len-- > 0) {
-                           if (bits & 1) culong++;
+                           if (bits & 1) cuv++;
                            bits >>= 1;
                        }
                    }
                    else {
                        while (len-- > 0) {
-                           if (bits & 128) culong++;
+                           if (bits & 128) cuv++;
                            bits <<= 1;
                        }
                    }
@@ -355,7 +676,7 @@ PP(pp_unpack)
            break;
        case 'H':
        case 'h':
-           if (star || len > (strend - s) * 2)
+           if (star > 0 || len > (strend - s) * 2)
                len = (strend - s) * 2;
            sv = NEWSV(35, len + 1);
            SvCUR_set(sv, len);
@@ -395,7 +716,7 @@ PP(pp_unpack)
                    if (checksum > bits_in_uv)
                        cdouble += (NV)aint;
                    else
-                       culong += aint;
+                       cuv += aint;
                }
            }
            else {
@@ -423,7 +744,7 @@ PP(pp_unpack)
              uchar_checksum:
                while (len-- > 0) {
                    auint = *s++ & 255;
-                   culong += auint;
+                   cuv += auint;
                }
            }
            else {
@@ -449,13 +770,13 @@ PP(pp_unpack)
            if (checksum) {
                while (len-- > 0 && s < strend) {
                    STRLEN alen;
-                   auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
+                   auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV));
                    along = alen;
                    s += along;
                    if (checksum > bits_in_uv)
                        cdouble += (NV)auint;
                    else
-                       culong += auint;
+                       cuv += auint;
                }
            }
            else {
@@ -463,7 +784,7 @@ PP(pp_unpack)
                EXTEND_MORTAL(len);
                while (len-- > 0 && s < strend) {
                    STRLEN alen;
-                   auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
+                   auint = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, strend - s, &alen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV));
                    along = alen;
                    s += along;
                    sv = NEWSV(37, 0);
@@ -490,7 +811,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)ashort;
                        else
-                           culong += ashort;
+                           cuv += ashort;
 
                    }
                }
@@ -507,7 +828,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)ashort;
                        else
-                           culong += ashort;
+                           cuv += ashort;
                    }
                }
            }
@@ -563,7 +884,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)aushort;
                        else
-                           culong += aushort;
+                           cuv += aushort;
                    }
                }
                else
@@ -583,7 +904,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)aushort;
                        else
-                           culong += aushort;
+                           cuv += aushort;
                    }
                }
            }
@@ -633,7 +954,7 @@ PP(pp_unpack)
                    if (checksum > bits_in_uv)
                        cdouble += (NV)aint;
                    else
-                       culong += aint;
+                       cuv += aint;
                }
            }
            else {
@@ -684,7 +1005,7 @@ PP(pp_unpack)
                    if (checksum > bits_in_uv)
                        cdouble += (NV)auint;
                    else
-                       culong += auint;
+                       cuv += auint;
                }
            }
            else {
@@ -706,6 +1027,58 @@ PP(pp_unpack)
                }
            }
            break;
+       case 'j':
+           along = (strend - s) / IVSIZE;
+           if (len > along)
+               len = along;
+           if (checksum) {
+               while (len-- > 0) {
+                   Copy(s, &aiv, 1, IV);
+                   s += IVSIZE;
+                   if (checksum > bits_in_uv)
+                       cdouble += (NV)aiv;
+                   else
+                       cuv += aiv;
+               }
+           }
+           else {
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
+               while (len-- > 0) {
+                   Copy(s, &aiv, 1, IV);
+                   s += IVSIZE;
+                   sv = NEWSV(40, 0);
+                   sv_setiv(sv, aiv);
+                   PUSHs(sv_2mortal(sv));
+               }
+           }
+           break;
+       case 'J':
+           along = (strend - s) / UVSIZE;
+           if (len > along)
+               len = along;
+           if (checksum) {
+               while (len-- > 0) {
+                   Copy(s, &auv, 1, UV);
+                   s += UVSIZE;
+                   if (checksum > bits_in_uv)
+                       cdouble += (NV)auv;
+                   else
+                       cuv += auv;
+               }
+           }
+           else {
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
+               while (len-- > 0) {
+                   Copy(s, &auv, 1, UV);
+                   s += UVSIZE;
+                   sv = NEWSV(41, 0);
+                   sv_setuv(sv, auv);
+                   PUSHs(sv_2mortal(sv));
+               }
+           }
+           break;
        case 'l':
 #if LONGSIZE == SIZE32
            along = (strend - s) / SIZE32;
@@ -723,7 +1096,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)along;
                        else
-                           culong += along;
+                           cuv += along;
                    }
                }
                else
@@ -742,7 +1115,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)along;
                        else
-                           culong += along;
+                           cuv += along;
                    }
                }
            }
@@ -800,7 +1173,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)aulong;
                        else
-                           culong += aulong;
+                           cuv += aulong;
                    }
                }
                else
@@ -820,7 +1193,7 @@ PP(pp_unpack)
                        if (checksum > bits_in_uv)
                            cdouble += (NV)aulong;
                        else
-                           culong += aulong;
+                           cuv += aulong;
                    }
                }
            }
@@ -918,12 +1291,12 @@ PP(pp_unpack)
                    }
                }
                if ((s >= strend) && bytes)
-                   DIE(aTHX_ "Unterminated compressed integer");
+                   Perl_croak(aTHX_ "Unterminated compressed integer");
            }
            break;
        case 'P':
-           if (star)
-               DIE(aTHX_ "P must have an explicit size");
+           if (star > 0)
+               Perl_croak(aTHX_ "P must have an explicit size");
            EXTEND(SP, 1);
            if (sizeof(char*) > strend - s)
                break;
@@ -948,7 +1321,7 @@ PP(pp_unpack)
                    if (checksum > bits_in_uv)
                        cdouble += (NV)aquad;
                    else
-                       culong += aquad;
+                       cuv += aquad;
                }
            }
             else {
@@ -958,12 +1331,12 @@ PP(pp_unpack)
                     if (s + sizeof(Quad_t) > strend)
                         aquad = 0;
                     else {
-                   Copy(s, &aquad, 1, Quad_t);
-                   s += sizeof(Quad_t);
+                       Copy(s, &aquad, 1, Quad_t);
+                       s += sizeof(Quad_t);
                     }
                     sv = NEWSV(42, 0);
                     if (aquad >= IV_MIN && aquad <= IV_MAX)
-                   sv_setiv(sv, (IV)aquad);
+                       sv_setiv(sv, (IV)aquad);
                     else
                         sv_setnv(sv, (NV)aquad);
                     PUSHs(sv_2mortal(sv));
@@ -971,7 +1344,7 @@ PP(pp_unpack)
             }
            break;
        case 'Q':
-           along = (strend - s) / sizeof(Quad_t);
+           along = (strend - s) / sizeof(Uquad_t);
            if (len > along)
                len = along;
            if (checksum) {
@@ -981,7 +1354,7 @@ PP(pp_unpack)
                    if (checksum > bits_in_uv)
                        cdouble += (NV)auquad;
                    else
-                       culong += auquad;
+                       cuv += auquad;
                }
            }
             else {
@@ -1006,7 +1379,6 @@ PP(pp_unpack)
 #endif
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
-       case 'F':
            along = (strend - s) / sizeof(float);
            if (len > along)
                len = along;
@@ -1030,7 +1402,6 @@ PP(pp_unpack)
            }
            break;
        case 'd':
-       case 'D':
            along = (strend - s) / sizeof(double);
            if (len > along)
                len = along;
@@ -1053,6 +1424,54 @@ PP(pp_unpack)
                }
            }
            break;
+       case 'F':
+           along = (strend - s) / NVSIZE;
+           if (len > along)
+               len = along;
+           if (checksum) {
+               while (len-- > 0) {
+                   Copy(s, &anv, 1, NV);
+                   s += NVSIZE;
+                   cdouble += anv;
+               }
+           }
+           else {
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
+               while (len-- > 0) {
+                   Copy(s, &anv, 1, NV);
+                   s += NVSIZE;
+                   sv = NEWSV(48, 0);
+                   sv_setnv(sv, anv);
+                   PUSHs(sv_2mortal(sv));
+               }
+           }
+           break;
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+       case 'D':
+           along = (strend - s) / LONG_DOUBLESIZE;
+           if (len > along)
+               len = along;
+           if (checksum) {
+               while (len-- > 0) {
+                   Copy(s, &aldouble, 1, long double);
+                   s += LONG_DOUBLESIZE;
+                   cdouble += aldouble;
+               }
+           }
+           else {
+               EXTEND(SP, len);
+               EXTEND_MORTAL(len);
+               while (len-- > 0) {
+                   Copy(s, &aldouble, 1, long double);
+                   s += LONG_DOUBLESIZE;
+                   sv = NEWSV(48, 0);
+                   sv_setnv(sv, (NV)aldouble);
+                   PUSHs(sv_2mortal(sv));
+               }
+           }
+           break;
+#endif
        case 'u':
            /* MKS:
             * Initialise the decode mapping.  By using a table driven
@@ -1106,8 +1525,9 @@ PP(pp_unpack)
                }
                if (*s == '\n')
                    s++;
-               else if (s[1] == '\n')          /* possible checksum byte */
-                   s += 2;
+               else    /* possible checksum byte */
+                   if (s + 1 < strend && s[1] == '\n')
+                       s += 2;
            }
            XPUSHs(sv_2mortal(sv));
            break;
@@ -1115,7 +1535,8 @@ PP(pp_unpack)
        if (checksum) {
            sv = NEWSV(42, 0);
            if (strchr("fFdD", datumtype) ||
-             (checksum > bits_in_uv && strchr("csSiIlLnNUvVqQ", datumtype)) ) {
+             (checksum > bits_in_uv &&
+              strchr("csSiIlLnNUvVqQjJ", datumtype)) ) {
                NV trouble;
 
                 adouble = (NV) (1 << (checksum & 15));
@@ -1131,24 +1552,48 @@ PP(pp_unpack)
            else {
                if (checksum < bits_in_uv) {
                    UV mask = ((UV)1 << checksum) - 1;
-                   culong &= mask;
+
+                   cuv &= mask;
                }
-               sv_setuv(sv, (UV)culong);
+               sv_setuv(sv, cuv);
            }
            XPUSHs(sv_2mortal(sv));
            checksum = 0;
        }
-        if (gimme != G_ARRAY &&
-            SP - PL_stack_base == start_sp_offset + 1) {
-          /* do first one only unless in list context
-             / is implmented by unpacking the count, then poping it from the
-             stack, so must check that we're not in the middle of a /  */
-          if ((pat >= patend) || *pat != '/')
-            RETURN;
-        }
     }
-    if (SP - PL_stack_base == start_sp_offset && gimme == G_SCALAR)
-       PUSHs(&PL_sv_undef);
+    if (new_s)
+       *new_s = s;
+    PUTBACK;
+    return SP - PL_stack_base - start_sp_offset;
+}
+
+PP(pp_unpack)
+{
+    dSP;
+    dPOPPOPssrl;
+    I32 gimme = GIMME_V;
+    STRLEN llen;
+    STRLEN rlen;
+    register char *pat = SvPV(left, llen);
+#ifdef PACKED_IS_OCTETS
+    /* Packed side is assumed to be octets - so force downgrade if it
+       has been UTF-8 encoded by accident
+     */
+    register char *s = SvPVbyte(right, rlen);
+#else
+    register char *s = SvPV(right, rlen);
+#endif
+    char *strend = s + rlen;
+    register char *patend = pat + llen;
+    register I32 cnt;
+
+    PUTBACK;
+    cnt = unpack_str(pat, patend, s, s, strend, NULL, 0,
+                    ((gimme == G_SCALAR) ? UNPACK_ONLY_ONE : 0)
+                    | (DO_UTF8(right) ? UNPACK_DO_UTF8 : 0));
+    SPAGAIN;
+    if ( !cnt && gimme == G_SCALAR )
+       PUSHs(&PL_sv_undef);
     RETURN;
 }
 
@@ -1256,22 +1701,27 @@ S_div128(pTHX_ SV *pnum, bool *done)
   return (m);
 }
 
+#define PACK_CHILD     0x1
 
-PP(pp_pack)
+/*
+=for apidoc pack_cat
+
+The engine implementing pack() Perl function.
+
+=cut */
+
+void
+Perl_pack_cat(pTHX_ SV *cat, char *pat, register char *patend, register SV **beglist, SV **endlist, SV ***next_in_list, U32 flags)
 {
-    dSP; dMARK; dORIGMARK; dTARGET;
-    register SV *cat = TARG;
     register I32 items;
     STRLEN fromlen;
-    register char *pat = SvPVx(*++MARK, fromlen);
-    char *patcopy;
-    register char *patend = pat + fromlen;
     register I32 len;
     I32 datumtype;
     SV *fromstr;
     /*SUPPRESS 442*/
     static char null10[] = {0,0,0,0,0,0,0,0,0,0};
     static char *space10 = "          ";
+    int star;
 
     /* These must not be in registers: */
     char achar;
@@ -1280,6 +1730,12 @@ PP(pp_pack)
     unsigned int auint;
     I32 along;
     U32 aulong;
+    IV aiv;
+    UV auv;
+    NV anv;
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+    long double aldouble;
+#endif
 #ifdef HAS_QUAD
     Quad_t aquad;
     Uquad_t auquad;
@@ -1292,74 +1748,58 @@ PP(pp_pack)
     int natint;                /* native integer */
 #endif
 
-    items = SP - MARK;
-    MARK++;
-    sv_setpvn(cat, "", 0);
-    patcopy = pat;
-    while (pat < patend) {
+    items = endlist - beglist;
+#ifndef PACKED_IS_OCTETS
+    pat = next_symbol(pat, patend);
+    if (pat < patend && *pat == 'U' && !flags)
+       SvUTF8_on(cat);
+#endif
+    while ((pat = next_symbol(pat, patend)) < patend) {
        SV *lengthcode = Nullsv;
-#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *MARK++ : &PL_sv_no)
+#define NEXTFROM ( lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
        datumtype = *pat++ & 0xFF;
 #ifdef PERL_NATINT_PACK
        natint = 0;
 #endif
-       if (isSPACE(datumtype)) {
-           patcopy++;
-           continue;
-        }
-#ifndef PACKED_IS_OCTETS
-       if (datumtype == 'U' && pat == patcopy+1)
-           SvUTF8_on(cat);
-#endif
-       if (datumtype == '#') {
-           while (pat < patend && *pat != '\n')
-               pat++;
-           continue;
-       }
         if (*pat == '!') {
-           char *natstr = "sSiIlL";
+           static const char natstr[] = "sSiIlLxX";
 
            if (strchr(natstr, datumtype)) {
+               if (datumtype == 'x' || datumtype == 'X') {
+                   datumtype |= TYPE_IS_SHRIEKING;
+               } else {                /* XXXX Should be redone similarly! */
 #ifdef PERL_NATINT_PACK
-               natint = 1;
+                   natint = 1;
 #endif
+               }
                pat++;
            }
            else
-               DIE(aTHX_ "'!' allowed only after types %s", natstr);
+               Perl_croak(aTHX_ "'!' allowed only after types %s", natstr);
        }
-       if (*pat == '*') {
+       len = find_count(&pat, patend, &star);
+       if (star > 0)                   /* Count is '*' */
            len = strchr("@Xxu", datumtype) ? 0 : items;
-           pat++;
-       }
-       else if (isDIGIT(*pat)) {
-           len = *pat++ - '0';
-           while (isDIGIT(*pat)) {
-               len = (len * 10) + (*pat++ - '0');
-               if (len < 0)
-                   DIE(aTHX_ "Repeat count in pack overflows");
-           }
-       }
-       else
+       else if (star < 0)              /* Default len */
            len = 1;
-       if (*pat == '/') {
+       if (*pat == '/') {              /* doing lookahead how... */
            ++pat;
            if ((*pat != 'a' && *pat != 'A' && *pat != 'Z') || pat[1] != '*')
-               DIE(aTHX_ "/ must be followed by a*, A* or Z*");
+               Perl_croak(aTHX_ "/ must be followed by a*, A* or Z*");
            lengthcode = sv_2mortal(newSViv(sv_len(items > 0
-                                                  ? *MARK : &PL_sv_no)
+                                                  ? *beglist : &PL_sv_no)
                                             + (*pat == 'Z' ? 1 : 0)));
        }
        switch(datumtype) {
        default:
-           DIE(aTHX_ "Invalid type in pack: '%c'", (int)datumtype);
+           Perl_croak(aTHX_ "Invalid type in pack: '%c'", (int)datumtype);
        case ',': /* grandfather in commas but with a warning */
            if (commas++ == 0 && ckWARN(WARN_PACK))
-               Perl_warner(aTHX_ WARN_PACK,
+               Perl_warner(aTHX_ packWARN(WARN_PACK),
                            "Invalid type in pack: '%c'", (int)datumtype);
            break;
        case '%':
-           DIE(aTHX_ "%% may only be used in unpack");
+           Perl_croak(aTHX_ "%% may only be used in unpack");
        case '@':
            len -= SvCUR(cat);
            if (len > 0)
@@ -1368,13 +1808,52 @@ PP(pp_pack)
            if (len > 0)
                goto shrink;
            break;
+       case '(':
+       {
+           char *beg = pat;
+           SV **savebeglist = beglist; /* beglist de-register-ed */
+
+           if (star >= 0)
+               Perl_croak(aTHX_ "()-group starts with a count");
+           aptr = group_end(beg, patend, ')');
+           pat = aptr + 1;
+           if (star != -2) {
+               len = find_count(&pat, patend, &star);
+               if (star < 0)           /* No count */
+                   len = 1;
+               else if (star > 0)      /* Star */
+                   len = items;        /* long enough? */
+           }
+           while (len--) {
+               pack_cat(cat, beg, aptr, savebeglist, endlist,
+                        &savebeglist, PACK_CHILD);
+               if (star > 0 && savebeglist == endlist)
+                   break;              /* No way to continue */
+           }
+           beglist = savebeglist;
+           break;
+       }
+       case 'X' | TYPE_IS_SHRIEKING:
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           len = (SvCUR(cat)) % len;
+           /* FALL THROUGH */
        case 'X':
          shrink:
            if (SvCUR(cat) < len)
-               DIE(aTHX_ "X outside of string");
+               Perl_croak(aTHX_ "X outside of string");
            SvCUR(cat) -= len;
            *SvEND(cat) = '\0';
            break;
+       case 'x' | TYPE_IS_SHRIEKING:
+           if (!len)                   /* Avoid division by 0 */
+               len = 1;
+           aint = (SvCUR(cat)) % len;
+           if (aint)                   /* Other portable ways? */
+               len = len - aint;
+           else
+               len = 0;
+           /* FALL THROUGH */
        case 'x':
          grow:
            while (len >= 10) {
@@ -1388,7 +1867,7 @@ PP(pp_pack)
        case 'a':
            fromstr = NEXTFROM;
            aptr = SvPV(fromstr, fromlen);
-           if (pat[lengthcode ? -2 : -1] == '*') { /* -2 after '/' */  
+           if (star > 0) { /* -2 after '/' */  
                len = fromlen;
                if (datumtype == 'Z')
                    ++len;
@@ -1426,7 +1905,7 @@ PP(pp_pack)
                fromstr = NEXTFROM;
                saveitems = items;
                str = SvPV(fromstr, fromlen);
-               if (pat[-1] == '*')
+               if (star > 0)
                    len = fromlen;
                aint = SvCUR(cat);
                SvCUR(cat) += (len+7)/8;
@@ -1482,7 +1961,7 @@ PP(pp_pack)
                fromstr = NEXTFROM;
                saveitems = items;
                str = SvPV(fromstr, fromlen);
-               if (pat[-1] == '*')
+               if (star > 0)
                    len = fromlen;
                aint = SvCUR(cat);
                SvCUR(cat) += (len+1)/2;
@@ -1538,7 +2017,7 @@ PP(pp_pack)
                    aint = SvIV(fromstr);
                    if ((aint < 0 || aint > 255) &&
                        ckWARN(WARN_PACK))
-                       Perl_warner(aTHX_ WARN_PACK,
+                       Perl_warner(aTHX_ packWARN(WARN_PACK),
                                    "Character in \"C\" format wrapped");
                    achar = aint & 255;
                    sv_catpvn(cat, &achar, sizeof(char));
@@ -1547,7 +2026,7 @@ PP(pp_pack)
                    aint = SvIV(fromstr);
                    if ((aint < -128 || aint > 127) &&
                        ckWARN(WARN_PACK))
-                       Perl_warner(aTHX_ WARN_PACK,
+                       Perl_warner(aTHX_ packWARN(WARN_PACK),
                                    "Character in \"c\" format wrapped");
                    achar = aint & 255;
                    sv_catpvn(cat, &achar, sizeof(char));
@@ -1558,16 +2037,19 @@ PP(pp_pack)
        case 'U':
            while (len-- > 0) {
                fromstr = NEXTFROM;
-               auint = SvUV(fromstr);
+               auint = UNI_TO_NATIVE(SvUV(fromstr));
                SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
-               SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
-                              - SvPVX(cat));
+               SvCUR_set(cat,
+                         (char*)uvchr_to_utf8_flags((U8*)SvEND(cat),
+                                                    auint,
+                                                    ckWARN(WARN_UTF8) ?
+                                                    0 : UNICODE_ALLOW_ANY)
+                         - SvPVX(cat));
            }
            *SvEND(cat) = '\0';
            break;
        /* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
        case 'f':
-       case 'F':
            while (len-- > 0) {
                fromstr = NEXTFROM;
                afloat = (float)SvNV(fromstr);
@@ -1575,13 +2057,28 @@ PP(pp_pack)
            }
            break;
        case 'd':
-       case 'D':
            while (len-- > 0) {
                fromstr = NEXTFROM;
                adouble = (double)SvNV(fromstr);
                sv_catpvn(cat, (char *)&adouble, sizeof (double));
            }
            break;
+       case 'F':
+           while (len-- > 0) {
+               fromstr = NEXTFROM;
+               anv = SvNV(fromstr);
+               sv_catpvn(cat, (char *)&anv, NVSIZE);
+           }
+           break;
+#if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
+       case 'D':
+           while (len-- > 0) {
+               fromstr = NEXTFROM;
+               aldouble = (long double)SvNV(fromstr);
+               sv_catpvn(cat, (char *)&aldouble, LONG_DOUBLESIZE);
+           }
+           break;
+#endif
        case 'n':
            while (len-- > 0) {
                fromstr = NEXTFROM;
@@ -1654,29 +2151,38 @@ PP(pp_pack)
                sv_catpvn(cat, (char*)&auint, sizeof(unsigned int));
            }
            break;
+       case 'j':
+           while (len-- > 0) {
+               fromstr = NEXTFROM;
+               aiv = SvIV(fromstr);
+               sv_catpvn(cat, (char*)&aiv, IVSIZE);
+           }
+           break;
+       case 'J':
+           while (len-- > 0) {
+               fromstr = NEXTFROM;
+               auv = SvUV(fromstr);
+               sv_catpvn(cat, (char*)&auv, UVSIZE);
+           }
+           break;
        case 'w':
             while (len-- > 0) {
                fromstr = NEXTFROM;
-               adouble = Perl_floor(SvNV(fromstr));
+               adouble = SvNV(fromstr);
 
                if (adouble < 0)
-                   DIE(aTHX_ "Cannot compress negative numbers");
+                   Perl_croak(aTHX_ "Cannot compress negative numbers");
 
-               if (
-#if UVSIZE > 4 && UVSIZE >= NVSIZE
-                   adouble <= 0xffffffff
-#else
-#   ifdef CXUX_BROKEN_CONSTANT_CONVERT
-                   adouble <= UV_MAX_cxux
-#   else
-                   adouble <= UV_MAX
-#   endif
-#endif
-                   )
+                /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
+                   which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
+                   any negative IVs will have already been got by the croak()
+                   above. IOK is untrue for fractions, so we test them
+                   against UV_MAX_P1.  */
+               if (SvIOK(fromstr) || adouble < UV_MAX_P1)
                {
-                   char   buf[1 + sizeof(UV)];
+                   char   buf[(sizeof(UV)*8)/7+1];
                    char  *in = buf + sizeof(buf);
-                   UV     auv = U_V(adouble);
+                   UV     auv = SvUV(fromstr);
 
                    do {
                        *--in = (auv & 0x7f) | 0x80;
@@ -1694,7 +2200,7 @@ PP(pp_pack)
                    /* Copy string and check for compliance */
                    from = SvPV(fromstr, len);
                    if ((norm = is_an_int(from, len)) == NULL)
-                       DIE(aTHX_ "can compress only unsigned integer");
+                       Perl_croak(aTHX_ "can compress only unsigned integer");
 
                    New('w', result, len, char);
                    in = result + len;
@@ -1710,18 +2216,38 @@ PP(pp_pack)
                    char   buf[sizeof(double) * 2];     /* 8/7 <= 2 */
                    char  *in = buf + sizeof(buf);
 
+                    adouble = Perl_floor(adouble);
                    do {
                        double next = floor(adouble / 128);
                        *--in = (unsigned char)(adouble - (next * 128)) | 0x80;
                        if (in <= buf)  /* this cannot happen ;-) */
-                           DIE(aTHX_ "Cannot compress integer");
+                           Perl_croak(aTHX_ "Cannot compress integer");
                        adouble = next;
                    } while (adouble > 0);
                    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
                    sv_catpvn(cat, in, (buf + sizeof(buf)) - in);
                }
-               else
-                   DIE(aTHX_ "Cannot compress non integer");
+               else {
+                   char           *from, *result, *in;
+                   SV             *norm;
+                   STRLEN          len;
+                   bool            done;
+
+                   /* Copy string and check for compliance */
+                   from = SvPV(fromstr, len);
+                   if ((norm = is_an_int(from, len)) == NULL)
+                       Perl_croak(aTHX_ "can compress only unsigned integer");
+
+                   New('w', result, len, char);
+                   in = result + len;
+                   done = FALSE;
+                   while (!done)
+                       *--in = div128(norm, &done) | 0x80;
+                   result[len - 1] &= 0x7F; /* clear continue bit */
+                   sv_catpvn(cat, in, (result + len) - in);
+                   Safefree(result);
+                   SvREFCNT_dec(norm); /* free norm */
+               }
            }
             break;
        case 'i':
@@ -1828,7 +2354,7 @@ PP(pp_pack)
                                                || (SvPADTMP(fromstr)
                                                    && !SvREADONLY(fromstr))))
                    {
-                       Perl_warner(aTHX_ WARN_PACK,
+                       Perl_warner(aTHX_ packWARN(WARN_PACK),
                                "Attempt to pack pointer to temporary value");
                    }
                    if (SvPOK(fromstr) || SvNIOK(fromstr))
@@ -1861,10 +2387,28 @@ PP(pp_pack)
            break;
        }
     }
+    if (next_in_list)
+       *next_in_list = beglist;
+}
+#undef NEXTFROM
+
+
+PP(pp_pack)
+{
+    dSP; dMARK; dORIGMARK; dTARGET;
+    register SV *cat = TARG;
+    STRLEN fromlen;
+    register char *pat = SvPVx(*++MARK, fromlen);
+    register char *patend = pat + fromlen;
+
+    MARK++;
+    sv_setpvn(cat, "", 0);
+
+    pack_cat(cat, pat, patend, MARK, SP + 1, NULL, 0);
+
     SvSETMAGIC(cat);
     SP = ORIGMARK;
     PUSHs(cat);
     RETURN;
 }
-#undef NEXTFROM