This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix ExtUtils-ParseXS/t/*.t that needed '.' in @INC
[perl5.git] / pp_pack.c
index 40db6ef..86d138b 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -142,7 +142,7 @@ typedef union {
 #  error "Unsupported byteorder"
         /* Need to add code here to re-instate mixed endian support.
            NEEDS_SWAP would need to hold a flag indicating which action to
 #  error "Unsupported byteorder"
         /* Need to add code here to re-instate mixed endian support.
            NEEDS_SWAP would need to hold a flag indicating which action to
-           take, and S_reverse_copy and the code in uni_to_bytes would need
+           take, and S_reverse_copy and the code in S_utf8_to_bytes would need
            logic adding to deal with any mixed-endian transformations needed.
         */
 #endif
            logic adding to deal with any mixed-endian transformations needed.
         */
 #endif
@@ -151,7 +151,7 @@ typedef union {
 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)  \
 STMT_START {                                           \
     if (UNLIKELY(utf8)) {                               \
 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)  \
 STMT_START {                                           \
     if (UNLIKELY(utf8)) {                               \
-        if (!uni_to_bytes(aTHX_ &s, strend,            \
+        if (!S_utf8_to_bytes(aTHX_ &s, strend,         \
          (char *) (buf), len, datumtype)) break;       \
     } else {                                           \
         if (UNLIKELY(needs_swap))                       \
          (char *) (buf), len, datumtype)) break;       \
     } else {                                           \
         if (UNLIKELY(needs_swap))                       \
@@ -216,16 +216,8 @@ S_mul128(pTHX_ SV *sv, U8 m)
 
 /* Explosives and implosives. */
 
 
 /* Explosives and implosives. */
 
-#if 'I' == 73 && 'J' == 74
-/* On an ASCII/ISO kind of system */
-#define ISUUCHAR(ch)    ((ch) >= ' ' && (ch) < 'a')
-#else
-/*
-  Some other sort of character set - use memchr() so we don't match
-  the null byte.
- */
-#define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
-#endif
+#define ISUUCHAR(ch)    (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ')  \
+                      && NATIVE_TO_LATIN1(ch) <  NATIVE_TO_LATIN1('a'))
 
 /* type modifiers */
 #define TYPE_IS_SHRIEKING      0x100
 
 /* type modifiers */
 #define TYPE_IS_SHRIEKING      0x100
@@ -245,7 +237,7 @@ S_mul128(pTHX_ SV *sv, U8 m)
 #define PACK_SIZE_UNPREDICTABLE                0x40    /* Not a fixed size element */
 #define PACK_SIZE_MASK                 0x3F
 
 #define PACK_SIZE_UNPREDICTABLE                0x40    /* Not a fixed size element */
 #define PACK_SIZE_MASK                 0x3F
 
-#include "packsizetables.c"
+#include "packsizetables.inc"
 
 static void
 S_reverse_copy(const char *src, char *dest, STRLEN len)
 
 static void
 S_reverse_copy(const char *src, char *dest, STRLEN len)
@@ -256,15 +248,18 @@ S_reverse_copy(const char *src, char *dest, STRLEN len)
 }
 
 STATIC U8
 }
 
 STATIC U8
-uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
+utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
 {
     STRLEN retlen;
 {
     STRLEN retlen;
-    UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
+    UV val;
+
+    if (*s >= end) {
+       goto croak;
+    }
+    val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
-    /* We try to process malformed UTF-8 as much as possible (preferably with
-       warnings), but these two mean we make no progress in the string and
-       might enter an infinite loop */
-    if (retlen == (STRLEN) -1 || retlen == 0)
+    if (retlen == (STRLEN) -1)
+      croak:
        Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
                   (int) TYPE_NO_MODIFIERS(datumtype));
     if (val >= 0x100) {
        Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
                   (int) TYPE_NO_MODIFIERS(datumtype));
     if (val >= 0x100) {
@@ -278,11 +273,11 @@ uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
 }
 
 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
 }
 
 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
-       uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
+       utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
        *(U8 *)(s)++)
 
 STATIC bool
        *(U8 *)(s)++)
 
 STATIC bool
-uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
+S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
 {
     UV val;
     STRLEN retlen;
 {
     UV val;
     STRLEN retlen;
@@ -298,7 +293,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
     for (;buf_len > 0; buf_len--) {
        if (from >= end) return FALSE;
        val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
     for (;buf_len > 0; buf_len--) {
        if (from >= end) return FALSE;
        val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
-       if (retlen == (STRLEN) -1 || retlen == 0) {
+       if (retlen == (STRLEN) -1) {
            from += UTF8SKIP(from);
            bad |= 1;
        } else from += retlen;
            from += UTF8SKIP(from);
            bad |= 1;
        } else from += retlen;
@@ -334,24 +329,9 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
     return TRUE;
 }
 
     return TRUE;
 }
 
-STATIC bool
-next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
-{
-    STRLEN retlen;
-    const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
-    if (val >= 0x100 || !ISUUCHAR(val) ||
-       retlen == (STRLEN) -1 || retlen == 0) {
-       *out = 0;
-       return FALSE;
-    }
-    *out = PL_uudmap[val] & 077;
-    *s += retlen;
-    return TRUE;
-}
-
 STATIC char *
 STATIC char *
-S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
-    PERL_ARGS_ASSERT_BYTES_TO_UNI;
+S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
+    PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8;
 
     if (UNLIKELY(needs_swap)) {
         const U8 *p = start + len;
 
     if (UNLIKELY(needs_swap)) {
         const U8 *p = start + len;
@@ -371,7 +351,7 @@ S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
 STMT_START {                                                   \
     if (UNLIKELY(utf8))                                                \
 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
 STMT_START {                                                   \
     if (UNLIKELY(utf8))                                                \
-       (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap);       \
+       (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap);       \
     else {                                                     \
         if (UNLIKELY(needs_swap))                               \
             S_reverse_copy((char *)(buf), cur, len);            \
     else {                                                     \
         if (UNLIKELY(needs_swap))                               \
             S_reverse_copy((char *)(buf), cur, len);            \
@@ -409,7 +389,7 @@ STMT_START {                                        \
 STMT_START {                                   \
     if (utf8) {                                        \
        const U8 au8 = (byte);                  \
 STMT_START {                                   \
     if (utf8) {                                        \
        const U8 au8 = (byte);                  \
-       (s) = S_bytes_to_uni(&au8, 1, (s), 0);  \
+       (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\
     } else *(U8 *)(s)++ = (byte);              \
 } STMT_END
 
     } else *(U8 *)(s)++ = (byte);              \
 } STMT_END
 
@@ -419,7 +399,7 @@ STMT_START {                                                        \
     STRLEN retlen;                                             \
     if (str >= end) break;                                     \
     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);    \
     STRLEN retlen;                                             \
     if (str >= end) break;                                     \
     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);    \
-    if (retlen == (STRLEN) -1 || retlen == 0) {                        \
+    if (retlen == (STRLEN) -1) {                               \
        *cur = '\0';                                            \
        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");     \
     }                                                          \
        *cur = '\0';                                            \
        Perl_croak(aTHX_ "Malformed UTF-8 string in pack");     \
     }                                                          \
@@ -814,20 +794,20 @@ first_symbol(const char *pat, const char *patend) {
 
 =for apidoc unpackstring
 
 
 =for apidoc unpackstring
 
-The engine implementing the unpack() Perl function.
+The engine implementing the C<unpack()> Perl function.
 
 
-Using the template pat..patend, this function unpacks the string
-s..strend into a number of mortal SVs, which it pushes onto the perl
-argument (@_) stack (so you will need to issue a C<PUTBACK> before and
+Using the template C<pat..patend>, this function unpacks the string
+C<s..strend> into a number of mortal SVs, which it pushes onto the perl
+argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and
 C<SPAGAIN> after the call to this function).  It returns the number of
 pushed elements.
 
 C<SPAGAIN> after the call to this function).  It returns the number of
 pushed elements.
 
-The strend and patend pointers should point to the byte following the last
-character of each string.
+The C<strend> and C<patend> pointers should point to the byte following the
+last character of each string.
 
 Although this function returns its values on the perl argument stack, it
 doesn't take any parameters from that stack (and thus in particular
 
 Although this function returns its values on the perl argument stack, it
 doesn't take any parameters from that stack (and thus in particular
-there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for
+there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for
 example).
 
 =cut */
 example).
 
 =cut */
@@ -1096,9 +1076,14 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                /* 'A' strips both nulls and spaces */
                const char *ptr;
                if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
                /* 'A' strips both nulls and spaces */
                const char *ptr;
                if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
-                   for (ptr = s+len-1; ptr >= s; ptr--)
-                       if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
-                           !isSPACE_utf8(ptr)) break;
+                    for (ptr = s+len-1; ptr >= s; ptr--) {
+                        if (   *ptr != 0
+                            && !UTF8_IS_CONTINUATION(*ptr)
+                            && !isSPACE_utf8_safe(ptr, strend))
+                        {
+                            break;
+                        }
+                    }
                    if (ptr >= s) ptr += UTF8SKIP(ptr);
                    else ptr++;
                    if (ptr > s+len)
                    if (ptr >= s) ptr += UTF8SKIP(ptr);
                    else ptr++;
                    if (ptr > s+len)
@@ -1128,7 +1113,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            if (checksum) {
                if (utf8)
                    while (len >= 8 && s < strend) {
            if (checksum) {
                if (utf8)
                    while (len >= 8 && s < strend) {
-                       cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
+                       cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)];
                        len -= 8;
                    }
                else
                        len -= 8;
                    }
                else
@@ -1163,7 +1148,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    if (len & 7) bits >>= 1;
                    else if (utf8) {
                        if (s >= strend) break;
                    if (len & 7) bits >>= 1;
                    else if (utf8) {
                        if (s >= strend) break;
-                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                       bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
                    } else bits = *(U8 *) s++;
                    *str++ = bits & 1 ? '1' : '0';
                }
                    } else bits = *(U8 *) s++;
                    *str++ = bits & 1 ? '1' : '0';
                }
@@ -1174,7 +1159,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    if (len & 7) bits <<= 1;
                    else if (utf8) {
                        if (s >= strend) break;
                    if (len & 7) bits <<= 1;
                    else if (utf8) {
                        if (s >= strend) break;
-                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                       bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
                    } else bits = *(U8 *) s++;
                    *str++ = bits & 0x80 ? '1' : '0';
                }
                    } else bits = *(U8 *) s++;
                    *str++ = bits & 0x80 ? '1' : '0';
                }
@@ -1202,7 +1187,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    if (len & 1) bits >>= 4;
                    else if (utf8) {
                        if (s >= strend) break;
                    if (len & 1) bits >>= 4;
                    else if (utf8) {
                        if (s >= strend) break;
-                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                       bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
                    } else bits = * (U8 *) s++;
                    if (!checksum)
                        *str++ = PL_hexdigit[bits & 15];
                    } else bits = * (U8 *) s++;
                    if (!checksum)
                        *str++ = PL_hexdigit[bits & 15];
@@ -1214,7 +1199,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    if (len & 1) bits <<= 4;
                    else if (utf8) {
                        if (s >= strend) break;
                    if (len & 1) bits <<= 4;
                    else if (utf8) {
                        if (s >= strend) break;
-                       bits = uni_to_byte(aTHX_ &s, strend, datumtype);
+                       bits = utf8_to_byte(aTHX_ &s, strend, datumtype);
                    } else bits = *(U8 *) s++;
                    if (!checksum)
                        *str++ = PL_hexdigit[(bits >> 4) & 15];
                    } else bits = *(U8 *) s++;
                    if (!checksum)
                        *str++ = PL_hexdigit[(bits >> 4) & 15];
@@ -1243,7 +1228,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    STRLEN retlen;
                    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
                                 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
                    STRLEN retlen;
                    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
                                 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
-                   if (retlen == (STRLEN) -1 || retlen == 0)
+                   if (retlen == (STRLEN) -1)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                  }
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                  }
@@ -1266,7 +1251,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    STRLEN retlen;
                    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
                                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
                    STRLEN retlen;
                    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
                                         ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
-                   if (retlen == (STRLEN) -1 || retlen == 0)
+                   if (retlen == (STRLEN) -1)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                    if (!checksum)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                    if (!checksum)
@@ -1312,17 +1297,23 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    STRLEN len;
                    /* Bug: warns about bad utf8 even if we are short on bytes
                       and will break out of the loop */
                    STRLEN len;
                    /* Bug: warns about bad utf8 even if we are short on bytes
                       and will break out of the loop */
-                   if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
+                   if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
                                      'U'))
                        break;
                    len = UTF8SKIP(result);
                                      'U'))
                        break;
                    len = UTF8SKIP(result);
-                   if (!uni_to_bytes(aTHX_ &ptr, strend,
+                   if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
                                      (char *) &result[1], len-1, 'U')) break;
                                      (char *) &result[1], len-1, 'U')) break;
-                   auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
+                                                       len,
+                                                       &retlen,
+                                                       UTF8_ALLOW_DEFAULT));
                    s = ptr;
                } else {
                    s = ptr;
                } else {
-                   auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
-                   if (retlen == (STRLEN) -1 || retlen == 0)
+                   auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
+                                                       strend - s,
+                                                       &retlen,
+                                                       UTF8_ALLOW_DEFAULT));
+                   if (retlen == (STRLEN) -1)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                }
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                }
@@ -1590,7 +1581,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    U8 ch;
                    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
                    auv = (auv << 7) | (ch & 0x7f);
                    U8 ch;
                    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
                    auv = (auv << 7) | (ch & 0x7f);
-                   /* UTF8_IS_XXXXX not right here - using constant 0x80 */
+                    /* UTF8_IS_XXXXX not right here because this is a BER, not
+                     * UTF-8 format - using constant 0x80 */
                    if (ch < 0x80) {
                        bytes = 0;
                        mPUSHu(auv);
                    if (ch < 0x80) {
                        bytes = 0;
                        mPUSHu(auv);
@@ -1601,7 +1593,8 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    if (++bytes >= sizeof(UV)) {        /* promote to string */
                        const char *t;
 
                    if (++bytes >= sizeof(UV)) {        /* promote to string */
                        const char *t;
 
-                       sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
+                       sv = Perl_newSVpvf(aTHX_ "%.*" UVuf,
+                                                 (int)TYPE_DIGITS(UV), auv);
                        while (s < strend) {
                            ch = SHIFT_BYTE(utf8, s, strend, datumtype);
                            sv = mul128(sv, (U8)(ch & 0x7f));
                        while (s < strend) {
                            ch = SHIFT_BYTE(utf8, s, strend, datumtype);
                            sv = mul128(sv, (U8)(ch & 0x7f));
@@ -1723,76 +1716,52 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                sv = sv_2mortal(newSV(l));
                if (l) SvPOK_on(sv);
            }
                sv = sv_2mortal(newSV(l));
                if (l) SvPOK_on(sv);
            }
-           if (utf8) {
-               while (next_uni_uu(aTHX_ &s, strend, &len)) {
-                   I32 a, b, c, d;
-                   char hunk[3];
-
-                   while (len > 0) {
-                       next_uni_uu(aTHX_ &s, strend, &a);
-                       next_uni_uu(aTHX_ &s, strend, &b);
-                       next_uni_uu(aTHX_ &s, strend, &c);
-                       next_uni_uu(aTHX_ &s, strend, &d);
-                       hunk[0] = (char)((a << 2) | (b >> 4));
-                       hunk[1] = (char)((b << 4) | (c >> 2));
-                       hunk[2] = (char)((c << 6) | d);
-                       if (!checksum)
-                           sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
-                       len -= 3;
-                   }
-                   if (s < strend) {
-                       if (*s == '\n') {
-                            s++;
-                        }
-                       else {
-                           /* possible checksum byte */
-                           const char *skip = s+UTF8SKIP(s);
-                           if (skip < strend && *skip == '\n')
-                                s = skip+1;
-                       }
-                   }
-               }
-           } else {
-               while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
-                   I32 a, b, c, d;
-                   char hunk[3];
-
-                   len = PL_uudmap[*(U8*)s++] & 077;
-                   while (len > 0) {
-                       if (s < strend && ISUUCHAR(*s))
-                           a = PL_uudmap[*(U8*)s++] & 077;
-                       else
-                           a = 0;
-                       if (s < strend && ISUUCHAR(*s))
-                           b = PL_uudmap[*(U8*)s++] & 077;
-                       else
-                           b = 0;
-                       if (s < strend && ISUUCHAR(*s))
-                           c = PL_uudmap[*(U8*)s++] & 077;
-                       else
-                           c = 0;
-                       if (s < strend && ISUUCHAR(*s))
-                           d = PL_uudmap[*(U8*)s++] & 077;
-                       else
-                           d = 0;
-                       hunk[0] = (char)((a << 2) | (b >> 4));
-                       hunk[1] = (char)((b << 4) | (c >> 2));
-                       hunk[2] = (char)((c << 6) | d);
-                       if (!checksum)
-                           sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
-                       len -= 3;
-                   }
-                   if (*s == '\n')
-                       s++;
-                   else        /* possible checksum byte */
-                       if (s + 1 < strend && s[1] == '\n')
-                           s += 2;
-               }
-           }
+
+            /* Note that all legal uuencoded strings are ASCII printables, so
+             * have the same representation under UTF-8 vs not.  This means we
+             * can ignore UTF8ness on legal input.  For illegal we stop at the
+             * first failure, and don't report where/what that is, so again we
+             * can ignore UTF8ness */
+
+            while (s < strend && *s != ' ' && ISUUCHAR(*s)) {
+                I32 a, b, c, d;
+                char hunk[3];
+
+                len = PL_uudmap[*(U8*)s++] & 077;
+                while (len > 0) {
+                    if (s < strend && ISUUCHAR(*s))
+                        a = PL_uudmap[*(U8*)s++] & 077;
+                    else
+                        a = 0;
+                    if (s < strend && ISUUCHAR(*s))
+                        b = PL_uudmap[*(U8*)s++] & 077;
+                    else
+                        b = 0;
+                    if (s < strend && ISUUCHAR(*s))
+                        c = PL_uudmap[*(U8*)s++] & 077;
+                    else
+                        c = 0;
+                    if (s < strend && ISUUCHAR(*s))
+                        d = PL_uudmap[*(U8*)s++] & 077;
+                    else
+                        d = 0;
+                    hunk[0] = (char)((a << 2) | (b >> 4));
+                    hunk[1] = (char)((b << 4) | (c >> 2));
+                    hunk[2] = (char)((c << 6) | d);
+                    if (!checksum)
+                        sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
+                    len -= 3;
+                }
+                if (*s == '\n')
+                    s++;
+                else   /* possible checksum byte */
+                    if (s + 1 < strend && s[1] == '\n')
+                        s += 2;
+            }
            if (!checksum)
                XPUSHs(sv);
            break;
            if (!checksum)
                XPUSHs(sv);
            break;
-       }
+       } /* End of switch */
 
        if (checksum) {
            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
 
        if (checksum) {
            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
@@ -1807,7 +1776,18 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                }
                while (cdouble < 0.0)
                    cdouble += anv;
                }
                while (cdouble < 0.0)
                    cdouble += anv;
-               cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
+               cdouble = Perl_modf(cdouble / anv, &trouble);
+#ifdef LONGDOUBLE_DOUBLEDOUBLE
+                /* Workaround for powerpc doubledouble modfl bug:
+                 * close to 1.0L and -1.0L cdouble is 0, and trouble
+                 * is cdouble / anv. */
+                if (trouble != Perl_ceil(trouble)) {
+                  cdouble = trouble;
+                  if (cdouble >  1.0L) cdouble -= 1.0L;
+                  if (cdouble < -1.0L) cdouble += 1.0L;
+                }
+#endif
+                cdouble *= anv;
                sv = newSVnv(cdouble);
            }
            else {
                sv = newSVnv(cdouble);
            }
            else {
@@ -1855,7 +1835,7 @@ PP(pp_unpack)
 {
     dSP;
     dPOPPOPssrl;
 {
     dSP;
     dPOPPOPssrl;
-    I32 gimme = GIMME_V;
+    U8 gimme = GIMME_V;
     STRLEN llen;
     STRLEN rlen;
     const char *pat = SvPV_const(left,  llen);
     STRLEN llen;
     STRLEN rlen;
     const char *pat = SvPV_const(left,  llen);
@@ -1876,7 +1856,7 @@ PP(pp_unpack)
 }
 
 STATIC U8 *
 }
 
 STATIC U8 *
-doencodes(U8 *h, const char *s, I32 len)
+doencodes(U8 *h, const U8 *s, I32 len)
 {
     *h++ = PL_uuemap[len];
     while (len > 2) {
 {
     *h++ = PL_uuemap[len];
     while (len > 2) {
@@ -1888,7 +1868,7 @@ doencodes(U8 *h, const char *s, I32 len)
        len -= 3;
     }
     if (len > 0) {
        len -= 3;
     }
     if (len > 0) {
-        const char r = (len > 1 ? s[1] : '\0');
+        const U8 r = (len > 1 ? s[1] : '\0');
        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
        *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
        *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
@@ -1977,7 +1957,7 @@ S_div128(pTHX_ SV *pnum, bool *done)
 /*
 =for apidoc packlist
 
 /*
 =for apidoc packlist
 
-The engine implementing pack() Perl function.
+The engine implementing C<pack()> Perl function.
 
 =cut
 */
 
 =cut
 */
@@ -2082,22 +2062,27 @@ S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
     return SvGROW(sv, len+extend+1);
 }
 
     return SvGROW(sv, len+extend+1);
 }
 
-static void
+static SV *
 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
 {
     SvGETMAGIC(sv);
 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
 {
     SvGETMAGIC(sv);
+    if (UNLIKELY(SvAMAGIC(sv)))
+       sv = sv_2num(sv);
     if (UNLIKELY(isinfnansv(sv))) {
        const I32 c = TYPE_NO_MODIFIERS(datumtype);
        const NV nv = SvNV_nomg(sv);
        if (c == 'w')
     if (UNLIKELY(isinfnansv(sv))) {
        const I32 c = TYPE_NO_MODIFIERS(datumtype);
        const NV nv = SvNV_nomg(sv);
        if (c == 'w')
-           Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
+           Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv);
        else
        else
-           Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
+           Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c);
     }
     }
+    return sv;
 }
 
 }
 
-#define SvIV_no_inf(sv,d) (S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
-#define SvUV_no_inf(sv,d) (S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
+#define SvIV_no_inf(sv,d) \
+       ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
+#define SvUV_no_inf(sv,d) \
+       ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
 
 STATIC
 SV **
 
 STATIC
 SV **
@@ -2130,7 +2115,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
        char *cur   = start + SvCUR(cat);
         bool needs_swap;
 
        char *cur   = start + SvCUR(cat);
         bool needs_swap;
 
-#define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
+#define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no)
 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
 
         switch (howlen) {
 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
 
         switch (howlen) {
@@ -2376,10 +2361,10 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    if (datumtype == 'Z') len++;
                }
                GROWING(0, cat, start, cur, len);
                    if (datumtype == 'Z') len++;
                }
                GROWING(0, cat, start, cur, len);
-               if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
+               if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen,
                                  datumtype | TYPE_IS_PACK))
                    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
                                  datumtype | TYPE_IS_PACK))
                    Perl_croak(aTHX_ "panic: predicted utf8 length not available, "
-                              "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf,
+                              "for '%c', aptr=%p end=%p cur=%p, fromlen=%" UVuf,
                               (int)datumtype, aptr, end, cur, (UV)fromlen);
                cur += fromlen;
                len -= fromlen;
                               (int)datumtype, aptr, end, cur, (UV)fromlen);
                cur += fromlen;
                len -= fromlen;
@@ -2512,7 +2497,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            if (howlen == e_star) len = fromlen;
            field_len = (len+1)/2;
            GROWING(utf8, cat, start, cur, field_len);
            if (howlen == e_star) len = fromlen;
            field_len = (len+1)/2;
            GROWING(utf8, cat, start, cur, field_len);
-           if (!utf8 && len > (I32)fromlen) len = fromlen;
+           if (!utf8_source && len > (I32)fromlen) len = fromlen;
            bits = 0;
            l = 0;
            if (datumtype == 'H')
            bits = 0;
            l = 0;
            if (datumtype == 'H')
@@ -2605,17 +2590,14 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                if (in_bytes) auv = auv % 0x100;
                if (utf8) {
                  W_utf8:
                if (in_bytes) auv = auv % 0x100;
                if (utf8) {
                  W_utf8:
-                   if (cur > end) {
+                   if (cur >= end) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
 
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
 
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
-                                                      auv,
-                                                      warn_utf8 ?
-                                                      0 : UNICODE_ALLOW_ANY);
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
                } else {
                    if (auv >= 0x100) {
                        if (!SvUTF8(cat)) {
                } else {
                    if (auv >= 0x100) {
                        if (!SvUTF8(cat)) {
@@ -2666,9 +2648,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                auv = SvUV_no_inf(fromstr, datumtype);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
                auv = SvUV_no_inf(fromstr, datumtype);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
-                   endb = uvchr_to_utf8_flags(buffer, auv,
-                                              warn_utf8 ?
-                                              0 : UNICODE_ALLOW_ANY);
+                   endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
@@ -2676,7 +2656,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                                len+(endb-buffer)*UTF8_EXPAND);
                        end = start+SvLEN(cat);
                    }
                                len+(endb-buffer)*UTF8_EXPAND);
                        end = start+SvLEN(cat);
                    }
-                    cur = S_bytes_to_uni(buffer, endb-buffer, cur, 0);
+                    cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0);
                } else {
                    if (cur >= end) {
                        *cur = '\0';
                } else {
                    if (cur >= end) {
                        *cur = '\0';
@@ -2684,9 +2664,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
-                                                      warn_utf8 ?
-                                                      0 : UNICODE_ALLOW_ANY);
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
+                                                       UNI_TO_NATIVE(auv),
+                                                      0);
                }
            }
            break;
                }
            }
            break;
@@ -2698,7 +2678,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                NV anv;
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
                NV anv;
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
-# if defined(VMS) && !defined(_IEEE_FP)
+# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
                 * on Alpha; fake it if we don't have them.
                 */
                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
                 * on Alpha; fake it if we don't have them.
                 */
@@ -2708,7 +2688,17 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    afloat = -FLT_MAX;
                else afloat = (float)anv;
 # else
                    afloat = -FLT_MAX;
                else afloat = (float)anv;
 # else
-               afloat = (float)anv;
+#  if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
+               if(Perl_isnan(anv))
+                   afloat = (float)NV_NAN;
+               else
+#  endif
+#  ifdef NV_INF
+                /* a simple cast to float is undefined if outside
+                 * the range of values that can be represented */
+               afloat = (float)(anv >  FLT_MAX ?  NV_INF :
+                                 anv < -FLT_MAX ? -NV_INF : anv);
+#  endif
 # endif
                 PUSH_VAR(utf8, cur, afloat, needs_swap);
            }
 # endif
                 PUSH_VAR(utf8, cur, afloat, needs_swap);
            }
@@ -2719,7 +2709,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                NV anv;
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
                NV anv;
                fromstr = NEXTFROM;
                anv = SvNV(fromstr);
-# if defined(VMS) && !defined(_IEEE_FP)
+# if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT)
                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
                 * on Alpha; fake it if we don't have them.
                 */
                /* IEEE fp overflow shenanigans are unavailable on VAX and optional
                 * on Alpha; fake it if we don't have them.
                 */
@@ -2742,6 +2732,12 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 #ifdef __GNUC__
                /* to work round a gcc/x86 bug; don't use SvNV */
                anv.nv = sv_2nv(fromstr);
 #ifdef __GNUC__
                /* to work round a gcc/x86 bug; don't use SvNV */
                anv.nv = sv_2nv(fromstr);
+#    if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \
+         && LONG_DOUBLESIZE > 10
+                /* GCC sometimes overwrites the padding in the
+                   assignment above */
+                Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8);
+#    endif
 #else
                anv.nv = SvNV(fromstr);
 #endif
 #else
                anv.nv = SvNV(fromstr);
 #endif
@@ -2759,6 +2755,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 #  ifdef __GNUC__
                /* to work round a gcc/x86 bug; don't use SvNV */
                aldouble.ld = (long double)sv_2nv(fromstr);
 #  ifdef __GNUC__
                /* to work round a gcc/x86 bug; don't use SvNV */
                aldouble.ld = (long double)sv_2nv(fromstr);
+#    if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10
+                /* GCC sometimes overwrites the padding in the
+                   assignment above */
+                Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8);
+#    endif
 #  else
                aldouble.ld = (long double)SvNV(fromstr);
 #  endif
 #  else
                aldouble.ld = (long double)SvNV(fromstr);
 #  endif
@@ -3045,7 +3046,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                     * of pack() (and all copies of the result) are
                     * gone.
                     */
                     * of pack() (and all copies of the result) are
                     * gone.
                     */
-                   if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
+                   if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1)
+                        || (SvPADTMP(fromstr) &&
                             !SvREADONLY(fromstr)))) {
                        Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
                                       "Attempt to pack pointer to temporary value");
                             !SvREADONLY(fromstr)))) {
                        Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
                                       "Attempt to pack pointer to temporary value");
@@ -3088,7 +3090,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    todo = fromlen;
                if (from_utf8) {
                    char buffer[64];
                    todo = fromlen;
                if (from_utf8) {
                    char buffer[64];
-                   if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
+                   if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo,
                                      'u' | TYPE_IS_PACK)) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
                                      'u' | TYPE_IS_PACK)) {
                        *cur = '\0';
                        SvCUR_set(cat, cur - start);
@@ -3096,9 +3098,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                                   "aptr=%p, aend=%p, buffer=%p, todo=%ld",
                                   aptr, aend, buffer, (long) todo);
                    }
                                   "aptr=%p, aend=%p, buffer=%p, todo=%ld",
                                   aptr, aend, buffer, (long) todo);
                    }
-                   end = doencodes(hunk, buffer, todo);
+                   end = doencodes(hunk, (const U8 *)buffer, todo);
                } else {
                } else {
-                   end = doencodes(hunk, aptr, todo);
+                   end = doencodes(hunk, (const U8 *)aptr, todo);
                    aptr += todo;
                }
                PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
                    aptr += todo;
                }
                PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
@@ -3127,7 +3129,7 @@ PP(pp_pack)
     const char *patend = pat + fromlen;
 
     MARK++;
     const char *patend = pat + fromlen;
 
     MARK++;
-    sv_setpvs(cat, "");
+    SvPVCLEAR(cat);
     SvUTF8_off(cat);
 
     packlist(cat, pat, patend, MARK, SP + 1);
     SvUTF8_off(cat);
 
     packlist(cat, pat, patend, MARK, SP + 1);
@@ -3139,11 +3141,5 @@ PP(pp_pack)
 }
 
 /*
 }
 
 /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
  * ex: set ts=8 sts=4 sw=4 et:
  */
  * ex: set ts=8 sts=4 sw=4 et:
  */