This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add bitwise feature feature
[perl5.git] / pp_pack.c
index f877fe2..60462eb 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -337,7 +337,6 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
 STATIC bool
 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
 {
-    dVAR;
     STRLEN retlen;
     const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
     if (val >= 0x100 || !ISUUCHAR(val) ||
@@ -862,7 +861,7 @@ Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, cons
 STATIC I32
 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
 {
-    dVAR; dSP;
+    dSP;
     SV *sv = NULL;
     const I32 start_sp_offset = SP - PL_stack_base;
     howlen_t howlen;
@@ -1319,10 +1318,16 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    len = UTF8SKIP(result);
                    if (!uni_to_bytes(aTHX_ &ptr, strend,
                                      (char *) &result[1], len-1, 'U')) break;
-                   auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
+                                                       len,
+                                                       &retlen,
+                                                       UTF8_ALLOW_DEFAULT));
                    s = ptr;
                } else {
-                   auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
+                                                       strend - s,
+                                                       &retlen,
+                                                       UTF8_ALLOW_DEFAULT));
                    if (retlen == (STRLEN) -1 || retlen == 0)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
@@ -1699,6 +1704,18 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                ld_bytes aldouble;
                 SHIFT_BYTES(utf8, s, strend, aldouble.bytes,
                             sizeof(aldouble.bytes), datumtype, needs_swap);
+                /* The most common long double format, the x86 80-bit
+                 * extended precision, has either 2 or 6 unused bytes,
+                 * which may contain garbage, which may contain
+                 * unintentional data.  While we do zero the bytes of
+                 * the long double data in pack(), here in unpack() we
+                 * don't, because it's really hard to envision that
+                 * reading the long double off aldouble would be
+                 * affected by the unused bytes.
+                 *
+                 * Note that trying to unpack 'long doubles' of 'long
+                 * doubles' packed in another system is in the general
+                 * case doomed without having more detail. */
                if (!checksum)
                    mPUSHn(aldouble.ld);
                else
@@ -1781,7 +1798,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            if (!checksum)
                XPUSHs(sv);
            break;
-       }
+       } /* End of switch */
 
        if (checksum) {
            if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
@@ -1842,7 +1859,6 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
 
 PP(pp_unpack)
 {
-    dVAR;
     dSP;
     dPOPPOPssrl;
     I32 gimme = GIMME_V;
@@ -1866,7 +1882,7 @@ PP(pp_unpack)
 }
 
 STATIC U8 *
-doencodes(U8 *h, const char *s, I32 len)
+doencodes(U8 *h, const U8 *s, I32 len)
 {
     *h++ = PL_uuemap[len];
     while (len > 2) {
@@ -1878,7 +1894,7 @@ doencodes(U8 *h, const char *s, I32 len)
        len -= 3;
     }
     if (len > 0) {
-        const char r = (len > 1 ? s[1] : '\0');
+        const U8 r = (len > 1 ? s[1] : '\0');
        *h++ = PL_uuemap[(077 & (s[0] >> 2))];
        *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
        *h++ = PL_uuemap[(077 & ((r << 2) & 074))];
@@ -1975,7 +1991,6 @@ The engine implementing pack() Perl function.
 void
 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist )
 {
-    dVAR;
     tempsym_t sym;
 
     PERL_ARGS_ASSERT_PACKLIST;
@@ -2073,11 +2088,32 @@ S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
     return SvGROW(sv, len+extend+1);
 }
 
+static SV *
+S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype)
+{
+    SvGETMAGIC(sv);
+    if (UNLIKELY(SvAMAGIC(sv)))
+       sv = sv_2num(sv);
+    if (UNLIKELY(isinfnansv(sv))) {
+       const I32 c = TYPE_NO_MODIFIERS(datumtype);
+       const NV nv = SvNV_nomg(sv);
+       if (c == 'w')
+           Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv);
+       else
+           Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c);
+    }
+    return sv;
+}
+
+#define SvIV_no_inf(sv,d) \
+       ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv))
+#define SvUV_no_inf(sv,d) \
+       ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv))
+
 STATIC
 SV **
 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 {
-    dVAR;
     tempsym_t lookahead;
     I32 items  = endlist - beglist;
     bool found = next_symbol(symptr);
@@ -2106,6 +2142,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
         bool needs_swap;
 
 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
+#define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no)
 
         switch (howlen) {
          case e_star:
@@ -2158,7 +2195,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
        /* Code inside the switch must take care to properly update
           cat (CUR length and '\0' termination) if it updated *cur and
           doesn't simply leave using break */
-       switch(TYPE_NO_ENDIANNESS(datumtype)) {
+       switch (TYPE_NO_ENDIANNESS(datumtype)) {
        default:
            Perl_croak(aTHX_ "Invalid type '%c' in pack",
                       (int) TYPE_NO_MODIFIERS(datumtype));
@@ -2176,7 +2213,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                from = group ? start + group->strbeg : start;
            }
            fromstr = NEXTFROM;
-           len = SvIV(fromstr);
+           len = SvIV_no_inf(fromstr, datumtype);
            goto resize;
        case '@' | TYPE_IS_SHRIEKING:
        case '@':
@@ -2544,7 +2581,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                IV aiv;
                fromstr = NEXTFROM;
-               aiv = SvIV(fromstr);
+                aiv = SvIV_no_inf(fromstr, datumtype);
                if ((-128 > aiv || aiv > 127))
                    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
                                   "Character in 'c' format wrapped in pack");
@@ -2559,7 +2596,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                IV aiv;
                fromstr = NEXTFROM;
-               aiv = SvIV(fromstr);
+                aiv = SvIV_no_inf(fromstr, datumtype);
                if ((0 > aiv || aiv > 0xff))
                    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
                                   "Character in 'C' format wrapped in pack");
@@ -2575,7 +2612,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                UV auv;
                fromstr = NEXTFROM;
-               auv = SvUV(fromstr);
+               auv = SvUV_no_inf(fromstr, datumtype);
                if (in_bytes) auv = auv % 0x100;
                if (utf8) {
                  W_utf8:
@@ -2637,10 +2674,10 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                UV auv;
                fromstr = NEXTFROM;
-               auv = SvUV(fromstr);
+               auv = SvUV_no_inf(fromstr, datumtype);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
-                   endb = uvchr_to_utf8_flags(buffer, auv,
+                   endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv),
                                               warn_utf8 ?
                                               0 : UNICODE_ALLOW_ANY);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
@@ -2658,7 +2695,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv),
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                }
@@ -2682,7 +2719,10 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                    afloat = -FLT_MAX;
                else afloat = (float)anv;
 # else
-               afloat = (float)anv;
+                /* a simple cast to float is undefined if outside
+                 * the range of values that can be represented */
+               afloat = (float)(anv >  FLT_MAX ?  NV_INF :
+                                 anv < -FLT_MAX ? -NV_INF : anv);
 # endif
                 PUSH_VAR(utf8, cur, afloat, needs_swap);
            }
@@ -2747,7 +2787,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                I16 ai16;
                fromstr = NEXTFROM;
-               ai16 = (I16)SvIV(fromstr);
+               ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
                ai16 = PerlSock_htons(ai16);
                 PUSH16(utf8, cur, &ai16, FALSE);
            }
@@ -2757,7 +2797,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                I16 ai16;
                fromstr = NEXTFROM;
-               ai16 = (I16)SvIV(fromstr);
+               ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
                ai16 = htovs(ai16);
                 PUSH16(utf8, cur, &ai16, FALSE);
            }
@@ -2767,7 +2807,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                unsigned short aushort;
                fromstr = NEXTFROM;
-               aushort = SvUV(fromstr);
+               aushort = SvUV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, aushort, needs_swap);
            }
             break;
@@ -2778,7 +2818,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                U16 au16;
                fromstr = NEXTFROM;
-               au16 = (U16)SvUV(fromstr);
+               au16 = (U16)SvUV_no_inf(fromstr, datumtype);
                 PUSH16(utf8, cur, &au16, needs_swap);
            }
            break;
@@ -2787,7 +2827,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                short ashort;
                fromstr = NEXTFROM;
-               ashort = SvIV(fromstr);
+               ashort = SvIV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, ashort, needs_swap);
            }
             break;
@@ -2798,7 +2838,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                I16 ai16;
                fromstr = NEXTFROM;
-               ai16 = (I16)SvIV(fromstr);
+               ai16 = (I16)SvIV_no_inf(fromstr, datumtype);
                 PUSH16(utf8, cur, &ai16, needs_swap);
            }
            break;
@@ -2807,7 +2847,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                unsigned int auint;
                fromstr = NEXTFROM;
-               auint = SvUV(fromstr);
+               auint = SvUV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, auint, needs_swap);
            }
            break;
@@ -2815,7 +2855,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                IV aiv;
                fromstr = NEXTFROM;
-               aiv = SvIV(fromstr);
+               aiv = SvIV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, aiv, needs_swap);
            }
            break;
@@ -2823,7 +2863,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                UV auv;
                fromstr = NEXTFROM;
-               auv = SvUV(fromstr);
+               auv = SvUV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, auv, needs_swap);
            }
            break;
@@ -2831,7 +2871,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
             while (len-- > 0) {
                NV anv;
                fromstr = NEXTFROM;
-               anv = SvNV(fromstr);
+               S_sv_check_infnan(aTHX_ fromstr, datumtype);
+               anv = SvNV_nomg(fromstr);
 
                if (anv < 0) {
                    *cur = '\0';
@@ -2847,7 +2888,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                if (SvIOK(fromstr) || anv < UV_MAX_P1) {
                    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
                    char  *in = buf + sizeof(buf);
-                   UV     auv = SvUV(fromstr);
+                   UV     auv = SvUV_nomg(fromstr);
 
                    do {
                        *--in = (char)((auv & 0x7f) | 0x80);
@@ -2898,7 +2939,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 
                  w_string:
                    /* Copy string and check for compliance */
-                   from = SvPV_const(fromstr, len);
+                   from = SvPV_nomg_const(fromstr, len);
                    if ((norm = is_an_int(from, len)) == NULL)
                        Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
 
@@ -2919,7 +2960,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                int aint;
                fromstr = NEXTFROM;
-               aint = SvIV(fromstr);
+               aint = SvIV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, aint, needs_swap);
            }
            break;
@@ -2928,7 +2969,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                U32 au32;
                fromstr = NEXTFROM;
-               au32 = SvUV(fromstr);
+               au32 = SvUV_no_inf(fromstr, datumtype);
                au32 = PerlSock_htonl(au32);
                 PUSH32(utf8, cur, &au32, FALSE);
            }
@@ -2938,7 +2979,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                U32 au32;
                fromstr = NEXTFROM;
-               au32 = SvUV(fromstr);
+               au32 = SvUV_no_inf(fromstr, datumtype);
                au32 = htovl(au32);
                 PUSH32(utf8, cur, &au32, FALSE);
            }
@@ -2948,7 +2989,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                unsigned long aulong;
                fromstr = NEXTFROM;
-               aulong = SvUV(fromstr);
+               aulong = SvUV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, aulong, needs_swap);
            }
            break;
@@ -2959,7 +3000,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                U32 au32;
                fromstr = NEXTFROM;
-               au32 = SvUV(fromstr);
+               au32 = SvUV_no_inf(fromstr, datumtype);
                 PUSH32(utf8, cur, &au32, needs_swap);
            }
            break;
@@ -2968,7 +3009,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                long along;
                fromstr = NEXTFROM;
-               along = SvIV(fromstr);
+               along = SvIV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, along, needs_swap);
            }
            break;
@@ -2979,7 +3020,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
             while (len-- > 0) {
                I32 ai32;
                fromstr = NEXTFROM;
-               ai32 = SvIV(fromstr);
+               ai32 = SvIV_no_inf(fromstr, datumtype);
                 PUSH32(utf8, cur, &ai32, needs_swap);
            }
            break;
@@ -2988,7 +3029,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                Uquad_t auquad;
                fromstr = NEXTFROM;
-               auquad = (Uquad_t) SvUV(fromstr);
+               auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, auquad, needs_swap);
            }
            break;
@@ -2996,7 +3037,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            while (len-- > 0) {
                Quad_t aquad;
                fromstr = NEXTFROM;
-               aquad = (Quad_t)SvIV(fromstr);
+               aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype);
                 PUSH_VAR(utf8, cur, aquad, needs_swap);
            }
            break;
@@ -3069,9 +3110,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                                   "aptr=%p, aend=%p, buffer=%p, todo=%ld",
                                   aptr, aend, buffer, (long) todo);
                    }
-                   end = doencodes(hunk, buffer, todo);
+                   end = doencodes(hunk, (const U8 *)buffer, todo);
                } else {
-                   end = doencodes(hunk, aptr, todo);
+                   end = doencodes(hunk, (const U8 *)aptr, todo);
                    aptr += todo;
                }
                PUSH_BYTES(utf8, cur, hunk, end-hunk, 0);
@@ -3092,7 +3133,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 
 PP(pp_pack)
 {
-    dVAR; dSP; dMARK; dORIGMARK; dTARGET;
+    dSP; dMARK; dORIGMARK; dTARGET;
     SV *cat = TARG;
     STRLEN fromlen;
     SV *pat_sv = *++MARK;