This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade CPAN from version 2.05-TRIAL2 to 2.05
[perl5.git] / pp_pack.c
index 65c1b86..3aa7a73 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -150,11 +150,11 @@ typedef union {
 /* Only to be used inside a loop (see the break) */
 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)  \
 STMT_START {                                           \
 /* Only to be used inside a loop (see the break) */
 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)  \
 STMT_START {                                           \
-    if (utf8) {                                                \
+    if (UNLIKELY(utf8)) {                               \
         if (!uni_to_bytes(aTHX_ &s, strend,            \
          (char *) (buf), len, datumtype)) break;       \
     } else {                                           \
         if (!uni_to_bytes(aTHX_ &s, strend,            \
          (char *) (buf), len, datumtype)) break;       \
     } else {                                           \
-        if (needs_swap)                                 \
+        if (UNLIKELY(needs_swap))                       \
             S_reverse_copy(s, (char *) (buf), len);     \
         else                                            \
             Copy(s, (char *) (buf), len, char);                \
             S_reverse_copy(s, (char *) (buf), len);     \
         else                                            \
             Copy(s, (char *) (buf), len, char);                \
@@ -292,7 +292,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
        UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
     const bool needs_swap = NEEDS_SWAP(datumtype);
 
        UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
     const bool needs_swap = NEEDS_SWAP(datumtype);
 
-    if (needs_swap)
+    if (UNLIKELY(needs_swap))
         buf += buf_len;
 
     for (;buf_len > 0; buf_len--) {
         buf += buf_len;
 
     for (;buf_len > 0; buf_len--) {
@@ -306,7 +306,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
            bad |= 2;
            val &= 0xff;
        }
            bad |= 2;
            val &= 0xff;
        }
-        if (needs_swap)
+        if (UNLIKELY(needs_swap))
             *(U8 *)--buf = (U8)val;
         else
             *(U8 *)buf++ = (U8)val;
             *(U8 *)--buf = (U8)val;
         else
             *(U8 *)buf++ = (U8)val;
@@ -319,7 +319,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
            const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
            for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
                if (ptr >= end) break;
            const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
            for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
                if (ptr >= end) break;
-               utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
+               utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
            }
            if (from > end) from = end;
        }
            }
            if (from > end) from = end;
        }
@@ -354,27 +354,15 @@ STATIC char *
 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
     PERL_ARGS_ASSERT_BYTES_TO_UNI;
 
 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
     PERL_ARGS_ASSERT_BYTES_TO_UNI;
 
-    if (needs_swap) {
+    if (UNLIKELY(needs_swap)) {
         const U8 *p = start + len;
         while (p-- > start) {
         const U8 *p = start + len;
         while (p-- > start) {
-            const UV uv = NATIVE_TO_ASCII(*p);
-            if (UNI_IS_INVARIANT(uv))
-                *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
-            else {
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
-            }
+            append_utf8_from_native_byte(*p, (U8 **) & dest);
         }
     } else {
         const U8 * const end = start + len;
         while (start < end) {
         }
     } else {
         const U8 * const end = start + len;
         while (start < end) {
-            const UV uv = NATIVE_TO_ASCII(*start);
-            if (UNI_IS_INVARIANT(uv))
-                *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
-            else {
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
-            }
+            append_utf8_from_native_byte(*start, (U8 **) & dest);
             start++;
         }
     }
             start++;
         }
     }
@@ -383,10 +371,10 @@ S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
 
 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
 STMT_START {                                                   \
 
 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
 STMT_START {                                                   \
-    if (utf8)                                                  \
+    if (UNLIKELY(utf8))                                                \
        (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap);       \
     else {                                                     \
        (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap);       \
     else {                                                     \
-        if (needs_swap)                                         \
+        if (UNLIKELY(needs_swap))                               \
             S_reverse_copy((char *)(buf), cur, len);            \
         else                                                    \
             Copy(buf, cur, len, char);                         \
             S_reverse_copy((char *)(buf), cur, len);            \
         else                                                    \
             Copy(buf, cur, len, char);                         \
@@ -829,7 +817,7 @@ The engine implementing the unpack() Perl function.
 Using the template pat..patend, this function unpacks the string
 s..strend into a number of mortal SVs, which it pushes onto the perl
 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
 Using the template pat..patend, this function unpacks the string
 s..strend into a number of mortal SVs, which it pushes onto the perl
 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
-C<SPAGAIN> after the call to this function). It returns the number of
+C<SPAGAIN> after the call to this function).  It returns the number of
 pushed elements.
 
 The strend and patend pointers should point to the byte following the last
 pushed elements.
 
 The strend and patend pointers should point to the byte following the last
@@ -1328,10 +1316,10 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    len = UTF8SKIP(result);
                    if (!uni_to_bytes(aTHX_ &ptr, strend,
                                      (char *) &result[1], len-1, 'U')) break;
                    len = UTF8SKIP(result);
                    if (!uni_to_bytes(aTHX_ &ptr, strend,
                                      (char *) &result[1], len-1, 'U')) break;
-                   auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
                    s = ptr;
                } else {
                    s = ptr;
                } else {
-                   auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
                    if (retlen == (STRLEN) -1 || retlen == 0)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
                    if (retlen == (STRLEN) -1 || retlen == 0)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
@@ -1644,7 +1632,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
            }
            break;
                PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
            }
            break;
-#ifdef HAS_QUAD
+#if IVSIZE >= 8
        case 'q':
            while (len-- > 0) {
                Quad_t aquad;
        case 'q':
            while (len-- > 0) {
                Quad_t aquad;
@@ -1671,7 +1659,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    cuv += auquad;
            }
            break;
                    cuv += auquad;
            }
            break;
-#endif /* HAS_QUAD */
+#endif
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
            while (len-- > 0) {
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
            while (len-- > 0) {
@@ -2015,7 +2003,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
     from_start = SvPVX_const(sv);
     from_end = from_start + SvCUR(sv);
     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
     from_start = SvPVX_const(sv);
     from_end = from_start + SvCUR(sv);
     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
-       if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
+       if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
     if (from_ptr == from_end) {
        /* Simple case: no character needs to be changed */
        SvUTF8_on(sv);
     if (from_ptr == from_end) {
        /* Simple case: no character needs to be changed */
        SvUTF8_on(sv);
@@ -2597,8 +2585,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
-                                                      NATIVE_TO_UNI(auv),
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
+                                                      auv,
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                } else {
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                } else {
@@ -2651,7 +2639,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                auv = SvUV(fromstr);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
                auv = SvUV(fromstr);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
-                   endb = uvuni_to_utf8_flags(buffer, auv,
+                   endb = uvchr_to_utf8_flags(buffer, auv,
                                               warn_utf8 ?
                                               0 : UNICODE_ALLOW_ANY);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
                                               warn_utf8 ?
                                               0 : UNICODE_ALLOW_ANY);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
@@ -2669,7 +2657,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                }
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                }
@@ -2994,7 +2982,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                 PUSH32(utf8, cur, &ai32, needs_swap);
            }
            break;
                 PUSH32(utf8, cur, &ai32, needs_swap);
            }
            break;
-#ifdef HAS_QUAD
+#if IVSIZE >= 8
        case 'Q':
            while (len-- > 0) {
                Uquad_t auquad;
        case 'Q':
            while (len-- > 0) {
                Uquad_t auquad;
@@ -3011,7 +2999,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                 PUSH_VAR(utf8, cur, aquad, needs_swap);
            }
            break;
                 PUSH_VAR(utf8, cur, aquad, needs_swap);
            }
            break;
-#endif /* HAS_QUAD */
+#endif
        case 'P':
            len = 1;            /* assume SV is correct length */
            GROWING(utf8, cat, start, cur, sizeof(char *));
        case 'P':
            len = 1;            /* assume SV is correct length */
            GROWING(utf8, cat, start, cur, sizeof(char *));