This is a live mirror of the Perl 5 development repository, currently hosted at https://github.com/perl/perl5
perllocale: Wrap some text in C<...>
[perl5.git] / pp_pack.c
index 65c1b86..394f7ff 100644 (file)
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -150,11 +150,11 @@ typedef union {
 /* Only to be used inside a loop (see the break) */
 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap)  \
 STMT_START {                                           \
-    if (utf8) {                                                \
+    if (UNLIKELY(utf8)) {                               \
         if (!uni_to_bytes(aTHX_ &s, strend,            \
          (char *) (buf), len, datumtype)) break;       \
     } else {                                           \
-        if (needs_swap)                                 \
+        if (UNLIKELY(needs_swap))                       \
             S_reverse_copy(s, (char *) (buf), len);     \
         else                                            \
             Copy(s, (char *) (buf), len, char);                \
@@ -292,7 +292,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
        UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
     const bool needs_swap = NEEDS_SWAP(datumtype);
 
-    if (needs_swap)
+    if (UNLIKELY(needs_swap))
         buf += buf_len;
 
     for (;buf_len > 0; buf_len--) {
@@ -306,7 +306,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
            bad |= 2;
            val &= 0xff;
        }
-        if (needs_swap)
+        if (UNLIKELY(needs_swap))
             *(U8 *)--buf = (U8)val;
         else
             *(U8 *)buf++ = (U8)val;
@@ -319,7 +319,7 @@ uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len
            const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
            for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
                if (ptr >= end) break;
-               utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
+               utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags);
            }
            if (from > end) from = end;
        }
@@ -354,27 +354,15 @@ STATIC char *
 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
     PERL_ARGS_ASSERT_BYTES_TO_UNI;
 
-    if (needs_swap) {
+    if (UNLIKELY(needs_swap)) {
         const U8 *p = start + len;
         while (p-- > start) {
-            const UV uv = NATIVE_TO_ASCII(*p);
-            if (UNI_IS_INVARIANT(uv))
-                *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
-            else {
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
-            }
+            append_utf8_from_native_byte(*p, (U8 **) & dest);
         }
     } else {
         const U8 * const end = start + len;
         while (start < end) {
-            const UV uv = NATIVE_TO_ASCII(*start);
-            if (UNI_IS_INVARIANT(uv))
-                *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
-            else {
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
-                *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
-            }
+            append_utf8_from_native_byte(*start, (U8 **) & dest);
             start++;
         }
     }
@@ -383,10 +371,10 @@ S_bytes_to_uni(const U8 *start, STRLEN len, char *dest, const bool needs_swap) {
 
 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap)             \
 STMT_START {                                                   \
-    if (utf8)                                                  \
+    if (UNLIKELY(utf8))                                                \
        (cur) = S_bytes_to_uni((U8 *) buf, len, (cur), needs_swap);       \
     else {                                                     \
-        if (needs_swap)                                         \
+        if (UNLIKELY(needs_swap))                               \
             S_reverse_copy((char *)(buf), cur, len);            \
         else                                                    \
             Copy(buf, cur, len, char);                         \
@@ -509,7 +497,7 @@ S_measure_struct(pTHX_ tempsym_t* symptr)
                if (!len)               /* Avoid division by 0 */
                    len = 1;
                len = total % len;      /* Assumed: the start is aligned. */
-               /* FALL THROUGH */
+               /* FALLTHROUGH */
            case 'X':
                size = -1;
                if (total < len)
@@ -523,7 +511,7 @@ S_measure_struct(pTHX_ tempsym_t* symptr)
                    len = len - star;
                else
                    len = 0;
-               /* FALL THROUGH */
+               /* FALLTHROUGH */
            case 'x':
            case 'A':
            case 'Z':
@@ -829,7 +817,7 @@ The engine implementing the unpack() Perl function.
 Using the template pat..patend, this function unpacks the string
 s..strend into a number of mortal SVs, which it pushes onto the perl
 argument (@_) stack (so you will need to issue a C<PUTBACK> before and
-C<SPAGAIN> after the call to this function). It returns the number of
+C<SPAGAIN> after the call to this function).  It returns the number of
 pushed elements.
 
 The strend and patend pointers should point to the byte following the last
@@ -1025,7 +1013,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                break;
            }
            len = (s - strbeg) % len;
-           /* FALL THROUGH */
+           /* FALLTHROUGH */
        case 'X':
            if (utf8) {
                while (len > 0) {
@@ -1052,7 +1040,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            if (ai32 == 0) break;
            len -= ai32;
             }
-           /* FALL THROUGH */
+           /* FALLTHROUGH */
        case 'x':
            if (utf8) {
                while (len>0) {
@@ -1244,7 +1232,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
                break;
            }
-           /* FALL THROUGH */
+           /* FALLTHROUGH */
        case 'c':
            while (len-- > 0 && s < strend) {
                int aint;
@@ -1328,10 +1316,10 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                    len = UTF8SKIP(result);
                    if (!uni_to_bytes(aTHX_ &ptr, strend,
                                      (char *) &result[1], len-1, 'U')) break;
-                   auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr(result, len, &retlen, UTF8_ALLOW_DEFAULT);
                    s = ptr;
                } else {
-                   auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
+                   auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT);
                    if (retlen == (STRLEN) -1 || retlen == 0)
                        Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
                    s += retlen;
@@ -1358,7 +1346,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            }
            break;
 #else
-           /* Fallthrough! */
+           /* FALLTHROUGH */
 #endif
        case 's':
            while (len-- > 0) {
@@ -1395,7 +1383,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            }
            break;
 #else
-            /* Fallthrough! */
+            /* FALLTHROUGH */
 #endif
        case 'v':
        case 'n':
@@ -1504,7 +1492,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            }
            break;
 #else
-           /* Fallthrough! */
+           /* FALLTHROUGH */
 #endif
        case 'l':
            while (len-- > 0) {
@@ -1538,7 +1526,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
            }
            break;
 #else
-            /* Fall through! */
+            /* FALLTHROUGH */
 #endif
        case 'V':
        case 'N':
@@ -1644,14 +1632,13 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
            }
            break;
-#ifdef HAS_QUAD
+#if defined(HAS_QUAD) && IVSIZE >= 8
        case 'q':
            while (len-- > 0) {
                Quad_t aquad;
                 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap);
                if (!checksum)
-                    mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
-                          newSViv((IV)aquad) : newSVnv((NV)aquad));
+                    mPUSHs(newSViv((IV)aquad));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)aquad;
                else
@@ -1663,15 +1650,14 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
                Uquad_t auquad;
                 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap);
                if (!checksum)
-                   mPUSHs(auquad <= UV_MAX ?
-                          newSVuv((UV)auquad) : newSVnv((NV)auquad));
+                   mPUSHs(newSVuv((UV)auquad));
                else if (checksum > bits_in_uv)
                    cdouble += (NV)auquad;
                else
                    cuv += auquad;
            }
            break;
-#endif /* HAS_QUAD */
+#endif
        /* float and double added gnb@melba.bby.oz.au 22/11/89 */
        case 'f':
            while (len-- > 0) {
@@ -2015,7 +2001,7 @@ marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
     from_start = SvPVX_const(sv);
     from_end = from_start + SvCUR(sv);
     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
-       if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
+       if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break;
     if (from_ptr == from_end) {
        /* Simple case: no character needs to be changed */
        SvUTF8_on(sv);
@@ -2270,7 +2256,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                break;
            }
            len = (cur-start) % len;
-           /* FALL THROUGH */
+           /* FALLTHROUGH */
        case 'X':
            if (utf8) {
                if (len < 1) goto no_change;
@@ -2312,7 +2298,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            if (ai32 == 0) goto no_change;
            len -= ai32;
        }
-       /* FALL THROUGH */
+       /* FALLTHROUGH */
        case 'x':
            goto grow;
        case 'A':
@@ -2597,8 +2583,8 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
-                                                      NATIVE_TO_UNI(auv),
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
+                                                      auv,
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                } else {
@@ -2651,7 +2637,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                auv = SvUV(fromstr);
                if (utf8) {
                    U8 buffer[UTF8_MAXLEN], *endb;
-                   endb = uvuni_to_utf8_flags(buffer, auv,
+                   endb = uvchr_to_utf8_flags(buffer, auv,
                                               warn_utf8 ?
                                               0 : UNICODE_ALLOW_ANY);
                    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
@@ -2669,7 +2655,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                        GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
                        end = start+SvLEN(cat)-UTF8_MAXLEN;
                    }
-                   cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
+                   cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv,
                                                       warn_utf8 ?
                                                       0 : UNICODE_ALLOW_ANY);
                }
@@ -2783,7 +2769,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            }
             break;
 #else
-            /* Fall through! */
+            /* FALLTHROUGH */
 #endif
        case 'S':
            while (len-- > 0) {
@@ -2803,7 +2789,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
            }
             break;
 #else
-            /* Fall through! */
+            /* FALLTHROUGH */
 #endif
        case 's':
            while (len-- > 0) {
@@ -2994,7 +2980,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                 PUSH32(utf8, cur, &ai32, needs_swap);
            }
            break;
-#ifdef HAS_QUAD
+#if defined(HAS_QUAD) && IVSIZE >= 8
        case 'Q':
            while (len-- > 0) {
                Uquad_t auquad;
@@ -3011,11 +2997,11 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
                 PUSH_VAR(utf8, cur, aquad, needs_swap);
            }
            break;
-#endif /* HAS_QUAD */
+#endif
        case 'P':
            len = 1;            /* assume SV is correct length */
            GROWING(utf8, cat, start, cur, sizeof(char *));
-           /* Fall through! */
+           /* FALLTHROUGH */
        case 'p':
            while (len-- > 0) {
                const char *aptr;