This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pp.c, utf8.c: Convert to use TWO_BYTE_UTF8_TO_UNI
author Karl Williamson <public@khwilliamson.com>
Mon, 15 Nov 2010 17:27:02 +0000 (10:27 -0700)
committer Father Chrysostomos <sprout@cpan.org>
Mon, 22 Nov 2010 21:32:54 +0000 (13:32 -0800)
pp.c
utf8.c

diff --git a/pp.c b/pp.c
index 9a74280..ca73573 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -3828,7 +3828,7 @@ PP(pp_ucfirst)
 
            /* Convert the two source bytes to a single Unicode code point
             * value, change case and save for below */
-           chr = UTF8_ACCUMULATE(*s, *(s+1));
+           chr = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
            if (op_type == OP_LCFIRST) {    /* lower casing is easy */
                U8 lower = toLOWER_LATIN1(chr);
                STORE_UNI_TO_UTF8_TWO_BYTE(tmpbuf, lower);
@@ -4153,7 +4153,7 @@ PP(pp_uc)
 
                /* Likewise, if it fits in a byte, its case change is in our
                 * table */
-               U8 orig = UTF8_ACCUMULATE(*s, *(s+1));
+               U8 orig = TWO_BYTE_UTF8_TO_UNI(*s, *(s+1));
                U8 upper = toUPPER_LATIN1_MOD(orig);
                CAT_TWO_BYTE_UNI_UPPER_MOD(d, orig, upper);
                s += 2;
@@ -4391,7 +4391,7 @@ PP(pp_lc)
            else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
 
                /* As do the ones in the Latin1 range */
-               U8 lower = toLOWER_LATIN1(UTF8_ACCUMULATE(*s, *(s+1)));
+               U8 lower = toLOWER_LATIN1(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1)));
                CAT_UNI_TO_UTF8_TWO_BYTE(d, lower);
                s += 2;
            }
diff --git a/utf8.c b/utf8.c
index 019d49f..a818b3e 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -836,8 +836,7 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
                if (u < uend) {
                    U8 c1 = *u++;
                    if (UTF8_IS_CONTINUATION(c1)) {
-                       c = UTF8_ACCUMULATE(NATIVE_TO_UTF(c), c1);
-                       c = ASCII_TO_NATIVE(c);
+                       c = UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(c, c1));
                    } else {
                        Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
                                         "Malformed UTF-8 character "
@@ -966,8 +965,7 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8)
        U8 c = *s++;
        if (!UTF8_IS_INVARIANT(c)) {
            /* Then it is two-byte encoded */
-           c = UTF8_ACCUMULATE(NATIVE_TO_UTF(c), *s++);
-           c = ASCII_TO_NATIVE(c);
+           c = UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(c, *s++));
        }
        *d++ = c;
     }