This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
pp.c: Eliminate custom macro and use Copy() instead
authorKarl Williamson <public@khwilliamson.com>
Sun, 5 May 2013 02:23:14 +0000 (20:23 -0600)
committerKarl Williamson <public@khwilliamson.com>
Mon, 20 May 2013 17:01:51 +0000 (11:01 -0600)
I think it's clearer to use Copy.  When I wrote this custom macro, we
didn't have the infrastructure to generate a UTF-8 encoded string at
compile time.

pp.c
regen/unicode_constants.pl
unicode_constants.h

diff --git a/pp.c b/pp.c
index bd2d8c9..48710cc 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -47,6 +47,9 @@ extern Pid_t getpid (void);
     _LIB_VERSION_TYPE _LIB_VERSION = _IEEE_;
 #endif
 
+static const STRLEN small_mu_len = sizeof(GREEK_SMALL_LETTER_MU_UTF8) - 1;
+static const STRLEN capital_iota_len = sizeof(GREEK_CAPITAL_LETTER_IOTA_UTF8) - 1;
+
 /* variations on pp_null */
 
 PP(pp_stub)
@@ -3430,15 +3433,6 @@ PP(pp_crypt)
 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level.  So 
  * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
 
-/* Generates code to store a unicode codepoint c that is known to occupy
- * exactly two UTF-8 and UTF-EBCDIC bytes; it is stored into p and p+1,
- * and p is advanced to point to the next available byte after the two bytes */
-#define CAT_UNI_TO_UTF8_TWO_BYTE(p, c)                                     \
-    STMT_START {                                                           \
-       *(p)++ = UTF8_TWO_BYTE_HI(c);                                       \
-       *((p)++) = UTF8_TWO_BYTE_LO(c);                                     \
-    } STMT_END
-
 PP(pp_ucfirst)
 {
     /* Actually is both lcfirst() and ucfirst().  Only the first character
@@ -3762,10 +3756,8 @@ PP(pp_uc)
            if (in_iota_subscript && ! _is_utf8_mark(s)) {
 
                /* A non-mark.  Time to output the iota subscript */
-#define GREEK_CAPITAL_LETTER_IOTA 0x0399
-#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
-
-               CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+               Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+                d += capital_iota_len;
                in_iota_subscript = FALSE;
             }
 
@@ -3775,6 +3767,8 @@ PP(pp_uc)
             u = UTF8SKIP(s);
             uv = _to_utf8_upper_flags(s, tmpbuf, &ulen,
                                      cBOOL(IN_LOCALE_RUNTIME), &tainted);
+#define GREEK_CAPITAL_LETTER_IOTA 0x0399
+#define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
             if (uv == GREEK_CAPITAL_LETTER_IOTA
                 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
             {
@@ -3800,7 +3794,8 @@ PP(pp_uc)
             s += u;
        }
        if (in_iota_subscript) {
-           CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_CAPITAL_LETTER_IOTA);
+            Copy(GREEK_CAPITAL_LETTER_IOTA_UTF8, d, capital_iota_len, U8);
+            d += capital_iota_len;
        }
        SvUTF8_on(dest);
        *d = '\0';
@@ -4231,7 +4226,8 @@ PP(pp_fc)
                                                 (send -s) * 2 + 1);
                     d = (U8*)SvPVX(dest) + len;
 
-                    CAT_UNI_TO_UTF8_TWO_BYTE(d, GREEK_SMALL_LETTER_MU);
+                    Copy(GREEK_SMALL_LETTER_MU_UTF8, d, small_mu_len, U8);
+                    d += small_mu_len;
                     s++;
                     for (; s < send; s++) {
                         STRLEN ulen;
index 1977fbd..0c45cf0 100644 (file)
@@ -145,6 +145,9 @@ U+0300 string
 U+0301 string
 U+0308 string
 
+U+0399 string
+U+03BC string
+
 U+03B9 string
 
 U+03C5 string
index 19f3acd..90521ec 100644 (file)
@@ -25,6 +25,9 @@
 #define COMBINING_ACUTE_ACCENT_UTF8  "\xCC\x81"    /* U+0301 */
 #define COMBINING_DIAERESIS_UTF8  "\xCC\x88"    /* U+0308 */
 
+#define GREEK_CAPITAL_LETTER_IOTA_UTF8  "\xCE\x99"    /* U+0399 */
+#define GREEK_SMALL_LETTER_MU_UTF8  "\xCE\xBC"    /* U+03BC */
+
 #define GREEK_SMALL_LETTER_IOTA_UTF8  "\xCE\xB9"    /* U+03B9 */
 
 #define GREEK_SMALL_LETTER_UPSILON_UTF8  "\xCF\x85"    /* U+03C5 */