This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
utf8.c: Don't use swash for to_uni_lower() latin1 calls
author Karl Williamson <public@khwilliamson.com>
Sun, 16 Oct 2011 19:36:36 +0000 (13:36 -0600)
committer Karl Williamson <public@khwilliamson.com>
Tue, 18 Oct 2011 03:52:17 +0000 (21:52 -0600)
The lowercase of latin-1 range code points is known to the Perl core, so
for those we can short-circuit converting to utf8 and reading in a swash.

utf8.c

diff --git a/utf8.c b/utf8.c
index 0c21584..9b42c75 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1346,8 +1346,24 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
-    uvchr_to_utf8(p, c);
-    return to_utf8_lower(p, p, lenp);
+    if (c > 255) {
+       uvchr_to_utf8(p, c);
+       return to_utf8_lower(p, p, lenp);
+    }
+
+    /* We have the latin1-range values compiled into the core, so just use
+     * those, converting the result to utf8 */
+    c = toLOWER_LATIN1(c);
+    if (UNI_IS_INVARIANT(c)) {
+       *p = c;
+       *lenp = 1;
+    }
+    else {
+       *p = UTF8_TWO_BYTE_HI(c);
+       *(p+1) = UTF8_TWO_BYTE_LO(c);
+       *lenp = 2;
+    }
+    return c;
 }
 
 UV
 }
 
 UV