This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Glob.xs: Remove comment
[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c
index 7001041..9b42c75 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1346,8 +1346,24 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
 {
     PERL_ARGS_ASSERT_TO_UNI_LOWER;
 
-    uvchr_to_utf8(p, c);
-    return to_utf8_lower(p, p, lenp);
+    if (c > 255) {
+       uvchr_to_utf8(p, c);
+       return to_utf8_lower(p, p, lenp);
+    }
+
+    /* We have the latin1-range values compiled into the core, so just use
+     * those, converting the result to utf8 */
+    c = toLOWER_LATIN1(c);
+    if (UNI_IS_INVARIANT(c)) {
+       *p = c;
+       *lenp = 1;
+    }
+    else {
+       *p = UTF8_TWO_BYTE_HI(c);
+       *(p+1) = UTF8_TWO_BYTE_LO(c);
+       *lenp = 2;
+    }
+    return c;
 }
 
 UV
@@ -3444,45 +3460,48 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1
             * handle locale rules */
            }
            else {
-           if ((flags & FOLDEQ_UTF8_LOCALE)
-               && (! u1 || UTF8_IS_INVARIANT(*p1) || UTF8_IS_DOWNGRADEABLE_START(*p1)))
-           {
-               /* There is no mixing of code points above and below 255. */
-               if (u2 && (! UTF8_IS_INVARIANT(*p2)
-                   && ! UTF8_IS_DOWNGRADEABLE_START(*p2)))
+               if ((flags & FOLDEQ_UTF8_LOCALE)
+                   && (! u1 || UTF8_IS_INVARIANT(*p1)
+                       || UTF8_IS_DOWNGRADEABLE_START(*p1)))
                {
-                   return 0;
-               }
+                   /* There is no mixing of code points above and below 255. */
+                   if (u2 && (! UTF8_IS_INVARIANT(*p2)
+                       && ! UTF8_IS_DOWNGRADEABLE_START(*p2)))
+                   {
+                       return 0;
+                   }
 
-               /* We handle locale rules by converting, if necessary, the code
-                * point to a single byte. */
-               if (! u1 || UTF8_IS_INVARIANT(*p1)) {
-                   *foldbuf1 = *p1;
+                   /* We handle locale rules by converting, if necessary, the
+                    * code point to a single byte. */
+                   if (! u1 || UTF8_IS_INVARIANT(*p1)) {
+                       *foldbuf1 = *p1;
+                   }
+                   else {
+                       *foldbuf1 = TWO_BYTE_UTF8_TO_UNI(*p1, *(p1 + 1));
+                   }
+                   n1 = 1;
                }
-               else {
-                   *foldbuf1 = TWO_BYTE_UTF8_TO_UNI(*p1, *(p1 + 1));
+               else if (isASCII(*p1)) {        /* Note, that here won't be
+                                                  both ASCII and using locale
+                                                  rules */
+
+                   /* If mixing ASCII with non-ASCII is forbidden
+                    * (FOLDEQ_UTF8_NOMIX_ASCII) and *p2 is not ASCII, fail */
+                   if ((flags & FOLDEQ_UTF8_NOMIX_ASCII) && ! isASCII(*p2)) {
+                       return 0;
+                   }
+                   n1 = 1;
+                   *foldbuf1 = toLOWER(*p1);   /* Folds in the ASCII range are
+                                                  just lowercased */
                }
-               n1 = 1;
-           }
-           else if (isASCII(*p1)) {    /* Note, that here won't be both ASCII
-                                          and using locale rules */
-
-               /* If trying to mix non- with ASCII, and not supposed to, fail */
-               if ((flags & FOLDEQ_UTF8_NOMIX_ASCII) && ! isASCII(*p2)) {
-                   return 0;
+               else if (u1) {
+                   to_utf8_fold(p1, foldbuf1, &n1);
                }
-               n1 = 1;
-               *foldbuf1 = toLOWER(*p1);   /* Folds in the ASCII range are
-                                              just lowercased */
-           }
-           else if (u1) {
-                to_utf8_fold(p1, foldbuf1, &n1);
-            }
-            else {  /* Not utf8, convert to it first and then get fold */
-                uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
-                to_utf8_fold(natbuf, foldbuf1, &n1);
-            }
-            f1 = foldbuf1;
+               else {  /* Not utf8, convert to it first and then get fold */
+                   uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1)));
+                   to_utf8_fold(natbuf, foldbuf1, &n1);
+               }
+               f1 = foldbuf1;
            }
         }
 
@@ -3492,51 +3511,51 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, register UV l1, bool u1
                n2 = UTF8SKIP(f2);
            }
            else {
-           if ((flags & FOLDEQ_UTF8_LOCALE)
-               && (! u2 || UTF8_IS_INVARIANT(*p2) || UTF8_IS_DOWNGRADEABLE_START(*p2)))
-           {
-               /* Here, the next char in s2 is < 256.  We've already worked on
-                * s1, and if it isn't also < 256, can't match */
-               if (u1 && (! UTF8_IS_INVARIANT(*p1)
-                   && ! UTF8_IS_DOWNGRADEABLE_START(*p1)))
+               if ((flags & FOLDEQ_UTF8_LOCALE)
+                   && (! u2 || UTF8_IS_INVARIANT(*p2) || UTF8_IS_DOWNGRADEABLE_START(*p2)))
                {
-                   return 0;
-               }
-               if (! u2 || UTF8_IS_INVARIANT(*p2)) {
-                   *foldbuf2 = *p2;
+                   /* Here, the next char in s2 is < 256.  We've already
+                    * worked on s1, and if it isn't also < 256, can't match */
+                   if (u1 && (! UTF8_IS_INVARIANT(*p1)
+                       && ! UTF8_IS_DOWNGRADEABLE_START(*p1)))
+                   {
+                       return 0;
+                   }
+                   if (! u2 || UTF8_IS_INVARIANT(*p2)) {
+                       *foldbuf2 = *p2;
+                   }
+                   else {
+                       *foldbuf2 = TWO_BYTE_UTF8_TO_UNI(*p2, *(p2 + 1));
+                   }
+
+                   /* Use another function to handle locale rules.  We've made
+                    * sure that both characters to compare are single bytes */
+                   if (! foldEQ_locale((char *) f1, (char *) foldbuf2, 1)) {
+                       return 0;
+                   }
+                   n1 = n2 = 0;
                }
-               else {
-                   *foldbuf2 = TWO_BYTE_UTF8_TO_UNI(*p2, *(p2 + 1));
+               else if (isASCII(*p2)) {
+                   if (flags && ! isASCII(*p1)) {
+                       return 0;
+                   }
+                   n2 = 1;
+                   *foldbuf2 = toLOWER(*p2);
                }
-
-               /* Use another function to handle locale rules.  We've made
-                * sure that both characters to compare are single bytes */
-               if (! foldEQ_locale((char *) f1, (char *) foldbuf2, 1)) {
-                   return 0;
+               else if (u2) {
+                   to_utf8_fold(p2, foldbuf2, &n2);
                }
-               n1 = n2 = 0;
-           }
-           else if (isASCII(*p2)) {
-               if (flags && ! isASCII(*p1)) {
-                   return 0;
+               else {
+                   uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
+                   to_utf8_fold(natbuf, foldbuf2, &n2);
                }
-               n2 = 1;
-               *foldbuf2 = toLOWER(*p2);
-           }
-           else if (u2) {
-                to_utf8_fold(p2, foldbuf2, &n2);
-            }
-            else {
-                uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2)));
-                to_utf8_fold(natbuf, foldbuf2, &n2);
-            }
-            f2 = foldbuf2;
+               f2 = foldbuf2;
            }
         }
 
        /* Here f1 and f2 point to the beginning of the strings to compare.
-        * These strings are the folds of the input characters, stored in utf8.
-        */
+        * These strings are the folds of the next character from each input
+        * string, stored in utf8. */
 
         /* While there is more to look for in both folds, see if they
         * continue to match */