This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Module-CoreList on CPAN is 3.01
[perl5.git] / utf8.c
diff --git a/utf8.c b/utf8.c
index 4745a63..bf51a91 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -190,7 +190,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
        *d++ = (U8)(( uv        & 0x3f) | 0x80);
        return d;
     }
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
     if (uv < UTF8_QUAD_MAX)
 #endif
     {
@@ -203,7 +203,7 @@ Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
        *d++ = (U8)(( uv        & 0x3f) | 0x80);
        return d;
     }
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
     {
        *d++ =                            0xff;         /* Can't match U+FFFE! */
        *d++ =                            0x80;         /* 6 Reserved bits */
@@ -366,10 +366,8 @@ Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end)
        len = UTF8SKIP(buf);
     }
 
-#ifdef IS_UTF8_CHAR
     if (IS_UTF8_CHAR_FAST(len))
         return IS_UTF8_CHAR(buf, len) ? len : 0;
-#endif /* #ifdef IS_UTF8_CHAR */
     return is_utf8_char_slow(buf, len);
 }
 
@@ -630,7 +628,7 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
      * We also should not consume too few bytes, otherwise someone could inject
      * things.  For example, an input could be deliberately designed to
      * overflow, and if this code bailed out immediately upon discovering that,
-     * returning to the caller *retlen pointing to the very next byte (one
+     * returning to the caller C<*retlen> pointing to the very next byte (one
      * which is actually part of of the overflowing sequence), that could look
      * legitimate to the caller, which could discard the initial partial
      * sequence and process the rest, inappropriately */
@@ -2611,8 +2609,8 @@ Perl__to_utf8_upper_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
        *lenp = 1;
     }
     else {
-       *ustrp = UTF8_EIGHT_BIT_HI(result);
-       *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+       *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+       *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
        *lenp = 2;
     }
 
@@ -2677,8 +2675,8 @@ Perl__to_utf8_title_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
        *lenp = 1;
     }
     else {
-       *ustrp = UTF8_EIGHT_BIT_HI(result);
-       *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+       *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+       *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
        *lenp = 2;
     }
 
@@ -2742,8 +2740,8 @@ Perl__to_utf8_lower_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, const bool
        *lenp = 1;
     }
     else {
-       *ustrp = UTF8_EIGHT_BIT_HI(result);
-       *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+       *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+       *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
        *lenp = 2;
     }
 
@@ -2872,8 +2870,8 @@ Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags, b
        *lenp = 1;
     }
     else {
-       *ustrp = UTF8_EIGHT_BIT_HI(result);
-       *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+       *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+       *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
        *lenp = 2;
     }
 
@@ -3426,30 +3424,19 @@ S_swash_scan_list_line(pTHX_ U8* l, U8* const lend, UV* min, UV* max, UV* val,
            *max = *min;
 
        /* Non-binary tables have a third entry: what the first element of the
-        * range maps to */
+        * range maps to.  The map for those currently read here is in hex */
        if (wants_value) {
            if (isBLANK(*l)) {
                ++l;
-
-               /* The ToLc, etc table mappings are not in hex, and must be
-                * corrected by adding the code point to them */
-               if (typeto) {
-                   char *after_strtol = (char *) lend;
-                   *val = Strtol((char *)l, &after_strtol, 10);
-                   l = (U8 *) after_strtol;
-               }
-               else { /* Other tables are in hex, and are the correct result
-                         without tweaking */
-                   flags = PERL_SCAN_SILENT_ILLDIGIT
-                       | PERL_SCAN_DISALLOW_PREFIX
-                       | PERL_SCAN_SILENT_NON_PORTABLE;
-                   numlen = lend - l;
-                   *val = grok_hex((char *)l, &numlen, &flags, NULL);
-                   if (numlen)
-                       l += numlen;
-                   else
-                       *val = 0;
-               }
+                flags = PERL_SCAN_SILENT_ILLDIGIT
+                    | PERL_SCAN_DISALLOW_PREFIX
+                    | PERL_SCAN_SILENT_NON_PORTABLE;
+                numlen = lend - l;
+                *val = grok_hex((char *)l, &numlen, &flags, NULL);
+                if (numlen)
+                    l += numlen;
+                else
+                    *val = 0;
            }
            else {
                *val = 0;
@@ -4762,11 +4749,13 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 
 Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
 
-This function was usefulfor code that wanted to handle both EBCDIC and
+This function was useful for code that wanted to handle both EBCDIC and
 ASCII platforms with Unicode properties, but starting in Perl v5.20, the
 distinctions between the platforms have mostly been made invisible to most
-code, so this function is quite unlikely to be what you want.
-C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
+code, so this function is quite unlikely to be what you want.  If you do need
+this precise functionality, use instead
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>>
+or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|/utf8n_to_uvchr>>.
 
 =cut
 */