*d++ = (U8)(( uv & 0x3f) | 0x80);
return d;
}
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
if (uv < UTF8_QUAD_MAX)
#endif
{
*d++ = (U8)(( uv & 0x3f) | 0x80);
return d;
}
-#ifdef HAS_QUAD
+#ifdef UTF8_QUAD_MAX
{
*d++ = 0xff; /* Can't match U+FFFE! */
*d++ = 0x80; /* 6 Reserved bits */
len = UTF8SKIP(buf);
}
-#ifdef IS_UTF8_CHAR
if (IS_UTF8_CHAR_FAST(len))
return IS_UTF8_CHAR(buf, len) ? len : 0;
-#endif /* #ifdef IS_UTF8_CHAR */
return is_utf8_char_slow(buf, len);
}
* We also should not consume too few bytes, otherwise someone could inject
* things. For example, an input could be deliberately designed to
* overflow, and if this code bailed out immediately upon discovering that,
- * returning to the caller *retlen pointing to the very next byte (one
+ * returning to the caller C<*retlen> pointing to the very next byte (one
* which is actually part of the overflowing sequence), that could look
* legitimate to the caller, which could discard the initial partial
* sequence and process the rest, inappropriately */
*lenp = 1;
}
else {
- *ustrp = UTF8_EIGHT_BIT_HI(result);
- *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
*lenp = 2;
}
*lenp = 1;
}
else {
- *ustrp = UTF8_EIGHT_BIT_HI(result);
- *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
*lenp = 2;
}
*lenp = 1;
}
else {
- *ustrp = UTF8_EIGHT_BIT_HI(result);
- *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
*lenp = 2;
}
*lenp = 1;
}
else {
- *ustrp = UTF8_EIGHT_BIT_HI(result);
- *(ustrp + 1) = UTF8_EIGHT_BIT_LO(result);
+ *ustrp = UTF8_EIGHT_BIT_HI((U8) result);
+ *(ustrp + 1) = UTF8_EIGHT_BIT_LO((U8) result);
*lenp = 2;
}
*max = *min;
/* Non-binary tables have a third entry: what the first element of the
- * range maps to */
+ * range maps to. The map for those currently read here is in hex */
if (wants_value) {
if (isBLANK(*l)) {
++l;
-
- /* The ToLc, etc table mappings are not in hex, and must be
- * corrected by adding the code point to them */
- if (typeto) {
- char *after_strtol = (char *) lend;
- *val = Strtol((char *)l, &after_strtol, 10);
- l = (U8 *) after_strtol;
- }
- else { /* Other tables are in hex, and are the correct result
- without tweaking */
- flags = PERL_SCAN_SILENT_ILLDIGIT
- | PERL_SCAN_DISALLOW_PREFIX
- | PERL_SCAN_SILENT_NON_PORTABLE;
- numlen = lend - l;
- *val = grok_hex((char *)l, &numlen, &flags, NULL);
- if (numlen)
- l += numlen;
- else
- *val = 0;
- }
+ flags = PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_DISALLOW_PREFIX
+ | PERL_SCAN_SILENT_NON_PORTABLE;
+ numlen = lend - l;
+ *val = grok_hex((char *)l, &numlen, &flags, NULL);
+ if (numlen)
+ l += numlen;
+ else
+ *val = 0;
}
else {
*val = 0;
Instead use L</utf8_to_uvchr_buf>, or rarely, L</utf8n_to_uvchr>.
-This function was usefulfor code that wanted to handle both EBCDIC and
+This function was useful for code that wanted to handle both EBCDIC and
ASCII platforms with Unicode properties, but starting in Perl v5.20, the
distinctions between the platforms have mostly been made invisible to most
-code, so this function is quite unlikely to be what you want.
-C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>> instead.
+code, so this function is quite unlikely to be what you want. If you do need
+this precise functionality, use instead
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|/utf8_to_uvchr_buf>>
+or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|/utf8n_to_uvchr>>.
=cut
*/