}
/* Add the (Unicode) code point to the output. */
- if (UNI_IS_INVARIANT(uv)) {
+ if (OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
{
PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
- if (UNI_IS_INVARIANT(uv)) {
+ if (OFFUNI_IS_INVARIANT(uv)) {
*d++ = (U8) LATIN1_TO_NATIVE(uv);
return d;
}
while (p < pend) {
UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
p += 2;
- if (UNI_IS_INVARIANT(uv)) {
+ if (OFFUNI_IS_INVARIANT(uv)) {
*d++ = LATIN1_TO_NATIVE((U8) uv);
continue;
}
/* Is the representation of the Unicode code point 'cp' the same regardless of
* being encoded in UTF-8 or not? */
-#define UNI_IS_INVARIANT(cp) isASCII(cp)
+#define OFFUNI_IS_INVARIANT(cp) isASCII(cp)
/* Is the representation of the code point 'cp' the same regardless of
* being encoded in UTF-8 or not? 'cp' is native if < 256; Unicode otherwise
* */
-#define UVCHR_IS_INVARIANT(uv) UNI_IS_INVARIANT(uv)
+#define UVCHR_IS_INVARIANT(uv) OFFUNI_IS_INVARIANT(uv)
/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
* in UTF-8? This is the inverse of UTF8_IS_INVARIANT */
#define UTF8SKIP(s) PL_utf8skip[*(const U8*)(s)]
#define UTF8_SKIP(s) UTF8SKIP(s)
+/* Most code that says 'UNI_' really means the native value for code points up
+ * through 255, so the legacy name UNI_IS_INVARIANT is kept as an alias for
+ * UVCHR_IS_INVARIANT.  Callers that hold a true Unicode (not native) code
+ * point should use OFFUNI_IS_INVARIANT instead. */
+#define UNI_IS_INVARIANT(cp) UVCHR_IS_INVARIANT(cp)
+
/* Is the byte 'c' the same character when encoded in UTF-8 as when not. This
* works on both UTF-8 encoded strings and non-encoded, as it returns TRUE in
* each for the exact same set of bit patterns. It is valid on a subset of
(uv) < 0x400000 ? 5 : \
(uv) < 0x4000000 ? 6 : 7 )
-
-#define UNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0)
+#define OFFUNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0)
/* It turns out that on EBCDIC platforms, the invariants are the characters
* that have ASCII equivalents, plus the C1 controls. Since the C0 controls