? _generic_isCC(*(p), classnum) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
? _generic_isCC( \
- TWO_BYTE_UTF8_TO_NATIVE(*(p), \
+ EIGHT_BIT_UTF8_TO_NATIVE(*(p), \
*((p)+1 )), \
classnum) \
: utf8)
(UTF8_IS_INVARIANT(*(p)) \
? macro(*(p)) \
: (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
- ? macro(TWO_BYTE_UTF8_TO_NATIVE(*(p), *((p)+1))) \
+ ? macro(EIGHT_BIT_UTF8_TO_NATIVE(*(p), *((p)+1)))\
: utf8)
#define _generic_LC_swash_utf8(macro, classnum, p) \
}
else {
p++;
- *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p);
+ *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
}
}
}
}
else {
p++;
- *q = (char) TWO_BYTE_UTF8_TO_NATIVE(c, *p);
+ *q = (char) EIGHT_BIT_UTF8_TO_NATIVE(c, *p);
}
}
}
IN_LC_RUNTIME(LC_CTYPE)
||
#endif
- _isQUOTEMETA(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s + 1))))
+ _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
{
to_quote = TRUE;
}
}
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! IS_NON_FINAL_FOLD(TWO_BYTE_UTF8_TO_NATIVE(
+ if (! IS_NON_FINAL_FOLD(EIGHT_BIT_UTF8_TO_NATIVE(
*s, *(s+1))))
{
break;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
return isFOO_lc(classnum,
- TWO_BYTE_UTF8_TO_NATIVE(*character, *(character + 1)));
+ EIGHT_BIT_UTF8_TO_NATIVE(*character, *(character + 1)));
}
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, character + UTF8SKIP(character));
classnum)))
|| (UTF8_IS_DOWNGRADEABLE_START(*s)
&& to_complement ^ cBOOL(
- _generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*s,
+ _generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*s,
*(s + 1)),
classnum))))
{
l++;
}
else {
- if (TWO_BYTE_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s)
+ if (EIGHT_BIT_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s)
{
sayNO;
}
s++;
}
else {
- if (TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l)
+ if (EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l)
{
sayNO;
}
}
else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
- (U8) TWO_BYTE_UTF8_TO_NATIVE(nextchr,
+ (U8) EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
*(locinput + 1))))))
{
sayNO;
}
else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) {
if (! (to_complement
- ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(nextchr,
+ ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(nextchr,
*(locinput + 1)),
FLAGS(scan)))))
{
/* Target isn't utf8; convert the character in the UTF-8
* pattern to non-UTF8, and do a simple loop */
- c = TWO_BYTE_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *(STRING(p) + 1));
while (scan < loceol && UCHARAT(scan) == c) {
scan++;
}
}
else if (UTF8_IS_DOWNGRADEABLE_START(*scan)) {
if (! (to_complement
- ^ cBOOL(_generic_isCC(TWO_BYTE_UTF8_TO_NATIVE(*scan,
+ ^ cBOOL(_generic_isCC(EIGHT_BIT_UTF8_TO_NATIVE(*scan,
*(scan + 1)),
classnum))))
{
}
else {
assert(p < e -1 );
- *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
p += 2;
}
}
}
s++;
} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
+ if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
goto bad_charname;
}
s += 2;
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
+ if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
{
goto bad_charname;
}
if (u < uend) {
U8 c1 = *u++;
if (UTF8_IS_CONTINUATION(c1)) {
- c = TWO_BYTE_UTF8_TO_NATIVE(c, c1);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, c1);
} else {
Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
"Malformed UTF-8 character "
U8 c = *s++;
if (! UTF8_IS_INVARIANT(c)) {
/* Then it is two-byte encoded */
- c = TWO_BYTE_UTF8_TO_NATIVE(c, *s);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
s++;
}
*d++ = c;
U8 c = *s++;
if (! UTF8_IS_INVARIANT(c)) {
/* Then it is two-byte encoded */
- c = TWO_BYTE_UTF8_TO_NATIVE(c, *s);
+ c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
s++;
}
*d++ = c;
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toUPPER_LC(c);
}
else {
- return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_upper_title_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp, 'S');
}
}
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toUPPER_LC(c);
}
else {
- return _to_upper_title_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_upper_title_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp, 's');
}
}
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toLOWER_LC(c);
}
else {
- return to_lower_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return to_lower_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp);
}
}
}
else if UTF8_IS_DOWNGRADEABLE_START(*p) {
if (flags & FOLD_FLAGS_LOCALE) {
- U8 c = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ U8 c = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
result = toFOLD_LC(c);
}
else {
- return _to_fold_latin1(TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1)),
+ return _to_fold_latin1(EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)),
ustrp, lenp,
flags & (FOLD_FLAGS_FULL | FOLD_FLAGS_NOMIX_ASCII));
}
else if (UTF8_IS_DOWNGRADEABLE_START(c)) {
klen = 0;
needents = 256;
- off = TWO_BYTE_UTF8_TO_NATIVE(c, *(ptr + 1));
+ off = EIGHT_BIT_UTF8_TO_NATIVE(c, *(ptr + 1));
}
else {
klen = UTF8SKIP(ptr) - 1;
/* Longer, but more accurate name.  This is purely an alias: it expands to
 * UTF8_IS_ABOVE_LATIN1(c) with the same argument. */
#define UTF8_IS_ABOVE_LATIN1_START(c) UTF8_IS_ABOVE_LATIN1(c)
+/* Convert a UTF-8 variant Latin1 character to a native code point value.
+ * Needs just one iteration of accumulate. Should be used only if it is known
+ * that the code point is < 256, and is not UTF-8 invariant. Otherwise, use
+ * the slower but more general TWO_BYTE_UTF8_TO_NATIVE(), which handles any
+ * code point representable by two bytes (which turns out to be up through
+ * MAX_PORTABLE_UTF8_TWO_BYTE). The two parameters are:
+ * HI: a downgradable start byte;
+ * LO: continuation.
+ *
+ * Both requirements are checked with __ASSERT_(): HI must satisfy
+ * UTF8_IS_DOWNGRADEABLE_START() and LO must satisfy UTF8_IS_CONTINUATION().
+ * The value is then built by passing HI through NATIVE_UTF8_TO_I8(), masking
+ * it with UTF_START_MASK(2), combining that with LO via UTF8_ACCUMULATE(),
+ * and finally mapping the result through LATIN1_TO_NATIVE().
+ *
+ * HI and LO each appear twice in the expansion (once in an assertion, once
+ * in the computation), so they may be evaluated more than once; avoid
+ * passing arguments that have side effects.
+ * */
+#define EIGHT_BIT_UTF8_TO_NATIVE(HI, LO) \
+ ( __ASSERT_(UTF8_IS_DOWNGRADEABLE_START(HI)) \
+ __ASSERT_(UTF8_IS_CONTINUATION(LO)) \
+ LATIN1_TO_NATIVE(UTF8_ACCUMULATE(( \
+ NATIVE_UTF8_TO_I8(HI) & UTF_START_MASK(2)), (LO))))
+
/* Convert a two (not one) byte utf8 character to a native code point value.
* Needs just one iteration of accumulate. Should not be used unless it is
* known that the two bytes are legal: 1) two-byte start, and 2) continuation.