* UTF-8 encoded character that gives the number of bytes that comprise the
* character.
* */
/* Diff pair: the '-' line is the previous definition, the '+' line its
 * replacement.  Both yield 0xFF when len is above 7.  Otherwise the value is
 * 0xFE shifted left by (7 - len); that shift is done in int, so for any
 * non-zero shift count the old form produced a value wider than 8 bits
 * (e.g. len 2 gives 0x1FC0).  The replacement ANDs with 0xFF so the result
 * always fits in one byte (len 2 gives 0xC0). */
-#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : (0xFE << (7-(len))))
+#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : (0xFF & (0xFE << (7-(len)))))
/* Masks out the initial one bits in a start byte, leaving the real data ones.
* Doesn't work on an invariant byte */
* however this won't work for EBCDIC, and should be avoided. Use
* regen/unicode_constants instead */
/* Builds the first byte of a two-byte sequence for c: NATIVE_TO_UNI(c) is
 * shifted right by UTF_ACCUMULATION_SHIFT, ORed with UTF_START_MARK(2), and
 * the result is passed through I8_TO_NATIVE_UTF8.  No cast is applied here.
 * Diff pair on the continuation line: the '+' version drops the explicit
 * (0xFF & ...) around UTF_START_MARK(2).
 * NOTE(review): this relies on UTF_START_MARK itself masking to 8 bits --
 * confirm the two changes are applied together. */
#define UTF8_TWO_BYTE_HI_nocast(c) I8_TO_NATIVE_UTF8((NATIVE_TO_UNI(c) \
- >> UTF_ACCUMULATION_SHIFT) | (0xFF & UTF_START_MARK(2)))
+ >> UTF_ACCUMULATION_SHIFT) | UTF_START_MARK(2))
/* Second byte of the two-byte form of c: the bits of NATIVE_TO_UNI(c)
 * selected by UTF_CONTINUATION_MASK, with UTF_CONTINUATION_MARK ORed in,
 * then mapped through I8_TO_NATIVE_UTF8.  No cast is applied. */
#define UTF8_TWO_BYTE_LO_nocast(c)                                          \
    I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK                                 \
                      | (NATIVE_TO_UNI(c) & UTF_CONTINUATION_MASK))
* These expand identically to the TWO_BYTE versions on ASCII platforms, but
* use to/from LATIN1 instead of UNI, which on EBCDIC eliminates tests */
/* Builds the first byte of a two-byte sequence for c, going through
 * NATIVE_TO_LATIN1: the converted value is shifted right by
 * UTF_ACCUMULATION_SHIFT, ORed with UTF_START_MARK(2), and passed through
 * I8_TO_NATIVE_UTF8.
 * Diff pair on the continuation line: the '+' version drops the explicit
 * (0xFF & ...) around UTF_START_MARK(2).
 * NOTE(review): this relies on UTF_START_MARK itself masking to 8 bits --
 * confirm the two changes are applied together. */
#define UTF8_EIGHT_BIT_HI(c) I8_TO_NATIVE_UTF8((NATIVE_TO_LATIN1(c) \
- >> UTF_ACCUMULATION_SHIFT) | (0xFF & UTF_START_MARK(2)))
+ >> UTF_ACCUMULATION_SHIFT) | UTF_START_MARK(2))
/* Second byte of the two-byte form of c: the bits of NATIVE_TO_LATIN1(c)
 * selected by UTF_CONTINUATION_MASK, with UTF_CONTINUATION_MARK ORed in,
 * then mapped through I8_TO_NATIVE_UTF8. */
#define UTF8_EIGHT_BIT_LO(c)                                                \
    I8_TO_NATIVE_UTF8(UTF_CONTINUATION_MARK                                 \
                      | (NATIVE_TO_LATIN1(c) & UTF_CONTINUATION_MASK))
&& NATIVE_UTF8_TO_I8(c) <= 0xC7)
/* True when NATIVE_UTF8_TO_I8(c) is 0xC8 or greater. */
#define UTF8_IS_ABOVE_LATIN1(c) (0xC8 <= NATIVE_UTF8_TO_I8(c))
/* Diff pair: the '-' line is the previous definition, the '+' lines its
 * replacement.  The old form returned 0xFF for len above 7 and otherwise
 * cast (0xFE << (7 - len)) to U8.  The new form has no len > 7 branch and
 * keeps the low 8 bits with a 0xFF mask instead of the cast.  With the
 * branch gone, a len above 7 would make the shift count negative, so callers
 * must stay within the limit stated in the added comment. */
-#define UTF_START_MARK(len) (((len) > 7) ? 0xFF : ((U8)(0xFE << (7-(len)))))
+/* Can't exceed 7 on EBCDIC platforms */
+#define UTF_START_MARK(len) (0xFF & (0xFE << (7-(len))))
+
/* Mask for a given sequence length: 0x1F shifted right by (len - 2) while
 * len is below 6, and 0x01 for every len from 6 upward.  A len below 2
 * would give a negative shift count, so it must not be passed. */
#define UTF_START_MASK(len) (((len) < 6) ? (0x1F >> ((len) - 2)) : 0x01)
/* Bit pattern 0xA0 (binary 1010 0000).  Macros such as
 * UTF8_TWO_BYTE_LO_nocast OR a constant of this name into the masked value
 * to form a continuation byte. */
#define UTF_CONTINUATION_MARK 0xA0
/* Selects the low five bits (0x1f), as a U8.  Macros such as
 * UTF8_TWO_BYTE_LO_nocast AND a value with a constant of this name before
 * ORing in the mark. */
#define UTF_CONTINUATION_MASK ((U8)0x1f)