X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/65ab9279784aa811d78b2903b57bc0e7947dec78..1e696034880c724355310894883f86e27e0cb264:/utf8.h diff --git a/utf8.h b/utf8.h index b0cfedf..6864b3a 100644 --- a/utf8.h +++ b/utf8.h @@ -20,6 +20,16 @@ #define uvuni_to_utf8(d, uv) uvuni_to_utf8_flags(d, uv, 0) #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0) +/* +=for apidoc ibcmp_utf8 + +This is a synonym for (! foldEQ_utf8()) + +=cut +*/ +#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \ + cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2)) + #ifdef EBCDIC /* The equivalent of these macros but implementing UTF-EBCDIC are in the following header file: @@ -153,6 +163,15 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define UTF8_ACCUMULATE(old, new) (((old) << UTF_ACCUMULATION_SHIFT) | (((U8)new) & UTF_CONTINUATION_MASK)) +/* Convert a two (not one) byte utf8 character to a unicode code point value. + * Needs just one iteration of accumulate. Should not be used unless it is + * known that the two bytes are legal: 1) two-byte start, and 2) continuation. + * Note that the result can be larger than 255 if the input character is not + * downgradable */ +#define TWO_BYTE_UTF8_TO_UNI(HI, LO) \ + UTF8_ACCUMULATE((NATIVE_TO_UTF(HI) & UTF_START_MASK(2)), \ + NATIVE_TO_UTF(LO)) + #define UTF8SKIP(s) PL_utf8skip[*(const U8*)(s)] #define UTF8_IS_INVARIANT(c) UNI_IS_INVARIANT(NATIVE_TO_UTF(c)) @@ -256,6 +275,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define UNICODE_GREEK_CAPITAL_LETTER_SIGMA 0x03A3 #define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2 #define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3 +#define GREEK_SMALL_LETTER_MU 0x03BC #define UNI_DISPLAY_ISPRINT 0x0001 #define UNI_DISPLAY_BACKSLASH 0x0002 @@ -270,7 +290,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define ANYOF_FOLD_SHARP_S(node, input, end) \ (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \ - (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \ + (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \ (ANYOF_FLAGS(node) & ANYOF_FOLD) && \ ((end) > (input) + 1) && \ toLOWER((input)[0]) == 's' && \