regcomp.c: teach tries about EXACTFU

[perl5.git] / utf8.h
diff --git a/utf8.h b/utf8.h

index b0cfedf..6864b3a 100644 (file)
--- a/utf8.h
+++ b/utf8.h
@@ -20,6 +20,16 @@
  #define uvuni_to_utf8(d, uv)           uvuni_to_utf8_flags(d, uv, 0)
  #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)
  
+/*
+=for apidoc ibcmp_utf8
+
+This is a synonym for (! foldEQ_utf8()), called with the same arguments.
+
+=cut
+*/
+#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
+                   cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))
+
  #ifdef EBCDIC
  /* The equivalent of these macros but implementing UTF-EBCDIC
     are in the following header file:
@@ -153,6 +163,15 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  
  #define UTF8_ACCUMULATE(old, new)      (((old) << UTF_ACCUMULATION_SHIFT) | (((U8)new) & UTF_CONTINUATION_MASK))
  
+/* Convert a two-byte (not one-byte) UTF-8 character to its Unicode code point.
+ * Needs just one iteration of UTF8_ACCUMULATE.  Should not be used unless it
+ * is known that the two bytes are legal: HI is a two-byte start byte and LO
+ * is a continuation byte.  Note that the result can be larger than 255 if the
+ * input character is not downgradable. */
+#define TWO_BYTE_UTF8_TO_UNI(HI, LO) \
+                   UTF8_ACCUMULATE((NATIVE_TO_UTF(HI) & UTF_START_MASK(2)), \
+                                    NATIVE_TO_UTF(LO))
+
  #define UTF8SKIP(s) PL_utf8skip[*(const U8*)(s)]
  
  #define UTF8_IS_INVARIANT(c)           UNI_IS_INVARIANT(NATIVE_TO_UTF(c))
@@ -256,6 +275,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  #define UNICODE_GREEK_CAPITAL_LETTER_SIGMA     0x03A3
  #define UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
  #define UNICODE_GREEK_SMALL_LETTER_SIGMA       0x03C3
+#define GREEK_SMALL_LETTER_MU                   0x03BC
  
  #define UNI_DISPLAY_ISPRINT    0x0001
  #define UNI_DISPLAY_BACKSLASH  0x0002
@@ -270,7 +290,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  
  #define ANYOF_FOLD_SHARP_S(node, input, end)   \
         (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
-        (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \
+        (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \
          (ANYOF_FLAGS(node) & ANYOF_FOLD) && \
          ((end) > (input) + 1) && \
          toLOWER((input)[0]) == 's' && \