This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add utility and .h for character's UTF-8
[perl5.git] / regexec.c
index e87e365..57f47ce 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -81,6 +81,7 @@
 #endif
 
 #include "inline_invlist.c"
+#include "utf8_strings.h"
 
 #define RF_tainted     1       /* tainted information used? e.g. locale */
 #define RF_warned      2               /* warned about big count? */
 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 
 /* these are unrolled below in the CCC_TRY_XXX defined */
-#ifdef EBCDIC
-    /* Often 'str' is a hard-coded utf8 string instead of utfebcdic. so just
-     * skip the check on EBCDIC platforms */
-#   define LOAD_UTF8_CHARCLASS(class,str) LOAD_UTF8_CHARCLASS_NO_CHECK(class)
-#else
-#   define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
+#define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
     if (!CAT2(PL_utf8_,class)) { \
        bool ok; \
        ENTER; save_re_context(); \
        ok=CAT2(is_utf8_,class)((const U8*)str); \
         PERL_UNUSED_VAR(ok); \
        assert(ok); assert(CAT2(PL_utf8_,class)); LEAVE; } } STMT_END
-#endif
-
 /* Doesn't do an assert to verify that is correct */
 #define LOAD_UTF8_CHARCLASS_NO_CHECK(class) STMT_START { \
     if (!CAT2(PL_utf8_,class)) { \
 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 
 #define LOAD_UTF8_CHARCLASS_GCB()  /* Grapheme cluster boundaries */        \
-       LOAD_UTF8_CHARCLASS(X_begin, " ");                                  \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_special_begin);                             \
-       /* These are utf8 constants, and not utf-ebcdic constants, so the   \
-           * assert should likely and hopefully fail on an EBCDIC machine */ \
-       LOAD_UTF8_CHARCLASS(X_extend, "\xcc\x80"); /* U+0300 */             \
-                                                                           \
-       /* No asserts are done for these, in case called on an early        \
-           * Unicode version in which they map to nothing */               \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* U+0E40 "\xe0\xb9\x80" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_L);          /* U+1100 "\xe1\x84\x80" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_LV_LVT_V);/* U+AC01 "\xea\xb0\x81" */\
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_T);      /* U+11A8 "\xe1\x86\xa8" */ \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_RI);                                 \
-       LOAD_UTF8_CHARCLASS_NO_CHECK(X_V)       /* U+1160 "\xe1\x85\xa0" */  
+        /* No asserts are done for some of these, in case called on a   */  \
+        /* Unicode version in which they map to nothing */                  \
+       LOAD_UTF8_CHARCLASS(X_begin, HYPHEN_UTF8);                          \
+       LOAD_UTF8_CHARCLASS_NO_CHECK(X_special_begin);                      \
+       LOAD_UTF8_CHARCLASS(X_extend, COMBINING_GRAVE_ACCENT_UTF8);         \
+       LOAD_UTF8_CHARCLASS_NO_CHECK(X_prepend);/* empty in most releases*/ \
+       LOAD_UTF8_CHARCLASS(X_L, HANGUL_CHOSEONG_KIYEOK_UTF8);              \
+       LOAD_UTF8_CHARCLASS(X_LV_LVT_V, HANGUL_JUNGSEONG_FILLER_UTF8);      \
+       LOAD_UTF8_CHARCLASS_NO_CHECK(X_RI);    /* empty in many releases */ \
+       LOAD_UTF8_CHARCLASS(X_T, HANGUL_JONGSEONG_KIYEOK_UTF8);             \
+       LOAD_UTF8_CHARCLASS(X_V, HANGUL_JUNGSEONG_FILLER_UTF8)
 
 #define PLACEHOLDER    /* Something for the preprocessor to grab onto */