-/*
- We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
- so that it is possible to override the option here without having to
- rebuild the entire core. as we are required to do if we change regcomp.h
- which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
-*/
-#if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
-#define BROKEN_UNICODE_CHARCLASS_MAPPINGS
-#endif
-
-#ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
-#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS_ALNUM()
-#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS_SPACE()
-#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
-#define RE_utf8_perl_word PL_utf8_alnum
-#define RE_utf8_perl_space PL_utf8_space
-#define RE_utf8_posix_digit PL_utf8_digit
-#define perl_word alnum
-#define perl_space space
-#define posix_digit digit
-#else
-#define LOAD_UTF8_CHARCLASS_PERL_WORD() LOAD_UTF8_CHARCLASS(perl_word,"a")
-#define LOAD_UTF8_CHARCLASS_PERL_SPACE() LOAD_UTF8_CHARCLASS(perl_space," ")
-#define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
-#define RE_utf8_perl_word PL_utf8_perl_word
-#define RE_utf8_perl_space PL_utf8_perl_space
-#define RE_utf8_posix_digit PL_utf8_posix_digit
-#endif
-
-
-#define _CCC_TRY_AFF_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- case NAMEL: \
- PL_reg_flags |= RF_tainted; \
- /* FALL THROUGH */ \
- case NAME: \
- if (!nextchr) \
- sayNO; \
- if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
- if (!CAT2(PL_utf8_,CLASS)) { \
- bool ok; \
- ENTER; \
- save_re_context(); \
- ok=CAT2(is_utf8_,CLASS)((const U8*)STR); \
- assert(ok); \
- LEAVE; \
- } \
- if (!(OP(scan) == NAME \
- ? cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target)) \
- : LCFUNC_utf8((U8*)locinput))) \
- { \
- sayNO; \
- } \
- locinput += PL_utf8skip[nextchr]; \
- nextchr = UCHARAT(locinput); \
- break; \
- } \
- /* Drops through to the macro that calls this one */
-
-#define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC) \
- _CCC_TRY_AFF_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr))) \
- sayNO; \
- nextchr = UCHARAT(++locinput); \
- break
-
-/* Almost identical to the above, but has a case for a node that matches chars
- * between 128 and 255 using Unicode (latin1) semantics. */
-#define CCC_TRY_AFF_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \
- _CCC_TRY_AFF_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- if (!(OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \
- sayNO; \
- nextchr = UCHARAT(++locinput); \
- break
-
-#define _CCC_TRY_NEG_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- case NAMEL: \
- PL_reg_flags |= RF_tainted; \
- /* FALL THROUGH */ \
- case NAME : \
- if (!nextchr && locinput >= PL_regeol) \
- sayNO; \
- if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
- if (!CAT2(PL_utf8_,CLASS)) { \
- bool ok; \
- ENTER; \
- save_re_context(); \
- ok=CAT2(is_utf8_,CLASS)((const U8*)STR); \
- assert(ok); \
- LEAVE; \
- } \
- if ((OP(scan) == NAME \
- ? cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, utf8_target)) \
- : LCFUNC_utf8((U8*)locinput))) \
- { \
- sayNO; \
- } \
- locinput += PL_utf8skip[nextchr]; \
- nextchr = UCHARAT(locinput); \
- break; \
- }
-
-#define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC) \
- _CCC_TRY_NEG_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC) \
- if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr))) \
- sayNO; \
- nextchr = UCHARAT(++locinput); \
- break
-
-
-#define CCC_TRY_NEG_U(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU,LCFUNC) \
- _CCC_TRY_NEG_COMMON(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNCU) \
- if ((OP(scan) == NAMEL ? LCFUNC(nextchr) : (FUNCU(nextchr) && (isASCII(nextchr) || (FLAGS(scan) & USE_UNI))))) \
- sayNO; \
- nextchr = UCHARAT(++locinput); \
- break
-
-
+#define PLACEHOLDER /* Empty: stands in for a blank macro argument */
+
+/* The actual code for CCC_TRY, which uses several variables from the routine
+ * it's callable from. It is designed to be the bulk of a case statement.
+ * POS_OR_NEG is either ! or empty; it is applied to the FUNC or UTF8_TEST
+ * result, and the code fails (sayNO) when the combined expression is true.
+ * FUNC is the macro or function called on nextchr for non-utf8 targets (and
+ * for utf8 invariants) to indicate whether nextchr matches the class.
+ * UTF8_TEST is the whole test expression to use for utf8 variant characters.
+ * CLASS and STR are passed to LOAD_UTF8_CHARCLASS just before UTF8_TEST is
+ * evaluated, to load the swash for the class if it is not already present.
+ * The logic is: Fail if we're at the end-of-string; otherwise if the target is
+ * utf8 and a variant, load the swash if necessary and test using the utf8
+ * test. Advance to the next character if test is ok, otherwise fail; if not
+ * utf8 or an invariant under utf8, use the non-utf8 test, and fail if it
+ * fails, or advance to the next character. Both paths end in a break. */
+
+#define _CCC_TRY_CODE(POS_OR_NEG, FUNC, UTF8_TEST, CLASS, STR) \
+ if (locinput >= PL_regeol) { \
+ sayNO; \
+ } \
+ if (utf8_target && UTF8_IS_CONTINUED(nextchr)) { \
+ LOAD_UTF8_CHARCLASS(CLASS, STR); \
+ if (POS_OR_NEG (UTF8_TEST)) { \
+ sayNO; \
+ } \
+ locinput += PL_utf8skip[nextchr]; \
+ nextchr = UCHARAT(locinput); \
+ break; \
+ } \
+ if (POS_OR_NEG (FUNC(nextchr))) { \
+ sayNO; \
+ } \
+ nextchr = UCHARAT(++locinput); \
+ break;
+
+/* Generate the non-locale case statements for a character class (NAME) and
+ * its complement (NNAME). _CCC_TRY_CODE fails when the test it is handed is
+ * true, so NAME passes ! (fail on a non-member) and NNAME passes the empty
+ * PLACEHOLDER (fail on a member). Non-utf8 uses FUNC; utf8 uses swash_fetch()
+ * on the swash, which is stored in the predictable place PL_utf8_<CLASS> */
+#define _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, \
+ CLASS, STR) \
+ case NAME: \
+ _CCC_TRY_CODE( !, FUNC, \
+ cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
+ (U8*)locinput, TRUE)), \
+ CLASS, STR) \
+ case NNAME: \
+ _CCC_TRY_CODE( PLACEHOLDER , FUNC, \
+ cBOOL(swash_fetch(CAT2(PL_utf8_,CLASS), \
+ (U8*)locinput, TRUE)), \
+ CLASS, STR) \
+
+/* Generate the case statements for the locale (NAMEL, NNAMEL), NAMEA/NNAMEA
+ * and non-locale (NAME, NNAME) nodes in regmatch, for classes that don't have
+ * special unicode semantics. Locales set RF_tainted, and don't use an
+ * immediate swash but an intermediary special locale function (LCFUNC_utf8)
+ * that is called on the pointer to the current place in the input string.
+ * That function will resolve to needing the same swash. One might think that
+ * because we don't know what the locale will match, we should skip checking
+ * that the swash loaded properly, i.e. use LOAD_UTF8_CHARCLASS_NO_CHECK, but
+ * what is passed to the regular LOAD_UTF8_CHARCLASS is in non-locale terms,
+ * and so locale is irrelevant here */
+#define CCC_TRY(NAME, NNAME, FUNC, \
+ NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
+ NAMEA, NNAMEA, FUNCA, \
+ CLASS, STR) \
+ case NAMEL: \
+ PL_reg_flags |= RF_tainted; \
+ _CCC_TRY_CODE( !, LCFUNC, LCFUNC_utf8((U8*)locinput), CLASS, STR) \
+ case NNAMEL: \
+ PL_reg_flags |= RF_tainted; \
+ _CCC_TRY_CODE( PLACEHOLDER, LCFUNC, LCFUNC_utf8((U8*)locinput), \
+ CLASS, STR) \
+ case NAMEA: \
+ if (locinput >= PL_regeol || ! FUNCA(nextchr)) { \
+ sayNO; \
+ } \
+ /* Matched a utf8-invariant, so don't have to worry about utf8 */ \
+ nextchr = UCHARAT(++locinput); \
+ break; \
+ case NNAMEA: \
+ if (locinput >= PL_regeol || FUNCA(nextchr)) { \
+ sayNO; \
+ } \
+ if (utf8_target) { \
+ locinput += PL_utf8skip[nextchr]; \
+ nextchr = UCHARAT(locinput); \
+ } \
+ else { \
+ nextchr = UCHARAT(++locinput); \
+ } \
+ break; \
+ /* Generate the non-locale cases */ \
+ _CCC_TRY_NONLOCALE(NAME, NNAME, FUNC, CLASS, STR)
+
+/* This is like CCC_TRY, but additionally generates the NAMEU and NNAMEU case
+ * statements (tested with FUNCU) for separate Unicode semantics nodes */
+#define CCC_TRY_U(NAME, NNAME, FUNC, \
+ NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
+ NAMEU, NNAMEU, FUNCU, \
+ NAMEA, NNAMEA, FUNCA, \
+ CLASS, STR) \
+ CCC_TRY(NAME, NNAME, FUNC, \
+ NAMEL, NNAMEL, LCFUNC, LCFUNC_utf8, \
+ NAMEA, NNAMEA, FUNCA, \
+ CLASS, STR) \
+ _CCC_TRY_NONLOCALE(NAMEU, NNAMEU, FUNCU, CLASS, STR)