+ to_complement = 1;
+ /* FALLTHROUGH */
+
+ case POSIXA:
+ posixa:
+ /* Don't need to worry about utf8, as it can match only a single
+ * byte invariant character. */
+ REXEC_FBC_CLASS_SCAN(
+ to_complement ^ cBOOL(_generic_isCC_A(*s, FLAGS(c))));
+ break;
+
+ case NPOSIXU:
+ to_complement = 1;
+ /* FALLTHROUGH */
+
+ case POSIXU:
+ if (! utf8_target) {
+ REXEC_FBC_CLASS_SCAN(to_complement ^ cBOOL(_generic_isCC(*s,
+ FLAGS(c))));
+ }
+ else {
+
+ posix_utf8:
+ classnum = (_char_class_number) FLAGS(c);
+ if (classnum < _FIRST_NON_SWASH_CC) {
+ while (s < strend) {
+
+ /* We avoid loading in the swash as long as possible, but
+ * should we have to, we jump to a separate loop. This
+ * extra 'if' statement is what keeps this code from being
+ * just a call to REXEC_FBC_UTF8_CLASS_SCAN() */
+ if (UTF8_IS_ABOVE_LATIN1(*s)) {
+ goto found_above_latin1;
+ }
+ if ((UTF8_IS_INVARIANT(*s)
+ && to_complement ^ cBOOL(_generic_isCC((U8) *s,
+ classnum)))
+ || (UTF8_IS_DOWNGRADEABLE_START(*s)
+ && to_complement ^ cBOOL(
+ _generic_isCC(TWO_BYTE_UTF8_TO_UNI(*s, *(s + 1)),
+ classnum))))
+ {
+ if (tmp && (reginfo->intuit || regtry(reginfo, &s)))
+ goto got_it;
+ else {
+ tmp = doevery;
+ }
+ }
+ else {
+ tmp = 1;
+ }
+ s += UTF8SKIP(s);
+ }
+ }
+ else switch (classnum) { /* These classes are implemented as
+ macros */
+ case _CC_ENUM_SPACE: /* XXX would require separate code if we
+ revert the change of \v matching this */
+ /* FALL THROUGH */
+
+ case _CC_ENUM_PSXSPC:
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(isSPACE_utf8(s)));
+ break;
+
+ case _CC_ENUM_BLANK:
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(isBLANK_utf8(s)));
+ break;
+
+ case _CC_ENUM_XDIGIT:
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(isXDIGIT_utf8(s)));
+ break;
+
+ case _CC_ENUM_VERTSPACE:
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(isVERTWS_utf8(s)));
+ break;
+
+ case _CC_ENUM_CNTRL:
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(isCNTRL_utf8(s)));
+ break;
+
+ default:
+ Perl_croak(aTHX_ "panic: find_byclass() node %d='%s' has an unexpected character class '%d'", OP(c), PL_reg_name[OP(c)], classnum);
+ assert(0); /* NOTREACHED */
+ }
+ }
+ break;
+
+ found_above_latin1: /* Here we have to load a swash to get the result
+ for the current code point */
+ if (! PL_utf8_swash_ptrs[classnum]) {
+ U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
+ PL_utf8_swash_ptrs[classnum] =
+ _core_swash_init("utf8", swash_property_names[classnum],
+ &PL_sv_undef, 1, 0, NULL, &flags);
+ }
+
+ /* This is a copy of the loop above for swash classes, though using the
+ * FBC macro instead of being expanded out. Since we've loaded the
+ * swash, we don't have to check for that each time through the loop */
+ REXEC_FBC_UTF8_CLASS_SCAN(
+ to_complement ^ cBOOL(_generic_utf8(
+ classnum,
+ s,
+ swash_fetch(PL_utf8_swash_ptrs[classnum],
+ (U8 *) s, TRUE))));
+ break;
+
+ case AHOCORASICKC:
+ case AHOCORASICK:
+ {
+ DECL_TRIE_TYPE(c);
+ /* what trie are we using right now */
+ reg_ac_data *aho = (reg_ac_data*)progi->data->data[ ARG( c ) ];
+ reg_trie_data *trie = (reg_trie_data*)progi->data->data[ aho->trie ];
+ HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
+
+ const char *last_start = strend - trie->minlen;
+#ifdef DEBUGGING
+ const char *real_start = s;
+#endif
+ STRLEN maxlen = trie->maxlen;
+ SV *sv_points;
+ U8 **points; /* map of where we were in the input string
+ when reading a given char. For ASCII this
+ is unnecessary overhead as the relationship
+ is always 1:1, but for Unicode, especially
+ case folded Unicode this is not true. */
+ U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
+ U8 *bitmap=NULL;
+
+
+ GET_RE_DEBUG_FLAGS_DECL;
+
+ /* We can't just allocate points here. We need to wrap it in
+ * an SV so it gets freed properly if there is a croak while
+ * running the match */
+ ENTER;
+ SAVETMPS;
+ sv_points=newSV(maxlen * sizeof(U8 *));
+ SvCUR_set(sv_points,
+ maxlen * sizeof(U8 *));
+ SvPOK_on(sv_points);
+ sv_2mortal(sv_points);
+ points=(U8**)SvPV_nolen(sv_points );
+ if ( trie_type != trie_utf8_fold
+ && (trie->bitmap || OP(c)==AHOCORASICKC) )
+ {
+ if (trie->bitmap)
+ bitmap=(U8*)trie->bitmap;
+ else
+ bitmap=(U8*)ANYOF_BITMAP(c);
+ }
+ /* this is the Aho-Corasick algorithm modified a touch
+ to include special handling for long "unknown char" sequences.
+ The basic idea being that we use AC as long as we are dealing
+ with a possible matching char, when we encounter an unknown char
+ (and we have not encountered an accepting state) we scan forward
+ until we find a legal starting char.
+ AC matching is basically that of trie matching, except that when
+ we encounter a failing transition, we fall back to the current
+ states "fail state", and try the current char again, a process
+ we repeat until we reach the root state, state 1, or a legal
+ transition. If we fail on the root state then we can either
+ terminate if we have reached an accepting state previously, or
+ restart the entire process from the beginning if we have not.
+
+ */
+ while (s <= last_start) {
+ const U32 uniflags = UTF8_ALLOW_DEFAULT;
+ U8 *uc = (U8*)s;
+ U16 charid = 0;
+ U32 base = 1;
+ U32 state = 1;
+ UV uvc = 0;
+ STRLEN len = 0;
+ STRLEN foldlen = 0;
+ U8 *uscan = (U8*)NULL;
+ U8 *leftmost = NULL;
+#ifdef DEBUGGING
+ U32 accepted_word= 0;
+#endif
+ U32 pointpos = 0;
+
+ while ( state && uc <= (U8*)strend ) {
+ int failed=0;
+ U32 word = aho->states[ state ].wordnum;
+
+ if( state==1 ) {
+ if ( bitmap ) {
+ DEBUG_TRIE_EXECUTE_r(
+ if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
+ dump_exec_pos( (char *)uc, c, strend, real_start,
+ (char *)uc, utf8_target );
+ PerlIO_printf( Perl_debug_log,
+ " Scanning for legal start char...\n");
+ }
+ );
+ if (utf8_target) {
+ while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
+ uc += UTF8SKIP(uc);
+ }
+ } else {
+ while ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
+ uc++;
+ }