folded length <= unfolded).
EXACTFU_SS str Match this string using /iu rules (w/len);
- (string folded iff in UTF-8; non-UTF8
- folded length > unfolded).
+ (string not UTF-8, only portions guaranteed
+ to be folded; folded length > unfolded).
EXACTFLU8 str Like EXACTFU, but use /il, UTF-8, folded,
and everything in it is above 255.
EXACTFAA_NO_TRIE str Match this string using /iaa rules (w/len)
* so that the optimizer doesn't reject these possibilities based on size
* constraints.
* 2) For the sequence involving the Sharp s (\xDF), the node type EXACTFU_SS
- * is used for an EXACTFU node that contains at least one "ss" sequence in
- * it. For non-UTF-8 patterns and strings, this is the only case where
- * there is a possible fold length change. That means that a regular
- * EXACTFU node without UTF-8 involvement doesn't have to concern itself
- * with length changes, and so can be processed faster. regexec.c takes
- * advantage of this. Generally, an EXACTFish node that is in UTF-8 is
- * pre-folded by regcomp.c (except EXACTFL, some of whose folds aren't
- * known until runtime). This saves effort in regex matching. However,
- * the pre-folding isn't done for non-UTF8 patterns because the fold of
- * the MICRO SIGN requires UTF-8, and we don't want to slow things down by
+ * is used in non-UTF-8 patterns for an EXACTFU node that contains at
+ * least one "ss" sequence in it. For UTF-8 patterns, the procedures in
+ * step 1) above are sufficient to handle these, but for non-UTF-8
+ * patterns and strings, this is the only case where there is a possible
+ * fold length change. That means that a regular EXACTFU node without
+ * UTF-8 involvement doesn't have to concern itself with length changes,
+ * and so can be processed faster. regexec.c takes advantage of this.
+ * Generally, an EXACTFish node that is in UTF-8 is pre-folded by
+ * regcomp.c (except EXACTFL, some of whose folds aren't known until
+ * runtime). This saves effort in regex matching. However, the
+ * pre-folding isn't done for non-UTF-8 patterns because the fold of the
+ * MICRO SIGN requires UTF-8, and we don't want to slow things down by
* forcing the pattern into UTF8 unless necessary. Also what EXACTF (and,
* again, EXACTFL) nodes fold to isn't known until runtime. The fold
* possibilities for the non-UTF8 patterns are quite simple, except for
continue;
}
- /* Nodes with 'ss' require special handling, except for
- * EXACTFAA-ish for which there is no multi-char fold to this */
- if (len == 2 && *s == 's' && *(s+1) == 's'
- && OP(scan) != EXACTFAA
- && OP(scan) != EXACTFAA_NO_TRIE)
- {
- count = 2;
- if (OP(scan) != EXACTFL) {
- OP(scan) = EXACTFU_SS;
- }
- s += 2;
- }
- else { /* Here is a generic multi-char fold. */
+ { /* Here is a generic multi-char fold. */
U8* multi_end = s + len;
/* Count how many characters are in it. In the case of
# End of important relative ordering.
-EXACTFU_SS EXACT, str ; Match this string using /iu rules (w/len); (string folded iff in UTF-8; non-UTF8 folded length > unfolded).
+EXACTFU_SS EXACT, str ; Match this string using /iu rules (w/len); (string not UTF-8, only portions guaranteed to be folded; folded length > unfolded).
EXACTFLU8 EXACT, str ; Like EXACTFU, but use /il, UTF-8, folded, and everything in it is above 255.
EXACTFAA_NO_TRIE EXACT, str ; Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able).
goto do_exactf_non_utf8;
case EXACTFU_SS:
- if (is_utf8_pat) {
- utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
- }
+ assert(! is_utf8_pat);
goto do_exactf_utf8;
case EXACTFLU8:
goto do_exactf;
case EXACTFU_SS: /* /\x{df}/iu */
+ assert(! is_utf8_pat);
+ /* FALLTHROUGH */
case EXACTFU: /* /abc/iu */
folder = foldEQ_latin1;
fold_array = PL_fold_latin1;
I32 hardcount = 0; /* How many matches so far */
bool utf8_target = reginfo->is_utf8_target;
unsigned int to_complement = 0; /* Invert the result? */
- UV utf8_flags;
+ UV utf8_flags = 0;
_char_class_number classnum;
PERL_ARGS_ASSERT_REGREPEAT;
case EXACTF: /* This node only generated for non-utf8 patterns */
assert(! reginfo->is_utf8_pat);
- utf8_flags = 0;
goto do_exactf;
case EXACTFLU8:
utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
goto do_exactf;
- case EXACTFU_SS:
case EXACTFU:
- utf8_flags = reginfo->is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+ if (reginfo->is_utf8_pat) {
+ utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
+ }
+ /* FALLTHROUGH */
+
+ case EXACTFU_SS:
do_exactf: {
int c1, c2;
#define EXACTFL 39 /* 0x27 Match this string using /il rules (w/len); (string not guaranteed to be folded). */
#define EXACTFU 40 /* 0x28 Match this string using /iu rules (w/len); (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
#define EXACTFAA 41 /* 0x29 Match this string using /iaa rules (w/len) (string folded iff in UTF-8; non-UTF8 folded length <= unfolded). */
-#define EXACTFU_SS 42 /* 0x2a Match this string using /iu rules (w/len); (string folded iff in UTF-8; non-UTF8 folded length > unfolded). */
+#define EXACTFU_SS 42 /* 0x2a Match this string using /iu rules (w/len); (string not UTF-8, only portions guaranteed to be folded; folded length > unfolded). */
#define EXACTFLU8 43 /* 0x2b Like EXACTFU, but use /il, UTF-8, folded, and everything in it is above 255. */
#define EXACTFAA_NO_TRIE 44 /* 0x2c Match this string using /iaa rules (w/len) (string not UTF-8, not guaranteed to be folded, not currently trie-able). */
#define EXACT_ONLY8 45 /* 0x2d Like EXACT, but only UTF-8 encoded targets can match */