+ else if /* Below is not a UTF-8 pattern; there's a somewhat different
+ set of problematic characters */
+ ((multi_fold_from
+ = what_MULTI_CHAR_FOLD_latin1_safe(pat, pat + pat_len)))
+ {
+ /* We may have to canonicalize a multi-char fold, as in the UTF-8
+ * case */
+ _to_uni_fold_flags(multi_fold_from, mod_pat, &pat_len,
+ FOLD_FLAGS_FULL);
+ pat = mod_pat;
+ }
+ else if (UNLIKELY(*pat == LATIN_SMALL_LETTER_SHARP_S)) {
+ mod_pat[0] = mod_pat[1] = 's';
+ pat_len = 2;
+ utf8_pat = utf8_target; /* UTF-8ness immaterial for invariant
+ chars, and speeds copying */
+ pat = mod_pat;
+ }
+ else if (LIKELY(op != TURKISH || *pat != 'I')) {
+ mod_pat[0] = toLOWER_L1(*pat);
+ pat_len = 1;
+ pat = mod_pat;
+ }
+ }
+ else if /* Below isn't a node that we convert to UTF-8 */
+ ( utf8_target
+ && ! utf8_pat
+ && op == EXACTFAA_NO_TRIE
+ && *pat == LATIN_SMALL_LETTER_SHARP_S)
+ {
+ /* A very special case. Under /iaa, U+DF folds to two U+17F characters.
+ * We did this at compile time when the pattern was UTF-8, but otherwise
+ * we couldn't do it earlier, because it requires a UTF-8 target for
+ * this match to be legal. */
+ pat_len = 2 * (sizeof(LATIN_SMALL_LETTER_LONG_S_UTF8) - 1);
+ Copy(LATIN_SMALL_LETTER_LONG_S_UTF8
+ LATIN_SMALL_LETTER_LONG_S_UTF8, mod_pat, pat_len, U8);
+ pat = mod_pat;
+ utf8_pat = TRUE;
+ }
+
+ /* Here, we have taken care of the initial work for a few very problematic
+ * situations, possibly creating a modified pattern.
+ *
+ * Now ready for the general case. We build up all the possible things
+ * that could match the first character of the pattern into the elements of
+ * 'matches[]'
+ *
+ * Everything generally matches at least itself. But if there is a
+ * UTF8ness mismatch, we have to convert to that of the target string. */
+ if (UTF8_IS_INVARIANT(*pat)) { /* Immaterial if either is in UTF-8 */
+ matches[0][0] = pat[0];
+ lengths[0] = 1;
+ m->count++;
+ }
+ else if (utf8_target) {
+ if (utf8_pat) {
+ lengths[0] = UTF8SKIP(pat);
+ Copy(pat, matches[0], lengths[0], U8);
+ m->count++;
+ }
+ else { /* target is UTF-8, pattern isn't */
+ matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
+ matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
+ lengths[0] = 2;
+ m->count++;
+ }
+ }
+ else if (! utf8_pat) { /* Neither is UTF-8 */
+ matches[0][0] = pat[0];
+ lengths[0] = 1;
+ m->count++;
+ }
+ else /* target isn't UTF-8; pattern is. No match possible unless the
+ pattern's first character can fit in a byte */
+ if (UTF8_IS_DOWNGRADEABLE_START(*pat))
+ {
+ matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
+ lengths[0] = 1;
+ m->count++;
+ }
+
+ /* Here we have taken care of any necessary node-type changes */
+
+ if (m->count) {
+ m->max_length = lengths[0];
+ m->min_length = lengths[0];
+ }
+
+ /* For non-folding nodes, there are no other possible candidate matches,
+ * but for foldable ones, we have to look further. */
+ if (UNLIKELY(op == TURKISH) || isEXACTFish(op)) { /* A folding node */
+ UV folded; /* The first character in the pattern, folded */
+ U32 first_fold_from; /* A character that folds to it */
+ const U32 * remaining_fold_froms; /* The remaining characters that
+ fold to it, if any */
+ Size_t folds_to_count; /* The total number of characters that fold to
+ 'folded' */
+
+ /* If the node begins with a sequence of more than one character that
+ * together form the fold of a single character, it is called a
+ * 'multi-character fold', and the normal functions don't handle this
+ * case. We set 'multi_fold_from' to the single folded-from character,
+ * which is handled in an extra iteration below */
+ if (utf8_pat) {
+ folded = valid_utf8_to_uvchr(pat, NULL);
+ multi_fold_from
+ = what_MULTI_CHAR_FOLD_utf8_safe(pat, pat + pat_len);
+ }
+ else {
+ folded = *pat;
+
+ /* This may generate illegal combinations for things like EXACTF,
+ * but rather than repeat the logic and exclude them here, all such
+ * illegalities are checked for and skipped below in the loop */
+ multi_fold_from
+ = what_MULTI_CHAR_FOLD_latin1_safe(pat, pat + pat_len);
+ }
+
+ /* Everything matches at least itself; initialize to that because the
+ * only branches below that set it are the ones where the number
+ * isn't 1. */
+ folds_to_count = 1;