- /* The third problematic sequence is 'ss', which can match just the
- * single byte LATIN SMALL LETTER SHARP S, and it can do it in both
- * non- and UTF-8. Code elsewhere in this file makes sure, however,
- * that the sharp s gets folded to 'ss' under Unicode rules even if not
- * UTF-8. */
- if (STR_LEN(scan) >= 2
- && (OP(scan) == EXACTFU
- || OP(scan) == EXACTFU_NO_TRIE /* The code above could have
- set to this node type */
- || OP(scan) == EXACTF))
- {
- /* The string will be folded to 'ss' if it's in UTF-8, but it could
- * include capital 'S' instead of lower case when not UTF-8. We
- * could have different code to handle the two cases, but this is
- * not necessary since both S and s are invariants under UTF-8; and
- * not worth it, especially because we can use just one test for
- * either 'S' or 's' each * time through the loop (plus a mask).
- * Ths is because on both EBCDIC and ASCII machines, 'S' and 's'
- * differ by a single bit. On ASCII they are 32 apart; on EBCDIC,
- * they are 64. This uses an exclusive 'or' to find that bit and
- * then inverts it to form a mask, with just a single 0, in the bit
- * position where 'S' and 's' differ. */
- const char S_or_s_mask = ~ ('S' ^ 's');
- const char s_masked = 's' & S_or_s_mask;
-
- for (s = s0; s < s_end - 1; s++) {
- if (((*s & S_or_s_mask) == s_masked)
- && ((*(s+1) & S_or_s_mask) == s_masked))
- {
- s++;
- *min_change -= 1;
-
- /* EXACTFU_SS also isn't trie'able, so don't have to
- * preserve EXACTFU_NO_TRIE. EXACTF is also not trie'able,
- * and because we essentially punt the optimizations in its
- * case, we don't need to indicate that it has an ss */
- if (OP(scan) == EXACTFU || OP(scan) == EXACTFU_NO_TRIE) {
- OP(scan) = EXACTFU_SS;
- }
+ /* Here, the pattern is not UTF-8. We need to look only for the
+ * 'ss' sequence, and in the EXACTF case, the sharp s, which can be
+ * in the final position. Otherwise we can stop looking 1 byte
+ * earlier because we have to find both the first and second 's' */
+ const U8* upper = (OP(scan) == EXACTF) ? s_end : s_end -1;
+
+ for (s = s0; s < upper; s++) {
+ switch (*s) {
+ case 'S':
+ case 's':
+ if (s_end - s > 1
+ && ((*(s+1) & S_or_s_mask) == s_masked))
+ {
+ *min_change -= 1;
+
+ /* EXACTF nodes need to know that the minimum
+ * length changed so that a sharp s in the string
+ * can match this ss in the pattern, but they
+ * remain EXACTF nodes, as they are not trie'able,
+ * so don't have to invent a new node type to
+ * exclude them from the trie code */
+ if (OP(scan) != EXACTF) {
+ OP(scan) = EXACTFU_SS;
+ }
+ s++;
+ }
+ break;
+ case LATIN_SMALL_LETTER_SHARP_S:
+ if (OP(scan) == EXACTF) {
+ *has_exactf_sharp_s = TRUE;
+ }
+ break;