* this file makes sure that in EXACTFU nodes, the sharp s gets folded to
* 'ss', even if the pattern isn't UTF-8. This avoids the issues
* described in the next item.
- * 4) A problem remains for the sharp s in EXACTF and EXACTFA nodes when the
+ * 3) A problem remains for the sharp s in EXACTF and EXACTFA nodes when the
* pattern isn't in UTF-8. (BTW, there cannot be an EXACTF node with a
* UTF-8 pattern.) An assumption that the optimizer part of regexec.c
* (probably unwittingly, in Perl_regexec_flags()) makes is that a
* but in a non-UTF8 pattern, folding it to that above-Latin1 string would
* require the pattern to be forced into UTF-8, the overhead of which we
* want to avoid.)
- */
+ *
+ * Similarly, the code that generates tries doesn't currently handle
+ * not-already-folded multi-char folds, and it looks like a pain to change
+ * that. Therefore, trie generation of EXACTFA nodes with the sharp s
+ * doesn't work. Instead, such an EXACTFA is turned into a new regnode,
+ * EXACTFA_NO_TRIE, which the trie code knows not to handle. Most people
+ * using /iaa matching will be doing so almost entirely with ASCII
+ * strings, so this should rarely be encountered in practice */
#define JOIN_EXACT(scan,min_subtract,has_exactf_sharp_s, flags) \
if (PL_regkind[OP(scan)] == EXACT) \
}
/* Nodes with 'ss' require special handling, except for EXACTFL
- * and EXACTFA for which there is no multi-char fold to this */
+ * and EXACTFA-ish for which there is no multi-char fold to
+ * this */
if (len == 2 && *s == 's' && *(s+1) == 's'
- && OP(scan) != EXACTFL && OP(scan) != EXACTFA)
+ && OP(scan) != EXACTFL
+ && OP(scan) != EXACTFA
+ && OP(scan) != EXACTFA_NO_TRIE)
{
count = 2;
OP(scan) = EXACTFU_SS;
* test for them. The code that generates the
* is_MULTI_foo() macros croaks should one actually get put
* into Unicode.) */
- if (OP(scan) != EXACTFL && OP(scan) != EXACTFA) {
+ if (OP(scan) != EXACTFL
+ && OP(scan) != EXACTFA
+ && OP(scan) != EXACTFA_NO_TRIE)
+ {
count = utf8_length(s, multi_end);
s = multi_end;
}
/* Non-UTF-8 pattern, EXACTFA node. There can't be a multi-char
* fold to the ASCII range (and there are no existing ones in the
* upper latin1 range). But, as outlined in the comments preceding
- * this function, we need to flag any occurrences of the sharp s */
+ * this function, we need to flag any occurrences of the sharp s.
+ * This character forbids trie formation (because of added
+ * complexity) */
while (s < s_end) {
if (*s == LATIN_SMALL_LETTER_SHARP_S) {
+ OP(scan) = EXACTFA_NO_TRIE;
*has_exactf_sharp_s = TRUE;
break;
}
EXACT | EXACT
EXACTFU | EXACTFU
EXACTFU_SS | EXACTFU
- EXACTFA | 0
+ EXACTFA | EXACTFA
*/
#define TRIE_TYPE(X) ( ( NOTHING == (X) ) ? NOTHING : \
( EXACT == (X) ) ? EXACT : \
( EXACTFU == (X) || EXACTFU_SS == (X) ) ? EXACTFU : \
+ ( EXACTFA == (X) ) ? EXACTFA : \
0 )
/* don't use tail as the end marker for this traverse */
/* All other (EXACTFL handled above) folds except under
* /iaa that include s, S, and sharp_s also may include
* the others */
- if (OP(scan) != EXACTFA) {
+ if (OP(scan) != EXACTFA && OP(scan) != EXACTFA_NO_TRIE)
+ {
if (uc == 's' || uc == 'S') {
ANYOF_BITMAP_SET(data->start_class,
LATIN_SMALL_LETTER_SHARP_S);
/* All folds except under /iaa that include s, S,
* and sharp_s also may include the others */
- if (OP(scan) != EXACTFA) {
+ if (OP(scan) != EXACTFA
+ && OP(scan) != EXACTFA_NO_TRIE)
+ {
if (uc == 's' || uc == 'S') {
ANYOF_BITMAP_SET(data->start_class,
LATIN_SMALL_LETTER_SHARP_S);
switch (OP(scan)) {
case EXACT:
case EXACTF:
+ case EXACTFA_NO_TRIE:
case EXACTFA:
case EXACTFU:
case EXACTFU_SS: