X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/3b7535215021402ddc215d2cc374a23a8b33d86d..9826f54371c5a7143a609971a843ee096e036fbf:/regcomp.c diff --git a/regcomp.c b/regcomp.c index 852906a..7f0e582 100644 --- a/regcomp.c +++ b/regcomp.c @@ -710,6 +710,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL; if (LOC) cl->flags |= ANYOF_LOCALE; + cl->flags |= ANYOF_FOLD; } /* Can match anything (initialization) */ @@ -779,18 +780,21 @@ S_cl_and(struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + if (!(and_with->flags & ANYOF_FOLD)) + cl->flags &= ~ANYOF_FOLD; + + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP && !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; - cl->flags |= ANYOF_UNICODE; + cl->flags |= ANYOF_NONBITMAP; ARG_SET(cl, ARG(and_with)); } if (!(and_with->flags & ANYOF_UNICODE_ALL) && !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + if (!(and_with->flags & (ANYOF_NONBITMAP|ANYOF_UNICODE_ALL)) && !(and_with->flags & ANYOF_INVERT)) - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } /* 'OR' a given class with another one. Can create false positives */ @@ -844,14 +848,21 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con if (or_with->flags & ANYOF_EOS) cl->flags |= ANYOF_EOS; - if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE && + if (or_with->flags & ANYOF_FOLD) + cl->flags |= ANYOF_FOLD; + + /* If both nodes match something outside the bitmap, but what they match + * outside is not the same pointer, and hence not easily compared, give up + * and allow the start class to match everything outside the bitmap */ + if (cl->flags & ANYOF_NONBITMAP && or_with->flags & ANYOF_NONBITMAP && ARG(cl) != ARG(or_with)) { cl->flags |= ANYOF_UNICODE_ALL; - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } + if (or_with->flags & ANYOF_UNICODE_ALL) { cl->flags |= ANYOF_UNICODE_ALL; - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } } @@ -3565,7 +3576,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, goto do_default; if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */ value = (ANYOF_BITMAP_TEST(data->start_class,'\n') - || (data->start_class->flags & ANYOF_CLASS)); + || ((data->start_class->flags & ANYOF_CLASS) + && ANYOF_CLASS_TEST_ANY_SET(data->start_class))); cl_anything(pRExC_state, data->start_class); } if (flags & SCF_DO_STCLASS_AND || !value) @@ -8309,7 +8321,7 @@ parseit: (value=='p' ? '+' : '!'), (int)n, RExC_parse); } RExC_parse = e + 1; - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; namedclass = ANYOF_MAX; /* no official name, but it's named */ } break; @@ -8415,7 +8427,7 @@ parseit: RExC_emit += ANYOF_CLASS_ADD_SKIP; ANYOF_CLASS_ZERO(ret); } - ANYOF_FLAGS(ret) |= ANYOF_CLASS|ANYOF_LARGE; + ANYOF_FLAGS(ret) |= ANYOF_CLASS; } /* a bad range like a-\d, a-[:digit:] ? */ @@ -8433,7 +8445,7 @@ parseit: ANYOF_BITMAP_SET(ret, '-'); } else { - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-'); } @@ -8623,7 +8635,7 @@ parseit: const UV prevnatvalue = NATIVE_TO_UNI(prevvalue); const UV natvalue = NATIVE_TO_UNI(value); stored+=2; /* can't optimize this class */ - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; if (prevnatvalue < natvalue) { /* what about > ? */ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", prevnatvalue, natvalue); @@ -9512,7 +9524,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags); /* output any special charclass tests (used mostly under use locale) */ - if (o->flags & ANYOF_CLASS) + if (o->flags & ANYOF_CLASS && ANYOF_CLASS_TEST_ANY_SET(o)) for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++) if (ANYOF_CLASS_TEST(o,i)) { sv_catpv(sv, anyofs[i]); @@ -9522,7 +9534,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags); /* output information about the unicode matching */ - if (flags & ANYOF_UNICODE) + if (flags & ANYOF_NONBITMAP) sv_catpvs(sv, "{unicode}"); else if (flags & ANYOF_UNICODE_ALL) sv_catpvs(sv, "{unicode_all}"); @@ -10380,7 +10392,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } else if (op == ANYOF) { /* arglen 1 + class block */ - node += 1 + ((ANYOF_FLAGS(node) & ANYOF_LARGE) + node += 1 + ((ANYOF_FLAGS(node) & ANYOF_CLASS) ? ANYOF_CLASS_SKIP : ANYOF_SKIP); node = NEXTOPER(node); }