/* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
we don't need this definition. */
/* Opcode predicates on the node rn.  Each macro expands to a chain of
 * equality tests on OP(rn), so rn may be evaluated more than once; pass an
 * expression without side effects.
 *   IS_TEXT   - OP(rn) is EXACT, REF or NREF.
 *   IS_TEXTF  - OP(rn) is EXACTFU, EXACTFU_SS, EXACTFU_NO_TRIE, EXACTFA,
 *               EXACTF, REFF or NREFF (the '+' definition; the '-' definition
 *               it replaces lacks EXACTFU_SS and EXACTFU_NO_TRIE).
 *   IS_TEXTFL - OP(rn) is EXACTFL, REFFL or NREFFL.
 * NOTE(review): unlike the #else branch, no IS_TEXTFU is defined in this
 * branch - confirm that is intended. */
#define IS_TEXT(rn) ( OP(rn)==EXACT || OP(rn)==REF || OP(rn)==NREF )
-#define IS_TEXTF(rn) ( (OP(rn)==EXACTFU || OP(rn)==EXACTFA || OP(rn)==EXACTF) || OP(rn)==REFF || OP(rn)==NREFF )
+#define IS_TEXTF(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_NO_TRIE || OP(rn)==EXACTFA || OP(rn)==EXACTF || OP(rn)==REFF || OP(rn)==NREFF )
#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
#else
/* ... so we use this as it's faster. */
/* Narrow forms of the opcode predicates: none of them accepts the REF/NREF
 * opcodes that the definitions in the other branch include.
 *   IS_TEXT   - OP(rn) is EXACT.
 *   IS_TEXTFU - OP(rn) is EXACTFU, EXACTFU_SS, EXACTFU_NO_TRIE or EXACTFA
 *               (the '+' definition; the '-' definition it replaces lacks
 *               EXACTFU_SS and EXACTFU_NO_TRIE).
 *   IS_TEXTF  - OP(rn) is EXACTF.
 *   IS_TEXTFL - OP(rn) is EXACTFL.
 * IS_TEXTFU mentions rn several times, so rn may be evaluated more than
 * once; pass an expression without side effects. */
#define IS_TEXT(rn) ( OP(rn)==EXACT )
-#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU || OP(rn) == EXACTFA)
+#define IS_TEXTFU(rn) ( OP(rn)==EXACTFU || OP(rn)==EXACTFU_SS || OP(rn)==EXACTFU_NO_TRIE || OP(rn) == EXACTFA)
#define IS_TEXTF(rn) ( OP(rn)==EXACTF )
#define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
folder = foldEQ_locale;
goto do_exactf_non_utf8;
+ case EXACTFU_SS:
+ if (UTF_PATTERN) {
+ utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
+ }
+ goto do_exactf_utf8;
+
+ case EXACTFU_NO_TRIE:
case EXACTFU:
if (UTF_PATTERN || utf8_target) {
utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
/*XXX: The s-- is almost definitely wrong here under unicode - demeprhq*/
s--;
}
- /* We can use a more efficient search as newlines are the same in unicode as they are in latin */
- while (s < end) {
+ /* We can use a more efficient search as newlines are the same in unicode as they are in latin */
+ while (s <= end) { /* note it could be possible to match at the end of the string */
if (*s++ == '\n') { /* don't need PL_utf8skip here */
if (regtry(&reginfo, &s))
goto got_it;
fold_utf8_flags = FOLDEQ_UTF8_LOCALE;
goto do_exactf;
+ case EXACTFU_SS:
+ case EXACTFU_NO_TRIE:
case EXACTFU:
folder = foldEQ_latin1;
fold_array = PL_fold_latin1;
s = STRING(scan);
ln = STR_LEN(scan);
- if (utf8_target || UTF_PATTERN) {
- /* Either target or the pattern are utf8. */
+ if (utf8_target || UTF_PATTERN || state_num == EXACTFU_SS) {
+ /* Either target or the pattern are utf8, or has the issue where
+ * the fold lengths may differ. */
const char * const l = locinput;
char *e = PL_regeol;
switch (OP(text_node)) {
case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
case EXACTFA:
+ case EXACTFU_SS:
+ case EXACTFU_NO_TRIE:
case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
default: ST.c2 = ST.c1;
switch (OP(text_node)) {
case EXACTF: ST.c2 = PL_fold[ST.c1]; break;
case EXACTFA:
+ case EXACTFU_SS:
+ case EXACTFU_NO_TRIE:
case EXACTFU: ST.c2 = PL_fold_latin1[ST.c1]; break;
case EXACTFL: ST.c2 = PL_fold_locale[ST.c1]; break;
default: ST.c2 = ST.c1; break;
sayNO;
/* NOTREACHED */
#undef ST
- case FOLDCHAR:
- n = ARG(scan);
- if ( n == (U32)what_len_TRICKYFOLD(locinput,utf8_target,ln) ) {
- locinput += ln;
- } else if ( LATIN_SMALL_LETTER_SHARP_S == n && !utf8_target && !UTF_PATTERN ) {
- sayNO;
- } else {
- U8 folded[UTF8_MAXBYTES_CASE+1];
- STRLEN foldlen;
- const char * const l = locinput;
- char *e = PL_regeol;
- to_uni_fold(n, folded, &foldlen);
-
- if (! foldEQ_utf8((const char*) folded, 0, foldlen, 1,
- l, &e, 0, utf8_target)) {
- sayNO;
- }
- locinput = e;
- }
- nextchr = UCHARAT(locinput);
- break;
case LNBREAK:
if ((n=is_LNBREAK(locinput,utf8_target))) {
locinput += n;
utf8_flags = 0;
goto do_exactf;
+ case EXACTFU_SS:
+ case EXACTFU_NO_TRIE:
case EXACTFU:
utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
c = (U8)*STRING(p);
assert(! UTF_PATTERN || UNI_IS_INVARIANT(c));
- if (utf8_target) { /* Use full Unicode fold matching */
+ if (utf8_target || OP(p) == EXACTFU_SS) { /* Use full Unicode fold matching */
char *tmpeol = loceol;
while (hardcount < max
&& foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
switch (OP(p)) {
case EXACTF: folded = PL_fold[c]; break;
case EXACTFA:
+ case EXACTFU_NO_TRIE:
case EXACTFU: folded = PL_fold_latin1[c]; break;
case EXACTFL: folded = PL_fold_locale[c]; break;
default: Perl_croak(aTHX_ "panic: Unexpected op %u", OP(p));
(ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_ALPHA) && isALPHA_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NALPHA) && !isALPHA_LC(c)) ||
- (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII(c)) ||
- (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII(c)) ||
+ (ANYOF_CLASS_TEST(n, ANYOF_ASCII) && isASCII_LC(c)) ||
+ (ANYOF_CLASS_TEST(n, ANYOF_NASCII) && !isASCII_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_CNTRL) && isCNTRL_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NCNTRL) && !isCNTRL_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_GRAPH) && isGRAPH_LC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_PSXSPC) && isPSXSPC(c)) ||
(ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c)) ||
- (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK(c)) ||
- (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK(c))
+ (ANYOF_CLASS_TEST(n, ANYOF_BLANK) && isBLANK_LC(c)) ||
+ (ANYOF_CLASS_TEST(n, ANYOF_NBLANK) && !isBLANK_LC(c))
) /* How's that for a conditional? */
) {
match = TRUE;