regexec.c: Latin1 chars can fold match UTF8_ALL

author Karl Williamson <public@khwilliamson.com>

Sat, 27 Nov 2010 17:26:01 +0000 (10:26 -0700)

committer Father Chrysostomos <sprout@cpan.org>

Sun, 28 Nov 2010 12:49:14 +0000 (04:49 -0800)
author Karl Williamson <public@khwilliamson.com>
Sat, 27 Nov 2010 17:26:01 +0000 (10:26 -0700)
committer Father Chrysostomos <sprout@cpan.org>
Sun, 28 Nov 2010 12:49:14 +0000 (04:49 -0800)
diff --git a/handy.h b/handy.h

index 391156a..216d0ea 100644 (file)
--- a/handy.h
+++ b/handy.h
@@ -609,7 +609,7 @@ patched there.  The file as of this writing is cpan/Devel-PPPort/parts/inc/misc
  #   define isUPPER_A(c)  cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_UPPER_A))
  #   define isWORDCHAR_A(c) cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_WORDCHAR_A))
  #   define isXDIGIT_A(c)  cBOOL(FITS_IN_8_BITS(c) && (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_XDIGIT_A))
-#   define _NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_NONLATIN1_FOLD))
+#   define _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c) ((! cBOOL(FITS_IN_8_BITS(c))) || (PL_charclass[(U8) NATIVE_TO_UNI(c)] & _CC_NONLATIN1_FOLD))
  #else   /* No perl.h. */
  #   define isOCTAL_A(c)  ((c) >= '0' && (c) <= '9')
  #   ifdef EBCDIC
diff --git a/regcomp.c b/regcomp.c

index 07834a0..23824ac 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -8184,7 +8184,7 @@ S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8
      if (UNI_SEMANTICS && value == LATIN_SMALL_LETTER_SHARP_S) {
         ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
      }
-    else if (_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C(value)
+    else if (_HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(value)
              || (! UNI_SEMANTICS
                   && ! isASCII(value)
                   && PL_fold_latin1[value] != value))
diff --git a/regexec.c b/regexec.c

index 375d4fd..874dce3 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -6300,11 +6300,17 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
      /* If the bitmap didn't (or couldn't) match, and something outside the
       * bitmap could match, try that */
      if (!match) {
-       if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
-           match = TRUE;
+       if (utf8_target && (flags & ANYOF_UNICODE_ALL)) {
+           if (c >= 256
+               || ((flags & ANYOF_FOLD) /* Latin1 char that has a non-Latin1 fold
+                                           should match */
+                   && _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c)))
+           {
+               match = TRUE;
+           }
         }
-       else if ((flags & ANYOF_NONBITMAP_NON_UTF8)
-                || (utf8_target && flags & ANYOF_UTF8))
+       if (!match && ((flags & ANYOF_NONBITMAP_NON_UTF8)
+                      || (utf8_target && flags & ANYOF_UTF8)))
         {
             AV *av;
             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
author	Karl Williamson <public@khwilliamson.com>
	Sat, 27 Nov 2010 17:26:01 +0000 (10:26 -0700)
committer	Father Chrysostomos <sprout@cpan.org>
	Sun, 28 Nov 2010 12:49:14 +0000 (04:49 -0800)
handy.h		patch | blob | blame | history
regcomp.c		patch | blob | blame | history
regexec.c		patch | blob | blame | history