X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/4f03b4b68c373d6b483f2a44808498ec2c2bf9f7..75dcb4fc63cd34de1327827601b8cabf0e7a562e:/regcomp.c

diff --git a/regcomp.c b/regcomp.c
index ba48a76..70e9e2f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1387,8 +1387,8 @@ is the recommended Unicode-aware way of saying
 	       scan += len;                                                   \
 	       len = 0;                                                       \
 	    } else {                                                          \
-		uvc = utf8n_to_uvuni( (const U8*)uc, UTF8_MAXLEN, &len, uniflags);\
-		uvc = to_uni_fold( uvc, foldbuf, &foldlen );                  \
+		len = UTF8SKIP(uc);\
+		uvc = to_utf8_fold( uc, foldbuf, &foldlen);                   \
 		foldlen -= UNISKIP( uvc );                                    \
 		scan = foldbuf + UNISKIP( uvc );                              \
 	    }                                                                 \
@@ -4523,7 +4523,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     struct regexp *r;
     register regexp_internal *ri;
     STRLEN plen;
-    char  *exp;
+    char* VOL exp;
     char* xend;
     regnode *scan;
     I32 flags;
@@ -4553,7 +4553,14 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
 
     DEBUG_r(if (!PL_colorset) reginitcolors());
 
-    RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+    exp = SvPV(pattern, plen);
+
+    if (plen == 0) { /* ignore the utf8ness if the pattern is 0 length */
+	RExC_utf8 = RExC_orig_utf8 = 0;
+    }
+    else {
+	RExC_utf8 = RExC_orig_utf8 = SvUTF8(pattern);
+    }
     RExC_uni_semantics = 0;
     RExC_contains_locale = 0;
 
@@ -4565,12 +4572,7 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
     }
 
     if (jump_ret == 0) {    /* First time through */
-	exp = SvPV(pattern, plen);
 	xend = exp + plen;
-	/* ignore the utf8ness if the pattern is 0 length */
-	if (plen == 0) {
-	    RExC_utf8 = RExC_orig_utf8 = 0;
-	}
 
         DEBUG_COMPILE_r({
             SV *dsv= sv_newmortal();
@@ -4602,7 +4604,9 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
         -- dmq */
         DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
 	    "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
-        exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pattern, plen), &len);
+        exp = (char*)Perl_bytes_to_utf8(aTHX_
+				        (U8*)SvPV_nomg(pattern, plen),
+					&len);
         xend = exp + len;
         RExC_orig_utf8 = RExC_utf8 = 1;
         SAVEFREEPV(exp);
@@ -10450,7 +10454,11 @@ parseit:
 		if (! PL_utf8_tofold) {
 		    U8 dummy[UTF8_MAXBYTES+1];
 		    STRLEN dummy_len;
-		    to_utf8_fold((U8*) "A", dummy, &dummy_len);
+
+		    /* This particular string encodes a code point above \xff
+		     * in both UTF-8 and UTF-EBCDIC */
+		    to_utf8_fold((U8*) "\xC8\x80", dummy, &dummy_len);
+		    assert(PL_utf8_tofold); /* Verify that worked */
 		}
 		PL_utf8_foldclosures = _swash_inversion_hash(PL_utf8_tofold);
 	    }