the bit of code that concats the args into a pattern and at the same time
notes the start and end indices of the text of the code blocks, got it
wrong if the pattern got upgraded half way through concatting. So work out
in advance whether the string is likely to be utf8.
OP *o = NULL;
int n = 0;
+ bool utf8 = 0;
if (pRExC_state->num_code_blocks) {
o = cLISTOPx(expr)->op_first;
pat = newSVpvn("", 0);
SAVEFREESV(pat);
+
+ /* determine if the pattern is going to be utf8 (needed
+ * in advance to align code block indices correctly).
+ * XXX This could fail to be detected for an arg with
+ * overloading but not concat overloading; but the main effect
+ * in this obscure case is to need a 'use re eval' for a
+ * literal code block */
+ for (svp = patternp; svp < patternp + pat_count; svp++) {
+ if (SvUTF8(*svp))
+ utf8 = 1;
+ }
+ if (utf8)
+ SvUTF8_on(pat);
+
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *sv, *msv = *svp;
bool code = 0;
}
-plan tests => 241; # Update this when adding/deleting tests.
+plan tests => 242; # Update this when adding/deleting tests.
run_tests() unless caller;
# XXX remove this when TODOs are fixed
no warnings qw(uninitialized closure);
+ # if the pattern string gets utf8 upgraded while concatenating,
+ # make sure a literal code block is still detected (by still
+ # compiling in the absence of use re 'eval')
+
+ {
+ my $s1 = "\x{80}";
+ my $s2 = "\x{100}";
+ ok("\x{80}\x{100}" =~ /^$s1(?{1})$s2$/, "utf8 upgrade");
+ }
+
my ($cr1, $cr2, $cr3, $cr4);
use re 'eval';