From: David Mitchell Date: Fri, 7 Sep 2012 12:32:11 +0000 (+0100) Subject: fix a bug in handling $+[0] and unicode X-Git-Tag: v5.17.4~130^2 X-Git-Url: https://perl5.git.perl.org/perl5.git/commitdiff_plain/3de645a82921698b4886d748e3a5a5ed98752f42 fix a bug in handling $+[0] and unicode The code to decide what substring of a pattern target to copy for the sake of $1, $& etc, would, in the absence of $&, only copy the minimum range needed to cover $1,$2,...., which might be a shorter range than what $& covers. This is fine most of the time, but, when calculating $+[0] on a unicode string, it needs a copy of the whole part of the string covered by $&, since it needs to convert the byte offset into a char offset. So to fix this, always copy, as a minimum, the $& range. I suppose we could be more clever about this: detect the presence of @+ in the code, only do it for UTF8 etc; but this is simple and non-fragile. --- diff --git a/regexec.c b/regexec.c index fa69a50..2dc2314 100644 --- a/regexec.c +++ b/regexec.c @@ -2592,7 +2592,7 @@ got_it: && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */ && !(PL_sawampersand & SAWAMPERSAND_RIGHT) ) { /* don't copy $' part of string */ - U32 n = (PL_sawampersand & SAWAMPERSAND_MIDDLE) ? 0 : 1; + U32 n = 0; max = -1; /* calculate the right-most part of the string covered * by a capture. Due to look-ahead, this may be to @@ -2613,7 +2613,7 @@ got_it: && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY) /* //p */ && !(PL_sawampersand & SAWAMPERSAND_LEFT) ) { /* don't copy $` part of string */ - U32 n = (PL_sawampersand & SAWAMPERSAND_MIDDLE) ? 0 : 1; + U32 n = 0; min = max; /* calculate the left-most part of the string covered * by a capture. 
Due to look-behind, this may be to diff --git a/t/re/re_tests b/t/re/re_tests index 94b7a38..1aebbe6 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -163,6 +163,7 @@ ab|cd abcd y $& ab ()ef def y $&-$1 ef- ()ef def y $-[0] 1 ()ef def y $+[0] 3 +()\x{100}\x{1000} d\x{100}\x{1000} y $+[0] 3 ()ef def y $-[1] 1 ()ef def y $+[1] 1 *a - c - Quantifier follows nothing