From a197cbdd7e9359ac8bb532cbf8b4f45b621c0781 Mon Sep 17 00:00:00 2001 From: Gurusamy Sarathy Date: Sat, 18 Mar 2000 03:26:57 +0000 Subject: [PATCH] make return values from match in a list context, as well as $& et al propagate utf8-ness (from Graham Barr) p4raw-id: //depot/perl@5794 --- mg.c | 26 +++++++-- pp_hot.c | 4 ++ t/pragma/utf8.t | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 193 insertions(+), 6 deletions(-) diff --git a/mg.c b/mg.c index fbc74c4..27039fa 100644 --- a/mg.c +++ b/mg.c @@ -386,12 +386,12 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) register I32 paren; register I32 i; register REGEXP *rx; + I32 s1, t1; switch (*mg->mg_ptr) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '&': if (PL_curpm && (rx = PL_curpm->op_pmregexp)) { - I32 s1, t1; paren = atoi(mg->mg_ptr); getparen: @@ -400,6 +400,16 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) (t1 = rx->endp[paren]) != -1) { i = t1 - s1; + getlen: + if (i > 0 && (PL_curpm->op_pmdynflags & PMdf_UTF8) && !IN_BYTE) { + char *s = rx->subbeg + s1; + char *send = rx->subbeg + t1; + i = 0; + while (s < send) { + s += UTF8SKIP(s); + i++; + } + } if (i >= 0) return i; } @@ -416,8 +426,11 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) if (PL_curpm && (rx = PL_curpm->op_pmregexp)) { if (rx->startp[0] != -1) { i = rx->startp[0]; - if (i >= 0) - return i; + if (i > 0) { + s1 = 0; + t1 = i; + goto getlen; + } } } return 0; @@ -425,8 +438,11 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg) if (PL_curpm && (rx = PL_curpm->op_pmregexp)) { if (rx->endp[0] != -1) { i = rx->sublen - rx->endp[0]; - if (i >= 0) - return i; + if (i > 0) { + s1 = rx->endp[0]; + t1 = rx->sublen; + goto getlen; + } } } return 0; diff --git a/pp_hot.c b/pp_hot.c index 237bb01..c888ea5 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1057,6 +1057,10 @@ play_it_again: len = rx->endp[i] - rx->startp[i]; s = rx->startp[i] + truebase; sv_setpvn(*SP, s, len); + if ((pm->op_pmdynflags & PMdf_UTF8) && !IN_BYTE) { + SvUTF8_on(*SP); + sv_utf8_downgrade(*SP, TRUE); + } } } if (global) { diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index 2ae8d9c..0e55a67 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -10,7 +10,7 @@ BEGIN { } } -print "1..12\n"; +print "1..60\n"; my $test = 1; @@ -84,3 +84,170 @@ sub ok { ok $1, '123alpha'; $test++; } +{ + use utf8; + + $_ = "\x{263A}>\x{263A}\x{263A}"; + + ok length, 4; + $test++; + + ok length((m/>(.)/)[0]), 1; + $test++; + + ok length($&), 2; + $test++; + + ok length($'), 1; + $test++; + + ok length($`), 1; + $test++; + + ok length($1), 1; + $test++; + + ok length($tmp=$&), 2; + $test++; + + ok length($tmp=$'), 1; + $test++; + + ok length($tmp=$`), 1; + $test++; + + ok length($tmp=$1), 1; + $test++; + + ok $&, pack("C*", ord(">"), 0342, 0230, 0272); + $test++; + + ok $', pack("C*", 0342, 0230, 0272); + $test++; + + ok $`, pack("C*", 0342, 0230, 0272); + $test++; + + ok $1, pack("C*", 0342, 0230, 0272); + $test++; + + { + use bytes; + no utf8; + + ok length, 10; + $test++; + + ok length((m/>(.)/)[0]), 1; + $test++; + + ok length($&), 2; + $test++; + + ok length($'), 5; + $test++; + + ok length($`), 3; + $test++; + + ok length($1), 1; + $test++; + + ok $&, pack("C*", ord(">"), 0342); + $test++; + + ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); + $test++; + + ok $`, pack("C*", 0342, 0230, 0272); + $test++; + + ok $1, pack("C*", 0342); + $test++; + + } + + + { + no utf8; + $_="\342\230\272>\342\230\272\342\230\272"; + } + + ok length, 10; + $test++; + + ok length((m/>(.)/)[0]), 1; + $test++; + + ok length($&), 2; + $test++; + + ok length($'), 1; + $test++; + + ok length($`), 1; + $test++; + + ok length($1), 1; + $test++; + + ok length($tmp=$&), 2; + $test++; + + ok length($tmp=$'), 1; + $test++; + + ok length($tmp=$`), 1; + $test++; + + ok length($tmp=$1), 1; + $test++; + + ok $&, pack("C*", ord(">"), 0342, 0230, 0272); + $test++; + + ok $', pack("C*", 0342, 0230, 0272); + $test++; + + ok $`, pack("C*", 0342, 0230, 0272); + $test++; + + ok $1, pack("C*", 0342, 0230, 0272); + $test++; + + { + use bytes; + no utf8; + + ok length, 10; + $test++; + + ok length((m/>(.)/)[0]), 1; + $test++; + + ok length($&), 2; + $test++; + + ok length($'), 5; + $test++; + + ok length($`), 3; + $test++; + + ok length($1), 1; + $test++; + + ok $&, pack("C*", ord(">"), 0342); + $test++; + + ok $', pack("C*", 0230, 0272, 0342, 0230, 0272); + $test++; + + ok $`, pack("C*", 0342, 0230, 0272); + $test++; + + ok $1, pack("C*", 0342); + $test++; + + } +} -- 1.8.3.1