Avoid needless use of UTF8=1 format [RT#56336]
authorEric Brine <ikegami@adaelis.com>
Sun, 22 Aug 2010 05:17:26 +0000 (22:17 -0700)
committerRafael Garcia-Suarez <rgs@consttype.org>
Mon, 30 Aug 2010 22:19:35 +0000 (00:19 +0200)
Some literals (e.g. q'abc') don't set the UTF8 flag for pure ASCII literals.
Others (e.g. -abc) do. This should be consistent.

lib/utf8.t
toke.c

index 7f8481c..0bd06f9 100644 (file)
@@ -458,7 +458,6 @@ SKIP: {
    ok( !utf8::is_utf8( 'asd'         ), "Wasteful format - q{}" );
    ok( !utf8::is_utf8( qw(asd)       ), "Wasteful format - qw{}" );
    ok( !utf8::is_utf8( (asd => 1)[0] ), "Wasteful format - =>" );
-   local $TODO = 'Avoid needless use of UTF8=1 format [RT#56336]';
    ok( !utf8::is_utf8( asd           ), "Wasteful format - bareword" );
    ok( !utf8::is_utf8( -asd          ), "Wasteful format - -word" );
    ok( !utf8::is_utf8( asd::         ), "Wasteful format - word::" );
diff --git a/toke.c b/toke.c
index b8eb11a..42f0103 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -6290,16 +6290,15 @@ Perl_yylex(pTHX)
 
                /* if we saw a global override before, get the right name */
 
+               sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
+                   len ? len : strlen(PL_tokenbuf));
                if (gvp) {
+                   SV * const tmp_sv = sv;
                    sv = newSVpvs("CORE::GLOBAL::");
-                   sv_catpv(sv,PL_tokenbuf);
-               }
-               else {
-                   /* If len is 0, newSVpv does strlen(), which is correct.
-                      If len is non-zero, then it will be the true length,
-                      and so the scalar will be created correctly.  */
-                   sv = newSVpv(PL_tokenbuf,len);
+                   sv_catsv(sv, tmp_sv);
+                   SvREFCNT_dec(tmp_sv);
                }
+
 #ifdef PERL_MAD
                if (PL_madskills && !PL_thistoken) {
                    char *start = SvPVX(PL_linestr) + PL_realtokenstart;
@@ -6309,17 +6308,11 @@ Perl_yylex(pTHX)
 #endif
 
                /* Presume this is going to be a bareword of some sort. */
-
                CLINE;
                pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
                pl_yylval.opval->op_private = OPpCONST_BARE;
-               /* UTF-8 package name? */
-               if (UTF && !IN_BYTES &&
-                   is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
-                   SvUTF8_on(sv);
 
                /* And if "Foo::", then that's what it certainly is. */
-
                if (len)
                    goto safe_bareword;