This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Concatenate some string constants
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index e27e32c..8832d08 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
+ *    2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -75,6 +75,22 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 #define LEX_FORMLINE            1
 #define LEX_KNOWNEXT            0
 
+#ifdef DEBUGGING
+static char* lex_state_names[] = {
+    "KNOWNEXT",
+    "FORMLINE",
+    "INTERPCONST",
+    "INTERPCONCAT",
+    "INTERPENDMAYBE",
+    "INTERPEND",
+    "INTERPSTART",
+    "INTERPPUSH",
+    "INTERPCASEMOD",
+    "INTERPNORMAL",
+    "NORMAL"
+};
+#endif
+
 #ifdef ff_next
 #undef ff_next
 #endif
@@ -116,79 +132,200 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
  * Also see LOP and lop() below.
  */
 
-/* Note that REPORT() and REPORT2() will be expressions that supply
- * their own trailing comma, not suitable for statements as such. */
 #ifdef DEBUGGING /* Serve -DT. */
-#   define REPORT(x,retval) tokereport(x,s,(int)retval),
-#   define REPORT2(x,retval) tokereport(x,s, yylval.ival),
+#   define REPORT(retval) tokereport(s,(int)retval)
 #else
-#   define REPORT(x,retval)
-#   define REPORT2(x,retval)
+#   define REPORT(retval) (retval)
 #endif
 
-#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
-#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
-#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
-#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
-#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
-#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
-#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
-#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
-#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
-#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
-#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
-#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
-#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
-#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
-#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
-#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
-#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
-#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
-#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
-#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
+#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
+#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
+#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
+#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
+#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
+#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define FTST(f)  return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
+#define FUN0(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN1(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
+#define BOop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
+#define BAop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
+#define SHop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
+#define PWop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define PMop(f)  return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
+#define Aop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Mop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Eop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
+#define Rop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 
 /* This bit of chicanery makes a unary function followed by
  * a parenthesis into a function with one argument, highest precedence.
  * The UNIDOR macro is for unary functions that can be followed by the //
  * operator (such as C<shift // 0>).
  */
-#define UNI2(f,x) return(yylval.ival = f, \
-       REPORT("uni",f) \
+#define UNI2(f,x) return ( \
+       yylval.ival = f, \
        PL_expect = x, \
        PL_bufptr = s, \
        PL_last_uni = PL_oldbufptr, \
        PL_last_lop_op = f, \
-       (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+       REPORT( \
+           (*s == '(' || (s = skipspace(s), *s == '(')  \
+           ? (int)FUNC1 : (int)UNIOP)))
 #define UNI(f)    UNI2(f,XTERM)
 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 
-#define UNIBRACK(f) return(yylval.ival = f, \
-        REPORT("uni",f) \
+#define UNIBRACK(f) return ( \
+       yylval.ival = f, \
        PL_bufptr = s, \
        PL_last_uni = PL_oldbufptr, \
-       (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+        REPORT( \
+           (*s == '(' || (s = skipspace(s), *s == '(') \
+       ? (int)FUNC1 : (int)UNIOP)))
 
 /* grandfather return to old style */
 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 
 #ifdef DEBUGGING
 
-STATIC void
-S_tokereport(pTHX_ char *thing, char* s, I32 rv)
+/* how to interpret the yylval associated with the token */
+enum token_type {
+    TOKENTYPE_NONE,
+    TOKENTYPE_IVAL,
+    TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
+    TOKENTYPE_PVAL,
+    TOKENTYPE_OPVAL,
+    TOKENTYPE_GVVAL
+};
+
+static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
 {
-    DEBUG_T({
-        SV* report = newSVpv(thing, 0);
-        Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
-               (IV)rv);
+    { ADDOP,           TOKENTYPE_OPNUM,        "ADDOP" },
+    { ANDAND,          TOKENTYPE_NONE,         "ANDAND" },
+    { ANDOP,           TOKENTYPE_NONE,         "ANDOP" },
+    { ANONSUB,         TOKENTYPE_IVAL,         "ANONSUB" },
+    { ARROW,           TOKENTYPE_NONE,         "ARROW" },
+    { ASSIGNOP,                TOKENTYPE_OPNUM,        "ASSIGNOP" },
+    { BITANDOP,                TOKENTYPE_OPNUM,        "BITANDOP" },
+    { BITOROP,         TOKENTYPE_OPNUM,        "BITOROP" },
+    { COLONATTR,       TOKENTYPE_NONE,         "COLONATTR" },
+    { CONTINUE,                TOKENTYPE_NONE,         "CONTINUE" },
+    { DO,              TOKENTYPE_NONE,         "DO" },
+    { DOLSHARP,                TOKENTYPE_NONE,         "DOLSHARP" },
+    { DORDOR,          TOKENTYPE_NONE,         "DORDOR" },
+    { DOROP,           TOKENTYPE_OPNUM,        "DOROP" },
+    { DOTDOT,          TOKENTYPE_IVAL,         "DOTDOT" },
+    { ELSE,            TOKENTYPE_NONE,         "ELSE" },
+    { ELSIF,           TOKENTYPE_IVAL,         "ELSIF" },
+    { EQOP,            TOKENTYPE_OPNUM,        "EQOP" },
+    { FOR,             TOKENTYPE_IVAL,         "FOR" },
+    { FORMAT,          TOKENTYPE_NONE,         "FORMAT" },
+    { FUNC,            TOKENTYPE_OPNUM,        "FUNC" },
+    { FUNC0,           TOKENTYPE_OPNUM,        "FUNC0" },
+    { FUNC0SUB,                TOKENTYPE_OPVAL,        "FUNC0SUB" },
+    { FUNC1,           TOKENTYPE_OPNUM,        "FUNC1" },
+    { FUNCMETH,                TOKENTYPE_OPVAL,        "FUNCMETH" },
+    { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
+    { IF,              TOKENTYPE_IVAL,         "IF" },
+    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
+    { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
+    { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
+    { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
+    { LSTOPSUB,                TOKENTYPE_OPVAL,        "LSTOPSUB" },
+    { MATCHOP,         TOKENTYPE_OPNUM,        "MATCHOP" },
+    { METHOD,          TOKENTYPE_OPVAL,        "METHOD" },
+    { MULOP,           TOKENTYPE_OPNUM,        "MULOP" },
+    { MY,              TOKENTYPE_IVAL,         "MY" },
+    { MYSUB,           TOKENTYPE_NONE,         "MYSUB" },
+    { NOAMP,           TOKENTYPE_NONE,         "NOAMP" },
+    { NOTOP,           TOKENTYPE_NONE,         "NOTOP" },
+    { OROP,            TOKENTYPE_IVAL,         "OROP" },
+    { OROR,            TOKENTYPE_NONE,         "OROR" },
+    { PACKAGE,         TOKENTYPE_NONE,         "PACKAGE" },
+    { PMFUNC,          TOKENTYPE_OPVAL,        "PMFUNC" },
+    { POSTDEC,         TOKENTYPE_NONE,         "POSTDEC" },
+    { POSTINC,         TOKENTYPE_NONE,         "POSTINC" },
+    { POWOP,           TOKENTYPE_OPNUM,        "POWOP" },
+    { PREDEC,          TOKENTYPE_NONE,         "PREDEC" },
+    { PREINC,          TOKENTYPE_NONE,         "PREINC" },
+    { PRIVATEREF,      TOKENTYPE_OPVAL,        "PRIVATEREF" },
+    { REFGEN,          TOKENTYPE_NONE,         "REFGEN" },
+    { RELOP,           TOKENTYPE_OPNUM,        "RELOP" },
+    { SHIFTOP,         TOKENTYPE_OPNUM,        "SHIFTOP" },
+    { SUB,             TOKENTYPE_NONE,         "SUB" },
+    { THING,           TOKENTYPE_OPVAL,        "THING" },
+    { UMINUS,          TOKENTYPE_NONE,         "UMINUS" },
+    { UNIOP,           TOKENTYPE_OPNUM,        "UNIOP" },
+    { UNIOPSUB,                TOKENTYPE_OPVAL,        "UNIOPSUB" },
+    { UNLESS,          TOKENTYPE_IVAL,         "UNLESS" },
+    { UNTIL,           TOKENTYPE_IVAL,         "UNTIL" },
+    { USE,             TOKENTYPE_IVAL,         "USE" },
+    { WHILE,           TOKENTYPE_IVAL,         "WHILE" },
+    { WORD,            TOKENTYPE_OPVAL,        "WORD" },
+    { 0,               TOKENTYPE_NONE,         0 }
+};
+
+/* dump the returned token in rv, plus any optional arg in yylval */
 
+STATIC int
+S_tokereport(pTHX_ char* s, I32 rv)
+{
+    if (DEBUG_T_TEST) {
+       char *name = Nullch;
+       enum token_type type = TOKENTYPE_NONE;
+       struct debug_tokens *p;
+        SV* report = NEWSV(0, 60);
+
+        Perl_sv_catpvf(aTHX_ report, "<== ");
+
+       for (p = debug_tokens; p->token; p++) {
+           if (p->token == (int)rv) {
+               name = p->name;
+               type = p->type;
+               break;
+           }
+       }
+       if (name)
+           Perl_sv_catpvf(aTHX_ report, "%s", name);
+       else if ((char)rv > ' ' && (char)rv < '~')
+           Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+       else if (!rv)
+           Perl_sv_catpvf(aTHX_ report, "EOF");
+       else
+           Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+       switch (type) {
+       case TOKENTYPE_NONE:
+       case TOKENTYPE_GVVAL: /* doesn't appear to be used */
+           break;
+       case TOKENTYPE_IVAL:
+           Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
+           break;
+       case TOKENTYPE_OPNUM:
+           Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
+                                   PL_op_name[yylval.ival]);
+           break;
+       case TOKENTYPE_PVAL:
+           Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
+           break;
+       case TOKENTYPE_OPVAL:
+           if (yylval.opval)
+               Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+                                   PL_op_name[yylval.opval->op_type]);
+           else
+               Perl_sv_catpv(aTHX_ report, "(opval=null)");
+           break;
+       }
+        Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
         if (s - PL_bufptr > 0)
             sv_catpvn(report, PL_bufptr, s - PL_bufptr);
         else {
             if (PL_oldbufptr && *PL_oldbufptr)
                 sv_catpv(report, PL_tokenbuf);
         }
-        PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
-    });
+        PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
+    };
+    return (int)rv;
 }
 
 #endif
@@ -586,7 +723,7 @@ S_skipspace(pTHX_ register char *s)
                PL_minus_n = PL_minus_p = 0;
            }
            else
-               sv_setpv(PL_linestr,";");
+               sv_setpvn(PL_linestr,";", 1);
 
            /* reset variables for next time we lex */
            PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
@@ -697,20 +834,19 @@ S_lop(pTHX_ I32 f, int x, char *s)
 {
     yylval.ival = f;
     CLINE;
-    REPORT("lop", f)
     PL_expect = x;
     PL_bufptr = s;
     PL_last_lop = PL_oldbufptr;
     PL_last_lop_op = (OPCODE)f;
     if (PL_nexttoke)
-       return LSTOP;
+       return REPORT(LSTOP);
     if (*s == '(')
-       return FUNC;
+       return REPORT(FUNC);
     s = skipspace(s);
     if (*s == '(')
-       return FUNC;
+       return REPORT(FUNC);
     else
-       return LSTOP;
+       return REPORT(LSTOP);
 }
 
 /*
@@ -1220,7 +1356,7 @@ S_scan_const(pTHX_ char *start)
 
     const char *leaveit =      /* set of acceptably-backslashed characters */
        PL_lex_inpat
-           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
            : "";
 
     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
@@ -1830,8 +1966,10 @@ S_intuit_more(pTHX_ register char *s)
                    weight -= 5;        /* cope with negative subscript */
                break;
            default:
-               if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
-                       isALPHA(*s) && s[1] && isALPHA(s[1])) {
+               if (!isALNUM(last_un_char)
+                   && !(last_un_char == '$' || last_un_char == '@'
+                        || last_un_char == '&')
+                   && isALPHA(*s) && s[1] && isALPHA(s[1])) {
                    char *d = tmpbuf;
                    while (isALPHA(*s))
                        *d++ = *s++;
@@ -2021,19 +2159,17 @@ Perl_filter_del(pTHX_ filter_t funcp)
 }
 
 
-/* Invoke the n'th filter function for the current rsfp.        */
+/* Invoke the idxth filter function for the current rsfp.       */
+/* maxlen 0 = read one text line */
 I32
 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
-                               /* 0 = read one text line */
 {
     filter_t funcp;
     SV *datasv = NULL;
 
     if (!PL_rsfp_filters)
        return -1;
-    if (idx > AvFILLp(PL_rsfp_filters)){       /* Any more filters?    */
+    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?   */
        /* Provide a default input filter to make life easy.    */
        /* Note that we append to the line. This is handy.      */
        DEBUG_P(PerlIO_printf(Perl_debug_log,
@@ -2064,7 +2200,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
        return SvCUR(buf_sv);
     }
     /* Skip this filter slot if filter has been deleted        */
-    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
        DEBUG_P(PerlIO_printf(Perl_debug_log,
                              "filter_read %d: skipped (filter deleted)\n",
                              idx));
@@ -2090,7 +2226,6 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
     }
 #endif
     if (PL_rsfp_filters) {
-
        if (!append)
             SvCUR_set(sv, 0);  /* start with empty line        */
         if (FILTER_READ(0, sv, 0) > 0)
@@ -2167,7 +2302,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
 int
 Perl_yylex(pTHX)
 {
-    register char *s;
+    register char *s = PL_bufptr;
     register char *d;
     register I32 tmp;
     STRLEN len;
@@ -2176,9 +2311,13 @@ Perl_yylex(pTHX)
     bool bof = FALSE;
     I32 orig_keyword = 0;
 
+    DEBUG_T( {
+       PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
+                                       lex_state_names[PL_lex_state]);
+    } );
     /* check if there's an identifier for us to look at */
     if (PL_pending_ident)
-        return S_pending_ident(aTHX);
+        return REPORT(S_pending_ident(aTHX));
 
     /* no identifier pending identification */
 
@@ -2202,7 +2341,7 @@ Perl_yylex(pTHX)
               "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
               (IV)PL_nexttype[PL_nexttoke]); });
 
-       return(PL_nexttype[PL_nexttoke]);
+       return REPORT(PL_nexttype[PL_nexttoke]);
 
     /* interpolated case modifiers like \L \U, including \Q and \E.
        when we get here, PL_bufptr is at the \
@@ -2221,11 +2360,12 @@ Perl_yylex(pTHX)
                oldmod = PL_lex_casestack[--PL_lex_casemods];
                PL_lex_casestack[PL_lex_casemods] = '\0';
 
-               if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
+               if (PL_bufptr != PL_bufend
+                   && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
                    PL_bufptr += 2;
                    PL_lex_state = LEX_INTERPCONCAT;
                }
-               return ')';
+               return REPORT(')');
            }
            if (PL_bufptr != PL_bufend)
                PL_bufptr += 2;
@@ -2244,10 +2384,10 @@ Perl_yylex(pTHX)
            else {
                if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
                    tmp = *s, *s = s[2], s[2] = (char)tmp;      /* misordered... */
-               if (strchr("LU", *s) &&
+               if ((*s == 'L' || *s == 'U') &&
                    (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
-                   return ')';
+                   return REPORT(')');
                }
                if (PL_lex_casemods > 10)
                    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
@@ -2274,18 +2414,22 @@ Perl_yylex(pTHX)
            if (PL_lex_starts) {
                s = PL_bufptr;
                PL_lex_starts = 0;
-               Aop(OP_CONCAT);
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (PL_lex_casemods == 1 && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
            }
            else
                return yylex();
        }
 
     case LEX_INTERPPUSH:
-        return sublex_push();
+        return REPORT(sublex_push());
 
     case LEX_INTERPSTART:
        if (PL_bufptr == PL_bufend)
-           return sublex_done();
+           return REPORT(sublex_done());
        DEBUG_T({ PerlIO_printf(Perl_debug_log,
               "### Interpolated variable at '%s'\n", PL_bufptr); });
        PL_expect = XTERM;
@@ -2304,7 +2448,11 @@ Perl_yylex(pTHX)
        }
        if (PL_lex_starts++) {
            s = PL_bufptr;
-           Aop(OP_CONCAT);
+           /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+           if (!PL_lex_casemods && PL_lex_inpat)
+               OPERATOR(',');
+           else
+               Aop(OP_CONCAT);
        }
        return yylex();
 
@@ -2319,7 +2467,7 @@ Perl_yylex(pTHX)
        if (PL_lex_dojoin) {
            PL_lex_dojoin = FALSE;
            PL_lex_state = LEX_INTERPCONCAT;
-           return ')';
+           return REPORT(')');
        }
        if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
            && SvEVALED(PL_lex_repl))
@@ -2335,7 +2483,7 @@ Perl_yylex(pTHX)
            Perl_croak(aTHX_ "panic: INTERPCONCAT");
 #endif
        if (PL_bufptr == PL_bufend)
-           return sublex_done();
+           return REPORT(sublex_done());
 
        if (SvIVX(PL_linestr) == '\'') {
            SV *sv = newSVsv(PL_linestr);
@@ -2358,8 +2506,13 @@ Perl_yylex(pTHX)
            PL_nextval[PL_nexttoke] = yylval;
            PL_expect = XTERM;
            force_next(THING);
-           if (PL_lex_starts++)
-               Aop(OP_CONCAT);
+           if (PL_lex_starts++) {
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (!PL_lex_casemods && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
+           }
            else {
                PL_bufptr = s;
                return yylex();
@@ -2379,7 +2532,7 @@ Perl_yylex(pTHX)
     PL_oldoldbufptr = PL_oldbufptr;
     PL_oldbufptr = s;
     DEBUG_T( {
-       PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
+       PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
                      exp_name[PL_expect], s);
     } );
 
@@ -2415,12 +2568,12 @@ Perl_yylex(pTHX)
            PL_preambled = TRUE;
            sv_setpv(PL_linestr,incl_perldb());
            if (SvCUR(PL_linestr))
-               sv_catpv(PL_linestr,";");
+               sv_catpvn(PL_linestr,";", 1);
            if (PL_preambleav){
                while(AvFILLp(PL_preambleav) >= 0) {
                    SV *tmpsv = av_shift(PL_preambleav);
                    sv_catsv(PL_linestr, tmpsv);
-                   sv_catpv(PL_linestr, ";");
+                   sv_catpvn(PL_linestr, ";", 1);
                    sv_free(tmpsv);
                }
                sv_free((SV*)PL_preambleav);
@@ -2432,29 +2585,23 @@ Perl_yylex(pTHX)
                    sv_catpv(PL_linestr,"chomp;");
                if (PL_minus_a) {
                    if (PL_minus_F) {
-                       if (strchr("/'\"", *PL_splitstr)
+                       if ((*PL_splitstr == '/' || *PL_splitstr == '\''
+                            || *PL_splitstr == '"')
                              && strchr(PL_splitstr + 1, *PL_splitstr))
                            Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
                        else {
-                           char delim;
-                           s = "'~#\200\1'"; /* surely one char is unused...*/
-                           while (s[1] && strchr(PL_splitstr, *s))  s++;
-                           delim = *s;
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
-                                     "q" + (delim == '\''), delim);
-                           for (s = PL_splitstr; *s; s++) {
-                               if (*s == '\\')
-                                   sv_catpvn(PL_linestr, "\\", 1);
-                               sv_catpvn(PL_linestr, s, 1);
-                           }
-                           Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
+                           /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+                              bytes can be used as quoting characters.  :-) */
+                           Perl_sv_catpvf(aTHX_ PL_linestr,
+                                          "our @F=split(q%c%s%c);",
+                                          0, PL_splitstr, 0);
                        }
                    }
                    else
                        sv_catpv(PL_linestr,"our @F=split(' ');");
                }
            }
-           sv_catpv(PL_linestr, "\n");
+           sv_catpvn(PL_linestr, "\n", 1);
            PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
            PL_last_lop = PL_last_uni = Nullch;
@@ -2484,8 +2631,8 @@ Perl_yylex(pTHX)
                    PL_doextract = FALSE;
                }
                if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
-                   sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
-                   sv_catpv(PL_linestr,";}");
+                   sv_setpv(PL_linestr,PL_minus_p
+                            ? ";}continue{print;}" : ";}");
                    PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                    PL_last_lop = PL_last_uni = Nullch;
@@ -2497,8 +2644,13 @@ Perl_yylex(pTHX)
                sv_setpv(PL_linestr,"");
                TOKEN(';');     /* not infinite loop because rsfp is NULL now */
            }
-           /* if it looks like the start of a BOM, check if it in fact is */
-           else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+           /* If it looks like the start of a BOM or raw UTF-16,
+            * check if it in fact is. */
+           else if (bof &&
+                    (*s == 0 ||
+                     *(U8*)s == 0xEF ||
+                     *(U8*)s >= 0xFE ||
+                     s[1] == 0)) {
 #ifdef PERLIO_IS_STDIO
 #  ifdef __GNU_LIBRARY__
 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
@@ -2987,7 +3139,7 @@ Perl_yylex(pTHX)
                        yyerror("Unterminated attribute parameter in attribute list");
                        if (attrs)
                            op_free(attrs);
-                       return 0;       /* EOF indicator */
+                       return REPORT(0);       /* EOF indicator */
                    }
                }
                if (PL_lex_stuff) {
@@ -3444,7 +3596,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ','; /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
        }
 
@@ -3559,7 +3711,8 @@ Perl_yylex(pTHX)
                PL_expect = XTERM;              /* e.g. print $fh 3 */
            else if (*s == '.' && isDIGIT(s[1]))
                PL_expect = XTERM;              /* e.g. print $fh .3 */
-           else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
+           else if ((*s == '?' || *s == '-' || *s == '+')
+                    && !isSPACE(s[1]) && s[1] != '=')
                PL_expect = XTERM;              /* e.g. print $fh -1 */
            else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
                PL_expect = XTERM;              /* e.g. print $fh /.../
@@ -3687,7 +3840,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ',';     /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
            else
                no_op("String",s);
@@ -3706,7 +3859,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ',';     /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
            else
                no_op("String",s);
@@ -3882,6 +4035,16 @@ Perl_yylex(pTHX)
            {
                tmp = 0;                /* any sub overrides "weak" keyword */
            }
+           else if (gv && !gvp
+                   && tmp == -KEY_err
+                   && GvCVu(gv)
+                   && PL_expect != XOPERATOR
+                   && PL_expect != XTERMORDORDOR)
+           {
+               /* any sub overrides the "err" keyword, except when really an
+                * operator is expected */
+               tmp = 0;
+           }
            else {                      /* no override */
                tmp = -tmp;
                if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
@@ -3994,7 +4157,7 @@ Perl_yylex(pTHX)
                    /* Two barewords in a row may indicate method call. */
 
                    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
-                       return tmp;
+                       return REPORT(tmp);
 
                    /* If not a declared subroutine, it's an indirect object. */
                    /* (But it's an indir obj regardless for sort.) */
@@ -4051,7 +4214,7 @@ Perl_yylex(pTHX)
                if (!orig_keyword
                        && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
                        && (tmp = intuit_method(s,gv)))
-                   return tmp;
+                   return REPORT(tmp);
 
                /* Not a method, so call it a subroutine (if defined) */
 
@@ -4083,7 +4246,7 @@ Perl_yylex(pTHX)
                        char *proto = SvPV((SV*)cv, len);
                        if (!len)
                            TERM(FUNC0SUB);
-                       if (strEQ(proto, "$"))
+                       if (*proto == '$' && proto[1] == '\0')
                            OPERATOR(UNIOPSUB);
                        while (*proto == ';')
                            proto++;
@@ -4116,7 +4279,8 @@ Perl_yylex(pTHX)
                }
 
            safe_bareword:
-               if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
+               if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+                   && ckWARN_d(WARN_AMBIGUOUS)) {
                    Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
                        "Operator or semicolon missing before %c%s",
                        lastchar, PL_tokenbuf);
@@ -4140,7 +4304,7 @@ Perl_yylex(pTHX)
        case KEY___PACKAGE__:
            yylval.opval = (OP*)newSVOP(OP_CONST, 0,
                                        (PL_curstash
-                                        ? newSVsv(PL_curstname)
+                                        ? newSVpv(HvNAME(PL_curstash), 0)
                                         : &PL_sv_undef));
            TERM(THING);
 
@@ -4681,8 +4845,8 @@ Perl_yylex(pTHX)
            if (isIDFIRST_lazy_if(s,UTF)) {
                char *t;
                for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
-               t = skipspace(d);
-               if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+               for (t=d; *t && isSPACE(*t); t++) ;
+               if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
                    /* [perl #16184] */
                    && !(t[0] == '=' && t[1] == '>')
                ) {
@@ -5090,8 +5254,12 @@ Perl_yylex(pTHX)
 
                if (*s == ':' && s[1] != ':')
                    PL_expect = attrful;
-               else if (!have_name && *s != '{' && key == KEY_sub)
-                   Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+               else if (*s != '{' && key == KEY_sub) {
+                   if (!have_name)
+                       Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+                   else if (*s != ';')
+                       Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
+               }
 
                if (have_proto) {
                    PL_nextval[PL_nexttoke].opval =
@@ -5613,7 +5781,7 @@ Perl_keyword(pTHX_ register char *d, I32 len)
            else if (*d == 'l') {
                if (strEQ(d,"login"))           return -KEY_getlogin;
            }
-           else if (strEQ(d,"c"))              return -KEY_getc;
+           else if (*d == 'c' && d[1] == '\0') return -KEY_getc;
            break;
        }
        switch (len) {
@@ -5760,12 +5928,16 @@ Perl_keyword(pTHX_ register char *d, I32 len)
        }
        break;
     case 'q':
-       if (len <= 2) {
-           if (strEQ(d,"q"))                   return KEY_q;
-           if (strEQ(d,"qr"))                  return KEY_qr;
-           if (strEQ(d,"qq"))                  return KEY_qq;
-           if (strEQ(d,"qw"))                  return KEY_qw;
-           if (strEQ(d,"qx"))                  return KEY_qx;
+       if (len == 1) {
+                                               return KEY_q;
+       }
+       else if (len == 2) {
+           switch (d[1]) {
+           case 'r':                           return KEY_qr;
+           case 'q':                           return KEY_qq;
+           case 'w':                           return KEY_qw;
+           case 'x':                           return KEY_qx;
+           };
        }
        else if (strEQ(d,"quotemeta"))          return -KEY_quotemeta;
        break;
@@ -6237,7 +6409,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des
        return s;
     }
     if (*s == '$' && s[1] &&
-       (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+       (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
     {
        return s;
     }
@@ -6490,15 +6662,23 @@ S_scan_trans(pTHX_ char *start)
     }
 
     complement = del = squash = 0;
-    while (strchr("cds", *s)) {
-       if (*s == 'c')
+    while (1) {
+       switch (*s) {
+       case 'c':
            complement = OPpTRANS_COMPLEMENT;
-       else if (*s == 'd')
+           break;
+       case 'd':
            del = OPpTRANS_DELETE;
-       else if (*s == 's')
+           break;
+       case 's':
            squash = OPpTRANS_SQUASH;
+           break;
+       default:
+           goto no_more;
+       }
        s++;
     }
+  no_more:
 
     New(803, tbl, complement&&!del?258:256, short);
     o = newPVOP(OP_TRANS, 0, (char*)tbl);
@@ -6531,7 +6711,7 @@ S_scan_heredoc(pTHX_ register char *s)
     if (!outer)
        *d++ = '\n';
     for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
-    if (*peek && strchr("`'\"",*peek)) {
+    if (*peek == '`' || *peek == '\'' || *peek =='"') {
        s = peek;
        term = *s++;
        s = delimcpy(d, e, s, PL_bufend, term, &len);
@@ -6623,7 +6803,7 @@ S_scan_heredoc(pTHX_ register char *s)
        sv_setpvn(tmpstr,d+1,s-d);
        s += len - 1;
        sv_catpvn(herewas,s,bufend-s);
-       (void)strcpy(bufptr,SvPVX(herewas));
+       Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
 
        s = olds;
        goto retval;
@@ -6685,10 +6865,11 @@ S_scan_heredoc(pTHX_ register char *s)
            av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
        }
        if (*s == term && memEQ(s,PL_tokenbuf,len)) {
-           s = PL_bufend - 1;
-           *s = ' ';
+           STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+           *(SvPVX(PL_linestr) + off ) = ' ';
            sv_catsv(PL_linestr,herewas);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+           s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
        }
        else {
            s = PL_bufend;
@@ -6793,7 +6974,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 
        /* turn <> into <ARGV> */
        if (!len)
-           (void)strcpy(d,"ARGV");
+           Copy("ARGV",d,5,char);
 
        /* Check whether readline() is overriden */
        if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
@@ -7489,7 +7670,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
        }
 
        /* read exponent part, if present */
-       if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
+       if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
            floatit = TRUE;
            s++;
 
@@ -7834,72 +8015,92 @@ S_swallow_bom(pTHX_ U8 *s)
 {
     STRLEN slen;
     slen = SvCUR(PL_linestr);
-    switch (*s) {
+    switch (s[0]) {
     case 0xFF:
        if (s[1] == 0xFE) {
-           /* UTF-16 little-endian */
+           /* UTF-16 little-endian? (or UTF32-LE?) */
            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
-               Perl_croak(aTHX_ "Unsupported script encoding");
+               Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
            s += 2;
+       utf16le:
            if (PL_bufend > (char*)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16rev_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
-                                                PL_bufend - (char*)s - 1,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               utf16_to_utf8_reversed(s, news,
+                                      PL_bufend - (char*)s - 1,
+                                      &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
 #endif
        }
        break;
     case 0xFE:
-       if (s[1] == 0xFF) {   /* UTF-16 big-endian */
+       if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
            s += 2;
+       utf16be:
            if (PL_bufend > (char *)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8(s, news,
-                                                PL_bufend - (char*)s,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               utf16_to_utf8(s, news,
+                             PL_bufend - (char*)s,
+                             &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
 #endif
        }
        break;
     case 0xEF:
        if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
            s += 3;                      /* UTF-8 */
        }
        break;
     case 0:
-       if (slen > 3 && s[1] == 0 &&  /* UTF-32 big-endian */
-           s[2] == 0xFE && s[3] == 0xFF)
-       {
-           Perl_croak(aTHX_ "Unsupported script encoding");
+       if (slen > 3) {
+            if (s[1] == 0) {
+                 if (s[2] == 0xFE && s[3] == 0xFF) {
+                      /* UTF-32 big-endian */
+                      Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+                 }
+            }
+            else if (s[2] == 0 && s[3] != 0) {
+                 /* Leading bytes
+                  * 00 xx 00 xx
+                  * are a good indicator of UTF-16BE. */
+                 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+                 goto utf16be;
+            }
        }
+    default:
+        if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+                 /* Leading bytes
+                  * xx 00 xx 00
+                  * are a good indicator of UTF-16LE. */
+             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+             goto utf16le;
+        }
     }
     return (char*)s;
 }
@@ -7925,38 +8126,42 @@ restore_rsfp(pTHX_ void *f)
 static I32
 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16_textfilter(%p): %d %d (%d)\n",
+                         utf16_textfilter, idx, maxlen, count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
-       tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
-    return count;
+    DEBUG_P({sv_dump(sv);});
+    return SvCUR(sv);
 }
 
 static I32
 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16rev_textfilter(%p): %d %d (%d)\n",
+                         utf16rev_textfilter, idx, maxlen, count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
+    DEBUG_P({ sv_dump(sv); });
     return count;
 }
 #endif