This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix for bug: [perl #32562] __PACKAGE__ symbol has wrong value
[perl5.git] / toke.c
diff --git a/toke.c b/toke.c
index 5e4c7e7..d798946 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -1,7 +1,7 @@
 /*    toke.c
  *
  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- *    2000, 2001, 2002, 2003, by Larry Wall and others
+ *    2000, 2001, 2002, 2003, 2004, by Larry Wall and others
  *
  *    You may distribute under the terms of either the GNU General Public
  *    License or the Artistic License, as specified in the README file.
@@ -23,8 +23,8 @@
 #define PERL_IN_TOKE_C
 #include "perl.h"
 
-#define yychar PL_yychar
-#define yylval PL_yylval
+#define yychar (*PL_yycharp)
+#define yylval (*PL_yylvalp)
 
 static char ident_too_long[] = "Identifier too long";
 static char c_without_g[] = "Use of /c modifier is meaningless without /g";
@@ -75,24 +75,24 @@ static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
 #define LEX_FORMLINE            1
 #define LEX_KNOWNEXT            0
 
-#ifdef ff_next
-#undef ff_next
+#ifdef DEBUGGING
+static char* lex_state_names[] = {
+    "KNOWNEXT",
+    "FORMLINE",
+    "INTERPCONST",
+    "INTERPCONCAT",
+    "INTERPENDMAYBE",
+    "INTERPEND",
+    "INTERPSTART",
+    "INTERPPUSH",
+    "INTERPCASEMOD",
+    "INTERPNORMAL",
+    "NORMAL"
+};
 #endif
 
-#ifdef USE_PURE_BISON
-#  ifndef YYMAXLEVEL
-#    define YYMAXLEVEL 100
-#  endif
-YYSTYPE* yylval_pointer[YYMAXLEVEL];
-int* yychar_pointer[YYMAXLEVEL];
-int yyactlevel = -1;
-#  undef yylval
-#  undef yychar
-#  define yylval (*yylval_pointer[yyactlevel])
-#  define yychar (*yychar_pointer[yyactlevel])
-#  define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
-#  undef yylex
-#  define yylex()      Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
+#ifdef ff_next
+#undef ff_next
 #endif
 
 #include "keywords.h"
@@ -132,79 +132,200 @@ int yyactlevel = -1;
  * Also see LOP and lop() below.
  */
 
-/* Note that REPORT() and REPORT2() will be expressions that supply
- * their own trailing comma, not suitable for statements as such. */
 #ifdef DEBUGGING /* Serve -DT. */
-#   define REPORT(x,retval) tokereport(x,s,(int)retval),
-#   define REPORT2(x,retval) tokereport(x,s, yylval.ival),
+#   define REPORT(retval) tokereport(s,(int)retval)
 #else
-#   define REPORT(x,retval)
-#   define REPORT2(x,retval)
+#   define REPORT(retval) (retval)
 #endif
 
-#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
-#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
-#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
-#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
-#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
-#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
-#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
-#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
-#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
-#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
-#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
-#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
-#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
-#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
-#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
-#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
-#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
-#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
-#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
-#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
+#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
+#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
+#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
+#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
+#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
+#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define FTST(f)  return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
+#define FUN0(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN1(f)  return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
+#define BOop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
+#define BAop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
+#define SHop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
+#define PWop(f)  return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define PMop(f)  return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
+#define Aop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Mop(f)   return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Eop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
+#define Rop(f)   return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
 
 /* This bit of chicanery makes a unary function followed by
  * a parenthesis into a function with one argument, highest precedence.
  * The UNIDOR macro is for unary functions that can be followed by the //
  * operator (such as C<shift // 0>).
  */
-#define UNI2(f,x) return(yylval.ival = f, \
-       REPORT("uni",f) \
+#define UNI2(f,x) return ( \
+       yylval.ival = f, \
        PL_expect = x, \
        PL_bufptr = s, \
        PL_last_uni = PL_oldbufptr, \
        PL_last_lop_op = f, \
-       (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+       REPORT( \
+           (*s == '(' || (s = skipspace(s), *s == '(')  \
+           ? (int)FUNC1 : (int)UNIOP)))
 #define UNI(f)    UNI2(f,XTERM)
 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
 
-#define UNIBRACK(f) return(yylval.ival = f, \
-        REPORT("uni",f) \
+#define UNIBRACK(f) return ( \
+       yylval.ival = f, \
        PL_bufptr = s, \
        PL_last_uni = PL_oldbufptr, \
-       (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+        REPORT( \
+           (*s == '(' || (s = skipspace(s), *s == '(') \
+       ? (int)FUNC1 : (int)UNIOP)))
 
 /* grandfather return to old style */
 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
 
 #ifdef DEBUGGING
 
-STATIC void
-S_tokereport(pTHX_ char *thing, char* s, I32 rv)
+/* how to interpret the yylval associated with the token */
+enum token_type {
+    TOKENTYPE_NONE,
+    TOKENTYPE_IVAL,
+    TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
+    TOKENTYPE_PVAL,
+    TOKENTYPE_OPVAL,
+    TOKENTYPE_GVVAL
+};
+
+static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
 {
-    DEBUG_T({
-        SV* report = newSVpv(thing, 0);
-        Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
-               (IV)rv);
+    { ADDOP,           TOKENTYPE_OPNUM,        "ADDOP" },
+    { ANDAND,          TOKENTYPE_NONE,         "ANDAND" },
+    { ANDOP,           TOKENTYPE_NONE,         "ANDOP" },
+    { ANONSUB,         TOKENTYPE_IVAL,         "ANONSUB" },
+    { ARROW,           TOKENTYPE_NONE,         "ARROW" },
+    { ASSIGNOP,                TOKENTYPE_OPNUM,        "ASSIGNOP" },
+    { BITANDOP,                TOKENTYPE_OPNUM,        "BITANDOP" },
+    { BITOROP,         TOKENTYPE_OPNUM,        "BITOROP" },
+    { COLONATTR,       TOKENTYPE_NONE,         "COLONATTR" },
+    { CONTINUE,                TOKENTYPE_NONE,         "CONTINUE" },
+    { DO,              TOKENTYPE_NONE,         "DO" },
+    { DOLSHARP,                TOKENTYPE_NONE,         "DOLSHARP" },
+    { DORDOR,          TOKENTYPE_NONE,         "DORDOR" },
+    { DOROP,           TOKENTYPE_OPNUM,        "DOROP" },
+    { DOTDOT,          TOKENTYPE_IVAL,         "DOTDOT" },
+    { ELSE,            TOKENTYPE_NONE,         "ELSE" },
+    { ELSIF,           TOKENTYPE_IVAL,         "ELSIF" },
+    { EQOP,            TOKENTYPE_OPNUM,        "EQOP" },
+    { FOR,             TOKENTYPE_IVAL,         "FOR" },
+    { FORMAT,          TOKENTYPE_NONE,         "FORMAT" },
+    { FUNC,            TOKENTYPE_OPNUM,        "FUNC" },
+    { FUNC0,           TOKENTYPE_OPNUM,        "FUNC0" },
+    { FUNC0SUB,                TOKENTYPE_OPVAL,        "FUNC0SUB" },
+    { FUNC1,           TOKENTYPE_OPNUM,        "FUNC1" },
+    { FUNCMETH,                TOKENTYPE_OPVAL,        "FUNCMETH" },
+    { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
+    { IF,              TOKENTYPE_IVAL,         "IF" },
+    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
+    { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
+    { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
+    { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
+    { LSTOPSUB,                TOKENTYPE_OPVAL,        "LSTOPSUB" },
+    { MATCHOP,         TOKENTYPE_OPNUM,        "MATCHOP" },
+    { METHOD,          TOKENTYPE_OPVAL,        "METHOD" },
+    { MULOP,           TOKENTYPE_OPNUM,        "MULOP" },
+    { MY,              TOKENTYPE_IVAL,         "MY" },
+    { MYSUB,           TOKENTYPE_NONE,         "MYSUB" },
+    { NOAMP,           TOKENTYPE_NONE,         "NOAMP" },
+    { NOTOP,           TOKENTYPE_NONE,         "NOTOP" },
+    { OROP,            TOKENTYPE_IVAL,         "OROP" },
+    { OROR,            TOKENTYPE_NONE,         "OROR" },
+    { PACKAGE,         TOKENTYPE_NONE,         "PACKAGE" },
+    { PMFUNC,          TOKENTYPE_OPVAL,        "PMFUNC" },
+    { POSTDEC,         TOKENTYPE_NONE,         "POSTDEC" },
+    { POSTINC,         TOKENTYPE_NONE,         "POSTINC" },
+    { POWOP,           TOKENTYPE_OPNUM,        "POWOP" },
+    { PREDEC,          TOKENTYPE_NONE,         "PREDEC" },
+    { PREINC,          TOKENTYPE_NONE,         "PREINC" },
+    { PRIVATEREF,      TOKENTYPE_OPVAL,        "PRIVATEREF" },
+    { REFGEN,          TOKENTYPE_NONE,         "REFGEN" },
+    { RELOP,           TOKENTYPE_OPNUM,        "RELOP" },
+    { SHIFTOP,         TOKENTYPE_OPNUM,        "SHIFTOP" },
+    { SUB,             TOKENTYPE_NONE,         "SUB" },
+    { THING,           TOKENTYPE_OPVAL,        "THING" },
+    { UMINUS,          TOKENTYPE_NONE,         "UMINUS" },
+    { UNIOP,           TOKENTYPE_OPNUM,        "UNIOP" },
+    { UNIOPSUB,                TOKENTYPE_OPVAL,        "UNIOPSUB" },
+    { UNLESS,          TOKENTYPE_IVAL,         "UNLESS" },
+    { UNTIL,           TOKENTYPE_IVAL,         "UNTIL" },
+    { USE,             TOKENTYPE_IVAL,         "USE" },
+    { WHILE,           TOKENTYPE_IVAL,         "WHILE" },
+    { WORD,            TOKENTYPE_OPVAL,        "WORD" },
+    { 0,               TOKENTYPE_NONE,         0 }
+};
+
+/* dump the returned token in rv, plus any optional arg in yylval */
 
+STATIC int
+S_tokereport(pTHX_ char* s, I32 rv)
+{
+    if (DEBUG_T_TEST) {
+       char *name = Nullch;
+       enum token_type type = TOKENTYPE_NONE;
+       struct debug_tokens *p;
+        SV* report = NEWSV(0, 60);
+
+        Perl_sv_catpvf(aTHX_ report, "<== ");
+
+       for (p = debug_tokens; p->token; p++) {
+           if (p->token == (int)rv) {
+               name = p->name;
+               type = p->type;
+               break;
+           }
+       }
+       if (name)
+           Perl_sv_catpvf(aTHX_ report, "%s", name);
+       else if ((char)rv > ' ' && (char)rv < '~')
+           Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+       else if (!rv)
+           Perl_sv_catpvf(aTHX_ report, "EOF");
+       else
+           Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+       switch (type) {
+       case TOKENTYPE_NONE:
+       case TOKENTYPE_GVVAL: /* doesn't appear to be used */
+           break;
+       case TOKENTYPE_IVAL:
+           Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
+           break;
+       case TOKENTYPE_OPNUM:
+           Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
+                                   PL_op_name[yylval.ival]);
+           break;
+       case TOKENTYPE_PVAL:
+           Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
+           break;
+       case TOKENTYPE_OPVAL:
+           if (yylval.opval)
+               Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+                                   PL_op_name[yylval.opval->op_type]);
+           else
+               Perl_sv_catpv(aTHX_ report, "(opval=null)");
+           break;
+       }
+        Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
         if (s - PL_bufptr > 0)
             sv_catpvn(report, PL_bufptr, s - PL_bufptr);
         else {
             if (PL_oldbufptr && *PL_oldbufptr)
                 sv_catpv(report, PL_tokenbuf);
         }
-        PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
-    });
+        PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
+    };
+    return (int)rv;
 }
 
 #endif
@@ -713,20 +834,19 @@ S_lop(pTHX_ I32 f, int x, char *s)
 {
     yylval.ival = f;
     CLINE;
-    REPORT("lop", f)
     PL_expect = x;
     PL_bufptr = s;
     PL_last_lop = PL_oldbufptr;
     PL_last_lop_op = (OPCODE)f;
     if (PL_nexttoke)
-       return LSTOP;
+       return REPORT(LSTOP);
     if (*s == '(')
-       return FUNC;
+       return REPORT(FUNC);
     s = skipspace(s);
     if (*s == '(')
-       return FUNC;
+       return REPORT(FUNC);
     else
-       return LSTOP;
+       return REPORT(LSTOP);
 }
 
 /*
@@ -1236,7 +1356,7 @@ S_scan_const(pTHX_ char *start)
 
     const char *leaveit =      /* set of acceptably-backslashed characters */
        PL_lex_inpat
-           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+           ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
            : "";
 
     if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
@@ -2037,19 +2157,17 @@ Perl_filter_del(pTHX_ filter_t funcp)
 }
 
 
-/* Invoke the n'th filter function for the current rsfp.        */
+/* Invoke the idxth filter function for the current rsfp.       */
+/* maxlen 0 = read one text line */
 I32
 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
-                               /* 0 = read one text line */
 {
     filter_t funcp;
     SV *datasv = NULL;
 
     if (!PL_rsfp_filters)
        return -1;
-    if (idx > AvFILLp(PL_rsfp_filters)){       /* Any more filters?    */
+    if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?   */
        /* Provide a default input filter to make life easy.    */
        /* Note that we append to the line. This is handy.      */
        DEBUG_P(PerlIO_printf(Perl_debug_log,
@@ -2080,7 +2198,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
        return SvCUR(buf_sv);
     }
     /* Skip this filter slot if filter has been deleted        */
-    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+    if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
        DEBUG_P(PerlIO_printf(Perl_debug_log,
                              "filter_read %d: skipped (filter deleted)\n",
                              idx));
@@ -2106,7 +2224,6 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
     }
 #endif
     if (PL_rsfp_filters) {
-
        if (!append)
             SvCUR_set(sv, 0);  /* start with empty line        */
         if (FILTER_READ(0, sv, 0) > 0)
@@ -2176,26 +2293,6 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
       if we already built the token before, use it.
 */
 
-#ifdef USE_PURE_BISON
-int
-Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
-{
-    int r;
-
-    yyactlevel++;
-    yylval_pointer[yyactlevel] = lvalp;
-    yychar_pointer[yyactlevel] = lcharp;
-    if (yyactlevel >= YYMAXLEVEL)
-       Perl_croak(aTHX_ "panic: YYMAXLEVEL");
-
-    r = Perl_yylex(aTHX);
-
-    if (yyactlevel > 0)
-       yyactlevel--;
-
-    return r;
-}
-#endif
 
 #ifdef __SC__
 #pragma segment Perl_yylex
@@ -2203,7 +2300,7 @@ Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
 int
 Perl_yylex(pTHX)
 {
-    register char *s;
+    register char *s = PL_bufptr;
     register char *d;
     register I32 tmp;
     STRLEN len;
@@ -2212,9 +2309,13 @@ Perl_yylex(pTHX)
     bool bof = FALSE;
     I32 orig_keyword = 0;
 
+    DEBUG_T( {
+       PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
+                                       lex_state_names[PL_lex_state]);
+    } );
     /* check if there's an identifier for us to look at */
     if (PL_pending_ident)
-        return S_pending_ident(aTHX);
+        return REPORT(S_pending_ident(aTHX));
 
     /* no identifier pending identification */
 
@@ -2238,7 +2339,7 @@ Perl_yylex(pTHX)
               "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
               (IV)PL_nexttype[PL_nexttoke]); });
 
-       return(PL_nexttype[PL_nexttoke]);
+       return REPORT(PL_nexttype[PL_nexttoke]);
 
     /* interpolated case modifiers like \L \U, including \Q and \E.
        when we get here, PL_bufptr is at the \
@@ -2261,7 +2362,7 @@ Perl_yylex(pTHX)
                    PL_bufptr += 2;
                    PL_lex_state = LEX_INTERPCONCAT;
                }
-               return ')';
+               return REPORT(')');
            }
            if (PL_bufptr != PL_bufend)
                PL_bufptr += 2;
@@ -2283,7 +2384,7 @@ Perl_yylex(pTHX)
                if (strchr("LU", *s) &&
                    (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
                    PL_lex_casestack[--PL_lex_casemods] = '\0';
-                   return ')';
+                   return REPORT(')');
                }
                if (PL_lex_casemods > 10)
                    Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
@@ -2310,18 +2411,22 @@ Perl_yylex(pTHX)
            if (PL_lex_starts) {
                s = PL_bufptr;
                PL_lex_starts = 0;
-               Aop(OP_CONCAT);
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (PL_lex_casemods == 1 && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
            }
            else
                return yylex();
        }
 
     case LEX_INTERPPUSH:
-        return sublex_push();
+        return REPORT(sublex_push());
 
     case LEX_INTERPSTART:
        if (PL_bufptr == PL_bufend)
-           return sublex_done();
+           return REPORT(sublex_done());
        DEBUG_T({ PerlIO_printf(Perl_debug_log,
               "### Interpolated variable at '%s'\n", PL_bufptr); });
        PL_expect = XTERM;
@@ -2340,7 +2445,11 @@ Perl_yylex(pTHX)
        }
        if (PL_lex_starts++) {
            s = PL_bufptr;
-           Aop(OP_CONCAT);
+           /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+           if (!PL_lex_casemods && PL_lex_inpat)
+               OPERATOR(',');
+           else
+               Aop(OP_CONCAT);
        }
        return yylex();
 
@@ -2355,7 +2464,7 @@ Perl_yylex(pTHX)
        if (PL_lex_dojoin) {
            PL_lex_dojoin = FALSE;
            PL_lex_state = LEX_INTERPCONCAT;
-           return ')';
+           return REPORT(')');
        }
        if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
            && SvEVALED(PL_lex_repl))
@@ -2371,7 +2480,7 @@ Perl_yylex(pTHX)
            Perl_croak(aTHX_ "panic: INTERPCONCAT");
 #endif
        if (PL_bufptr == PL_bufend)
-           return sublex_done();
+           return REPORT(sublex_done());
 
        if (SvIVX(PL_linestr) == '\'') {
            SV *sv = newSVsv(PL_linestr);
@@ -2394,8 +2503,13 @@ Perl_yylex(pTHX)
            PL_nextval[PL_nexttoke] = yylval;
            PL_expect = XTERM;
            force_next(THING);
-           if (PL_lex_starts++)
-               Aop(OP_CONCAT);
+           if (PL_lex_starts++) {
+               /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+               if (!PL_lex_casemods && PL_lex_inpat)
+                   OPERATOR(',');
+               else
+                   Aop(OP_CONCAT);
+           }
            else {
                PL_bufptr = s;
                return yylex();
@@ -2415,7 +2529,7 @@ Perl_yylex(pTHX)
     PL_oldoldbufptr = PL_oldbufptr;
     PL_oldbufptr = s;
     DEBUG_T( {
-       PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
+       PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
                      exp_name[PL_expect], s);
     } );
 
@@ -2533,8 +2647,13 @@ Perl_yylex(pTHX)
                sv_setpv(PL_linestr,"");
                TOKEN(';');     /* not infinite loop because rsfp is NULL now */
            }
-           /* if it looks like the start of a BOM, check if it in fact is */
-           else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+           /* If it looks like the start of a BOM or raw UTF-16,
+            * check if it in fact is. */
+           else if (bof &&
+                    (*s == 0 ||
+                     *(U8*)s == 0xEF ||
+                     *(U8*)s >= 0xFE ||
+                     s[1] == 0)) {
 #ifdef PERLIO_IS_STDIO
 #  ifdef __GNU_LIBRARY__
 #    if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
@@ -3023,7 +3142,7 @@ Perl_yylex(pTHX)
                        yyerror("Unterminated attribute parameter in attribute list");
                        if (attrs)
                            op_free(attrs);
-                       return 0;       /* EOF indicator */
+                       return REPORT(0);       /* EOF indicator */
                    }
                }
                if (PL_lex_stuff) {
@@ -3035,9 +3154,20 @@ Perl_yylex(pTHX)
                    PL_lex_stuff = Nullsv;
                }
                else {
+                   if (len == 6 && strnEQ(s, "unique", len)) {
+                       if (PL_in_my == KEY_our)
+#ifdef USE_ITHREADS
+                           GvUNIQUE_on(cGVOPx_gv(yylval.opval));
+#else
+                           ; /* skip to avoid loading attributes.pm */
+#endif
+                       else 
+                           Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
+                   }
+
                    /* NOTE: any CV attrs applied here need to be part of
                       the CVf_BUILTIN_ATTRS define in cv.h! */
-                   if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
+                   else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
                        CvLVALUE_on(PL_compcv);
                    else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
                        CvLOCKED_on(PL_compcv);
@@ -3045,13 +3175,6 @@ Perl_yylex(pTHX)
                        CvMETHOD_on(PL_compcv);
                    else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
                        CvASSERTION_on(PL_compcv);
-                   else if (PL_in_my == KEY_our && len == 6 &&
-                            strnEQ(s, "unique", len))
-#ifdef USE_ITHREADS
-                       GvUNIQUE_on(cGVOPx_gv(yylval.opval));
-#else
-                       ; /* skip that case to avoid loading attributes.pm */
-#endif
                    /* After we've set the flags, it could be argued that
                       we don't need to do the attributes.pm-based setting
                       process, and shouldn't bother appending recognized
@@ -3414,8 +3537,24 @@ Perl_yylex(pTHX)
     case '!':
        s++;
        tmp = *s++;
-       if (tmp == '=')
+       if (tmp == '=') {
+            /* was this !=~ where !~ was meant?
+             * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
+
+            if (*s == '~' && ckWARN(WARN_SYNTAX)) {
+                char *t = s+1;
+
+                while (t < PL_bufend && isSPACE(*t))
+                    ++t;
+
+                if (*t == '/' || *t == '?' ||
+                    ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
+                    (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+                                "!=~ should be !~");
+            }
            Eop(OP_NE);
+        }
        if (tmp == '~')
            PMop(OP_NOT);
        s--;
@@ -3460,7 +3599,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ','; /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
        }
 
@@ -3703,7 +3842,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ',';     /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
            else
                no_op("String",s);
@@ -3722,7 +3861,7 @@ Perl_yylex(pTHX)
            if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
                PL_expect = XTERM;
                depcom();
-               return ',';     /* grandfather non-comma-format format */
+               return REPORT(','); /* grandfather non-comma-format format */
            }
            else
                no_op("String",s);
@@ -3898,6 +4037,16 @@ Perl_yylex(pTHX)
            {
                tmp = 0;                /* any sub overrides "weak" keyword */
            }
+           else if (gv && !gvp
+                   && tmp == -KEY_err
+                   && GvCVu(gv)
+                   && PL_expect != XOPERATOR
+                   && PL_expect != XTERMORDORDOR)
+           {
+               /* any sub overrides the "err" keyword, except when really an
+                * operator is expected */
+               tmp = 0;
+           }
            else {                      /* no override */
                tmp = -tmp;
                if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
@@ -4010,7 +4159,7 @@ Perl_yylex(pTHX)
                    /* Two barewords in a row may indicate method call. */
 
                    if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
-                       return tmp;
+                       return REPORT(tmp);
 
                    /* If not a declared subroutine, it's an indirect object. */
                    /* (But it's an indir obj regardless for sort.) */
@@ -4067,7 +4216,7 @@ Perl_yylex(pTHX)
                if (!orig_keyword
                        && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
                        && (tmp = intuit_method(s,gv)))
-                   return tmp;
+                   return REPORT(tmp);
 
                /* Not a method, so call it a subroutine (if defined) */
 
@@ -4156,7 +4305,7 @@ Perl_yylex(pTHX)
        case KEY___PACKAGE__:
            yylval.opval = (OP*)newSVOP(OP_CONST, 0,
                                        (PL_curstash
-                                        ? newSVsv(PL_curstname)
+                                        ? newSVpv(HvNAME(PL_curstash), 0)
                                         : &PL_sv_undef));
            TERM(THING);
 
@@ -4697,8 +4846,8 @@ Perl_yylex(pTHX)
            if (isIDFIRST_lazy_if(s,UTF)) {
                char *t;
                for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
-               t = skipspace(d);
-               if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+               for (t=d; *t && isSPACE(*t); t++) ;
+               if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
                    /* [perl #16184] */
                    && !(t[0] == '=' && t[1] == '>')
                ) {
@@ -5106,8 +5255,12 @@ Perl_yylex(pTHX)
 
                if (*s == ':' && s[1] != ':')
                    PL_expect = attrful;
-               else if (!have_name && *s != '{' && key == KEY_sub)
-                   Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+               else if (*s != '{' && key == KEY_sub) {
+                   if (!have_name)
+                       Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+                   else if (*s != ';')
+                       Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
+               }
 
                if (have_proto) {
                    PL_nextval[PL_nexttoke].opval =
@@ -6518,7 +6671,8 @@ S_scan_trans(pTHX_ char *start)
 
     New(803, tbl, complement&&!del?258:256, short);
     o = newPVOP(OP_TRANS, 0, (char*)tbl);
-    o->op_private = del|squash|complement|
+    o->op_private &= ~OPpTRANS_ALL;
+    o->op_private |= del|squash|complement|
       (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
       (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF   : 0);
 
@@ -6638,7 +6792,7 @@ S_scan_heredoc(pTHX_ register char *s)
        sv_setpvn(tmpstr,d+1,s-d);
        s += len - 1;
        sv_catpvn(herewas,s,bufend-s);
-       (void)strcpy(bufptr,SvPVX(herewas));
+       Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
 
        s = olds;
        goto retval;
@@ -6700,10 +6854,11 @@ S_scan_heredoc(pTHX_ register char *s)
            av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
        }
        if (*s == term && memEQ(s,PL_tokenbuf,len)) {
-           s = PL_bufend - 1;
-           *s = ' ';
+           STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+           *(SvPVX(PL_linestr) + off ) = ' ';
            sv_catsv(PL_linestr,herewas);
            PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+           s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
        }
        else {
            s = PL_bufend;
@@ -6808,7 +6963,7 @@ S_scan_inputsymbol(pTHX_ char *start)
 
        /* turn <> into <ARGV> */
        if (!len)
-           (void)strcpy(d,"ARGV");
+           Copy("ARGV",d,5,char);
 
        /* Check whether readline() is overriden */
        if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
@@ -7792,12 +7947,7 @@ Perl_yyerror(pTHX_ char *s)
     }
     else if (yychar > 255)
        where = "next token ???";
-#ifdef USE_PURE_BISON
-/*  GNU Bison sets the value -2 */
-    else if (yychar == -2) {
-#else
-    else if ((yychar & 127) == 127) {
-#endif
+    else if (yychar == -2) { /* YYEMPTY */
        if (PL_lex_state == LEX_NORMAL ||
           (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
            where = "at end of line";
@@ -7854,72 +8004,92 @@ S_swallow_bom(pTHX_ U8 *s)
 {
     STRLEN slen;
     slen = SvCUR(PL_linestr);
-    switch (*s) {
+    switch (s[0]) {
     case 0xFF:
        if (s[1] == 0xFE) {
-           /* UTF-16 little-endian */
+           /* UTF-16 little-endian? (or UTF32-LE?) */
            if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
-               Perl_croak(aTHX_ "Unsupported script encoding");
+               Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
            s += 2;
+       utf16le:
            if (PL_bufend > (char*)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16rev_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
-                                                PL_bufend - (char*)s - 1,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               utf16_to_utf8_reversed(s, news,
+                                      PL_bufend - (char*)s - 1,
+                                      &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
 #endif
        }
        break;
     case 0xFE:
-       if (s[1] == 0xFF) {   /* UTF-16 big-endian */
+       if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
 #ifndef PERL_NO_UTF16_FILTER
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
            s += 2;
+       utf16be:
            if (PL_bufend > (char *)s) {
                U8 *news;
                I32 newlen;
 
                filter_add(utf16_textfilter, NULL);
                New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
-               PL_bufend = (char*)utf16_to_utf8(s, news,
-                                                PL_bufend - (char*)s,
-                                                &newlen);
-               Copy(news, s, newlen, U8);
-               SvCUR_set(PL_linestr, newlen);
-               PL_bufend = SvPVX(PL_linestr) + newlen;
-               news[newlen++] = '\0';
+               utf16_to_utf8(s, news,
+                             PL_bufend - (char*)s,
+                             &newlen);
+               sv_setpvn(PL_linestr, (const char*)news, newlen);
                Safefree(news);
+               SvUTF8_on(PL_linestr);
+               s = (U8*)SvPVX(PL_linestr);
+               PL_bufend = SvPVX(PL_linestr) + newlen;
            }
 #else
-           Perl_croak(aTHX_ "Unsupported script encoding");
+           Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
 #endif
        }
        break;
     case 0xEF:
        if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
-           DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+           if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
            s += 3;                      /* UTF-8 */
        }
        break;
     case 0:
-       if (slen > 3 && s[1] == 0 &&  /* UTF-32 big-endian */
-           s[2] == 0xFE && s[3] == 0xFF)
-       {
-           Perl_croak(aTHX_ "Unsupported script encoding");
+       if (slen > 3) {
+            if (s[1] == 0) {
+                 if (s[2] == 0xFE && s[3] == 0xFF) {
+                      /* UTF-32 big-endian */
+                      Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+                 }
+            }
+            else if (s[2] == 0 && s[3] != 0) {
+                 /* Leading bytes
+                  * 00 xx 00 xx
+                  * are a good indicator of UTF-16BE. */
+                 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+                 goto utf16be;
+            }
        }
+    default:
+        if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+                 /* Leading bytes
+                  * xx 00 xx 00
+                  * are a good indicator of UTF-16LE. */
+             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+             goto utf16le;
+        }
     }
     return (char*)s;
 }
@@ -7945,38 +8115,42 @@ restore_rsfp(pTHX_ void *f)
 static I32
 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16_textfilter(%p): %d %d (%d)\n",
+                         utf16_textfilter, idx, maxlen, count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
-       tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
-    return count;
+    DEBUG_P({sv_dump(sv);});
+    return SvCUR(sv);
 }
 
 static I32
 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 {
+    STRLEN old = SvCUR(sv);
     I32 count = FILTER_READ(idx+1, sv, maxlen);
+    DEBUG_P(PerlIO_printf(Perl_debug_log,
+                         "utf16rev_textfilter(%p): %d %d (%d)\n",
+                         utf16rev_textfilter, idx, maxlen, count));
     if (count) {
        U8* tmps;
-       U8* tend;
        I32 newlen;
-       if (!*SvPV_nolen(sv))
-       /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
-       return count;
-
        New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
-       tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
-       sv_usepvn(sv, (char*)tmps, tend - tmps);
+       Copy(SvPVX(sv), tmps, old, char);
+       utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+                     SvCUR(sv) - old, &newlen);
+       sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
     }
+    DEBUG_P({ sv_dump(sv); });
     return count;
 }
 #endif