/* toke.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
#define PERL_IN_TOKE_C
#include "perl.h"
-#define yychar PL_yychar
-#define yylval PL_yylval
+#define yychar (*PL_yycharp)
+#define yylval (*PL_yylvalp)
static char ident_too_long[] = "Identifier too long";
static char c_without_g[] = "Use of /c modifier is meaningless without /g";
#define LEX_FORMLINE 1
#define LEX_KNOWNEXT 0
-#ifdef ff_next
-#undef ff_next
+#ifdef DEBUGGING
+static char* lex_state_names[] = {
+ "KNOWNEXT",
+ "FORMLINE",
+ "INTERPCONST",
+ "INTERPCONCAT",
+ "INTERPENDMAYBE",
+ "INTERPEND",
+ "INTERPSTART",
+ "INTERPPUSH",
+ "INTERPCASEMOD",
+ "INTERPNORMAL",
+ "NORMAL"
+};
#endif
-#ifdef USE_PURE_BISON
-# ifndef YYMAXLEVEL
-# define YYMAXLEVEL 100
-# endif
-YYSTYPE* yylval_pointer[YYMAXLEVEL];
-int* yychar_pointer[YYMAXLEVEL];
-int yyactlevel = -1;
-# undef yylval
-# undef yychar
-# define yylval (*yylval_pointer[yyactlevel])
-# define yychar (*yychar_pointer[yyactlevel])
-# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
-# undef yylex
-# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
+#ifdef ff_next
+#undef ff_next
#endif
#include "keywords.h"
* Also see LOP and lop() below.
*/
-/* Note that REPORT() and REPORT2() will be expressions that supply
- * their own trailing comma, not suitable for statements as such. */
#ifdef DEBUGGING /* Serve -DT. */
-# define REPORT(x,retval) tokereport(x,s,(int)retval),
-# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
+# define REPORT(retval) tokereport(s,(int)retval)
#else
-# define REPORT(x,retval)
-# define REPORT2(x,retval)
+# define REPORT(retval) (retval)
#endif
-#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
-#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
-#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
-#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
-#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
-#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
-#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
-#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
-#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
-#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
-#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
-#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
-#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
-#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
-#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
-#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
-#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
-#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
-#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
-#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
+#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
+#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
+#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
+#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
+#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
+#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
+#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
+#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
+#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
+#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
+#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
+#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
+#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
* The UNIDOR macro is for unary functions that can be followed by the //
* operator (such as C<shift // 0>).
*/
-#define UNI2(f,x) return(yylval.ival = f, \
- REPORT("uni",f) \
+#define UNI2(f,x) return ( \
+ yylval.ival = f, \
PL_expect = x, \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
PL_last_lop_op = f, \
- (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+ REPORT( \
+ (*s == '(' || (s = skipspace(s), *s == '(') \
+ ? (int)FUNC1 : (int)UNIOP)))
#define UNI(f) UNI2(f,XTERM)
#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
-#define UNIBRACK(f) return(yylval.ival = f, \
- REPORT("uni",f) \
+#define UNIBRACK(f) return ( \
+ yylval.ival = f, \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
- (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+ REPORT( \
+ (*s == '(' || (s = skipspace(s), *s == '(') \
+ ? (int)FUNC1 : (int)UNIOP)))
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
#ifdef DEBUGGING
-STATIC void
-S_tokereport(pTHX_ char *thing, char* s, I32 rv)
+/* how to interpret the yylval associated with the token */
+enum token_type {
+ TOKENTYPE_NONE,
+ TOKENTYPE_IVAL,
+ TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
+ TOKENTYPE_PVAL,
+ TOKENTYPE_OPVAL,
+ TOKENTYPE_GVVAL
+};
+
+static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
{
- DEBUG_T({
- SV* report = newSVpv(thing, 0);
- Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
- (IV)rv);
+ { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
+ { ANDAND, TOKENTYPE_NONE, "ANDAND" },
+ { ANDOP, TOKENTYPE_NONE, "ANDOP" },
+ { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
+ { ARROW, TOKENTYPE_NONE, "ARROW" },
+ { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
+ { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
+ { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
+ { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
+ { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
+ { DO, TOKENTYPE_NONE, "DO" },
+ { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
+ { DORDOR, TOKENTYPE_NONE, "DORDOR" },
+ { DOROP, TOKENTYPE_OPNUM, "DOROP" },
+ { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
+ { ELSE, TOKENTYPE_NONE, "ELSE" },
+ { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
+ { EQOP, TOKENTYPE_OPNUM, "EQOP" },
+ { FOR, TOKENTYPE_IVAL, "FOR" },
+ { FORMAT, TOKENTYPE_NONE, "FORMAT" },
+ { FUNC, TOKENTYPE_OPNUM, "FUNC" },
+ { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
+ { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
+ { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
+ { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
+ { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
+ { IF, TOKENTYPE_IVAL, "IF" },
+ { LABEL, TOKENTYPE_PVAL, "LABEL" },
+ { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
+ { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
+ { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
+ { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
+ { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
+ { METHOD, TOKENTYPE_OPVAL, "METHOD" },
+ { MULOP, TOKENTYPE_OPNUM, "MULOP" },
+ { MY, TOKENTYPE_IVAL, "MY" },
+ { MYSUB, TOKENTYPE_NONE, "MYSUB" },
+ { NOAMP, TOKENTYPE_NONE, "NOAMP" },
+ { NOTOP, TOKENTYPE_NONE, "NOTOP" },
+ { OROP, TOKENTYPE_IVAL, "OROP" },
+ { OROR, TOKENTYPE_NONE, "OROR" },
+ { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
+ { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
+ { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
+ { POSTINC, TOKENTYPE_NONE, "POSTINC" },
+ { POWOP, TOKENTYPE_OPNUM, "POWOP" },
+ { PREDEC, TOKENTYPE_NONE, "PREDEC" },
+ { PREINC, TOKENTYPE_NONE, "PREINC" },
+ { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
+ { REFGEN, TOKENTYPE_NONE, "REFGEN" },
+ { RELOP, TOKENTYPE_OPNUM, "RELOP" },
+ { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
+ { SUB, TOKENTYPE_NONE, "SUB" },
+ { THING, TOKENTYPE_OPVAL, "THING" },
+ { UMINUS, TOKENTYPE_NONE, "UMINUS" },
+ { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
+ { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
+ { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
+ { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
+ { USE, TOKENTYPE_IVAL, "USE" },
+ { WHILE, TOKENTYPE_IVAL, "WHILE" },
+ { WORD, TOKENTYPE_OPVAL, "WORD" },
+ { 0, TOKENTYPE_NONE, 0 }
+};
+
+/* dump the returned token in rv, plus any optional arg in yylval */
+STATIC int
+S_tokereport(pTHX_ char* s, I32 rv)
+{
+ if (DEBUG_T_TEST) {
+ char *name = Nullch;
+ enum token_type type = TOKENTYPE_NONE;
+ struct debug_tokens *p;
+ SV* report = NEWSV(0, 60);
+
+ Perl_sv_catpvf(aTHX_ report, "<== ");
+
+ for (p = debug_tokens; p->token; p++) {
+ if (p->token == (int)rv) {
+ name = p->name;
+ type = p->type;
+ break;
+ }
+ }
+ if (name)
+ Perl_sv_catpvf(aTHX_ report, "%s", name);
+ else if ((char)rv > ' ' && (char)rv < '~')
+ Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+ else if (!rv)
+ Perl_sv_catpvf(aTHX_ report, "EOF");
+ else
+ Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+ switch (type) {
+ case TOKENTYPE_NONE:
+ case TOKENTYPE_GVVAL: /* doesn't appear to be used */
+ break;
+ case TOKENTYPE_IVAL:
+ Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
+ break;
+ case TOKENTYPE_OPNUM:
+ Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
+ PL_op_name[yylval.ival]);
+ break;
+ case TOKENTYPE_PVAL:
+ Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
+ break;
+ case TOKENTYPE_OPVAL:
+ if (yylval.opval)
+ Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+ PL_op_name[yylval.opval->op_type]);
+ else
+ Perl_sv_catpv(aTHX_ report, "(opval=null)");
+ break;
+ }
+ Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
if (s - PL_bufptr > 0)
sv_catpvn(report, PL_bufptr, s - PL_bufptr);
else {
if (PL_oldbufptr && *PL_oldbufptr)
sv_catpv(report, PL_tokenbuf);
}
- PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
- });
+ PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
+ };
+ return (int)rv;
}
#endif
{
yylval.ival = f;
CLINE;
- REPORT("lop", f)
PL_expect = x;
PL_bufptr = s;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = (OPCODE)f;
if (PL_nexttoke)
- return LSTOP;
+ return REPORT(LSTOP);
if (*s == '(')
- return FUNC;
+ return REPORT(FUNC);
s = skipspace(s);
if (*s == '(')
- return FUNC;
+ return REPORT(FUNC);
else
- return LSTOP;
+ return REPORT(LSTOP);
}
/*
const char *leaveit = /* set of acceptably-backslashed characters */
PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
: "";
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
}
-/* Invoke the n'th filter function for the current rsfp. */
+/* Invoke the idxth filter function for the current rsfp. */
+/* maxlen 0 = read one text line */
I32
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
- /* 0 = read one text line */
{
filter_t funcp;
SV *datasv = NULL;
if (!PL_rsfp_filters)
return -1;
- if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
+ if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
/* Provide a default input filter to make life easy. */
/* Note that we append to the line. This is handy. */
DEBUG_P(PerlIO_printf(Perl_debug_log,
return SvCUR(buf_sv);
}
/* Skip this filter slot if filter has been deleted */
- if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+ if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
DEBUG_P(PerlIO_printf(Perl_debug_log,
"filter_read %d: skipped (filter deleted)\n",
idx));
}
#endif
if (PL_rsfp_filters) {
-
if (!append)
SvCUR_set(sv, 0); /* start with empty line */
if (FILTER_READ(0, sv, 0) > 0)
if we already built the token before, use it.
*/
-#ifdef USE_PURE_BISON
-int
-Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
-{
- int r;
-
- yyactlevel++;
- yylval_pointer[yyactlevel] = lvalp;
- yychar_pointer[yyactlevel] = lcharp;
- if (yyactlevel >= YYMAXLEVEL)
- Perl_croak(aTHX_ "panic: YYMAXLEVEL");
-
- r = Perl_yylex(aTHX);
-
- if (yyactlevel > 0)
- yyactlevel--;
-
- return r;
-}
-#endif
#ifdef __SC__
#pragma segment Perl_yylex
int
Perl_yylex(pTHX)
{
- register char *s;
+ register char *s = PL_bufptr;
register char *d;
register I32 tmp;
STRLEN len;
bool bof = FALSE;
I32 orig_keyword = 0;
+ DEBUG_T( {
+ PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
+ lex_state_names[PL_lex_state]);
+ } );
/* check if there's an identifier for us to look at */
if (PL_pending_ident)
- return S_pending_ident(aTHX);
+ return REPORT(S_pending_ident(aTHX));
/* no identifier pending identification */
"### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
(IV)PL_nexttype[PL_nexttoke]); });
- return(PL_nexttype[PL_nexttoke]);
+ return REPORT(PL_nexttype[PL_nexttoke]);
/* interpolated case modifiers like \L \U, including \Q and \E.
when we get here, PL_bufptr is at the \
PL_bufptr += 2;
PL_lex_state = LEX_INTERPCONCAT;
}
- return ')';
+ return REPORT(')');
}
if (PL_bufptr != PL_bufend)
PL_bufptr += 2;
if (strchr("LU", *s) &&
(strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
PL_lex_casestack[--PL_lex_casemods] = '\0';
- return ')';
+ return REPORT(')');
}
if (PL_lex_casemods > 10)
Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
if (PL_lex_starts) {
s = PL_bufptr;
PL_lex_starts = 0;
- Aop(OP_CONCAT);
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (PL_lex_casemods == 1 && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
}
else
return yylex();
}
case LEX_INTERPPUSH:
- return sublex_push();
+ return REPORT(sublex_push());
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
- return sublex_done();
+ return REPORT(sublex_done());
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Interpolated variable at '%s'\n", PL_bufptr); });
PL_expect = XTERM;
}
if (PL_lex_starts++) {
s = PL_bufptr;
- Aop(OP_CONCAT);
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
}
return yylex();
if (PL_lex_dojoin) {
PL_lex_dojoin = FALSE;
PL_lex_state = LEX_INTERPCONCAT;
- return ')';
+ return REPORT(')');
}
if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
&& SvEVALED(PL_lex_repl))
Perl_croak(aTHX_ "panic: INTERPCONCAT");
#endif
if (PL_bufptr == PL_bufend)
- return sublex_done();
+ return REPORT(sublex_done());
if (SvIVX(PL_linestr) == '\'') {
SV *sv = newSVsv(PL_linestr);
PL_nextval[PL_nexttoke] = yylval;
PL_expect = XTERM;
force_next(THING);
- if (PL_lex_starts++)
- Aop(OP_CONCAT);
+ if (PL_lex_starts++) {
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
+ }
else {
PL_bufptr = s;
return yylex();
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
DEBUG_T( {
- PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
+ PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
exp_name[PL_expect], s);
} );
sv_setpv(PL_linestr,"");
TOKEN(';'); /* not infinite loop because rsfp is NULL now */
}
- /* if it looks like the start of a BOM, check if it in fact is */
- else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+ /* If it looks like the start of a BOM or raw UTF-16,
+ * check if it in fact is. */
+ else if (bof &&
+ (*s == 0 ||
+ *(U8*)s == 0xEF ||
+ *(U8*)s >= 0xFE ||
+ s[1] == 0)) {
#ifdef PERLIO_IS_STDIO
# ifdef __GNU_LIBRARY__
# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
yyerror("Unterminated attribute parameter in attribute list");
if (attrs)
op_free(attrs);
- return 0; /* EOF indicator */
+ return REPORT(0); /* EOF indicator */
}
}
if (PL_lex_stuff) {
PL_lex_stuff = Nullsv;
}
else {
+ if (len == 6 && strnEQ(s, "unique", len)) {
+ if (PL_in_my == KEY_our)
+#ifdef USE_ITHREADS
+ GvUNIQUE_on(cGVOPx_gv(yylval.opval));
+#else
+ ; /* skip to avoid loading attributes.pm */
+#endif
+ else
+ Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
+ }
+
/* NOTE: any CV attrs applied here need to be part of
the CVf_BUILTIN_ATTRS define in cv.h! */
- if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
+ else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
CvLVALUE_on(PL_compcv);
else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
CvLOCKED_on(PL_compcv);
CvMETHOD_on(PL_compcv);
else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
CvASSERTION_on(PL_compcv);
- else if (PL_in_my == KEY_our && len == 6 &&
- strnEQ(s, "unique", len))
-#ifdef USE_ITHREADS
- GvUNIQUE_on(cGVOPx_gv(yylval.opval));
-#else
- ; /* skip that case to avoid loading attributes.pm */
-#endif
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
process, and shouldn't bother appending recognized
case '!':
s++;
tmp = *s++;
- if (tmp == '=')
+ if (tmp == '=') {
+ /* was this !=~ where !~ was meant?
+ * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
+
+ if (*s == '~' && ckWARN(WARN_SYNTAX)) {
+ char *t = s+1;
+
+ while (t < PL_bufend && isSPACE(*t))
+ ++t;
+
+ if (*t == '/' || *t == '?' ||
+ ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
+ (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
+ Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
+ "!=~ should be !~");
+ }
Eop(OP_NE);
+ }
if (tmp == '~')
PMop(OP_NOT);
s--;
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
}
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
else
no_op("String",s);
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
else
no_op("String",s);
{
tmp = 0; /* any sub overrides "weak" keyword */
}
+ else if (gv && !gvp
+ && tmp == -KEY_err
+ && GvCVu(gv)
+ && PL_expect != XOPERATOR
+ && PL_expect != XTERMORDORDOR)
+ {
+ /* any sub overrides the "err" keyword, except when really an
+ * operator is expected */
+ tmp = 0;
+ }
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
/* Two barewords in a row may indicate method call. */
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
- return tmp;
+ return REPORT(tmp);
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
if (!orig_keyword
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
&& (tmp = intuit_method(s,gv)))
- return tmp;
+ return REPORT(tmp);
/* Not a method, so call it a subroutine (if defined) */
case KEY___PACKAGE__:
yylval.opval = (OP*)newSVOP(OP_CONST, 0,
(PL_curstash
- ? newSVsv(PL_curstname)
+ ? newSVpv(HvNAME(PL_curstash), 0)
: &PL_sv_undef));
TERM(THING);
if (isIDFIRST_lazy_if(s,UTF)) {
char *t;
for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
- t = skipspace(d);
- if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+ for (t=d; *t && isSPACE(*t); t++) ;
+ if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
/* [perl #16184] */
&& !(t[0] == '=' && t[1] == '>')
) {
if (*s == ':' && s[1] != ':')
PL_expect = attrful;
- else if (!have_name && *s != '{' && key == KEY_sub)
- Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != '{' && key == KEY_sub) {
+ if (!have_name)
+ Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != ';')
+ Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
+ }
if (have_proto) {
PL_nextval[PL_nexttoke].opval =
New(803, tbl, complement&&!del?258:256, short);
o = newPVOP(OP_TRANS, 0, (char*)tbl);
- o->op_private = del|squash|complement|
+ o->op_private &= ~OPpTRANS_ALL;
+ o->op_private |= del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
(DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
sv_catpvn(herewas,s,bufend-s);
- (void)strcpy(bufptr,SvPVX(herewas));
+ Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
s = olds;
goto retval;
av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
- s = PL_bufend - 1;
- *s = ' ';
+ STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+ *(SvPVX(PL_linestr) + off ) = ' ';
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
}
else {
s = PL_bufend;
/* turn <> into <ARGV> */
if (!len)
- (void)strcpy(d,"ARGV");
+ Copy("ARGV",d,5,char);
/* Check whether readline() is overriden */
if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
}
else if (yychar > 255)
where = "next token ???";
-#ifdef USE_PURE_BISON
-/* GNU Bison sets the value -2 */
- else if (yychar == -2) {
-#else
- else if ((yychar & 127) == 127) {
-#endif
+ else if (yychar == -2) { /* YYEMPTY */
if (PL_lex_state == LEX_NORMAL ||
(PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
where = "at end of line";
{
STRLEN slen;
slen = SvCUR(PL_linestr);
- switch (*s) {
+ switch (s[0]) {
case 0xFF:
if (s[1] == 0xFE) {
- /* UTF-16 little-endian */
+ /* UTF-16 little-endian? (or UTF32-LE?) */
if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
s += 2;
+ utf16le:
if (PL_bufend > (char*)s) {
U8 *news;
I32 newlen;
filter_add(utf16rev_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
#endif
}
break;
case 0xFE:
- if (s[1] == 0xFF) { /* UTF-16 big-endian */
+ if (s[1] == 0xFF) { /* UTF-16 big-endian? */
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
+ utf16be:
if (PL_bufend > (char *)s) {
U8 *news;
I32 newlen;
filter_add(utf16_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
#endif
}
break;
case 0xEF:
if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
s += 3; /* UTF-8 */
}
break;
case 0:
- if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
- s[2] == 0xFE && s[3] == 0xFF)
- {
- Perl_croak(aTHX_ "Unsupported script encoding");
+ if (slen > 3) {
+ if (s[1] == 0) {
+ if (s[2] == 0xFE && s[3] == 0xFF) {
+ /* UTF-32 big-endian */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+ }
+ }
+ else if (s[2] == 0 && s[3] != 0) {
+ /* Leading bytes
+ * 00 xx 00 xx
+ * are a good indicator of UTF-16BE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+ goto utf16be;
+ }
}
+ default:
+ if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+ /* Leading bytes
+ * xx 00 xx 00
+ * are a good indicator of UTF-16LE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+ goto utf16le;
+ }
}
return (char*)s;
}
static I32
utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv);
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter(%p): %d %d (%d)\n",
+ utf16_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
- tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char);
+ utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
}
- return count;
+ DEBUG_P({sv_dump(sv);});
+ return SvCUR(sv);
}
static I32
utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv);
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16rev_textfilter(%p): %d %d (%d)\n",
+ utf16rev_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char);
+ utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
}
+ DEBUG_P({ sv_dump(sv); });
return count;
}
#endif