/* toke.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
+ * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
#define LEX_FORMLINE 1
#define LEX_KNOWNEXT 0
+#ifdef DEBUGGING
+static char* lex_state_names[] = {
+ "KNOWNEXT",
+ "FORMLINE",
+ "INTERPCONST",
+ "INTERPCONCAT",
+ "INTERPENDMAYBE",
+ "INTERPEND",
+ "INTERPSTART",
+ "INTERPPUSH",
+ "INTERPCASEMOD",
+ "INTERPNORMAL",
+ "NORMAL"
+};
+#endif
+
#ifdef ff_next
#undef ff_next
#endif
* Also see LOP and lop() below.
*/
-/* Note that REPORT() and REPORT2() will be expressions that supply
- * their own trailing comma, not suitable for statements as such. */
#ifdef DEBUGGING /* Serve -DT. */
-# define REPORT(x,retval) tokereport(x,s,(int)retval),
-# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
+# define REPORT(retval) tokereport(s,(int)retval)
#else
-# define REPORT(x,retval)
-# define REPORT2(x,retval)
+# define REPORT(retval) (retval)
#endif
-#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
-#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
-#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
-#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
-#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
-#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
-#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
-#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
-#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
-#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
-#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
-#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
-#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
-#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
-#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
-#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
-#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
-#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
-#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
-#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
+#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
+#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
+#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
+#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
+#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
+#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
+#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
+#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
+#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
+#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
+#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
+#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
+#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
* The UNIDOR macro is for unary functions that can be followed by the //
* operator (such as C<shift // 0>).
*/
-#define UNI2(f,x) return(yylval.ival = f, \
- REPORT("uni",f) \
+#define UNI2(f,x) return ( \
+ yylval.ival = f, \
PL_expect = x, \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
PL_last_lop_op = f, \
- (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+ REPORT( \
+ (*s == '(' || (s = skipspace(s), *s == '(') \
+ ? (int)FUNC1 : (int)UNIOP)))
#define UNI(f) UNI2(f,XTERM)
#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
-#define UNIBRACK(f) return(yylval.ival = f, \
- REPORT("uni",f) \
+#define UNIBRACK(f) return ( \
+ yylval.ival = f, \
PL_bufptr = s, \
PL_last_uni = PL_oldbufptr, \
- (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
+ REPORT( \
+ (*s == '(' || (s = skipspace(s), *s == '(') \
+ ? (int)FUNC1 : (int)UNIOP)))
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
#ifdef DEBUGGING
-STATIC void
-S_tokereport(pTHX_ char *thing, char* s, I32 rv)
+/* how to interpret the yylval associated with the token */
+enum token_type {
+ TOKENTYPE_NONE,
+ TOKENTYPE_IVAL,
+ TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
+ TOKENTYPE_PVAL,
+ TOKENTYPE_OPVAL,
+ TOKENTYPE_GVVAL
+};
+
+static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
{
- DEBUG_T({
- SV* report = newSVpv(thing, 0);
- Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
- (IV)rv);
+ { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
+ { ANDAND, TOKENTYPE_NONE, "ANDAND" },
+ { ANDOP, TOKENTYPE_NONE, "ANDOP" },
+ { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
+ { ARROW, TOKENTYPE_NONE, "ARROW" },
+ { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
+ { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
+ { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
+ { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
+ { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
+ { DO, TOKENTYPE_NONE, "DO" },
+ { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
+ { DORDOR, TOKENTYPE_NONE, "DORDOR" },
+ { DOROP, TOKENTYPE_OPNUM, "DOROP" },
+ { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
+ { ELSE, TOKENTYPE_NONE, "ELSE" },
+ { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
+ { EQOP, TOKENTYPE_OPNUM, "EQOP" },
+ { FOR, TOKENTYPE_IVAL, "FOR" },
+ { FORMAT, TOKENTYPE_NONE, "FORMAT" },
+ { FUNC, TOKENTYPE_OPNUM, "FUNC" },
+ { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
+ { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
+ { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
+ { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
+ { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
+ { IF, TOKENTYPE_IVAL, "IF" },
+ { LABEL, TOKENTYPE_PVAL, "LABEL" },
+ { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
+ { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
+ { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
+ { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
+ { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
+ { METHOD, TOKENTYPE_OPVAL, "METHOD" },
+ { MULOP, TOKENTYPE_OPNUM, "MULOP" },
+ { MY, TOKENTYPE_IVAL, "MY" },
+ { MYSUB, TOKENTYPE_NONE, "MYSUB" },
+ { NOAMP, TOKENTYPE_NONE, "NOAMP" },
+ { NOTOP, TOKENTYPE_NONE, "NOTOP" },
+ { OROP, TOKENTYPE_IVAL, "OROP" },
+ { OROR, TOKENTYPE_NONE, "OROR" },
+ { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
+ { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
+ { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
+ { POSTINC, TOKENTYPE_NONE, "POSTINC" },
+ { POWOP, TOKENTYPE_OPNUM, "POWOP" },
+ { PREDEC, TOKENTYPE_NONE, "PREDEC" },
+ { PREINC, TOKENTYPE_NONE, "PREINC" },
+ { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
+ { REFGEN, TOKENTYPE_NONE, "REFGEN" },
+ { RELOP, TOKENTYPE_OPNUM, "RELOP" },
+ { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
+ { SUB, TOKENTYPE_NONE, "SUB" },
+ { THING, TOKENTYPE_OPVAL, "THING" },
+ { UMINUS, TOKENTYPE_NONE, "UMINUS" },
+ { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
+ { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
+ { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
+ { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
+ { USE, TOKENTYPE_IVAL, "USE" },
+ { WHILE, TOKENTYPE_IVAL, "WHILE" },
+ { WORD, TOKENTYPE_OPVAL, "WORD" },
+ { 0, TOKENTYPE_NONE, 0 }
+};
+
+/* dump the returned token in rv, plus any optional arg in yylval */
+STATIC int
+S_tokereport(pTHX_ char* s, I32 rv)
+{
+ if (DEBUG_T_TEST) {
+ char *name = Nullch;
+ enum token_type type = TOKENTYPE_NONE;
+ struct debug_tokens *p;
+ SV* report = NEWSV(0, 60);
+
+ Perl_sv_catpvf(aTHX_ report, "<== ");
+
+ for (p = debug_tokens; p->token; p++) {
+ if (p->token == (int)rv) {
+ name = p->name;
+ type = p->type;
+ break;
+ }
+ }
+ if (name)
+ Perl_sv_catpvf(aTHX_ report, "%s", name);
+ else if ((char)rv > ' ' && (char)rv < '~')
+ Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
+ else if (!rv)
+ Perl_sv_catpvf(aTHX_ report, "EOF");
+ else
+ Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
+ switch (type) {
+ case TOKENTYPE_NONE:
+ case TOKENTYPE_GVVAL: /* doesn't appear to be used */
+ break;
+ case TOKENTYPE_IVAL:
+ Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
+ break;
+ case TOKENTYPE_OPNUM:
+ Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
+ PL_op_name[yylval.ival]);
+ break;
+ case TOKENTYPE_PVAL:
+ Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
+ break;
+ case TOKENTYPE_OPVAL:
+ if (yylval.opval)
+ Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
+ PL_op_name[yylval.opval->op_type]);
+ else
+ Perl_sv_catpv(aTHX_ report, "(opval=null)");
+ break;
+ }
+ Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
if (s - PL_bufptr > 0)
sv_catpvn(report, PL_bufptr, s - PL_bufptr);
else {
if (PL_oldbufptr && *PL_oldbufptr)
sv_catpv(report, PL_tokenbuf);
}
- PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
- });
+ PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
+ };
+ return (int)rv;
}
#endif
PL_minus_n = PL_minus_p = 0;
}
else
- sv_setpv(PL_linestr,";");
+ sv_setpvn(PL_linestr,";", 1);
/* reset variables for next time we lex */
PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
{
yylval.ival = f;
CLINE;
- REPORT("lop", f)
PL_expect = x;
PL_bufptr = s;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = (OPCODE)f;
if (PL_nexttoke)
- return LSTOP;
+ return REPORT(LSTOP);
if (*s == '(')
- return FUNC;
+ return REPORT(FUNC);
s = skipspace(s);
if (*s == '(')
- return FUNC;
+ return REPORT(FUNC);
else
- return LSTOP;
+ return REPORT(LSTOP);
}
/*
const char *leaveit = /* set of acceptably-backslashed characters */
PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
+ ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
: "";
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
weight -= 5; /* cope with negative subscript */
break;
default:
- if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
- isALPHA(*s) && s[1] && isALPHA(s[1])) {
+ if (!isALNUM(last_un_char)
+ && !(last_un_char == '$' || last_un_char == '@'
+ || last_un_char == '&')
+ && isALPHA(*s) && s[1] && isALPHA(s[1])) {
char *d = tmpbuf;
while (isALPHA(*s))
*d++ = *s++;
}
-/* Invoke the n'th filter function for the current rsfp. */
+/* Invoke the idxth filter function for the current rsfp. */
+/* maxlen 0 = read one text line */
I32
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
- /* 0 = read one text line */
{
filter_t funcp;
SV *datasv = NULL;
if (!PL_rsfp_filters)
return -1;
- if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
+ if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
/* Provide a default input filter to make life easy. */
/* Note that we append to the line. This is handy. */
DEBUG_P(PerlIO_printf(Perl_debug_log,
return SvCUR(buf_sv);
}
/* Skip this filter slot if filter has been deleted */
- if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+ if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
DEBUG_P(PerlIO_printf(Perl_debug_log,
"filter_read %d: skipped (filter deleted)\n",
idx));
}
#endif
if (PL_rsfp_filters) {
-
if (!append)
SvCUR_set(sv, 0); /* start with empty line */
if (FILTER_READ(0, sv, 0) > 0)
int
Perl_yylex(pTHX)
{
- register char *s;
+ register char *s = PL_bufptr;
register char *d;
register I32 tmp;
STRLEN len;
bool bof = FALSE;
I32 orig_keyword = 0;
+ DEBUG_T( {
+ PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
+ lex_state_names[PL_lex_state]);
+ } );
/* check if there's an identifier for us to look at */
if (PL_pending_ident)
- return S_pending_ident(aTHX);
+ return REPORT(S_pending_ident(aTHX));
/* no identifier pending identification */
"### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
(IV)PL_nexttype[PL_nexttoke]); });
- return(PL_nexttype[PL_nexttoke]);
+ return REPORT(PL_nexttype[PL_nexttoke]);
/* interpolated case modifiers like \L \U, including \Q and \E.
when we get here, PL_bufptr is at the \
oldmod = PL_lex_casestack[--PL_lex_casemods];
PL_lex_casestack[PL_lex_casemods] = '\0';
- if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
+ if (PL_bufptr != PL_bufend
+ && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
PL_bufptr += 2;
PL_lex_state = LEX_INTERPCONCAT;
}
- return ')';
+ return REPORT(')');
}
if (PL_bufptr != PL_bufend)
PL_bufptr += 2;
else {
if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
- if (strchr("LU", *s) &&
+ if ((*s == 'L' || *s == 'U') &&
(strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
PL_lex_casestack[--PL_lex_casemods] = '\0';
- return ')';
+ return REPORT(')');
}
if (PL_lex_casemods > 10)
Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
if (PL_lex_starts) {
s = PL_bufptr;
PL_lex_starts = 0;
- Aop(OP_CONCAT);
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (PL_lex_casemods == 1 && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
}
else
return yylex();
}
case LEX_INTERPPUSH:
- return sublex_push();
+ return REPORT(sublex_push());
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
- return sublex_done();
+ return REPORT(sublex_done());
DEBUG_T({ PerlIO_printf(Perl_debug_log,
"### Interpolated variable at '%s'\n", PL_bufptr); });
PL_expect = XTERM;
}
if (PL_lex_starts++) {
s = PL_bufptr;
- Aop(OP_CONCAT);
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
}
return yylex();
if (PL_lex_dojoin) {
PL_lex_dojoin = FALSE;
PL_lex_state = LEX_INTERPCONCAT;
- return ')';
+ return REPORT(')');
}
if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
&& SvEVALED(PL_lex_repl))
Perl_croak(aTHX_ "panic: INTERPCONCAT");
#endif
if (PL_bufptr == PL_bufend)
- return sublex_done();
+ return REPORT(sublex_done());
if (SvIVX(PL_linestr) == '\'') {
SV *sv = newSVsv(PL_linestr);
PL_nextval[PL_nexttoke] = yylval;
PL_expect = XTERM;
force_next(THING);
- if (PL_lex_starts++)
- Aop(OP_CONCAT);
+ if (PL_lex_starts++) {
+ /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
+ if (!PL_lex_casemods && PL_lex_inpat)
+ OPERATOR(',');
+ else
+ Aop(OP_CONCAT);
+ }
else {
PL_bufptr = s;
return yylex();
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
DEBUG_T( {
- PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
+ PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
exp_name[PL_expect], s);
} );
PL_preambled = TRUE;
sv_setpv(PL_linestr,incl_perldb());
if (SvCUR(PL_linestr))
- sv_catpv(PL_linestr,";");
+ sv_catpvn(PL_linestr,";", 1);
if (PL_preambleav){
while(AvFILLp(PL_preambleav) >= 0) {
SV *tmpsv = av_shift(PL_preambleav);
sv_catsv(PL_linestr, tmpsv);
- sv_catpv(PL_linestr, ";");
+ sv_catpvn(PL_linestr, ";", 1);
sv_free(tmpsv);
}
sv_free((SV*)PL_preambleav);
sv_catpv(PL_linestr,"chomp;");
if (PL_minus_a) {
if (PL_minus_F) {
- if (strchr("/'\"", *PL_splitstr)
+ if ((*PL_splitstr == '/' || *PL_splitstr == '\''
+ || *PL_splitstr == '"')
&& strchr(PL_splitstr + 1, *PL_splitstr))
Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
else {
- char delim;
- s = "'~#\200\1'"; /* surely one char is unused...*/
- while (s[1] && strchr(PL_splitstr, *s)) s++;
- delim = *s;
- Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
- "q" + (delim == '\''), delim);
- for (s = PL_splitstr; *s; s++) {
- if (*s == '\\')
- sv_catpvn(PL_linestr, "\\", 1);
- sv_catpvn(PL_linestr, s, 1);
- }
- Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
+ /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
+ bytes can be used as quoting characters. :-) */
+ Perl_sv_catpvf(aTHX_ PL_linestr,
+ "our @F=split(q%c%s%c);",
+ 0, PL_splitstr, 0);
}
}
else
sv_catpv(PL_linestr,"our @F=split(' ');");
}
}
- sv_catpv(PL_linestr, "\n");
+ sv_catpvn(PL_linestr, "\n", 1);
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = Nullch;
PL_doextract = FALSE;
}
if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
- sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
- sv_catpv(PL_linestr,";}");
+ sv_setpv(PL_linestr,PL_minus_p
+ ? ";}continue{print;}" : ";}");
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = Nullch;
sv_setpv(PL_linestr,"");
TOKEN(';'); /* not infinite loop because rsfp is NULL now */
}
- /* if it looks like the start of a BOM, check if it in fact is */
- else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
+ /* If it looks like the start of a BOM or raw UTF-16,
+ * check if it in fact is. */
+ else if (bof &&
+ (*s == 0 ||
+ *(U8*)s == 0xEF ||
+ *(U8*)s >= 0xFE ||
+ s[1] == 0)) {
#ifdef PERLIO_IS_STDIO
# ifdef __GNU_LIBRARY__
# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
yyerror("Unterminated attribute parameter in attribute list");
if (attrs)
op_free(attrs);
- return 0; /* EOF indicator */
+ return REPORT(0); /* EOF indicator */
}
}
if (PL_lex_stuff) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
}
PL_expect = XTERM; /* e.g. print $fh 3 */
else if (*s == '.' && isDIGIT(s[1]))
PL_expect = XTERM; /* e.g. print $fh .3 */
- else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
+ else if ((*s == '?' || *s == '-' || *s == '+')
+ && !isSPACE(s[1]) && s[1] != '=')
PL_expect = XTERM; /* e.g. print $fh -1 */
else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
PL_expect = XTERM; /* e.g. print $fh /.../
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
else
no_op("String",s);
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
PL_expect = XTERM;
depcom();
- return ','; /* grandfather non-comma-format format */
+ return REPORT(','); /* grandfather non-comma-format format */
}
else
no_op("String",s);
{
tmp = 0; /* any sub overrides "weak" keyword */
}
+ else if (gv && !gvp
+ && tmp == -KEY_err
+ && GvCVu(gv)
+ && PL_expect != XOPERATOR
+ && PL_expect != XTERMORDORDOR)
+ {
+ /* any sub overrides the "err" keyword, except when really an
+ * operator is expected */
+ tmp = 0;
+ }
else { /* no override */
tmp = -tmp;
if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
/* Two barewords in a row may indicate method call. */
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
- return tmp;
+ return REPORT(tmp);
/* If not a declared subroutine, it's an indirect object. */
/* (But it's an indir obj regardless for sort.) */
if (!orig_keyword
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
&& (tmp = intuit_method(s,gv)))
- return tmp;
+ return REPORT(tmp);
/* Not a method, so call it a subroutine (if defined) */
char *proto = SvPV((SV*)cv, len);
if (!len)
TERM(FUNC0SUB);
- if (strEQ(proto, "$"))
+ if (*proto == '$' && proto[1] == '\0')
OPERATOR(UNIOPSUB);
while (*proto == ';')
proto++;
}
safe_bareword:
- if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+ && ckWARN_d(WARN_AMBIGUOUS)) {
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
"Operator or semicolon missing before %c%s",
lastchar, PL_tokenbuf);
case KEY___PACKAGE__:
yylval.opval = (OP*)newSVOP(OP_CONST, 0,
(PL_curstash
- ? newSVsv(PL_curstname)
+ ? newSVpv(HvNAME(PL_curstash), 0)
: &PL_sv_undef));
TERM(THING);
if (isIDFIRST_lazy_if(s,UTF)) {
char *t;
for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
- t = skipspace(d);
- if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
+ for (t=d; *t && isSPACE(*t); t++) ;
+ if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
/* [perl #16184] */
&& !(t[0] == '=' && t[1] == '>')
) {
if (*s == ':' && s[1] != ':')
PL_expect = attrful;
- else if (!have_name && *s != '{' && key == KEY_sub)
- Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != '{' && key == KEY_sub) {
+ if (!have_name)
+ Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
+ else if (*s != ';')
+ Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
+ }
if (have_proto) {
PL_nextval[PL_nexttoke].opval =
else if (*d == 'l') {
if (strEQ(d,"login")) return -KEY_getlogin;
}
- else if (strEQ(d,"c")) return -KEY_getc;
+ else if (*d == 'c' && d[1] == '\0') return -KEY_getc;
break;
}
switch (len) {
}
break;
case 'q':
- if (len <= 2) {
- if (strEQ(d,"q")) return KEY_q;
- if (strEQ(d,"qr")) return KEY_qr;
- if (strEQ(d,"qq")) return KEY_qq;
- if (strEQ(d,"qw")) return KEY_qw;
- if (strEQ(d,"qx")) return KEY_qx;
+ if (len == 1) {
+ return KEY_q;
+ }
+ else if (len == 2) {
+ switch (d[1]) {
+ case 'r': return KEY_qr;
+ case 'q': return KEY_qq;
+ case 'w': return KEY_qw;
+ case 'x': return KEY_qx;
+ };
}
else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
break;
return s;
}
if (*s == '$' && s[1] &&
- (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
+ (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
{
return s;
}
}
complement = del = squash = 0;
- while (strchr("cds", *s)) {
- if (*s == 'c')
+ while (1) {
+ switch (*s) {
+ case 'c':
complement = OPpTRANS_COMPLEMENT;
- else if (*s == 'd')
+ break;
+ case 'd':
del = OPpTRANS_DELETE;
- else if (*s == 's')
+ break;
+ case 's':
squash = OPpTRANS_SQUASH;
+ break;
+ default:
+ goto no_more;
+ }
s++;
}
+ no_more:
New(803, tbl, complement&&!del?258:256, short);
o = newPVOP(OP_TRANS, 0, (char*)tbl);
if (!outer)
*d++ = '\n';
for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
- if (*peek && strchr("`'\"",*peek)) {
+ if (*peek == '`' || *peek == '\'' || *peek =='"') {
s = peek;
term = *s++;
s = delimcpy(d, e, s, PL_bufend, term, &len);
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
sv_catpvn(herewas,s,bufend-s);
- (void)strcpy(bufptr,SvPVX(herewas));
+ Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
s = olds;
goto retval;
av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
- s = PL_bufend - 1;
- *s = ' ';
+ STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+ *(SvPVX(PL_linestr) + off ) = ' ';
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
}
else {
s = PL_bufend;
/* turn <> into <ARGV> */
if (!len)
- (void)strcpy(d,"ARGV");
+ Copy("ARGV",d,5,char);
/* Check whether readline() is overriden */
if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
}
/* read exponent part, if present */
- if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
+ if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
floatit = TRUE;
s++;
{
STRLEN slen;
slen = SvCUR(PL_linestr);
- switch (*s) {
+ switch (s[0]) {
case 0xFF:
if (s[1] == 0xFE) {
- /* UTF-16 little-endian */
+ /* UTF-16 little-endian? (or UTF32-LE?) */
if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
s += 2;
+ utf16le:
if (PL_bufend > (char*)s) {
U8 *news;
I32 newlen;
filter_add(utf16rev_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
#endif
}
break;
case 0xFE:
- if (s[1] == 0xFF) { /* UTF-16 big-endian */
+ if (s[1] == 0xFF) { /* UTF-16 big-endian? */
#ifndef PERL_NO_UTF16_FILTER
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
s += 2;
+ utf16be:
if (PL_bufend > (char *)s) {
U8 *news;
I32 newlen;
filter_add(utf16_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend = (char*)utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
- Copy(news, s, newlen, U8);
- SvCUR_set(PL_linestr, newlen);
- PL_bufend = SvPVX(PL_linestr) + newlen;
- news[newlen++] = '\0';
+ utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
+ sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
+ SvUTF8_on(PL_linestr);
+ s = (U8*)SvPVX(PL_linestr);
+ PL_bufend = SvPVX(PL_linestr) + newlen;
}
#else
- Perl_croak(aTHX_ "Unsupported script encoding");
+ Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
#endif
}
break;
case 0xEF:
if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
- DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
s += 3; /* UTF-8 */
}
break;
case 0:
- if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
- s[2] == 0xFE && s[3] == 0xFF)
- {
- Perl_croak(aTHX_ "Unsupported script encoding");
+ if (slen > 3) {
+ if (s[1] == 0) {
+ if (s[2] == 0xFE && s[3] == 0xFF) {
+ /* UTF-32 big-endian */
+ Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
+ }
+ }
+ else if (s[2] == 0 && s[3] != 0) {
+ /* Leading bytes
+ * 00 xx 00 xx
+ * are a good indicator of UTF-16BE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
+ goto utf16be;
+ }
}
+ default:
+ if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
+ /* Leading bytes
+ * xx 00 xx 00
+ * are a good indicator of UTF-16LE. */
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
+ goto utf16le;
+ }
}
return (char*)s;
}
static I32
utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv);
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter(%p): %d %d (%d)\n",
+ utf16_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
- tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char);
+ utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
}
- return count;
+ DEBUG_P({sv_dump(sv);});
+ return SvCUR(sv);
}
static I32
utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv);
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16rev_textfilter(%p): %d %d (%d)\n",
+ utf16rev_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char);
+ utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
}
+ DEBUG_P({ sv_dump(sv); });
return count;
}
#endif