X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/8fa7f3676ed75809365905727fbae97dc8767f29..9132e1a37fb6d8c980f305a393fed9323586cbca:/toke.c diff --git a/toke.c b/toke.c index 424249f..6206c44 100644 --- a/toke.c +++ b/toke.c @@ -1,6 +1,7 @@ /* toke.c * - * Copyright (c) 1991-2001, Larry Wall + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, + * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -22,28 +23,29 @@ #define PERL_IN_TOKE_C #include "perl.h" -#define yychar PL_yychar -#define yylval PL_yylval +#define yychar (*PL_yycharp) +#define yylval (*PL_yylvalp) static char ident_too_long[] = "Identifier too long"; +static char c_without_g[] = "Use of /c modifier is meaningless without /g"; +static char c_in_subst[] = "Use of /c modifier is meaningless in s///"; -static void restore_rsfp(pTHXo_ void *f); +static void restore_rsfp(pTHX_ void *f); #ifndef PERL_NO_UTF16_FILTER -static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen); -static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); +static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen); +static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen); #endif #define XFAKEBRACK 128 #define XENUMMASK 127 -#ifdef EBCDIC -/* For now 'use utf8' does not affect tokenizer on EBCDIC */ -#define UTF (PL_linestr && DO_UTF8(PL_linestr)) +#ifdef USE_UTF8_SCRIPTS +# define UTF (!IN_BYTES) #else -#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) +# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8)) #endif -/* In variables name $^X, these are the legal values for X. +/* In variables named $^X, these are the legal values for X. * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) @@ -73,24 +75,24 @@ static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen); #define LEX_FORMLINE 1 #define LEX_KNOWNEXT 0 -#ifdef ff_next -#undef ff_next +#ifdef DEBUGGING +static char* lex_state_names[] = { + "KNOWNEXT", + "FORMLINE", + "INTERPCONST", + "INTERPCONCAT", + "INTERPENDMAYBE", + "INTERPEND", + "INTERPSTART", + "INTERPPUSH", + "INTERPCASEMOD", + "INTERPNORMAL", + "NORMAL" +}; #endif -#ifdef USE_PURE_BISON -# ifndef YYMAXLEVEL -# define YYMAXLEVEL 100 -# endif -YYSTYPE* yylval_pointer[YYMAXLEVEL]; -int* yychar_pointer[YYMAXLEVEL]; -int yyactlevel = -1; -# undef yylval -# undef yychar -# define yylval (*yylval_pointer[yyactlevel]) -# define yychar (*yychar_pointer[yyactlevel]) -# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel] -# undef yylex -# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]) +#ifdef ff_next +#undef ff_next #endif #include "keywords.h" @@ -130,75 +132,198 @@ int yyactlevel = -1; * Also see LOP and lop() below. */ -/* Note that REPORT() and REPORT2() will be expressions that supply - * their own trailing comma, not suitable for statements as such. */ #ifdef DEBUGGING /* Serve -DT. */ -# define REPORT(x,retval) tokereport(x,s,(int)retval), -# define REPORT2(x,retval) tokereport(x,s, yylval.ival), +# define REPORT(retval) tokereport(s,(int)retval) #else -# define REPORT(x,retval) -# define REPORT2(x,retval) +# define REPORT(retval) (retval) #endif -#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval) -#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval) -#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)) -#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval) -#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval) -#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval) -#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval) -#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX) -#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP) -#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0) -#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1) -#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP)) -#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP)) -#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP)) -#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP)) -#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP) -#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP)) -#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP)) -#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP) -#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP) +#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval)) +#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval)) +#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval))) +#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval)) +#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval)) +#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval)) +#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) +#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX)) +#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) +#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) +#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1)) +#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP))) +#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP))) +#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP))) +#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP))) +#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) +#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP))) +#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP))) +#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) +#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) /* This bit of chicanery makes a unary function followed by * a parenthesis into a function with one argument, highest precedence. + * The UNIDOR macro is for unary functions that can be followed by the // + * operator (such as C). */ -#define UNI(f) return(yylval.ival = f, \ - REPORT("uni",f) \ - PL_expect = XTERM, \ +#define UNI2(f,x) return ( \ + yylval.ival = f, \ + PL_expect = x, \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ PL_last_lop_op = f, \ - (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) - -#define UNIBRACK(f) return(yylval.ival = f, \ - REPORT("uni",f) \ + REPORT( \ + (*s == '(' || (s = skipspace(s), *s == '(') \ + ? (int)FUNC1 : (int)UNIOP))) +#define UNI(f) UNI2(f,XTERM) +#define UNIDOR(f) UNI2(f,XTERMORDORDOR) + +#define UNIBRACK(f) return ( \ + yylval.ival = f, \ PL_bufptr = s, \ PL_last_uni = PL_oldbufptr, \ - (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) ) + REPORT( \ + (*s == '(' || (s = skipspace(s), *s == '(') \ + ? (int)FUNC1 : (int)UNIOP))) /* grandfather return to old style */ #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP) #ifdef DEBUGGING -STATIC void -S_tokereport(pTHX_ char *thing, char* s, I32 rv) +/* how to interpret the yylval associated with the token */ +enum token_type { + TOKENTYPE_NONE, + TOKENTYPE_IVAL, + TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */ + TOKENTYPE_PVAL, + TOKENTYPE_OPVAL, + TOKENTYPE_GVVAL +}; + +static struct debug_tokens { int token, type; char *name;} debug_tokens[] = { - DEBUG_T({ - SV* report = newSVpv(thing, 0); - Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop), - (IV)rv); + { ADDOP, TOKENTYPE_OPNUM, "ADDOP" }, + { ANDAND, TOKENTYPE_NONE, "ANDAND" }, + { ANDOP, TOKENTYPE_NONE, "ANDOP" }, + { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, + { ARROW, TOKENTYPE_NONE, "ARROW" }, + { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, + { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, + { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, + { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, + { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, + { DO, TOKENTYPE_NONE, "DO" }, + { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, + { DORDOR, TOKENTYPE_NONE, "DORDOR" }, + { DOROP, TOKENTYPE_OPNUM, "DOROP" }, + { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, + { ELSE, TOKENTYPE_NONE, "ELSE" }, + { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, + { EQOP, TOKENTYPE_OPNUM, "EQOP" }, + { FOR, TOKENTYPE_IVAL, "FOR" }, + { FORMAT, TOKENTYPE_NONE, "FORMAT" }, + { FUNC, TOKENTYPE_OPNUM, "FUNC" }, + { FUNC0, TOKENTYPE_OPNUM, "FUNC0" }, + { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" }, + { FUNC1, TOKENTYPE_OPNUM, "FUNC1" }, + { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" }, + { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, + { IF, TOKENTYPE_IVAL, "IF" }, + { LABEL, TOKENTYPE_PVAL, "LABEL" }, + { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, + { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, + { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, + { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" }, + { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" }, + { METHOD, TOKENTYPE_OPVAL, "METHOD" }, + { MULOP, TOKENTYPE_OPNUM, "MULOP" }, + { MY, TOKENTYPE_IVAL, "MY" }, + { MYSUB, TOKENTYPE_NONE, "MYSUB" }, + { NOAMP, TOKENTYPE_NONE, "NOAMP" }, + { NOTOP, TOKENTYPE_NONE, "NOTOP" }, + { OROP, TOKENTYPE_IVAL, "OROP" }, + { OROR, TOKENTYPE_NONE, "OROR" }, + { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, + { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, + { POSTDEC, TOKENTYPE_NONE, "POSTDEC" }, + { POSTINC, TOKENTYPE_NONE, "POSTINC" }, + { POWOP, TOKENTYPE_OPNUM, "POWOP" }, + { PREDEC, TOKENTYPE_NONE, "PREDEC" }, + { PREINC, TOKENTYPE_NONE, "PREINC" }, + { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, + { REFGEN, TOKENTYPE_NONE, "REFGEN" }, + { RELOP, TOKENTYPE_OPNUM, "RELOP" }, + { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, + { SUB, TOKENTYPE_NONE, "SUB" }, + { THING, TOKENTYPE_OPVAL, "THING" }, + { UMINUS, TOKENTYPE_NONE, "UMINUS" }, + { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, + { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, + { UNLESS, TOKENTYPE_IVAL, "UNLESS" }, + { UNTIL, TOKENTYPE_IVAL, "UNTIL" }, + { USE, TOKENTYPE_IVAL, "USE" }, + { WHILE, TOKENTYPE_IVAL, "WHILE" }, + { WORD, TOKENTYPE_OPVAL, "WORD" }, + { 0, TOKENTYPE_NONE, 0 } +}; + +/* dump the returned token in rv, plus any optional arg in yylval */ +STATIC int +S_tokereport(pTHX_ char* s, I32 rv) +{ + if (DEBUG_T_TEST) { + char *name = Nullch; + enum token_type type = TOKENTYPE_NONE; + struct debug_tokens *p; + SV* report = newSVpvn("<== ", 4); + + for (p = debug_tokens; p->token; p++) { + if (p->token == (int)rv) { + name = p->name; + type = p->type; + break; + } + } + if (name) + Perl_sv_catpv(aTHX_ report, name); + else if ((char)rv > ' ' && (char)rv < '~') + Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); + else if (!rv) + Perl_sv_catpv(aTHX_ report, "EOF"); + else + Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv); + switch (type) { + case TOKENTYPE_NONE: + case TOKENTYPE_GVVAL: /* doesn't appear to be used */ + break; + case TOKENTYPE_IVAL: + Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival); + break; + case TOKENTYPE_OPNUM: + Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", + PL_op_name[yylval.ival]); + break; + case TOKENTYPE_PVAL: + Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval); + break; + case TOKENTYPE_OPVAL: + if (yylval.opval) + Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", + PL_op_name[yylval.opval->op_type]); + else + Perl_sv_catpv(aTHX_ report, "(opval=null)"); + break; + } + Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop)); if (s - PL_bufptr > 0) sv_catpvn(report, PL_bufptr, s - PL_bufptr); else { if (PL_oldbufptr && *PL_oldbufptr) sv_catpv(report, PL_tokenbuf); } - PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report)); - }); + PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report)); + }; + return (int)rv; } #endif @@ -206,8 +331,8 @@ S_tokereport(pTHX_ char *thing, char* s, I32 rv) /* * S_ao * - * This subroutine detects &&= and ||= and turns an ANDAND or OROR - * into an OP_ANDASSIGN or OP_ORASSIGN + * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR + * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN */ STATIC int @@ -219,6 +344,8 @@ S_ao(pTHX_ int toketype) yylval.ival = OP_ANDASSIGN; else if (toketype == OROR) yylval.ival = OP_ORASSIGN; + else if (toketype == DORDOR) + yylval.ival = OP_DORASSIGN; toketype = ASSIGNOP; } return toketype; @@ -248,18 +375,23 @@ S_no_op(pTHX_ char *what, char *s) else PL_bufptr = s; yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); - if (is_first) - Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n"); - else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { - char *t; - for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ; - if (t < PL_bufptr && isSPACE(*t)) - Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n", - t - PL_oldoldbufptr, PL_oldoldbufptr); - } - else { - assert(s >= oldbp); - Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp); + if (ckWARN_d(WARN_SYNTAX)) { + if (is_first) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Missing semicolon on previous line?)\n"); + else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { + char *t; + for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ; + if (t < PL_bufptr && isSPACE(*t)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Do you need to predeclare %.*s?)\n", + t - PL_oldoldbufptr, PL_oldoldbufptr); + } + else { + assert(s >= oldbp); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp); + } } PL_bufptr = oldbp; } @@ -297,7 +429,7 @@ S_missingterm(pTHX_ char *s) s = tmpbuf; } else { - *tmpbuf = PL_multi_close; + *tmpbuf = (char)PL_multi_close; tmpbuf[1] = '\0'; s = tmpbuf; } @@ -313,7 +445,23 @@ void Perl_deprecate(pTHX_ char *s) { if (ckWARN(WARN_DEPRECATED)) - Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s); + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s); +} + +void +Perl_deprecate_old(pTHX_ char *s) +{ + /* This function should NOT be called for any new deprecated warnings */ + /* Use Perl_deprecate instead */ + /* */ + /* It is here to maintain backward compatibility with the pre-5.8 */ + /* warnings category hierarchy. The "deprecated" category used to */ + /* live under the "syntax" category. It is now a top-level category */ + /* in its own right. */ + + if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX), + "Use of %s is deprecated", s); } /* @@ -324,7 +472,7 @@ Perl_deprecate(pTHX_ char *s) STATIC void S_depcom(pTHX) { - deprecate("comma-less variable list"); + deprecate_old("comma-less variable list"); } /* @@ -401,8 +549,8 @@ Perl_lex_start(pTHX_ SV *line) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp); SAVESPTR(PL_lex_stuff); SAVEI32(PL_lex_defer); @@ -417,8 +565,6 @@ Perl_lex_start(pTHX_ SV *line) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_dojoin = 0; @@ -433,7 +579,7 @@ Perl_lex_start(pTHX_ SV *line) if (SvREADONLY(PL_linestr)) PL_linestr = sv_2mortal(newSVsv(PL_linestr)); s = SvPV(PL_linestr, len); - if (len && s[len-1] != ';') { + if (!len || s[len-1] != ';') { if (!(SvFLAGS(PL_linestr) & SVs_TEMP)) PL_linestr = sv_2mortal(newSVsv(PL_linestr)); sv_catpvn(PL_linestr, "\n;", 2); @@ -442,8 +588,6 @@ Perl_lex_start(pTHX_ SV *line) PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr); PL_bufend = PL_bufptr + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = Nullch; - SvREFCNT_dec(PL_rs); - PL_rs = newSVpvn("\n", 1); PL_rsfp = 0; } @@ -513,11 +657,7 @@ S_incline(pTHX_ char *s) ch = *t; *t = '\0'; if (t - s > 0) { -#ifdef USE_ITHREADS - Safefree(CopFILE(PL_curcop)); -#else - SvREFCNT_dec(CopFILEGV(PL_curcop)); -#endif + CopFILE_free(PL_curcop); CopFILE_set(PL_curcop, s); } *t = ch; @@ -573,15 +713,17 @@ S_skipspace(pTHX_ register char *s) (prevlen = SvCUR(PL_linestr)))) == Nullch) { /* end of file. Add on the -p or -n magic */ - if (PL_minus_n || PL_minus_p) { - sv_setpv(PL_linestr,PL_minus_p ? - ";}continue{print or die qq(-p destination: $!\\n)" : - ""); - sv_catpv(PL_linestr,";}"); + if (PL_minus_p) { + sv_setpv(PL_linestr, + ";}continue{print or die qq(-p destination: $!\\n);}"); PL_minus_n = PL_minus_p = 0; } + else if (PL_minus_n) { + sv_setpvn(PL_linestr, ";}", 2); + PL_minus_n = 0; + } else - sv_setpv(PL_linestr,";"); + sv_setpvn(PL_linestr,";", 1); /* reset variables for next time we lex */ PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart @@ -633,6 +775,8 @@ S_skipspace(pTHX_ register char *s) sv_upgrade(sv, SVt_PVMG); sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr); + (void)SvIOK_on(sv); + SvIVX(sv) = 0; av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); } } @@ -663,42 +807,13 @@ S_check_uni(pTHX) if (ckWARN_d(WARN_AMBIGUOUS)){ char ch = *s; *s = '\0'; - Perl_warner(aTHX_ WARN_AMBIGUOUS, - "Warning: Use of \"%s\" without parens is ambiguous", + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Warning: Use of \"%s\" without parentheses is ambiguous", PL_last_uni); *s = ch; } } -/* workaround to replace the UNI() macro with a function. Only the - * hints/uts.sh file mentions this. Other comments elsewhere in the - * source indicate Microport Unix might need it too. - */ - -#ifdef CRIPPLED_CC - -#undef UNI -#define UNI(f) return uni(f,s) - -STATIC int -S_uni(pTHX_ I32 f, char *s) -{ - yylval.ival = f; - PL_expect = XTERM; - PL_bufptr = s; - PL_last_uni = PL_oldbufptr; - PL_last_lop_op = f; - if (*s == '(') - return FUNC1; - s = skipspace(s); - if (*s == '(') - return FUNC1; - else - return UNIOP; -} - -#endif /* CRIPPLED_CC */ - /* * LOP : macro to build a list operator. Its behaviour has been replaced * with a subroutine, S_lop() for which LOP is just another name. @@ -719,20 +834,19 @@ S_lop(pTHX_ I32 f, int x, char *s) { yylval.ival = f; CLINE; - REPORT("lop", f) PL_expect = x; PL_bufptr = s; PL_last_lop = PL_oldbufptr; - PL_last_lop_op = f; + PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) - return LSTOP; + return REPORT(LSTOP); if (*s == '(') - return FUNC; + return REPORT(FUNC); s = skipspace(s); if (*s == '(') - return FUNC; + return REPORT(FUNC); else - return LSTOP; + return REPORT(LSTOP); } /* @@ -756,6 +870,15 @@ S_force_next(pTHX_ I32 type) } } +STATIC SV * +S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len) +{ + SV *sv = newSVpvn(start,len); + if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len)) + SvUTF8_on(sv); + return sv; +} + /* * S_force_word * When the lexer knows the next thing is a word (for instance, it has @@ -795,7 +918,9 @@ S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow PL_expect = XOPERATOR; } } - PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0)); + PL_nextval[PL_nexttoke].opval + = (OP*)newSVOP(OP_CONST,0, + S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE; force_next(token); } @@ -861,10 +986,13 @@ Perl_str_to_version(pTHX_ SV *sv) /* * S_force_version * Forces the next token to be a version number. + * If the next token appears to be an invalid version number, (e.g. "v2b"), + * and if "guessing" is TRUE, then no new token is created (and the caller + * must use an alternative parsing method). */ STATIC char * -S_force_version(pTHX_ char *s) +S_force_version(pTHX_ char *s, int guessing) { OP *version = Nullop; char *d; @@ -875,7 +1003,8 @@ S_force_version(pTHX_ char *s) if (*d == 'v') d++; if (isDIGIT(*d)) { - for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++); + while (isDIGIT(*d) || *d == '_' || *d == '.') + d++; if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) { SV *ver; s = scan_num(s, &yylval); @@ -887,13 +1016,15 @@ S_force_version(pTHX_ char *s) SvNOK_on(ver); /* hint that it is a version */ } } + else if (guessing) + return s; } /* NOTE: The parser sees the package name and the VERSION swapped */ PL_nextval[PL_nexttoke].opval = version; force_next(WORD); - return (s); + return s; } /* @@ -1005,6 +1136,9 @@ S_sublex_start(pTHX) } yylval.opval = (OP*)newSVOP(op_type, 0, sv); PL_lex_stuff = Nullsv; + /* Allow // "foo" */ + if (op_type == OP_READLINE) + PL_expect = XTERMORDORDOR; return THING; } @@ -1053,8 +1187,8 @@ S_sublex_push(pTHX) SAVEPPTR(PL_last_uni); SAVEPPTR(PL_linestart); SAVESPTR(PL_linestr); - SAVEPPTR(PL_lex_brackstack); - SAVEPPTR(PL_lex_casestack); + SAVEGENERICPV(PL_lex_brackstack); + SAVEGENERICPV(PL_lex_casestack); PL_linestr = PL_lex_stuff; PL_lex_stuff = Nullsv; @@ -1069,13 +1203,11 @@ S_sublex_push(pTHX) PL_lex_brackets = 0; New(899, PL_lex_brackstack, 120, char); New(899, PL_lex_casestack, 12, char); - SAVEFREEPV(PL_lex_brackstack); - SAVEFREEPV(PL_lex_casestack); PL_lex_casemods = 0; *PL_lex_casestack = '\0'; PL_lex_starts = 0; PL_lex_state = LEX_INTERPCONCAT; - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); PL_lex_inwhat = PL_sublex_info.sub_inwhat; if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) @@ -1187,7 +1319,7 @@ S_sublex_done(pTHX) It stops processing as soon as it finds an embedded $ or @ variable and leaves it to the caller to work out what's going on. - @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo. + @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo. $ in pattern could be $foo or could be tail anchor. Assumption: it's a tail anchor if $ is the last thing in the string, or if it's @@ -1233,7 +1365,7 @@ S_scan_const(pTHX_ char *start) const char *leaveit = /* set of acceptably-backslashed characters */ PL_lex_inpat - ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#" + ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#" : ""; if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { @@ -1257,7 +1389,7 @@ S_scan_const(pTHX_ char *start) char *e = d++; while (e-- > c) *(e + 1) = *e; - *c = UTF_TO_NATIVE(0xff); + *c = (char)UTF_TO_NATIVE(0xff); /* mark the range as done, and continue */ dorange = FALSE; didrange = TRUE; @@ -1274,7 +1406,7 @@ S_scan_const(pTHX_ char *start) if (min > max) { Perl_croak(aTHX_ - "Invalid [] range \"%c-%c\" in transliteration operator", + "Invalid range \"%c-%c\" in transliteration operator", (char)min, (char)max); } @@ -1294,7 +1426,7 @@ S_scan_const(pTHX_ char *start) else #endif for (i = min; i <= max; i++) - *d++ = i; + *d++ = (char)i; /* mark the range as done, and continue */ dorange = FALSE; @@ -1308,7 +1440,7 @@ S_scan_const(pTHX_ char *start) Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); } if (has_utf8) { - *d++ = UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ + *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */ s++; continue; } @@ -1326,7 +1458,7 @@ S_scan_const(pTHX_ char *start) except for the last char, which will be done separately. */ else if (*s == '(' && PL_lex_inpat && s[1] == '?') { if (s[2] == '#') { - while (s < send && *s != ')') + while (s+1 < send && *s != ')') *d++ = NATIVE_TO_NEED(has_utf8,*s++); } else if (s[2] == '{' /* This should match regcomp.c */ @@ -1345,10 +1477,8 @@ S_scan_const(pTHX_ char *start) count--; regparse++; } - if (*regparse != ')') { + if (*regparse != ')') regparse--; /* Leave one char for continuation. */ - yyerror("Sequence (?{...}) not terminated or not {}-balanced"); - } while (s < regparse) *d++ = NATIVE_TO_NEED(has_utf8,*s++); } @@ -1362,7 +1492,7 @@ S_scan_const(pTHX_ char *start) } /* check for embedded arrays - (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-) + (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) */ else if (*s == '@' && s[1] && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) @@ -1374,7 +1504,7 @@ S_scan_const(pTHX_ char *start) else if (*s == '$') { if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; - if (s + 1 < send && !strchr("()| \n\t", s[1])) + if (s + 1 < send && !strchr("()| \r\n\t", s[1])) break; /* in regexp, $ might be tail anchor */ } @@ -1396,7 +1526,7 @@ S_scan_const(pTHX_ char *start) isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); *--s = '$'; break; } @@ -1419,8 +1549,10 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if (ckWARN(WARN_MISC) && isALNUM(*s)) - Perl_warner(aTHX_ WARN_MISC, + if (ckWARN(WARN_MISC) && + isALNUM(*s) && + *s != '_') + Perl_warner(aTHX_ packWARN(WARN_MISC), "Unrecognized escape \\%c passed through", *s); /* default action is to copy the quoted character */ @@ -1431,8 +1563,9 @@ S_scan_const(pTHX_ char *start) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - STRLEN len = 0; /* disallow underscores */ - uv = (UV)scan_oct(s, 3, &len); + I32 flags = 0; + STRLEN len = 3; + uv = grok_oct(s, &len, &flags, NULL); s += len; } goto NUM_ESCAPE_INSERT; @@ -1442,20 +1575,24 @@ S_scan_const(pTHX_ char *start) ++s; if (*s == '{') { char* e = strchr(s, '}'); - STRLEN len = 1; /* allow underscores */ + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | + PERL_SCAN_DISALLOW_PREFIX; + STRLEN len; + ++s; if (!e) { yyerror("Missing right brace on \\x{}"); - ++s; continue; } - uv = (UV)scan_hex(s + 1, e - s - 1, &len); + len = e - s; + uv = grok_hex(s, &len, &flags, NULL); s = e + 1; } else { { - STRLEN len = 0; /* disallow underscores */ - uv = (UV)scan_hex(s, 2, &len); + STRLEN len = 2; + I32 flags = PERL_SCAN_DISALLOW_PREFIX; + uv = grok_hex(s, &len, &flags, NULL); s += len; } } @@ -1495,8 +1632,8 @@ S_scan_const(pTHX_ char *start) while (src >= (U8 *)SvPVX(sv)) { if (!NATIVE_IS_INVARIANT(*src)) { U8 ch = NATIVE_TO_ASCII(*src); - *dst-- = UTF8_EIGHT_BIT_LO(ch); - *dst-- = UTF8_EIGHT_BIT_HI(ch); + *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch); + *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch); } else { *dst-- = *src; @@ -1525,7 +1662,7 @@ S_scan_const(pTHX_ char *start) } continue; - /* \N{latin small letter a} is a named character */ + /* \N{LATIN SMALL LETTER A} is a named character */ case 'N': ++s; if (*s == '{') { @@ -1539,12 +1676,42 @@ S_scan_const(pTHX_ char *start) e = s - 1; goto cont_scan; } + if (e > s + 2 && s[1] == 'U' && s[2] == '+') { + /* \N{U+...} */ + I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | + PERL_SCAN_DISALLOW_PREFIX; + s += 3; + len = e - s; + uv = grok_hex(s, &len, &flags, NULL); + s = e + 1; + goto NUM_ESCAPE_INSERT; + } res = newSVpvn(s + 1, e - s - 1); res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); if (has_utf8) sv_utf8_upgrade(res); str = SvPV(res,len); +#ifdef EBCDIC_NEVER_MIND + /* charnames uses pack U and that has been + * recently changed to do the below uni->native + * mapping, so this would be redundant (and wrong, + * the code point would be doubly converted). + * But leave this in just in case the pack U change + * gets revoked, but the semantics is still + * desireable for charnames. --jhi */ + { + UV uv = utf8_to_uvchr((U8*)str, 0); + + if (uv < 0x100) { + U8 tmpbuf[UTF8_MAXBYTES+1], *d; + + d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv)); + sv_setpvn(res, (char *)tmpbuf, d - tmpbuf); + str = SvPV(res, len); + } + } +#endif if (!has_utf8 && SvUTF8(res)) { char *ostart = SvPVX(sv); SvCUR_set(sv, d - ostart); @@ -1552,11 +1719,11 @@ S_scan_const(pTHX_ char *start) *d = '\0'; sv_utf8_upgrade(sv); /* this just broke our allocation above... */ - SvGROW(sv, send - start); + SvGROW(sv, (STRLEN)(send - start)); d = SvPVX(sv) + SvCUR(sv); has_utf8 = TRUE; } - if (len > e - s + 4) { + if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ char *odest = SvPVX(sv); SvGROW(sv, (SvLEN(sv) + len - (e - s + 4))); @@ -1575,7 +1742,7 @@ S_scan_const(pTHX_ char *start) /* \c is a control character */ case 'c': s++; - { + if (s < send) { U8 c = *s++; #ifdef EBCDIC if (isLOWER(c)) @@ -1583,6 +1750,9 @@ S_scan_const(pTHX_ char *start) #endif *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c)); } + else { + yyerror("Missing control char name in \\c"); + } continue; /* printf-style backslashes, formfeeds, newlines, etc */ @@ -1638,13 +1808,18 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); if (SvCUR(sv) >= SvLEN(sv)) - Perl_croak(aTHX_ "panic: constant overflowed allocated space"); + Perl_croak(aTHX_ "panic: constant overflowed allocated space"); SvPOK_on(sv); + if (PL_encoding && !has_utf8) { + sv_recode_to_utf8(sv, PL_encoding); + if (SvUTF8(sv)) + has_utf8 = TRUE; + } if (has_utf8) { SvUTF8_on(sv); if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { - PL_sublex_info.sub_op->op_private |= + PL_sublex_info.sub_op->op_private |= (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); } } @@ -1800,8 +1975,10 @@ S_intuit_more(pTHX_ register char *s) weight -= 5; /* cope with negative subscript */ break; default: - if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) && - isALPHA(*s) && s[1] && isALPHA(s[1])) { + if (!isALNUM(last_un_char) + && !(last_un_char == '$' || last_un_char == '@' + || last_un_char == '&') + && isALPHA(*s) && s[1] && isALPHA(s[1])) { char *d = tmpbuf; while (isALPHA(*s)) *d++ = *s++; @@ -1839,7 +2016,7 @@ S_intuit_more(pTHX_ register char *s) * Method if it's "foo $bar" * Not a method if it's really "print foo $bar" * Method if it's really "foo package::" (interpreted as package->foo) - * Not a method if bar is known to be a subroutne ("sub bar; foo bar") + * Not a method if bar is known to be a subroutine ("sub bar; foo bar") * Not a method if bar is a filehandle or package, but is quoted with * => */ @@ -1923,7 +2100,7 @@ S_incl_perldb(pTHX) if (pdb) return pdb; - SETERRNO(0,SS$_NORMAL); + SETERRNO(0,SS_NORMAL); return "BEGIN { require 'perl5db.pl' }"; } return ""; @@ -1962,7 +2139,7 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */ IoFLAGS(datasv) |= IOf_FAKE_DIRP; DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", - funcp, SvPV_nolen(datasv))); + (void*)funcp, SvPV_nolen(datasv))); av_unshift(PL_rsfp_filters, 1); av_store(PL_rsfp_filters, 0, datasv) ; return(datasv); @@ -1974,7 +2151,7 @@ void Perl_filter_del(pTHX_ filter_t funcp) { SV *datasv; - DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp)); + DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp)); if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) return; /* if filter is on top of stack (usual case) just pop it off */ @@ -1991,19 +2168,17 @@ Perl_filter_del(pTHX_ filter_t funcp) } -/* Invoke the n'th filter function for the current rsfp. */ +/* Invoke the idxth filter function for the current rsfp. */ +/* maxlen 0 = read one text line */ I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) - - - /* 0 = read one text line */ { filter_t funcp; SV *datasv = NULL; if (!PL_rsfp_filters) return -1; - if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */ + if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ /* Provide a default input filter to make life easy. */ /* Note that we append to the line. This is handy. */ DEBUG_P(PerlIO_printf(Perl_debug_log, @@ -2014,7 +2189,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) int old_len = SvCUR(buf_sv) ; /* ensure buf_sv is large enough */ - SvGROW(buf_sv, old_len + maxlen) ; + SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ; if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){ if (PerlIO_error(PL_rsfp)) return -1; /* error */ @@ -2034,7 +2209,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) return SvCUR(buf_sv); } /* Skip this filter slot if filter has been deleted */ - if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){ + if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_read %d: skipped (filter deleted)\n", idx)); @@ -2044,11 +2219,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) funcp = (filter_t)IoANY(datasv); DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_read %d: via function %p (%s)\n", - idx, funcp, SvPV_nolen(datasv))); + idx, (void*)funcp, SvPV_nolen(datasv))); /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ /* Return: <0:error, =0:eof, >0:not eof */ - return (*funcp)(aTHXo_ idx, buf_sv, maxlen); + return (*funcp)(aTHX_ idx, buf_sv, maxlen); } STATIC char * @@ -2060,7 +2235,6 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append) } #endif if (PL_rsfp_filters) { - if (!append) SvCUR_set(sv, 0); /* start with empty line */ if (FILTER_READ(0, sv, 0) > 0) @@ -2101,7 +2275,7 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) #ifdef DEBUGGING static char* exp_name[] = { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK" + "ATTRTERM", "TERMBLOCK", "TERMORDORDOR" }; #endif @@ -2130,26 +2304,6 @@ S_find_in_my_stash(pTHX_ char *pkgname, I32 len) if we already built the token before, use it. */ -#ifdef USE_PURE_BISON -int -Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) -{ - int r; - - yyactlevel++; - yylval_pointer[yyactlevel] = lvalp; - yychar_pointer[yyactlevel] = lcharp; - if (yyactlevel >= YYMAXLEVEL) - Perl_croak(aTHX_ "panic: YYMAXLEVEL"); - - r = Perl_yylex(aTHX); - - if (yyactlevel > 0) - yyactlevel--; - - return r; -} -#endif #ifdef __SC__ #pragma segment Perl_yylex @@ -2157,141 +2311,22 @@ Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp) int Perl_yylex(pTHX) { - register char *s; + register char *s = PL_bufptr; register char *d; register I32 tmp; STRLEN len; GV *gv = Nullgv; GV **gvp = 0; bool bof = FALSE; + I32 orig_keyword = 0; + DEBUG_T( { + PerlIO_printf(Perl_debug_log, "### LEX_%s\n", + lex_state_names[PL_lex_state]); + } ); /* check if there's an identifier for us to look at */ - if (PL_pending_ident) { - /* pit holds the identifier we read and pending_ident is reset */ - char pit = PL_pending_ident; - PL_pending_ident = 0; - - DEBUG_T({ PerlIO_printf(Perl_debug_log, - "### Tokener saw identifier '%s'\n", PL_tokenbuf); }); - - /* if we're in a my(), we can't allow dynamics here. - $foo'bar has already been turned into $foo::bar, so - just check for colons. - - if it's a legal name, the OP is a PADANY. - */ - if (PL_in_my) { - if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ - if (strchr(PL_tokenbuf,':')) - yyerror(Perl_form(aTHX_ "No package name allowed for " - "variable %s in \"our\"", - PL_tokenbuf)); - tmp = pad_allocmy(PL_tokenbuf); - } - else { - if (strchr(PL_tokenbuf,':')) - yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf)); - - yylval.opval = newOP(OP_PADANY, 0); - yylval.opval->op_targ = pad_allocmy(PL_tokenbuf); - return PRIVATEREF; - } - } - - /* - build the ops for accesses to a my() variable. - - Deny my($a) or my($b) in a sort block, *if* $a or $b is - then used in a comparison. This catches most, but not - all cases. For instance, it catches - sort { my($a); $a <=> $b } - but not - sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } - (although why you'd do that is anyone's guess). - */ - - if (!strchr(PL_tokenbuf,':')) { -#ifdef USE_THREADS - /* Check for single character per-thread SVs */ - if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0' - && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */ - && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD) - { - yylval.opval = newOP(OP_THREADSV, 0); - yylval.opval->op_targ = tmp; - return PRIVATEREF; - } -#endif /* USE_THREADS */ - if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) { - SV *namesv = AvARRAY(PL_comppad_name)[tmp]; - /* might be an "our" variable" */ - if (SvFLAGS(namesv) & SVpad_OUR) { - /* build ops for a bareword */ - SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0); - sv_catpvn(sym, "::", 2); - sv_catpv(sym, PL_tokenbuf+1); - yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); - yylval.opval->op_private = OPpCONST_ENTERED; - gv_fetchpv(SvPVX(sym), - (PL_in_eval - ? (GV_ADDMULTI | GV_ADDINEVAL) - : TRUE - ), - ((PL_tokenbuf[0] == '$') ? SVt_PV - : (PL_tokenbuf[0] == '@') ? SVt_PVAV - : SVt_PVHV)); - return WORD; - } - - /* if it's a sort block and they're naming $a or $b */ - if (PL_last_lop_op == OP_SORT && - PL_tokenbuf[0] == '$' && - (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b') - && !PL_tokenbuf[2]) - { - for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart; - d < PL_bufend && *d != '\n'; - d++) - { - if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { - Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison", - PL_tokenbuf); - } - } - } - - yylval.opval = newOP(OP_PADANY, 0); - yylval.opval->op_targ = tmp; - return PRIVATEREF; - } - } - - /* - Whine if they've said @foo in a doublequoted string, - and @foo isn't a variable we can find in the symbol - table. - */ - if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { - GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV); - if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) - && ckWARN(WARN_AMBIGUOUS)) - { - /* Downgraded from fatal to warning 20000522 mjd */ - Perl_warner(aTHX_ WARN_AMBIGUOUS, - "Possible unintended interpolation of %s in string", - PL_tokenbuf); - } - } - - /* build ops for a bareword */ - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0)); - yylval.opval->op_private = OPpCONST_ENTERED; - gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE, - ((PL_tokenbuf[0] == '$') ? SVt_PV - : (PL_tokenbuf[0] == '@') ? SVt_PVAV - : SVt_PVHV)); - return WORD; - } + if (PL_pending_ident) + return REPORT(S_pending_ident(aTHX)); /* no identifier pending identification */ @@ -2315,7 +2350,7 @@ Perl_yylex(pTHX) "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr, (IV)PL_nexttype[PL_nexttoke]); }); - return(PL_nexttype[PL_nexttoke]); + return REPORT(PL_nexttype[PL_nexttoke]); /* interpolated case modifiers like \L \U, including \Q and \E. when we get here, PL_bufptr is at the \ @@ -2334,11 +2369,12 @@ Perl_yylex(pTHX) oldmod = PL_lex_casestack[--PL_lex_casemods]; PL_lex_casestack[PL_lex_casemods] = '\0'; - if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) { + if (PL_bufptr != PL_bufend + && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) { PL_bufptr += 2; PL_lex_state = LEX_INTERPCONCAT; } - return ')'; + return REPORT(')'); } if (PL_bufptr != PL_bufend) PL_bufptr += 2; @@ -2349,55 +2385,60 @@ Perl_yylex(pTHX) DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Saw case modifier at '%s'\n", PL_bufptr); }); s = PL_bufptr + 1; - if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) - tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */ - if (strchr("LU", *s) && - (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) - { - PL_lex_casestack[--PL_lex_casemods] = '\0'; - return ')'; - } - if (PL_lex_casemods > 10) { - char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char); - if (newlb != PL_lex_casestack) { - SAVEFREEPV(newlb); - PL_lex_casestack = newlb; + if (s[1] == '\\' && s[2] == 'E') { + PL_bufptr = s + 3; + PL_lex_state = LEX_INTERPCONCAT; + return yylex(); + } + else { + if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) + tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ + if ((*s == 'L' || *s == 'U') && + (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { + PL_lex_casestack[--PL_lex_casemods] = '\0'; + return REPORT(')'); } + if (PL_lex_casemods > 10) + Renew(PL_lex_casestack, PL_lex_casemods + 2, char); + PL_lex_casestack[PL_lex_casemods++] = *s; + PL_lex_casestack[PL_lex_casemods] = '\0'; + PL_lex_state = LEX_INTERPCONCAT; + PL_nextval[PL_nexttoke].ival = 0; + force_next('('); + if (*s == 'l') + PL_nextval[PL_nexttoke].ival = OP_LCFIRST; + else if (*s == 'u') + PL_nextval[PL_nexttoke].ival = OP_UCFIRST; + else if (*s == 'L') + PL_nextval[PL_nexttoke].ival = OP_LC; + else if (*s == 'U') + PL_nextval[PL_nexttoke].ival = OP_UC; + else if (*s == 'Q') + PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; + else + Perl_croak(aTHX_ "panic: yylex"); + PL_bufptr = s + 1; } - PL_lex_casestack[PL_lex_casemods++] = *s; - PL_lex_casestack[PL_lex_casemods] = '\0'; - PL_lex_state = LEX_INTERPCONCAT; - PL_nextval[PL_nexttoke].ival = 0; - force_next('('); - if (*s == 'l') - PL_nextval[PL_nexttoke].ival = OP_LCFIRST; - else if (*s == 'u') - PL_nextval[PL_nexttoke].ival = OP_UCFIRST; - else if (*s == 'L') - PL_nextval[PL_nexttoke].ival = OP_LC; - else if (*s == 'U') - PL_nextval[PL_nexttoke].ival = OP_UC; - else if (*s == 'Q') - PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA; - else - Perl_croak(aTHX_ "panic: yylex"); - PL_bufptr = s + 1; force_next(FUNC); if (PL_lex_starts) { s = PL_bufptr; PL_lex_starts = 0; - Aop(OP_CONCAT); + /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ + if (PL_lex_casemods == 1 && PL_lex_inpat) + OPERATOR(','); + else + Aop(OP_CONCAT); } else return yylex(); } case LEX_INTERPPUSH: - return sublex_push(); + return REPORT(sublex_push()); case LEX_INTERPSTART: if (PL_bufptr == PL_bufend) - return sublex_done(); + return REPORT(sublex_done()); DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Interpolated variable at '%s'\n", PL_bufptr); }); PL_expect = XTERM; @@ -2406,13 +2447,7 @@ Perl_yylex(pTHX) if (PL_lex_dojoin) { PL_nextval[PL_nexttoke].ival = 0; force_next(','); -#ifdef USE_THREADS - PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0); - PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\""); - force_next(PRIVATEREF); -#else force_ident("\"", '$'); -#endif /* USE_THREADS */ PL_nextval[PL_nexttoke].ival = 0; force_next('$'); PL_nextval[PL_nexttoke].ival = 0; @@ -2422,7 +2457,11 @@ Perl_yylex(pTHX) } if (PL_lex_starts++) { s = PL_bufptr; - Aop(OP_CONCAT); + /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ + if (!PL_lex_casemods && PL_lex_inpat) + OPERATOR(','); + else + Aop(OP_CONCAT); } return yylex(); @@ -2437,7 +2476,7 @@ Perl_yylex(pTHX) if (PL_lex_dojoin) { PL_lex_dojoin = FALSE; PL_lex_state = LEX_INTERPCONCAT; - return ')'; + return REPORT(')'); } if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl && SvEVALED(PL_lex_repl)) @@ -2453,7 +2492,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "panic: INTERPCONCAT"); #endif if (PL_bufptr == PL_bufend) - return sublex_done(); + return REPORT(sublex_done()); if (SvIVX(PL_linestr) == '\'') { SV *sv = newSVsv(PL_linestr); @@ -2476,8 +2515,13 @@ Perl_yylex(pTHX) PL_nextval[PL_nexttoke] = yylval; PL_expect = XTERM; force_next(THING); - if (PL_lex_starts++) - Aop(OP_CONCAT); + if (PL_lex_starts++) { + /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ + if (!PL_lex_casemods && PL_lex_inpat) + OPERATOR(','); + else + Aop(OP_CONCAT); + } else { PL_bufptr = s; return yylex(); @@ -2497,7 +2541,7 @@ Perl_yylex(pTHX) PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; DEBUG_T( { - PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n", + PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n", exp_name[PL_expect], s); } ); @@ -2514,8 +2558,12 @@ Perl_yylex(pTHX) if (!PL_rsfp) { PL_last_uni = 0; PL_last_lop = 0; - if (PL_lex_brackets) - yyerror("Missing right curly or square bracket"); + if (PL_lex_brackets) { + if (PL_lex_formbrack) + yyerror("Format not terminated"); + else + yyerror("Missing right curly or square bracket"); + } DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); } ); @@ -2529,12 +2577,12 @@ Perl_yylex(pTHX) PL_preambled = TRUE; sv_setpv(PL_linestr,incl_perldb()); if (SvCUR(PL_linestr)) - sv_catpv(PL_linestr,";"); + sv_catpvn(PL_linestr,";", 1); if (PL_preambleav){ while(AvFILLp(PL_preambleav) >= 0) { SV *tmpsv = av_shift(PL_preambleav); sv_catsv(PL_linestr, tmpsv); - sv_catpv(PL_linestr, ";"); + sv_catpvn(PL_linestr, ";", 1); sv_free(tmpsv); } sv_free((SV*)PL_preambleav); @@ -2545,33 +2593,36 @@ Perl_yylex(pTHX) if (PL_minus_l) sv_catpv(PL_linestr,"chomp;"); if (PL_minus_a) { - GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV); - if (gv) - GvIMPORTED_AV_on(gv); if (PL_minus_F) { - if (strchr("/'\"", *PL_splitstr) + if ((*PL_splitstr == '/' || *PL_splitstr == '\'' + || *PL_splitstr == '"') && strchr(PL_splitstr + 1, *PL_splitstr)) - Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr); + Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); else { - char delim; - s = "'~#\200\1'"; /* surely one char is unused...*/ - while (s[1] && strchr(PL_splitstr, *s)) s++; - delim = *s; - Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c", - "q" + (delim == '\''), delim); - for (s = PL_splitstr; *s; s++) { + /* "q\0${splitstr}\0" is legal perl. Yes, even NUL + bytes can be used as quoting characters. :-) */ + /* The count here deliberately includes the NUL + that terminates the C string constant. This + embeds the opening NUL into the string. */ + sv_catpvn(PL_linestr, "our @F=split(q", 15); + s = PL_splitstr; + do { + /* Need to \ \s */ if (*s == '\\') - sv_catpvn(PL_linestr, "\\", 1); + sv_catpvn(PL_linestr, s, 1); sv_catpvn(PL_linestr, s, 1); - } - Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim); + } while (*s++); + /* This loop will embed the trailing NUL of + PL_linestr as the last thing it does before + terminating. */ + sv_catpvn(PL_linestr, ");", 2); } } else - sv_catpv(PL_linestr,"@F=split(' ');"); + sv_catpv(PL_linestr,"our @F=split(' ');"); } } - sv_catpv(PL_linestr, "\n"); + sv_catpvn(PL_linestr, "\n", 1); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = Nullch; @@ -2580,6 +2631,8 @@ Perl_yylex(pTHX) sv_upgrade(sv, SVt_PVMG); sv_setsv(sv,PL_linestr); + (void)SvIOK_on(sv); + SvIVX(sv) = 0; av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); } goto retry; @@ -2599,8 +2652,8 @@ Perl_yylex(pTHX) PL_doextract = FALSE; } if (!PL_in_eval && (PL_minus_n || PL_minus_p)) { - sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : ""); - sv_catpv(PL_linestr,";}"); + sv_setpv(PL_linestr,PL_minus_p + ? ";}continue{print;}" : ";}"); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = Nullch; @@ -2612,8 +2665,13 @@ Perl_yylex(pTHX) sv_setpv(PL_linestr,""); TOKEN(';'); /* not infinite loop because rsfp is NULL now */ } - /* if it looks like the start of a BOM, check if it in fact is */ - else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) { + /* If it looks like the start of a BOM or raw UTF-16, + * check if it in fact is. */ + else if (bof && + (*s == 0 || + *(U8*)s == 0xEF || + *(U8*)s >= 0xFE || + s[1] == 0)) { #ifdef PERLIO_IS_STDIO # ifdef __GNU_LIBRARY__ # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */ @@ -2635,7 +2693,7 @@ Perl_yylex(pTHX) if (!PL_preprocess) bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); #else - bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr); + bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr); #endif if (bof) { PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -2643,9 +2701,6 @@ Perl_yylex(pTHX) } } if (PL_doextract) { - if (*s == '#' && s[1] == '!' && instr(s,"perl")) - PL_doextract = FALSE; - /* Incest with pod. */ if (*s == '=' && strnEQ(s, "=cut", 4)) { sv_setpv(PL_linestr, ""); @@ -2663,6 +2718,8 @@ Perl_yylex(pTHX) sv_upgrade(sv, SVt_PVMG); sv_setsv(sv,PL_linestr); + (void)SvIOK_on(sv); + SvIVX(sv) = 0; av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv); } PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -2703,12 +2760,25 @@ Perl_yylex(pTHX) * at least, set argv[0] to the basename of the Perl * interpreter. So, having found "#!", we'll set it right. */ - SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); + SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */ assert(SvPOK(x) || SvGMAGICAL(x)); if (sv_eq(x, CopFILESV(PL_curcop))) { sv_setpvn(x, ipath, ipathend - ipath); SvSETMAGIC(x); } + else { + STRLEN blen; + STRLEN llen; + char *bstart = SvPV(CopFILESV(PL_curcop),blen); + char *lstart = SvPV(x,llen); + if (llen < blen) { + bstart += blen - llen; + if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') { + sv_setpvn(x, ipath, ipathend - ipath); + SvSETMAGIC(x); + } + } + } TAINT_NOT; /* $^X is always tainted, but that's OK */ } #endif /* ARG_ZERO_IS_SCRIPT */ @@ -2780,7 +2850,9 @@ Perl_yylex(pTHX) else newargv = PL_origargv; newargv[0] = ipath; + PERL_FPU_PRE_EXEC PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv)); + PERL_FPU_POST_EXEC Perl_croak(aTHX_ "Can't exec %s", ipath); } #endif @@ -2793,6 +2865,7 @@ Perl_yylex(pTHX) while (SPACE_OR_TAB(*d)) d++; if (*d++ == '-') { + bool switches_done = PL_doswitches; do { if (*d == 'M' || *d == 'm') { char *m = d; @@ -2802,6 +2875,14 @@ Perl_yylex(pTHX) } d = moreswitches(d); } while (d); + if (PL_doswitches && !switches_done) { + int argc = PL_origargc; + char **argv = PL_origargv; + do { + argc--,argv++; + } while (argc && argv[0][0] == '-' && argv[0][1]); + init_argv_symbols(argc,argv); + } if ((PERLDB_LINE && !oldpdb) || ((PL_minus_n || PL_minus_p) && !(oldn || oldp))) /* if we have already added "LINE: while (<>) {", @@ -2816,6 +2897,14 @@ Perl_yylex(pTHX) (void)gv_fetchfile(PL_origfilename); goto retry; } + if (PL_doswitches && !switches_done) { + int argc = PL_origargc; + char **argv = PL_origargv; + do { + argc--,argv++; + } while (argc && argv[0][0] == '-' && argv[0][1]); + init_argv_symbols(argc,argv); + } } } } @@ -2922,7 +3011,7 @@ Perl_yylex(pTHX) break; } if (ftst) { - PL_last_lop_op = ftst; + PL_last_lop_op = (OPCODE)ftst; DEBUG_T( { PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)ftst); } ); @@ -2932,10 +3021,10 @@ Perl_yylex(pTHX) /* Assume it was a minus followed by a one-letter named * subroutine call (or a -bareword), then. */ DEBUG_T( { PerlIO_printf(Perl_debug_log, - "### %c looked like a file test but was not\n", - (int)ftst); + "### '-%c' looked like a file test but was not\n", + (int) tmp); } ); - s -= 2; + s = --PL_bufptr; } } tmp = *s++; @@ -3007,8 +3096,6 @@ Perl_yylex(pTHX) PL_tokenbuf[0] = '%'; s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); if (!PL_tokenbuf[1]) { - if (s == PL_bufend) - yyerror("Final % should be \\% or %name"); PREREF('%'); } PL_pending_ident = '%'; @@ -3052,6 +3139,7 @@ Perl_yylex(pTHX) switch (tmp) { case KEY_or: case KEY_and: + case KEY_err: case KEY_for: case KEY_unless: case KEY_if: @@ -3072,7 +3160,7 @@ Perl_yylex(pTHX) yyerror("Unterminated attribute parameter in attribute list"); if (attrs) op_free(attrs); - return 0; /* EOF indicator */ + return REPORT(0); /* EOF indicator */ } } if (PL_lex_stuff) { @@ -3084,21 +3172,37 @@ Perl_yylex(pTHX) PL_lex_stuff = Nullsv; } else { - if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len)) + if (len == 6 && strnEQ(s, "unique", len)) { + if (PL_in_my == KEY_our) +#ifdef USE_ITHREADS + GvUNIQUE_on(cGVOPx_gv(yylval.opval)); +#else + ; /* skip to avoid loading attributes.pm */ +#endif + else + Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); + } + + /* NOTE: any CV attrs applied here need to be part of + the CVf_BUILTIN_ATTRS define in cv.h! */ + else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len)) CvLVALUE_on(PL_compcv); else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len)) CvLOCKED_on(PL_compcv); else if (!PL_in_my && len == 6 && strnEQ(s, "method", len)) CvMETHOD_on(PL_compcv); -#ifdef USE_ITHREADS - else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len)) - GvSHARED_on(cGVOPx_gv(yylval.opval)); -#endif + else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len)) + CvASSERTION_on(PL_compcv); /* After we've set the flags, it could be argued that we don't need to do the attributes.pm-based setting process, and shouldn't bother appending recognized - flags. To experiment with that, uncomment the - following "else": */ + flags. To experiment with that, uncomment the + following "else". (Note that's already been + uncommented. That keeps the above-applied built-in + attributes from being intercepted (and possibly + rejected) by a package's attribute routines, but is + justified by the performance win for the common case + of applying only built-in attributes.) */ else attrs = append_elem(OP_LIST, attrs, newSVOP(OP_CONST, 0, @@ -3111,7 +3215,7 @@ Perl_yylex(pTHX) break; /* require real whitespace or :'s */ } tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */ - if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) { + if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) { char q = ((*s == '\'') ? '"' : '\''); /* If here for an expression, and parsed no attrs, back off. */ if (tmp == '=' && !attrs) { @@ -3145,6 +3249,7 @@ Perl_yylex(pTHX) PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */ else PL_expect = XTERM; + s = skipspace(s); TOKEN('('); case ';': CLINE; @@ -3173,11 +3278,7 @@ Perl_yylex(pTHX) leftbracket: s++; if (PL_lex_brackets > 100) { - char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char); - if (newlb != PL_lex_brackstack) { - SAVEFREEPV(newlb); - PL_lex_brackstack = newlb; - } + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); } switch (PL_expect) { case XTERM: @@ -3270,12 +3371,17 @@ Perl_yylex(pTHX) || ((*t == 'q' || *t == 'x') && ++t < PL_bufend && !isALNUM(*t)))) { + /* skip q//-like construct */ char *tmps; char open, close, term; I32 brackets = 1; while (t < PL_bufend && isSPACE(*t)) t++; + /* check for q => */ + if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') { + OPERATOR(HASHBRACK); + } term = *t; open = term; if (term && (tmps = strchr("([{< )]}> )]}>",term))) @@ -3288,7 +3394,7 @@ Perl_yylex(pTHX) else if (*t == open) break; } - else + else { for (t++; t < PL_bufend; t++) { if (*t == '\\' && t+1 < PL_bufend) t++; @@ -3297,8 +3403,13 @@ Perl_yylex(pTHX) else if (*t == open) brackets++; } + } + t++; } - t++; + else + /* skip plain q word */ + while (t < PL_bufend && isALNUM_lazy_if(t,UTF)) + t += UTF8SKIP(t); } else if (isALNUM_lazy_if(t,UTF)) { t += UTF8SKIP(t); @@ -3366,7 +3477,7 @@ Perl_yylex(pTHX) && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart) { CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); CopLINE_inc(PL_curcop); } BAop(OP_BIT_AND); @@ -3399,7 +3510,7 @@ Perl_yylex(pTHX) if (tmp == '~') PMop(OP_MATCH); if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp)) - Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp); s--; if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) @@ -3444,8 +3555,24 @@ Perl_yylex(pTHX) case '!': s++; tmp = *s++; - if (tmp == '=') + if (tmp == '=') { + /* was this !=~ where !~ was meant? + * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */ + + if (*s == '~' && ckWARN(WARN_SYNTAX)) { + char *t = s+1; + + while (t < PL_bufend && isSPACE(*t)) + ++t; + + if (*t == '/' || *t == '?' || + ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) || + (*t == 't' && t[1] == 'r' && !isALNUM(t[2]))) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "!=~ should be !~"); + } Eop(OP_NE); + } if (tmp == '~') PMop(OP_NOT); s--; @@ -3490,7 +3617,7 @@ Perl_yylex(pTHX) if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { PL_expect = XTERM; depcom(); - return ','; /* grandfather non-comma-format format */ + return REPORT(','); /* grandfather non-comma-format format */ } } @@ -3543,7 +3670,7 @@ Perl_yylex(pTHX) PL_bufptr = skipspace(PL_bufptr); while (t < PL_bufend && *t != ']') t++; - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Multidimensional syntax %.*s not supported", (t - PL_bufptr) + 1, PL_bufptr); } @@ -3561,7 +3688,7 @@ Perl_yylex(pTHX) t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len); for (; isSPACE(*t); t++) ; if (*t == ';' && get_cv(tmpbuf, FALSE)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "You need to quote \"%s\"", tmpbuf); } } @@ -3598,17 +3725,19 @@ Perl_yylex(pTHX) } } else { - GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV); - if (gv && GvCVu(gv)) - PL_expect = XTERM; /* e.g. print $fh subr() */ + PL_expect = XTERM; /* e.g. print $fh subr() */ } } else if (isDIGIT(*s)) PL_expect = XTERM; /* e.g. print $fh 3 */ else if (*s == '.' && isDIGIT(s[1])) PL_expect = XTERM; /* e.g. print $fh .3 */ - else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=') - PL_expect = XTERM; /* e.g. print $fh -1 */ + else if ((*s == '?' || *s == '-' || *s == '+') + && !isSPACE(s[1]) && s[1] != '=') + PL_expect = XTERM; /* e.g. print $fh -1 */ + else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/') + PL_expect = XTERM; /* e.g. print $fh /.../ + XXX except DORDOR operator */ else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=') PL_expect = XTERM; /* print $fh <<"EOF" */ } @@ -3621,8 +3750,6 @@ Perl_yylex(pTHX) PL_tokenbuf[0] = '@'; s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); if (!PL_tokenbuf[1]) { - if (s == PL_bufend) - yyerror("Final @ should be \\@ or @name"); PREREF('@'); } if (PL_lex_state == LEX_NORMAL) @@ -3640,7 +3767,7 @@ Perl_yylex(pTHX) if (*t == '}' || *t == ']') { t++; PL_bufptr = skipspace(PL_bufptr); - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Scalar value %.*s better written as $%.*s", t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1); } @@ -3650,22 +3777,40 @@ Perl_yylex(pTHX) PL_pending_ident = '@'; TERM('@'); - case '/': /* may either be division or pattern */ - case '?': /* may either be conditional or pattern */ - if (PL_expect != XOPERATOR) { - /* Disable warning on "study /blah/" */ - if (PL_oldoldbufptr == PL_last_uni - && (*PL_last_uni != 's' || s - PL_last_uni < 5 - || memNE(PL_last_uni, "study", 5) - || isALNUM_lazy_if(PL_last_uni+5,UTF))) - check_uni(); - s = scan_pat(s,OP_MATCH); - TERM(sublex_start()); - } - tmp = *s++; - if (tmp == '/') - Mop(OP_DIVIDE); - OPERATOR(tmp); + case '/': /* may be division, defined-or, or pattern */ + if (PL_expect == XTERMORDORDOR && s[1] == '/') { + s += 2; + AOPERATOR(DORDOR); + } + case '?': /* may either be conditional or pattern */ + if(PL_expect == XOPERATOR) { + tmp = *s++; + if(tmp == '?') { + OPERATOR('?'); + } + else { + tmp = *s++; + if(tmp == '/') { + /* A // operator. */ + AOPERATOR(DORDOR); + } + else { + s--; + Mop(OP_DIVIDE); + } + } + } + else { + /* Disable warning on "study /blah/" */ + if (PL_oldoldbufptr == PL_last_uni + && (*PL_last_uni != 's' || s - PL_last_uni < 5 + || memNE(PL_last_uni, "study", 5) + || isALNUM_lazy_if(PL_last_uni+5,UTF) + )) + check_uni(); + s = scan_pat(s,OP_MATCH); + TERM(sublex_start()); + } case '.': if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack @@ -3716,7 +3861,7 @@ Perl_yylex(pTHX) if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { PL_expect = XTERM; depcom(); - return ','; /* grandfather non-comma-format format */ + return REPORT(','); /* grandfather non-comma-format format */ } else no_op("String",s); @@ -3735,7 +3880,7 @@ Perl_yylex(pTHX) if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { PL_expect = XTERM; depcom(); - return ','; /* grandfather non-comma-format format */ + return REPORT(','); /* grandfather non-comma-format format */ } else no_op("String",s); @@ -3767,7 +3912,7 @@ Perl_yylex(pTHX) case '\\': s++; if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s)) - Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression", + Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression", *s, *s); if (PL_expect == XOPERATOR) no_op("Backslash",s); @@ -3785,7 +3930,9 @@ Perl_yylex(pTHX) TERM(THING); } /* avoid v123abc() or $h{v1}, allow C */ - else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) { + else if (!isALPHA(*start) && (PL_expect == XTERM + || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XTERMORDORDOR)) { char c = *start; GV *gv; *start = '\0'; @@ -3834,6 +3981,7 @@ Perl_yylex(pTHX) case 'z': case 'Z': keylookup: { + orig_keyword = 0; gv = Nullgv; gvp = 0; @@ -3869,10 +4017,10 @@ Perl_yylex(pTHX) /* Is this a word before a => operator? */ if (*d == '=' && d[1] == '>') { CLINE; - yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); + yylval.opval + = (OP*)newSVOP(OP_CONST, 0, + S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); yylval.opval->op_private = OPpCONST_BARE; - if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) - SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -3898,6 +4046,7 @@ Perl_yylex(pTHX) } } if (ogv) { + orig_keyword = tmp; tmp = 0; /* overridden by import or by GLOBAL */ } else if (gv && !gvp @@ -3907,13 +4056,27 @@ Perl_yylex(pTHX) { tmp = 0; /* any sub overrides "weak" keyword */ } + else if (gv && !gvp + && tmp == -KEY_err + && GvCVu(gv) + && PL_expect != XOPERATOR + && PL_expect != XTERMORDORDOR) + { + /* any sub overrides the "err" keyword, except when really an + * operator is expected */ + tmp = 0; + } else { /* no override */ tmp = -tmp; + if (tmp == KEY_dump && ckWARN(WARN_MISC)) { + Perl_warner(aTHX_ packWARN(WARN_MISC), + "dump() better written as CORE::dump()"); + } gv = Nullgv; gvp = 0; if (ckWARN(WARN_AMBIGUOUS) && hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */ - Perl_warner(aTHX_ WARN_AMBIGUOUS, + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous call resolved as CORE::%s(), %s", GvENAME(hgv), "qualify as such or use &"); } @@ -3925,6 +4088,7 @@ Perl_yylex(pTHX) default: /* not a keyword */ just_a_word: { SV *sv; + int pkgname = 0; char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); /* Get the rest if it looks like a package qualifier */ @@ -3937,12 +4101,13 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf, *s == '\'' ? "'" : "::"); len += morelen; + pkgname = 1; } if (PL_expect == XOPERATOR) { if (PL_bufptr == PL_linestart) { CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi); CopLINE_inc(PL_curcop); } else @@ -3957,7 +4122,7 @@ Perl_yylex(pTHX) PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') { if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV)) - Perl_warner(aTHX_ WARN_BAREWORD, + Perl_warner(aTHX_ packWARN(WARN_BAREWORD), "Bareword \"%s\" refers to nonexistent package", PL_tokenbuf); len -= 2; @@ -3977,14 +4142,22 @@ Perl_yylex(pTHX) sv = newSVpvn("CORE::GLOBAL::",14); sv_catpv(sv,PL_tokenbuf); } - else - sv = newSVpv(PL_tokenbuf,0); + else { + /* If len is 0, newSVpv does strlen(), which is correct. + If len is non-zero, then it will be the true length, + and so the scalar will be created correctly. */ + sv = newSVpv(PL_tokenbuf,len); + } /* Presume this is going to be a bareword of some sort. */ CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); yylval.opval->op_private = OPpCONST_BARE; + /* UTF-8 package name? */ + if (UTF && !IN_BYTES && + is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) + SvUTF8_on(sv); /* And if "Foo::", then that's what it certainly is. */ @@ -4009,7 +4182,7 @@ Perl_yylex(pTHX) /* Two barewords in a row may indicate method call. */ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv))) - return tmp; + return REPORT(tmp); /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -4024,12 +4197,11 @@ Perl_yylex(pTHX) } } - PL_expect = XOPERATOR; s = skipspace(s); /* Is this a word before a => operator? */ - if (*s == '=' && s[1] == '>') { + if (*s == '=' && s[1] == '>' && !pkgname) { CLINE; sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) @@ -4064,15 +4236,17 @@ Perl_yylex(pTHX) /* If followed by a bareword, see if it looks like indir obj. */ - if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv))) - return tmp; + if (!orig_keyword + && (isIDFIRST_lazy_if(s,UTF) || *s == '$') + && (tmp = intuit_method(s,gv))) + return REPORT(tmp); /* Not a method, so call it a subroutine (if defined) */ if (gv && GvCVu(gv)) { CV* cv; if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS)) - Perl_warner(aTHX_ WARN_AMBIGUOUS, + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of -%s resolved as -&%s()", PL_tokenbuf, PL_tokenbuf); /* Check for a constant sub */ @@ -4097,10 +4271,13 @@ Perl_yylex(pTHX) char *proto = SvPV((SV*)cv, len); if (!len) TERM(FUNC0SUB); - if (strEQ(proto, "$")) + if (*proto == '$' && proto[1] == '\0') OPERATOR(UNIOPSUB); + while (*proto == ';') + proto++; if (*proto == '&' && *s == '{') { - sv_setpv(PL_subname,"__ANON__"); + sv_setpv(PL_subname, PL_curstash ? + "__ANON__" : "__ANON__::__ANON__"); PREBLOCK(LSTOPSUB); } } @@ -4119,19 +4296,20 @@ Perl_yylex(pTHX) if (ckWARN(WARN_RESERVED)) { if (lastchar != '-') { for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ; - if (!*d) - Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved, + if (!*d && !gv_stashpv(PL_tokenbuf,FALSE)) + Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, PL_tokenbuf); } } } safe_bareword: - if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) { - Perl_warner(aTHX_ WARN_AMBIGUOUS, + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && ckWARN_d(WARN_AMBIGUOUS)) { + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Operator or semicolon missing before %c%s", lastchar, PL_tokenbuf); - Perl_warner(aTHX_ WARN_AMBIGUOUS, + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); } @@ -4151,7 +4329,7 @@ Perl_yylex(pTHX) case KEY___PACKAGE__: yylval.opval = (OP*)newSVOP(OP_CONST, 0, (PL_curstash - ? newSVsv(PL_curstname) + ? newSVpv(HvNAME(PL_curstash), 0) : &PL_sv_undef)); TERM(THING); @@ -4195,7 +4373,11 @@ Perl_yylex(pTHX) loc = PerlIO_tell(PL_rsfp); (void)PerlIO_seek(PL_rsfp, 0L, 0); } +#ifdef NETWARE + if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) { +#else if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { +#endif /* NETWARE */ #ifdef PERLIO_IS_STDIO /* really? */ # if defined(__BORLANDC__) /* XXX see note in do_binmode() */ @@ -4208,8 +4390,29 @@ Perl_yylex(pTHX) } #endif #ifdef PERLIO_LAYERS - if (UTF && !IN_BYTES) - PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + if (!IN_BYTES) { + if (UTF) + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); + else if (PL_encoding) { + SV *name; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(sp); + EXTEND(SP, 1); + XPUSHs(PL_encoding); + PUTBACK; + call_method("name", G_SCALAR); + SPAGAIN; + name = POPs; + PUTBACK; + PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, + Perl_form(aTHX_ ":encoding(%"SVf")", + name)); + FREETMPS; + LEAVE; + } + } #endif PL_rsfp = Nullfp; } @@ -4297,12 +4500,6 @@ Perl_yylex(pTHX) LOP(OP_CRYPT,XTERM); case KEY_chmod: - if (ckWARN(WARN_CHMOD)) { - for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; - if (*d != '0' && isDIGIT(*d)) - Perl_warner(aTHX_ WARN_CHMOD, - "chmod() mode argument is missing initial 0"); - } LOP(OP_CHMOD,XTERM); case KEY_chown: @@ -4325,7 +4522,7 @@ Perl_yylex(pTHX) if (*s == '{') PRETERMBLOCK(DO); if (*s != '\'') - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE,FALSE); OPERATOR(DO); case KEY_die: @@ -4373,6 +4570,9 @@ Perl_yylex(pTHX) case KEY_eof: UNI(OP_EOF); + case KEY_err: + OPERATOR(DOROP); + case KEY_exp: UNI(OP_EXP); @@ -4456,7 +4656,7 @@ Perl_yylex(pTHX) UNI(OP_GMTIME); case KEY_getc: - UNI(OP_GETC); + UNIDOR(OP_GETC); case KEY_getppid: FUN0(OP_GETPPID); @@ -4655,7 +4855,7 @@ Perl_yylex(pTHX) if (PL_expect != XSTATE) yyerror("\"no\" not allowed in expression"); s = force_word(s,WORD,FALSE,TRUE,FALSE); - s = force_version(s); + s = force_version(s, FALSE); yylval.ival = 0; OPERATOR(USE); @@ -4670,11 +4870,15 @@ Perl_yylex(pTHX) if (isIDFIRST_lazy_if(s,UTF)) { char *t; for (d = s; isALNUM_lazy_if(d,UTF); d++) ; - t = skipspace(d); - if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)) - Perl_warner(aTHX_ WARN_PRECEDENCE, + for (t=d; *t && isSPACE(*t); t++) ; + if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) + /* [perl #16184] */ + && !(t[0] == '=' && t[1] == '>') + ) { + Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), "Precedence problem: open %.*s should be open(%.*s)", - d-s,s, d-s,s); + d - s, s, d - s, s); + } } LOP(OP_OPEN,XTERM); @@ -4706,10 +4910,10 @@ Perl_yylex(pTHX) LOP(OP_PUSH,XTERM); case KEY_pop: - UNI(OP_POP); + UNIDOR(OP_POP); case KEY_pos: - UNI(OP_POS); + UNIDOR(OP_POS); case KEY_pack: LOP(OP_PACK,XTERM); @@ -4748,12 +4952,12 @@ Perl_yylex(pTHX) if (!warned && ckWARN(WARN_QW)) { for (; !isSPACE(*d) && len; --len, ++d) { if (*d == ',') { - Perl_warner(aTHX_ WARN_QW, + Perl_warner(aTHX_ packWARN(WARN_QW), "Possible attempt to separate words with commas"); ++warned; } else if (*d == '#') { - Perl_warner(aTHX_ WARN_QW, + Perl_warner(aTHX_ packWARN(WARN_QW), "Possible attempt to put comments in qw() list"); ++warned; } @@ -4807,10 +5011,12 @@ Perl_yylex(pTHX) case KEY_require: s = skipspace(s); - if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { - s = force_version(s); + if (isDIGIT(*s)) { + s = force_version(s, FALSE); } - else { + else if (*s != 'v' || !isDIGIT(s[1]) + || (s = force_version(s, TRUE), *s == 'v')) + { *PL_tokenbuf = '\0'; s = force_word(s,WORD,TRUE,TRUE,FALSE); if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) @@ -4847,7 +5053,7 @@ Perl_yylex(pTHX) case KEY_readline: set_csh(); - UNI(OP_READLINE); + UNIDOR(OP_READLINE); case KEY_readpipe: set_csh(); @@ -4863,7 +5069,7 @@ Perl_yylex(pTHX) LOP(OP_REVERSE,XTERM); case KEY_readlink: - UNI(OP_READLINK); + UNIDOR(OP_READLINK); case KEY_ref: UNI(OP_REF); @@ -4930,7 +5136,7 @@ Perl_yylex(pTHX) LOP(OP_SSOCKOPT,XTERM); case KEY_shift: - UNI(OP_SHIFT); + UNIDOR(OP_SHIFT); case KEY_shmctl: LOP(OP_SHMCTL,XTERM); @@ -4999,7 +5205,7 @@ Perl_yylex(pTHX) char tmpbuf[sizeof PL_tokenbuf]; SSize_t tboffset = 0; expectation attrful; - bool have_name, have_proto; + bool have_name, have_proto, bad_proto; int key = tmp; s = skipspace(s); @@ -5047,14 +5253,22 @@ Perl_yylex(pTHX) s = scan_str(s,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - /* strip spaces */ + /* strip spaces and check for bad characters */ d = SvPVX(PL_lex_stuff); tmp = 0; + bad_proto = FALSE; for (p = d; *p; ++p) { - if (!isSPACE(*p)) + if (!isSPACE(*p)) { d[tmp++] = *p; + if (!strchr("$@%*;[]&\\", *p)) + bad_proto = TRUE; + } } d[tmp] = '\0'; + if (bad_proto && ckWARN(WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Illegal character in prototype for %"SVf" : %s", + PL_subname, d); SvCUR(PL_lex_stuff) = tmp; have_proto = TRUE; @@ -5065,6 +5279,12 @@ Perl_yylex(pTHX) if (*s == ':' && s[1] != ':') PL_expect = attrful; + else if (*s != '{' && key == KEY_sub) { + if (!have_name) + Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine"); + else if (*s != ';') + Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname); + } if (have_proto) { PL_nextval[PL_nexttoke].opval = @@ -5073,7 +5293,8 @@ Perl_yylex(pTHX) force_next(THING); } if (!have_name) { - sv_setpv(PL_subname,"__ANON__"); + sv_setpv(PL_subname, + PL_curstash ? "__ANON__" : "__ANON__::__ANON__"); TOKEN(ANONSUB); } (void) force_word(PL_oldbufptr + tboffset, WORD, @@ -5151,7 +5372,7 @@ Perl_yylex(pTHX) LOP(OP_UNLINK,XTERM); case KEY_undef: - UNI(OP_UNDEF); + UNIDOR(OP_UNDEF); case KEY_unpack: LOP(OP_UNPACK,XTERM); @@ -5160,13 +5381,7 @@ Perl_yylex(pTHX) LOP(OP_UTIME,XTERM); case KEY_umask: - if (ckWARN(WARN_UMASK)) { - for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ; - if (*d != '0' && isDIGIT(*d)) - Perl_warner(aTHX_ WARN_UMASK, - "umask: argument is missing initial 0"); - } - UNI(OP_UMASK); + UNIDOR(OP_UMASK); case KEY_unshift: LOP(OP_UNSHIFT,XTERM); @@ -5176,15 +5391,19 @@ Perl_yylex(pTHX) yyerror("\"use\" not allowed in expression"); s = skipspace(s); if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { - s = force_version(s); + s = force_version(s, TRUE); if (*s == ';' || (s = skipspace(s), *s == ';')) { PL_nextval[PL_nexttoke].opval = Nullop; force_next(WORD); } + else if (*s == 'v') { + s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_version(s, FALSE); + } } else { s = force_word(s,WORD,FALSE,TRUE,FALSE); - s = force_version(s); + s = force_version(s, FALSE); } yylval.ival = 1; OPERATOR(USE); @@ -5215,10 +5434,9 @@ Perl_yylex(pTHX) case KEY_write: #ifdef EBCDIC { - static char ctl_l[2]; - - if (ctl_l[0] == '\0') - ctl_l[0] = toCTRL('L'); + char ctl_l[2]; + ctl_l[0] = toCTRL('L'); + ctl_l[1] = '\0'; gv_fetchpv(ctl_l,TRUE, SVt_PV); } #else @@ -5246,607 +5464,1462 @@ Perl_yylex(pTHX) #pragma segment Main #endif -I32 -Perl_keyword(pTHX_ register char *d, I32 len) +static int +S_pending_ident(pTHX) { - switch (*d) { - case '_': - if (d[1] == '_') { - if (strEQ(d,"__FILE__")) return -KEY___FILE__; - if (strEQ(d,"__LINE__")) return -KEY___LINE__; - if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__; - if (strEQ(d,"__DATA__")) return KEY___DATA__; - if (strEQ(d,"__END__")) return KEY___END__; - } - break; - case 'A': - if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD; - break; - case 'a': - switch (len) { - case 3: - if (strEQ(d,"and")) return -KEY_and; - if (strEQ(d,"abs")) return -KEY_abs; - break; - case 5: - if (strEQ(d,"alarm")) return -KEY_alarm; - if (strEQ(d,"atan2")) return -KEY_atan2; - break; - case 6: - if (strEQ(d,"accept")) return -KEY_accept; - break; - } - break; - case 'B': - if (strEQ(d,"BEGIN")) return KEY_BEGIN; - break; - case 'b': - if (strEQ(d,"bless")) return -KEY_bless; - if (strEQ(d,"bind")) return -KEY_bind; - if (strEQ(d,"binmode")) return -KEY_binmode; - break; - case 'C': - if (strEQ(d,"CORE")) return -KEY_CORE; - if (strEQ(d,"CHECK")) return KEY_CHECK; - break; - case 'c': - switch (len) { - case 3: - if (strEQ(d,"cmp")) return -KEY_cmp; - if (strEQ(d,"chr")) return -KEY_chr; - if (strEQ(d,"cos")) return -KEY_cos; - break; - case 4: - if (strEQ(d,"chop")) return -KEY_chop; - break; - case 5: - if (strEQ(d,"close")) return -KEY_close; - if (strEQ(d,"chdir")) return -KEY_chdir; - if (strEQ(d,"chomp")) return -KEY_chomp; - if (strEQ(d,"chmod")) return -KEY_chmod; - if (strEQ(d,"chown")) return -KEY_chown; - if (strEQ(d,"crypt")) return -KEY_crypt; - break; - case 6: - if (strEQ(d,"chroot")) return -KEY_chroot; - if (strEQ(d,"caller")) return -KEY_caller; - break; - case 7: - if (strEQ(d,"connect")) return -KEY_connect; - break; - case 8: - if (strEQ(d,"closedir")) return -KEY_closedir; - if (strEQ(d,"continue")) return -KEY_continue; - break; - } - break; - case 'D': - if (strEQ(d,"DESTROY")) return KEY_DESTROY; - break; - case 'd': - switch (len) { - case 2: - if (strEQ(d,"do")) return KEY_do; - break; - case 3: - if (strEQ(d,"die")) return -KEY_die; - break; - case 4: - if (strEQ(d,"dump")) return -KEY_dump; - break; - case 6: - if (strEQ(d,"delete")) return KEY_delete; - break; - case 7: - if (strEQ(d,"defined")) return KEY_defined; - if (strEQ(d,"dbmopen")) return -KEY_dbmopen; - break; - case 8: - if (strEQ(d,"dbmclose")) return -KEY_dbmclose; - break; - } - break; - case 'E': - if (strEQ(d,"END")) return KEY_END; - break; - case 'e': - switch (len) { - case 2: - if (strEQ(d,"eq")) return -KEY_eq; - break; - case 3: - if (strEQ(d,"eof")) return -KEY_eof; - if (strEQ(d,"exp")) return -KEY_exp; - break; - case 4: - if (strEQ(d,"else")) return KEY_else; - if (strEQ(d,"exit")) return -KEY_exit; - if (strEQ(d,"eval")) return KEY_eval; - if (strEQ(d,"exec")) return -KEY_exec; - if (strEQ(d,"each")) return -KEY_each; - break; - case 5: - if (strEQ(d,"elsif")) return KEY_elsif; - break; - case 6: - if (strEQ(d,"exists")) return KEY_exists; - if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif"); - break; - case 8: - if (strEQ(d,"endgrent")) return -KEY_endgrent; - if (strEQ(d,"endpwent")) return -KEY_endpwent; - break; - case 9: - if (strEQ(d,"endnetent")) return -KEY_endnetent; - break; - case 10: - if (strEQ(d,"endhostent")) return -KEY_endhostent; - if (strEQ(d,"endservent")) return -KEY_endservent; - break; - case 11: - if (strEQ(d,"endprotoent")) return -KEY_endprotoent; - break; - } - break; - case 'f': - switch (len) { - case 3: - if (strEQ(d,"for")) return KEY_for; - break; - case 4: - if (strEQ(d,"fork")) return -KEY_fork; - break; - case 5: - if (strEQ(d,"fcntl")) return -KEY_fcntl; - if (strEQ(d,"flock")) return -KEY_flock; - break; - case 6: - if (strEQ(d,"format")) return KEY_format; - if (strEQ(d,"fileno")) return -KEY_fileno; - break; - case 7: - if (strEQ(d,"foreach")) return KEY_foreach; - break; - case 8: - if (strEQ(d,"formline")) return -KEY_formline; - break; - } - break; + register char *d; + register I32 tmp = 0; + /* pit holds the identifier we read and pending_ident is reset */ + char pit = PL_pending_ident; + PL_pending_ident = 0; + + DEBUG_T({ PerlIO_printf(Perl_debug_log, + "### Tokener saw identifier '%s'\n", PL_tokenbuf); }); + + /* if we're in a my(), we can't allow dynamics here. + $foo'bar has already been turned into $foo::bar, so + just check for colons. + + if it's a legal name, the OP is a PADANY. + */ + if (PL_in_my) { + if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ + if (strchr(PL_tokenbuf,':')) + yyerror(Perl_form(aTHX_ "No package name allowed for " + "variable %s in \"our\"", + PL_tokenbuf)); + tmp = allocmy(PL_tokenbuf); + } + else { + if (strchr(PL_tokenbuf,':')) + yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf)); + + yylval.opval = newOP(OP_PADANY, 0); + yylval.opval->op_targ = allocmy(PL_tokenbuf); + return PRIVATEREF; + } + } + + /* + build the ops for accesses to a my() variable. + + Deny my($a) or my($b) in a sort block, *if* $a or $b is + then used in a comparison. This catches most, but not + all cases. For instance, it catches + sort { my($a); $a <=> $b } + but not + sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; } + (although why you'd do that is anyone's guess). + */ + + if (!strchr(PL_tokenbuf,':')) { + if (!PL_in_my) + tmp = pad_findmy(PL_tokenbuf); + if (tmp != NOT_IN_PAD) { + /* might be an "our" variable" */ + if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) { + /* build ops for a bareword */ + SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0); + sv_catpvn(sym, "::", 2); + sv_catpv(sym, PL_tokenbuf+1); + yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym); + yylval.opval->op_private = OPpCONST_ENTERED; + gv_fetchsv(sym, + (PL_in_eval + ? (GV_ADDMULTI | GV_ADDINEVAL) + : GV_ADDMULTI + ), + ((PL_tokenbuf[0] == '$') ? SVt_PV + : (PL_tokenbuf[0] == '@') ? SVt_PVAV + : SVt_PVHV)); + return WORD; + } + + /* if it's a sort block and they're naming $a or $b */ + if (PL_last_lop_op == OP_SORT && + PL_tokenbuf[0] == '$' && + (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b') + && !PL_tokenbuf[2]) + { + for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart; + d < PL_bufend && *d != '\n'; + d++) + { + if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) { + Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison", + PL_tokenbuf); + } + } + } + + yylval.opval = newOP(OP_PADANY, 0); + yylval.opval->op_targ = tmp; + return PRIVATEREF; + } + } + + /* + Whine if they've said @foo in a doublequoted string, + and @foo isn't a variable we can find in the symbol + table. + */ + if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { + GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV); + if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) + && ckWARN(WARN_AMBIGUOUS)) + { + /* Downgraded from fatal to warning 20000522 mjd */ + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Possible unintended interpolation of %s in string", + PL_tokenbuf); + } + } + + /* build ops for a bareword */ + yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0)); + yylval.opval->op_private = OPpCONST_ENTERED; + gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE, + ((PL_tokenbuf[0] == '$') ? SVt_PV + : (PL_tokenbuf[0] == '@') ? SVt_PVAV + : SVt_PVHV)); + return WORD; +} + +/* Weights are the number of occurrences of that keyword in about 135M of + input to Perl_keyword from a lot of real perl. This routine is about 20% + faster than the routine it replaces. */ + +I32 +Perl_keyword (pTHX_ char *name, I32 len) { + /* Initially switch on the length of the name. */ + switch (len) { + case 1: + /* Names all of length 1. */ + /* m q s x y */ + /* Offset 0 gives the best switch position. */ + switch (name[0]) { + case 'm': + { + return KEY_m; /* Weight 148776 */ + } + break; + case 'q': + { + return KEY_q; /* Weight 69076 */ + } + break; + case 's': + { + return KEY_s; /* Weight 403691 */ + } + break; + case 'x': + { + return -KEY_x; /* Weight 38549 */ + } + break; + case 'y': + { + return KEY_y; /* Weight 567 */ + } + break; + } + break; + case 2: + /* Names all of length 2. */ + /* do eq ge gt if lc le lt my ne no or qq qr qw qx tr uc */ + /* Offset 0 gives the best switch position. */ + switch (name[0]) { + case 'd': + if (name[1] == 'o') { + return KEY_do; /* Weight 96004 */ + } + break; + case 'e': + if (name[1] == 'q') { + return -KEY_eq; /* Weight 797065 */ + } + break; case 'g': - if (strnEQ(d,"get",3)) { - d += 3; - if (*d == 'p') { - switch (len) { - case 7: - if (strEQ(d,"ppid")) return -KEY_getppid; - if (strEQ(d,"pgrp")) return -KEY_getpgrp; - break; - case 8: - if (strEQ(d,"pwent")) return -KEY_getpwent; - if (strEQ(d,"pwnam")) return -KEY_getpwnam; - if (strEQ(d,"pwuid")) return -KEY_getpwuid; - break; - case 11: - if (strEQ(d,"peername")) return -KEY_getpeername; - if (strEQ(d,"protoent")) return -KEY_getprotoent; - if (strEQ(d,"priority")) return -KEY_getpriority; - break; - case 14: - if (strEQ(d,"protobyname")) return -KEY_getprotobyname; - break; - case 16: - if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber; - break; - } - } - else if (*d == 'h') { - if (strEQ(d,"hostbyname")) return -KEY_gethostbyname; - if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr; - if (strEQ(d,"hostent")) return -KEY_gethostent; - } - else if (*d == 'n') { - if (strEQ(d,"netbyname")) return -KEY_getnetbyname; - if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr; - if (strEQ(d,"netent")) return -KEY_getnetent; - } - else if (*d == 's') { - if (strEQ(d,"servbyname")) return -KEY_getservbyname; - if (strEQ(d,"servbyport")) return -KEY_getservbyport; - if (strEQ(d,"servent")) return -KEY_getservent; - if (strEQ(d,"sockname")) return -KEY_getsockname; - if (strEQ(d,"sockopt")) return -KEY_getsockopt; - } - else if (*d == 'g') { - if (strEQ(d,"grent")) return -KEY_getgrent; - if (strEQ(d,"grnam")) return -KEY_getgrnam; - if (strEQ(d,"grgid")) return -KEY_getgrgid; - } - else if (*d == 'l') { - if (strEQ(d,"login")) return -KEY_getlogin; - } - else if (strEQ(d,"c")) return -KEY_getc; - break; - } - switch (len) { - case 2: - if (strEQ(d,"gt")) return -KEY_gt; - if (strEQ(d,"ge")) return -KEY_ge; - break; - case 4: - if (strEQ(d,"grep")) return KEY_grep; - if (strEQ(d,"goto")) return KEY_goto; - if (strEQ(d,"glob")) return KEY_glob; - break; - case 6: - if (strEQ(d,"gmtime")) return -KEY_gmtime; - break; - } - break; + if (name[1] == 'e') { + return -KEY_ge; /* Weight 5666 */ + } + if (name[1] == 't') { + return -KEY_gt; /* Weight 897 */ + } + break; + case 'i': + if (name[1] == 'f') { + return KEY_if; /* Weight 2482605 */ + } + break; + case 'l': + if (name[1] == 'c') { + return -KEY_lc; /* Weight 38487 */ + } + if (name[1] == 'e') { + return -KEY_le; /* Weight 4052 */ + } + if (name[1] == 't') { + return -KEY_lt; /* Weight 335 */ + } + break; + case 'm': + if (name[1] == 'y') { + return KEY_my; /* Weight 3785925 */ + } + break; + case 'n': + if (name[1] == 'e') { + return -KEY_ne; /* Weight 112906 */ + } + if (name[1] == 'o') { + return KEY_no; /* Weight 61989 */ + } + break; + case 'o': + if (name[1] == 'r') { + return -KEY_or; /* Weight 405163 */ + } + break; + case 'q': + if (name[1] == 'w') { + return KEY_qw; /* Weight 415641 */ + } + if (name[1] == 'q') { + return KEY_qq; /* Weight 55149 */ + } + if (name[1] == 'r') { + return KEY_qr; /* Weight 28519 */ + } + if (name[1] == 'x') { + return KEY_qx; /* Weight 177 */ + } + break; + case 't': + if (name[1] == 'r') { + return KEY_tr; /* Weight 22665 */ + } + break; + case 'u': + if (name[1] == 'c') { + return -KEY_uc; /* Weight 16961 */ + } + break; + } + break; + case 3: + /* Names all of length 3. */ + /* END abs and chr cmp cos die eof err exp for hex int log map not oct ord + our pop pos ref sin sub tie use vec xor */ + /* Offset 0 gives the best switch position. */ + switch (*name++) { + case 'E': + if (name[0] == 'N' && name[1] == 'D') { + return KEY_END; /* Weight 3565 */ + } + break; + case 'a': + if (name[0] == 'n' && name[1] == 'd') { + return -KEY_and; /* Weight 284867 */ + } + if (name[0] == 'b' && name[1] == 's') { + return -KEY_abs; /* Weight 7767 */ + } + break; + case 'c': + if (name[0] == 'h' && name[1] == 'r') { + return -KEY_chr; /* Weight 35654 */ + } + if (name[0] == 'm' && name[1] == 'p') { + return -KEY_cmp; /* Weight 6808 */ + } + if (name[0] == 'o' && name[1] == 's') { + return -KEY_cos; /* Weight 447 */ + } + break; + case 'd': + if (name[0] == 'i' && name[1] == 'e') { + return -KEY_die; /* Weight 192203 */ + } + break; + case 'e': + if (name[0] == 'o' && name[1] == 'f') { + return -KEY_eof; /* Weight 1618 */ + } + if (name[0] == 'r' && name[1] == 'r') { + return -KEY_err; /* Weight 522 */ + } + if (name[0] == 'x' && name[1] == 'p') { + return -KEY_exp; /* Weight 423 */ + } + break; + case 'f': + if (name[0] == 'o' && name[1] == 'r') { + return KEY_for; /* Weight 118158 */ + } + break; case 'h': - if (strEQ(d,"hex")) return -KEY_hex; - break; - case 'I': - if (strEQ(d,"INIT")) return KEY_INIT; - break; + if (name[0] == 'e' && name[1] == 'x') { + return -KEY_hex; /* Weight 3629 */ + } + break; case 'i': - switch (len) { - case 2: - if (strEQ(d,"if")) return KEY_if; - break; - case 3: - if (strEQ(d,"int")) return -KEY_int; - break; - case 5: - if (strEQ(d,"index")) return -KEY_index; - if (strEQ(d,"ioctl")) return -KEY_ioctl; - break; - } - break; + if (name[0] == 'n' && name[1] == 't') { + return -KEY_int; /* Weight 18549 */ + } + break; + case 'l': + if (name[0] == 'o' && name[1] == 'g') { + return -KEY_log; + } + break; + case 'm': + if (name[0] == 'a' && name[1] == 'p') { + return KEY_map; /* Weight 115207 */ + } + break; + case 'n': + if (name[0] == 'o' && name[1] == 't') { + return -KEY_not; /* Weight 55868 */ + } + break; + case 'o': + if (name[0] == 'u' && name[1] == 'r') { + return KEY_our; /* Weight 194417 */ + } + if (name[0] == 'r' && name[1] == 'd') { + return -KEY_ord; /* Weight 22221 */ + } + if (name[0] == 'c' && name[1] == 't') { + return -KEY_oct; /* Weight 4195 */ + } + break; + case 'p': + if (name[0] == 'o' && name[1] == 'p') { + return -KEY_pop; /* Weight 46933 */ + } + if (name[0] == 'o' && name[1] == 's') { + return KEY_pos; /* Weight 5503 */ + } + break; + case 'r': + if (name[0] == 'e' && name[1] == 'f') { + return -KEY_ref; /* Weight 347102 */ + } + break; + case 's': + if (name[0] == 'u' && name[1] == 'b') { + return KEY_sub; /* Weight 2053554 */ + } + if (name[0] == 'i' && name[1] == 'n') { + return -KEY_sin; /* Weight 499 */ + } + break; + case 't': + if (name[0] == 'i' && name[1] == 'e') { + return KEY_tie; /* Weight 10131 */ + } + break; + case 'u': + if (name[0] == 's' && name[1] == 'e') { + return KEY_use; /* Weight 686081 */ + } + break; + case 'v': + if (name[0] == 'e' && name[1] == 'c') { + return -KEY_vec; /* Weight 110566 */ + } + break; + case 'x': + if (name[0] == 'o' && name[1] == 'r') { + return -KEY_xor; /* Weight 619 */ + } + break; + } + break; + case 4: + /* Names all of length 4. */ + /* CORE INIT bind chop dump each else eval exec exit fork getc glob goto + grep join keys kill last link lock next open pack pipe push rand read + recv redo seek send sort sqrt stat tell tied time wait warn */ + /* Offset 0 gives the best switch position. */ + switch (*name++) { + case 'C': + if (!memcmp(name, "ORE", 3)) { + /* C */ + return -KEY_CORE; /* Weight 47391 */ + } + break; + case 'I': + if (!memcmp(name, "NIT", 3)) { + /* I */ + return KEY_INIT; /* Weight 418 */ + } + break; + case 'b': + if (!memcmp(name, "ind", 3)) { + /* b */ + return -KEY_bind; /* Weight 290 */ + } + break; + case 'c': + if (!memcmp(name, "hop", 3)) { + /* c */ + return -KEY_chop; /* Weight 10172 */ + } + break; + case 'd': + if (!memcmp(name, "ump", 3)) { + /* d */ + return -KEY_dump; /* Weight 274 */ + } + break; + case 'e': + if (!memcmp(name, "lse", 3)) { + /* e */ + return KEY_else; /* Weight 527806 */ + } + if (!memcmp(name, "val", 3)) { + /* e */ + return KEY_eval; /* Weight 136977 */ + } + if (!memcmp(name, "ach", 3)) { + /* e */ + return -KEY_each; /* Weight 18414 */ + } + if (!memcmp(name, "xit", 3)) { + /* e */ + return -KEY_exit; /* Weight 8262 */ + } + if (!memcmp(name, "xec", 3)) { + /* e */ + return -KEY_exec; /* Weight 429 */ + } + break; + case 'f': + if (!memcmp(name, "ork", 3)) { + /* f */ + return -KEY_fork; /* Weight 327 */ + } + break; + case 'g': + if (!memcmp(name, "oto", 3)) { + /* g */ + return KEY_goto; /* Weight 109258 */ + } + if (!memcmp(name, "rep", 3)) { + /* g */ + return KEY_grep; /* Weight 75912 */ + } + if (!memcmp(name, "lob", 3)) { + /* g */ + return KEY_glob; /* Weight 2172 */ + } + if (!memcmp(name, "etc", 3)) { + /* g */ + return -KEY_getc; /* Weight 981 */ + } + break; case 'j': - if (strEQ(d,"join")) return -KEY_join; - break; + if (!memcmp(name, "oin", 3)) { + /* j */ + return -KEY_join; /* Weight 130820 */ + } + break; case 'k': - if (len == 4) { - if (strEQ(d,"keys")) return -KEY_keys; - if (strEQ(d,"kill")) return -KEY_kill; - } - break; + if (!memcmp(name, "eys", 3)) { + /* k */ + return -KEY_keys; /* Weight 131427 */ + } + if (!memcmp(name, "ill", 3)) { + /* k */ + return -KEY_kill; /* Weight 382 */ + } + break; case 'l': - switch (len) { - case 2: - if (strEQ(d,"lt")) return -KEY_lt; - if (strEQ(d,"le")) return -KEY_le; - if (strEQ(d,"lc")) return -KEY_lc; - break; - case 3: - if (strEQ(d,"log")) return -KEY_log; - break; - case 4: - if (strEQ(d,"last")) return KEY_last; - if (strEQ(d,"link")) return -KEY_link; - if (strEQ(d,"lock")) return -KEY_lock; - break; - case 5: - if (strEQ(d,"local")) return KEY_local; - if (strEQ(d,"lstat")) return -KEY_lstat; - break; - case 6: - if (strEQ(d,"length")) return -KEY_length; - if (strEQ(d,"listen")) return -KEY_listen; - break; - case 7: - if (strEQ(d,"lcfirst")) return -KEY_lcfirst; - break; - case 9: - if (strEQ(d,"localtime")) return -KEY_localtime; - break; - } - break; + if (!memcmp(name, "ast", 3)) { + /* l */ + return KEY_last; /* Weight 95078 */ + } + if (!memcmp(name, "ock", 3)) { + /* l */ + return -KEY_lock; /* Weight 4210 */ + } + if (!memcmp(name, "ink", 3)) { + /* l */ + return -KEY_link; /* Weight 425 */ + } + break; + case 'n': + if (!memcmp(name, "ext", 3)) { + /* n */ + return KEY_next; /* Weight 153355 */ + } + break; + case 'o': + if (!memcmp(name, "pen", 3)) { + /* o */ + return -KEY_open; /* Weight 39060 */ + } + break; + case 'p': + if (!memcmp(name, "ush", 3)) { + /* p */ + return -KEY_push; /* Weight 256975 */ + } + if (!memcmp(name, "ack", 3)) { + /* p */ + return -KEY_pack; /* Weight 14491 */ + } + if (!memcmp(name, "ipe", 3)) { + /* p */ + return -KEY_pipe; /* Weight 344 */ + } + break; + case 'r': + if (!memcmp(name, "ead", 3)) { + /* r */ + return -KEY_read; /* Weight 9434 */ + } + if (!memcmp(name, "edo", 3)) { + /* r */ + return KEY_redo; /* Weight 5219 */ + } + if (!memcmp(name, "and", 3)) { + /* r */ + return -KEY_rand; /* Weight 1824 */ + } + if (!memcmp(name, "ecv", 3)) { + /* r */ + return -KEY_recv; /* Weight 250 */ + } + break; + case 's': + if (!memcmp(name, "tat", 3)) { + /* s */ + return -KEY_stat; /* Weight 36702 */ + } + if (!memcmp(name, "ort", 3)) { + /* s */ + return KEY_sort; /* Weight 36394 */ + } + if (!memcmp(name, "eek", 3)) { + /* s */ + return -KEY_seek; /* Weight 2174 */ + } + if (!memcmp(name, "qrt", 3)) { + /* s */ + return -KEY_sqrt; /* Weight 766 */ + } + if (!memcmp(name, "end", 3)) { + /* s */ + return -KEY_send; /* Weight 496 */ + } + break; + case 't': + if (!memcmp(name, "ime", 3)) { + /* t */ + return -KEY_time; /* Weight 32168 */ + } + if (!memcmp(name, "ied", 3)) { + /* t */ + return KEY_tied; /* Weight 9749 */ + } + if (!memcmp(name, "ell", 3)) { + /* t */ + return -KEY_tell; /* Weight 2578 */ + } + break; + case 'w': + if (!memcmp(name, "arn", 3)) { + /* w */ + return -KEY_warn; /* Weight 91372 */ + } + if (!memcmp(name, "ait", 3)) { + /* w */ + return -KEY_wait; + } + break; + } + break; + case 5: + /* Names all of length 5. */ + /* BEGIN CHECK alarm atan2 bless chdir chmod chomp chown close crypt elsif + fcntl flock index ioctl local lstat mkdir print reset rmdir semop shift + sleep split srand study times umask undef untie until utime while write + */ + /* Offset 3 gives the best switch position. */ + switch (name[3]) { + case 'C': + if (!memcmp(name, "CHECK", 5)) { + /* ^ */ + return KEY_CHECK; /* Weight 538 */ + } + break; + case 'I': + if (!memcmp(name, "BEGIN", 5)) { + /* ^ */ + return KEY_BEGIN; /* Weight 24125 */ + } + break; + case 'a': + if (!memcmp(name, "local", 5)) { + /* ^ */ + return KEY_local; /* Weight 262973 */ + } + if (!memcmp(name, "lstat", 5)) { + /* ^ */ + return -KEY_lstat; /* Weight 13859 */ + } + break; + case 'c': + if (!memcmp(name, "flock", 5)) { + /* ^ */ + return -KEY_flock; /* Weight 260 */ + } + break; + case 'd': + if (!memcmp(name, "study", 5)) { + /* ^ */ + return KEY_study; /* Weight 1933 */ + } + break; + case 'e': + if (!memcmp(name, "undef", 5)) { + /* ^ */ + return KEY_undef; /* Weight 311156 */ + } + if (!memcmp(name, "index", 5)) { + /* ^ */ + return -KEY_index; /* Weight 51465 */ + } + if (!memcmp(name, "sleep", 5)) { + /* ^ */ + return -KEY_sleep; /* Weight 519 */ + } + if (!memcmp(name, "times", 5)) { + /* ^ */ + return -KEY_times; /* Weight 310 */ + } + if (!memcmp(name, "reset", 5)) { + /* ^ */ + return -KEY_reset; /* Weight 127 */ + } + break; + case 'f': + if (!memcmp(name, "shift", 5)) { + /* ^ */ + return -KEY_shift; /* Weight 904125 */ + } + break; + case 'i': + if (!memcmp(name, "elsif", 5)) { + /* ^ */ + return KEY_elsif; /* Weight 322365 */ + } + if (!memcmp(name, "split", 5)) { + /* ^ */ + return KEY_split; /* Weight 93678 */ + } + if (!memcmp(name, "chdir", 5)) { + /* ^ */ + return -KEY_chdir; /* Weight 20317 */ + } + if (!memcmp(name, "mkdir", 5)) { + /* ^ */ + return -KEY_mkdir; /* Weight 2951 */ + } + if (!memcmp(name, "rmdir", 5)) { + /* ^ */ + return -KEY_rmdir; /* Weight 2493 */ + } + if (!memcmp(name, "until", 5)) { + /* ^ */ + return KEY_until; /* Weight 818 */ + } + if (!memcmp(name, "untie", 5)) { + /* ^ */ + return KEY_untie; /* Weight 420 */ + } + break; + case 'l': + if (!memcmp(name, "while", 5)) { + /* ^ */ + return KEY_while; /* Weight 120305 */ + } + break; case 'm': - switch (len) { - case 1: return KEY_m; - case 2: - if (strEQ(d,"my")) return KEY_my; - break; - case 3: - if (strEQ(d,"map")) return KEY_map; - break; - case 5: - if (strEQ(d,"mkdir")) return -KEY_mkdir; - break; - case 6: - if (strEQ(d,"msgctl")) return -KEY_msgctl; - if (strEQ(d,"msgget")) return -KEY_msgget; - if (strEQ(d,"msgrcv")) return -KEY_msgrcv; - if (strEQ(d,"msgsnd")) return -KEY_msgsnd; - break; - } - break; + if (!memcmp(name, "chomp", 5)) { + /* ^ */ + return -KEY_chomp; /* Weight 22337 */ + } + if (!memcmp(name, "utime", 5)) { + /* ^ */ + return -KEY_utime; /* Weight 3849 */ + } + break; case 'n': - if (strEQ(d,"next")) return KEY_next; - if (strEQ(d,"ne")) return -KEY_ne; - if (strEQ(d,"not")) return -KEY_not; - if (strEQ(d,"no")) return KEY_no; - break; + if (!memcmp(name, "print", 5)) { + /* ^ */ + return KEY_print; /* Weight 220904 */ + } + if (!memcmp(name, "atan2", 5)) { + /* ^ */ + return -KEY_atan2; /* Weight 350 */ + } + if (!memcmp(name, "srand", 5)) { + /* ^ */ + return -KEY_srand; /* Weight 41 */ + } + break; case 'o': - switch (len) { - case 2: - if (strEQ(d,"or")) return -KEY_or; - break; - case 3: - if (strEQ(d,"ord")) return -KEY_ord; - if (strEQ(d,"oct")) return -KEY_oct; - if (strEQ(d,"our")) return KEY_our; - break; - case 4: - if (strEQ(d,"open")) return -KEY_open; - break; - case 7: - if (strEQ(d,"opendir")) return -KEY_opendir; - break; - } - break; + if (!memcmp(name, "chmod", 5)) { + /* ^ */ + return -KEY_chmod; /* Weight 18455 */ + } + if (!memcmp(name, "semop", 5)) { + /* ^ */ + return -KEY_semop; + } + break; case 'p': - switch (len) { - case 3: - if (strEQ(d,"pop")) return -KEY_pop; - if (strEQ(d,"pos")) return KEY_pos; - break; - case 4: - if (strEQ(d,"push")) return -KEY_push; - if (strEQ(d,"pack")) return -KEY_pack; - if (strEQ(d,"pipe")) return -KEY_pipe; - break; - case 5: - if (strEQ(d,"print")) return KEY_print; - break; - case 6: - if (strEQ(d,"printf")) return KEY_printf; - break; - case 7: - if (strEQ(d,"package")) return KEY_package; - break; - case 9: - if (strEQ(d,"prototype")) return KEY_prototype; - } - break; - case 'q': - if (len <= 2) { - if (strEQ(d,"q")) return KEY_q; - if (strEQ(d,"qr")) return KEY_qr; - if (strEQ(d,"qq")) return KEY_qq; - if (strEQ(d,"qw")) return KEY_qw; - if (strEQ(d,"qx")) return KEY_qx; - } - else if (strEQ(d,"quotemeta")) return -KEY_quotemeta; - break; + if (!memcmp(name, "crypt", 5)) { + /* ^ */ + return -KEY_crypt; /* Weight 8 */ + } + break; case 'r': - switch (len) { - case 3: - if (strEQ(d,"ref")) return -KEY_ref; - break; - case 4: - if (strEQ(d,"read")) return -KEY_read; - if (strEQ(d,"rand")) return -KEY_rand; - if (strEQ(d,"recv")) return -KEY_recv; - if (strEQ(d,"redo")) return KEY_redo; - break; - case 5: - if (strEQ(d,"rmdir")) return -KEY_rmdir; - if (strEQ(d,"reset")) return -KEY_reset; - break; - case 6: - if (strEQ(d,"return")) return KEY_return; - if (strEQ(d,"rename")) return -KEY_rename; - if (strEQ(d,"rindex")) return -KEY_rindex; - break; - case 7: - if (strEQ(d,"require")) return KEY_require; - if (strEQ(d,"reverse")) return -KEY_reverse; - if (strEQ(d,"readdir")) return -KEY_readdir; - break; - case 8: - if (strEQ(d,"readlink")) return -KEY_readlink; - if (strEQ(d,"readline")) return -KEY_readline; - if (strEQ(d,"readpipe")) return -KEY_readpipe; - break; - case 9: - if (strEQ(d,"rewinddir")) return -KEY_rewinddir; - break; - } - break; + if (!memcmp(name, "alarm", 5)) { + /* ^ */ + return -KEY_alarm; + } + break; case 's': - switch (d[1]) { - case 0: return KEY_s; - case 'c': - if (strEQ(d,"scalar")) return KEY_scalar; - break; - case 'e': - switch (len) { - case 4: - if (strEQ(d,"seek")) return -KEY_seek; - if (strEQ(d,"send")) return -KEY_send; - break; - case 5: - if (strEQ(d,"semop")) return -KEY_semop; - break; - case 6: - if (strEQ(d,"select")) return -KEY_select; - if (strEQ(d,"semctl")) return -KEY_semctl; - if (strEQ(d,"semget")) return -KEY_semget; - break; - case 7: - if (strEQ(d,"setpgrp")) return -KEY_setpgrp; - if (strEQ(d,"seekdir")) return -KEY_seekdir; - break; - case 8: - if (strEQ(d,"setpwent")) return -KEY_setpwent; - if (strEQ(d,"setgrent")) return -KEY_setgrent; - break; - case 9: - if (strEQ(d,"setnetent")) return -KEY_setnetent; - break; - case 10: - if (strEQ(d,"setsockopt")) return -KEY_setsockopt; - if (strEQ(d,"sethostent")) return -KEY_sethostent; - if (strEQ(d,"setservent")) return -KEY_setservent; - break; - case 11: - if (strEQ(d,"setpriority")) return -KEY_setpriority; - if (strEQ(d,"setprotoent")) return -KEY_setprotoent; - break; - } - break; - case 'h': - switch (len) { - case 5: - if (strEQ(d,"shift")) return -KEY_shift; - break; - case 6: - if (strEQ(d,"shmctl")) return -KEY_shmctl; - if (strEQ(d,"shmget")) return -KEY_shmget; - break; - case 7: - if (strEQ(d,"shmread")) return -KEY_shmread; - break; - case 8: - if (strEQ(d,"shmwrite")) return -KEY_shmwrite; - if (strEQ(d,"shutdown")) return -KEY_shutdown; - break; - } - break; - case 'i': - if (strEQ(d,"sin")) return -KEY_sin; - break; - case 'l': - if (strEQ(d,"sleep")) return -KEY_sleep; - break; - case 'o': - if (strEQ(d,"sort")) return KEY_sort; - if (strEQ(d,"socket")) return -KEY_socket; - if (strEQ(d,"socketpair")) return -KEY_socketpair; - break; - case 'p': - if (strEQ(d,"split")) return KEY_split; - if (strEQ(d,"sprintf")) return -KEY_sprintf; - if (strEQ(d,"splice")) return -KEY_splice; - break; - case 'q': - if (strEQ(d,"sqrt")) return -KEY_sqrt; - break; - case 'r': - if (strEQ(d,"srand")) return -KEY_srand; - break; - case 't': - if (strEQ(d,"stat")) return -KEY_stat; - if (strEQ(d,"study")) return KEY_study; - break; - case 'u': - if (strEQ(d,"substr")) return -KEY_substr; - if (strEQ(d,"sub")) return KEY_sub; - break; - case 'y': - switch (len) { - case 6: - if (strEQ(d,"system")) return -KEY_system; - break; - case 7: - if (strEQ(d,"symlink")) return -KEY_symlink; - if (strEQ(d,"syscall")) return -KEY_syscall; - if (strEQ(d,"sysopen")) return -KEY_sysopen; - if (strEQ(d,"sysread")) return -KEY_sysread; - if (strEQ(d,"sysseek")) return -KEY_sysseek; - break; - case 8: - if (strEQ(d,"syswrite")) return -KEY_syswrite; - break; - } - break; - } - break; + if (!memcmp(name, "bless", 5)) { + /* ^ */ + return -KEY_bless; /* Weight 62111 */ + } + if (!memcmp(name, "close", 5)) { + /* ^ */ + return -KEY_close; /* Weight 44077 */ + } + if (!memcmp(name, "umask", 5)) { + /* ^ */ + return -KEY_umask; /* Weight 1658 */ + } + break; case 't': - switch (len) { - case 2: - if (strEQ(d,"tr")) return KEY_tr; - break; - case 3: - if (strEQ(d,"tie")) return KEY_tie; - break; - case 4: - if (strEQ(d,"tell")) return -KEY_tell; - if (strEQ(d,"tied")) return KEY_tied; - if (strEQ(d,"time")) return -KEY_time; - break; - case 5: - if (strEQ(d,"times")) return -KEY_times; - break; - case 7: - if (strEQ(d,"telldir")) return -KEY_telldir; - break; - case 8: - if (strEQ(d,"truncate")) return -KEY_truncate; - break; - } + if (!memcmp(name, "write", 5)) { + /* ^ */ + return -KEY_write; /* Weight 2525 */ + } + if (!memcmp(name, "fcntl", 5)) { + /* ^ */ + return -KEY_fcntl; /* Weight 1257 */ + } + if (!memcmp(name, "ioctl", 5)) { + /* ^ */ + return -KEY_ioctl; /* Weight 967 */ + } + break; + case 'w': + if (!memcmp(name, "chown", 5)) { + /* ^ */ + return -KEY_chown; /* Weight 34 */ + } + break; + } + break; + case 6: + /* Names all of length 6. */ + /* accept caller chroot delete elseif exists fileno format gmtime length + listen msgctl msgget msgrcv msgsnd printf rename return rindex scalar + select semctl semget shmctl shmget socket splice substr system unless + unlink unpack values */ + /* Offset 3 gives the best switch position. */ + switch (name[3]) { + case 'a': + if (!memcmp(name, "unpack", 6)) { + /* ^ */ + return -KEY_unpack; /* Weight 7849 */ + } + if (!memcmp(name, "rename", 6)) { + /* ^ */ + return -KEY_rename; /* Weight 4918 */ + } + break; + case 'c': + if (!memcmp(name, "semctl", 6)) { + /* ^ */ + return -KEY_semctl; /* Weight 17 */ + } + if (!memcmp(name, "msgctl", 6)) { + /* ^ */ + return -KEY_msgctl; + } + if (!memcmp(name, "shmctl", 6)) { + /* ^ */ + return -KEY_shmctl; + } + break; + case 'd': + if (!memcmp(name, "rindex", 6)) { + /* ^ */ + return -KEY_rindex; /* Weight 5005 */ + } + break; + case 'e': + if (!memcmp(name, "unless", 6)) { + /* ^ */ + return KEY_unless; /* Weight 913955 */ + } + if (!memcmp(name, "delete", 6)) { + /* ^ */ + return KEY_delete; /* Weight 74966 */ + } + if (!memcmp(name, "select", 6)) { + /* ^ */ + return -KEY_select; /* Weight 12209 */ + } + if (!memcmp(name, "fileno", 6)) { + /* ^ */ + return -KEY_fileno; /* Weight 8591 */ + } + if (!memcmp(name, "accept", 6)) { + /* ^ */ + return -KEY_accept; /* Weight 233 */ + } + if (!memcmp(name, "elseif", 6)) { + /* ^ */ + /* This is somewhat hacky. */ + if(ckWARN_d(WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif"); break; + } + break; + case 'g': + if (!memcmp(name, "length", 6)) { + /* ^ */ + return -KEY_length; /* Weight 163975 */ + } + if (!memcmp(name, "msgget", 6)) { + /* ^ */ + return -KEY_msgget; + } + if (!memcmp(name, "semget", 6)) { + /* ^ */ + return -KEY_semget; + } + if (!memcmp(name, "shmget", 6)) { + /* ^ */ + return -KEY_shmget; + } + break; + case 'i': + if (!memcmp(name, "splice", 6)) { + /* ^ */ + return -KEY_splice; /* Weight 25143 */ + } + if (!memcmp(name, "unlink", 6)) { + /* ^ */ + return -KEY_unlink; /* Weight 18616 */ + } + if (!memcmp(name, "gmtime", 6)) { + /* ^ */ + return -KEY_gmtime; /* Weight 4040 */ + } + break; + case 'k': + if (!memcmp(name, "socket", 6)) { + /* ^ */ + return -KEY_socket; + } + break; + case 'l': + if (!memcmp(name, "caller", 6)) { + /* ^ */ + return -KEY_caller; /* Weight 148457 */ + } + if (!memcmp(name, "scalar", 6)) { + /* ^ */ + return KEY_scalar; /* Weight 43953 */ + } + break; + case 'm': + if (!memcmp(name, "format", 6)) { + /* ^ */ + return KEY_format; /* Weight 1735 */ + } + break; + case 'n': + if (!memcmp(name, "printf", 6)) { + /* ^ */ + return KEY_printf; /* Weight 6874 */ + } + break; + case 'o': + if (!memcmp(name, "chroot", 6)) { + /* ^ */ + return -KEY_chroot; + } + break; + case 'r': + if (!memcmp(name, "msgrcv", 6)) { + /* ^ */ + return -KEY_msgrcv; + } + break; + case 's': + if (!memcmp(name, "exists", 6)) { + /* ^ */ + return KEY_exists; /* Weight 145939 */ + } + if (!memcmp(name, "substr", 6)) { + /* ^ */ + return -KEY_substr; /* Weight 121344 */ + } + if (!memcmp(name, "msgsnd", 6)) { + /* ^ */ + return -KEY_msgsnd; + } + break; + case 't': + if (!memcmp(name, "system", 6)) { + /* ^ */ + return -KEY_system; /* Weight 4326 */ + } + if (!memcmp(name, "listen", 6)) { + /* ^ */ + return -KEY_listen; + } + break; case 'u': - switch (len) { - case 2: - if (strEQ(d,"uc")) return -KEY_uc; - break; - case 3: - if (strEQ(d,"use")) return KEY_use; - break; - case 5: - if (strEQ(d,"undef")) return KEY_undef; - if (strEQ(d,"until")) return KEY_until; - if (strEQ(d,"untie")) return KEY_untie; - if (strEQ(d,"utime")) return -KEY_utime; - if (strEQ(d,"umask")) return -KEY_umask; - break; - case 6: - if (strEQ(d,"unless")) return KEY_unless; - if (strEQ(d,"unpack")) return -KEY_unpack; - if (strEQ(d,"unlink")) return -KEY_unlink; - break; - case 7: - if (strEQ(d,"unshift")) return -KEY_unshift; - if (strEQ(d,"ucfirst")) return -KEY_ucfirst; - break; - } - break; - case 'v': - if (strEQ(d,"values")) return -KEY_values; - if (strEQ(d,"vec")) return -KEY_vec; - break; + if (!memcmp(name, "return", 6)) { + /* ^ */ + return KEY_return; /* Weight 1401629 */ + } + if (!memcmp(name, "values", 6)) { + /* ^ */ + return -KEY_values; /* Weight 10110 */ + } + break; + } + break; + case 7: + /* Names all of length 7. */ + /* DESTROY __END__ binmode connect dbmopen defined foreach getpgrp getppid + lcfirst opendir package readdir require reverse seekdir setpgrp shmread + sprintf symlink syscall sysopen sysread sysseek telldir ucfirst unshift + waitpid */ + /* Offset 3 gives the best switch position. */ + switch (name[3]) { + case 'N': + if (!memcmp(name, "__END__", 7)) { + /* ^ */ + return KEY___END__; /* Weight 112636 */ + } + break; + case 'T': + if (!memcmp(name, "DESTROY", 7)) { + /* ^ */ + return KEY_DESTROY; /* Weight 7 */ + } + break; + case 'c': + if (!memcmp(name, "syscall", 7)) { + /* ^ */ + return -KEY_syscall; /* Weight 560 */ + } + break; + case 'd': + if (!memcmp(name, "readdir", 7)) { + /* ^ */ + return -KEY_readdir; /* Weight 11716 */ + } + break; + case 'e': + if (!memcmp(name, "foreach", 7)) { + /* ^ */ + return KEY_foreach; /* Weight 281720 */ + } + if (!memcmp(name, "reverse", 7)) { + /* ^ */ + return -KEY_reverse; /* Weight 10571 */ + } + break; + case 'h': + if (!memcmp(name, "unshift", 7)) { + /* ^ */ + return -KEY_unshift; /* Weight 36504 */ + } + break; + case 'i': + if (!memcmp(name, "defined", 7)) { + /* ^ */ + return KEY_defined; /* Weight 694277 */ + } + if (!memcmp(name, "sprintf", 7)) { + /* ^ */ + return -KEY_sprintf; /* Weight 72704 */ + } + if (!memcmp(name, "ucfirst", 7)) { + /* ^ */ + return -KEY_ucfirst; /* Weight 1012 */ + } + if (!memcmp(name, "lcfirst", 7)) { + /* ^ */ + return -KEY_lcfirst; /* Weight 165 */ + } + break; + case 'k': + if (!memcmp(name, "package", 7)) { + /* ^ */ + return KEY_package; /* Weight 245661 */ + } + if (!memcmp(name, "seekdir", 7)) { + /* ^ */ + return -KEY_seekdir; /* Weight 20 */ + } + break; + case 'l': + if (!memcmp(name, "symlink", 7)) { + /* ^ */ + return -KEY_symlink; /* Weight 386 */ + } + if (!memcmp(name, "telldir", 7)) { + /* ^ */ + return -KEY_telldir; /* Weight 294 */ + } + break; + case 'm': + if (!memcmp(name, "binmode", 7)) { + /* ^ */ + return -KEY_binmode; /* Weight 12301 */ + } + break; + case 'n': + if (!memcmp(name, "opendir", 7)) { + /* ^ */ + return -KEY_opendir; /* Weight 9007 */ + } + if (!memcmp(name, "connect", 7)) { + /* ^ */ + return -KEY_connect; /* Weight 526 */ + } + break; + case 'o': + if (!memcmp(name, "sysopen", 7)) { + /* ^ */ + return -KEY_sysopen; /* Weight 1230 */ + } + if (!memcmp(name, "dbmopen", 7)) { + /* ^ */ + return -KEY_dbmopen; + } + break; + case 'p': + if (!memcmp(name, "getppid", 7)) { + /* ^ */ + return -KEY_getppid; /* Weight 10 */ + } + if (!memcmp(name, "getpgrp", 7)) { + /* ^ */ + return -KEY_getpgrp; + } + if (!memcmp(name, "setpgrp", 7)) { + /* ^ */ + return -KEY_setpgrp; + } + break; + case 'r': + if (!memcmp(name, "sysread", 7)) { + /* ^ */ + return -KEY_sysread; /* Weight 3729 */ + } + if (!memcmp(name, "shmread", 7)) { + /* ^ */ + return -KEY_shmread; + } + break; + case 's': + if (!memcmp(name, "sysseek", 7)) { + /* ^ */ + return -KEY_sysseek; /* Weight 721 */ + } + break; + case 't': + if (!memcmp(name, "waitpid", 7)) { + /* ^ */ + return -KEY_waitpid; /* Weight 414 */ + } + break; + case 'u': + if (!memcmp(name, "require", 7)) { + /* ^ */ + return KEY_require; /* Weight 375220 */ + } + break; + } + break; + case 8: + /* Names all of length 8. */ + /* AUTOLOAD __DATA__ __FILE__ __LINE__ closedir continue dbmclose endgrent + endpwent formline getgrent getgrgid getgrnam getlogin getpwent getpwnam + getpwuid readline readlink readpipe setgrent setpwent shmwrite shutdown + syswrite truncate */ + /* Offset 3 gives the best switch position. */ + switch (name[3]) { + case 'A': + if (!memcmp(name, "__DATA__", 8)) { + /* ^ */ + return KEY___DATA__; /* Weight 395 */ + } + break; + case 'I': + if (!memcmp(name, "__FILE__", 8)) { + /* ^ */ + return -KEY___FILE__; /* Weight 888 */ + } + if (!memcmp(name, "__LINE__", 8)) { + /* ^ */ + return -KEY___LINE__; /* Weight 209 */ + } + break; + case 'O': + if (!memcmp(name, "AUTOLOAD", 8)) { + /* ^ */ + return KEY_AUTOLOAD; /* Weight 2713 */ + } + break; + case 'c': + if (!memcmp(name, "dbmclose", 8)) { + /* ^ */ + return -KEY_dbmclose; + } + break; + case 'd': + if (!memcmp(name, "readlink", 8)) { + /* ^ */ + return -KEY_readlink; /* Weight 1537 */ + } + if (!memcmp(name, "readline", 8)) { + /* ^ */ + return -KEY_readline; /* Weight 19 */ + } + if (!memcmp(name, "readpipe", 8)) { + /* ^ */ + return -KEY_readpipe; + } + break; + case 'g': + if (!memcmp(name, "getgrgid", 8)) { + /* ^ */ + return -KEY_getgrgid; /* Weight 67 */ + } + if (!memcmp(name, "getgrnam", 8)) { + /* ^ */ + return -KEY_getgrnam; /* Weight 11 */ + } + if (!memcmp(name, "endgrent", 8)) { + /* ^ */ + return -KEY_endgrent; + } + if (!memcmp(name, "getgrent", 8)) { + /* ^ */ + return -KEY_getgrent; + } + if (!memcmp(name, "setgrent", 8)) { + /* ^ */ + return -KEY_setgrent; + } + break; + case 'l': + if (!memcmp(name, "getlogin", 8)) { + /* ^ */ + return -KEY_getlogin; /* Weight 158 */ + } + break; + case 'm': + if (!memcmp(name, "formline", 8)) { + /* ^ */ + return -KEY_formline; /* Weight 959 */ + } + break; + case 'n': + if (!memcmp(name, "truncate", 8)) { + /* ^ */ + return -KEY_truncate; /* Weight 1351 */ + } + break; + case 'p': + if (!memcmp(name, "getpwuid", 8)) { + /* ^ */ + return -KEY_getpwuid; /* Weight 681 */ + } + if (!memcmp(name, "getpwnam", 8)) { + /* ^ */ + return -KEY_getpwnam; /* Weight 483 */ + } + if (!memcmp(name, "getpwent", 8)) { + /* ^ */ + return -KEY_getpwent; /* Weight 12 */ + } + if (!memcmp(name, "endpwent", 8)) { + /* ^ */ + return -KEY_endpwent; + } + if (!memcmp(name, "setpwent", 8)) { + /* ^ */ + return -KEY_setpwent; + } + break; + case 's': + if (!memcmp(name, "closedir", 8)) { + /* ^ */ + return -KEY_closedir; /* Weight 11986 */ + } + break; + case 't': + if (!memcmp(name, "continue", 8)) { + /* ^ */ + return -KEY_continue; /* Weight 2925 */ + } + if (!memcmp(name, "shutdown", 8)) { + /* ^ */ + return -KEY_shutdown; + } + break; case 'w': - switch (len) { - case 4: - if (strEQ(d,"warn")) return -KEY_warn; - if (strEQ(d,"wait")) return -KEY_wait; - break; - case 5: - if (strEQ(d,"while")) return KEY_while; - if (strEQ(d,"write")) return -KEY_write; - break; - case 7: - if (strEQ(d,"waitpid")) return -KEY_waitpid; - break; - case 9: - if (strEQ(d,"wantarray")) return -KEY_wantarray; - break; - } - break; - case 'x': - if (len == 1) return -KEY_x; - if (strEQ(d,"xor")) return -KEY_xor; - break; - case 'y': - if (len == 1) return KEY_y; - break; - case 'z': - break; + if (!memcmp(name, "syswrite", 8)) { + /* ^ */ + return -KEY_syswrite; /* Weight 4437 */ + } + if (!memcmp(name, "shmwrite", 8)) { + /* ^ */ + return -KEY_shmwrite; + } + break; } - return 0; + break; + case 9: + /* Names all of length 9. */ + /* endnetent getnetent localtime prototype quotemeta rewinddir setnetent + wantarray */ + /* Offset 0 gives the best switch position. */ + switch (*name++) { + case 'e': + if (!memcmp(name, "ndnetent", 8)) { + /* e */ + return -KEY_endnetent; + } + break; + case 'g': + if (!memcmp(name, "etnetent", 8)) { + /* g */ + return -KEY_getnetent; + } + break; + case 'l': + if (!memcmp(name, "ocaltime", 8)) { + /* l */ + return -KEY_localtime; /* Weight 7993 */ + } + break; + case 'p': + if (!memcmp(name, "rototype", 8)) { + /* p */ + return KEY_prototype; /* Weight 1602 */ + } + break; + case 'q': + if (!memcmp(name, "uotemeta", 8)) { + /* q */ + return -KEY_quotemeta; /* Weight 3120 */ + } + break; + case 'r': + if (!memcmp(name, "ewinddir", 8)) { + /* r */ + return -KEY_rewinddir; /* Weight 218 */ + } + break; + case 's': + if (!memcmp(name, "etnetent", 8)) { + /* s */ + return -KEY_setnetent; /* Weight 1 */ + } + break; + case 'w': + if (!memcmp(name, "antarray", 8)) { + /* w */ + return -KEY_wantarray; /* Weight 43024 */ + } + break; + } + break; + case 10: + /* Names all of length 10. */ + /* endhostent endservent gethostent getservent getsockopt sethostent + setservent setsockopt socketpair */ + /* Offset 6 gives the best switch position. */ + switch (name[6]) { + case 'k': + if (!memcmp(name, "setsockopt", 10)) { + /* ^ */ + return -KEY_setsockopt; /* Weight 356 */ + } + if (!memcmp(name, "getsockopt", 10)) { + /* ^ */ + return -KEY_getsockopt; /* Weight 243 */ + } + break; + case 'p': + if (!memcmp(name, "socketpair", 10)) { + /* ^ */ + return -KEY_socketpair; + } + break; + case 't': + if (!memcmp(name, "gethostent", 10)) { + /* ^ */ + return -KEY_gethostent; /* Weight 3 */ + } + if (!memcmp(name, "endhostent", 10)) { + /* ^ */ + return -KEY_endhostent; + } + if (!memcmp(name, "sethostent", 10)) { + /* ^ */ + return -KEY_sethostent; + } + break; + case 'v': + if (!memcmp(name, "getservent", 10)) { + /* ^ */ + return -KEY_getservent; /* Weight 4 */ + } + if (!memcmp(name, "endservent", 10)) { + /* ^ */ + return -KEY_endservent; + } + if (!memcmp(name, "setservent", 10)) { + /* ^ */ + return -KEY_setservent; + } + break; + } + break; + case 11: + /* Names all of length 11. */ + /* __PACKAGE__ endprotoent getpeername getpriority getprotoent getsockname + setpriority setprotoent */ + /* Offset 5 gives the best switch position. */ + switch (name[5]) { + case 'K': + if (!memcmp(name, "__PACKAGE__", 11)) { + /* ^ */ + return -KEY___PACKAGE__; /* Weight 36767 */ + } + break; + case 'c': + if (!memcmp(name, "getsockname", 11)) { + /* ^ */ + return -KEY_getsockname; /* Weight 235 */ + } + break; + case 'e': + if (!memcmp(name, "getpeername", 11)) { + /* ^ */ + return -KEY_getpeername; /* Weight 713 */ + } + break; + case 'i': + if (!memcmp(name, "getpriority", 11)) { + /* ^ */ + return -KEY_getpriority; /* Weight 5 */ + } + if (!memcmp(name, "setpriority", 11)) { + /* ^ */ + return -KEY_setpriority; /* Weight 2 */ + } + break; + case 'o': + if (!memcmp(name, "endprotoent", 11)) { + /* ^ */ + return -KEY_endprotoent; + } + if (!memcmp(name, "getprotoent", 11)) { + /* ^ */ + return -KEY_getprotoent; + } + if (!memcmp(name, "setprotoent", 11)) { + /* ^ */ + return -KEY_setprotoent; + } + break; + } + break; + case 12: + /* Names all of length 12. */ + /* getnetbyaddr getnetbyname */ + /* Offset 9 gives the best switch position. */ + switch (name[9]) { + case 'a': + if (!memcmp(name, "getnetbyname", 12)) { + /* ^ */ + return -KEY_getnetbyname; + } + break; + case 'd': + if (!memcmp(name, "getnetbyaddr", 12)) { + /* ^ */ + return -KEY_getnetbyaddr; + } + break; + } + break; + case 13: + /* Names all of length 13. */ + /* gethostbyaddr gethostbyname getservbyname getservbyport */ + /* Offset 10 gives the best switch position. */ + switch (name[10]) { + case 'a': + if (!memcmp(name, "gethostbyname", 13)) { + /* ^ */ + return -KEY_gethostbyname; /* Weight 970 */ + } + if (!memcmp(name, "getservbyname", 13)) { + /* ^ */ + return -KEY_getservbyname; /* Weight 299 */ + } + break; + case 'd': + if (!memcmp(name, "gethostbyaddr", 13)) { + /* ^ */ + return -KEY_gethostbyaddr; /* Weight 68 */ + } + break; + case 'o': + if (!memcmp(name, "getservbyport", 13)) { + /* ^ */ + return -KEY_getservbyport; + } + break; + } + break; + case 14: + if (!memcmp(name, "getprotobyname", 14)) { + return -KEY_getprotobyname; /* Weight 755 */ + } + break; + case 16: + if (!memcmp(name, "getprotobynumber", 16)) { + return -KEY_getprotobynumber; /* Weight 232 */ + } + break; + } + return 0; } STATIC void @@ -5866,7 +6939,7 @@ S_checkcomma(pTHX_ register char *s, char *name, char *what) if (*w) for (; *w && isSPACE(*w); w++) ; if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */ - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%s (...) interpreted as function",name); } } @@ -5995,6 +7068,9 @@ S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv, return res; } +/* Returns a NUL terminated string, with the length of the string written to + *slp + */ STATIC char * S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp) { @@ -6088,7 +7164,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des return s; } if (*s == '$' && s[1] && - (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) ) + (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) ) { return s; } @@ -6139,7 +7215,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) { const char *brack = *s == '[' ? "[...]" : "{...}"; - Perl_warner(aTHX_ WARN_AMBIGUOUS, + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c{%s%s} resolved to %c%s%s", funny, dest, brack, funny, dest, brack); } @@ -6163,15 +7239,17 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } if (*s == '}') { s++; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { PL_lex_state = LEX_INTERPEND; + PL_expect = XREF; + } if (funny == '#') funny = '@'; if (PL_lex_state == LEX_NORMAL) { if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest) || get_cv(dest, FALSE))) { - Perl_warner(aTHX_ WARN_AMBIGUOUS, + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c{%s} resolved to %c%s", funny, dest, funny, dest); } @@ -6188,7 +7266,7 @@ S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN des } void -Perl_pmflag(pTHX_ U16 *pmfl, int ch) +Perl_pmflag(pTHX_ U32* pmfl, int ch) { if (ch == 'i') *pmfl |= PMf_FOLD; @@ -6227,6 +7305,13 @@ S_scan_pat(pTHX_ char *start, I32 type) while (*s && strchr("iogcmsx", *s)) pmflag(&pm->op_pmflags,*s++); } + /* issue a warning if /c is specified,but /g is not */ + if (ckWARN(WARN_REGEXP) && + (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) + { + Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g); + } + pm->op_pmpermflags = pm->op_pmflags; PL_lex_op = (OP*)pm; @@ -6275,6 +7360,12 @@ S_scan_subst(pTHX_ char *start) break; } + /* /c is not meaningful with s/// */ + if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE)) + { + Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst); + } + if (es) { SV *repl; PL_sublex_info.super_bufptr = s; @@ -6326,19 +7417,28 @@ S_scan_trans(pTHX_ char *start) } complement = del = squash = 0; - while (strchr("cds", *s)) { - if (*s == 'c') + while (1) { + switch (*s) { + case 'c': complement = OPpTRANS_COMPLEMENT; - else if (*s == 'd') + break; + case 'd': del = OPpTRANS_DELETE; - else if (*s == 's') + break; + case 's': squash = OPpTRANS_SQUASH; + break; + default: + goto no_more; + } s++; } + no_more: New(803, tbl, complement&&!del?258:256, short); o = newPVOP(OP_TRANS, 0, (char*)tbl); - o->op_private = del|squash|complement| + o->op_private &= ~OPpTRANS_ALL; + o->op_private |= del|squash|complement| (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); @@ -6366,7 +7466,7 @@ S_scan_heredoc(pTHX_ register char *s) if (!outer) *d++ = '\n'; for (peek = s; SPACE_OR_TAB(*peek); peek++) ; - if (*peek && strchr("`'\"",*peek)) { + if (*peek == '`' || *peek == '\'' || *peek =='"') { s = peek; term = *s++; s = delimcpy(d, e, s, PL_bufend, term, &len); @@ -6380,7 +7480,7 @@ S_scan_heredoc(pTHX_ register char *s) else term = '"'; if (!isALNUM_lazy_if(s,UTF)) - deprecate("bare << to mean <<\"\""); + deprecate_old("bare << to mean <<\"\""); for (; isALNUM_lazy_if(s,UTF); s++) { if (d < e) *d++ = *s; @@ -6451,14 +7551,14 @@ S_scan_heredoc(pTHX_ register char *s) CopLINE_inc(PL_curcop); } if (s >= bufend) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } sv_setpvn(herewas,bufptr,d-bufptr+1); sv_setpvn(tmpstr,d+1,s-d); s += len - 1; sv_catpvn(herewas,s,bufend-s); - (void)strcpy(bufptr,SvPVX(herewas)); + Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char); s = olds; goto retval; @@ -6471,7 +7571,7 @@ S_scan_heredoc(pTHX_ register char *s) CopLINE_inc(PL_curcop); } if (s >= PL_bufend) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } sv_setpvn(tmpstr,d+1,s-d); @@ -6489,7 +7589,7 @@ S_scan_heredoc(pTHX_ register char *s) while (s >= PL_bufend) { /* multiple line string? */ if (!outer || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); missingterm(PL_tokenbuf); } CopLINE_inc(PL_curcop); @@ -6515,13 +7615,16 @@ S_scan_heredoc(pTHX_ register char *s) sv_upgrade(sv, SVt_PVMG); sv_setsv(sv,PL_linestr); + (void)SvIOK_on(sv); + SvIVX(sv) = 0; av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv); } if (*s == term && memEQ(s,PL_tokenbuf,len)) { - s = PL_bufend - 1; - *s = ' '; + STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr); + *(SvPVX(PL_linestr) + off ) = ' '; sv_catsv(PL_linestr,herewas); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */ } else { s = PL_bufend; @@ -6536,8 +7639,12 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); - if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) - SvUTF8_on(tmpstr); + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); + else if (PL_encoding) + sv_recode_to_utf8(tmpstr, PL_encoding); + } PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -6614,12 +7721,24 @@ S_scan_inputsymbol(pTHX_ char *start) return s; } else { + bool readline_overriden = FALSE; + GV *gv_readline = Nullgv; + GV **gvp; /* we're in a filehandle read situation */ d = PL_tokenbuf; /* turn <> into */ if (!len) - (void)strcpy(d,"ARGV"); + Copy("ARGV",d,5,char); + + /* Check whether readline() is overriden */ + if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV)) + && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)) + || + ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE)) + && (gv_readline = *gvp) != (GV*)&PL_sv_undef + && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))) + readline_overriden = TRUE; /* if <$fh>, create the ops to turn the variable into a filehandle @@ -6631,17 +7750,44 @@ S_scan_inputsymbol(pTHX_ char *start) add symbol table ops */ if ((tmp = pad_findmy(d)) != NOT_IN_PAD) { - OP *o = newOP(OP_PADSV, 0); - o->op_targ = tmp; - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o); + if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) { + SV *sym = sv_2mortal( + newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0)); + sv_catpvn(sym, "::", 2); + sv_catpv(sym, d+1); + d = SvPVX(sym); + goto intro_sym; + } + else { + OP *o = newOP(OP_PADSV, 0); + o->op_targ = tmp; + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, o, + newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, o); + } } else { - GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV); - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, - newUNOP(OP_RV2SV, 0, - newGVOP(OP_GV, 0, gv))); - } - PL_lex_op->op_flags |= OPf_SPECIAL; + GV *gv; + ++d; +intro_sym: + gv = gv_fetchpv(d, + (PL_in_eval + ? (GV_ADDMULTI | GV_ADDINEVAL) + : GV_ADDMULTI), + SVt_PV); + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, + newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, + newUNOP(OP_RV2SV, 0, + newGVOP(OP_GV, 0, gv))); + } + if (!readline_overriden) + PL_lex_op->op_flags |= OPf_SPECIAL; /* we created the ops in PL_lex_op, so make yylval.ival a null op */ yylval.ival = OP_NULL; } @@ -6650,7 +7796,12 @@ S_scan_inputsymbol(pTHX_ char *start) ( or ) so build a simple readline OP */ else { GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO); - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, + newGVOP(OP_GV, 0, gv), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); yylval.ival = OP_NULL; } } @@ -6712,6 +7863,10 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) register char *to; /* current position in the sv's data */ I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ + I32 termcode; /* terminating char. code */ + U8 termstr[UTF8_MAXBYTES]; /* terminating string */ + STRLEN termlen; /* length of terminating string */ + char *last = NULL; /* last position for nesting bracket */ /* skip space before the delimiter */ if (isSPACE(*s)) @@ -6722,8 +7877,16 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* after skipping whitespace, the next character is the terminator */ term = *s; - if (!UTF8_IS_INVARIANT((U8)term) && UTF) - has_utf8 = TRUE; + if (!UTF) { + termcode = termstr[0] = term; + termlen = 1; + } + else { + termcode = utf8_to_uvchr((U8*)s, &termlen); + Copy(s, termstr, termlen, U8); + if (!UTF8_IS_INVARIANT(term)) + has_utf8 = TRUE; + } /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); @@ -6731,21 +7894,92 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* find corresponding closing delimiter */ if (term && (tmps = strchr("([{< )]}> )]}>",term))) - term = tmps[5]; + termcode = termstr[0] = term = tmps[5]; + PL_multi_close = term; /* create a new SV to hold the contents. 87 is leak category, I'm assuming. 79 is the SV's initial length. What a random number. */ sv = NEWSV(87,79); sv_upgrade(sv, SVt_PVIV); - SvIVX(sv) = term; + SvIVX(sv) = termcode; (void)SvPOK_only(sv); /* validate pointer */ /* move past delimiter and try to read a complete string */ if (keep_delims) - sv_catpvn(sv, s, 1); - s++; + sv_catpvn(sv, s, termlen); + s += termlen; for (;;) { + if (PL_encoding && !UTF) { + bool cont = TRUE; + + while (cont) { + int offset = s - SvPVX(PL_linestr); + bool found = sv_cat_decode(sv, PL_encoding, PL_linestr, + &offset, (char*)termstr, termlen); + char *ns = SvPVX(PL_linestr) + offset; + char *svlast = SvEND(sv) - 1; + + for (; s < ns; s++) { + if (*s == '\n' && !PL_rsfp) + CopLINE_inc(PL_curcop); + } + if (!found) + goto read_more_line; + else { + /* handle quoted delimiters */ + if (SvCUR(sv) > 1 && *(svlast-1) == '\\') { + char *t; + for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';) + t--; + if ((svlast-1 - t) % 2) { + if (!keep_quoted) { + *(svlast-1) = term; + *svlast = '\0'; + SvCUR_set(sv, SvCUR(sv) - 1); + } + continue; + } + } + if (PL_multi_open == PL_multi_close) { + cont = FALSE; + } + else { + char *t, *w; + if (!last) + last = SvPVX(sv); + for (w = t = last; t < svlast; w++, t++) { + /* At here, all closes are "was quoted" one, + so we don't check PL_multi_close. */ + if (*t == '\\') { + if (!keep_quoted && *(t+1) == PL_multi_open) + t++; + else + *w++ = *t++; + } + else if (*t == PL_multi_open) + brackets++; + + *w = *t; + } + if (w < t) { + *w++ = term; + *w = '\0'; + SvCUR_set(sv, w - SvPVX(sv)); + } + last = w; + if (--brackets <= 0) + cont = FALSE; + } + } + } + if (!keep_delims) { + SvCUR_set(sv, SvCUR(sv) - 1); + *SvEND(sv) = '\0'; + } + break; + } + /* extend sv if need be */ SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); /* set 'to' to the next character in the sv's string */ @@ -6767,8 +8001,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) - break; + else if (*s == term) { + if (termlen == 1) + break; + if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + break; + } else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) has_utf8 = TRUE; *to = *s; @@ -6830,13 +8068,14 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) to[-1] = '\n'; #endif + read_more_line: /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ if (!PL_rsfp || !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) { sv_free(sv); - CopLINE_set(PL_curcop, PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); return Nullch; } /* we read a line, so increment our line counter */ @@ -6848,6 +8087,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) sv_upgrade(sv, SVt_PVMG); sv_setsv(sv,PL_linestr); + (void)SvIOK_on(sv); + SvIVX(sv) = 0; av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv); } @@ -6858,12 +8099,15 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (keep_delims) - sv_catpvn(sv, s, 1); - if (has_utf8) + if (!PL_encoding || UTF) { + if (keep_delims) + sv_catpvn(sv, s, termlen); + s += termlen; + } + if (has_utf8 || PL_encoding) SvUTF8_on(sv); + PL_multi_end = CopLINE(PL_curcop); - s++; /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { @@ -6940,6 +8184,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) UV u = 0; I32 shift; bool overflowed = FALSE; + bool just_zero = TRUE; /* just plain 0 or binary number? */ static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; static char* bases[5] = { "", "binary", "", "octal", "hexadecimal" }; @@ -6956,9 +8201,11 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if (s[1] == 'x') { shift = 4; s += 2; + just_zero = FALSE; } else if (s[1] == 'b') { shift = 1; s += 2; + just_zero = FALSE; } /* check for a decimal in disguise */ else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E') @@ -6971,7 +8218,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if (*s == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -6995,7 +8242,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* _ are ignored -- but warned about if consecutive */ case '_': if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; break; @@ -7030,6 +8277,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) */ digit: + just_zero = FALSE; if (!overflowed) { x = u << shift; /* make room for the digit */ @@ -7038,7 +8286,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) overflowed = TRUE; n = (NV) u; if (ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ WARN_OVERFLOW, + Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), "Integer overflow in %s number", base); } else @@ -7068,13 +8316,13 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (s[-1] == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } sv = NEWSV(92,0); if (overflowed) { if (ckWARN(WARN_PORTABLE) && n > 4294967295.0) - Perl_warner(aTHX_ WARN_PORTABLE, + Perl_warner(aTHX_ packWARN(WARN_PORTABLE), "%s number > %s non-portable", Base, max); sv_setnv(sv, n); @@ -7082,13 +8330,16 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) else { #if UVSIZE > 4 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff) - Perl_warner(aTHX_ WARN_PORTABLE, + Perl_warner(aTHX_ packWARN(WARN_PORTABLE), "%s number > %s non-portable", Base, max); #endif sv_setuv(sv, u); } - if (PL_hints & HINT_NEW_BINARY) + if (just_zero && (PL_hints & HINT_NEW_INTEGER)) + sv = new_constant(start, s - start, "integer", + sv, Nullsv, NULL); + else if (PL_hints & HINT_NEW_BINARY) sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL); } break; @@ -7111,7 +8362,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) */ if (*s == '_') { if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -7127,7 +8378,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* final misplaced underbar check */ if (lastub && s == lastub + 1) { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number"); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } /* read a decimal portion if there is one. avoid @@ -7140,7 +8391,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if (*s == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s; } @@ -7153,7 +8404,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) Perl_croak(aTHX_ number_too_long); if (*s == '_') { if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s; } @@ -7163,18 +8414,18 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* fractional part ending in underbar? */ if (s[-1] == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); } if (*s == '.' && isDIGIT(s[1])) { /* oops, it's really a v-string, but without the "v" */ - s = start - 1; + s = start; goto vstring; } } /* read exponent part, if present */ - if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) { + if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) { floatit = TRUE; s++; @@ -7184,7 +8435,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* stray preinitial _ */ if (*s == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -7196,7 +8447,7 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* stray initial _ */ if (*s == '_') { if (ckWARN(WARN_SYNTAX)) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } @@ -7212,98 +8463,46 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) if (ckWARN(WARN_SYNTAX) && ((lastub && s == lastub + 1) || (!isDIGIT(s[1]) && s[1] != '_'))) - Perl_warner(aTHX_ WARN_SYNTAX, + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number"); lastub = s++; } } } - /* terminate the string */ - *d = '\0'; /* make an sv from the string */ sv = NEWSV(92,0); -#if defined(Strtol) && defined(Strtoul) - /* - strtol/strtoll sets errno to ERANGE if the number is too big - for an integer. We try to do an integer conversion first - if no characters indicating "float" have been found. + We try to do an integer conversion first if no characters + indicating "float" have been found. */ if (!floatit) { - IV iv = 0; - UV uv = 0; - errno = 0; - if (*PL_tokenbuf == '-') - iv = Strtol(PL_tokenbuf, (char**)NULL, 10); - else - uv = Strtoul(PL_tokenbuf, (char**)NULL, 10); - if (errno) - floatit = TRUE; /* Probably just too large. */ - else if (*PL_tokenbuf == '-') - sv_setiv(sv, iv); - else if (uv <= IV_MAX) + UV uv; + int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); + + if (flags == IS_NUMBER_IN_UV) { + if (uv <= IV_MAX) sv_setiv(sv, uv); /* Prefer IVs over UVs. */ - else + else sv_setuv(sv, uv); - } + } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { + if (uv <= (UV) IV_MIN) + sv_setiv(sv, -(IV)uv); + else + floatit = TRUE; + } else + floatit = TRUE; + } if (floatit) { + /* terminate the string */ + *d = '\0'; nv = Atof(PL_tokenbuf); sv_setnv(sv, nv); } -#else - /* - No working strtou?ll?. - - Unfortunately atol() doesn't do range checks (returning - LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows) - everywhere [1], so we cannot use use atol() (or atoll()). - If we could, they would be used, as Atol(), very much like - Strtol() and Strtoul() are used above. - - [1] XXX Configure test needed to check for atol() - (and atoll()) overflow behaviour XXX - - --jhi - - We need to do this the hard way. */ - - nv = Atof(PL_tokenbuf); - - /* See if we can make do with an integer value without loss of - precision. We use U_V to cast to a UV, because some - compilers have issues. Then we try casting it back and see - if it was the same [1]. We only do this if we know we - specifically read an integer. If floatit is true, then we - don't need to do the conversion at all. - [1] Note that this is lossy if our NVs cannot preserve our - UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean) - and NV_PRESERVES_UV_BITS (a number), but in general we really - do hope all such potentially lossy platforms have strtou?ll? - to do a lossless IV/UV conversion. - - Maybe could do some tricks with DBL_DIG, LDBL_DIG and - DBL_MANT_DIG and LDBL_MANT_DIG (these are already available - as NV_DIG and NV_MANT_DIG)? - - --jhi - */ - { - UV uv = U_V(nv); - if (!floatit && (NV)uv == nv) { - if (uv <= IV_MAX) - sv_setiv(sv, uv); /* Prefer IVs over UVs. */ - else - sv_setuv(sv, uv); - } - else - sv_setnv(sv, nv); - } -#endif if ( floatit ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) sv = new_constant(PL_tokenbuf, d - PL_tokenbuf, @@ -7314,58 +8513,8 @@ Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp) /* if it starts with a v, it could be a v-string */ case 'v': vstring: - { - char *pos = s; - pos++; - while (isDIGIT(*pos) || *pos == '_') - pos++; - if (!isALPHA(*pos)) { - UV rev; - U8 tmpbuf[UTF8_MAXLEN+1]; - U8 *tmpend; - s++; /* get past 'v' */ - - sv = NEWSV(92,5); - sv_setpvn(sv, "", 0); - - for (;;) { - if (*s == '0' && isDIGIT(s[1])) - yyerror("Octal number in vector unsupported"); - rev = 0; - { - /* this is atoi() that tolerates underscores */ - char *end = pos; - UV mult = 1; - while (--end >= s) { - UV orev; - if (*end == '_') - continue; - orev = rev; - rev += (*end - '0') * mult; - mult *= 10; - if (orev > rev && ckWARN_d(WARN_OVERFLOW)) - Perl_warner(aTHX_ WARN_OVERFLOW, - "Integer overflow in decimal number"); - } - } - /* Append native character for the rev point */ - tmpend = uvchr_to_utf8(tmpbuf, rev); - sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); - if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) - SvUTF8_on(sv); - if (*pos == '.' && isDIGIT(pos[1])) - s = ++pos; - else { - s = pos; - break; - } - while (isDIGIT(*pos) || *pos == '_') - pos++; - } - SvPOK_on(sv); - SvREADONLY_on(sv); - } - } + sv = NEWSV(92,5); /* preallocate storage space */ + s = scan_vstring(s,sv); break; } @@ -7386,20 +8535,23 @@ S_scan_formline(pTHX_ register char *s) register char *t; SV *stuff = newSVpvn("",0); bool needargs = FALSE; + bool eofmt = FALSE; while (!needargs) { - if (*s == '.' || *s == /*{*/'}') { + if (*s == '.') { /*SUPPRESS 530*/ #ifdef PERL_STRICT_CR for (t = s+1;SPACE_OR_TAB(*t); t++) ; #else for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ; #endif - if (*t == '\n' || t == PL_bufend) + if (*t == '\n' || t == PL_bufend) { + eofmt = TRUE; break; + } } if (PL_in_eval && !PL_rsfp) { - eol = strchr(s,'\n'); + eol = memchr(s,'\n',PL_bufend-s); if (!eol++) eol = PL_bufend; } @@ -7414,15 +8566,19 @@ S_scan_formline(pTHX_ register char *s) if (*t == '@' || *t == '^') needargs = TRUE; } - sv_catpvn(stuff, s, eol-s); + if (eol > s) { + sv_catpvn(stuff, s, eol-s); #ifndef PERL_STRICT_CR - if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { - char *end = SvPVX(stuff) + SvCUR(stuff); - end[-2] = '\n'; - end[-1] = '\0'; - SvCUR(stuff)--; - } + if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { + char *end = SvPVX(stuff) + SvCUR(stuff); + end[-2] = '\n'; + end[-1] = '\0'; + SvCUR(stuff)--; + } #endif + } + else + break; } s = eol; if (PL_rsfp) { @@ -7432,7 +8588,6 @@ S_scan_formline(pTHX_ register char *s) PL_last_lop = PL_last_uni = Nullch; if (!s) { s = PL_bufptr; - yyerror("Format not terminated"); break; } } @@ -7448,6 +8603,12 @@ S_scan_formline(pTHX_ register char *s) } else PL_lex_state = LEX_FORMLINE; + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff))) + SvUTF8_on(stuff); + else if (PL_encoding) + sv_recode_to_utf8(stuff, PL_encoding); + } PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff); force_next(THING); PL_nextval[PL_nexttoke].ival = OP_FORMLINE; @@ -7455,7 +8616,8 @@ S_scan_formline(pTHX_ register char *s) } else { SvREFCNT_dec(stuff); - PL_lex_formbrack = 0; + if (eofmt) + PL_lex_formbrack = 0; PL_bufptr = s; } return s; @@ -7475,52 +8637,22 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) { I32 oldsavestack_ix = PL_savestack_ix; CV* outsidecv = PL_compcv; - AV* comppadlist; if (PL_compcv) { assert(SvTYPE(PL_compcv) == SVt_PVCV); } SAVEI32(PL_subline); save_item(PL_subname); - SAVEI32(PL_padix); - SAVECOMPPAD(); - SAVESPTR(PL_comppad_name); SAVESPTR(PL_compcv); - SAVEI32(PL_comppad_name_fill); - SAVEI32(PL_min_intro_pending); - SAVEI32(PL_max_intro_pending); - SAVEI32(PL_pad_reset_pending); PL_compcv = (CV*)NEWSV(1104,0); sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV); CvFLAGS(PL_compcv) |= flags; - PL_comppad = newAV(); - av_push(PL_comppad, Nullsv); - PL_curpad = AvARRAY(PL_comppad); - PL_comppad_name = newAV(); - PL_comppad_name_fill = 0; - PL_min_intro_pending = 0; - PL_padix = 0; PL_subline = CopLINE(PL_curcop); -#ifdef USE_THREADS - av_store(PL_comppad_name, 0, newSVpvn("@_", 2)); - PL_curpad[0] = (SV*)newAV(); - SvPADMY_on(PL_curpad[0]); /* XXX Needed? */ -#endif /* USE_THREADS */ - - comppadlist = newAV(); - AvREAL_off(comppadlist); - av_store(comppadlist, 0, (SV*)PL_comppad_name); - av_store(comppadlist, 1, (SV*)PL_comppad); - - CvPADLIST(PL_compcv) = comppadlist; + CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB); CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv); -#ifdef USE_THREADS - CvOWNER(PL_compcv) = 0; - New(666, CvMUTEXP(PL_compcv), 1, perl_mutex); - MUTEX_INIT(CvMUTEXP(PL_compcv)); -#endif /* USE_THREADS */ + CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; return oldsavestack_ix; } @@ -7549,26 +8681,39 @@ Perl_yyerror(pTHX_ char *s) where = "at EOF"; else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it abends/crashes on NetWare + when the script has error such as not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ +#ifndef NETWARE while (isSPACE(*PL_oldoldbufptr)) PL_oldoldbufptr++; +#endif context = PL_oldoldbufptr; contlen = PL_bufptr - PL_oldoldbufptr; } else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it abends/crashes on NetWare + when the script has error such as not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ +#ifndef NETWARE while (isSPACE(*PL_oldbufptr)) PL_oldbufptr++; +#endif context = PL_oldbufptr; contlen = PL_bufptr - PL_oldbufptr; } else if (yychar > 255) where = "next token ???"; -#ifdef USE_PURE_BISON -/* GNU Bison sets the value -2 */ - else if (yychar == -2) { -#else - else if ((yychar & 127) == 127) { -#endif + else if (yychar == -2) { /* YYEMPTY */ if (PL_lex_state == LEX_NORMAL || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) where = "at end of line"; @@ -7589,7 +8734,7 @@ Perl_yyerror(pTHX_ char *s) } msg = sv_2mortal(newSVpv(s, 0)); Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", - CopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); + OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context); else @@ -7600,17 +8745,17 @@ Perl_yyerror(pTHX_ char *s) (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); PL_multi_end = 0; } - if (PL_in_eval & EVAL_WARNONLY) - Perl_warn(aTHX_ "%"SVf, msg); + if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg); else qerror(msg); if (PL_error_count >= 10) { if (PL_in_eval && SvCUR(ERRSV)) Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n", - ERRSV, CopFILE(PL_curcop)); + ERRSV, OutCopFILE(PL_curcop)); else Perl_croak(aTHX_ "%s has too many errors.\n", - CopFILE(PL_curcop)); + OutCopFILE(PL_curcop)); } PL_in_my = 0; PL_in_my_stash = Nullhv; @@ -7625,87 +8770,103 @@ S_swallow_bom(pTHX_ U8 *s) { STRLEN slen; slen = SvCUR(PL_linestr); - switch (*s) { + switch (s[0]) { case 0xFF: if (s[1] == 0xFE) { - /* UTF-16 little-endian */ + /* UTF-16 little-endian? (or UTF32-LE?) */ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ - Perl_croak(aTHX_ "Unsupported script encoding"); + Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE"); #ifndef PERL_NO_UTF16_FILTER - DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n")); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n"); s += 2; + utf16le: if (PL_bufend > (char*)s) { U8 *news; I32 newlen; filter_add(utf16rev_textfilter, NULL); New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - PL_bufend = (char*)utf16_to_utf8_reversed(s, news, - PL_bufend - (char*)s - 1, - &newlen); - Copy(news, s, newlen, U8); - SvCUR_set(PL_linestr, newlen); - PL_bufend = SvPVX(PL_linestr) + newlen; - news[newlen++] = '\0'; + utf16_to_utf8_reversed(s, news, + PL_bufend - (char*)s - 1, + &newlen); + sv_setpvn(PL_linestr, (const char*)news, newlen); Safefree(news); + SvUTF8_on(PL_linestr); + s = (U8*)SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + newlen; } #else - Perl_croak(aTHX_ "Unsupported script encoding"); + Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE"); #endif } break; case 0xFE: - if (s[1] == 0xFF) { /* UTF-16 big-endian */ + if (s[1] == 0xFF) { /* UTF-16 big-endian? */ #ifndef PERL_NO_UTF16_FILTER - DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n")); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); s += 2; + utf16be: if (PL_bufend > (char *)s) { U8 *news; I32 newlen; filter_add(utf16_textfilter, NULL); New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8); - PL_bufend = (char*)utf16_to_utf8(s, news, - PL_bufend - (char*)s, - &newlen); - Copy(news, s, newlen, U8); - SvCUR_set(PL_linestr, newlen); - PL_bufend = SvPVX(PL_linestr) + newlen; - news[newlen++] = '\0'; + utf16_to_utf8(s, news, + PL_bufend - (char*)s, + &newlen); + sv_setpvn(PL_linestr, (const char*)news, newlen); Safefree(news); + SvUTF8_on(PL_linestr); + s = (U8*)SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + newlen; } #else - Perl_croak(aTHX_ "Unsupported script encoding"); + Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE"); #endif } break; case 0xEF: if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) { - DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n")); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); s += 3; /* UTF-8 */ } break; case 0: - if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */ - s[2] == 0xFE && s[3] == 0xFF) - { - Perl_croak(aTHX_ "Unsupported script encoding"); + if (slen > 3) { + if (s[1] == 0) { + if (s[2] == 0xFE && s[3] == 0xFF) { + /* UTF-32 big-endian */ + Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE"); + } + } + else if (s[2] == 0 && s[3] != 0) { + /* Leading bytes + * 00 xx 00 xx + * are a good indicator of UTF-16BE. */ + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); + goto utf16be; + } } + default: + if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { + /* Leading bytes + * xx 00 xx 00 + * are a good indicator of UTF-16LE. */ + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); + goto utf16le; + } } return (char*)s; } -#ifdef PERL_OBJECT -#include "XSUB.h" -#endif - /* * restore_rsfp * Restore a source filter. */ static void -restore_rsfp(pTHXo_ void *f) +restore_rsfp(pTHX_ void *f) { PerlIO *fp = (PerlIO*)f; @@ -7718,40 +8879,131 @@ restore_rsfp(pTHXo_ void *f) #ifndef PERL_NO_UTF16_FILTER static I32 -utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen) +utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) { + STRLEN old = SvCUR(sv); I32 count = FILTER_READ(idx+1, sv, maxlen); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "utf16_textfilter(%p): %d %d (%d)\n", + utf16_textfilter, idx, maxlen, (int) count)); if (count) { U8* tmps; - U8* tend; I32 newlen; New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - if (!*SvPV_nolen(sv)) - /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ - return count; - - tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); - sv_usepvn(sv, (char*)tmps, tend - tmps); + Copy(SvPVX(sv), tmps, old, char); + utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old, + SvCUR(sv) - old, &newlen); + sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); } - return count; + DEBUG_P({sv_dump(sv);}); + return SvCUR(sv); } static I32 -utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen) +utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen) { + STRLEN old = SvCUR(sv); I32 count = FILTER_READ(idx+1, sv, maxlen); + DEBUG_P(PerlIO_printf(Perl_debug_log, + "utf16rev_textfilter(%p): %d %d (%d)\n", + utf16rev_textfilter, idx, maxlen, (int) count)); if (count) { U8* tmps; - U8* tend; I32 newlen; - if (!*SvPV_nolen(sv)) - /* Game over, but don't feed an odd-length string to utf16_to_utf8 */ - return count; - New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8); - tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen); - sv_usepvn(sv, (char*)tmps, tend - tmps); + Copy(SvPVX(sv), tmps, old, char); + utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old, + SvCUR(sv) - old, &newlen); + sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); } + DEBUG_P({ sv_dump(sv); }); return count; } #endif + +/* +Returns a pointer to the next character after the parsed +vstring, as well as updating the passed in sv. + +Function must be called like + + sv = NEWSV(92,5); + s = scan_vstring(s,sv); + +The sv should already be large enough to store the vstring +passed in, for performance reasons. + +*/ + +char * +Perl_scan_vstring(pTHX_ char *s, SV *sv) +{ + char *pos = s; + char *start = s; + if (*pos == 'v') pos++; /* get past 'v' */ + while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_')) + pos++; + if ( *pos != '.') { + /* this may not be a v-string if followed by => */ + char *next = pos; + while (next < PL_bufend && isSPACE(*next)) + ++next; + if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) { + /* return string not v-string */ + sv_setpvn(sv,(char *)s,pos-s); + return pos; + } + } + + if (!isALPHA(*pos)) { + UV rev; + U8 tmpbuf[UTF8_MAXBYTES+1]; + U8 *tmpend; + + if (*s == 'v') s++; /* get past 'v' */ + + sv_setpvn(sv, "", 0); + + for (;;) { + rev = 0; + { + /* this is atoi() that tolerates underscores */ + char *end = pos; + UV mult = 1; + while (--end >= s) { + UV orev; + if (*end == '_') + continue; + orev = rev; + rev += (*end - '0') * mult; + mult *= 10; + if (orev > rev && ckWARN_d(WARN_OVERFLOW)) + Perl_warner(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in decimal number"); + } + } +#ifdef EBCDIC + if (rev > 0x7FFFFFFF) + Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647"); +#endif + /* Append native character for the rev point */ + tmpend = uvchr_to_utf8(tmpbuf, rev); + sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); + if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev))) + SvUTF8_on(sv); + if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1])) + s = ++pos; + else { + s = pos; + break; + } + while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_')) + pos++; + } + SvPOK_on(sv); + sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); + SvRMAGICAL_on(sv); + } + return s; +} +