X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/73351a7160e044aa54e64f7da1c78c3401a64c7b..54700c0d575024f07079e3baa1cbd5ee8446a5ea:/toke.c diff --git a/toke.c b/toke.c index 41e6930..ea99050 100644 --- a/toke.c +++ b/toke.c @@ -38,11 +38,10 @@ Individual members of C have their own documentation. #include "EXTERN.h" #define PERL_IN_TOKE_C #include "perl.h" -#include "dquote_inline.h" #include "invlist_inline.h" #define new_constant(a,b,c,d,e,f,g, h) \ - S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) + S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h) #define pl_yylval (PL_parser->yylval) @@ -94,7 +93,8 @@ Individual members of C have their own documentation. (SvTYPE(sv) >= SVt_PVNV \ && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen) -static const char* const ident_too_long = "Identifier too long"; +static const char ident_too_long[] = "Identifier too long"; +static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'"; # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke] @@ -138,15 +138,24 @@ static const char* const ident_too_long = "Identifier too long"; #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */ #define LEX_INTERPSTART 6 /* expecting the start of a $var */ - /* at end of code, eg "$x" followed by: */ + /* at end of code, eg "$x" followed by: */ #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */ #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */ #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of - string or after \E, $foo, etc */ + string or after \E, $foo, etc */ #define LEX_INTERPCONST 2 /* NOT USED */ #define LEX_FORMLINE 1 /* expecting a format line */ +/* returned to yyl_try() to request it to retry the parse loop, expected to only + be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof() + can also return it. + + yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1, + other token values are 258 or higher (see perly.h), so -1 should be + a safe value here. +*/ +#define YYL_RETRY (-1) #ifdef DEBUGGING static const char* const lex_state_names[] = { @@ -196,8 +205,10 @@ static const char* const lex_state_names[] = { * Aop : addition-level operator * AopNOASSIGN : addition-level operator that is never part of .= * Mop : multiplication-level operator - * Eop : equality-testing operator - * Rop : relational operator <= != gt + * ChEop : chaining equality-testing operator + * NCEop : non-chaining comparison operator at equality precedence + * ChRop : chaining relational operator <= != gt + * NCRop : non-chaining relational operator isa * * Also see LOP and lop() below. */ @@ -217,9 +228,9 @@ static const char* const lex_state_names[] = { #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval)) #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1])) #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \ - pl_yylval.ival=f, \ - PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ - REPORT((int)LOOPEX)) + pl_yylval.ival=f, \ + PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \ + REPORT((int)LOOPEX)) #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP)) #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0)) #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP)) @@ -227,15 +238,17 @@ static const char* const lex_state_names[] = { #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP)) #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP)) #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \ - REPORT('~') + REPORT(PERLY_TILDE) #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP)) #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP)) #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP)) #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP)) #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP)) #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP)) -#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP)) -#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP)) +#define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP)) +#define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP)) +#define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP)) +#define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP)) /* This bit of chicanery makes a unary function followed by * a parenthesis into a function with one argument, highest precedence. @@ -243,50 +256,124 @@ static const char* const lex_state_names[] = { * operator (such as C). */ #define UNI3(f,x,have_x) { \ - pl_yylval.ival = f; \ - if (have_x) PL_expect = x; \ - PL_bufptr = s; \ - PL_last_uni = PL_oldbufptr; \ - PL_last_lop_op = (f) < 0 ? -(f) : (f); \ - if (*s == '(') \ - return REPORT( (int)FUNC1 ); \ - s = skipspace(s); \ - return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ - } + pl_yylval.ival = f; \ + if (have_x) PL_expect = x; \ + PL_bufptr = s; \ + PL_last_uni = PL_oldbufptr; \ + PL_last_lop_op = (f) < 0 ? -(f) : (f); \ + if (*s == '(') \ + return REPORT( (int)FUNC1 ); \ + s = skipspace(s); \ + return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \ + } #define UNI(f) UNI3(f,XTERM,1) #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1) #define UNIPROTO(f,optional) { \ - if (optional) PL_last_uni = PL_oldbufptr; \ - OPERATOR(f); \ - } + if (optional) PL_last_uni = PL_oldbufptr; \ + OPERATOR(f); \ + } #define UNIBRACK(f) UNI3(f,0,0) -/* grandfather return to old style */ +/* return has special case parsing. + * + * List operators have low precedence. Functions have high precedence. + * Every built in, *except return*, if written with () around its arguments, is + * parsed as a function. Hence every other list built in: + * + * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9 + * 429 + * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5 + * 639 + * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()' + * Useless use of a constant (2) in void context at -e line 1. + * Useless use of a constant (4) in void context at -e line 1. + * + * $ + * + * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a + * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string. + * + * Whereas return: + * + * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()' + * 2 + * 4 + * 9 + * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()' + * Useless use of a constant (2) in void context at -e line 1. + * Useless use of a constant (4) in void context at -e line 1. + * 9 + * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()' + * Useless use of a constant (2) in void context at -e line 1. + * Useless use of a constant (4) in void context at -e line 1. + * 9 + * $ + * + * and: + * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()' + * 2 + * 4 + * 6 + * + * This last example is what we expect, but it's clearly inconsistent with how + * C *ought* to behave, if the rules were consistently + * followed. + * + * + * Perl 3 attempted to be consistent: + * + * The rules are more consistent about where parens are needed and + * where they are not. In particular, unary operators and list operators now + * behave like functions if they're called like functions. + * + * However, the behaviour for return was reverted to the "old" parsing with + * patches 9-12: + * + * The construct + * return (1,2,3); + * did not do what was expected, since return was swallowing the + * parens in order to consider itself a function. The solution, + * since return never wants any trailing expression such as + * return (1,2,3) + 2; + * is to simply make return an exception to the paren-makes-a-function + * rule, and treat it the way it always was, so that it doesn't + * strip the parens. + * + * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with + * LOP(OP_RETURN, XTERM); + * + * and constructs such as + * + * return (Internals::V())[2] + * + * turn into syntax errors + */ + #define OLDLOP(f) \ - do { \ - if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ - pl_yylval.ival = (f); \ - PL_expect = XTERM; \ - PL_bufptr = s; \ - return (int)LSTOP; \ - } while(0) + do { \ + if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \ + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \ + pl_yylval.ival = (f); \ + PL_expect = XTERM; \ + PL_bufptr = s; \ + return (int)LSTOP; \ + } while(0) #define COPLINE_INC_WITH_HERELINES \ STMT_START { \ - CopLINE_inc(PL_curcop); \ - if (PL_parser->herelines) \ - CopLINE(PL_curcop) += PL_parser->herelines, \ - PL_parser->herelines = 0; \ + CopLINE_inc(PL_curcop); \ + if (PL_parser->herelines) \ + CopLINE(PL_curcop) += PL_parser->herelines, \ + PL_parser->herelines = 0; \ } STMT_END /* Called after scan_str to update CopLINE(PL_curcop), but only when there * is no sublex_push to follow. */ #define COPLINE_SET_FROM_MULTI_END \ STMT_START { \ - CopLINE_set(PL_curcop, PL_multi_end); \ - if (PL_multi_end != PL_multi_start) \ - PL_parser->herelines = 0; \ + CopLINE_set(PL_curcop, PL_multi_end); \ + if (PL_multi_end != PL_multi_start) \ + PL_parser->herelines = 0; \ } STMT_END @@ -303,7 +390,6 @@ struct code { static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE }; - #ifdef DEBUGGING /* how to interpret the pl_yylval associated with the token */ @@ -315,6 +401,9 @@ enum token_type { TOKENTYPE_OPVAL }; +#define DEBUG_TOKEN(Type, Name) \ + { Name, TOKENTYPE_##Type, #Name } + static struct debug_tokens { const int token; enum token_type type; @@ -330,17 +419,18 @@ static struct debug_tokens { { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, { BITOROP, TOKENTYPE_OPNUM, "BITOROP" }, + { CATCH, TOKENTYPE_IVAL, "CATCH" }, + { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" }, + { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" }, { COLONATTR, TOKENTYPE_NONE, "COLONATTR" }, { CONTINUE, TOKENTYPE_NONE, "CONTINUE" }, { DEFAULT, TOKENTYPE_NONE, "DEFAULT" }, { DO, TOKENTYPE_NONE, "DO" }, { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" }, { DORDOR, TOKENTYPE_NONE, "DORDOR" }, - { DOROP, TOKENTYPE_OPNUM, "DOROP" }, { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" }, { ELSE, TOKENTYPE_NONE, "ELSE" }, { ELSIF, TOKENTYPE_IVAL, "ELSIF" }, - { EQOP, TOKENTYPE_OPNUM, "EQOP" }, { FOR, TOKENTYPE_IVAL, "FOR" }, { FORMAT, TOKENTYPE_NONE, "FORMAT" }, { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" }, @@ -363,11 +453,33 @@ static struct debug_tokens { { METHOD, TOKENTYPE_OPVAL, "METHOD" }, { MULOP, TOKENTYPE_OPNUM, "MULOP" }, { MY, TOKENTYPE_IVAL, "MY" }, + { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" }, + { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" }, { NOAMP, TOKENTYPE_NONE, "NOAMP" }, { NOTOP, TOKENTYPE_NONE, "NOTOP" }, { OROP, TOKENTYPE_IVAL, "OROP" }, { OROR, TOKENTYPE_NONE, "OROR" }, { PACKAGE, TOKENTYPE_NONE, "PACKAGE" }, + DEBUG_TOKEN (IVAL, PERLY_AMPERSAND), + DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE), + DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN), + DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE), + DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN), + DEBUG_TOKEN (IVAL, PERLY_COLON), + DEBUG_TOKEN (IVAL, PERLY_COMMA), + DEBUG_TOKEN (IVAL, PERLY_DOT), + DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN), + DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK), + DEBUG_TOKEN (IVAL, PERLY_MINUS), + DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN), + DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN), + DEBUG_TOKEN (IVAL, PERLY_PLUS), + DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK), + DEBUG_TOKEN (IVAL, PERLY_SEMICOLON), + DEBUG_TOKEN (IVAL, PERLY_SLASH), + DEBUG_TOKEN (IVAL, PERLY_SNAIL), + DEBUG_TOKEN (IVAL, PERLY_STAR), + DEBUG_TOKEN (IVAL, PERLY_TILDE), { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" }, { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" }, { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" }, @@ -380,7 +492,6 @@ static struct debug_tokens { { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" }, { QWLIST, TOKENTYPE_OPVAL, "QWLIST" }, { REFGEN, TOKENTYPE_NONE, "REFGEN" }, - { RELOP, TOKENTYPE_OPNUM, "RELOP" }, { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, { SIGSUB, TOKENTYPE_NONE, "SIGSUB" }, @@ -388,6 +499,7 @@ static struct debug_tokens { { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" }, { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" }, { THING, TOKENTYPE_OPVAL, "THING" }, + { TRY, TOKENTYPE_IVAL, "TRY" }, { UMINUS, TOKENTYPE_NONE, "UMINUS" }, { UNIOP, TOKENTYPE_OPNUM, "UNIOP" }, { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" }, @@ -401,6 +513,8 @@ static struct debug_tokens { { 0, TOKENTYPE_NONE, NULL } }; +#undef DEBUG_TOKEN + /* dump the returned token in rv, plus any optional arg in pl_yylval */ STATIC int @@ -409,57 +523,57 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) PERL_ARGS_ASSERT_TOKEREPORT; if (DEBUG_T_TEST) { - const char *name = NULL; - enum token_type type = TOKENTYPE_NONE; - const struct debug_tokens *p; - SV* const report = newSVpvs("<== "); - - for (p = debug_tokens; p->token; p++) { - if (p->token == (int)rv) { - name = p->name; - type = p->type; - break; - } - } - if (name) - Perl_sv_catpv(aTHX_ report, name); - else if (isGRAPH(rv)) - { - Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); - if ((char)rv == 'p') - sv_catpvs(report, " (pending identifier)"); - } - else if (!rv) - sv_catpvs(report, "EOF"); - else - Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); - switch (type) { - case TOKENTYPE_NONE: - break; - case TOKENTYPE_IVAL: - Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); - break; - case TOKENTYPE_OPNUM: - Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", - PL_op_name[lvalp->ival]); - break; - case TOKENTYPE_PVAL: - Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); - break; - case TOKENTYPE_OPVAL: - if (lvalp->opval) { - Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", - PL_op_name[lvalp->opval->op_type]); - if (lvalp->opval->op_type == OP_CONST) { - Perl_sv_catpvf(aTHX_ report, " %s", - SvPEEK(cSVOPx_sv(lvalp->opval))); - } - - } - else - sv_catpvs(report, "(opval=null)"); - break; - } + const char *name = NULL; + enum token_type type = TOKENTYPE_NONE; + const struct debug_tokens *p; + SV* const report = newSVpvs("<== "); + + for (p = debug_tokens; p->token; p++) { + if (p->token == (int)rv) { + name = p->name; + type = p->type; + break; + } + } + if (name) + Perl_sv_catpv(aTHX_ report, name); + else if (isGRAPH(rv)) + { + Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); + if ((char)rv == 'p') + sv_catpvs(report, " (pending identifier)"); + } + else if (!rv) + sv_catpvs(report, "EOF"); + else + Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv); + switch (type) { + case TOKENTYPE_NONE: + break; + case TOKENTYPE_IVAL: + Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival); + break; + case TOKENTYPE_OPNUM: + Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)", + PL_op_name[lvalp->ival]); + break; + case TOKENTYPE_PVAL: + Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval); + break; + case TOKENTYPE_OPVAL: + if (lvalp->opval) { + Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)", + PL_op_name[lvalp->opval->op_type]); + if (lvalp->opval->op_type == OP_CONST) { + Perl_sv_catpvf(aTHX_ report, " %s", + SvPEEK(cSVOPx_sv(lvalp->opval))); + } + + } + else + sv_catpvs(report, "(opval=null)"); + break; + } PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report)); }; return (int)rv; @@ -494,14 +608,15 @@ STATIC int S_ao(pTHX_ int toketype) { if (*PL_bufptr == '=') { - PL_bufptr++; - if (toketype == ANDAND) - pl_yylval.ival = OP_ANDASSIGN; - else if (toketype == OROR) - pl_yylval.ival = OP_ORASSIGN; - else if (toketype == DORDOR) - pl_yylval.ival = OP_DORASSIGN; - toketype = ASSIGNOP; + PL_bufptr++; + + switch (toketype) { + case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break; + case OROR: pl_yylval.ival = OP_ORASSIGN; break; + case DORDOR: pl_yylval.ival = OP_DORASSIGN; break; + } + + toketype = ASSIGNOP; } return REPORT(toketype); } @@ -531,36 +646,36 @@ S_no_op(pTHX_ const char *const what, char *s) PERL_ARGS_ASSERT_NO_OP; if (!s) - s = oldbp; + s = oldbp; else - PL_bufptr = s; + PL_bufptr = s; yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); if (ckWARN_d(WARN_SYNTAX)) { - if (is_first) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing semicolon on previous line?)\n"); + if (is_first) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Missing semicolon on previous line?)\n"); else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr, PL_bufend, UTF)) { - const char *t; - for (t = PL_oldoldbufptr; + const char *t; + for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':'); t += UTF ? UTF8SKIP(t) : 1) { - NOOP; - } - if (t < PL_bufptr && isSPACE(*t)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Do you need to predeclare %" UTF8f "?)\n", - UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); - } - else { - assert(s >= oldbp); - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing operator before %" UTF8f "?)\n", - UTF8fARG(UTF, s - oldbp, oldbp)); - } + NOOP; + } + if (t < PL_bufptr && isSPACE(*t)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Do you need to predeclare %" UTF8f "?)\n", + UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); + } + else { + assert(s >= oldbp); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Missing operator before %" UTF8f "?)\n", + UTF8fARG(UTF, s - oldbp, oldbp)); + } } PL_bufptr = oldbp; } @@ -580,42 +695,38 @@ S_missingterm(pTHX_ char *s, STRLEN len) char tmpbuf[UTF8_MAXBYTES + 1]; char q; bool uni = FALSE; - SV *sv; if (s) { - char * const nl = (char *) my_memrchr(s, '\n', len); + char * const nl = (char *) my_memrchr(s, '\n', len); if (nl) { *nl = '\0'; len = nl - s; } - uni = UTF; + uni = UTF; } else if (PL_multi_close < 32) { - *tmpbuf = '^'; - tmpbuf[1] = (char)toCTRL(PL_multi_close); - tmpbuf[2] = '\0'; - s = tmpbuf; + *tmpbuf = '^'; + tmpbuf[1] = (char)toCTRL(PL_multi_close); + tmpbuf[2] = '\0'; + s = tmpbuf; len = 2; } else { - if (LIKELY(PL_multi_close < 256)) { - *tmpbuf = (char)PL_multi_close; - tmpbuf[1] = '\0'; + if (! UTF && LIKELY(PL_multi_close < 256)) { + *tmpbuf = (char)PL_multi_close; + tmpbuf[1] = '\0'; len = 1; - } - else { + } + else { char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); *end = '\0'; len = end - tmpbuf; - uni = TRUE; - } - s = tmpbuf; + uni = TRUE; + } + s = tmpbuf; } q = memchr(s, '"', len) ? '\'' : '"'; - sv = sv_2mortal(newSVpvn(s, len)); - if (uni) - SvUTF8_on(sv); - Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c" - " anywhere before EOF", q, SVfARG(sv), q); + Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c" + " anywhere before EOF", q, UTF8fARG(uni, len, s), q); } #include "feature.h" @@ -636,18 +747,18 @@ strip_return(SV *sv) /* outer loop optimized to do nothing if there are no CR-LFs */ while (s < e) { - if (*s++ == '\r' && *s == '\n') { - /* hit a CR-LF, need to copy the rest */ - char *d = s - 1; - *d++ = *s++; - while (s < e) { - if (*s == '\r' && s[1] == '\n') - s++; - *d++ = *s++; - } - SvCUR(sv) -= s - d; - return; - } + if (*s++ == '\r' && *s == '\n') { + /* hit a CR-LF, need to copy the rest */ + char *d = s - 1; + *d++ = *s++; + while (s < e) { + if (*s == '\r' && s[1] == '\n') + s++; + *d++ = *s++; + } + SvCUR(sv) -= s - d; + return; + } } } @@ -656,7 +767,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) { const I32 count = FILTER_READ(idx+1, sv, maxlen); if (count > 0 && !maxlen) - strip_return(sv); + strip_return(sv); return count; } #endif @@ -701,7 +812,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) yy_parser *parser, *oparser; if (flags && flags & ~LEX_START_FLAGS) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); /* create and initialise a parser */ @@ -741,10 +852,10 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) Newxz(parser->lex_shared, 1, LEXSHARED); if (line) { - STRLEN len; + STRLEN len; const U8* first_bad_char_loc; - s = SvPV_const(line, len); + s = SvPV_const(line, len); if ( SvUTF8(line) && UNLIKELY(! is_utf8_string_loc((U8 *) s, @@ -758,19 +869,19 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) NOT_REACHED; /* NOTREACHED */ } - parser->linestr = flags & LEX_START_COPIED - ? SvREFCNT_inc_simple_NN(line) - : newSVpvn_flags(s, len, SvUTF8(line)); - if (!rsfp) - sv_catpvs(parser->linestr, "\n;"); + parser->linestr = flags & LEX_START_COPIED + ? SvREFCNT_inc_simple_NN(line) + : newSVpvn_flags(s, len, SvUTF8(line)); + if (!rsfp) + sv_catpvs(parser->linestr, "\n;"); } else { - parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); + parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); } parser->oldoldbufptr = - parser->oldbufptr = - parser->bufptr = - parser->linestart = SvPVX(parser->linestr); + parser->oldbufptr = + parser->bufptr = + parser->linestart = SvPVX(parser->linestr); parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; @@ -794,10 +905,10 @@ Perl_parser_free(pTHX_ const yy_parser *parser) SvREFCNT_dec(parser->linestr); if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) - PerlIO_clearerr(parser->rsfp); + PerlIO_clearerr(parser->rsfp); else if (parser->rsfp && (!parser->old_parser || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) - PerlIO_close(parser->rsfp); + PerlIO_close(parser->rsfp); SvREFCNT_dec(parser->rsfp_filters); SvREFCNT_dec(parser->lex_stuff); SvREFCNT_dec(parser->lex_sub_repl); @@ -815,13 +926,13 @@ Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab) I32 nexttoke = parser->nexttoke; PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS; while (nexttoke--) { - if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) - && parser->nextval[nexttoke].opval - && parser->nextval[nexttoke].opval->op_slabbed - && OpSLAB(parser->nextval[nexttoke].opval) == slab) { - op_free(parser->nextval[nexttoke].opval); - parser->nextval[nexttoke].opval = NULL; - } + if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff) + && parser->nextval[nexttoke].opval + && parser->nextval[nexttoke].opval->op_slabbed + && OpSLAB(parser->nextval[nexttoke].opval) == slab) { + op_free(parser->nextval[nexttoke].opval); + parser->nextval[nexttoke].opval = NULL; + } } } @@ -950,7 +1061,7 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) linestr = PL_parser->linestr; buf = SvPVX(linestr); if (len <= SvLEN(linestr)) - return buf; + return buf; /* Is the lex_shared linestr SV the same as the current linestr SV? * Only in this case does re_eval_start need adjusting, since it @@ -976,9 +1087,9 @@ Perl_lex_grow_linestr(pTHX_ STRLEN len) PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; PL_parser->linestart = buf + linestart_pos; if (PL_parser->last_uni) - PL_parser->last_uni = buf + last_uni_pos; + PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) - PL_parser->last_lop = buf + last_lop_pos; + PL_parser->last_lop = buf + last_lop_pos; if (current && PL_parser->lex_shared->re_eval_start) PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos; return buf; @@ -1011,73 +1122,72 @@ function is more convenient. void Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) { - dVAR; char *bufptr; PERL_ARGS_ASSERT_LEX_STUFF_PVN; if (flags & ~(LEX_STUFF_UTF8)) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn"); if (UTF) { - if (flags & LEX_STUFF_UTF8) { - goto plain_copy; - } else { - STRLEN highhalf = variant_under_utf8_count((U8 *) pv, + if (flags & LEX_STUFF_UTF8) { + goto plain_copy; + } else { + STRLEN highhalf = variant_under_utf8_count((U8 *) pv, (U8 *) pv + len); const char *p, *e = pv+len;; - if (!highhalf) - goto plain_copy; - lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); - bufptr = PL_parser->bufptr; - Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); - SvCUR_set(PL_parser->linestr, - SvCUR(PL_parser->linestr) + len+highhalf); - PL_parser->bufend += len+highhalf; - for (p = pv; p != e; p++) { + if (!highhalf) + goto plain_copy; + lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf); + bufptr = PL_parser->bufptr; + Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char); + SvCUR_set(PL_parser->linestr, + SvCUR(PL_parser->linestr) + len+highhalf); + PL_parser->bufend += len+highhalf; + for (p = pv; p != e; p++) { append_utf8_from_native_byte(*p, (U8 **) &bufptr); - } - } + } + } } else { - if (flags & LEX_STUFF_UTF8) { - STRLEN highhalf = 0; - const char *p, *e = pv+len; - for (p = pv; p != e; p++) { - U8 c = (U8)*p; - if (UTF8_IS_ABOVE_LATIN1(c)) { - Perl_croak(aTHX_ "Lexing code attempted to stuff " - "non-Latin-1 character into Latin-1 input"); - } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { - p++; - highhalf++; + if (flags & LEX_STUFF_UTF8) { + STRLEN highhalf = 0; + const char *p, *e = pv+len; + for (p = pv; p != e; p++) { + U8 c = (U8)*p; + if (UTF8_IS_ABOVE_LATIN1(c)) { + Perl_croak(aTHX_ "Lexing code attempted to stuff " + "non-Latin-1 character into Latin-1 input"); + } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { + p++; + highhalf++; } else assert(UTF8_IS_INVARIANT(c)); - } - if (!highhalf) - goto plain_copy; - lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); - bufptr = PL_parser->bufptr; - Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); - SvCUR_set(PL_parser->linestr, - SvCUR(PL_parser->linestr) + len-highhalf); - PL_parser->bufend += len-highhalf; - p = pv; - while (p < e) { - if (UTF8_IS_INVARIANT(*p)) { - *bufptr++ = *p; + } + if (!highhalf) + goto plain_copy; + lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf); + bufptr = PL_parser->bufptr; + Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char); + SvCUR_set(PL_parser->linestr, + SvCUR(PL_parser->linestr) + len-highhalf); + PL_parser->bufend += len-highhalf; + p = pv; + while (p < e) { + if (UTF8_IS_INVARIANT(*p)) { + *bufptr++ = *p; p++; - } - else { + } + else { assert(p < e -1 ); - *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); - p += 2; + *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1)); + p += 2; } - } - } else { - plain_copy: - lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); - bufptr = PL_parser->bufptr; - Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); - SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); - PL_parser->bufend += len; - Copy(pv, bufptr, len, char); - } + } + } else { + plain_copy: + lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len); + bufptr = PL_parser->bufptr; + Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char); + SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len); + PL_parser->bufend += len; + Copy(pv, bufptr, len, char); + } } } @@ -1137,7 +1247,7 @@ Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags) STRLEN len; PERL_ARGS_ASSERT_LEX_STUFF_SV; if (flags) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv"); pv = SvPV(sv, len); lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0)); } @@ -1164,12 +1274,12 @@ Perl_lex_unstuff(pTHX_ char *ptr) PERL_ARGS_ASSERT_LEX_UNSTUFF; buf = PL_parser->bufptr; if (ptr < buf) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); if (ptr == buf) - return; + return; bufend = PL_parser->bufend; if (ptr > bufend) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff"); unstuff_len = ptr - buf; Move(ptr, buf, bufend+1-ptr, char); SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len); @@ -1198,12 +1308,12 @@ Perl_lex_read_to(pTHX_ char *ptr) PERL_ARGS_ASSERT_LEX_READ_TO; s = PL_parser->bufptr; if (ptr < s || ptr > PL_parser->bufend) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to"); for (; s != ptr; s++) - if (*s == '\n') { - COPLINE_INC_WITH_HERELINES; - PL_parser->linestart = s+1; - } + if (*s == '\n') { + COPLINE_INC_WITH_HERELINES; + PL_parser->linestart = s+1; + } PL_parser->bufptr = ptr; } @@ -1235,20 +1345,20 @@ Perl_lex_discard_to(pTHX_ char *ptr) PERL_ARGS_ASSERT_LEX_DISCARD_TO; buf = SvPVX(PL_parser->linestr); if (ptr < buf) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); if (ptr == buf) - return; + return; if (ptr > PL_parser->bufptr) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to"); discard_len = ptr - buf; if (PL_parser->oldbufptr < ptr) - PL_parser->oldbufptr = ptr; + PL_parser->oldbufptr = ptr; if (PL_parser->oldoldbufptr < ptr) - PL_parser->oldoldbufptr = ptr; + PL_parser->oldoldbufptr = ptr; if (PL_parser->last_uni && PL_parser->last_uni < ptr) - PL_parser->last_uni = NULL; + PL_parser->last_uni = NULL; if (PL_parser->last_lop && PL_parser->last_lop < ptr) - PL_parser->last_lop = NULL; + PL_parser->last_lop = NULL; Move(ptr, buf, PL_parser->bufend+1-ptr, char); SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len); PL_parser->bufend -= discard_len; @@ -1256,9 +1366,9 @@ Perl_lex_discard_to(pTHX_ char *ptr) PL_parser->oldbufptr -= discard_len; PL_parser->oldoldbufptr -= discard_len; if (PL_parser->last_uni) - PL_parser->last_uni -= discard_len; + PL_parser->last_uni -= discard_len; if (PL_parser->last_lop) - PL_parser->last_lop -= discard_len; + PL_parser->last_lop -= discard_len; } void @@ -1318,64 +1428,64 @@ Perl_lex_next_chunk(pTHX_ U32 flags) bool got_some; if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); if (!(flags & LEX_NO_TERM) && PL_lex_inwhat) - return FALSE; + return FALSE; linestr = PL_parser->linestr; buf = SvPVX(linestr); if (!(flags & LEX_KEEP_PREVIOUS) && PL_parser->bufptr == PL_parser->bufend) { - old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; - linestart_pos = 0; - if (PL_parser->last_uni != PL_parser->bufend) - PL_parser->last_uni = NULL; - if (PL_parser->last_lop != PL_parser->bufend) - PL_parser->last_lop = NULL; - last_uni_pos = last_lop_pos = 0; - *buf = 0; - SvCUR_set(linestr, 0); + old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0; + linestart_pos = 0; + if (PL_parser->last_uni != PL_parser->bufend) + PL_parser->last_uni = NULL; + if (PL_parser->last_lop != PL_parser->bufend) + PL_parser->last_lop = NULL; + last_uni_pos = last_lop_pos = 0; + *buf = 0; + SvCUR_set(linestr, 0); } else { - old_bufend_pos = PL_parser->bufend - buf; - bufptr_pos = PL_parser->bufptr - buf; - oldbufptr_pos = PL_parser->oldbufptr - buf; - oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; - linestart_pos = PL_parser->linestart - buf; - last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; - last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; + old_bufend_pos = PL_parser->bufend - buf; + bufptr_pos = PL_parser->bufptr - buf; + oldbufptr_pos = PL_parser->oldbufptr - buf; + oldoldbufptr_pos = PL_parser->oldoldbufptr - buf; + linestart_pos = PL_parser->linestart - buf; + last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0; + last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0; } if (flags & LEX_FAKE_EOF) { - goto eof; + goto eof; } else if (!PL_parser->rsfp && !PL_parser->filtered) { - got_some = 0; + got_some = 0; } else if (filter_gets(linestr, old_bufend_pos)) { - got_some = 1; - got_some_for_debugger = 1; + got_some = 1; + got_some_for_debugger = 1; } else if (flags & LEX_NO_TERM) { - got_some = 0; + got_some = 0; } else { - if (!SvPOK(linestr)) /* can get undefined by filter_gets */ + if (!SvPOK(linestr)) /* can get undefined by filter_gets */ SvPVCLEAR(linestr); - eof: - /* End of real input. Close filehandle (unless it was STDIN), - * then add implicit termination. - */ - if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) - PerlIO_clearerr(PL_parser->rsfp); - else if (PL_parser->rsfp) - (void)PerlIO_close(PL_parser->rsfp); - PL_parser->rsfp = NULL; - PL_parser->in_pod = PL_parser->filtered = 0; - if (!PL_in_eval && PL_minus_p) { - sv_catpvs(linestr, - /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); - PL_minus_n = PL_minus_p = 0; - } else if (!PL_in_eval && PL_minus_n) { - sv_catpvs(linestr, /*{*/";}"); - PL_minus_n = 0; - } else - sv_catpvs(linestr, ";"); - got_some = 1; + eof: + /* End of real input. Close filehandle (unless it was STDIN), + * then add implicit termination. + */ + if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) + PerlIO_clearerr(PL_parser->rsfp); + else if (PL_parser->rsfp) + (void)PerlIO_close(PL_parser->rsfp); + PL_parser->rsfp = NULL; + PL_parser->in_pod = PL_parser->filtered = 0; + if (!PL_in_eval && PL_minus_p) { + sv_catpvs(linestr, + /*{*/";}continue{print or die qq(-p destination: $!\\n);}"); + PL_minus_n = PL_minus_p = 0; + } else if (!PL_in_eval && PL_minus_n) { + sv_catpvs(linestr, /*{*/";}"); + PL_minus_n = 0; + } else + sv_catpvs(linestr, ";"); + got_some = 1; } buf = SvPVX(linestr); new_bufend_pos = SvCUR(linestr); @@ -1401,22 +1511,22 @@ Perl_lex_next_chunk(pTHX_ U32 flags) PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; PL_parser->linestart = buf + linestart_pos; if (PL_parser->last_uni) - PL_parser->last_uni = buf + last_uni_pos; + PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) - PL_parser->last_lop = buf + last_lop_pos; + PL_parser->last_lop = buf + last_lop_pos; if (PL_parser->preambling != NOLINE) { - CopLINE_set(PL_curcop, PL_parser->preambling + 1); - PL_parser->preambling = NOLINE; + CopLINE_set(PL_curcop, PL_parser->preambling + 1); + PL_parser->preambling = NOLINE; } if ( got_some_for_debugger && PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) { - /* debugger active and we're not compiling the debugger code, - * so store the line into the debugger's array of lines - */ - update_debugger_info(NULL, buf+old_bufend_pos, - new_bufend_pos-old_bufend_pos); + /* debugger active and we're not compiling the debugger code, + * so store the line into the debugger's array of lines + */ + update_debugger_info(NULL, buf+old_bufend_pos, + new_bufend_pos-old_bufend_pos); } return got_some; } @@ -1443,50 +1553,49 @@ is encountered, an exception is generated. I32 Perl_lex_peek_unichar(pTHX_ U32 flags) { - dVAR; char *s, *bufend; if (flags & ~(LEX_KEEP_PREVIOUS)) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar"); s = PL_parser->bufptr; bufend = PL_parser->bufend; if (UTF) { - U8 head; - I32 unichar; - STRLEN len, retlen; - if (s == bufend) { - if (!lex_next_chunk(flags)) - return -1; - s = PL_parser->bufptr; - bufend = PL_parser->bufend; - } - head = (U8)*s; - if (UTF8_IS_INVARIANT(head)) - return head; - if (UTF8_IS_START(head)) { - len = UTF8SKIP(&head); - while ((STRLEN)(bufend-s) < len) { - if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) - break; - s = PL_parser->bufptr; - bufend = PL_parser->bufend; - } - } - unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); - if (retlen == (STRLEN)-1) { + U8 head; + I32 unichar; + STRLEN len, retlen; + if (s == bufend) { + if (!lex_next_chunk(flags)) + return -1; + s = PL_parser->bufptr; + bufend = PL_parser->bufend; + } + head = (U8)*s; + if (UTF8_IS_INVARIANT(head)) + return head; + if (UTF8_IS_START(head)) { + len = UTF8SKIP(&head); + while ((STRLEN)(bufend-s) < len) { + if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS)) + break; + s = PL_parser->bufptr; + bufend = PL_parser->bufend; + } + } + unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY); + if (retlen == (STRLEN)-1) { _force_out_malformed_utf8_message((U8 *) s, (U8 *) bufend, 0, 1 /* 1 means die */ ); NOT_REACHED; /* NOTREACHED */ - } - return unichar; + } + return unichar; } else { - if (s == bufend) { - if (!lex_next_chunk(flags)) - return -1; - s = PL_parser->bufptr; - } - return (U8)*s; + if (s == bufend) { + if (!lex_next_chunk(flags)) + return -1; + s = PL_parser->bufptr; + } + return (U8)*s; } } @@ -1515,15 +1624,15 @@ Perl_lex_read_unichar(pTHX_ U32 flags) { I32 c; if (flags & ~(LEX_KEEP_PREVIOUS)) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar"); c = lex_peek_unichar(flags); if (c != -1) { - if (c == '\n') - COPLINE_INC_WITH_HERELINES; - if (UTF) - PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); - else - ++(PL_parser->bufptr); + if (c == '\n') + COPLINE_INC_WITH_HERELINES; + if (UTF) + PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr); + else + ++(PL_parser->bufptr); } return c; } @@ -1555,49 +1664,49 @@ Perl_lex_read_space(pTHX_ U32 flags) const bool can_incline = !(flags & LEX_NO_INCLINE); bool need_incline = 0; if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE)) - Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); + Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space"); s = PL_parser->bufptr; bufend = PL_parser->bufend; while (1) { - char c = *s; - if (c == '#') { - do { - c = *++s; - } while (!(c == '\n' || (c == 0 && s == bufend))); - } else if (c == '\n') { - s++; - if (can_incline) { - PL_parser->linestart = s; - if (s == bufend) - need_incline = 1; - else - incline(s, bufend); - } - } else if (isSPACE(c)) { - s++; - } else if (c == 0 && s == bufend) { - bool got_more; - line_t l; - if (flags & LEX_NO_NEXT_CHUNK) - break; - PL_parser->bufptr = s; - l = CopLINE(PL_curcop); - CopLINE(PL_curcop) += PL_parser->herelines + 1; - got_more = lex_next_chunk(flags); - CopLINE_set(PL_curcop, l); - s = PL_parser->bufptr; - bufend = PL_parser->bufend; - if (!got_more) - break; - if (can_incline && need_incline && PL_parser->rsfp) { - incline(s, bufend); - need_incline = 0; - } - } else if (!c) { - s++; - } else { - break; - } + char c = *s; + if (c == '#') { + do { + c = *++s; + } while (!(c == '\n' || (c == 0 && s == bufend))); + } else if (c == '\n') { + s++; + if (can_incline) { + PL_parser->linestart = s; + if (s == bufend) + need_incline = 1; + else + incline(s, bufend); + } + } else if (isSPACE(c)) { + s++; + } else if (c == 0 && s == bufend) { + bool got_more; + line_t l; + if (flags & LEX_NO_NEXT_CHUNK) + break; + PL_parser->bufptr = s; + l = CopLINE(PL_curcop); + CopLINE(PL_curcop) += PL_parser->herelines + 1; + got_more = lex_next_chunk(flags); + CopLINE_set(PL_curcop, l); + s = PL_parser->bufptr; + bufend = PL_parser->bufend; + if (!got_more) + break; + if (can_incline && need_incline && PL_parser->rsfp) { + incline(s, bufend); + need_incline = 0; + } + } else if (!c) { + s++; + } else { + break; + } } PL_parser->bufptr = s; } @@ -1638,75 +1747,75 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) PERL_ARGS_ASSERT_VALIDATE_PROTO; if (!proto) - return TRUE; + return TRUE; p = SvPV(proto, len); origlen = len; for (; len--; p++) { - if (!isSPACE(*p)) { - if (must_be_last) - proto_after_greedy_proto = TRUE; - if (underscore) { - if (!memCHRs(";@%", *p)) - bad_proto_after_underscore = TRUE; - underscore = FALSE; - } - if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { - bad_proto = TRUE; - } - else { - if (*p == '[') - in_brackets = TRUE; - else if (*p == ']') - in_brackets = FALSE; - else if ((*p == '@' || *p == '%') + if (!isSPACE(*p)) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (underscore) { + if (!memCHRs(";@%", *p)) + bad_proto_after_underscore = TRUE; + underscore = FALSE; + } + if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') { + bad_proto = TRUE; + } + else { + if (*p == '[') + in_brackets = TRUE; + else if (*p == ']') + in_brackets = FALSE; + else if ((*p == '@' || *p == '%') && !after_slash && !in_brackets ) { - must_be_last = TRUE; - greedy_proto = *p; - } - else if (*p == '_') - underscore = TRUE; - } - if (*p == '\\') - after_slash = TRUE; - else - after_slash = FALSE; - } + must_be_last = TRUE; + greedy_proto = *p; + } + else if (*p == '_') + underscore = TRUE; + } + if (*p == '\\') + after_slash = TRUE; + else + after_slash = FALSE; + } } if (warn) { - SV *tmpsv = newSVpvs_flags("", SVs_TEMP); - p -= origlen; - p = SvUTF8(proto) - ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), - origlen, UNI_DISPLAY_ISPRINT) - : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); - - if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { - SV *name2 = sv_2mortal(newSVsv(PL_curstname)); - sv_catpvs(name2, "::"); - sv_catsv(name2, (SV *)name); - name = name2; - } - - if (proto_after_greedy_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Prototype after '%c' for %" SVf " : %s", - greedy_proto, SVfARG(name), p); - if (in_brackets) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Missing ']' in prototype for %" SVf " : %s", - SVfARG(name), p); - if (bad_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character in prototype for %" SVf " : %s", - SVfARG(name), p); - if (bad_proto_after_underscore) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character after '_' in prototype for %" SVf " : %s", - SVfARG(name), p); + SV *tmpsv = newSVpvs_flags("", SVs_TEMP); + p -= origlen; + p = SvUTF8(proto) + ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), + origlen, UNI_DISPLAY_ISPRINT) + : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + + if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { + SV *name2 = sv_2mortal(newSVsv(PL_curstname)); + sv_catpvs(name2, "::"); + sv_catsv(name2, (SV *)name); + name = name2; + } + + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Prototype after '%c' for %" SVf " : %s", + greedy_proto, SVfARG(name), p); + if (in_brackets) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Missing ']' in prototype for %" SVf " : %s", + SVfARG(name), p); + if (bad_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character in prototype for %" SVf " : %s", + SVfARG(name), p); + if (bad_proto_after_underscore) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character after '_' in prototype for %" SVf " : %s", + SVfARG(name), p); } return (! (proto_after_greedy_proto || bad_proto) ); @@ -1738,110 +1847,110 @@ S_incline(pTHX_ const char *s, const char *end) COPLINE_INC_WITH_HERELINES; if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL && s+1 == PL_bufend && *s == ';') { - /* fake newline in string eval */ - CopLINE_dec(PL_curcop); - return; + /* fake newline in string eval */ + CopLINE_dec(PL_curcop); + return; } if (*s++ != '#') - return; + return; while (SPACE_OR_TAB(*s)) - s++; + s++; if (memBEGINs(s, (STRLEN) (end - s), "line")) - s += sizeof("line") - 1; + s += sizeof("line") - 1; else - return; + return; if (SPACE_OR_TAB(*s)) - s++; + s++; else - return; + return; while (SPACE_OR_TAB(*s)) - s++; + s++; if (!isDIGIT(*s)) - return; + return; n = s; while (isDIGIT(*s)) - s++; + s++; if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0') - return; + return; while (SPACE_OR_TAB(*s)) - s++; + s++; if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { - s++; - e = t + 1; + s++; + e = t + 1; } else { - t = s; - while (*t && !isSPACE(*t)) - t++; - e = t; + t = s; + while (*t && !isSPACE(*t)) + t++; + e = t; } while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f') - e++; + e++; if (*e != '\n' && *e != '\0') - return; /* false alarm */ + return; /* false alarm */ if (!grok_atoUV(n, &uv, &e)) return; line_num = ((line_t)uv) - 1; if (t - s > 0) { - const STRLEN len = t - s; - - if (!PL_rsfp && !PL_parser->filtered) { - /* must copy *{"::_<(eval N)[oldfilename:L]"} - * to *{"::_ 0) { - AV * const av2 = GvAVn(gv2); - SV **svp = AvARRAY(av) + start; - Size_t l = line_num+1; - while (items-- && l < SSize_t_MAX && l == (line_t)l) - av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); - } - } - } - - if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); - } - } - CopFILE_free(PL_curcop); - CopFILE_setn(PL_curcop, s, len); + const STRLEN len = t - s; + + if (!PL_rsfp && !PL_parser->filtered) { + /* must copy *{"::_<(eval N)[oldfilename:L]"} + * to *{"::_ 0) { + AV * const av2 = GvAVn(gv2); + SV **svp = AvARRAY(av) + start; + Size_t l = line_num+1; + while (items-- && l < SSize_t_MAX && l == (line_t)l) + av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); + } + } + } + + if (tmpbuf2 != smallbuf) Safefree(tmpbuf2); + } + } + CopFILE_free(PL_curcop); + CopFILE_setn(PL_curcop, s, len); } CopLINE_set(PL_curcop, line_num); } @@ -1851,23 +1960,23 @@ S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len) { AV *av = CopFILEAVx(PL_curcop); if (av) { - SV * sv; - if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); - else { - sv = *av_fetch(av, 0, 1); - SvUPGRADE(sv, SVt_PVMG); - } + SV * sv; + if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG); + else { + sv = *av_fetch(av, 0, 1); + SvUPGRADE(sv, SVt_PVMG); + } if (!SvPOK(sv)) SvPVCLEAR(sv); - if (orig_sv) - sv_catsv(sv, orig_sv); - else - sv_catpvn(sv, buf, len); - if (!SvIOK(sv)) { - (void)SvIOK_on(sv); - SvIV_set(sv, 0); - } - if (PL_parser->preambling == NOLINE) - av_store(av, CopLINE(PL_curcop), sv); + if (orig_sv) + sv_catsv(sv, orig_sv); + else + sv_catpvn(sv, buf, len); + if (!SvIOK(sv)) { + (void)SvIOK_on(sv); + SvIV_set(sv, 0); + } + if (PL_parser->preambling == NOLINE) + av_store(av, CopLINE(PL_curcop), sv); } } @@ -1890,19 +1999,19 @@ Perl_skipspace_flags(pTHX_ char *s, U32 flags) { PERL_ARGS_ASSERT_SKIPSPACE_FLAGS; if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { - while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) - s++; + while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s)) + s++; } else { - STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); - PL_bufptr = s; - lex_read_space(flags | LEX_KEEP_PREVIOUS | - (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? - LEX_NO_NEXT_CHUNK : 0)); - s = PL_bufptr; - PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; - if (PL_linestart > PL_bufptr) - PL_bufptr = PL_linestart; - return s; + STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr); + PL_bufptr = s; + lex_read_space(flags | LEX_KEEP_PREVIOUS | + (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ? + LEX_NO_NEXT_CHUNK : 0)); + s = PL_bufptr; + PL_bufptr = SvPVX(PL_linestr) + bufptr_pos; + if (PL_linestart > PL_bufptr) + PL_bufptr = PL_linestart; + return s; } return s; } @@ -1922,18 +2031,18 @@ S_check_uni(pTHX) const char *s; if (PL_oldoldbufptr != PL_last_uni) - return; + return; while (isSPACE(*PL_last_uni)) - PL_last_uni++; + PL_last_uni++; s = PL_last_uni; while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') - s += UTF ? UTF8SKIP(s) : 1; + s += UTF ? UTF8SKIP(s) : 1; if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) - return; + return; Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", - UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); + "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous", + UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni)); } /* @@ -1965,18 +2074,18 @@ S_lop(pTHX_ I32 f, U8 x, char *s) PL_last_lop = PL_oldbufptr; PL_last_lop_op = (OPCODE)f; if (PL_nexttoke) - goto lstop; + goto lstop; PL_expect = x; if (*s == '(') - return REPORT(FUNC); + return REPORT(FUNC); s = skipspace(s); if (*s == '(') - return REPORT(FUNC); + return REPORT(FUNC); else { - lstop: - if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) - PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; - return REPORT(LSTOP); + lstop: + if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) + PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; + return REPORT(LSTOP); } } @@ -1995,7 +2104,7 @@ S_force_next(pTHX_ I32 type) #ifdef DEBUGGING if (DEBUG_T_TEST) { PerlIO_printf(Perl_debug_log, "### forced token:\n"); - tokereport(type, &NEXTVAL_NEXTTOKE); + tokereport(type, &NEXTVAL_NEXTTOKE); } #endif assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype)); @@ -2015,24 +2124,30 @@ S_force_next(pTHX_ I32 type) static int S_postderef(pTHX_ int const funny, char const next) { - assert(funny == DOLSHARP || memCHRs("$@%&*", funny)); + assert(funny == DOLSHARP + || funny == PERLY_DOLLAR + || funny == PERLY_SNAIL + || funny == PERLY_PERCENT_SIGN + || funny == PERLY_AMPERSAND + || funny == PERLY_STAR + ); if (next == '*') { - PL_expect = XOPERATOR; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { - assert('@' == funny || '$' == funny || DOLSHARP == funny); - PL_lex_state = LEX_INTERPEND; - if ('@' == funny) - force_next(POSTJOIN); - } - force_next(next); - PL_bufptr+=2; + PL_expect = XOPERATOR; + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { + assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny); + PL_lex_state = LEX_INTERPEND; + if (PERLY_SNAIL == funny) + force_next(POSTJOIN); + } + force_next(PERLY_STAR); + PL_bufptr+=2; } else { - if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL - && !PL_lex_brackets) - PL_lex_dojoin = 2; - PL_expect = XOPERATOR; - PL_bufptr++; + if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets) + PL_lex_dojoin = 2; + PL_expect = XOPERATOR; + PL_bufptr++; } return funny; } @@ -2042,19 +2157,19 @@ Perl_yyunlex(pTHX) { int yyc = PL_parser->yychar; if (yyc != YYEMPTY) { - if (yyc) { - NEXTVAL_NEXTTOKE = PL_parser->yylval; - if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) { - PL_lex_allbrackets--; - PL_lex_brackets--; - yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); - } else if (yyc == '('/*)*/) { - PL_lex_allbrackets--; - yyc |= (2<<24); - } - force_next(yyc); - } - PL_parser->yychar = YYEMPTY; + if (yyc) { + NEXTVAL_NEXTTOKE = PL_parser->yylval; + if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) { + PL_lex_allbrackets--; + PL_lex_brackets--; + yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16); + } else if (yyc == PERLY_PAREN_OPEN) { + PL_lex_allbrackets--; + yyc |= (2<<24); + } + force_next(yyc); + } + PL_parser->yychar = YYEMPTY; } } @@ -2099,30 +2214,30 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || (allow_pack && *s == ':' && s[1] == ':') ) { - s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); - if (check_keyword) { - char *s2 = PL_tokenbuf; - STRLEN len2 = len; - if (allow_pack && memBEGINPs(s2, len, "CORE::")) { - s2 += sizeof("CORE::") - 1; + s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); + if (check_keyword) { + char *s2 = PL_tokenbuf; + STRLEN len2 = len; + if (allow_pack && memBEGINPs(s2, len, "CORE::")) { + s2 += sizeof("CORE::") - 1; len2 -= sizeof("CORE::") - 1; } - if (keyword(s2, len2, 0)) - return start; - } - if (token == METHOD) { - s = skipspace(s); - if (*s == '(') - PL_expect = XTERM; - else { - PL_expect = XOPERATOR; - } - } - NEXTVAL_NEXTTOKE.opval + if (keyword(s2, len2, 0)) + return start; + } + if (token == METHOD) { + s = skipspace(s); + if (*s == '(') + PL_expect = XTERM; + else { + PL_expect = XOPERATOR; + } + } + NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST,0, - S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); - NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; - force_next(token); + S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len)); + NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; + force_next(token); } return s; } @@ -2142,25 +2257,25 @@ S_force_ident(pTHX_ const char *s, int kind) PERL_ARGS_ASSERT_FORCE_IDENT; if (s[0]) { - const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ + const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */ OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0)); - NEXTVAL_NEXTTOKE.opval = o; - force_next(BAREWORD); - if (kind) { - o->op_private = OPpCONST_ENTERED; - /* XXX see note in pp_entereval() for why we forgo typo - warnings if the symbol must be introduced in an eval. - GSAR 96-10-12 */ - gv_fetchpvn_flags(s, len, - (PL_in_eval ? GV_ADDMULTI - : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), - kind == '$' ? SVt_PV : - kind == '@' ? SVt_PVAV : - kind == '%' ? SVt_PVHV : - SVt_PVGV - ); - } + NEXTVAL_NEXTTOKE.opval = o; + force_next(BAREWORD); + if (kind) { + o->op_private = OPpCONST_ENTERED; + /* XXX see note in pp_entereval() for why we forgo typo + warnings if the symbol must be introduced in an eval. + GSAR 96-10-12 */ + gv_fetchpvn_flags(s, len, + (PL_in_eval ? GV_ADDMULTI + : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), + kind == PERLY_DOLLAR ? SVt_PV : + kind == PERLY_SNAIL ? SVt_PVAV : + kind == PERLY_PERCENT_SIGN ? SVt_PVHV : + SVt_PVGV + ); + } } } @@ -2184,17 +2299,17 @@ Perl_str_to_version(pTHX_ SV *sv) PERL_ARGS_ASSERT_STR_TO_VERSION; while (start < end) { - STRLEN skip; - UV n; - if (utf) - n = utf8n_to_uvchr((U8*)start, len, &skip, 0); - else { - n = *(U8*)start; - skip = 1; - } - retval += ((NV)n)/nshift; - start += skip; - nshift *= 1000; + STRLEN skip; + UV n; + if (utf) + n = utf8n_to_uvchr((U8*)start, len, &skip, 0); + else { + n = *(U8*)start; + skip = 1; + } + retval += ((NV)n)/nshift; + start += skip; + nshift *= 1000; } return retval; } @@ -2219,24 +2334,24 @@ S_force_version(pTHX_ char *s, int guessing) d = s; if (*d == 'v') - d++; + d++; if (isDIGIT(*d)) { - while (isDIGIT(*d) || *d == '_' || *d == '.') - d++; + while (isDIGIT(*d) || *d == '_' || *d == '.') + d++; if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { - SV *ver; + SV *ver; s = scan_num(s, &pl_yylval); version = pl_yylval.opval; - ver = cSVOPx(version)->op_sv; - if (SvPOK(ver) && !SvNIOK(ver)) { - SvUPGRADE(ver, SVt_PVNV); - SvNV_set(ver, str_to_version(ver)); - SvNOK_on(ver); /* hint that it is a version */ - } + ver = cSVOPx(version)->op_sv; + if (SvPOK(ver) && !SvNIOK(ver)) { + SvUPGRADE(ver, SVt_PVNV); + SvNV_set(ver, str_to_version(ver)); + SvNOK_on(ver); /* hint that it is a version */ + } + } + else if (guessing) { + return s; } - else if (guessing) { - return s; - } } /* NOTE: The parser sees the package name and the VERSION swapped */ @@ -2260,20 +2375,20 @@ S_force_strict_version(pTHX_ char *s) PERL_ARGS_ASSERT_FORCE_STRICT_VERSION; while (isSPACE(*s)) /* leading whitespace */ - s++; + s++; if (is_STRICT_VERSION(s,&errstr)) { - SV *ver = newSV(0); - s = (char *)scan_version(s, ver, 0); - version = newSVOP(OP_CONST, 0, ver); + SV *ver = newSV_type(SVt_NULL); + s = (char *)scan_version(s, ver, 0); + version = newSVOP(OP_CONST, 0, ver); } else if ((*s != ';' && *s != '{' && *s != '}' ) && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' ))) { - PL_bufptr = s; - if (errstr) - yyerror(errstr); /* version required */ - return s; + PL_bufptr = s; + if (errstr) + yyerror(errstr); /* version required */ + return s; } /* NOTE: The parser sees the package name and the VERSION swapped */ @@ -2304,25 +2419,25 @@ S_tokeq(pTHX_ SV *sv) assert (SvLEN(sv)); assert (!SvIsCOW(sv)); if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */ - goto finish; + goto finish; s = SvPVX(sv); send = SvEND(sv); /* This is relying on the SV being "well formed" with a trailing '\0' */ while (s < send && !(*s == '\\' && s[1] == '\\')) - s++; + s++; if (s == send) - goto finish; + goto finish; d = s; if ( PL_hints & HINT_NEW_STRING ) { - pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), - SVs_TEMP | SvUTF8(sv)); + pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv), + SVs_TEMP | SvUTF8(sv)); } while (s < send) { - if (*s == '\\') { - if (s + 1 < send && (s[1] == '\\')) - s++; /* all that, just for this */ - } - *d++ = *s++; + if (*s == '\\') { + if (s + 1 < send && (s[1] == '\\')) + s++; /* all that, just for this */ + } + *d++ = *s++; } *d = '\0'; SvCUR_set(sv, d - SvPVX_const(sv)); @@ -2366,25 +2481,25 @@ S_sublex_start(pTHX) const I32 op_type = pl_yylval.ival; if (op_type == OP_NULL) { - pl_yylval.opval = PL_lex_op; - PL_lex_op = NULL; - return THING; + pl_yylval.opval = PL_lex_op; + PL_lex_op = NULL; + return THING; } if (op_type == OP_CONST) { - SV *sv = PL_lex_stuff; - PL_lex_stuff = NULL; - sv = tokeq(sv); - - if (SvTYPE(sv) == SVt_PVIV) { - /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ - STRLEN len; - const char * const p = SvPV_const(sv, len); - SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); - SvREFCNT_dec(sv); - sv = nsv; - } + SV *sv = PL_lex_stuff; + PL_lex_stuff = NULL; + sv = tokeq(sv); + + if (SvTYPE(sv) == SVt_PVIV) { + /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ + STRLEN len; + const char * const p = SvPV_const(sv, len); + SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); + SvREFCNT_dec(sv); + sv = nsv; + } pl_yylval.opval = newSVOP(op_type, 0, sv); - return THING; + return THING; } PL_parser->lex_super_state = PL_lex_state; @@ -2396,12 +2511,12 @@ S_sublex_start(pTHX) PL_expect = XTERM; if (PL_lex_op) { - pl_yylval.opval = PL_lex_op; - PL_lex_op = NULL; - return PMFUNC; + pl_yylval.opval = PL_lex_op; + PL_lex_op = NULL; + return PMFUNC; } else - return FUNC; + return FUNC; } /* @@ -2433,10 +2548,10 @@ S_sublex_push(pTHX) SAVEI16(PL_lex_inwhat); if (is_heredoc) { - SAVECOPLINE(PL_curcop); - SAVEI32(PL_multi_end); - SAVEI32(PL_parser->herelines); - PL_parser->herelines = 0; + SAVECOPLINE(PL_curcop); + SAVEI32(PL_multi_end); + SAVEI32(PL_parser->herelines); + PL_parser->herelines = 0; } SAVEIV(PL_multi_close); SAVEPPTR(PL_bufptr); @@ -2455,7 +2570,7 @@ S_sublex_push(pTHX) /* The here-doc parser needs to be able to peek into outer lexing scopes to find the body of the here-doc. So we put PL_linestr and - PL_bufptr into lex_shared, to ‘share’ those values. + PL_bufptr into lex_shared, to 'share' those values. */ PL_parser->lex_shared->ls_linestr = PL_linestr; PL_parser->lex_shared->ls_bufptr = PL_bufptr; @@ -2473,7 +2588,7 @@ S_sublex_push(pTHX) SAVEGENERICSV(PL_parser->lex_sub_repl); PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart - = SvPVX(PL_linestr); + = SvPVX(PL_linestr); PL_bufend += SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; SAVEFREESV(PL_linestr); @@ -2490,7 +2605,7 @@ S_sublex_push(pTHX) PL_lex_starts = 0; PL_lex_state = LEX_INTERPCONCAT; if (is_heredoc) - CopLINE_set(PL_curcop, (line_t)PL_multi_start); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); PL_copline = NOLINE; Newxz(shared, 1, LEXSHARED); @@ -2500,9 +2615,9 @@ S_sublex_push(pTHX) PL_lex_inwhat = PL_parser->lex_sub_inwhat; if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS; if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST) - PL_lex_inpat = PL_parser->lex_sub_op; + PL_lex_inpat = PL_parser->lex_sub_op; else - PL_lex_inpat = NULL; + PL_lex_inpat = NULL; PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); PL_in_eval &= ~EVAL_RE_REPARSING; @@ -2519,71 +2634,129 @@ STATIC I32 S_sublex_done(pTHX) { if (!PL_lex_starts++) { - SV * const sv = newSVpvs(""); - if (SvUTF8(PL_linestr)) - SvUTF8_on(sv); - PL_expect = XOPERATOR; + SV * const sv = newSVpvs(""); + if (SvUTF8(PL_linestr)) + SvUTF8_on(sv); + PL_expect = XOPERATOR; pl_yylval.opval = newSVOP(OP_CONST, 0, sv); - return THING; + return THING; } if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */ - PL_lex_state = LEX_INTERPCASEMOD; - return yylex(); + PL_lex_state = LEX_INTERPCASEMOD; + return yylex(); } /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */ assert(PL_lex_inwhat != OP_TRANSR); if (PL_lex_repl) { - assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); - PL_linestr = PL_lex_repl; - PL_lex_inpat = 0; - PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); - PL_bufend += SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - PL_lex_dojoin = FALSE; - PL_lex_brackets = 0; - PL_lex_allbrackets = 0; - PL_lex_fakeeof = LEX_FAKEEOF_NEVER; - PL_lex_casemods = 0; - *PL_lex_casestack = '\0'; - PL_lex_starts = 0; - if (SvEVALED(PL_lex_repl)) { - PL_lex_state = LEX_INTERPNORMAL; - PL_lex_starts++; - /* we don't clear PL_lex_repl here, so that we can check later - whether this is an evalled subst; that means we rely on the - logic to ensure sublex_done() is called again only via the - branch (in yylex()) that clears PL_lex_repl, else we'll loop */ - } - else { - PL_lex_state = LEX_INTERPCONCAT; - PL_lex_repl = NULL; - } - if (SvTYPE(PL_linestr) >= SVt_PVNV) { - CopLINE(PL_curcop) += - ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines - + PL_parser->herelines; - PL_parser->herelines = 0; - } - return '/'; + assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS); + PL_linestr = PL_lex_repl; + PL_lex_inpat = 0; + PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); + PL_bufend += SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + PL_lex_dojoin = FALSE; + PL_lex_brackets = 0; + PL_lex_allbrackets = 0; + PL_lex_fakeeof = LEX_FAKEEOF_NEVER; + PL_lex_casemods = 0; + *PL_lex_casestack = '\0'; + PL_lex_starts = 0; + if (SvEVALED(PL_lex_repl)) { + PL_lex_state = LEX_INTERPNORMAL; + PL_lex_starts++; + /* we don't clear PL_lex_repl here, so that we can check later + whether this is an evalled subst; that means we rely on the + logic to ensure sublex_done() is called again only via the + branch (in yylex()) that clears PL_lex_repl, else we'll loop */ + } + else { + PL_lex_state = LEX_INTERPCONCAT; + PL_lex_repl = NULL; + } + if (SvTYPE(PL_linestr) >= SVt_PVNV) { + CopLINE(PL_curcop) += + ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines + + PL_parser->herelines; + PL_parser->herelines = 0; + } + return PERLY_SLASH; } else { - const line_t l = CopLINE(PL_curcop); - LEAVE; + const line_t l = CopLINE(PL_curcop); + LEAVE; if (PL_parser->sub_error_count != PL_error_count) { if (PL_parser->sub_no_recover) { yyquit(); NOT_REACHED; } } - if (PL_multi_close == '<') - PL_parser->herelines += l - PL_multi_end; - PL_bufend = SvPVX(PL_linestr); - PL_bufend += SvCUR(PL_linestr); - PL_expect = XOPERATOR; - return SUBLEXEND; + if (PL_multi_close == '<') + PL_parser->herelines += l - PL_multi_end; + PL_bufend = SvPVX(PL_linestr); + PL_bufend += SvCUR(PL_linestr); + PL_expect = XOPERATOR; + return SUBLEXEND; + } +} + +HV * +Perl_load_charnames(pTHX_ SV * char_name, const char * context, + const STRLEN context_len, const char ** error_msg) +{ + /* Load the official _charnames module if not already there. The + * parameters are just to give info for any error messages generated: + * char_name a name to look up which is the reason for loading this + * context 'char_name' in the context in the input in which it appears + * context_len how many bytes 'context' occupies + * error_msg *error_msg will be set to any error + * + * Returns the ^H table if success; otherwise NULL */ + + unsigned int i; + HV * table; + SV **cvp; + SV * res; + + PERL_ARGS_ASSERT_LOAD_CHARNAMES; + + /* This loop is executed 1 1/2 times. On the first time through, if it + * isn't already loaded, try loading it, and iterate just once to see if it + * worked. */ + for (i = 0; i < 2; i++) { + table = GvHV(PL_hintgv); /* ^H */ + + if ( table + && (PL_hints & HINT_LOCALIZE_HH) + && (cvp = hv_fetchs(table, "charnames", FALSE)) + && SvOK(*cvp)) + { + return table; /* Quit if already loaded */ + } + + if (i == 0) { + Perl_load_module(aTHX_ + 0, + newSVpvs("_charnames"), + + /* version parameter; no need to specify it, as if we get too early + * a version, will fail anyway, not being able to find 'charnames' + * */ + NULL, + newSVpvs(":full"), + newSVpvs(":short"), + NULL); + } } + + /* Here, it failed; new_constant will give appropriate error messages */ + *error_msg = NULL; + res = new_constant( NULL, 0, "charnames", char_name, NULL, + context, context_len, error_msg); + SvREFCNT_dec(res); + + return NULL; } STATIC SV* @@ -2599,7 +2772,7 @@ S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const /* charnames doesn't work well if there have been errors found */ if (PL_error_count > 0) { - return NULL; + return NULL; } result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg); @@ -2613,7 +2786,7 @@ S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const SV* Perl_get_and_check_backslash_N_name(pTHX_ const char* s, - const char* const e, + const char* e, const bool is_utf8, const char ** error_msg) { @@ -2624,41 +2797,62 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it * doesn't have to be. */ + SV* char_name; SV* res; HV * table; SV **cvp; SV *cv; SV *rv; HV *stash; - const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ - dVAR; + + /* Points to the beginning of the \N{... so that any messages include the + * context of what's failing*/ + const char* context = s - 3; + STRLEN context_len = e - context + 1; /* include all of \N{...} */ + PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; assert(e >= s); assert(s > (char *) 3); - res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); + while (s < e && isBLANK(*s)) { + s++; + } + + while (s < e && isBLANK(*(e - 1))) { + e--; + } + + char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0); - if (!SvCUR(res)) { - SvREFCNT_dec_NN(res); + if (!SvCUR(char_name)) { + SvREFCNT_dec_NN(char_name); /* diag_listed_as: Unknown charname '%s' */ *error_msg = Perl_form(aTHX_ "Unknown charname ''"); return NULL; } - res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, - /* include the <}> */ - e - backslash_ptr + 1, error_msg); - if (! SvPOK(res)) { - SvREFCNT_dec_NN(res); + /* Autoload the charnames module */ + + table = load_charnames(char_name, context, context_len, error_msg); + if (table == NULL) { + return NULL; + } + + *error_msg = NULL; + res = new_constant( NULL, 0, "charnames", char_name, NULL, + context, context_len, error_msg); + if (*error_msg) { + *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name)); + + SvREFCNT_dec(res); return NULL; } /* See if the charnames handler is the Perl core's, and if so, we can skip * the validation needed for a user-supplied one, as Perl's does its own * validation. */ - table = GvHV(PL_hintgv); /* ^H */ cvp = hv_fetchs(table, "charnames", FALSE); if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv), SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) @@ -2687,7 +2881,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ') { + if (*s == ' ' && *(s-1) == ' ') { goto multi_spaces; } s++; @@ -2755,7 +2949,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, *error_msg = Perl_form(aTHX_ "charnames alias definitions may not contain trailing " "white-space; marked by <-- HERE in %.*s<-- HERE %.*s", - (int)(s - backslash_ptr + 1), backslash_ptr, + (int)(s - context + 1), context, (int)(e - s + 1), s + 1); return NULL; } @@ -2775,7 +2969,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, immediately after '%s' */ *error_msg = Perl_form(aTHX_ "Malformed UTF-8 returned by %.*s immediately after '%.*s'", - (int) (e - backslash_ptr + 1), backslash_ptr, + (int) context_len, context, (int) ((char *) first_bad_char_loc - str), str); return NULL; } @@ -2791,7 +2985,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, in \N{%s} */ *error_msg = Perl_form(aTHX_ "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", - (int)(s - backslash_ptr + 1), backslash_ptr, + (int)(s - context + 1), context, (int)(e - s + 1), s + 1); return NULL; } @@ -2803,7 +2997,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, *error_msg = Perl_form(aTHX_ "charnames alias definitions may not contain a sequence of " "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s", - (int)(s - backslash_ptr + 1), backslash_ptr, + (int)(s - context + 1), context, (int)(e - s + 1), s + 1); return NULL; } @@ -2833,12 +3027,12 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...}) pass through: - all other \-char, including \N and \N{ apart from \N{ABC} + all other \-char, including \N and \N{ apart from \N{ABC} stops on: - @ and $ where it appears to be a var, but not for $ as tail anchor + @ and $ where it appears to be a var, but not for $ as tail anchor \l \L \u \U \Q \E - (?{ or (??{ + (?{ or (??{ In transliterations: characters are VERY literal, except for - not at the start or end @@ -2874,25 +3068,25 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, The structure of the code is while (there's a character to process) { - handle transliteration ranges - skip regexp comments /(?#comment)/ and codes /(?{code})/ - skip #-initiated comments in //x patterns - check for embedded arrays - check for embedded scalars - if (backslash) { - deprecate \1 in substitution replacements - handle string-changing backslashes \l \U \Q \E, etc. - switch (what was escaped) { - handle \- in a transliteration (becomes a literal -) - if a pattern and not \N{, go treat as regular character - handle \132 (octal characters) - handle \x15 and \x{1234} (hex characters) - handle \N{name} (named characters, also \N{3,5} in a pattern) - handle \cV (control characters) - handle printf-style backslashes (\f, \r, \n, etc) - } (end switch) - continue - } (end if backslash) + handle transliteration ranges + skip regexp comments /(?#comment)/ and codes /(?{code})/ + skip #-initiated comments in //x patterns + check for embedded arrays + check for embedded scalars + if (backslash) { + deprecate \1 in substitution replacements + handle string-changing backslashes \l \U \Q \E, etc. + switch (what was escaped) { + handle \- in a transliteration (becomes a literal -) + if a pattern and not \N{, go treat as regular character + handle \132 (octal characters) + handle \x15 and \x{1234} (hex characters) + handle \N{name} (named characters, also \N{3,5} in a pattern) + handle \cV (control characters) + handle printf-style backslashes (\f, \r, \n, etc) + } (end switch) + continue + } (end if backslash) handle regular character } (end while character to read) @@ -2901,7 +3095,7 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, STATIC char * S_scan_const(pTHX_ char *start) { - char *send = PL_bufend; /* end of the constant */ + const char * const send = PL_bufend;/* end of the constant */ SV *sv = newSV(send - start); /* sv for the constant. See note below on sizing. */ char *s = start; /* start of the constant */ @@ -2909,7 +3103,7 @@ S_scan_const(pTHX_ char *start) bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ bool in_charclass = FALSE; /* within /[...]/ */ - bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be + const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be UTF8? But, this can show as true when the source isn't utf8, as for example when it is entirely composed @@ -2970,7 +3164,7 @@ S_scan_const(pTHX_ char *start) ) { /* get transliterations out of the way (they're most literal) */ - if (PL_lex_inwhat == OP_TRANS) { + if (PL_lex_inwhat == OP_TRANS) { /* But there isn't any special handling necessary unless there is a * range, so for most cases we just drop down and handle the value @@ -2994,7 +3188,7 @@ S_scan_const(pTHX_ char *start) * because each code point in it has to be processed here * individually to get its native translation */ - if (! dorange) { + if (! dorange) { /* Here, we don't think we're in a range. If the new character * is not a hyphen; or if it is a hyphen, but it's too close to @@ -3055,7 +3249,7 @@ S_scan_const(pTHX_ char *start) char * max_ptr; char * min_ptr; IV range_min; - IV range_max; /* last character in range */ + IV range_max; /* last character in range */ STRLEN grow; Size_t offset_to_min = 0; Size_t extras = 0; @@ -3142,8 +3336,8 @@ S_scan_const(pTHX_ char *start) * of them */ if (isPRINT_A(range_min) && isPRINT_A(range_max)) { Perl_croak(aTHX_ - "Invalid range \"%c-%c\" in transliteration operator", - (char)range_min, (char)range_max); + "Invalid range \"%c-%c\" in transliteration operator", + (char)range_min, (char)range_max); } #ifdef EBCDIC else if (convert_unicode) { @@ -3171,7 +3365,7 @@ S_scan_const(pTHX_ char *start) /* Here the range contains at least 3 code points */ - if (d_is_utf8) { + if (d_is_utf8) { /* If everything in the transliteration is below 256, we * can avoid special handling later. A translation table @@ -3183,7 +3377,7 @@ S_scan_const(pTHX_ char *start) * if we have to convert to/from Unicode values */ if ( has_above_latin1 #ifdef EBCDIC - && (range_min > 255 || ! convert_unicode) + && (range_min > 255 || ! convert_unicode) #endif ) { const STRLEN off = d - SvPVX(sv); @@ -3218,7 +3412,7 @@ S_scan_const(pTHX_ char *start) range_max = 255; } #endif - } + } /* Here we need to expand out the string to contain each * character in the range. Grow the output to handle this. @@ -3315,8 +3509,8 @@ S_scan_const(pTHX_ char *start) for (i = range_min; i <= range_max; i++) { *d++ = (char)LATIN1_TO_NATIVE((U8) i); } - } - } + } + } else #endif /* Always gets run for ASCII, and sometimes for EBCDIC. */ @@ -3351,8 +3545,8 @@ S_scan_const(pTHX_ char *start) * 'utf8_variant_count' on EBCDIC (it's already been * counted when originally parsed) */ *d++ = (char) range_max; - } - } + } + } #ifdef EBCDIC /* If the original range extended above 255, add in that @@ -3370,37 +3564,37 @@ S_scan_const(pTHX_ char *start) #endif range_done: - /* mark the range as done, and continue */ - didrange = TRUE; - dorange = FALSE; + /* mark the range as done, and continue */ + didrange = TRUE; + dorange = FALSE; #ifdef EBCDIC - non_portable_endpoint = 0; + non_portable_endpoint = 0; backslash_N = 0; #endif - continue; - } /* End of is a range */ + continue; + } /* End of is a range */ } /* End of transliteration. Joins main code after these else's */ - else if (*s == '[' && PL_lex_inpat && !in_charclass) { - char *s1 = s-1; - int esc = 0; - while (s1 >= start && *s1-- == '\\') - esc = !esc; - if (!esc) - in_charclass = TRUE; - } - else if (*s == ']' && PL_lex_inpat && in_charclass) { - char *s1 = s-1; - int esc = 0; - while (s1 >= start && *s1-- == '\\') - esc = !esc; - if (!esc) - in_charclass = FALSE; - } + else if (*s == '[' && PL_lex_inpat && !in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = TRUE; + } + else if (*s == ']' && PL_lex_inpat && in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = FALSE; + } /* skip for regexp comments /(?#comment)/, except for the last * char, which will be done separately. Stop on (?{..}) and * friends */ - else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { - if (s[2] == '#') { + else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { + if (s[2] == '#') { if (s_is_utf8) { PERL_UINT_FAST8_T len = UTF8SKIP(s); @@ -3414,181 +3608,189 @@ S_scan_const(pTHX_ char *start) else while (s+1 < send && *s != ')') { *d++ = *s++; } - } - else if (!PL_lex_casemods + } + else if (!PL_lex_casemods && ( s[2] == '{' /* This should match regcomp.c */ - || (s[2] == '?' && s[3] == '{'))) - { - break; - } - } + || (s[2] == '?' && s[3] == '{'))) + { + break; + } + } /* likewise skip #-initiated comments in //x patterns */ - else if (*s == '#' + else if (*s == '#' && PL_lex_inpat && !in_charclass && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) { - while (s < send && *s != '\n') - *d++ = *s++; - } + while (s < send && *s != '\n') + *d++ = *s++; + } /* no further processing of single-quoted regex */ - else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') - goto default_action; + else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') + goto default_action; /* check for embedded arrays * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) */ - else if (*s == '@' && s[1]) { - if (UTF + else if (*s == '@' && s[1]) { + if (UTF ? isIDFIRST_utf8_safe(s+1, send) : isWORDCHAR_A(s[1])) { - break; + break; } - if (memCHRs(":'{$", s[1])) - break; - if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) - break; /* in regexp, neither @+ nor @- are interpolated */ - } + if (memCHRs(":'{$", s[1])) + break; + if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) + break; /* in regexp, neither @+ nor @- are interpolated */ + } /* check for embedded scalars. only stop if we're sure it's a * variable. */ - else if (*s == '$') { - if (!PL_lex_inpat) /* not a regexp, so $ must be var */ - break; - if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { - if (s[1] == '\\') { - Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of $\\ in regex"); - } - break; /* in regexp, $ might be tail anchor */ + else if (*s == '$') { + if (!PL_lex_inpat) /* not a regexp, so $ must be var */ + break; + if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) { + if (s[1] == '\\') { + Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Possible unintended interpolation of $\\ in regex"); + } + break; /* in regexp, $ might be tail anchor */ } - } + } - /* End of else if chain - OP_TRANS rejoin rest */ + /* End of else if chain - OP_TRANS rejoin rest */ if (UNLIKELY(s >= send)) { assert(s == send); break; } - /* backslashes */ - if (*s == '\\' && s+1 < send) { - char* e; /* Can be used for ending '}', etc. */ - - s++; + /* backslashes */ + if (*s == '\\' && s+1 < send) { + char* bslash = s; /* point to beginning \ */ + char* rbrace; /* point to ending '}' */ + char* e; /* 1 past the meat (non-blanks) before the + brace */ + s++; - /* warn on \1 - \9 in substitution replacements, but note that \11 - * is an octal; and \19 is \1 followed by '9' */ - if (PL_lex_inwhat == OP_SUBST + /* warn on \1 - \9 in substitution replacements, but note that \11 + * is an octal; and \19 is \1 followed by '9' */ + if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat && isDIGIT(*s) && *s != '0' && !isDIGIT(s[1])) - { - /* diag_listed_as: \%d better written as $%d */ - Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); - *--s = '$'; - break; - } - - /* string-change backslash escapes */ - if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { - --s; - break; - } - /* In a pattern, process \N, but skip any other backslash escapes. - * This is because we don't want to translate an escape sequence - * into a meta symbol and have the regex compiler use the meta - * symbol meaning, e.g. \x{2E} would be confused with a dot. But - * in spite of this, we do have to process \N here while the proper - * charnames handler is in scope. See bugs #56444 and #62056. + { + /* diag_listed_as: \%d better written as $%d */ + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s); + s = bslash; + *s = '$'; + break; + } + + /* string-change backslash escapes */ + if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) { + s = bslash; + break; + } + /* In a pattern, process \N, but skip any other backslash escapes. + * This is because we don't want to translate an escape sequence + * into a meta symbol and have the regex compiler use the meta + * symbol meaning, e.g. \x{2E} would be confused with a dot. But + * in spite of this, we do have to process \N here while the proper + * charnames handler is in scope. See bugs #56444 and #62056. * - * There is a complication because \N in a pattern may also stand - * for 'match a non-nl', and not mean a charname, in which case its - * processing should be deferred to the regex compiler. To be a - * charname it must be followed immediately by a '{', and not look - * like \N followed by a curly quantifier, i.e., not something like - * \N{3,}. regcurly returns a boolean indicating if it is a legal - * quantifier */ - else if (PL_lex_inpat - && (*s != 'N' - || s[1] != '{' - || regcurly(s + 1))) - { - *d++ = '\\'; - goto default_action; - } - - switch (*s) { - default: - { - if ((isALPHANUMERIC(*s))) - Perl_ck_warner(aTHX_ packWARN(WARN_MISC), - "Unrecognized escape \\%c passed through", - *s); - /* default action is to copy the quoted character */ - goto default_action; - } - - /* eg. \132 indicates the octal constant 0132 */ - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - { - I32 flags = PERL_SCAN_SILENT_ILLDIGIT; + * There is a complication because \N in a pattern may also stand + * for 'match a non-nl', and not mean a charname, in which case its + * processing should be deferred to the regex compiler. To be a + * charname it must be followed immediately by a '{', and not look + * like \N followed by a curly quantifier, i.e., not something like + * \N{3,}. regcurly returns a boolean indicating if it is a legal + * quantifier */ + else if (PL_lex_inpat + && (*s != 'N' + || s[1] != '{' + || regcurly(s + 1, send, NULL))) + { + *d++ = '\\'; + goto default_action; + } + + switch (*s) { + default: + { + if ((isALPHANUMERIC(*s))) + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Unrecognized escape \\%c passed through", + *s); + /* default action is to copy the quoted character */ + goto default_action; + } + + /* eg. \132 indicates the octal constant 0132 */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + { + I32 flags = PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_NOTIFY_ILLDIGIT; STRLEN len = 3; - uv = grok_oct(s, &len, &flags, NULL); - s += len; - if (len < 3 && s < send && isDIGIT(*s) + uv = grok_oct(s, &len, &flags, NULL); + s += len; + if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) + && s < send + && isDIGIT(*s) /* like \08, \178 */ && ckWARN(WARN_MISC)) { - Perl_warner(aTHX_ packWARN(WARN_MISC), - "%s", form_short_octal_warning(s, len)); + Perl_warner(aTHX_ packWARN(WARN_MISC), "%s", + form_alien_digit_msg(8, len, s, send, UTF, FALSE)); } - } - goto NUM_ESCAPE_INSERT; + } + goto NUM_ESCAPE_INSERT; - /* eg. \o{24} indicates the octal constant \024 */ - case 'o': - { - const char* error; + /* eg. \o{24} indicates the octal constant \024 */ + case 'o': + { + const char* error; - bool valid = grok_bslash_o(&s, send, + if (! grok_bslash_o(&s, send, &uv, &error, - TRUE, /* Output warning */ + NULL, FALSE, /* Not strict */ - UTF); - if (! valid) { - yyerror(error); - uv = 0; /* drop through to ensure range ends are set */ - } - goto NUM_ESCAPE_INSERT; - } - - /* eg. \x24 indicates the hex constant 0x24 */ - case 'x': - { - const char* error; - - bool valid = grok_bslash_x(&s, send, + FALSE, /* No illegal cp's */ + UTF)) + { + yyerror(error); + uv = 0; /* drop through to ensure range ends are set */ + } + goto NUM_ESCAPE_INSERT; + } + + /* eg. \x24 indicates the hex constant 0x24 */ + case 'x': + { + const char* error; + + if (! grok_bslash_x(&s, send, &uv, &error, - TRUE, /* Output warning */ + NULL, FALSE, /* Not strict */ - UTF); - if (! valid) { - yyerror(error); - uv = 0; /* drop through to ensure range ends are set */ - } - } - - NUM_ESCAPE_INSERT: - /* Insert oct or hex escaped character. */ - - /* Here uv is the ordinal of the next character being added */ - if (UVCHR_IS_INVARIANT(uv)) { - *d++ = (char) uv; - } - else { - if (!d_is_utf8 && uv > 255) { + FALSE, /* No illegal cp's */ + UTF)) + { + yyerror(error); + uv = 0; /* drop through to ensure range ends are set */ + } + } + + NUM_ESCAPE_INSERT: + /* Insert oct or hex escaped character. */ + + /* Here uv is the ordinal of the next character being added */ + if (UVCHR_IS_INVARIANT(uv)) { + *d++ = (char) uv; + } + else { + if (!d_is_utf8 && uv > 255) { /* Here, 'uv' won't fit unless we convert to UTF-8. * If we've only seen invariants so far, all we have to @@ -3620,10 +3822,10 @@ S_scan_const(pTHX_ char *start) } if (! d_is_utf8) { - *d++ = (char)uv; + *d++ = (char)uv; utf8_variant_count++; } - else { + else { /* Usually, there will already be enough room in 'sv' * since such escapes are likely longer than any UTF-8 * sequence they can end up as. This isn't the case on @@ -3640,18 +3842,18 @@ S_scan_const(pTHX_ char *start) d = SvCUR(sv) + SvGROW(sv, needed); } - d = (char*) uvchr_to_utf8_flags((U8*)d, uv, + d = (char*) uvchr_to_utf8_flags((U8*)d, uv, (ckWARN(WARN_PORTABLE)) ? UNICODE_WARN_PERL_EXTENDED : 0); - } - } -#ifdef EBCDIC + } + } +#ifdef EBCDIC non_portable_endpoint++; #endif - continue; + continue; - case 'N': + case 'N': /* In a non-pattern \N must be like \N{U+0041}, or it can be a * named character, like \N{LATIN SMALL LETTER A}, or a named * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND @@ -3674,8 +3876,8 @@ S_scan_const(pTHX_ char *start) * right now, while preserving the fact that it was a named * character, so that the regex compiler knows this. * - * The structure of this section of code (besides checking for - * errors and upgrading to utf8) is: + * The structure of this section of code (besides checking for + * errors and upgrading to utf8) is: * If the named character is of the form \N{U+...}, pass it * through if a pattern; otherwise convert the code point * to utf8 @@ -3686,44 +3888,50 @@ S_scan_const(pTHX_ char *start) * only done if the code point requires it to be representable. * * Here, 's' points to the 'N'; the test below is guaranteed to - * succeed if we are being called on a pattern, as we already + * succeed if we are being called on a pattern, as we already * know from a test above that the next character is a '{'. A * non-pattern \N must mean 'named character', which requires * braces */ - s++; - if (*s != '{') { - yyerror("Missing braces on \\N{}"); + s++; + if (*s != '{') { + yyerror("Missing braces on \\N{}"); *d++ = '\0'; - continue; - } - s++; - - /* If there is no matching '}', it is an error. */ - if (! (e = (char *) memchr(s, '}', send - s))) { - if (! PL_lex_inpat) { - yyerror("Missing right brace on \\N{}"); - } else { - yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); - } + continue; + } + s++; + + /* If there is no matching '}', it is an error. */ + if (! (rbrace = (char *) memchr(s, '}', send - s))) { + if (! PL_lex_inpat) { + yyerror("Missing right brace on \\N{}"); + } else { + yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); + } yyquit(); /* Have exhausted the input. */ - } + } + + /* Here it looks like a named character */ + while (s < rbrace && isBLANK(*s)) { + s++; + } - /* Here it looks like a named character */ + e = rbrace; + while (s < e && isBLANK(*(e - 1))) { + e--; + } - if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ - s += 2; /* Skip to next char after the 'U+' */ - if (PL_lex_inpat) { + if (*s == 'U' && s[1] == '+') { /* \N{U+...} */ + s += 2; /* Skip to next char after the 'U+' */ + if (PL_lex_inpat) { /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */ /* Check the syntax. */ - const char *orig_s; - orig_s = s - 5; if (!isXDIGIT(*s)) { bad_NU: yyerror( "Invalid hexadecimal number in \\N{U+...}" ); - s = e + 1; + s = rbrace + 1; *d++ = '\0'; continue; } @@ -3737,19 +3945,29 @@ S_scan_const(pTHX_ char *start) } /* Pass everything through unchanged. - * +1 is for the '}' */ - Copy(orig_s, d, e - orig_s + 1, char); - d += e - orig_s + 1; - } - else { /* Not a pattern: convert the hex to string */ + * +1 is to include the '}' */ + Copy(bslash, d, rbrace - bslash + 1, char); + d += rbrace - bslash + 1; + } + else { /* Not a pattern: convert the hex to string */ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES - | PERL_SCAN_SILENT_ILLDIGIT - | PERL_SCAN_DISALLOW_PREFIX; + | PERL_SCAN_SILENT_ILLDIGIT + | PERL_SCAN_SILENT_OVERFLOW + | PERL_SCAN_DISALLOW_PREFIX; STRLEN len = e - s; + uv = grok_hex(s, &len, &flags, NULL); if (len == 0 || (len != (STRLEN)(e - s))) goto bad_NU; + if ( uv > MAX_LEGAL_CP + || (flags & PERL_SCAN_GREATER_THAN_UV_MAX)) + { + yyerror(form_cp_too_large_msg(16, s, len, 0)); + uv = 0; /* drop through to ensure range ends are + set */ + } + /* For non-tr///, if the destination is not in utf8, * unconditionally recode it to be so. This is * because \N{} implies Unicode semantics, and scalars @@ -3757,15 +3975,15 @@ S_scan_const(pTHX_ char *start) * tr/// doesn't care about Unicode rules, so no need * there to upgrade to UTF-8 for small enough code * points */ - if (! d_is_utf8 && ( uv > 0xFF + if (! d_is_utf8 && ( uv > 0xFF || PL_lex_inwhat != OP_TRANS)) { - /* See Note on sizing above. */ - const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1; + /* See Note on sizing above. */ + const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1; - SvCUR_set(sv, d - SvPVX_const(sv)); - SvPOK_on(sv); - *d = '\0'; + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; if (utf8_variant_count == 0) { SvUTF8_on(sv); @@ -3779,23 +3997,23 @@ S_scan_const(pTHX_ char *start) d = SvPVX(sv) + SvCUR(sv); } - d_is_utf8 = TRUE; + d_is_utf8 = TRUE; has_above_latin1 = TRUE; - } + } /* Add the (Unicode) code point to the output. */ - if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) { - *d++ = (char) LATIN1_TO_NATIVE(uv); - } - else { + if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) { + *d++ = (char) LATIN1_TO_NATIVE(uv); + } + else { d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, (ckWARN(WARN_PORTABLE)) ? UNICODE_WARN_PERL_EXTENDED : 0); } - } - } - else /* Here is \N{NAME} but not \N{U+...}. */ + } + } + else /* Here is \N{NAME} but not \N{U+...}. */ if (! (res = get_and_check_backslash_N_name_wrapper(s, e))) { /* Failed. We should die eventually, but for now use a NUL to keep parsing */ @@ -3806,20 +4024,20 @@ S_scan_const(pTHX_ char *start) const char *str = SvPV_const(res, len); if (PL_lex_inpat) { - if (! len) { /* The name resolved to an empty string */ + if (! len) { /* The name resolved to an empty string */ const char empty_N[] = "\\N{_}"; Copy(empty_N, d, sizeof(empty_N) - 1, char); d += sizeof(empty_N) - 1; - } - else { - /* In order to not lose information for the regex - * compiler, pass the result in the specially made - * syntax: \N{U+c1.c2.c3...}, where c1 etc. are - * the code points in hex of each character - * returned by charnames */ + } + else { + /* In order to not lose information for the regex + * compiler, pass the result in the specially made + * syntax: \N{U+c1.c2.c3...}, where c1 etc. are + * the code points in hex of each character + * returned by charnames */ - const char *str_end = str + len; - const STRLEN off = d - SvPVX_const(sv); + const char *str_end = str + len; + const STRLEN off = d - SvPVX_const(sv); if (! SvUTF8(res)) { /* For the non-UTF-8 case, we can determine the @@ -3836,7 +4054,7 @@ S_scan_const(pTHX_ char *start) /* +1 for trailing NUL */ + initial_len + 1 - + (STRLEN)(send - e)); + + (STRLEN)(send - rbrace)); Copy(initial_text, d, initial_len, char); d += initial_len; while (str < str_end) { @@ -3885,7 +4103,7 @@ S_scan_const(pTHX_ char *start) /* Make sure there is enough space to hold it */ d = off + SvGROW(sv, off + output_length - + (STRLEN)(send - e) + + (STRLEN)(send - rbrace) + 2); /* '}' + NUL */ /* And output it */ Copy(hex_string, d, output_length, char); @@ -3907,18 +4125,18 @@ S_scan_const(pTHX_ char *start) d = off + SvGROW(sv, off + output_length - + (STRLEN)(send - e) + + (STRLEN)(send - rbrace) + 2); /* '}' + NUL */ Copy(hex_string, d, output_length, char); d += output_length; } - } + } - *d++ = '}'; /* Done. Add the trailing brace */ - } - } - else { /* Here, not in a pattern. Convert the name to a - * string. */ + *d++ = '}'; /* Done. Add the trailing brace */ + } + } + else { /* Here, not in a pattern. Convert the name to a + * string. */ if (PL_lex_inwhat == OP_TRANS) { str = SvPV_const(res, len); @@ -3930,7 +4148,7 @@ S_scan_const(pTHX_ char *start) "%.*s must not be a named sequence" " in transliteration operator", /* +1 to include the "}" */ - (int) (e + 1 - start), start)); + (int) (rbrace + 1 - start), start)); *d++ = '\0'; goto end_backslash_N; } @@ -3951,13 +4169,13 @@ S_scan_const(pTHX_ char *start) /* Upgrade destination to be utf8 if this new * component is */ - if (! d_is_utf8 && SvUTF8(res)) { - /* See Note on sizing above. */ + if (! d_is_utf8 && SvUTF8(res)) { + /* See Note on sizing above. */ const STRLEN extra = len + (send - s) + 1; - SvCUR_set(sv, d - SvPVX_const(sv)); - SvPOK_on(sv); - *d = '\0'; + SvCUR_set(sv, d - SvPVX_const(sv)); + SvPOK_on(sv); + *d = '\0'; if (utf8_variant_count == 0) { SvUTF8_on(sv); @@ -3965,83 +4183,83 @@ S_scan_const(pTHX_ char *start) } else { sv_utf8_upgrade_flags_grow(sv, - SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, - extra); + SV_GMAGIC|SV_FORCE_UTF8_UPGRADE, + extra); d = SvPVX(sv) + SvCUR(sv); } - d_is_utf8 = TRUE; - } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */ + d_is_utf8 = TRUE; + } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */ - /* See Note on sizing above. (NOTE: SvCUR() is not - * set correctly here). */ - const STRLEN extra = len + (send - e) + 1; - const STRLEN off = d - SvPVX_const(sv); - d = off + SvGROW(sv, off + extra); - } - Copy(str, d, len, char); - d += len; - } + /* See Note on sizing above. (NOTE: SvCUR() is not + * set correctly here). */ + const STRLEN extra = len + (send - rbrace) + 1; + const STRLEN off = d - SvPVX_const(sv); + d = off + SvGROW(sv, off + extra); + } + Copy(str, d, len, char); + d += len; + } - SvREFCNT_dec(res); + SvREFCNT_dec(res); - } /* End \N{NAME} */ + } /* End \N{NAME} */ end_backslash_N: #ifdef EBCDIC backslash_N++; /* \N{} is defined to be Unicode */ #endif - s = e + 1; /* Point to just after the '}' */ - continue; + s = rbrace + 1; /* Point to just after the '}' */ + continue; - /* \c is a control character */ - case 'c': - s++; - if (s < send) { + /* \c is a control character */ + case 'c': + s++; + if (s < send) { const char * message; - if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { + if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) { yyerror(message); yyquit(); /* Have always immediately croaked on errors in this */ } - d++; - } - else { - yyerror("Missing control char name in \\c"); - yyquit(); /* Are at end of input, no sense continuing */ - } + d++; + } + else { + yyerror("Missing control char name in \\c"); + yyquit(); /* Are at end of input, no sense continuing */ + } #ifdef EBCDIC non_portable_endpoint++; #endif break; - /* printf-style backslashes, formfeeds, newlines, etc */ - case 'b': - *d++ = '\b'; - break; - case 'n': - *d++ = '\n'; - break; - case 'r': - *d++ = '\r'; - break; - case 'f': - *d++ = '\f'; - break; - case 't': - *d++ = '\t'; - break; - case 'e': - *d++ = ESC_NATIVE; - break; - case 'a': - *d++ = '\a'; - break; - } /* end switch */ - - s++; - continue; - } /* end if (backslash) */ + /* printf-style backslashes, formfeeds, newlines, etc */ + case 'b': + *d++ = '\b'; + break; + case 'n': + *d++ = '\n'; + break; + case 'r': + *d++ = '\r'; + break; + case 'f': + *d++ = '\f'; + break; + case 't': + *d++ = '\t'; + break; + case 'e': + *d++ = ESC_NATIVE; + break; + case 'a': + *d++ = '\a'; + break; + } /* end switch */ + + s++; + continue; + } /* end if (backslash) */ default_action: /* Just copy the input to the output, though we may have to convert @@ -4050,17 +4268,17 @@ S_scan_const(pTHX_ char *start) * If the input has the same representation in UTF-8 as not, it will be * a single byte, and we don't care about UTF8ness; just copy the byte */ if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) { - *d++ = *s++; + *d++ = *s++; } else if (! s_is_utf8 && ! d_is_utf8) { /* If neither source nor output is UTF-8, is also a single byte, * just copy it; but this byte counts should we later have to * convert to UTF-8 */ - *d++ = *s++; + *d++ = *s++; utf8_variant_count++; } else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */ - const STRLEN len = UTF8SKIP(s); + const STRLEN len = UTF8SKIP(s); /* We expect the source to have already been checked for * malformedness */ @@ -4097,12 +4315,12 @@ S_scan_const(pTHX_ char *start) const STRLEN off = d - SvPVX(sv); const STRLEN extra = 2 + (send - s - 1) + 1; if (off + extra > SvLEN(sv)) { - d = off + SvGROW(sv, off + extra); - } + d = off + SvGROW(sv, off + extra); + } *d++ = UTF8_EIGHT_BIT_HI(*s); *d++ = UTF8_EIGHT_BIT_LO(*s); s++; - } + } } /* while loop to process each character */ { @@ -4133,47 +4351,47 @@ S_scan_const(pTHX_ char *start) SvPOK_on(sv); if (d_is_utf8) { - SvUTF8_on(sv); + SvUTF8_on(sv); } /* shrink the sv if we allocated more than we used */ if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvPV_shrink_to_cur(sv); + SvPV_shrink_to_cur(sv); } /* return the substring (via pl_yylval) only if we parsed anything */ if (s > start) { - char *s2 = start; - for (; s2 < s; s2++) { - if (*s2 == '\n') - COPLINE_INC_WITH_HERELINES; - } - SvREFCNT_inc_simple_void_NN(sv); - if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) + char *s2 = start; + for (; s2 < s; s2++) { + if (*s2 == '\n') + COPLINE_INC_WITH_HERELINES; + } + SvREFCNT_inc_simple_void_NN(sv); + if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) && ! PL_parser->lex_re_reparsing) { - const char *const key = PL_lex_inpat ? "qr" : "q"; - const STRLEN keylen = PL_lex_inpat ? 2 : 1; - const char *type; - STRLEN typelen; - - if (PL_lex_inwhat == OP_TRANS) { - type = "tr"; - typelen = 2; - } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { - type = "s"; - typelen = 1; - } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { - type = "q"; - typelen = 1; - } else { - type = "qq"; - typelen = 2; - } - - sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, - type, typelen, NULL); - } + const char *const key = PL_lex_inpat ? "qr" : "q"; + const STRLEN keylen = PL_lex_inpat ? 2 : 1; + const char *type; + STRLEN typelen; + + if (PL_lex_inwhat == OP_TRANS) { + type = "tr"; + typelen = 2; + } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { + type = "s"; + typelen = 1; + } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { + type = "q"; + typelen = 1; + } else { + type = "qq"; + typelen = 2; + } + + sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL, + type, typelen, NULL); + } pl_yylval.opval = newSVOP(OP_CONST, 0, sv); } LEAVE_with_name("scan_const"); @@ -4208,133 +4426,133 @@ S_intuit_more(pTHX_ char *s, char *e) PERL_ARGS_ASSERT_INTUIT_MORE; if (PL_lex_brackets) - return TRUE; + return TRUE; if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{')) - return TRUE; + return TRUE; if (*s == '-' && s[1] == '>' && FEATURE_POSTDEREF_QQ_IS_ENABLED && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*'))) - ||(s[2] == '@' && memCHRs("*[{",s[3])) )) - return TRUE; + ||(s[2] == '@' && memCHRs("*[{",s[3])) )) + return TRUE; if (*s != '{' && *s != '[') - return FALSE; + return FALSE; PL_parser->sub_no_recover = TRUE; if (!PL_lex_inpat) - return TRUE; + return TRUE; /* In a pattern, so maybe we have {n,m}. */ if (*s == '{') { - if (regcurly(s)) { - return FALSE; - } - return TRUE; + if (regcurly(s, e, NULL)) { + return FALSE; + } + return TRUE; } /* On the other hand, maybe we have a character class */ s++; if (*s == ']' || *s == '^') - return FALSE; + return FALSE; else { /* this is terrifying, and it works */ - int weight; - char seen[256]; - const char * const send = (char *) memchr(s, ']', e - s); - unsigned char un_char, last_un_char; - char tmpbuf[sizeof PL_tokenbuf * 4]; - - if (!send) /* has to be an expression */ - return TRUE; - weight = 2; /* let's weigh the evidence */ - - if (*s == '$') - weight -= 3; - else if (isDIGIT(*s)) { - if (s[1] != ']') { - if (isDIGIT(s[1]) && s[2] == ']') - weight -= 10; - } - else - weight -= 100; - } - Zero(seen,256,char); - un_char = 255; - for (; s < send; s++) { - last_un_char = un_char; - un_char = (unsigned char)*s; - switch (*s) { - case '@': - case '&': - case '$': - weight -= seen[un_char] * 10; - if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { - int len; - scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); - len = (int)strlen(tmpbuf); - if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, + int weight; + char seen[256]; + const char * const send = (char *) memchr(s, ']', e - s); + unsigned char un_char, last_un_char; + char tmpbuf[sizeof PL_tokenbuf * 4]; + + if (!send) /* has to be an expression */ + return TRUE; + weight = 2; /* let's weigh the evidence */ + + if (*s == '$') + weight -= 3; + else if (isDIGIT(*s)) { + if (s[1] != ']') { + if (isDIGIT(s[1]) && s[2] == ']') + weight -= 10; + } + else + weight -= 100; + } + Zero(seen,256,char); + un_char = 255; + for (; s < send; s++) { + last_un_char = un_char; + un_char = (unsigned char)*s; + switch (*s) { + case '@': + case '&': + case '$': + weight -= seen[un_char] * 10; + if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { + int len; + scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); + len = (int)strlen(tmpbuf); + if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0, SVt_PV)) - weight -= 100; - else - weight -= 10; - } - else if (*s == '$' + weight -= 100; + else + weight -= 10; + } + else if (*s == '$' && s[1] && memCHRs("[#!%*<>()-=",s[1])) { - if (/*{*/ memCHRs("])} =",s[2])) - weight -= 10; - else - weight -= 1; - } - break; - case '\\': - un_char = 254; - if (s[1]) { - if (memCHRs("wds]",s[1])) - weight += 100; - else if (seen[(U8)'\''] || seen[(U8)'"']) - weight += 1; - else if (memCHRs("rnftbxcav",s[1])) - weight += 40; - else if (isDIGIT(s[1])) { - weight += 40; - while (s[1] && isDIGIT(s[1])) - s++; - } - } - else - weight += 100; - break; - case '-': - if (s[1] == '\\') - weight += 50; - if (memCHRs("aA01! ",last_un_char)) - weight += 30; - if (memCHRs("zZ79~",s[1])) - weight += 30; - if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) - weight -= 5; /* cope with negative subscript */ - break; - default: - if (!isWORDCHAR(last_un_char) - && !(last_un_char == '$' || last_un_char == '@' - || last_un_char == '&') - && isALPHA(*s) && s[1] && isALPHA(s[1])) { - char *d = s; - while (isALPHA(*s)) - s++; - if (keyword(d, s - d, 0)) - weight -= 150; - } - if (un_char == last_un_char + 1) - weight += 5; - weight -= seen[un_char]; - break; - } - seen[un_char]++; - } - if (weight >= 0) /* probably a character class */ - return FALSE; + if (/*{*/ memCHRs("])} =",s[2])) + weight -= 10; + else + weight -= 1; + } + break; + case '\\': + un_char = 254; + if (s[1]) { + if (memCHRs("wds]",s[1])) + weight += 100; + else if (seen[(U8)'\''] || seen[(U8)'"']) + weight += 1; + else if (memCHRs("rnftbxcav",s[1])) + weight += 40; + else if (isDIGIT(s[1])) { + weight += 40; + while (s[1] && isDIGIT(s[1])) + s++; + } + } + else + weight += 100; + break; + case '-': + if (s[1] == '\\') + weight += 50; + if (memCHRs("aA01! ",last_un_char)) + weight += 30; + if (memCHRs("zZ79~",s[1])) + weight += 30; + if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$')) + weight -= 5; /* cope with negative subscript */ + break; + default: + if (!isWORDCHAR(last_un_char) + && !(last_un_char == '$' || last_un_char == '@' + || last_un_char == '&') + && isALPHA(*s) && s[1] && isALPHA(s[1])) { + char *d = s; + while (isALPHA(*s)) + s++; + if (keyword(d, s - d, 0)) + weight -= 150; + } + if (un_char == last_un_char + 1) + weight += 5; + weight -= seen[un_char]; + break; + } + seen[un_char]++; + } + if (weight >= 0) /* probably a character class */ + return FALSE; } return TRUE; @@ -4368,38 +4586,41 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) char tmpbuf[sizeof PL_tokenbuf]; STRLEN len; GV* indirgv; - /* Mustn't actually add anything to a symbol table. - But also don't want to "initialise" any placeholder - constants that might already be there into full - blown PVGVs with attached PVCV. */ + /* Mustn't actually add anything to a symbol table. + But also don't want to "initialise" any placeholder + constants that might already be there into full + blown PVGVs with attached PVCV. */ GV * const gv = - ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; + ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL; PERL_ARGS_ASSERT_INTUIT_METHOD; + if (!FEATURE_INDIRECT_IS_ENABLED) + return 0; + if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) - return 0; + return 0; if (cv && SvPOK(cv)) { - const char *proto = CvPROTO(cv); - if (proto) { - while (*proto && (isSPACE(*proto) || *proto == ';')) - proto++; - if (*proto == '*') - return 0; - } + const char *proto = CvPROTO(cv); + if (proto) { + while (*proto && (isSPACE(*proto) || *proto == ';')) + proto++; + if (*proto == '*') + return 0; + } } if (*start == '$') { SSize_t start_off = start - SvPVX(PL_linestr); - if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY + if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) - return 0; + return 0; /* this could be $# */ if (isSPACE(*s)) s = skipspace(s); - PL_bufptr = SvPVX(PL_linestr) + start_off; - PL_expect = XREF; - return *s == '(' ? FUNCMETH : METHOD; + PL_bufptr = SvPVX(PL_linestr) + start_off; + PL_expect = XREF; + return *s == '(' ? FUNCMETH : METHOD; } s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); @@ -4409,31 +4630,31 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) */ if (!keyword(tmpbuf, len, 0)) { - if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { - len -= 2; - tmpbuf[len] = '\0'; - goto bare_package; - } - indirgv = gv_fetchpvn_flags(tmpbuf, len, - GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), - SVt_PVCV); - if (indirgv && SvTYPE(indirgv) != SVt_NULL - && (!isGV(indirgv) || GvCVu(indirgv))) - return 0; - /* filehandle or package name makes it a method */ - if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { - s = skipspace(s); - if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') - return 0; /* no assumptions -- "=>" quotes bareword */ + if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { + len -= 2; + tmpbuf[len] = '\0'; + goto bare_package; + } + indirgv = gv_fetchpvn_flags(tmpbuf, len, + GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ), + SVt_PVCV); + if (indirgv && SvTYPE(indirgv) != SVt_NULL + && (!isGV(indirgv) || GvCVu(indirgv))) + return 0; + /* filehandle or package name makes it a method */ + if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) { + s = skipspace(s); + if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>') + return 0; /* no assumptions -- "=>" quotes bareword */ bare_package: NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, - S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); - NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; - PL_expect = XTERM; - force_next(BAREWORD); - PL_bufptr = s; - return *s == '(' ? FUNCMETH : METHOD; - } + S_newSV_maybe_utf8(aTHX_ tmpbuf, len)); + NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE; + PL_expect = XTERM; + force_next(BAREWORD); + PL_bufptr = s; + return *s == '(' ? FUNCMETH : METHOD; + } } return 0; } @@ -4459,70 +4680,77 @@ SV * Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) { if (!funcp) - return NULL; + return NULL; if (!PL_parser) - return NULL; + return NULL; if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) - Perl_croak(aTHX_ "Source filters apply only to byte streams"); + Perl_croak(aTHX_ "Source filters apply only to byte streams"); if (!PL_rsfp_filters) - PL_rsfp_filters = newAV(); + PL_rsfp_filters = newAV(); if (!datasv) - datasv = newSV(0); + datasv = newSV(0); SvUPGRADE(datasv, SVt_PVIO); IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */ IoFLAGS(datasv) |= IOf_FAKE_DIRP; DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n", - FPTR2DPTR(void *, IoANY(datasv)), - SvPV_nolen(datasv))); + FPTR2DPTR(void *, IoANY(datasv)), + SvPV_nolen(datasv))); av_unshift(PL_rsfp_filters, 1); av_store(PL_rsfp_filters, 0, datasv) ; if ( - !PL_parser->filtered + !PL_parser->filtered && PL_parser->lex_flags & LEX_EVALBYTES && PL_bufptr < PL_bufend ) { - const char *s = PL_bufptr; - while (s < PL_bufend) { - if (*s == '\n') { - SV *linestr = PL_parser->linestr; - char *buf = SvPVX(linestr); - STRLEN const bufptr_pos = PL_parser->bufptr - buf; - STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; - STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; - STRLEN const linestart_pos = PL_parser->linestart - buf; - STRLEN const last_uni_pos = - PL_parser->last_uni ? PL_parser->last_uni - buf : 0; - STRLEN const last_lop_pos = - PL_parser->last_lop ? PL_parser->last_lop - buf : 0; - av_push(PL_rsfp_filters, linestr); - PL_parser->linestr = - newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); - buf = SvPVX(PL_parser->linestr); - PL_parser->bufend = buf + SvCUR(PL_parser->linestr); - PL_parser->bufptr = buf + bufptr_pos; - PL_parser->oldbufptr = buf + oldbufptr_pos; - PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; - PL_parser->linestart = buf + linestart_pos; - if (PL_parser->last_uni) - PL_parser->last_uni = buf + last_uni_pos; - if (PL_parser->last_lop) - PL_parser->last_lop = buf + last_lop_pos; - SvLEN_set(linestr, SvCUR(linestr)); - SvCUR_set(linestr, s - SvPVX(linestr)); - PL_parser->filtered = 1; - break; - } - s++; - } + const char *s = PL_bufptr; + while (s < PL_bufend) { + if (*s == '\n') { + SV *linestr = PL_parser->linestr; + char *buf = SvPVX(linestr); + STRLEN const bufptr_pos = PL_parser->bufptr - buf; + STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf; + STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf; + STRLEN const linestart_pos = PL_parser->linestart - buf; + STRLEN const last_uni_pos = + PL_parser->last_uni ? PL_parser->last_uni - buf : 0; + STRLEN const last_lop_pos = + PL_parser->last_lop ? PL_parser->last_lop - buf : 0; + av_push(PL_rsfp_filters, linestr); + PL_parser->linestr = + newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr)); + buf = SvPVX(PL_parser->linestr); + PL_parser->bufend = buf + SvCUR(PL_parser->linestr); + PL_parser->bufptr = buf + bufptr_pos; + PL_parser->oldbufptr = buf + oldbufptr_pos; + PL_parser->oldoldbufptr = buf + oldoldbufptr_pos; + PL_parser->linestart = buf + linestart_pos; + if (PL_parser->last_uni) + PL_parser->last_uni = buf + last_uni_pos; + if (PL_parser->last_lop) + PL_parser->last_lop = buf + last_lop_pos; + SvLEN_set(linestr, SvCUR(linestr)); + SvCUR_set(linestr, s - SvPVX(linestr)); + PL_parser->filtered = 1; + break; + } + s++; + } } return(datasv); } +/* +=for apidoc_section $filters +=for apidoc filter_del + +Delete most recently added instance of the filter function argument + +=cut +*/ -/* Delete most recently added instance of this filter function. */ void Perl_filter_del(pTHX_ filter_t funcp) { @@ -4532,14 +4760,14 @@ Perl_filter_del(pTHX_ filter_t funcp) #ifdef DEBUGGING DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", - FPTR2DPTR(void*, funcp))); + FPTR2DPTR(void*, funcp))); #endif if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0) - return; + return; /* if filter is on top of stack (usual case) just pop it off */ datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters)); if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) { - sv_free(av_pop(PL_rsfp_filters)); + sv_free(av_pop(PL_rsfp_filters)); return; } @@ -4564,76 +4792,76 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) PERL_ARGS_ASSERT_FILTER_READ; if (!PL_parser || !PL_rsfp_filters) - return -1; + return -1; if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */ - /* Provide a default input filter to make life easy. */ - /* Note that we append to the line. This is handy. */ - DEBUG_P(PerlIO_printf(Perl_debug_log, - "filter_read %d: from rsfp\n", idx)); - if (correct_length) { - /* Want a block */ - int len ; - const int old_len = SvCUR(buf_sv); - - /* ensure buf_sv is large enough */ - SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; - if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, - correct_length)) <= 0) { - if (PerlIO_error(PL_rsfp)) - return -1; /* error */ - else - return 0 ; /* end of file */ - } - SvCUR_set(buf_sv, old_len + len) ; - SvPVX(buf_sv)[old_len + len] = '\0'; - } else { - /* Want a line */ + /* Provide a default input filter to make life easy. */ + /* Note that we append to the line. This is handy. */ + DEBUG_P(PerlIO_printf(Perl_debug_log, + "filter_read %d: from rsfp\n", idx)); + if (correct_length) { + /* Want a block */ + int len ; + const int old_len = SvCUR(buf_sv); + + /* ensure buf_sv is large enough */ + SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ; + if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, + correct_length)) <= 0) { + if (PerlIO_error(PL_rsfp)) + return -1; /* error */ + else + return 0 ; /* end of file */ + } + SvCUR_set(buf_sv, old_len + len) ; + SvPVX(buf_sv)[old_len + len] = '\0'; + } else { + /* Want a line */ if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) { - if (PerlIO_error(PL_rsfp)) - return -1; /* error */ - else - return 0 ; /* end of file */ - } - } - return SvCUR(buf_sv); + if (PerlIO_error(PL_rsfp)) + return -1; /* error */ + else + return 0 ; /* end of file */ + } + } + return SvCUR(buf_sv); } /* Skip this filter slot if filter has been deleted */ if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) { - DEBUG_P(PerlIO_printf(Perl_debug_log, - "filter_read %d: skipped (filter deleted)\n", - idx)); - return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ + DEBUG_P(PerlIO_printf(Perl_debug_log, + "filter_read %d: skipped (filter deleted)\n", + idx)); + return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */ } if (SvTYPE(datasv) != SVt_PVIO) { - if (correct_length) { - /* Want a block */ - const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); - if (!remainder) return 0; /* eof */ - if (correct_length > remainder) correct_length = remainder; - sv_catpvn(buf_sv, SvEND(datasv), correct_length); - SvCUR_set(datasv, SvCUR(datasv) + correct_length); - } else { - /* Want a line */ - const char *s = SvEND(datasv); - const char *send = SvPVX(datasv) + SvLEN(datasv); - while (s < send) { - if (*s == '\n') { - s++; - break; - } - s++; - } - if (s == send) return 0; /* eof */ - sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); - SvCUR_set(datasv, s-SvPVX(datasv)); - } - return SvCUR(buf_sv); + if (correct_length) { + /* Want a block */ + const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv); + if (!remainder) return 0; /* eof */ + if (correct_length > remainder) correct_length = remainder; + sv_catpvn(buf_sv, SvEND(datasv), correct_length); + SvCUR_set(datasv, SvCUR(datasv) + correct_length); + } else { + /* Want a line */ + const char *s = SvEND(datasv); + const char *send = SvPVX(datasv) + SvLEN(datasv); + while (s < send) { + if (*s == '\n') { + s++; + break; + } + s++; + } + if (s == send) return 0; /* eof */ + sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv)); + SvCUR_set(datasv, s-SvPVX(datasv)); + } + return SvCUR(buf_sv); } /* Get function pointer hidden within datasv */ funcp = DPTR2FPTR(filter_t, IoANY(datasv)); DEBUG_P(PerlIO_printf(Perl_debug_log, - "filter_read %d: via function %p (%s)\n", - idx, (void*)datasv, SvPV_nolen_const(datasv))); + "filter_read %d: via function %p (%s)\n", + idx, (void*)datasv, SvPV_nolen_const(datasv))); /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ /* Return: <0:error, =0:eof, >0:not eof */ @@ -4651,16 +4879,16 @@ S_filter_gets(pTHX_ SV *sv, STRLEN append) #ifdef PERL_CR_FILTER if (!PL_rsfp_filters) { - filter_add(S_cr_textfilter,NULL); + filter_add(S_cr_textfilter,NULL); } #endif if (PL_rsfp_filters) { - if (!append) + if (!append) SvCUR_set(sv, 0); /* start with empty line */ if (FILTER_READ(0, sv, 0) > 0) return ( SvPVX(sv) ) ; else - return NULL ; + return NULL ; } else return (sv_gets(sv, PL_rsfp, append)); @@ -4688,9 +4916,9 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) /* use constant CLASS => 'MyClass' */ gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV); if (gv && GvCV(gv)) { - SV * const sv = cv_const_sv(GvCV(gv)); - if (sv) - return gv_stashsv(sv, 0); + SV * const sv = cv_const_sv(GvCV(gv)); + if (sv) + return gv_stashsv(sv, 0); } return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0); @@ -4702,36 +4930,36 @@ S_tokenize_use(pTHX_ int is_use, char *s) { PERL_ARGS_ASSERT_TOKENIZE_USE; if (PL_expect != XSTATE) - /* diag_listed_as: "use" not allowed in expression */ - yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", - is_use ? "use" : "no")); + /* diag_listed_as: "use" not allowed in expression */ + yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", + is_use ? "use" : "no")); PL_expect = XTERM; s = skipspace(s); if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) { - s = force_version(s, TRUE); - if (*s == ';' || *s == '}' - || (s = skipspace(s), (*s == ';' || *s == '}'))) { - NEXTVAL_NEXTTOKE.opval = NULL; - force_next(BAREWORD); - } - else if (*s == 'v') { - s = force_word(s,BAREWORD,FALSE,TRUE); - s = force_version(s, FALSE); - } + s = force_version(s, TRUE); + if (*s == ';' || *s == '}' + || (s = skipspace(s), (*s == ';' || *s == '}'))) { + NEXTVAL_NEXTTOKE.opval = NULL; + force_next(BAREWORD); + } + else if (*s == 'v') { + s = force_word(s,BAREWORD,FALSE,TRUE); + s = force_version(s, FALSE); + } } else { - s = force_word(s,BAREWORD,FALSE,TRUE); - s = force_version(s, FALSE); + s = force_word(s,BAREWORD,FALSE,TRUE); + s = force_version(s, FALSE); } pl_yylval.ival = is_use; return s; } #ifdef DEBUGGING static const char* const exp_name[] = - { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", - "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", - "SIGVAR", "TERMORDORDOR" - }; + { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK", + "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF", + "SIGVAR", "TERMORDORDOR" + }; #endif #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l) @@ -4753,7 +4981,7 @@ S_check_scalar_slice(pTHX_ char *s) PL_bufend, UTF)) { - return; + return; } while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || (*s && memCHRs(" \t$#+-'\"", *s))) @@ -4761,7 +4989,7 @@ S_check_scalar_slice(pTHX_ char *s) s += UTF ? UTF8SKIP(s) : 1; } if (*s == '}' || *s == ']') - pl_yylval.ival = OPpSLICEWARNING; + pl_yylval.ival = OPpSLICEWARNING; } #define lex_token_boundary() S_lex_token_boundary(aTHX) @@ -4780,7 +5008,7 @@ S_vcs_conflict_marker(pTHX_ char *s) PL_bufptr = s; yyerror("Version control conflict marker"); while (s < PL_bufend && *s != '\n') - s++; + s++; return s; } @@ -4877,7 +5105,14 @@ yyl_sigvar(pTHX_ char *s) break; } - TOKEN(sigil); + switch (sigil) { + case ',': TOKEN (PERLY_COMMA); + case '$': TOKEN (PERLY_DOLLAR); + case '@': TOKEN (PERLY_SNAIL); + case '%': TOKEN (PERLY_PERCENT_SIGN); + case ')': TOKEN (PERLY_PAREN_CLOSE); + default: TOKEN (sigil); + } } static int @@ -4890,7 +5125,7 @@ yyl_dollar(pTHX_ char *s) s++; POSTDEREF(DOLSHARP); } - POSTDEREF('$'); + POSTDEREF(PERLY_DOLLAR); } if ( s[1] == '#' @@ -4928,7 +5163,7 @@ yyl_dollar(pTHX_ char *s) if (!PL_tokenbuf[1]) { if (s == PL_bufend) yyerror("Final $ should be \\$ or $name"); - PREREF('$'); + PREREF(PERLY_DOLLAR); } { @@ -4943,13 +5178,40 @@ yyl_dollar(pTHX_ char *s) if (ckWARN(WARN_SYNTAX)) { char *t = s+1; - while ( isSPACE(*t) - || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) - || *t == '$') - { - t += UTF ? UTF8SKIP(t) : 1; + while ( t < PL_bufend ) { + if (isSPACE(*t)) { + do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t)); + /* consumed one or more space chars */ + } else if (*t == '$' || *t == '@') { + /* could be more than one '$' like $$ref or @$ref */ + do { t++; } while (t < PL_bufend && *t == '$'); + + /* could be an abigail style identifier like $ foo */ + while (t < PL_bufend && *t == ' ') t++; + + /* strip off the name of the var */ + while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) + t += UTF ? UTF8SKIP(t) : 1; + /* consumed a varname */ + } else if (isDIGIT(*t)) { + /* deal with hex constants like 0x11 */ + if (t[0] == '0' && t[1] == 'x') { + t += 2; + while (t < PL_bufend && isXDIGIT(*t)) t++; + } else { + /* deal with decimal/octal constants like 1 and 0123 */ + do { t++; } while (isDIGIT(*t)); + if (t 10) Renew(PL_lex_casestack, PL_lex_casemods + 2, char); @@ -5236,7 +5498,7 @@ yyl_interpcasemod(pTHX_ char *s) PL_lex_casestack[PL_lex_casemods] = '\0'; PL_lex_state = LEX_INTERPCONCAT; NEXTVAL_NEXTTOKE.ival = 0; - force_next((2<<24)|'('); + force_next((2<<24)|PERLY_PAREN_OPEN); if (*s == 'l') NEXTVAL_NEXTTOKE.ival = OP_LCFIRST; else if (*s == 'u') @@ -5259,7 +5521,7 @@ yyl_interpcasemod(pTHX_ char *s) PL_lex_starts = 0; /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ if (PL_lex_casemods == 1 && PL_lex_inpat) - TOKEN(','); + TOKEN(PERLY_COMMA); else AopNOASSIGN(OP_CONCAT); } @@ -5400,7 +5662,7 @@ yyl_hyphen(pTHX_ char *s) if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); - OPERATOR('-'); /* unary minus */ + OPERATOR(PERLY_MINUS); /* unary minus */ } switch (tmp) { case 'r': ftst = OP_FTEREAD; break; @@ -5501,7 +5763,7 @@ yyl_hyphen(pTHX_ char *s) else { if (isSPACE(*s) || !isSPACE(*PL_bufptr)) check_uni(); - OPERATOR('-'); /* unary minus */ + OPERATOR(PERLY_MINUS); /* unary minus */ } } } @@ -5530,7 +5792,7 @@ yyl_plus(pTHX_ char *s) else { if (isSPACE(*s) || !isSPACE(*PL_bufptr)) check_uni(); - OPERATOR('+'); + OPERATOR(PERLY_PLUS); } } @@ -5538,15 +5800,15 @@ static int yyl_star(pTHX_ char *s) { if (PL_expect == XPOSTDEREF) - POSTDEREF('*'); + POSTDEREF(PERLY_STAR); if (PL_expect != XOPERATOR) { s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE); PL_expect = XOPERATOR; - force_ident(PL_tokenbuf, '*'); + force_ident(PL_tokenbuf, PERLY_STAR); if (!*PL_tokenbuf) - PREREF('*'); - TERM('*'); + PREREF(PERLY_STAR); + TERM(PERLY_STAR); } s++; @@ -5586,13 +5848,13 @@ yyl_percent(pTHX_ char *s) Mop(OP_MODULO); } else if (PL_expect == XPOSTDEREF) - POSTDEREF('%'); + POSTDEREF(PERLY_PERCENT_SIGN); PL_tokenbuf[0] = '%'; s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { - PREREF('%'); + PREREF(PERLY_PERCENT_SIGN); } if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s, PL_bufend)) { @@ -5601,7 +5863,7 @@ yyl_percent(pTHX_ char *s) } PL_expect = XOPERATOR; force_ident_maybe_lex('%'); - TERM('%'); + TERM(PERLY_PERCENT_SIGN); } static int @@ -5763,7 +6025,7 @@ yyl_colon(pTHX_ char *s) : "Unterminated attribute list" ) ); if (attrs) op_free(attrs); - OPERATOR(':'); + OPERATOR(PERLY_COLON); } got_attrs: @@ -5788,7 +6050,7 @@ yyl_colon(pTHX_ char *s) } PL_lex_allbrackets--; - OPERATOR(':'); + OPERATOR(PERLY_COLON); } static int @@ -5881,7 +6143,7 @@ yyl_leftcurly(pTHX_ char *s, const U8 formbrack) const char minus = (PL_tokenbuf[0] == '-'); s = force_word(s + minus, BAREWORD, FALSE, TRUE); if (minus) - force_next('-'); + force_next(PERLY_MINUS); } } /* FALLTHROUGH */ @@ -6047,7 +6309,7 @@ yyl_leftcurly(pTHX_ char *s, const U8 formbrack) pl_yylval.ival = CopLINE(PL_curcop); PL_copline = NOLINE; /* invalidate current command line number */ - TOKEN(formbrack ? '=' : '{'); + TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN); } static int @@ -6088,21 +6350,21 @@ yyl_rightcurly(pTHX_ char *s, const U8 formbrack) return yylex(); /* ignore fake brackets */ } - force_next(formbrack ? '.' : '}'); + force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE); if (formbrack) LEAVE_with_name("lex_format"); if (formbrack == 2) { /* means . where arguments were expected */ - force_next(';'); + force_next(PERLY_SEMICOLON); TOKEN(FORMRBRACK); } - TOKEN(';'); + TOKEN(PERLY_SEMICOLON); } static int yyl_ampersand(pTHX_ char *s) { if (PL_expect == XPOSTDEREF) - POSTDEREF('&'); + POSTDEREF(PERLY_AMPERSAND); s++; if (*s++ == '&') { @@ -6148,9 +6410,9 @@ yyl_ampersand(pTHX_ char *s) if (PL_tokenbuf[1]) force_ident_maybe_lex('&'); else - PREREF('&'); + PREREF(PERLY_AMPERSAND); - TERM('&'); + TERM(PERLY_AMPERSAND); } static int @@ -6210,21 +6472,21 @@ yyl_bang(pTHX_ char *s) TOKEN(0); } - Eop(OP_NE); + ChEop(OP_NE); } if (tmp == '~') PMop(OP_NOT); s--; - OPERATOR('!'); + OPERATOR(PERLY_EXCLAMATION_MARK); } static int yyl_snail(pTHX_ char *s) { if (PL_expect == XPOSTDEREF) - POSTDEREF('@'); + POSTDEREF(PERLY_SNAIL); PL_tokenbuf[0] = '@'; s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); if (PL_expect == XOPERATOR) { @@ -6237,7 +6499,7 @@ yyl_snail(pTHX_ char *s) } pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { - PREREF('@'); + PREREF(PERLY_SNAIL); } if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) s = skipspace(s); @@ -6256,7 +6518,7 @@ yyl_snail(pTHX_ char *s) } PL_expect = XOPERATOR; force_ident_maybe_lex('@'); - TERM('@'); + TERM(PERLY_SNAIL); } static int @@ -6295,14 +6557,12 @@ yyl_slash(pTHX_ char *s) static int yyl_leftsquare(pTHX_ char *s) { - char tmp; - if (PL_lex_brackets > 100) Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); PL_lex_brackstack[PL_lex_brackets++] = 0; PL_lex_allbrackets++; - tmp = *s++; - OPERATOR(tmp); + s++; + OPERATOR(PERLY_BRACKET_OPEN); } static int @@ -6325,7 +6585,7 @@ yyl_rightsquare(pTHX_ char *s) PL_lex_state = LEX_INTERPEND; } } - TERM(']'); + TERM(PERLY_BRACKET_CLOSE); } static int @@ -6339,7 +6599,7 @@ yyl_tilde(pTHX_ char *s) Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH), "Smartmatch is experimental"); - Eop(OP_SMARTMATCH); + NCEop(OP_SMARTMATCH); } s++; if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') { @@ -6358,7 +6618,7 @@ yyl_leftparen(pTHX_ char *s) PL_expect = XTERM; s = skipspace(s); PL_lex_allbrackets++; - TOKEN('('); + TOKEN(PERLY_PAREN_OPEN); } static int @@ -6370,8 +6630,8 @@ yyl_rightparen(pTHX_ char *s) PL_lex_allbrackets--; s = skipspace(s); if (*s == '{') - PREBLOCK(')'); - TERM(')'); + PREBLOCK(PERLY_PAREN_CLOSE); + TERM(PERLY_PAREN_CLOSE); } static int @@ -6407,14 +6667,14 @@ yyl_leftpointy(pTHX_ char *s) s -= 3; TOKEN(0); } - Eop(OP_NCMP); + NCEop(OP_NCMP); } s--; if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { s -= 2; TOKEN(0); } - Rop(OP_LE); + ChRop(OP_LE); } s--; @@ -6423,7 +6683,7 @@ yyl_leftpointy(pTHX_ char *s) TOKEN(0); } - Rop(OP_LT); + ChRop(OP_LT); } static int @@ -6443,7 +6703,7 @@ yyl_rightpointy(pTHX_ char *s) s -= 2; TOKEN(0); } - Rop(OP_GE); + ChRop(OP_GE); } s--; @@ -6452,7 +6712,7 @@ yyl_rightpointy(pTHX_ char *s) TOKEN(0); } - Rop(OP_GT); + ChRop(OP_GT); } static int @@ -6471,9 +6731,10 @@ yyl_sglquote(pTHX_ char *s) } static int -yyl_dblquote(pTHX_ char *s, STRLEN len) +yyl_dblquote(pTHX_ char *s) { char *d; + STRLEN len; s = scan_str(s,FALSE,FALSE,FALSE,NULL); DEBUG_T( { if (s) @@ -6566,7 +6827,7 @@ yyl_data_handle(pTHX) loc = PerlIO_tell(PL_rsfp); (void)PerlIO_seek(PL_rsfp, 0L, 0); } - if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) { + if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) { if (loc > 0) PerlIO_seek(PL_rsfp, loc, 0); } @@ -6673,24 +6934,68 @@ yyl_foreach(pTHX_ char *s) if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { char *p = s; SSize_t s_off = s - SvPVX(PL_linestr); - STRLEN len; - - if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) { - p += 2; + bool paren_is_valid = FALSE; + bool maybe_package = FALSE; + bool saw_core = FALSE; + bool core_valid = FALSE; + + if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) { + saw_core = TRUE; + p += 6; + } + if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) { + core_valid = TRUE; + paren_is_valid = TRUE; + if (isSPACE(p[2])) { + p = skipspace(p + 3); + maybe_package = TRUE; + } + else { + p += 2; + } + } + else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) { + core_valid = TRUE; + if (isSPACE(p[3])) { + p = skipspace(p + 4); + maybe_package = TRUE; + } + else { + p += 3; + } + } + else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) { + core_valid = TRUE; + if (isSPACE(p[5])) { + p = skipspace(p + 6); + } + else { + p += 5; + } + } + if (saw_core && !core_valid) { + Perl_croak(aTHX_ "Missing $ on loop variable"); } - else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) { - p += 3; + + if (maybe_package && !saw_core) { + /* skip optional package name, as in "for my abc $x (..)" */ + if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) { + STRLEN len; + p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); + p = skipspace(p); + paren_is_valid = FALSE; + } } - p = skipspace(p); - /* skip optional package name, as in "for my abc $x (..)" */ - if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) { - p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); - p = skipspace(p); + if (UNLIKELY(paren_is_valid && *p == '(')) { + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__FOR_LIST), + "for my (...) is experimental"); } - if (*p != '$' && *p != '\\') + else if (UNLIKELY(*p != '$' && *p != '\\')) { + /* "for myfoo (" will end up here, but with p pointing at the 'f' */ Perl_croak(aTHX_ "Missing $ on loop variable"); - + } /* The buffer may have been reallocated, update s */ s = SvPVX(PL_linestr) + s_off; } @@ -6767,7 +7072,7 @@ yyl_my(pTHX_ char *s, I32 my) OPERATOR(MY); } -static int yyl_try(pTHX_ char*, STRLEN); +static int yyl_try(pTHX_ char*); static bool yyl_eol_needs_semicolon(pTHX_ char **ps) @@ -6817,7 +7122,7 @@ yyl_eol_needs_semicolon(pTHX_ char **ps) } static int -yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) +yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s) { char *d; @@ -6833,7 +7138,7 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) if (!lex_next_chunk(fake_eof)) { CopLINE_dec(PL_curcop); s = PL_bufptr; - TOKEN(';'); /* not infinite loop because rsfp is NULL now */ + TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */ } CopLINE_dec(PL_curcop); s = PL_bufptr; @@ -6992,7 +7297,6 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) && !instr(s,"indir") && instr(PL_origargv[0],"perl")) { - dVAR; char **newargv; *ipathend = '\0'; @@ -7059,13 +7363,13 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) we must not do it again */ { SvPVCLEAR(PL_linestr); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); PL_last_lop = PL_last_uni = NULL; PL_preambled = FALSE; if (PERLDB_LINE_OR_SAVESRC) (void)gv_fetchfile(PL_origfilename); - return yyl_try(aTHX_ s, len); + return YYL_RETRY; } } } @@ -7075,10 +7379,11 @@ yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len) if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; force_next(FORMRBRACK); - TOKEN(';'); + TOKEN(PERLY_SEMICOLON); } - return yyl_try(aTHX_ s, len); + PL_bufptr = s; + return YYL_RETRY; } static int @@ -7317,6 +7622,12 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) } s = SvPVX(PL_linestr) + s_off; + if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF + && !immediate_paren && !c.cv + && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { + no_bareword_filehandle(PL_tokenbuf); + } + /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ /* Also, if "_" follows a filetest operator, it's a bareword */ @@ -7375,12 +7686,12 @@ yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c) op_free(pl_yylval.opval), force_next(PRIVATEREF); else op_free(c.rv2cv_op), force_next(BAREWORD); pl_yylval.ival = 0; - TOKEN('&'); + TOKEN(PERLY_AMPERSAND); } /* If followed by var or block, call it a method (unless sub) */ - if ((*s == '$' || *s == '{') && !c.cv) { + if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) { op_free(c.rv2cv_op); PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_METHOD; @@ -7464,7 +7775,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY___END__: if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) yyl_data_handle(aTHX); - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); + return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s); case KEY___SUB__: FUN0OP(CvCLONE(PL_compcv) @@ -7511,6 +7822,11 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_break: FUN0(OP_BREAK); + case KEY_catch: + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); + PREBLOCK(CATCH); + case KEY_chop: UNI(OP_CHOP); @@ -7540,18 +7856,13 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_cmp: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Eop(OP_SCMP); + NCEop(OP_SCMP); case KEY_caller: UNI(OP_CALLER); case KEY_crypt: -#ifdef FCRYPT - if (!PL_cryptseen) { - PL_cryptseen = TRUE; - init_des(); - } -#endif + LOP(OP_CRYPT,XTERM); case KEY_chmod: @@ -7575,6 +7886,11 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_default: PREBLOCK(DEFAULT); + case KEY_defer: + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental"); + PREBLOCK(DEFER); + case KEY_do: return yyl_do(aTHX_ s, orig_keyword); @@ -7614,7 +7930,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_eq: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Eop(OP_SEQ); + ChEop(OP_SEQ); case KEY_exists: UNI(OP_EXISTS); @@ -7667,6 +7983,11 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_endgrent: FUN0(OP_EGRENT); + case KEY_finally: + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental"); + PREBLOCK(FINALLY); + case KEY_for: case KEY_foreach: return yyl_foreach(aTHX_ s); @@ -7692,12 +8013,12 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_gt: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Rop(OP_SGT); + ChRop(OP_SGT); case KEY_ge: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Rop(OP_SGE); + ChRop(OP_SGE); case KEY_grep: LOP(OP_GREPSTART, XREF); @@ -7814,9 +8135,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct LOP(OP_IOCTL,XTERM); case KEY_isa: - Perl_ck_warner_d(aTHX_ - packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental"); - Rop(OP_ISA); + NCRop(OP_ISA); case KEY_join: LOP(OP_JOIN,XTERM); @@ -7845,12 +8164,12 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_lt: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Rop(OP_SLT); + ChRop(OP_SLT); case KEY_le: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Rop(OP_SLE); + ChRop(OP_SLE); case KEY_localtime: UNI(OP_LOCALTIME); @@ -7903,7 +8222,7 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_ne: if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) return REPORT(0); - Eop(OP_SNE); + ChEop(OP_SNE); case KEY_no: s = tokenize_use(0, s); @@ -8247,6 +8566,12 @@ yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct case KEY_truncate: LOP(OP_TRUNCATE,XTERM); + case KEY_try: + pl_yylval.ival = CopLINE(PL_curcop); + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental"); + PREBLOCK(TRY); + case KEY_uc: UNI(OP_UC); @@ -8382,7 +8707,6 @@ yyl_key_core(pTHX_ char *s, STRLEN len, struct code c) static int yyl_keylookup(pTHX_ char *s, GV *gv) { - dVAR; STRLEN len; bool anydelim; I32 key; @@ -8511,84 +8835,92 @@ yyl_keylookup(pTHX_ char *s, GV *gv) } static int -yyl_try(pTHX_ char *s, STRLEN len) +yyl_try(pTHX_ char *s) { char *d; GV *gv = NULL; + int tok; retry: switch (*s) { default: - if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) - return yyl_keylookup(aTHX_ s, gv); + if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) { + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; + } yyl_croak_unrecognised(aTHX_ s); case 4: case 26: /* emulate EOF on ^D or ^Z */ - return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len); + if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY) + return tok; + retry_bufptr: + s = PL_bufptr; + goto retry; case 0: - if ((!PL_rsfp || PL_lex_inwhat) - && (!PL_parser->filtered || s+1 < PL_bufend)) { - PL_last_uni = 0; - PL_last_lop = 0; - if (PL_lex_brackets + if ((!PL_rsfp || PL_lex_inwhat) + && (!PL_parser->filtered || s+1 < PL_bufend)) { + PL_last_uni = 0; + PL_last_lop = 0; + if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) { - yyerror((const char *) - (PL_lex_formbrack - ? "Format not terminated" - : "Missing right curly or square bracket")); - } + yyerror((const char *) + (PL_lex_formbrack + ? "Format not terminated" + : "Missing right curly or square bracket")); + } DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n"); }); - TOKEN(0); - } - if (s++ < PL_bufend) - goto retry; /* ignore stray nulls */ - PL_last_uni = 0; - PL_last_lop = 0; - if (!PL_in_eval && !PL_preambled) { - PL_preambled = TRUE; - if (PL_perldb) { - /* Generate a string of Perl code to load the debugger. - * If PERL5DB is set, it will return the contents of that, - * otherwise a compile-time require of perl5db.pl. */ - - const char * const pdb = PerlEnv_getenv("PERL5DB"); - - if (pdb) { - sv_setpv(PL_linestr, pdb); - sv_catpvs(PL_linestr,";"); - } else { - SETERRNO(0,SS_NORMAL); - sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); - } - PL_parser->preambling = CopLINE(PL_curcop); - } else + TOKEN(0); + } + if (s++ < PL_bufend) + goto retry; /* ignore stray nulls */ + PL_last_uni = 0; + PL_last_lop = 0; + if (!PL_in_eval && !PL_preambled) { + PL_preambled = TRUE; + if (PL_perldb) { + /* Generate a string of Perl code to load the debugger. + * If PERL5DB is set, it will return the contents of that, + * otherwise a compile-time require of perl5db.pl. */ + + const char * const pdb = PerlEnv_getenv("PERL5DB"); + + if (pdb) { + sv_setpv(PL_linestr, pdb); + sv_catpvs(PL_linestr,";"); + } else { + SETERRNO(0,SS_NORMAL); + sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };"); + } + PL_parser->preambling = CopLINE(PL_curcop); + } else SvPVCLEAR(PL_linestr); - if (PL_preambleav) { - SV **svp = AvARRAY(PL_preambleav); - SV **const end = svp + AvFILLp(PL_preambleav); - while(svp <= end) { - sv_catsv(PL_linestr, *svp); - ++svp; - sv_catpvs(PL_linestr, ";"); - } - sv_free(MUTABLE_SV(PL_preambleav)); - PL_preambleav = NULL; - } - if (PL_minus_E) - sv_catpvs(PL_linestr, - "use feature ':5." STRINGIFY(PERL_VERSION) "';"); - if (PL_minus_n || PL_minus_p) { - sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); - if (PL_minus_l) - sv_catpvs(PL_linestr,"chomp;"); - if (PL_minus_a) { - if (PL_minus_F) { + if (PL_preambleav) { + SV **svp = AvARRAY(PL_preambleav); + SV **const end = svp + AvFILLp(PL_preambleav); + while(svp <= end) { + sv_catsv(PL_linestr, *svp); + ++svp; + sv_catpvs(PL_linestr, ";"); + } + sv_free(MUTABLE_SV(PL_preambleav)); + PL_preambleav = NULL; + } + if (PL_minus_E) + sv_catpvs(PL_linestr, + "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';"); + if (PL_minus_n || PL_minus_p) { + sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/); + if (PL_minus_l) + sv_catpvs(PL_linestr,"chomp;"); + if (PL_minus_a) { + if (PL_minus_F) { if ( ( *PL_splitstr == '/' || *PL_splitstr == '\'' || *PL_splitstr == '"') @@ -8596,54 +8928,56 @@ yyl_try(pTHX_ char *s, STRLEN len) { /* strchr is ok, because -F pattern can't contain * embeddded NULs */ - Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + } + else { + /* "q\0${splitstr}\0" is legal perl. Yes, even NUL + bytes can be used as quoting characters. :-) */ + const char *splits = PL_splitstr; + sv_catpvs(PL_linestr, "our @F=split(q\0"); + do { + /* Need to \ \s */ + if (*splits == '\\') + sv_catpvn(PL_linestr, splits, 1); + sv_catpvn(PL_linestr, splits, 1); + } while (*splits++); + /* This loop will embed the trailing NUL of + PL_linestr as the last thing it does before + terminating. */ + sv_catpvs(PL_linestr, ");"); } - else { - /* "q\0${splitstr}\0" is legal perl. Yes, even NUL - bytes can be used as quoting characters. :-) */ - const char *splits = PL_splitstr; - sv_catpvs(PL_linestr, "our @F=split(q\0"); - do { - /* Need to \ \s */ - if (*splits == '\\') - sv_catpvn(PL_linestr, splits, 1); - sv_catpvn(PL_linestr, splits, 1); - } while (*splits++); - /* This loop will embed the trailing NUL of - PL_linestr as the last thing it does before - terminating. */ - sv_catpvs(PL_linestr, ");"); - } - } - else - sv_catpvs(PL_linestr,"our @F=split(' ');"); - } - } - sv_catpvs(PL_linestr, "\n"); - PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_last_lop = PL_last_uni = NULL; - if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) - update_debugger_info(PL_linestr, NULL, 0); - goto retry; - } - return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len); + } + else + sv_catpvs(PL_linestr,"our @F=split(' ');"); + } + } + sv_catpvs(PL_linestr, "\n"); + PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_last_lop = PL_last_uni = NULL; + if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash) + update_debugger_info(PL_linestr, NULL, 0); + goto retry; + } + if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY) + return tok; + goto retry_bufptr; case '\r': #ifdef PERL_STRICT_CR - Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); - Perl_croak(aTHX_ + Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r'); + Perl_croak(aTHX_ "\t(Maybe you didn't strip carriage returns after a network transfer?)\n"); #endif case ' ': case '\t': case '\f': case '\v': - s++; - goto retry; + s++; + goto retry; case '#': case '\n': { const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s); if (needs_semicolon) - TOKEN(';'); + TOKEN(PERLY_SEMICOLON); else goto retry; } @@ -8670,12 +9004,12 @@ yyl_try(pTHX_ char *s, STRLEN len) return yyl_tilde(aTHX_ s); case ',': - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) - TOKEN(0); - s++; - OPERATOR(','); + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) + TOKEN(0); + s++; + OPERATOR(PERLY_COMMA); case ':': - if (s[1] == ':') + if (s[1] == ':') return yyl_just_a_word(aTHX_ s, 0, 0, no_code); return yyl_colon(aTHX_ s + 1); @@ -8683,12 +9017,12 @@ yyl_try(pTHX_ char *s, STRLEN len) return yyl_leftparen(aTHX_ s + 1); case ';': - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) - TOKEN(0); - CLINE; - s++; - PL_expect = XSTATE; - TOKEN(';'); + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR) + TOKEN(0); + CLINE; + s++; + PL_expect = XSTATE; + TOKEN(PERLY_SEMICOLON); case ')': return yyl_rightparen(aTHX_ s); @@ -8700,8 +9034,8 @@ yyl_try(pTHX_ char *s, STRLEN len) return yyl_leftcurly(aTHX_ s + 1, 0); case '}': - if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) - TOKEN(0); + if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) + TOKEN(0); return yyl_rightcurly(aTHX_ s, 0); case '&': @@ -8712,41 +9046,41 @@ yyl_try(pTHX_ char *s, STRLEN len) case '=': if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "=====")) + && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "=====")) { s = vcs_conflict_marker(s + 7); goto retry; } - s++; - { - const char tmp = *s++; - if (tmp == '=') { - if (!PL_lex_allbrackets + s++; + { + const char tmp = *s++; + if (tmp == '=') { + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) { - s -= 2; - TOKEN(0); - } - Eop(OP_EQ); - } - if (tmp == '>') { - if (!PL_lex_allbrackets + s -= 2; + TOKEN(0); + } + ChEop(OP_EQ); + } + if (tmp == '>') { + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) { - s -= 2; - TOKEN(0); - } - OPERATOR(','); - } - if (tmp == '~') - PMop(OP_MATCH); - if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) - && memCHRs("+-*/%.^&|<",tmp)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Reversed %c= operator",(int)tmp); - s--; - if (PL_expect == XSTATE + s -= 2; + TOKEN(0); + } + OPERATOR(PERLY_COMMA); + } + if (tmp == '~') + PMop(OP_MATCH); + if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX) + && memCHRs("+-*/%.^&|<",tmp)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Reversed %c= operator",(int)tmp); + s--; + if (PL_expect == XSTATE && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) { @@ -8775,38 +9109,38 @@ yyl_try(pTHX_ char *s, STRLEN len) PL_parser->in_pod = 1; goto retry; } - } - if (PL_expect == XBLOCK) { - const char *t = s; + } + if (PL_expect == XBLOCK) { + const char *t = s; #ifdef PERL_STRICT_CR - while (SPACE_OR_TAB(*t)) + while (SPACE_OR_TAB(*t)) #else - while (SPACE_OR_TAB(*t) || *t == '\r') + while (SPACE_OR_TAB(*t) || *t == '\r') #endif - t++; - if (*t == '\n' || *t == '#') { - ENTER_with_name("lex_format"); - SAVEI8(PL_parser->form_lex_state); - SAVEI32(PL_lex_formbrack); - PL_parser->form_lex_state = PL_lex_state; - PL_lex_formbrack = PL_lex_brackets + 1; + t++; + if (*t == '\n' || *t == '#') { + ENTER_with_name("lex_format"); + SAVEI8(PL_parser->form_lex_state); + SAVEI32(PL_lex_formbrack); + PL_parser->form_lex_state = PL_lex_state; + PL_lex_formbrack = PL_lex_brackets + 1; PL_parser->sub_error_count = PL_error_count; return yyl_leftcurly(aTHX_ s, 1); - } - } - if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { - s--; - TOKEN(0); - } - pl_yylval.ival = 0; - OPERATOR(ASSIGNOP); - - case '!': + } + } + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { + s--; + TOKEN(0); + } + pl_yylval.ival = 0; + OPERATOR(ASSIGNOP); + + case '!': return yyl_bang(aTHX_ s + 1); case '<': if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) + && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<")) { s = vcs_conflict_marker(s + 7); goto retry; @@ -8815,7 +9149,7 @@ yyl_try(pTHX_ char *s, STRLEN len) case '>': if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n') - && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>")) + && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>")) { s = vcs_conflict_marker(s + 7); goto retry; @@ -8832,73 +9166,73 @@ yyl_try(pTHX_ char *s, STRLEN len) return yyl_slash(aTHX_ s); case '?': /* conditional */ - s++; - if (!PL_lex_allbrackets + s++; + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) { - s--; - TOKEN(0); - } - PL_lex_allbrackets++; - OPERATOR('?'); + s--; + TOKEN(0); + } + PL_lex_allbrackets++; + OPERATOR(PERLY_QUESTION_MARK); case '.': - if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack + if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack #ifdef PERL_STRICT_CR - && s[1] == '\n' + && s[1] == '\n' #else - && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) + && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n')) #endif - && (s == PL_linestart || s[-1] == '\n') ) - { - PL_expect = XSTATE; + && (s == PL_linestart || s[-1] == '\n') ) + { + PL_expect = XSTATE; /* formbrack==2 means dot seen where arguments expected */ return yyl_rightcurly(aTHX_ s, 2); - } - if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { - s += 3; - OPERATOR(YADAYADA); - } - if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { - char tmp = *s++; - if (*s == tmp) { - if (!PL_lex_allbrackets + } + if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { + s += 3; + OPERATOR(YADAYADA); + } + if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { + char tmp = *s++; + if (*s == tmp) { + if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) { - s--; - TOKEN(0); - } - s++; - if (*s == tmp) { - s++; - pl_yylval.ival = OPf_SPECIAL; - } - else - pl_yylval.ival = 0; - OPERATOR(DOTDOT); - } - if (*s == '=' && !PL_lex_allbrackets + s--; + TOKEN(0); + } + s++; + if (*s == tmp) { + s++; + pl_yylval.ival = OPf_SPECIAL; + } + else + pl_yylval.ival = 0; + OPERATOR(DOTDOT); + } + if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) { - s--; - TOKEN(0); - } - Aop(OP_CONCAT); - } - /* FALLTHROUGH */ + s--; + TOKEN(0); + } + Aop(OP_CONCAT); + } + /* FALLTHROUGH */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - s = scan_num(s, &pl_yylval); - DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); - if (PL_expect == XOPERATOR) - no_op("Number",s); - TERM(THING); + s = scan_num(s, &pl_yylval); + DEBUG_T( { printbuf("### Saw number in %s\n", s); } ); + if (PL_expect == XOPERATOR) + no_op("Number",s); + TERM(THING); case '\'': return yyl_sglquote(aTHX_ s); case '"': - return yyl_dblquote(aTHX_ s, len); + return yyl_dblquote(aTHX_ s); case '`': return yyl_backtick(aTHX_ s); @@ -8907,43 +9241,53 @@ yyl_try(pTHX_ char *s, STRLEN len) return yyl_backslash(aTHX_ s + 1); case 'v': - if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { - char *start = s + 2; - while (isDIGIT(*start) || *start == '_') - start++; - if (*start == '.' && isDIGIT(start[1])) { - s = scan_num(s, &pl_yylval); - TERM(THING); - } - else if ((*start == ':' && start[1] == ':') - || (PL_expect == XSTATE && *start == ':')) - return yyl_keylookup(aTHX_ s, gv); - else if (PL_expect == XSTATE) { - d = start; - while (d < PL_bufend && isSPACE(*d)) d++; - if (*d == ':') - return yyl_keylookup(aTHX_ s, gv); - } - /* avoid v123abc() or $h{v1}, allow C */ - if (!isALPHA(*start) && (PL_expect == XTERM - || PL_expect == XREF || PL_expect == XSTATE - || PL_expect == XTERMORDORDOR)) { - GV *const gv = gv_fetchpvn_flags(s, start - s, + if (isDIGIT(s[1]) && PL_expect != XOPERATOR) { + char *start = s + 2; + while (isDIGIT(*start) || *start == '_') + start++; + if (*start == '.' && isDIGIT(start[1])) { + s = scan_num(s, &pl_yylval); + TERM(THING); + } + else if ((*start == ':' && start[1] == ':') + || (PL_expect == XSTATE && *start == ':')) { + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; + } + else if (PL_expect == XSTATE) { + d = start; + while (d < PL_bufend && isSPACE(*d)) d++; + if (*d == ':') { + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; + } + } + /* avoid v123abc() or $h{v1}, allow C */ + if (!isALPHA(*start) && (PL_expect == XTERM + || PL_expect == XREF || PL_expect == XSTATE + || PL_expect == XTERMORDORDOR)) { + GV *const gv = gv_fetchpvn_flags(s, start - s, UTF ? SVf_UTF8 : 0, SVt_PVCV); - if (!gv) { - s = scan_num(s, &pl_yylval); - TERM(THING); - } - } - } - return yyl_keylookup(aTHX_ s, gv); + if (!gv) { + s = scan_num(s, &pl_yylval); + TERM(THING); + } + } + } + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; case 'x': - if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { - s++; - Mop(OP_REPEAT); - } - return yyl_keylookup(aTHX_ s, gv); + if (isDIGIT(s[1]) && PL_expect == XOPERATOR) { + s++; + Mop(OP_REPEAT); + } + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; case '_': case 'a': case 'A': @@ -8967,12 +9311,14 @@ yyl_try(pTHX_ char *s, STRLEN len) case 's': case 'S': case 't': case 'T': case 'u': case 'U': - case 'V': + case 'V': case 'w': case 'W': - case 'X': + case 'X': case 'y': case 'Y': case 'z': case 'Z': - return yyl_keylookup(aTHX_ s, gv); + if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY) + return tok; + goto retry_bufptr; } } @@ -8990,48 +9336,40 @@ yyl_try(pTHX_ char *s, STRLEN len) Structure: Check if we have already built the token; if so, use it. Switch based on the current state: - - if we have a case modifier in a string, deal with that - - handle other cases of interpolation inside a string - - scan the next line if we are inside a format + - if we have a case modifier in a string, deal with that + - handle other cases of interpolation inside a string + - scan the next line if we are inside a format In the normal state, switch on the next character: - - default: - if alphabetic, go to key lookup - unrecognized character - croak - - 0/4/26: handle end-of-line or EOF - - cases for whitespace - - \n and #: handle comments and line numbers - - various operators, brackets and sigils - - numbers - - quotes - - 'v': vstrings (or go to key lookup) - - 'x' repetition operator (or go to key lookup) - - other ASCII alphanumerics (key lookup begins here): - word before => ? - keyword plugin - scan built-in keyword (but do nothing with it yet) - check for statement label - check for lexical subs - return yyl_just_a_word if there is one - see whether built-in keyword is overridden - switch on keyword number: - - default: return yyl_just_a_word: - not a built-in keyword; handle bareword lookup - disambiguate between method and sub call - fall back to bareword - - cases for built-in keywords + - default: + if alphabetic, go to key lookup + unrecognized character - croak + - 0/4/26: handle end-of-line or EOF + - cases for whitespace + - \n and #: handle comments and line numbers + - various operators, brackets and sigils + - numbers + - quotes + - 'v': vstrings (or go to key lookup) + - 'x' repetition operator (or go to key lookup) + - other ASCII alphanumerics (key lookup begins here): + word before => ? + keyword plugin + scan built-in keyword (but do nothing with it yet) + check for statement label + check for lexical subs + return yyl_just_a_word if there is one + see whether built-in keyword is overridden + switch on keyword number: + - default: return yyl_just_a_word: + not a built-in keyword; handle bareword lookup + disambiguate between method and sub call + fall back to bareword + - cases for built-in keywords */ -#ifdef NETWARE -#define RSFP_FILENO (PL_rsfp) -#else -#define RSFP_FILENO (PerlIO_fileno(PL_rsfp)) -#endif - - int Perl_yylex(pTHX) { - dVAR; char *s = PL_bufptr; if (UNLIKELY(PL_parser->recheck_utf8_validity)) { @@ -9049,171 +9387,171 @@ Perl_yylex(pTHX) PL_parser->recheck_utf8_validity = FALSE; } DEBUG_T( { - SV* tmp = newSVpvs(""); - PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", - (IV)CopLINE(PL_curcop), - lex_state_names[PL_lex_state], - exp_name[PL_expect], - pv_display(tmp, s, strlen(s), 0, 60)); - SvREFCNT_dec(tmp); + SV* tmp = newSVpvs(""); + PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", + (IV)CopLINE(PL_curcop), + lex_state_names[PL_lex_state], + exp_name[PL_expect], + pv_display(tmp, s, strlen(s), 0, 60)); + SvREFCNT_dec(tmp); } ); /* when we've already built the next token, just pull it out of the queue */ if (PL_nexttoke) { - PL_nexttoke--; - pl_yylval = PL_nextval[PL_nexttoke]; - { - I32 next_type; - next_type = PL_nexttype[PL_nexttoke]; - if (next_type & (7<<24)) { - if (next_type & (1<<24)) { - if (PL_lex_brackets > 100) - Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); - PL_lex_brackstack[PL_lex_brackets++] = - (char) ((next_type >> 16) & 0xff); - } - if (next_type & (2<<24)) - PL_lex_allbrackets++; - if (next_type & (4<<24)) - PL_lex_allbrackets--; - next_type &= 0xffff; - } - return REPORT(next_type == 'p' ? pending_ident() : next_type); - } - } - - switch (PL_lex_state) { + PL_nexttoke--; + pl_yylval = PL_nextval[PL_nexttoke]; + { + I32 next_type; + next_type = PL_nexttype[PL_nexttoke]; + if (next_type & (7<<24)) { + if (next_type & (1<<24)) { + if (PL_lex_brackets > 100) + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); + PL_lex_brackstack[PL_lex_brackets++] = + (char) ((U8) (next_type >> 16)); + } + if (next_type & (2<<24)) + PL_lex_allbrackets++; + if (next_type & (4<<24)) + PL_lex_allbrackets--; + next_type &= 0xffff; + } + return REPORT(next_type == 'p' ? pending_ident() : next_type); + } + } + + switch (PL_lex_state) { case LEX_NORMAL: case LEX_INTERPNORMAL: - break; + break; /* interpolated case modifiers like \L \U, including \Q and \E. when we get here, PL_bufptr is at the \ */ case LEX_INTERPCASEMOD: - /* handle \E or end of string */ + /* handle \E or end of string */ return yyl_interpcasemod(aTHX_ s); case LEX_INTERPPUSH: return REPORT(sublex_push()); case LEX_INTERPSTART: - if (PL_bufptr == PL_bufend) - return REPORT(sublex_done()); - DEBUG_T({ + if (PL_bufptr == PL_bufend) + return REPORT(sublex_done()); + DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); }); - PL_expect = XTERM; + PL_expect = XTERM; /* for /@a/, we leave the joining for the regex engine to do * (unless we're within \Q etc) */ - PL_lex_dojoin = (*PL_bufptr == '@' + PL_lex_dojoin = (*PL_bufptr == '@' && (!PL_lex_inpat || PL_lex_casemods)); - PL_lex_state = LEX_INTERPNORMAL; - if (PL_lex_dojoin) { - NEXTVAL_NEXTTOKE.ival = 0; - force_next(','); - force_ident("\"", '$'); - NEXTVAL_NEXTTOKE.ival = 0; - force_next('$'); - NEXTVAL_NEXTTOKE.ival = 0; - force_next((2<<24)|'('); - NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ - force_next(FUNC); - } - /* Convert (?{...}) and friends to 'do {...}' */ - if (PL_lex_inpat && *PL_bufptr == '(') { - PL_parser->lex_shared->re_eval_start = PL_bufptr; - PL_bufptr += 2; - if (*PL_bufptr != '{') - PL_bufptr++; - PL_expect = XTERMBLOCK; - force_next(DO); - } - - if (PL_lex_starts++) { - s = PL_bufptr; - /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ - if (!PL_lex_casemods && PL_lex_inpat) - TOKEN(','); - else - AopNOASSIGN(OP_CONCAT); - } - return yylex(); + PL_lex_state = LEX_INTERPNORMAL; + if (PL_lex_dojoin) { + NEXTVAL_NEXTTOKE.ival = 0; + force_next(PERLY_COMMA); + force_ident("\"", PERLY_DOLLAR); + NEXTVAL_NEXTTOKE.ival = 0; + force_next(PERLY_DOLLAR); + NEXTVAL_NEXTTOKE.ival = 0; + force_next((2<<24)|PERLY_PAREN_OPEN); + NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ + force_next(FUNC); + } + /* Convert (?{...}) and friends to 'do {...}' */ + if (PL_lex_inpat && *PL_bufptr == '(') { + PL_parser->lex_shared->re_eval_start = PL_bufptr; + PL_bufptr += 2; + if (*PL_bufptr != '{') + PL_bufptr++; + PL_expect = XTERMBLOCK; + force_next(DO); + } + + if (PL_lex_starts++) { + s = PL_bufptr; + /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ + if (!PL_lex_casemods && PL_lex_inpat) + TOKEN(PERLY_COMMA); + else + AopNOASSIGN(OP_CONCAT); + } + return yylex(); case LEX_INTERPENDMAYBE: - if (intuit_more(PL_bufptr, PL_bufend)) { - PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ - break; - } - /* FALLTHROUGH */ + if (intuit_more(PL_bufptr, PL_bufend)) { + PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ + break; + } + /* FALLTHROUGH */ case LEX_INTERPEND: - if (PL_lex_dojoin) { - const U8 dojoin_was = PL_lex_dojoin; - PL_lex_dojoin = FALSE; - PL_lex_state = LEX_INTERPCONCAT; - PL_lex_allbrackets--; - return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN); - } - if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl - && SvEVALED(PL_lex_repl)) - { - if (PL_bufptr != PL_bufend) - Perl_croak(aTHX_ "Bad evalled substitution pattern"); - PL_lex_repl = NULL; - } - /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets - re_eval_str. If the here-doc body’s length equals the previous - value of re_eval_start, re_eval_start will now be null. So - check re_eval_str as well. */ - if (PL_parser->lex_shared->re_eval_start - || PL_parser->lex_shared->re_eval_str) { - SV *sv; - if (*PL_bufptr != ')') - Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); - PL_bufptr++; - /* having compiled a (?{..}) expression, return the original - * text too, as a const */ - if (PL_parser->lex_shared->re_eval_str) { - sv = PL_parser->lex_shared->re_eval_str; - PL_parser->lex_shared->re_eval_str = NULL; - SvCUR_set(sv, - PL_bufptr - PL_parser->lex_shared->re_eval_start); - SvPV_shrink_to_cur(sv); - } - else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, - PL_bufptr - PL_parser->lex_shared->re_eval_start); - NEXTVAL_NEXTTOKE.opval = + if (PL_lex_dojoin) { + const U8 dojoin_was = PL_lex_dojoin; + PL_lex_dojoin = FALSE; + PL_lex_state = LEX_INTERPCONCAT; + PL_lex_allbrackets--; + return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN); + } + if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl + && SvEVALED(PL_lex_repl)) + { + if (PL_bufptr != PL_bufend) + Perl_croak(aTHX_ "Bad evalled substitution pattern"); + PL_lex_repl = NULL; + } + /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets + re_eval_str. If the here-doc body's length equals the previous + value of re_eval_start, re_eval_start will now be null. So + check re_eval_str as well. */ + if (PL_parser->lex_shared->re_eval_start + || PL_parser->lex_shared->re_eval_str) { + SV *sv; + if (*PL_bufptr != ')') + Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); + PL_bufptr++; + /* having compiled a (?{..}) expression, return the original + * text too, as a const */ + if (PL_parser->lex_shared->re_eval_str) { + sv = PL_parser->lex_shared->re_eval_str; + PL_parser->lex_shared->re_eval_str = NULL; + SvCUR_set(sv, + PL_bufptr - PL_parser->lex_shared->re_eval_start); + SvPV_shrink_to_cur(sv); + } + else sv = newSVpvn(PL_parser->lex_shared->re_eval_start, + PL_bufptr - PL_parser->lex_shared->re_eval_start); + NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, - sv); - force_next(THING); - PL_parser->lex_shared->re_eval_start = NULL; - PL_expect = XTERM; - return REPORT(','); - } - - /* FALLTHROUGH */ + sv); + force_next(THING); + PL_parser->lex_shared->re_eval_start = NULL; + PL_expect = XTERM; + return REPORT(PERLY_COMMA); + } + + /* FALLTHROUGH */ case LEX_INTERPCONCAT: #ifdef DEBUGGING - if (PL_lex_brackets) - Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", - (long) PL_lex_brackets); + if (PL_lex_brackets) + Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", + (long) PL_lex_brackets); #endif - if (PL_bufptr == PL_bufend) - return REPORT(sublex_done()); + if (PL_bufptr == PL_bufend) + return REPORT(sublex_done()); - /* m'foo' still needs to be parsed for possible (?{...}) */ - if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { - SV *sv = newSVsv(PL_linestr); - sv = tokeq(sv); + /* m'foo' still needs to be parsed for possible (?{...}) */ + if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { + SV *sv = newSVsv(PL_linestr); + sv = tokeq(sv); pl_yylval.opval = newSVOP(OP_CONST, 0, sv); - s = PL_bufend; - } - else { + s = PL_bufend; + } + else { int save_error_count = PL_error_count; - s = scan_const(PL_bufptr); + s = scan_const(PL_bufptr); /* Set flag if this was a pattern and there were errors. op.c will * refuse to compile a pattern with this flag set. Otherwise, we @@ -9221,30 +9559,30 @@ Perl_yylex(pTHX) if (PL_lex_inpat && PL_error_count > save_error_count) { ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; } - if (*s == '\\') - PL_lex_state = LEX_INTERPCASEMOD; - else - PL_lex_state = LEX_INTERPSTART; - } - - if (s != PL_bufptr) { - NEXTVAL_NEXTTOKE = pl_yylval; - PL_expect = XTERM; - force_next(THING); - if (PL_lex_starts++) { - /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ - if (!PL_lex_casemods && PL_lex_inpat) - TOKEN(','); - else - AopNOASSIGN(OP_CONCAT); - } - else { - PL_bufptr = s; - return yylex(); - } - } - - return yylex(); + if (*s == '\\') + PL_lex_state = LEX_INTERPCASEMOD; + else + PL_lex_state = LEX_INTERPSTART; + } + + if (s != PL_bufptr) { + NEXTVAL_NEXTTOKE = pl_yylval; + PL_expect = XTERM; + force_next(THING); + if (PL_lex_starts++) { + /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */ + if (!PL_lex_casemods && PL_lex_inpat) + TOKEN(PERLY_COMMA); + else + AopNOASSIGN(OP_CONCAT); + } + else { + PL_bufptr = s; + return yylex(); + } + } + + return yylex(); case LEX_FORMLINE: if (PL_parser->sub_error_count != PL_error_count) { /* There was an error parsing a formline, which tends to @@ -9254,12 +9592,12 @@ Perl_yylex(pTHX) */ yyquit(); } - assert(PL_lex_formbrack); - s = scan_formline(PL_bufptr); - if (!PL_lex_formbrack) + assert(PL_lex_formbrack); + s = scan_formline(PL_bufptr); + if (!PL_lex_formbrack) return yyl_rightcurly(aTHX_ s, 1); - PL_bufptr = s; - return yylex(); + PL_bufptr = s; + return yylex(); } /* We really do *not* want PL_linestr ever becoming a COW. */ @@ -9280,7 +9618,7 @@ Perl_yylex(pTHX) expecting an operator) have been a sigil. */ bool expected_operator = (PL_expect == XOPERATOR); - int ret = yyl_try(aTHX_ s, 0); + int ret = yyl_try(aTHX_ s); switch (pl_yylval.ival) { case OP_BIT_AND: case OP_MODULO: @@ -9313,12 +9651,12 @@ Perl_yylex(pTHX) Structure: if we're in a my declaration - croak if they tried to say my($foo::bar) - build the ops for a my() declaration + croak if they tried to say my($foo::bar) + build the ops for a my() declaration if it's an access to a my() variable - build ops for access to a my() variable + build ops for access to a my() variable if in a dq string, and they've said @foo and we can't find @foo - warn + warn build ops for a bareword */ @@ -9389,7 +9727,7 @@ S_pending_ident(pTHX) PL_in_my = 0; pl_yylval.opval = o; - return PRIVATEREF; + return PRIVATEREF; } } @@ -9398,16 +9736,16 @@ S_pending_ident(pTHX) */ if (!has_colon) { - if (!PL_in_my) - tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, + if (!PL_in_my) + tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len, 0); if (tmp != NOT_IN_PAD) { /* might be an "our" variable" */ if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { /* build ops for a bareword */ - HV * const stash = PAD_COMPNAME_OURSTASH(tmp); - HEK * const stashname = HvNAME_HEK(stash); - SV * const sym = newSVhek(stashname); + HV * const stash = PAD_COMPNAME_OURSTASH(tmp); + HEK * const stashname = HvNAME_HEK(stash); + SV * const sym = newSVhek(stashname); sv_catpvs(sym, "::"); sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); pl_yylval.opval = newSVOP(OP_CONST, 0, sym); @@ -9441,29 +9779,29 @@ S_pending_ident(pTHX) ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) - ) + ) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %" UTF8f - " in string", - UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); + "Possible unintended interpolation of %" UTF8f + " in string", + UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); } } /* build ops for a bareword */ pl_yylval.opval = newSVOP(OP_CONST, 0, - newSVpvn_flags(PL_tokenbuf + 1, + newSVpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, UTF ? SVf_UTF8 : 0 )); pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, - (PL_in_eval ? GV_ADDMULTI : GV_ADD) + (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), - ((PL_tokenbuf[0] == '$') ? SVt_PV - : (PL_tokenbuf[0] == '@') ? SVt_PVAV - : SVt_PVHV)); + ((PL_tokenbuf[0] == '$') ? SVt_PV + : (PL_tokenbuf[0] == '@') ? SVt_PVAV + : SVt_PVHV)); return BAREWORD; } @@ -9473,57 +9811,57 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) PERL_ARGS_ASSERT_CHECKCOMMA; if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */ - if (ckWARN(WARN_SYNTAX)) { - int level = 1; - const char *w; - for (w = s+2; *w && level; w++) { - if (*w == '(') - ++level; - else if (*w == ')') - --level; - } - while (isSPACE(*w)) - ++w; - /* the list of chars below is for end of statements or - * block / parens, boolean operators (&&, ||, //) and branch - * constructs (or, and, if, until, unless, while, err, for). - * Not a very solid hack... */ - if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "%s (...) interpreted as function",name); - } + if (ckWARN(WARN_SYNTAX)) { + int level = 1; + const char *w; + for (w = s+2; *w && level; w++) { + if (*w == '(') + ++level; + else if (*w == ')') + --level; + } + while (isSPACE(*w)) + ++w; + /* the list of chars below is for end of statements or + * block / parens, boolean operators (&&, ||, //) and branch + * constructs (or, and, if, until, unless, while, err, for). + * Not a very solid hack... */ + if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w)) + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "%s (...) interpreted as function",name); + } } while (s < PL_bufend && isSPACE(*s)) - s++; + s++; if (*s == '(') - s++; + s++; while (s < PL_bufend && isSPACE(*s)) - s++; + s++; if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { - const char * const w = s; + const char * const w = s; s += UTF ? UTF8SKIP(s) : 1; - while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) - s += UTF ? UTF8SKIP(s) : 1; - while (s < PL_bufend && isSPACE(*s)) - s++; - if (*s == ',') { - GV* gv; - if (keyword(w, s - w, 0)) - return; - - gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); - if (gv && GvCVu(gv)) - return; - if (s - w <= 254) { + while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) + s += UTF ? UTF8SKIP(s) : 1; + while (s < PL_bufend && isSPACE(*s)) + s++; + if (*s == ',') { + GV* gv; + if (keyword(w, s - w, 0)) + return; + + gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); + if (gv && GvCVu(gv)) + return; + if (s - w <= 254) { PADOFFSET off; - char tmpbuf[256]; - Copy(w, tmpbuf+1, s - w, char); - *tmpbuf = '&'; - off = pad_findmy_pvn(tmpbuf, s-w+1, 0); - if (off != NOT_IN_PAD) return; - } - Perl_croak(aTHX_ "No comma allowed after %s", what); - } + char tmpbuf[256]; + Copy(w, tmpbuf+1, s - w, char); + *tmpbuf = '&'; + off = pad_findmy_pvn(tmpbuf, s-w+1, 0); + if (off != NOT_IN_PAD) return; + } + Perl_croak(aTHX_ "No comma allowed after %s", what); + } } } @@ -9540,7 +9878,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) STATIC SV * S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, - SV *sv, SV *pv, const char *type, STRLEN typelen, + SV *sv, SV *pv, const char *type, STRLEN typelen, const char ** error_msg) { dSP; @@ -9550,82 +9888,38 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, SV **cvp; SV *cv, *typesv; const char *why1 = "", *why2 = "", *why3 = ""; + const char * optional_colon = ":"; /* Only some messages have a colon */ + char *msg; PERL_ARGS_ASSERT_NEW_CONSTANT; /* We assume that this is true: */ - if (*key == 'c') { assert (strEQ(key, "charnames")); } assert(type || s); sv_2mortal(sv); /* Parent created it permanently */ - if (!table - || ! (PL_hints & HINT_LOCALIZE_HH) - || ! (cvp = hv_fetch(table, key, keylen, FALSE)) - || ! SvOK(*cvp)) + + if ( ! table + || ! (PL_hints & HINT_LOCALIZE_HH)) { - char *msg; - - /* Here haven't found what we're looking for. If it is charnames, - * perhaps it needs to be loaded. Try doing that before giving up */ - if (*key == 'c') { - Perl_load_module(aTHX_ - 0, - newSVpvs("_charnames"), - /* version parameter; no need to specify it, as if - * we get too early a version, will fail anyway, - * not being able to find '_charnames' */ - NULL, - newSVpvs(":full"), - newSVpvs(":short"), - NULL); - assert(sp == PL_stack_sp); - table = GvHV(PL_hintgv); - if (table - && (PL_hints & HINT_LOCALIZE_HH) - && (cvp = hv_fetch(table, key, keylen, FALSE)) - && SvOK(*cvp)) - { - goto now_ok; - } - } - if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { - msg = Perl_form(aTHX_ - "Constant(%.*s) unknown", - (int)(type ? typelen : len), - (type ? type: s)); - } - else { - why1 = "$^H{"; - why2 = key; - why3 = "} is not defined"; - report: - if (*key == 'c') { - msg = Perl_form(aTHX_ - /* The +3 is for '\N{'; -4 for that, plus '}' */ - "Unknown charname '%.*s'", (int)typelen - 4, type + 3 - ); - } - else { - msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s", - (int)(type ? typelen : len), - (type ? type: s), why1, why2, why3); - } - } - if (error_msg) { - *error_msg = msg; - } - else { - yyerror_pv(msg, UTF ? SVf_UTF8 : 0); - } - return SvREFCNT_inc_simple_NN(sv); + why1 = "unknown"; + optional_colon = ""; + goto report; } - now_ok: + + cvp = hv_fetch(table, key, keylen, FALSE); + if (!cvp || !SvOK(*cvp)) { + why1 = "$^H{"; + why2 = key; + why3 = "} is not defined"; + goto report; + } + cv = *cvp; if (!pv && s) - pv = newSVpvn_flags(s, len, SVs_TEMP); + pv = newSVpvn_flags(s, len, SVs_TEMP); if (type && pv) - typesv = newSVpvn_flags(type, typelen, SVs_TEMP); + typesv = newSVpvn_flags(type, typelen, SVs_TEMP); else - typesv = &PL_sv_undef; + typesv = &PL_sv_undef; PUSHSTACKi(PERLSI_OVERLOAD); ENTER ; @@ -9634,10 +9928,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, PUSHMARK(SP) ; EXTEND(sp, 3); if (pv) - PUSHs(pv); + PUSHs(pv); PUSHs(sv); if (pv) - PUSHs(typesv); + PUSHs(typesv); PUTBACK; call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL)); @@ -9645,17 +9939,17 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, /* Check the eval first */ if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) { - STRLEN errlen; - const char * errstr; - sv_catpvs(errsv, "Propagated"); - errstr = SvPV_const(errsv, errlen); - yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ - (void)POPs; - res = SvREFCNT_inc_simple_NN(sv); + STRLEN errlen; + const char * errstr; + sv_catpvs(errsv, "Propagated"); + errstr = SvPV_const(errsv, errlen); + yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */ + (void)POPs; + res = SvREFCNT_inc_simple_NN(sv); } else { - res = POPs; - SvREFCNT_inc_simple_void_NN(res); + res = POPs; + SvREFCNT_inc_simple_void_NN(res); } PUTBACK ; @@ -9663,16 +9957,31 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, LEAVE ; POPSTACK; - if (!SvOK(res)) { - why1 = "Call to &{$^H{"; - why2 = key; - why3 = "}} did not return a defined value"; - sv = res; - (void)sv_2mortal(sv); - goto report; + if (SvOK(res)) { + return res; } - return res; + sv = res; + (void)sv_2mortal(sv); + + why1 = "Call to &{$^H{"; + why2 = key; + why3 = "}} did not return a defined value"; + + report: + + msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s", + (int)(type ? typelen : len), + (type ? type: s), + optional_colon, + why1, why2, why3); + if (error_msg) { + *error_msg = msg; + } + else { + yyerror_pv(msg, UTF ? SVf_UTF8 : 0); + } + return SvREFCNT_inc_simple_NN(sv); } PERL_STATIC_INLINE void @@ -9731,7 +10040,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) { char *this_d; - char *d2; + char *d2; Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */ d2 = this_d; SAVEFREEPV(this_d); @@ -9745,7 +10054,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, *d2++ = '\\'; *d2++ = *olds++; } - else + else *d2++ = *olds++; } Perl_warner(aTHX_ packWARN(WARN_SYNTAX), @@ -9806,13 +10115,18 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) PERL_ARGS_ASSERT_SCAN_IDENT; if (isSPACE(*s) || !*s) - s = skipspace(s); - if (isDIGIT(*s)) { - while (isDIGIT(*s)) { - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - *d++ = *s++; - } + s = skipspace(s); + if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */ + bool is_zero= *s == '0' ? TRUE : FALSE; + char *digit_start= d; + *d++ = *s++; + while (s < PL_bufend && isDIGIT(*s)) { + if (d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + *d++ = *s++; + } + if (is_zero && d - digit_start > 1) + Perl_croak(aTHX_ ident_var_zero_multi_digit); } else { /* See if it is a "normal" identifier */ parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); @@ -9822,9 +10136,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) if (*d) { /* Either a digit variable, or parse_ident() found an identifier (anything valid as a bareword), so job done and return. */ - if (PL_lex_state != LEX_NORMAL) - PL_lex_state = LEX_INTERPENDMAYBE; - return s; + if (PL_lex_state != LEX_NORMAL) + PL_lex_state = LEX_INTERPENDMAYBE; + return s; } /* Here, it is not a run-of-the-mill identifier name */ @@ -9839,13 +10153,13 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) /* Dereferencing a value in a scalar variable. The alternatives are different syntaxes for a scalar variable. Using ' as a leading package separator isn't allowed. :: is. */ - return s; + return s; } /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ if (*s == '{') { - bracket = s - SvPVX(PL_linestr); - s++; - orig_copline = CopLINE(PL_curcop); + bracket = s - SvPVX(PL_linestr); + s++; + orig_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { s = skipspace(s); } @@ -9864,19 +10178,32 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } else { *d = *s++; + /* special case to handle ${10}, ${11} the same way we handle ${1} etc */ + if (isDIGIT(*d)) { + bool is_zero= *d == '0' ? TRUE : FALSE; + char *digit_start= d; + while (s < PL_bufend && isDIGIT(*s)) { + d++; + if (d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + *d= *s++; + } + if (is_zero && d - digit_start > 1) + Perl_croak(aTHX_ ident_var_zero_multi_digit); + } d[1] = '\0'; } } /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ if (*d == '^' && *s && isCONTROLVAR(*s)) { - *d = toCTRL(*s); - s++; + *d = toCTRL(*s); + s++; } /* Warn about ambiguous code after unary operators if {...} notation isn't used. There's no difference in ambiguity; it's merely a heuristic about when not to warn. */ else if (ck_uni && bracket == -1) - check_uni(); + check_uni(); if (bracket != -1) { bool skip; char *s2; @@ -9909,26 +10236,26 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) if (s < PL_bufend && isSPACE(*s)) { s = skipspace(s); } - if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { + if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ - if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { - const char * const brack = - (const char *) - ((*s == '[') ? "[...]" : "{...}"); + if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { + const char * const brack = + (const char *) + ((*s == '[') ? "[...]" : "{...}"); orig_copline = CopLINE(PL_curcop); CopLINE_set(PL_curcop, tmp_copline); /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */ - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c{%s%s} resolved to %c%s%s", - funny, dest, brack, funny, dest, brack); + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c{%s%s} resolved to %c%s%s", + funny, dest, brack, funny, dest, brack); CopLINE_set(PL_curcop, orig_copline); - } - bracket++; - PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); - PL_lex_allbrackets++; - return s; - } - } + } + bracket++; + PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK); + PL_lex_allbrackets++; + return s; + } + } if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); @@ -9948,45 +10275,45 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) /* Now increment line numbers if applicable. */ if (skip) s = skipspace(s); - s++; - if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { - PL_lex_state = LEX_INTERPEND; - PL_expect = XREF; - } - if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { - if (ckWARN(WARN_AMBIGUOUS) + s++; + if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { + PL_lex_state = LEX_INTERPEND; + PL_expect = XREF; + } + if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) { + if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest, 0) - || get_cvn_flags(dest, d - dest, is_utf8 + || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0))) - { + { SV *tmp = newSVpvn_flags( dest, d - dest, SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) ); - if (funny == '#') - funny = '@'; + if (funny == '#') + funny = '@'; orig_copline = CopLINE(PL_curcop); CopLINE_set(PL_curcop, tmp_copline); - Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, - funny, SVfARG(tmp), funny, SVfARG(tmp)); + Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf, + funny, SVfARG(tmp), funny, SVfARG(tmp)); CopLINE_set(PL_curcop, orig_copline); - } - } - } - else { + } + } + } + else { /* Didn't find the closing } at the point we expected, so restore state such that the next thing to process is the opening { and */ - s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ + s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */ CopLINE_set(PL_curcop, orig_copline); PL_parser->herelines = herelines; - *dest = '\0'; + *dest = '\0'; PL_parser->sub_no_recover = TRUE; - } + } } else if ( PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s, PL_bufend)) - PL_lex_state = LEX_INTERPEND; + PL_lex_state = LEX_INTERPEND; return s; } @@ -10026,65 +10353,65 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break; case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; - case LOCALE_PAT_MOD: - if (*charset) { - goto multiple_charsets; - } - set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); - *charset = c; - break; - case UNICODE_PAT_MOD: - if (*charset) { - goto multiple_charsets; - } - set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); - *charset = c; - break; - case ASCII_RESTRICT_PAT_MOD: - if (! *charset) { - set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); - } - else { - - /* Error if previous modifier wasn't an 'a', but if it was, see - * if, and accept, a second occurrence (only) */ - if (*charset != 'a' - || get_regex_charset(*pmfl) - != REGEX_ASCII_RESTRICTED_CHARSET) - { - goto multiple_charsets; - } - set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); - } - *charset = c; - break; - case DEPENDS_PAT_MOD: - if (*charset) { - goto multiple_charsets; - } - set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); - *charset = c; - break; + case LOCALE_PAT_MOD: + if (*charset) { + goto multiple_charsets; + } + set_regex_charset(pmfl, REGEX_LOCALE_CHARSET); + *charset = c; + break; + case UNICODE_PAT_MOD: + if (*charset) { + goto multiple_charsets; + } + set_regex_charset(pmfl, REGEX_UNICODE_CHARSET); + *charset = c; + break; + case ASCII_RESTRICT_PAT_MOD: + if (! *charset) { + set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); + } + else { + + /* Error if previous modifier wasn't an 'a', but if it was, see + * if, and accept, a second occurrence (only) */ + if (*charset != 'a' + || get_regex_charset(*pmfl) + != REGEX_ASCII_RESTRICTED_CHARSET) + { + goto multiple_charsets; + } + set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET); + } + *charset = c; + break; + case DEPENDS_PAT_MOD: + if (*charset) { + goto multiple_charsets; + } + set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET); + *charset = c; + break; } (*s)++; return TRUE; multiple_charsets: - if (*charset != c) { - yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); - } - else if (c == 'a') { + if (*charset != c) { + yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); + } + else if (c == 'a') { /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */ - yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); - } - else { - yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); - } + yyerror("Regexp modifier \"/a\" may appear a maximum of twice"); + } + else { + yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c)); + } - /* Pretend that it worked, so will continue processing before dieing */ - (*s)++; - return TRUE; + /* Pretend that it worked, so will continue processing before dieing */ + (*s)++; + return TRUE; } STATIC char * @@ -10093,7 +10420,7 @@ S_scan_pat(pTHX_ char *start, I32 type) PMOP *pm; char *s; const char * const valid_flags = - (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); + (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ unsigned int x_mod_count = 0; @@ -10101,48 +10428,48 @@ S_scan_pat(pTHX_ char *start, I32 type) s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL); if (!s) - Perl_croak(aTHX_ "Search pattern not terminated"); + Perl_croak(aTHX_ "Search pattern not terminated"); pm = (PMOP*)newPMOP(type, 0); if (PL_multi_open == '?') { - /* This is the only point in the code that sets PMf_ONCE: */ - pm->op_pmflags |= PMf_ONCE; - - /* Hence it's safe to do this bit of PMOP book-keeping here, which - allows us to restrict the list needed by reset to just the ?? - matches. */ - assert(type != OP_TRANS); - if (PL_curstash) { - MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); - U32 elements; - if (!mg) { - mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, - 0); - } - elements = mg->mg_len / sizeof(PMOP**); - Renewc(mg->mg_ptr, elements + 1, PMOP*, char); - ((PMOP**)mg->mg_ptr) [elements++] = pm; - mg->mg_len = elements * sizeof(PMOP**); - PmopSTASH_set(pm,PL_curstash); - } + /* This is the only point in the code that sets PMf_ONCE: */ + pm->op_pmflags |= PMf_ONCE; + + /* Hence it's safe to do this bit of PMOP book-keeping here, which + allows us to restrict the list needed by reset to just the ?? + matches. */ + assert(type != OP_TRANS); + if (PL_curstash) { + MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab); + U32 elements; + if (!mg) { + mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0, + 0); + } + elements = mg->mg_len / sizeof(PMOP**); + Renewc(mg->mg_ptr, elements + 1, PMOP*, char); + ((PMOP**)mg->mg_ptr) [elements++] = pm; + mg->mg_len = elements * sizeof(PMOP**); + PmopSTASH_set(pm,PL_curstash); + } } /* if qr/...(?{..}).../, then need to parse the pattern within a new * anon CV. False positives like qr/[(?{]/ are harmless */ if (type == OP_QR) { - STRLEN len; - char *e, *p = SvPV(PL_lex_stuff, len); - e = p + len; - for (; p < e; p++) { - if (p[0] == '(' && p[1] == '?' - && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) - { - pm->op_pmflags |= PMf_HAS_CV; - break; - } - } - pm->op_pmflags |= PMf_IS_QR; + STRLEN len; + char *e, *p = SvPV(PL_lex_stuff, len); + e = p + len; + for (; p < e; p++) { + if (p[0] == '(' && p[1] == '?' + && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) + { + pm->op_pmflags |= PMf_HAS_CV; + break; + } + } + pm->op_pmflags |= PMf_IS_QR; } while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), @@ -10152,7 +10479,7 @@ S_scan_pat(pTHX_ char *start, I32 type) if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)) { Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), - "Use of /c modifier is meaningless without /g" ); + "Use of /c modifier is meaningless without /g" ); } PL_lex_op = (OP*)pm; @@ -10180,7 +10507,7 @@ S_scan_subst(pTHX_ char *start) s = scan_str(start, TRUE, FALSE, FALSE, &t); if (!s) - Perl_croak(aTHX_ "Substitution pattern not terminated"); + Perl_croak(aTHX_ "Substitution pattern not terminated"); s = t; @@ -10188,9 +10515,9 @@ S_scan_subst(pTHX_ char *start) first_line = CopLINE(PL_curcop); s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) { - SvREFCNT_dec_NN(PL_lex_stuff); - PL_lex_stuff = NULL; - Perl_croak(aTHX_ "Substitution replacement not terminated"); + SvREFCNT_dec_NN(PL_lex_stuff); + PL_lex_stuff = NULL; + Perl_croak(aTHX_ "Substitution replacement not terminated"); } PL_multi_start = first_start; /* so whole substitution is taken together */ @@ -10198,15 +10525,15 @@ S_scan_subst(pTHX_ char *start) while (*s) { - if (*s == EXEC_PAT_MOD) { - s++; - es++; - } - else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), + if (*s == EXEC_PAT_MOD) { + s++; + es++; + } + else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset, &x_mod_count)) - { - break; - } + { + break; + } } if ((pm->op_pmflags & PMf_CONTINUE)) { @@ -10214,24 +10541,24 @@ S_scan_subst(pTHX_ char *start) } if (es) { - SV * const repl = newSVpvs(""); + SV * const repl = newSVpvs(""); - PL_multi_end = 0; - pm->op_pmflags |= PMf_EVAL; + PL_multi_end = 0; + pm->op_pmflags |= PMf_EVAL; for (; es > 1; es--) { sv_catpvs(repl, "eval "); } sv_catpvs(repl, "do {"); - sv_catsv(repl, PL_parser->lex_sub_repl); - sv_catpvs(repl, "}"); - SvREFCNT_dec(PL_parser->lex_sub_repl); - PL_parser->lex_sub_repl = repl; + sv_catsv(repl, PL_parser->lex_sub_repl); + sv_catpvs(repl, "}"); + SvREFCNT_dec(PL_parser->lex_sub_repl); + PL_parser->lex_sub_repl = repl; } linediff = CopLINE(PL_curcop) - first_line; if (linediff) - CopLINE_set(PL_curcop, first_line); + CopLINE_set(PL_curcop, first_line); if (linediff || es) { /* the IVX field indicates that the replacement string is a s///e; @@ -10265,36 +10592,36 @@ S_scan_trans(pTHX_ char *start) s = scan_str(start,FALSE,FALSE,FALSE,&t); if (!s) - Perl_croak(aTHX_ "Transliteration pattern not terminated"); + Perl_croak(aTHX_ "Transliteration pattern not terminated"); s = t; s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) { - SvREFCNT_dec_NN(PL_lex_stuff); - PL_lex_stuff = NULL; - Perl_croak(aTHX_ "Transliteration replacement not terminated"); + SvREFCNT_dec_NN(PL_lex_stuff); + PL_lex_stuff = NULL; + Perl_croak(aTHX_ "Transliteration replacement not terminated"); } complement = del = squash = 0; while (1) { - switch (*s) { - case 'c': - complement = OPpTRANS_COMPLEMENT; - break; - case 'd': - del = OPpTRANS_DELETE; - break; - case 's': - squash = OPpTRANS_SQUASH; - break; - case 'r': - nondestruct = 1; - break; - default: - goto no_more; - } - s++; + switch (*s) { + case 'c': + complement = OPpTRANS_COMPLEMENT; + break; + case 'd': + del = OPpTRANS_DELETE; + break; + case 's': + squash = OPpTRANS_SQUASH; + break; + case 'r': + nondestruct = 1; + break; + default: + goto no_more; + } + s++; } no_more: @@ -10359,46 +10686,46 @@ S_scan_heredoc(pTHX_ char *s) peek = s; if (*peek == '~') { - indented = TRUE; - peek++; s++; + indented = TRUE; + peek++; s++; } while (SPACE_OR_TAB(*peek)) - peek++; + peek++; if (*peek == '`' || *peek == '\'' || *peek =='"') { - s = peek; - term = *s++; - s = delimcpy(d, e, s, PL_bufend, term, &len); - if (s == PL_bufend) - Perl_croak(aTHX_ "Unterminated delimiter for here document"); - d += len; - s++; + s = peek; + term = *s++; + s = delimcpy(d, e, s, PL_bufend, term, &len); + if (s == PL_bufend) + Perl_croak(aTHX_ "Unterminated delimiter for here document"); + d += len; + s++; } else { - if (*s == '\\') + if (*s == '\\') /* <<\FOO is equivalent to <<'FOO' */ - s++, term = '\''; - else - term = '"'; + s++, term = '\''; + else + term = '"'; - if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) - Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); + if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) + Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); - peek = s; + peek = s; while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { - peek += UTF ? UTF8SKIP(peek) : 1; - } + peek += UTF ? UTF8SKIP(peek) : 1; + } - len = (peek - s >= e - d) ? (e - d) : (peek - s); - Copy(s, d, len, char); - s += len; - d += len; + len = (peek - s >= e - d) ? (e - d) : (peek - s); + Copy(s, d, len, char); + s += len; + d += len; } if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1) - Perl_croak(aTHX_ "Delimiter for here document is too long"); + Perl_croak(aTHX_ "Delimiter for here document is too long"); *d++ = '\n'; *d = '\0'; @@ -10407,37 +10734,37 @@ S_scan_heredoc(pTHX_ char *s) #ifndef PERL_STRICT_CR d = (char *) memchr(s, '\r', PL_bufend - s); if (d) { - char * const olds = s; - s = d; - while (s < PL_bufend) { - if (*s == '\r') { - *d++ = '\n'; - if (*++s == '\n') - s++; - } - else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ - *d++ = *s++; - s++; - } - else - *d++ = *s++; - } - *d = '\0'; - PL_bufend = d; - SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); - s = olds; + char * const olds = s; + s = d; + while (s < PL_bufend) { + if (*s == '\r') { + *d++ = '\n'; + if (*++s == '\n') + s++; + } + else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */ + *d++ = *s++; + s++; + } + else + *d++ = *s++; + } + *d = '\0'; + PL_bufend = d; + SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr)); + s = olds; } #endif tmpstr = newSV_type(SVt_PVIV); SvGROW(tmpstr, 80); if (term == '\'') { - op_type = OP_CONST; - SvIV_set(tmpstr, -1); + op_type = OP_CONST; + SvIV_set(tmpstr, -1); } else if (term == '`') { - op_type = OP_BACKTICK; - SvIV_set(tmpstr, '\\'); + op_type = OP_BACKTICK; + SvIV_set(tmpstr, '\\'); } PL_multi_start = origline + 1 + PL_parser->herelines; @@ -10445,14 +10772,14 @@ S_scan_heredoc(pTHX_ char *s) /* inside a string eval or quote-like operator */ if (!infile || PL_lex_inwhat) { - SV *linestr; - char *bufend; - char * const olds = s; - PERL_CONTEXT * const cx = CX_CUR(); - /* These two fields are not set until an inner lexing scope is - entered. But we need them set here. */ - shared->ls_bufptr = s; - shared->ls_linestr = PL_linestr; + SV *linestr; + char *bufend; + char * const olds = s; + PERL_CONTEXT * const cx = CX_CUR(); + /* These two fields are not set until an inner lexing scope is + entered. But we need them set here. */ + shared->ls_bufptr = s; + shared->ls_linestr = PL_linestr; if (PL_lex_inwhat) { /* Look for a newline. If the current buffer does not have one, @@ -10460,10 +10787,10 @@ S_scan_heredoc(pTHX_ char *s) up as many levels as necessary to find one with a newline after bufptr. */ - while (!(s = (char *)memchr( + while (!(s = (char *)memchr( (void *)shared->ls_bufptr, '\n', SvEND(shared->ls_linestr)-shared->ls_bufptr - ))) + ))) { shared = shared->ls_prev; /* shared is only null if we have gone beyond the outermost @@ -10488,100 +10815,100 @@ S_scan_heredoc(pTHX_ char *s) } } } - else { /* eval or we've already hit EOF */ - s = (char*)memchr((void*)s, '\n', PL_bufend - s); - if (!s) + else { /* eval or we've already hit EOF */ + s = (char*)memchr((void*)s, '\n', PL_bufend - s); + if (!s) goto interminable; - } - - linestr = shared->ls_linestr; - bufend = SvEND(linestr); - d = s; - if (indented) { - char *myolds = s; - - while (s < bufend - len + 1) { - if (*s++ == '\n') - ++PL_parser->herelines; - - if (memEQ(s, PL_tokenbuf + 1, len - 1)) { - char *backup = s; - indent_len = 0; - - /* Only valid if it's preceded by whitespace only */ - while (backup != myolds && --backup >= myolds) { - if (! SPACE_OR_TAB(*backup)) { - break; - } - indent_len++; - } - - /* No whitespace or all! */ - if (backup == s || *backup == '\n') { - Newx(indent, indent_len + 1, char); - memcpy(indent, backup + 1, indent_len); - indent[indent_len] = 0; - s--; /* before our delimiter */ - PL_parser->herelines--; /* this line doesn't count */ - break; - } - } - } - } + } + + linestr = shared->ls_linestr; + bufend = SvEND(linestr); + d = s; + if (indented) { + char *myolds = s; + + while (s < bufend - len + 1) { + if (*s++ == '\n') + ++PL_parser->herelines; + + if (memEQ(s, PL_tokenbuf + 1, len - 1)) { + char *backup = s; + indent_len = 0; + + /* Only valid if it's preceded by whitespace only */ + while (backup != myolds && --backup >= myolds) { + if (! SPACE_OR_TAB(*backup)) { + break; + } + indent_len++; + } + + /* No whitespace or all! */ + if (backup == s || *backup == '\n') { + Newx(indent, indent_len + 1, char); + memcpy(indent, backup + 1, indent_len); + indent[indent_len] = 0; + s--; /* before our delimiter */ + PL_parser->herelines--; /* this line doesn't count */ + break; + } + } + } + } else { - while (s < bufend - len + 1 - && memNE(s,PL_tokenbuf,len) ) - { - if (*s++ == '\n') - ++PL_parser->herelines; - } - } - - if (s >= bufend - len + 1) { - goto interminable; - } - - sv_setpvn(tmpstr,d+1,s-d); - s += len - 1; - /* the preceding stmt passes a newline */ - PL_parser->herelines++; - - /* s now points to the newline after the heredoc terminator. - d points to the newline before the body of the heredoc. - */ - - /* We are going to modify linestr in place here, so set - aside copies of the string if necessary for re-evals or - (caller $n)[6]. */ - /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we - check shared->re_eval_str. */ - if (shared->re_eval_start || shared->re_eval_str) { - /* Set aside the rest of the regexp */ - if (!shared->re_eval_str) - shared->re_eval_str = - newSVpvn(shared->re_eval_start, - bufend - shared->re_eval_start); - shared->re_eval_start -= s-d; - } - - if (cxstack_ix >= 0 + while (s < bufend - len + 1 + && memNE(s,PL_tokenbuf,len) ) + { + if (*s++ == '\n') + ++PL_parser->herelines; + } + } + + if (s >= bufend - len + 1) { + goto interminable; + } + + sv_setpvn(tmpstr,d+1,s-d); + s += len - 1; + /* the preceding stmt passes a newline */ + PL_parser->herelines++; + + /* s now points to the newline after the heredoc terminator. + d points to the newline before the body of the heredoc. + */ + + /* We are going to modify linestr in place here, so set + aside copies of the string if necessary for re-evals or + (caller $n)[6]. */ + /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we + check shared->re_eval_str. */ + if (shared->re_eval_start || shared->re_eval_str) { + /* Set aside the rest of the regexp */ + if (!shared->re_eval_str) + shared->re_eval_str = + newSVpvn(shared->re_eval_start, + bufend - shared->re_eval_start); + shared->re_eval_start -= s-d; + } + + if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL && cx->blk_eval.cur_text == linestr) { - cx->blk_eval.cur_text = newSVsv(linestr); - cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ - } - - /* Copy everything from s onwards back to d. */ - Move(s,d,bufend-s + 1,char); - SvCUR_set(linestr, SvCUR(linestr) - (s-d)); - /* Setting PL_bufend only applies when we have not dug deeper - into other scopes, because sublex_done sets PL_bufend to - SvEND(PL_linestr). */ - if (shared == PL_parser->lex_shared) + cx->blk_eval.cur_text = newSVsv(linestr); + cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */ + } + + /* Copy everything from s onwards back to d. */ + Move(s,d,bufend-s + 1,char); + SvCUR_set(linestr, SvCUR(linestr) - (s-d)); + /* Setting PL_bufend only applies when we have not dug deeper + into other scopes, because sublex_done sets PL_bufend to + SvEND(PL_linestr). */ + if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr); - s = olds; + s = olds; } else { SV *linestr_save; @@ -10610,7 +10937,7 @@ S_scan_heredoc(pTHX_ char *s) does not matter what PL_linestr points to, since we are about to croak; but in a quote-like op, linestr_save will have been prospectively freed already, via - SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to + SAVEFREESV(PL_linestr) in sublex_push, so it's easier to restore PL_linestr. */ SvREFCNT_dec_NN(PL_linestr); PL_linestr = linestr_save; @@ -10706,59 +11033,59 @@ S_scan_heredoc(pTHX_ char *s) PL_multi_end = origline + PL_parser->herelines; if (indented && indent) { - STRLEN linecount = 1; - STRLEN herelen = SvCUR(tmpstr); - char *ss = SvPVX(tmpstr); - char *se = ss + herelen; + STRLEN linecount = 1; + STRLEN herelen = SvCUR(tmpstr); + char *ss = SvPVX(tmpstr); + char *se = ss + herelen; SV *newstr = newSV(herelen+1); SvPOK_on(newstr); - /* Trim leading whitespace */ - while (ss < se) { - /* newline only? Copy and move on */ - if (*ss == '\n') { - sv_catpvs(newstr,"\n"); - ss++; - linecount++; + /* Trim leading whitespace */ + while (ss < se) { + /* newline only? Copy and move on */ + if (*ss == '\n') { + sv_catpvs(newstr,"\n"); + ss++; + linecount++; - /* Found our indentation? Strip it */ - } + /* Found our indentation? Strip it */ + } else if (se - ss >= indent_len - && memEQ(ss, indent, indent_len)) - { - STRLEN le = 0; - ss += indent_len; + && memEQ(ss, indent, indent_len)) + { + STRLEN le = 0; + ss += indent_len; - while ((ss + le) < se && *(ss + le) != '\n') - le++; + while ((ss + le) < se && *(ss + le) != '\n') + le++; - sv_catpvn(newstr, ss, le); - ss += le; + sv_catpvn(newstr, ss, le); + ss += le; - /* Line doesn't begin with our indentation? Croak */ - } + /* Line doesn't begin with our indentation? Croak */ + } else { Safefree(indent); - Perl_croak(aTHX_ - "Indentation on line %d of here-doc doesn't match delimiter", - (int)linecount - ); - } - } /* while */ + Perl_croak(aTHX_ + "Indentation on line %d of here-doc doesn't match delimiter", + (int)linecount + ); + } + } /* while */ /* avoid sv_setsv() as we dont wan't to COW here */ sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr)); - Safefree(indent); - SvREFCNT_dec_NN(newstr); + Safefree(indent); + SvREFCNT_dec_NN(newstr); } if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { - SvPV_shrink_to_cur(tmpstr); + SvPV_shrink_to_cur(tmpstr); } if (!IN_BYTES) { - if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) - SvUTF8_on(tmpstr); + if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); } PL_lex_stuff = tmpstr; @@ -10767,7 +11094,7 @@ S_scan_heredoc(pTHX_ char *s) interminable: if (indent) - Safefree(indent); + Safefree(indent); SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, origline); missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); @@ -10777,7 +11104,7 @@ S_scan_heredoc(pTHX_ char *s) /* scan_inputsymbol takes: position of first '<' in input buffer returns: position of first char following the matching '>' in - input buffer + input buffer side-effects: pl_yylval and lex_op are set. This code handles: @@ -10806,7 +11133,7 @@ S_scan_inputsymbol(pTHX_ char *start) end = (char *) memchr(s, '\n', PL_bufend - s); if (!end) - end = PL_bufend; + end = PL_bufend; if (s[1] == '<' && s[2] == '>' && s[3] == '>') { nomagicopen = TRUE; *d = '\0'; @@ -10821,9 +11148,9 @@ S_scan_inputsymbol(pTHX_ char *start) */ if (len >= (I32)sizeof PL_tokenbuf) - Perl_croak(aTHX_ "Excessively long <> operator"); + Perl_croak(aTHX_ "Excessively long <> operator"); if (s >= end) - Perl_croak(aTHX_ "Unterminated <> operator"); + Perl_croak(aTHX_ "Unterminated <> operator"); s++; @@ -10838,7 +11165,7 @@ S_scan_inputsymbol(pTHX_ char *start) /* allow or */ while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') { - d += UTF ? UTF8SKIP(d) : 1; + d += UTF ? UTF8SKIP(d) : 1; } /* If we've tried to read what we allow filehandles to look like, and @@ -10848,86 +11175,91 @@ S_scan_inputsymbol(pTHX_ char *start) */ if (d - PL_tokenbuf != len) { - pl_yylval.ival = OP_GLOB; - s = scan_str(start,FALSE,FALSE,FALSE,NULL); - if (!s) - Perl_croak(aTHX_ "Glob not terminated"); - return s; + pl_yylval.ival = OP_GLOB; + s = scan_str(start,FALSE,FALSE,FALSE,NULL); + if (!s) + Perl_croak(aTHX_ "Glob not terminated"); + return s; } else { - bool readline_overriden = FALSE; - GV *gv_readline; - /* we're in a filehandle read situation */ - d = PL_tokenbuf; - - /* turn <> into */ - if (!len) - Copy("ARGV",d,5,char); - - /* Check whether readline() is overriden */ - if ((gv_readline = gv_override("readline",8))) - readline_overriden = TRUE; - - /* if <$fh>, create the ops to turn the variable into a - filehandle - */ - if (*d == '$') { - /* try to find it in the pad for this block, otherwise find - add symbol table ops - */ - const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); - if (tmp != NOT_IN_PAD) { - if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { - HV * const stash = PAD_COMPNAME_OURSTASH(tmp); - HEK * const stashname = HvNAME_HEK(stash); - SV * const sym = sv_2mortal(newSVhek(stashname)); - sv_catpvs(sym, "::"); - sv_catpv(sym, d+1); - d = SvPVX(sym); - goto intro_sym; - } - else { - OP * const o = newOP(OP_PADSV, 0); - o->op_targ = tmp; - PL_lex_op = readline_overriden + bool readline_overriden = FALSE; + GV *gv_readline; + /* we're in a filehandle read situation */ + d = PL_tokenbuf; + + /* turn <> into */ + if (!len) + Copy("ARGV",d,5,char); + + /* Check whether readline() is overriden */ + if ((gv_readline = gv_override("readline",8))) + readline_overriden = TRUE; + + /* if <$fh>, create the ops to turn the variable into a + filehandle + */ + if (*d == '$') { + /* try to find it in the pad for this block, otherwise find + add symbol table ops + */ + const PADOFFSET tmp = pad_findmy_pvn(d, len, 0); + if (tmp != NOT_IN_PAD) { + if (PAD_COMPNAME_FLAGS_isOUR(tmp)) { + HV * const stash = PAD_COMPNAME_OURSTASH(tmp); + HEK * const stashname = HvNAME_HEK(stash); + SV * const sym = sv_2mortal(newSVhek(stashname)); + sv_catpvs(sym, "::"); + sv_catpv(sym, d+1); + d = SvPVX(sym); + goto intro_sym; + } + else { + OP * const o = newOP(OP_PADSV, 0); + o->op_targ = tmp; + PL_lex_op = readline_overriden ? newUNOP(OP_ENTERSUB, OPf_STACKED, - op_append_elem(OP_LIST, o, - newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) + op_append_elem(OP_LIST, o, + newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) : newUNOP(OP_READLINE, 0, o); - } - } - else { - GV *gv; - ++d; + } + } + else { + GV *gv; + ++d; intro_sym: - gv = gv_fetchpv(d, - GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), - SVt_PV); - PL_lex_op = readline_overriden + gv = gv_fetchpv(d, + GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ), + SVt_PV); + PL_lex_op = readline_overriden ? newUNOP(OP_ENTERSUB, OPf_STACKED, - op_append_elem(OP_LIST, - newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), - newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + op_append_elem(OP_LIST, + newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) : newUNOP(OP_READLINE, 0, - newUNOP(OP_RV2SV, 0, - newGVOP(OP_GV, 0, gv))); - } - /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ - pl_yylval.ival = OP_NULL; - } - - /* If it's none of the above, it must be a literal filehandle - ( or ) so build a simple readline OP */ - else { - GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); - PL_lex_op = readline_overriden + newUNOP(OP_RV2SV, 0, + newGVOP(OP_GV, 0, gv))); + } + /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */ + pl_yylval.ival = OP_NULL; + } + + /* If it's none of the above, it must be a literal filehandle + ( or ) so build a simple readline OP */ + else { + GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO); + PL_lex_op = readline_overriden ? newUNOP(OP_ENTERSUB, OPf_STACKED, - op_append_elem(OP_LIST, - newGVOP(OP_GV, 0, gv), - newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + op_append_elem(OP_LIST, + newGVOP(OP_GV, 0, gv), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv)); - pl_yylval.ival = OP_NULL; - } + pl_yylval.ival = OP_NULL; + + /* leave the token generation above to avoid confusing the parser */ + if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) { + no_bareword_filehandle(d); + } + } } return s; @@ -10936,36 +11268,36 @@ S_scan_inputsymbol(pTHX_ char *start) /* scan_str takes: - start position in buffer + start position in buffer keep_bracketed_quoted preserve \ quoting of embedded delimiters, but only if they are of the open/close form - keep_delims preserve the delimiters around the string - re_reparse compiling a run-time /(?{})/: - collapse // to /, and skip encoding src - delimp if non-null, this is set to the position of - the closing delimiter, or just after it if - the closing and opening delimiters differ - (i.e., the opening delimiter of a substitu- - tion replacement) + keep_delims preserve the delimiters around the string + re_reparse compiling a run-time /(?{})/: + collapse // to /, and skip encoding src + delimp if non-null, this is set to the position of + the closing delimiter, or just after it if + the closing and opening delimiters differ + (i.e., the opening delimiter of a substitu- + tion replacement) returns: position to continue reading from buffer side-effects: multi_start, multi_close, lex_repl or lex_stuff, and - updates the read buffer. + updates the read buffer. This subroutine pulls a string out of the input. It is called for: - q single quotes q(literal text) - ' single quotes 'literal text' - qq double quotes qq(interpolate $here please) - " double quotes "interpolate $here please" - qx backticks qx(/bin/ls -l) - ` backticks `/bin/ls -l` - qw quote words @EXPORT_OK = qw( func() $spam ) - m// regexp match m/this/ - s/// regexp substitute s/this/that/ - tr/// string transliterate tr/this/that/ - y/// string transliterate y/this/that/ - ($*@) sub prototypes sub foo ($) - (stuff) sub attr parameters sub foo : attr(stuff) - <> readline or globs , <>, <$fh>, or <*.c> + q single quotes q(literal text) + ' single quotes 'literal text' + qq double quotes qq(interpolate $here please) + " double quotes "interpolate $here please" + qx backticks qx(/bin/ls -l) + ` backticks `/bin/ls -l` + qw quote words @EXPORT_OK = qw( func() $spam ) + m// regexp match m/this/ + s/// regexp substitute s/this/that/ + tr/// string transliterate tr/this/that/ + y/// string transliterate y/this/that/ + ($*@) sub prototypes sub foo ($) + (stuff) sub attr parameters sub foo : attr(stuff) + <> readline or globs , <>, <$fh>, or <*.c> In most of these cases (all but <>, patterns and transliterate) yylex() calls scan_str(). m// makes yylex() call scan_pat() which @@ -10988,25 +11320,20 @@ S_scan_inputsymbol(pTHX_ char *start) char * Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse, - char **delimp + char **delimp ) { SV *sv; /* scalar value: string */ - const char *tmps; /* temp string, used for delimiter matching */ char *s = start; /* current position in the buffer */ - char term; /* terminating character */ char *to; /* current position in the sv's data */ - I32 brackets = 1; /* bracket nesting level */ + int brackets = 1; /* bracket nesting level */ bool d_is_utf8 = FALSE; /* is there any utf8 content? */ - IV termcode; /* terminating char. code */ - U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */ - STRLEN termlen; /* length of terminating string */ + UV open_delim_code; /* code point */ + char open_delim_str[UTF8_MAXBYTES+1]; + STRLEN delim_byte_len; /* each delimiter currently is the same number + of bytes */ line_t herelines; - /* The delimiters that have a mirror-image closing one */ - const char * opening_delims = "([{<"; - const char * closing_delims = ")]}>"; - /* The only non-UTF character that isn't a stand alone grapheme is * white-space, hence can't be a delimiter. */ const char * non_grapheme_msg = "Use of unassigned code point or" @@ -11015,43 +11342,122 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int PERL_ARGS_ASSERT_SCAN_STR; /* skip space before the delimiter */ - if (isSPACE(*s)) { - s = skipspace(s); + if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so + 'start' also has to change */ + s = start = skipspace(s); } /* mark where we are, in case we need to report errors */ CLINE; - /* after skipping whitespace, the next character is the terminator */ - term = *s; - if (!UTF || UTF8_IS_INVARIANT(term)) { - termcode = termstr[0] = term; - termlen = 1; + /* after skipping whitespace, the next character is the delimiter */ + if (! UTF || UTF8_IS_INVARIANT(*s)) { + open_delim_code = (U8) *s; + open_delim_str[0] = *s; + delim_byte_len = 1; } else { - termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); - if (UTF && UNLIKELY(! is_grapheme((U8 *) start, - (U8 *) s, - (U8 *) PL_bufend, - termcode))) + open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, + &delim_byte_len); + if (UNLIKELY(! is_grapheme((U8 *) start, + (U8 *) s, + (U8 *) PL_bufend, + open_delim_code))) { yyerror(non_grapheme_msg); } - Copy(s, termstr, termlen, U8); + Copy(s, open_delim_str, delim_byte_len, char); } + open_delim_str[delim_byte_len] = '\0'; /* Only for safety */ + /* mark where we are */ PL_multi_start = CopLINE(PL_curcop); - PL_multi_open = termcode; + PL_multi_open = open_delim_code; herelines = PL_parser->herelines; + const char * legal_paired_opening_delims; + const char * legal_paired_closing_delims; + const char * deprecated_opening_delims; + if (FEATURE_MORE_DELIMS_IS_ENABLED) { + if (UTF) { + legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS; + legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS; + + /* We are deprecating using a closing delimiter as the opening, in + * case we want in the future to accept them reversed. The string + * may include ones that are legal, but the code below won't look + * at this string unless it didn't find a legal opening one */ + deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS; + } + else { + legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS; + legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS; + deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS; + } + } + else { + legal_paired_opening_delims = "([{<"; + legal_paired_closing_delims = ")]}>"; + deprecated_opening_delims = (UTF) + ? DEPRECATED_OPENING_UTF8_BRACKETS + : DEPRECATED_OPENING_NON_UTF8_BRACKETS; + } + + const char * legal_paired_opening_delims_end = legal_paired_opening_delims + + strlen(legal_paired_opening_delims); + const char * deprecated_delims_end = deprecated_opening_delims + + strlen(deprecated_opening_delims); + + const char * close_delim_str = open_delim_str; + UV close_delim_code = open_delim_code; + /* If the delimiter has a mirror-image closing one, get it */ - if (term && (tmps = strchr(opening_delims, term))) { - termcode = termstr[0] = term = closing_delims[tmps - opening_delims]; + const char *tmps = ninstr(legal_paired_opening_delims, + legal_paired_opening_delims_end, + open_delim_str, open_delim_str + delim_byte_len); + if (tmps) { + /* Here, there is a paired delimiter, and tmps points to its position + in the string of the accepted opening paired delimiters. The + corresponding position in the string of closing ones is the + beginning of the paired mate. Both contain the same number of + bytes. */ + close_delim_str = legal_paired_closing_delims + + (tmps - legal_paired_opening_delims); + + /* The list of paired delimiters contains all the ASCII ones that have + * always been legal, and no other ASCIIs. Don't raise a message if + * using one of these */ + if (! isASCII(open_delim_code)) { + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS), + "Use of '%" UTF8f "' is experimental as a string delimiter", + UTF8fARG(UTF, delim_byte_len, open_delim_str)); + } + + close_delim_code = (UTF) + ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL) + : * (U8 *) close_delim_str; + } + else { /* Here, the delimiter isn't paired, hence the close is the same as + the open; and has aready been set up. But make sure it isn't + deprecated to use this particular delimiter, as we plan + eventually to make it paired. */ + if (ninstr(deprecated_opening_delims, deprecated_delims_end, + open_delim_str, open_delim_str + delim_byte_len)) + { + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), + "Use of '%" UTF8f "' is deprecated as a string delimiter", + UTF8fARG(UTF, delim_byte_len, open_delim_str)); + } + + /* Note that a NUL may be used as a delimiter, and this happens when + * delimitting an empty string, and no special handling for it is + * needed, as ninstr() calls are used */ } - PL_multi_close = termcode; + PL_multi_close = close_delim_code; if (PL_multi_open == PL_multi_close) { keep_bracketed_quoted = FALSE; @@ -11060,145 +11466,144 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int /* create a new SV to hold the contents. 79 is the SV's initial length. What a random number. */ sv = newSV_type(SVt_PVIV); - SvGROW(sv, 80); - SvIV_set(sv, termcode); + SvGROW(sv, 79); + SvIV_set(sv, close_delim_code); (void)SvPOK_only(sv); /* validate pointer */ /* move past delimiter and try to read a complete string */ if (keep_delims) - sv_catpvn(sv, s, termlen); - s += termlen; + sv_catpvn(sv, s, delim_byte_len); + s += delim_byte_len; for (;;) { - /* extend sv if need be */ - SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); - /* set 'to' to the next character in the sv's string */ - to = SvPVX(sv)+SvCUR(sv); - - /* if open delimiter is the close delimiter read unbridle */ - if (PL_multi_open == PL_multi_close) { - for (; s < PL_bufend; s++,to++) { - /* embedded newlines increment the current line number */ - if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) - COPLINE_INC_WITH_HERELINES; - /* handle quoted delimiters */ - if (*s == '\\' && s+1 < PL_bufend && term != '\\') { - if (!keep_bracketed_quoted - && (s[1] == term - || (re_reparse && s[1] == '\\')) - ) - s++; - else /* any other quotes are simply copied straight through */ - *to++ = *s++; - } - /* terminate when run out of buffer (the for() condition), or - have found the terminator */ - else if (*s == term) { /* First byte of terminator matches */ - if (termlen == 1) /* If is the only byte, are done */ - break; - - /* If the remainder of the terminator matches, also are - * done, after checking that is a separate grapheme */ - if ( s + termlen <= PL_bufend - && memEQ(s + 1, (char*)termstr + 1, termlen - 1)) - { - if ( UTF - && UNLIKELY(! is_grapheme((U8 *) start, - (U8 *) s, - (U8 *) PL_bufend, - termcode))) - { - yyerror(non_grapheme_msg); - } - break; - } - } - else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) { - d_is_utf8 = TRUE; + /* extend sv if need be */ + SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1); + /* set 'to' to the next character in the sv's string */ + to = SvPVX(sv)+SvCUR(sv); + + /* read until we run out of string, or we find the closing delimiter */ + while (s < PL_bufend) { + /* embedded newlines increment the line count */ + if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) + COPLINE_INC_WITH_HERELINES; + + /* backslashes can escape the closing delimiter */ + if ( *s == '\\' && s < PL_bufend - delim_byte_len + + /* ... but not if the delimiter itself is a backslash */ + && close_delim_code != '\\') + { + /* Here, we have an escaping backslash. If we're supposed to + * discard those that escape the closing delimiter, just + * discard this one */ + if ( ! keep_bracketed_quoted + && ( memEQ(s + 1, open_delim_str, delim_byte_len) + || ( PL_multi_open == PL_multi_close + && re_reparse && s[1] == '\\') + || memEQ(s + 1, close_delim_str, delim_byte_len))) + { + s++; } - - *to = *s; - } - } - - /* if the terminator isn't the same as the start character (e.g., - matched brackets), we have to allow more in the quoting, and - be prepared for nested brackets. - */ - else { - /* read until we run out of string, or we find the terminator */ - for (; s < PL_bufend; s++,to++) { - /* embedded newlines increment the line count */ - if (*s == '\n' && !PL_rsfp && !PL_parser->filtered) - COPLINE_INC_WITH_HERELINES; - /* backslashes can escape the open or closing characters */ - if (*s == '\\' && s+1 < PL_bufend) { - if (!keep_bracketed_quoted - && ( ((UV)s[1] == PL_multi_open) - || ((UV)s[1] == PL_multi_close) )) - { - s++; - } - else - *to++ = *s++; + else /* any other escapes are simply copied straight through */ + *to++ = *s++; + } + else if ( s < PL_bufend - (delim_byte_len - 1) + && memEQ(s, close_delim_str, delim_byte_len) + && --brackets <= 0) + { + /* Found unescaped closing delimiter, unnested if we care about + * that; so are done. + * + * In the case of the opening and closing delimiters being + * different, we have to deal with nesting; the conditional + * above makes sure we don't get here until the nesting level, + * 'brackets', is back down to zero. In the other case, + * nesting isn't an issue, and 'brackets' never can get + * incremented above 0, so will come here at the first closing + * delimiter. + * + * Only grapheme delimiters are legal. */ + if ( UTF /* All Non-UTF-8's are graphemes */ + && UNLIKELY(! is_grapheme((U8 *) start, + (U8 *) s, + (U8 *) PL_bufend, + close_delim_code))) + { + yyerror(non_grapheme_msg); } - /* allow nested opens and closes */ - else if ((UV)*s == PL_multi_close && --brackets <= 0) - break; - else if ((UV)*s == PL_multi_open) - brackets++; - else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) - d_is_utf8 = TRUE; - *to = *s; - } - } - /* terminate the copied string and update the sv's end-of-string */ - *to = '\0'; - SvCUR_set(sv, to - SvPVX_const(sv)); - - /* - * this next chunk reads more into the buffer if we're not done yet - */ - - if (s < PL_bufend) - break; /* handle case where we are done yet :-) */ + + break; + } + /* No nesting if open eq close */ + else if ( PL_multi_open != PL_multi_close + && s < PL_bufend - (delim_byte_len - 1) + && memEQ(s, open_delim_str, delim_byte_len)) + { + brackets++; + } + + /* Here, still in the middle of the string; copy this character */ + if (! UTF || UTF8_IS_INVARIANT((U8) *s)) { + *to++ = *s++; + } + else { + size_t this_char_len = UTF8SKIP(s); + Copy(s, to, this_char_len, char); + s += this_char_len; + to += this_char_len; + + d_is_utf8 = TRUE; + } + } /* End of loop through buffer */ + + /* Here, found end of the string, OR ran out of buffer: terminate the + * copied string and update the sv's end-of-string */ + *to = '\0'; + SvCUR_set(sv, to - SvPVX_const(sv)); + + /* + * this next chunk reads more into the buffer if we're not done yet + */ + + if (s < PL_bufend) + break; /* handle case where we are done yet :-) */ #ifndef PERL_STRICT_CR - if (to - SvPVX_const(sv) >= 2) { - if ( (to[-2] == '\r' && to[-1] == '\n') + if (to - SvPVX_const(sv) >= 2) { + if ( (to[-2] == '\r' && to[-1] == '\n') || (to[-2] == '\n' && to[-1] == '\r')) - { - to[-2] = '\n'; - to--; - SvCUR_set(sv, to - SvPVX_const(sv)); - } - else if (to[-1] == '\r') - to[-1] = '\n'; - } - else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') - to[-1] = '\n'; + { + to[-2] = '\n'; + to--; + SvCUR_set(sv, to - SvPVX_const(sv)); + } + else if (to[-1] == '\r') + to[-1] = '\n'; + } + else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') + to[-1] = '\n'; #endif - /* if we're out of file, or a read fails, bail and reset the current - line marker so we can report where the unterminated string began - */ - COPLINE_INC_WITH_HERELINES; - PL_bufptr = PL_bufend; - if (!lex_next_chunk(0)) { - sv_free(sv); - CopLINE_set(PL_curcop, (line_t)PL_multi_start); - return NULL; - } - s = start = PL_bufptr; - } + /* if we're out of file, or a read fails, bail and reset the current + line marker so we can report where the unterminated string began + */ + COPLINE_INC_WITH_HERELINES; + PL_bufptr = PL_bufend; + if (!lex_next_chunk(0)) { + sv_free(sv); + CopLINE_set(PL_curcop, (line_t)PL_multi_start); + return NULL; + } + s = start = PL_bufptr; + } /* End of infinite loop */ /* at this point, we have successfully read the delimited string */ if (keep_delims) - sv_catpvn(sv, s, termlen); - s += termlen; + sv_catpvn(sv, s, delim_byte_len); + s += delim_byte_len; if (d_is_utf8) - SvUTF8_on(sv); + SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); CopLINE_set(PL_curcop, PL_multi_start); @@ -11206,8 +11611,8 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int /* if we allocated too much space, give some back */ if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvLEN_set(sv, SvCUR(sv) + 1); - SvPV_renew(sv, SvLEN(sv)); + SvLEN_set(sv, SvCUR(sv) + 1); + SvPV_shrink_to_cur(sv); } /* decide whether this is the first or second quoted string we've read @@ -11215,10 +11620,10 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int */ if (PL_lex_stuff) - PL_parser->lex_sub_repl = sv; + PL_parser->lex_sub_repl = sv; else - PL_lex_stuff = sv; - if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s; + PL_lex_stuff = sv; + if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s; return s; } @@ -11233,7 +11638,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12. \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34 0b[01](_?[01])* binary integers - 0[0-7](_?[0-7])* octal integers + 0o?[0-7](_?[0-7])* octal integers 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats @@ -11259,13 +11664,13 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) bool warned_about_underscore = 0; I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */ #define WARN_ABOUT_UNDERSCORE() \ - do { \ - if (!warned_about_underscore) { \ - warned_about_underscore = 1; \ - Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ - "Misplaced _ in number"); \ - } \ - } while(0) + do { \ + if (!warned_about_underscore) { \ + warned_about_underscore = 1; \ + Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \ + "Misplaced _ in number"); \ + } \ + } while(0) /* Hexadecimal floating point. * * In many places (where we have quads and NV is IEEE 754 double) @@ -11289,6 +11694,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) NV hexfp_mult = 1.0; UV high_non_zero = 0; /* highest digit */ int non_zero_integer_digits = 0; + bool new_octal = FALSE; /* octal with "0o" prefix */ PERL_ARGS_ASSERT_SCAN_NUM; @@ -11296,145 +11702,145 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) switch (*s) { default: - Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); + Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); /* if it starts with a 0, it could be an octal number, a decimal in 0.13 disguise, or a hexadecimal number, or a binary number. */ case '0': - { - /* variables: - u holds the "number so far" - overflowed was the number more than we can hold? - - Shift is used when we add a digit. It also serves as an "are - we in octal/hex/binary?" indicator to disallow hex characters - when in octal mode. - */ - NV n = 0.0; - UV u = 0; - bool overflowed = FALSE; - bool just_zero = TRUE; /* just plain 0 or binary number? */ + { + /* variables: + u holds the "number so far" + overflowed was the number more than we can hold? + + Shift is used when we add a digit. It also serves as an "are + we in octal/hex/binary?" indicator to disallow hex characters + when in octal mode. + */ + NV n = 0.0; + UV u = 0; + bool overflowed = FALSE; + bool just_zero = TRUE; /* just plain 0 or binary number? */ bool has_digs = FALSE; - static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; - static const char* const bases[5] = - { "", "binary", "", "octal", "hexadecimal" }; - static const char* const Bases[5] = - { "", "Binary", "", "Octal", "Hexadecimal" }; - static const char* const maxima[5] = - { "", - "0b11111111111111111111111111111111", - "", - "037777777777", - "0xffffffff" }; - const char *base, *Base, *max; - - /* check for hex */ - if (isALPHA_FOLD_EQ(s[1], 'x')) { - shift = 4; - s += 2; - just_zero = FALSE; - } else if (isALPHA_FOLD_EQ(s[1], 'b')) { - shift = 1; - s += 2; - just_zero = FALSE; - } - /* check for a decimal in disguise */ - else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) - goto decimal; - /* so it must be octal */ - else { - shift = 3; - s++; - } - - if (*s == '_') { - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - } - - base = bases[shift]; - Base = Bases[shift]; - max = maxima[shift]; - - /* read the rest of the number */ - for (;;) { - /* x is used in the overflow test, - b is the digit we're adding on. */ - UV x, b; - - switch (*s) { - - /* if we don't mention it, we're done */ - default: - goto out; - - /* _ are ignored -- but warned about if consecutive */ - case '_': - if (lastub && s == lastub + 1) - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - break; - - /* 8 and 9 are not octal */ - case '8': case '9': - if (shift == 3) - yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); - /* FALLTHROUGH */ - - /* octal digits */ - case '2': case '3': case '4': - case '5': case '6': case '7': - if (shift == 1) - yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); - /* FALLTHROUGH */ - - case '0': case '1': - b = *s++ & 15; /* ASCII digit -> value of digit */ - goto digit; - - /* hex digits */ - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - /* make sure they said 0x */ - if (shift != 4) - goto out; - b = (*s++ & 7) + 9; - - /* Prepare to put the digit we have onto the end - of the number so far. We check for overflows. - */ - - digit: - just_zero = FALSE; + static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 }; + static const char* const bases[5] = + { "", "binary", "", "octal", "hexadecimal" }; + static const char* const Bases[5] = + { "", "Binary", "", "Octal", "Hexadecimal" }; + static const char* const maxima[5] = + { "", + "0b11111111111111111111111111111111", + "", + "037777777777", + "0xffffffff" }; + + /* check for hex */ + if (isALPHA_FOLD_EQ(s[1], 'x')) { + shift = 4; + s += 2; + just_zero = FALSE; + } else if (isALPHA_FOLD_EQ(s[1], 'b')) { + shift = 1; + s += 2; + just_zero = FALSE; + } + /* check for a decimal in disguise */ + else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e')) + goto decimal; + /* so it must be octal */ + else { + shift = 3; + s++; + if (isALPHA_FOLD_EQ(*s, 'o')) { + s++; + just_zero = FALSE; + new_octal = TRUE; + } + } + + if (*s == '_') { + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + } + + /* read the rest of the number */ + for (;;) { + /* x is used in the overflow test, + b is the digit we're adding on. */ + UV x, b; + + switch (*s) { + + /* if we don't mention it, we're done */ + default: + goto out; + + /* _ are ignored -- but warned about if consecutive */ + case '_': + if (lastub && s == lastub + 1) + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + break; + + /* 8 and 9 are not octal */ + case '8': case '9': + if (shift == 3) + yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s)); + /* FALLTHROUGH */ + + /* octal digits */ + case '2': case '3': case '4': + case '5': case '6': case '7': + if (shift == 1) + yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s)); + /* FALLTHROUGH */ + + case '0': case '1': + b = *s++ & 15; /* ASCII digit -> value of digit */ + goto digit; + + /* hex digits */ + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + /* make sure they said 0x */ + if (shift != 4) + goto out; + b = (*s++ & 7) + 9; + + /* Prepare to put the digit we have onto the end + of the number so far. We check for overflows. + */ + + digit: + just_zero = FALSE; has_digs = TRUE; - if (!overflowed) { - assert(shift >= 0); - x = u << shift; /* make room for the digit */ + if (!overflowed) { + assert(shift >= 0); + x = u << shift; /* make room for the digit */ total_bits += shift; - if ((x >> shift) != u - && !(PL_hints & HINT_NEW_BINARY)) { - overflowed = TRUE; - n = (NV) u; - Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in %s number", - base); - } else - u = x | b; /* add the digit to the end */ - } - if (overflowed) { - n *= nvshift[shift]; - /* If an NV has not enough bits in its - * mantissa to represent an UV this summing of - * small low-order numbers is a waste of time - * (because the NV cannot preserve the - * low-order bits anyway): we could just - * remember when did we overflow and in the - * end just multiply n by the right - * amount. */ - n += (NV) b; - } + if ((x >> shift) != u + && !(PL_hints & HINT_NEW_BINARY)) { + overflowed = TRUE; + n = (NV) u; + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in %s number", + bases[shift]); + } else + u = x | b; /* add the digit to the end */ + } + if (overflowed) { + n *= nvshift[shift]; + /* If an NV has not enough bits in its + * mantissa to represent an UV this summing of + * small low-order numbers is a waste of time + * (because the NV cannot preserve the + * low-order bits anyway): we could just + * remember when did we overflow and in the + * end just multiply n by the right + * amount. */ + n += (NV) b; + } if (high_non_zero == 0 && b > 0) high_non_zero = b; @@ -11448,18 +11854,18 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) goto out; } - break; - } - } + break; + } + } - /* if we get here, we had success: make a scalar value from - the number. - */ - out: + /* if we get here, we had success: make a scalar value from + the number. + */ + out: - /* final misplaced underbar check */ - if (s[-1] == '_') - WARN_ABOUT_UNDERSCORE(); + /* final misplaced underbar check */ + if (s[-1] == '_') + WARN_ABOUT_UNDERSCORE(); if (UNLIKELY(HEXFP_PEEK(s))) { /* Do sloppy (on the underbars) but quick detection @@ -11500,7 +11906,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (significant_bits < NV_MANT_DIG) { /* We are in the long "run" of xdigits, * accumulate the full four bits. */ - assert(shift >= 0); + assert(shift >= 0); hexfp_uquad <<= shift; hexfp_uquad |= b; hexfp_frac_bits += shift; @@ -11513,9 +11919,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) significant_bits - NV_MANT_DIG; if (tail <= 0) tail += shift; - assert(tail >= 0); + assert(tail >= 0); hexfp_uquad <<= tail; - assert((shift - tail) >= 0); + assert((shift - tail) >= 0); hexfp_uquad |= b >> (shift - tail); hexfp_frac_bits += tail; @@ -11622,8 +12028,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } } - if (shift != 3 && !has_digs) { - /* 0x or 0b with no digits, treat it as an error. + if (!just_zero && !has_digs) { + /* 0x, 0o or 0b with no digits, treat it as an error. Originally this backed up the parse before the b or x, but that has the potential for silent changes in behaviour, like for: "0x.3" and "0x+$foo". @@ -11633,34 +12039,36 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) if (*d) ++d; /* so the user sees the bad non-digit */ PL_bufptr = (char *)d; /* so yyerror reports the context */ yyerror(Perl_form(aTHX_ "No digits found for %s literal", - shift == 4 ? "hexadecimal" : "binary")); + bases[shift])); PL_bufptr = oldbp; } - if (overflowed) { - if (n > 4294967295.0) - Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); - sv = newSVnv(n); - } - else { + if (overflowed) { + if (n > 4294967295.0) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Bases[shift], + new_octal ? "0o37777777777" : maxima[shift]); + sv = newSVnv(n); + } + else { #if UVSIZE > 4 - if (u > 0xffffffff) - Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), - "%s number > %s non-portable", - Base, max); + if (u > 0xffffffff) + Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), + "%s number > %s non-portable", + Bases[shift], + new_octal ? "0o37777777777" : maxima[shift]); #endif - sv = newSVuv(u); - } - if (just_zero && (PL_hints & HINT_NEW_INTEGER)) - sv = new_constant(start, s - start, "integer", - sv, NULL, NULL, 0, NULL); - else if (PL_hints & HINT_NEW_BINARY) - sv = new_constant(start, s - start, "binary", + sv = newSVuv(u); + } + if (just_zero && (PL_hints & HINT_NEW_INTEGER)) + sv = new_constant(start, s - start, "integer", + sv, NULL, NULL, 0, NULL); + else if (PL_hints & HINT_NEW_BINARY) + sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0, NULL); - } - break; + } + break; /* handle decimal numbers. @@ -11669,8 +12077,8 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': decimal: - d = PL_tokenbuf; - e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ + d = PL_tokenbuf; + e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */ floatit = FALSE; if (hexfp) { floatit = TRUE; @@ -11681,6 +12089,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) s = start + 2; break; case 3: + if (new_octal) { + *d++ = 'o'; + s = start + 2; + break; + } s = start + 1; break; case 1: @@ -11692,75 +12105,75 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } } - /* read next group of digits and _ and copy into d */ - while (isDIGIT(*s) + /* read next group of digits and _ and copy into d */ + while (isDIGIT(*s) || *s == '_' || UNLIKELY(hexfp && isXDIGIT(*s))) { - /* skip underscores, checking for misplaced ones - if -w is on - */ - if (*s == '_') { - if (lastub && s == lastub + 1) - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - } - else { - /* check for end of fixed-length buffer */ - if (d >= e) - Perl_croak(aTHX_ "%s", number_too_long); - /* if we're ok, copy the character */ - *d++ = *s++; - } - } - - /* final misplaced underbar check */ - if (lastub && s == lastub + 1) - WARN_ABOUT_UNDERSCORE(); - - /* read a decimal portion if there is one. avoid - 3..5 being interpreted as the number 3. followed - by .5 - */ - if (*s == '.' && s[1] != '.') { - floatit = TRUE; - *d++ = *s++; - - if (*s == '_') { - WARN_ABOUT_UNDERSCORE(); - lastub = s; - } - - /* copy, ignoring underbars, until we run out of digits. - */ - for (; isDIGIT(*s) + /* skip underscores, checking for misplaced ones + if -w is on + */ + if (*s == '_') { + if (lastub && s == lastub + 1) + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + } + else { + /* check for end of fixed-length buffer */ + if (d >= e) + Perl_croak(aTHX_ "%s", number_too_long); + /* if we're ok, copy the character */ + *d++ = *s++; + } + } + + /* final misplaced underbar check */ + if (lastub && s == lastub + 1) + WARN_ABOUT_UNDERSCORE(); + + /* read a decimal portion if there is one. avoid + 3..5 being interpreted as the number 3. followed + by .5 + */ + if (*s == '.' && s[1] != '.') { + floatit = TRUE; + *d++ = *s++; + + if (*s == '_') { + WARN_ABOUT_UNDERSCORE(); + lastub = s; + } + + /* copy, ignoring underbars, until we run out of digits. + */ + for (; isDIGIT(*s) || *s == '_' || UNLIKELY(hexfp && isXDIGIT(*s)); s++) { - /* fixed length buffer check */ - if (d >= e) - Perl_croak(aTHX_ "%s", number_too_long); - if (*s == '_') { - if (lastub && s == lastub + 1) - WARN_ABOUT_UNDERSCORE(); - lastub = s; - } - else - *d++ = *s; - } - /* fractional part ending in underbar? */ - if (s[-1] == '_') - WARN_ABOUT_UNDERSCORE(); - if (*s == '.' && isDIGIT(s[1])) { - /* oops, it's really a v-string, but without the "v" */ - s = start; - goto vstring; - } - } - - /* read exponent part, if present */ - if ((isALPHA_FOLD_EQ(*s, 'e') + /* fixed length buffer check */ + if (d >= e) + Perl_croak(aTHX_ "%s", number_too_long); + if (*s == '_') { + if (lastub && s == lastub + 1) + WARN_ABOUT_UNDERSCORE(); + lastub = s; + } + else + *d++ = *s; + } + /* fractional part ending in underbar? */ + if (s[-1] == '_') + WARN_ABOUT_UNDERSCORE(); + if (*s == '.' && isDIGIT(s[1])) { + /* oops, it's really a v-string, but without the "v" */ + s = start; + goto vstring; + } + } + + /* read exponent part, if present */ + if ((isALPHA_FOLD_EQ(*s, 'e') || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) && memCHRs("+-0123456789_", s[1])) { @@ -11771,47 +12184,47 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* regardless of whether user said 3E5 or 3e5, use lower 'e', ditto for p (hexfloats) */ if ((isALPHA_FOLD_EQ(*s, 'e'))) { - /* At least some Mach atof()s don't grok 'E' */ + /* At least some Mach atof()s don't grok 'E' */ *d++ = 'e'; } else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) { *d++ = 'p'; } - s++; + s++; - /* stray preinitial _ */ - if (*s == '_') { - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - } + /* stray preinitial _ */ + if (*s == '_') { + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + } - /* allow positive or negative exponent */ - if (*s == '+' || *s == '-') - *d++ = *s++; + /* allow positive or negative exponent */ + if (*s == '+' || *s == '-') + *d++ = *s++; - /* stray initial _ */ - if (*s == '_') { - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - } + /* stray initial _ */ + if (*s == '_') { + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + } - /* read digits of exponent */ - while (isDIGIT(*s) || *s == '_') { - if (isDIGIT(*s)) { + /* read digits of exponent */ + while (isDIGIT(*s) || *s == '_') { + if (isDIGIT(*s)) { ++exp_digits; - if (d >= e) - Perl_croak(aTHX_ "%s", number_too_long); - *d++ = *s++; - } - else { - if (((lastub && s == lastub + 1) + if (d >= e) + Perl_croak(aTHX_ "%s", number_too_long); + *d++ = *s++; + } + else { + if (((lastub && s == lastub + 1) || (!isDIGIT(s[1]) && s[1] != '_'))) - WARN_ABOUT_UNDERSCORE(); - lastub = s++; - } - } + WARN_ABOUT_UNDERSCORE(); + lastub = s++; + } + } if (!exp_digits) { /* no exponent digits, the [eEpP] could be for something else, @@ -11826,34 +12239,34 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) else { floatit = TRUE; } - } + } - /* + /* We try to do an integer conversion first if no characters indicating "float" have been found. - */ + */ - if (!floatit) { - UV uv; - const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); + if (!floatit) { + UV uv; + const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv); if (flags == IS_NUMBER_IN_UV) { if (uv <= IV_MAX) - sv = newSViv(uv); /* Prefer IVs over UVs. */ + sv = newSViv(uv); /* Prefer IVs over UVs. */ else - sv = newSVuv(uv); + sv = newSVuv(uv); } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) { if (uv <= (UV) IV_MIN) sv = newSViv(-(IV)uv); else - floatit = TRUE; + floatit = TRUE; } else floatit = TRUE; } - if (floatit) { - /* terminate the string */ - *d = '\0'; + if (floatit) { + /* terminate the string */ + *d = '\0'; if (UNLIKELY(hexfp)) { # ifdef NV_MANT_DIG if (significant_bits > NV_MANT_DIG) @@ -11869,35 +12282,35 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) nv = Atof(PL_tokenbuf); } sv = newSVnv(nv); - } + } - if ( floatit - ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { - const char *const key = floatit ? "float" : "integer"; - const STRLEN keylen = floatit ? 5 : 7; - sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, - key, keylen, sv, NULL, NULL, 0, NULL); - } - break; + if ( floatit + ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) { + const char *const key = floatit ? "float" : "integer"; + const STRLEN keylen = floatit ? 5 : 7; + sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf, + key, keylen, sv, NULL, NULL, 0, NULL); + } + break; /* if it starts with a v, it could be a v-string */ case 'v': vstring: - sv = newSV(5); /* preallocate storage space */ - ENTER_with_name("scan_vstring"); - SAVEFREESV(sv); - s = scan_vstring(s, PL_bufend, sv); - SvREFCNT_inc_simple_void_NN(sv); - LEAVE_with_name("scan_vstring"); - break; + sv = newSV(5); /* preallocate storage space */ + ENTER_with_name("scan_vstring"); + SAVEFREESV(sv); + s = scan_vstring(s, PL_bufend, sv); + SvREFCNT_inc_simple_void_NN(sv); + LEAVE_with_name("scan_vstring"); + break; } /* make the op for the constant and return */ if (sv) - lvalp->opval = newSVOP(OP_CONST, 0, sv); + lvalp->opval = newSVOP(OP_CONST, 0, sv); else - lvalp->opval = NULL; + lvalp->opval = NULL; return (char *)s; } @@ -11913,89 +12326,89 @@ S_scan_formline(pTHX_ char *s) while (!needargs) { char *eol; - if (*s == '.') { + if (*s == '.') { char *t = s+1; #ifdef PERL_STRICT_CR - while (SPACE_OR_TAB(*t)) - t++; + while (SPACE_OR_TAB(*t)) + t++; #else - while (SPACE_OR_TAB(*t) || *t == '\r') - t++; + while (SPACE_OR_TAB(*t) || *t == '\r') + t++; #endif - if (*t == '\n' || t == PL_bufend) { - eofmt = TRUE; - break; - } - } - eol = (char *) memchr(s,'\n',PL_bufend-s); - if (!eol++) - eol = PL_bufend; - if (*s != '#') { + if (*t == '\n' || t == PL_bufend) { + eofmt = TRUE; + break; + } + } + eol = (char *) memchr(s,'\n',PL_bufend-s); + if (!eol++) + eol = PL_bufend; + if (*s != '#') { char *t; - for (t = s; t < eol; t++) { - if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { - needargs = FALSE; - goto enough; /* ~~ must be first line in formline */ - } - if (*t == '@' || *t == '^') - needargs = TRUE; - } - if (eol > s) { - sv_catpvn(stuff, s, eol-s); + for (t = s; t < eol; t++) { + if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { + needargs = FALSE; + goto enough; /* ~~ must be first line in formline */ + } + if (*t == '@' || *t == '^') + needargs = TRUE; + } + if (eol > s) { + sv_catpvn(stuff, s, eol-s); #ifndef PERL_STRICT_CR - if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { - char *end = SvPVX(stuff) + SvCUR(stuff); - end[-2] = '\n'; - end[-1] = '\0'; - SvCUR_set(stuff, SvCUR(stuff) - 1); - } + if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') { + char *end = SvPVX(stuff) + SvCUR(stuff); + end[-2] = '\n'; + end[-1] = '\0'; + SvCUR_set(stuff, SvCUR(stuff) - 1); + } #endif - } - else - break; - } - s = (char*)eol; - if ((PL_rsfp || PL_parser->filtered) - && PL_parser->form_lex_state == LEX_NORMAL) { - bool got_some; - PL_bufptr = PL_bufend; - COPLINE_INC_WITH_HERELINES; - got_some = lex_next_chunk(0); - CopLINE_dec(PL_curcop); - s = PL_bufptr; - if (!got_some) - break; - } - incline(s, PL_bufend); + } + else + break; + } + s = (char*)eol; + if ((PL_rsfp || PL_parser->filtered) + && PL_parser->form_lex_state == LEX_NORMAL) { + bool got_some; + PL_bufptr = PL_bufend; + COPLINE_INC_WITH_HERELINES; + got_some = lex_next_chunk(0); + CopLINE_dec(PL_curcop); + s = PL_bufptr; + if (!got_some) + break; + } + incline(s, PL_bufend); } enough: if (!SvCUR(stuff) || needargs) - PL_lex_state = PL_parser->form_lex_state; + PL_lex_state = PL_parser->form_lex_state; if (SvCUR(stuff)) { - PL_expect = XSTATE; - if (needargs) { - const char *s2 = s; - while (isSPACE(*s2) && *s2 != '\n') - s2++; - if (*s2 == '{') { - PL_expect = XTERMBLOCK; - NEXTVAL_NEXTTOKE.ival = 0; - force_next(DO); - } - NEXTVAL_NEXTTOKE.ival = 0; - force_next(FORMLBRACK); - } - if (!IN_BYTES) { - if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) - SvUTF8_on(stuff); - } + PL_expect = XSTATE; + if (needargs) { + const char *s2 = s; + while (isSPACE(*s2) && *s2 != '\n') + s2++; + if (*s2 == '{') { + PL_expect = XTERMBLOCK; + NEXTVAL_NEXTTOKE.ival = 0; + force_next(DO); + } + NEXTVAL_NEXTTOKE.ival = 0; + force_next(FORMLBRACK); + } + if (!IN_BYTES) { + if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff))) + SvUTF8_on(stuff); + } NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff); - force_next(THING); + force_next(THING); } else { - SvREFCNT_dec(stuff); - if (eofmt) - PL_lex_formbrack = 0; + SvREFCNT_dec(stuff); + if (eofmt) + PL_lex_formbrack = 0; } return s; } @@ -12018,7 +12431,7 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv)); CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax; if (outsidecv && CvPADLIST(outsidecv)) - CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; + CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id; return oldsavestack_ix; } @@ -12118,7 +12531,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) * processing unconditionally */ if (s != NULL) { - if (!yychar || (yychar == ';' && !PL_rsfp)) + if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp)) sv_catpvs(where_sv, "at EOF"); else if ( PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr @@ -12126,37 +12539,18 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) && PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) { - /* - Only for NetWare: - The code below is removed for NetWare because it - abends/crashes on NetWare when the script has error such as - not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ -#ifndef NETWARE while (isSPACE(*PL_oldoldbufptr)) PL_oldoldbufptr++; -#endif context = PL_oldoldbufptr; contlen = PL_bufptr - PL_oldoldbufptr; } else if ( PL_oldbufptr && PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 - && PL_oldbufptr != PL_bufptr) { - /* - Only for NetWare: - The code below is removed for NetWare because it - abends/crashes on NetWare when the script has error such as - not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ -#ifndef NETWARE + && PL_oldbufptr != PL_bufptr) + { while (isSPACE(*PL_oldbufptr)) PL_oldbufptr++; -#endif context = PL_oldbufptr; contlen = PL_bufptr - PL_oldbufptr; } @@ -12213,7 +12607,7 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) const char * msg = ""; const char * const name = OutCopFILE(PL_curcop); - if (PL_in_eval) { + if (PL_in_eval) { SV * errsv = ERRSV; if (SvCUR(errsv)) { msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); @@ -12241,41 +12635,41 @@ S_swallow_bom(pTHX_ U8 *s) switch (s[0]) { case 0xFF: - if (s[1] == 0xFE) { - /* UTF-16 little-endian? (or UTF-32LE?) */ - if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); + if (s[1] == 0xFE) { + /* UTF-16 little-endian? (or UTF-32LE?) */ + if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); #ifndef PERL_NO_UTF16_FILTER #ifdef DEBUGGING - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); #endif - s += 2; - if (PL_bufend > (char*)s) { - s = add_utf16_textfilter(s, TRUE); - } + s += 2; + if (PL_bufend > (char*)s) { + s = add_utf16_textfilter(s, TRUE); + } #else - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); #endif - } - break; + } + break; case 0xFE: - if (s[1] == 0xFF) { /* UTF-16 big-endian? */ + if (s[1] == 0xFF) { /* UTF-16 big-endian? */ #ifndef PERL_NO_UTF16_FILTER #ifdef DEBUGGING - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); #endif - s += 2; - if (PL_bufend > (char *)s) { - s = add_utf16_textfilter(s, FALSE); - } + s += 2; + if (PL_bufend > (char *)s) { + s = add_utf16_textfilter(s, FALSE); + } #else - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); #endif - } - break; + } + break; case BOM_UTF8_FIRST_BYTE: { if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { #ifdef DEBUGGING @@ -12286,46 +12680,46 @@ S_swallow_bom(pTHX_ U8 *s) break; } case 0: - if (slen > 3) { - if (s[1] == 0) { - if (s[2] == 0xFE && s[3] == 0xFF) { - /* UTF-32 big-endian */ - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); - } - } - else if (s[2] == 0 && s[3] != 0) { - /* Leading bytes - * 00 xx 00 xx - * are a good indicator of UTF-16BE. */ + if (slen > 3) { + if (s[1] == 0) { + if (s[2] == 0xFE && s[3] == 0xFF) { + /* UTF-32 big-endian */ + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); + } + } + else if (s[2] == 0 && s[3] != 0) { + /* Leading bytes + * 00 xx 00 xx + * are a good indicator of UTF-16BE. */ #ifndef PERL_NO_UTF16_FILTER #ifdef DEBUGGING - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); #endif - s = add_utf16_textfilter(s, FALSE); + s = add_utf16_textfilter(s, FALSE); #else - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); #endif - } - } + } + } break; default: - if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { - /* Leading bytes - * xx 00 xx 00 - * are a good indicator of UTF-16LE. */ + if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) { + /* Leading bytes + * xx 00 xx 00 + * are a good indicator of UTF-16LE. */ #ifndef PERL_NO_UTF16_FILTER #ifdef DEBUGGING - if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); + if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); #endif - s = add_utf16_textfilter(s, TRUE); + s = add_utf16_textfilter(s, TRUE); #else - /* diag_listed_as: Unsupported script encoding %s */ - Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); + /* diag_listed_as: Unsupported script encoding %s */ + Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); #endif - } + } } return (char*)s; } @@ -12350,111 +12744,111 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) from this file, we can be sure that we're not called in block mode. Hence don't bother writing code to deal with block mode. */ if (maxlen) { - Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); + Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen); } if (status < 0) { - Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); + Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status); } DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", - FPTR2DPTR(void *, S_utf16_textfilter), - reverse ? 'l' : 'b', idx, maxlen, status, - (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", + FPTR2DPTR(void *, S_utf16_textfilter), + reverse ? 'l' : 'b', idx, maxlen, status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); while (1) { - STRLEN chars; - STRLEN have; - Size_t newlen; - U8 *end; - /* First, look in our buffer of existing UTF-8 data: */ - char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); - - if (nl) { - ++nl; - } else if (status == 0) { - /* EOF */ - IoPAGE(filter) = 0; - nl = SvEND(utf8_buffer); - } - if (nl) { - STRLEN got = nl - SvPVX(utf8_buffer); - /* Did we have anything to append? */ - retval = got != 0; - sv_catpvn(sv, SvPVX(utf8_buffer), got); - /* Everything else in this code works just fine if SVp_POK isn't - set. This, however, needs it, and we need it to work, else - we loop infinitely because the buffer is never consumed. */ - sv_chop(utf8_buffer, nl); - break; - } - - /* OK, not a complete line there, so need to read some more UTF-16. - Read an extra octect if the buffer currently has an odd number. */ - while (1) { - if (status <= 0) - break; - if (SvCUR(utf16_buffer) >= 2) { - /* Location of the high octet of the last complete code point. - Gosh, UTF-16 is a pain. All the benefits of variable length, - *coupled* with all the benefits of partial reads and - endianness. */ - const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) - + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); - - if (*last_hi < 0xd8 || *last_hi > 0xdb) { - break; - } - - /* We have the first half of a surrogate. Read more. */ - DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); - } - - status = FILTER_READ(idx + 1, utf16_buffer, - 160 + (SvCUR(utf16_buffer) & 1)); - DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); - DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); - if (status < 0) { - /* Error */ - IoPAGE(filter) = status; - return status; - } - } + STRLEN chars; + STRLEN have; + Size_t newlen; + U8 *end; + /* First, look in our buffer of existing UTF-8 data: */ + char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer)); + + if (nl) { + ++nl; + } else if (status == 0) { + /* EOF */ + IoPAGE(filter) = 0; + nl = SvEND(utf8_buffer); + } + if (nl) { + STRLEN got = nl - SvPVX(utf8_buffer); + /* Did we have anything to append? */ + retval = got != 0; + sv_catpvn(sv, SvPVX(utf8_buffer), got); + /* Everything else in this code works just fine if SVp_POK isn't + set. This, however, needs it, and we need it to work, else + we loop infinitely because the buffer is never consumed. */ + sv_chop(utf8_buffer, nl); + break; + } + + /* OK, not a complete line there, so need to read some more UTF-16. + Read an extra octect if the buffer currently has an odd number. */ + while (1) { + if (status <= 0) + break; + if (SvCUR(utf16_buffer) >= 2) { + /* Location of the high octet of the last complete code point. + Gosh, UTF-16 is a pain. All the benefits of variable length, + *coupled* with all the benefits of partial reads and + endianness. */ + const U8 *const last_hi = (U8*)SvPVX(utf16_buffer) + + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2)); + + if (*last_hi < 0xd8 || *last_hi > 0xdb) { + break; + } + + /* We have the first half of a surrogate. Read more. */ + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi)); + } + + status = FILTER_READ(idx + 1, utf16_buffer, + 160 + (SvCUR(utf16_buffer) & 1)); + DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer))); + DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);}); + if (status < 0) { + /* Error */ + IoPAGE(filter) = status; + return status; + } + } /* 'chars' isn't quite the right name, as code points above 0xFFFF * require 4 bytes per char */ - chars = SvCUR(utf16_buffer) >> 1; - have = SvCUR(utf8_buffer); + chars = SvCUR(utf16_buffer) >> 1; + have = SvCUR(utf8_buffer); /* Assume the worst case size as noted by the functions: twice the * number of input bytes */ - SvGROW(utf8_buffer, have + chars * 4 + 1); - - if (reverse) { - end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), - (U8*)SvPVX_const(utf8_buffer) + have, - chars * 2, &newlen); - } else { - end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), - (U8*)SvPVX_const(utf8_buffer) + have, - chars * 2, &newlen); - } - SvCUR_set(utf8_buffer, have + newlen); - *end = '\0'; - - /* No need to keep this SV "well-formed" with a '\0' after the end, as - it's private to us, and utf16_to_utf8{,reversed} take a - (pointer,length) pair, rather than a NUL-terminated string. */ - if(SvCUR(utf16_buffer) & 1) { - *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; - SvCUR_set(utf16_buffer, 1); - } else { - SvCUR_set(utf16_buffer, 0); - } + SvGROW(utf8_buffer, have + chars * 4 + 1); + + if (reverse) { + end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } else { + end = utf16_to_utf8((U8*)SvPVX(utf16_buffer), + (U8*)SvPVX_const(utf8_buffer) + have, + chars * 2, &newlen); + } + SvCUR_set(utf8_buffer, have + newlen); + *end = '\0'; + + /* No need to keep this SV "well-formed" with a '\0' after the end, as + it's private to us, and utf16_to_utf8{,reversed} take a + (pointer,length) pair, rather than a NUL-terminated string. */ + if(SvCUR(utf16_buffer) & 1) { + *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1]; + SvCUR_set(utf16_buffer, 1); + } else { + SvCUR_set(utf16_buffer, 0); + } } DEBUG_P(PerlIO_printf(Perl_debug_log, - "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", - status, - (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); + "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n", + status, + (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer))); DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);}); return retval; } @@ -12475,9 +12869,9 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) ignore any error return from this. */ SvCUR_set(PL_linestr, 0); if (FILTER_READ(0, PL_linestr, 0)) { - SvUTF8_on(PL_linestr); + SvUTF8_on(PL_linestr); } else { - SvUTF8_on(PL_linestr); + SvUTF8_on(PL_linestr); } PL_bufend = SvEND(PL_linestr); return (U8*)SvPVX(PL_linestr); @@ -12485,13 +12879,15 @@ S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed) #endif /* +=for apidoc scan_vstring + Returns a pointer to the next character after the parsed vstring, as well as updating the passed in sv. Function must be called like - sv = sv_2mortal(newSV(5)); - s = scan_vstring(s,e,sv); + sv = sv_2mortal(newSV(5)); + s = scan_vstring(s,e,sv); where s and e are the start and end of the string. The sv should already be large enough to store the vstring @@ -12502,6 +12898,7 @@ calling scope, hence the sv_2mortal in the example (to prevent a leak). Make sure to do SvREFCNT_inc afterwards if you use sv_2mortal. +=cut */ char * @@ -12514,69 +12911,69 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) if (*pos == 'v') pos++; /* get past 'v' */ while (pos < e && (isDIGIT(*pos) || *pos == '_')) - pos++; + pos++; if ( *pos != '.') { - /* this may not be a v-string if followed by => */ - const char *next = pos; - while (next < e && isSPACE(*next)) - ++next; - if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { - /* return string not v-string */ - sv_setpvn(sv,(char *)s,pos-s); - return (char *)pos; - } + /* this may not be a v-string if followed by => */ + const char *next = pos; + while (next < e && isSPACE(*next)) + ++next; + if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) { + /* return string not v-string */ + sv_setpvn(sv,(char *)s,pos-s); + return (char *)pos; + } } if (!isALPHA(*pos)) { - U8 tmpbuf[UTF8_MAXBYTES+1]; + U8 tmpbuf[UTF8_MAXBYTES+1]; - if (*s == 'v') - s++; /* get past 'v' */ + if (*s == 'v') + s++; /* get past 'v' */ SvPVCLEAR(sv); - for (;;) { - /* this is atoi() that tolerates underscores */ - U8 *tmpend; - UV rev = 0; - const char *end = pos; - UV mult = 1; - while (--end >= s) { - if (*end != '_') { - const UV orev = rev; - rev += (*end - '0') * mult; - mult *= 10; - if (orev > rev) - /* diag_listed_as: Integer overflow in %s number */ - Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), - "Integer overflow in decimal number"); - } - } - - /* Append native character for the rev point */ - tmpend = uvchr_to_utf8(tmpbuf, rev); - sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); - if (!UVCHR_IS_INVARIANT(rev)) - SvUTF8_on(sv); - if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) - s = ++pos; - else { - s = pos; - break; - } - while (pos < e && (isDIGIT(*pos) || *pos == '_')) - pos++; - } - SvPOK_on(sv); - sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); - SvRMAGICAL_on(sv); + for (;;) { + /* this is atoi() that tolerates underscores */ + U8 *tmpend; + UV rev = 0; + const char *end = pos; + UV mult = 1; + while (--end >= s) { + if (*end != '_') { + const UV orev = rev; + rev += (*end - '0') * mult; + mult *= 10; + if (orev > rev) + /* diag_listed_as: Integer overflow in %s number */ + Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), + "Integer overflow in decimal number"); + } + } + + /* Append native character for the rev point */ + tmpend = uvchr_to_utf8(tmpbuf, rev); + sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf); + if (!UVCHR_IS_INVARIANT(rev)) + SvUTF8_on(sv); + if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1])) + s = ++pos; + else { + s = pos; + break; + } + while (pos < e && (isDIGIT(*pos) || *pos == '_')) + pos++; + } + SvPOK_on(sv); + sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start); + SvRMAGICAL_on(sv); } return (char *)s; } int Perl_keyword_plugin_standard(pTHX_ - char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) + char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) { PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD; PERL_UNUSED_CONTEXT; @@ -12587,6 +12984,7 @@ Perl_keyword_plugin_standard(pTHX_ } /* +=for apidoc_section $lexer =for apidoc wrap_keyword_plugin Puts a C function into the chain of keyword plugins. This is the @@ -12623,7 +13021,7 @@ look something like this: static Perl_keyword_plugin_t next_keyword_plugin; static OP *my_keyword_plugin(pTHX_ - char *keyword_plugin, STRLEN keyword_len, OP **op_ptr) + char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) { if (memEQs(keyword_ptr, keyword_len, "my_new_keyword")) { @@ -12646,7 +13044,6 @@ void Perl_wrap_keyword_plugin(pTHX_ Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) { - dVAR; PERL_UNUSED_CONTEXT; PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; @@ -12665,14 +13062,14 @@ S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof) { SAVEI32(PL_lex_brackets); if (PL_lex_brackets > 100) - Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); + Renew(PL_lex_brackstack, PL_lex_brackets + 10, char); PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF; SAVEI32(PL_lex_allbrackets); PL_lex_allbrackets = 0; SAVEI8(PL_lex_fakeeof); PL_lex_fakeeof = (U8)fakeeof; if(yyparse(gramtype) && !PL_parser->error_count) - qerror(Perl_mess(aTHX_ "Parse error")); + qerror(Perl_mess(aTHX_ "Parse error")); } #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p) @@ -12695,12 +13092,12 @@ S_parse_expr(pTHX_ I32 fakeeof, U32 flags) { OP *exprop; if (flags & ~PARSE_OPTIONAL) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr"); exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof); if (!exprop && !(flags & PARSE_OPTIONAL)) { - if (!PL_parser->error_count) - qerror(Perl_mess(aTHX_ "Parse error")); - exprop = newOP(OP_NULL, 0); + if (!PL_parser->error_count) + qerror(Perl_mess(aTHX_ "Parse error")); + exprop = newOP(OP_NULL, 0); } return exprop; } @@ -12869,7 +13266,7 @@ OP * Perl_parse_block(pTHX_ U32 flags) { if (flags) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block"); return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER); } @@ -12907,7 +13304,7 @@ OP * Perl_parse_barestmt(pTHX_ U32 flags) { if (flags) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt"); return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER); } @@ -12935,49 +13332,49 @@ SV * Perl_parse_label(pTHX_ U32 flags) { if (flags & ~PARSE_OPTIONAL) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label"); if (PL_nexttoke) { - PL_parser->yychar = yylex(); - if (PL_parser->yychar == LABEL) { - SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; - PL_parser->yychar = YYEMPTY; - cSVOPx(pl_yylval.opval)->op_sv = NULL; - op_free(pl_yylval.opval); - return labelsv; - } else { - yyunlex(); - goto no_label; - } + PL_parser->yychar = yylex(); + if (PL_parser->yychar == LABEL) { + SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv; + PL_parser->yychar = YYEMPTY; + cSVOPx(pl_yylval.opval)->op_sv = NULL; + op_free(pl_yylval.opval); + return labelsv; + } else { + yyunlex(); + goto no_label; + } } else { - char *s, *t; - STRLEN wlen, bufptr_pos; - lex_read_space(0); - t = s = PL_bufptr; + char *s, *t; + STRLEN wlen, bufptr_pos; + lex_read_space(0); + t = s = PL_bufptr; if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) - goto no_label; - t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); - if (word_takes_any_delimiter(s, wlen)) - goto no_label; - bufptr_pos = s - SvPVX(PL_linestr); - PL_bufptr = t; - lex_read_space(LEX_KEEP_PREVIOUS); - t = PL_bufptr; - s = SvPVX(PL_linestr) + bufptr_pos; - if (t[0] == ':' && t[1] != ':') { - PL_oldoldbufptr = PL_oldbufptr; - PL_oldbufptr = s; - PL_bufptr = t+1; - return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); - } else { - PL_bufptr = s; - no_label: - if (flags & PARSE_OPTIONAL) { - return NULL; - } else { - qerror(Perl_mess(aTHX_ "Parse error")); - return newSVpvs("x"); - } - } + goto no_label; + t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); + if (word_takes_any_delimiter(s, wlen)) + goto no_label; + bufptr_pos = s - SvPVX(PL_linestr); + PL_bufptr = t; + lex_read_space(LEX_KEEP_PREVIOUS); + t = PL_bufptr; + s = SvPVX(PL_linestr) + bufptr_pos; + if (t[0] == ':' && t[1] != ':') { + PL_oldoldbufptr = PL_oldbufptr; + PL_oldbufptr = s; + PL_bufptr = t+1; + return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); + } else { + PL_bufptr = s; + no_label: + if (flags & PARSE_OPTIONAL) { + return NULL; + } else { + qerror(Perl_mess(aTHX_ "Parse error")); + return newSVpvs("x"); + } + } } } @@ -13012,7 +13409,7 @@ OP * Perl_parse_fullstmt(pTHX_ U32 flags) { if (flags) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt"); return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER); } @@ -13052,11 +13449,11 @@ Perl_parse_stmtseq(pTHX_ U32 flags) OP *stmtseqop; I32 c; if (flags) - Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); + Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq"); stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING); c = lex_peek_unichar(0); if (c != -1 && c != /*{*/'}') - qerror(Perl_mess(aTHX_ "Parse error")); + qerror(Perl_mess(aTHX_ "Parse error")); return stmtseqop; }