X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/9c3c07f8ec0a449a2ff42754f2952773c0863fed..b54d603d2b0409d931d988215873268c9de799d1:/toke.c diff --git a/toke.c b/toke.c index f1d09ef..11b235f 100644 --- a/toke.c +++ b/toke.c @@ -137,7 +137,7 @@ static const char* const ident_too_long = "Identifier too long"; * 1999-02-27 mjd-perl-patch@plover.com */ #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x))) -#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t') +#define SPACE_OR_TAB(c) isBLANK_A(c) /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement @@ -427,7 +427,11 @@ S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp) if (name) Perl_sv_catpv(aTHX_ report, name); else if ((char)rv > ' ' && (char)rv <= '~') + { Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv); + if ((char)rv == 'p') + sv_catpvs(report, " (pending identifier)"); + } else if (!rv) sv_catpvs(report, "EOF"); else @@ -549,16 +553,14 @@ S_no_op(pTHX_ const char *const what, char *s) NOOP; if (t < PL_bufptr && isSPACE(*t)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Do you need to predeclare %"SVf"?)\n", - SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Do you need to predeclare %"UTF8f"?)\n", + UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr)); } else { assert(s >= oldbp); Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing operator before %"SVf"?)\n", - SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "\t(Missing operator before %"UTF8f"?)\n", + UTF8fARG(UTF, s - oldbp, oldbp)); } } PL_bufptr = oldbp; @@ -1576,6 +1578,100 @@ Perl_lex_read_space(pTHX_ U32 flags) } /* + +=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn + +This function performs syntax checking on a prototype, C. +If C is true, any illegal characters or mismatched brackets +will trigger illegalproto warnings, declaring that they were +detected in the prototype for C. + +The return value is C if this is a valid prototype, and +C if it is not, regardless of whether C was C or +C. + +Note that C is a valid C and will always return C. + +=cut + + */ + +bool +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +{ + STRLEN len, origlen; + char *p = proto ? SvPV(proto, len) : NULL; + bool bad_proto = FALSE; + bool in_brackets = FALSE; + bool after_slash = FALSE; + char greedy_proto = ' '; + bool proto_after_greedy_proto = FALSE; + bool must_be_last = FALSE; + bool underscore = FALSE; + bool seen_underscore = FALSE; + + PERL_ARGS_ASSERT_VALIDATE_PROTO; + + if (!proto) + return TRUE; + + origlen = len; + for (; len--; p++) { + if (!isSPACE(*p)) { + if (must_be_last) + proto_after_greedy_proto = TRUE; + if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { + bad_proto = TRUE; + } + else { + if (underscore) { + if(!strchr(";@%", *p)) + bad_proto = TRUE; + underscore = FALSE; + } + + if (*p == '[') + in_brackets = TRUE; + else if (*p == ']') + in_brackets = FALSE; + else if ((*p == '@' || *p == '%') && + !after_slash && + !in_brackets ) { + must_be_last = TRUE; + greedy_proto = *p; + } + else if (*p == '_') + underscore = seen_underscore = TRUE; + } + if (*p == '\\') + after_slash = TRUE; + else + after_slash = FALSE; + } + } + + if (warn) { + SV *tmpsv = newSVpvs_flags("", SVs_TEMP); + p -= origlen; + p = SvUTF8(proto) + ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8), + origlen, UNI_DISPLAY_ISPRINT) + : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + + if (proto_after_greedy_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Prototype after '%c' for %"SVf" : %s", + greedy_proto, SVfARG(name), p); + if (bad_proto) + Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), + "Illegal character %sin prototype for %"SVf" : %s", + seen_underscore ? "after '_' " : "", SVfARG(name), p); + } + + return (! (proto_after_greedy_proto || bad_proto) ); +} + +/* * S_incline * This subroutine has nothing to do with tilting, whether at windmills * or pinball tables. Its name is short for "increment line". It @@ -2110,7 +2206,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) */ STATIC char * -S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick) +S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) { dVAR; char *s; @@ -2121,12 +2217,16 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, in start = SKIPSPACE1(start); s = start; if (isIDFIRST_lazy_if(s,UTF) || - (allow_pack && *s == ':') || - (allow_initial_tick && *s == '\'') ) + (allow_pack && *s == ':') ) { s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len); - if (check_keyword && keyword(PL_tokenbuf, len, 0)) + if (check_keyword) { + char *s2 = PL_tokenbuf; + if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6)) + s2 += 6, len -= 6; + if (keyword(s2, len, 0)) return start; + } start_force(PL_curforce); if (PL_madskills) curmad('X', newSVpvn(start,s-start)); @@ -2728,13 +2828,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnames " + "alias definition is deprecated"); } s++; } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } else { @@ -2771,8 +2875,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) if (! isCHARNAME_CONT(*s)) { goto bad_charname; } - if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated"); + if (*s == ' ' && *(s-1) == ' ' + && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "A sequence of multiple spaces in a charnam" + "es alias definition is deprecated"); } s++; } @@ -2798,8 +2905,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += UTF8SKIP(s); } } - if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) { - Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated"); + if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) { + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), + "Trailing white-space in a charnames alias " + "definition is deprecated"); } } @@ -3978,19 +4087,14 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv)) return 0; if (cv && SvPOK(cv)) { - const char *proto = CvPROTO(cv); - if (proto) { - if (*proto == ';') - proto++; - if (*proto == '*') - return 0; - } + const char *proto = CvPROTO(cv); + if (proto) { + while (*proto && (isSPACE(*proto) || *proto == ';')) + proto++; + if (*proto == '*') + return 0; + } } - s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); - /* start is the beginning of the possible filehandle/object, - * and s is the end of it - * tmpbuf is a copy of it - */ if (*start == '$') { if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || @@ -4007,6 +4111,13 @@ S_intuit_method(pTHX_ char *start, GV *gv, CV *cv) PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; } + + s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len); + /* start is the beginning of the possible filehandle/object, + * and s is the end of it + * tmpbuf is a copy of it (but with single quotes as double colons) + */ + if (!keyword(tmpbuf, len, 0)) { if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') { len -= 2; @@ -4537,12 +4648,12 @@ S_tokenize_use(pTHX_ int is_use, char *s) { force_next(WORD); } else if (*s == 'v') { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } } else { - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = force_version(s, FALSE); } pl_yylval.ival = is_use; @@ -4621,6 +4732,7 @@ Perl_yylex(pTHX) char *d; STRLEN len; bool bof = FALSE; + const bool saw_infix_sigil = PL_parser->saw_infix_sigil; U8 formbrack = 0; U32 fake_eof = 0; @@ -5023,6 +5135,7 @@ Perl_yylex(pTHX) s = PL_bufptr; PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; + PL_parser->saw_infix_sigil = 0; retry: #ifdef PERL_MAD @@ -5534,7 +5647,7 @@ Perl_yylex(pTHX) s++; if (strnEQ(s,"=>",2)) { - s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE); + s = force_word(PL_bufptr,WORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); OPERATOR('-'); /* unary minus */ } @@ -5606,7 +5719,7 @@ Perl_yylex(pTHX) s++; s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { - s = force_word(s,METHOD,FALSE,TRUE,FALSE); + s = force_word(s,METHOD,FALSE,TRUE); TOKEN(ARROW); } else if (*s == '$') @@ -5678,6 +5791,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; Mop(OP_MULTIPLY); case '%': @@ -5686,6 +5800,7 @@ Perl_yylex(pTHX) PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) TOKEN(0); ++s; + PL_parser->saw_infix_sigil = 1; Mop(OP_MODULO); } PL_tokenbuf[0] = '%'; @@ -5969,7 +6084,7 @@ Perl_yylex(pTHX) d++; if (*d == '}') { const char minus = (PL_tokenbuf[0] == '-'); - s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); + s = force_word(s + minus, WORD, FALSE, TRUE); if (minus) force_next('-'); } @@ -6179,6 +6294,7 @@ Perl_yylex(pTHX) s--; TOKEN(0); } + PL_parser->saw_infix_sigil = 1; BAop(OP_BIT_AND); } @@ -6482,9 +6598,8 @@ Perl_yylex(pTHX) if (*t == ';' && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "You need to quote \"%"SVf"\"", - SVfARG(newSVpvn_flags(tmpbuf, len, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "You need to quote \"%"UTF8f"\"", + UTF8fARG(UTF, len, tmpbuf)); } } } @@ -6569,11 +6684,9 @@ Perl_yylex(pTHX) PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Scalar value %"SVf" better written as $%"SVf, - SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))), - SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1), - SVs_TEMP | (UTF ? SVf_UTF8 : 0 )))); + "Scalar value %"UTF8f" better written as $%"UTF8f, + UTF8fARG(UTF, t-PL_bufptr, PL_bufptr), + UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1)); } } } @@ -6921,8 +7034,7 @@ Perl_yylex(pTHX) else { rv2cv_op = newOP(OP_PADANY, 0); rv2cv_op->op_targ = off; - rv2cv_op = (OP*)newCVREF(0, rv2cv_op); - cv = (CV *)PAD_SV(off); + cv = find_lexical_cv(off); } lex = TRUE; goto just_a_word; @@ -7017,9 +7129,8 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); if (!morelen) - Perl_croak(aTHX_ "Bad name after %"SVf"%s", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP )), + Perl_croak(aTHX_ "Bad name after %"UTF8f"%s", + UTF8fARG(UTF, len, PL_tokenbuf), *s == '\'' ? "'" : "::"); len += morelen; pkgname = 1; @@ -7046,9 +7157,8 @@ Perl_yylex(pTHX) if (ckWARN(WARN_BAREWORD) && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) Perl_warner(aTHX_ packWARN(WARN_BAREWORD), - "Bareword \"%"SVf"\" refers to nonexistent package", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + "Bareword \"%"UTF8f"\" refers to nonexistent package", + UTF8fARG(UTF, len, PL_tokenbuf)); len -= 2; PL_tokenbuf[len] = '\0'; gv = NULL; @@ -7164,9 +7274,13 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>' && !pkgname) { op_free(rv2cv_op); CLINE; + /* This is our own scalar, created a few lines above, + so this is safe. */ + SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv); sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf); if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv); + SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv); TERM(WORD); } @@ -7234,10 +7348,11 @@ Perl_yylex(pTHX) if (cv) { if (lastchar == '-' && penultchar != '-') { - const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP ); + const STRLEN l = len ? len : strlen(PL_tokenbuf); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%"SVf" resolved as -&%"SVf"()", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()", + UTF8fARG(UTF, l, PL_tokenbuf), + UTF8fARG(UTF, l, PL_tokenbuf)); } /* Check for a constant sub */ if ((sv = cv_const_sv(cv))) { @@ -7251,7 +7366,8 @@ Perl_yylex(pTHX) } op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7265,6 +7381,7 @@ Perl_yylex(pTHX) STRLEN protolen = CvPROTOLEN(cv); const char *proto = CvPROTO(cv); bool optional; + proto = S_strip_spaces(aTHX_ proto, &protolen); if (!protolen) TERM(FUNC0SUB); if ((optional = *proto == ';')) @@ -7347,7 +7464,8 @@ Perl_yylex(pTHX) gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV); op_free(pl_yylval.opval); - pl_yylval.opval = rv2cv_op; + pl_yylval.opval = + off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op; pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN; PL_last_lop = PL_oldbufptr; PL_last_lop_op = OP_ENTERSUB; @@ -7408,12 +7526,13 @@ Perl_yylex(pTHX) op_free(rv2cv_op); safe_bareword: - if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { + if ((lastchar == '*' || lastchar == '%' || lastchar == '&') + && saw_infix_sigil) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%"SVf, - lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf, - strlen(PL_tokenbuf), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + "Operator or semicolon missing before %c%"UTF8f, + lastchar, + UTF8fARG(UTF, strlen(PL_tokenbuf), + PL_tokenbuf)); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); @@ -7573,9 +7692,8 @@ Perl_yylex(pTHX) goto just_a_word; } if (!tmp) - Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", - SVfARG(newSVpvn_flags(PL_tokenbuf, len, - (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); + Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword", + UTF8fARG(UTF, len, PL_tokenbuf)); if (tmp < 0) tmp = -tmp; else if (tmp == KEY_require || tmp == KEY_do @@ -7728,7 +7846,7 @@ Perl_yylex(pTHX) case KEY_dump: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_DUMP); case KEY_else: @@ -7861,7 +7979,7 @@ Perl_yylex(pTHX) case KEY_goto: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_GOTO); case KEY_gmtime: @@ -7987,7 +8105,7 @@ Perl_yylex(pTHX) case KEY_last: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_LAST); case KEY_lc: @@ -8095,7 +8213,7 @@ Perl_yylex(pTHX) case KEY_next: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_NEXT); case KEY_ne: @@ -8131,11 +8249,9 @@ Perl_yylex(pTHX) && !(t[0] == ':' && t[1] == ':') && !keyword(s, d-s, 0) ) { - SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)); Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), - "Precedence problem: open %"SVf" should be open(%"SVf")", - SVfARG(tmpsv), SVfARG(tmpsv)); + "Precedence problem: open %"UTF8f" should be open(%"UTF8f")", + UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s)); } } LOP(OP_OPEN,XTERM); @@ -8179,7 +8295,7 @@ Perl_yylex(pTHX) LOP(OP_PACK,XTERM); case KEY_package: - s = force_word(s,WORD,FALSE,TRUE,FALSE); + s = force_word(s,WORD,FALSE,TRUE); s = SKIPSPACE1(s); s = force_strict_version(s); PL_lex_expect = XBLOCK; @@ -8282,7 +8398,7 @@ Perl_yylex(pTHX) || (s = force_version(s, TRUE), *s == 'v')) { *PL_tokenbuf = '\0'; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); if (isIDFIRST_lazy_if(PL_tokenbuf,UTF)) gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD | (UTF ? SVf_UTF8 : 0)); @@ -8307,7 +8423,7 @@ Perl_yylex(pTHX) case KEY_redo: PL_expect = XOPERATOR; - s = force_word(s,WORD,TRUE,FALSE,FALSE); + s = force_word(s,WORD,TRUE,FALSE); LOOPX(OP_REDO); case KEY_rename: @@ -8448,7 +8564,7 @@ Perl_yylex(pTHX) checkcomma(s,PL_tokenbuf,"subroutine name"); s = SKIPSPACE1(s); PL_expect = XTERM; - s = force_word(s,WORD,TRUE,TRUE,FALSE); + s = force_word(s,WORD,TRUE,TRUE); LOP(OP_SORT,XREF); case KEY_split: @@ -8480,10 +8596,12 @@ Perl_yylex(pTHX) really_sub: { char * const tmpbuf = PL_tokenbuf + 1; - SSize_t tboffset = 0; expectation attrful; bool have_name, have_proto; const int key = tmp; +#ifndef PERL_MAD + SV *format_name = NULL; +#endif #ifdef PERL_MAD SV *tmpwhite = 0; @@ -8510,13 +8628,14 @@ Perl_yylex(pTHX) PL_expect = XBLOCK; attrful = XATTRBLOCK; - /* remember buffer pos'n for later force_word */ - tboffset = s - PL_oldbufptr; d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); #ifdef PERL_MAD if (PL_madskills) nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr)); +#else + if (key == KEY_format) + format_name = S_newSV_maybe_utf8(aTHX_ s, d - s); #endif *PL_tokenbuf = '&'; if (memchr(tmpbuf, ':', len) || key != KEY_sub @@ -8563,89 +8682,24 @@ Perl_yylex(pTHX) #ifdef PERL_MAD PL_thistoken = subtoken; s = d; - PERL_UNUSED_VAR(tboffset); #else - if (have_name) - (void) force_word(PL_oldbufptr + tboffset, WORD, - FALSE, TRUE, TRUE); + if (format_name) { + start_force(PL_curforce); + NEXTVAL_NEXTTOKE.opval + = (OP*)newSVOP(OP_CONST,0, format_name); + NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE; + force_next(WORD); + } #endif PREBLOCK(FORMAT); } /* Look for a prototype */ if (*s == '(') { - char *p; - bool bad_proto = FALSE; - bool in_brackets = FALSE; - char greedy_proto = ' '; - bool proto_after_greedy_proto = FALSE; - bool must_be_last = FALSE; - bool underscore = FALSE; - bool seen_underscore = FALSE; - const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); - STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - /* strip spaces and check for bad characters */ - d = SvPV(PL_lex_stuff, tmplen); - tmp = 0; - for (p = d; tmplen; tmplen--, ++p) { - if (!isSPACE(*p)) { - d[tmp++] = *p; - - if (warnillegalproto) { - if (must_be_last) - proto_after_greedy_proto = TRUE; - if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') { - bad_proto = TRUE; - } - else { - if ( underscore ) { - if ( !strchr(";@%", *p) ) - bad_proto = TRUE; - underscore = FALSE; - } - if ( *p == '[' ) { - in_brackets = TRUE; - } - else if ( *p == ']' ) { - in_brackets = FALSE; - } - else if ( (*p == '@' || *p == '%') && - ( tmp < 2 || d[tmp-2] != '\\' ) && - !in_brackets ) { - must_be_last = TRUE; - greedy_proto = *p; - } - else if ( *p == '_' ) { - underscore = seen_underscore = TRUE; - } - } - } - } - } - d[tmp] = '\0'; - if (proto_after_greedy_proto) - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Prototype after '%c' for %"SVf" : %s", - greedy_proto, SVfARG(PL_subname), d); - if (bad_proto) { - SV *dsv = newSVpvs_flags("", SVs_TEMP); - Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), - "Illegal character %sin prototype for %"SVf" : %s", - seen_underscore ? "after '_' " : "", - SVfARG(PL_subname), - SvUTF8(PL_lex_stuff) - ? sv_uni_display(dsv, - newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), - tmp, - UNI_DISPLAY_ISPRINT) - : pv_pretty(dsv, d, tmp, 60, NULL, NULL, - PERL_PV_ESCAPE_NONASCII)); - } - SvCUR_set(PL_lex_stuff, tmp); + (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); have_proto = TRUE; #ifdef PERL_MAD @@ -8982,9 +9036,9 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %"SVf" in string", - SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len, - SVs_TEMP | ( UTF ? SVf_UTF8 : 0 )))); + "Possible unintended interpolation of %"UTF8f + " in string", + UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf)); } } @@ -9298,6 +9352,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck *d = '\0'; d = dest; if (*d) { + /* Either a digit variable, or parse_ident() found an identifier + (anything valid as a bareword), so job done and return. */ if (PL_lex_state != LEX_NORMAL) PL_lex_state = LEX_INTERPENDMAYBE; return s; @@ -9309,8 +9365,12 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck || s[1] == '{' || strnEQ(s+1,"::",2)) ) { + /* Dereferencing a value in a scalar variable. + The alternatives are different syntaxes for a scalar variable. + Using ' as a leading package separator isn't allowed. :: is. */ return s; } + /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */ if (*s == '{') { bracket = s; s++; @@ -9318,12 +9378,12 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck s++; } -#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)*(d)) \ - || isCNTRL_A((U8)*(d)) \ - || isDIGIT_A((U8)*(d)) \ - || (!(u) && !UTF8_IS_INVARIANT((U8)*(d)))) +#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \ + || isCNTRL_A((U8)(d)) \ + || isDIGIT_A((U8)(d)) \ + || (!(u) && !UTF8_IS_INVARIANT((U8)(d)))) if (s < send - && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(s, is_utf8))) + && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8))) { if (is_utf8) { const STRLEN skip = UTF8SKIP(s); @@ -9337,20 +9397,29 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck d[1] = '\0'; } } + /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */ if (*d == '^' && *s && isCONTROLVAR(*s)) { *d = toCTRL(*s); s++; } + /* Warn about ambiguous code after unary operators if {...} notation isn't + used. There's no difference in ambiguity; it's merely a heuristic + about when not to warn. */ else if (ck_uni && !bracket) check_uni(); if (bracket) { + /* If we were processing {...} notation then... */ if (isIDFIRST_lazy_if(d,is_utf8)) { + /* if it starts as a valid identifier, assume that it is one. + (the later check for } being at the expected point will trap + cases where this doesn't pan out.) */ d += is_utf8 ? UTF8SKIP(d) : 1; parse_ident(&s, &d, e, 1, is_utf8); *d = '\0'; while (s < send && SPACE_OR_TAB(*s)) s++; if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { + /* ${foo[0]} and ${foo{bar}} notation. */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { const char * const brack = (const char *) @@ -9368,7 +9437,7 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } /* Handle extended ${^Foo} variables * 1999-02-27 mjd-perl-patch@plover.com */ - else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */ + else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ && isWORDCHAR(*s)) { d++; @@ -9383,6 +9452,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck while (s < send && SPACE_OR_TAB(*s)) s++; + /* Expect to find a closing } after consuming any trailing whitespace. + */ if (*s == '}') { s++; if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) { @@ -9405,6 +9476,8 @@ S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck } } else { + /* Didn't find the closing } at the point we expected, so restore + state such that the next thing to process is the opening { and */ s = bracket; /* let the parser handle it */ *dest = '\0'; } @@ -11356,9 +11429,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n", - SVfARG(newSVpvn_flags(context, contlen, - SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); + Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n", + UTF8fARG(UTF, contlen, context)); else Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {