X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/ef3ff34de0f1e12e7961a83cbf63dcbb91808ba2..e8d55f27af460b2aea0e4f6867acad7ae6e154cc:/toke.c diff --git a/toke.c b/toke.c index 864fda7..68ec96b 100644 --- a/toke.c +++ b/toke.c @@ -464,13 +464,6 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s) #endif -static int -S_deprecate_commaless_var_list(pTHX) { - PL_expect = XTERM; - deprecate_fatal_in("5.28", "Use of comma-less variable list is deprecated"); - return REPORT(','); /* grandfather non-comma-format format */ -} - /* * S_ao * @@ -727,6 +720,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->lex_state = LEX_NORMAL; parser->expect = XSTATE; parser->rsfp = rsfp; + parser->recheck_utf8_validity = FALSE; parser->rsfp_filters = !(flags & LEX_START_SAME_FILTER) || !oparser ? NULL @@ -747,9 +741,10 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) s = SvPV_const(line, len); - if (SvUTF8(line) && ! is_utf8_string_loc((U8 *) s, - SvCUR(line), - &first_bad_char_loc)) + if ( SvUTF8(line) + && UNLIKELY(! is_utf8_string_loc((U8 *) s, + SvCUR(line), + &first_bad_char_loc))) { _force_out_malformed_utf8_message(first_bad_char_loc, (U8 *) s + SvCUR(line), @@ -1055,12 +1050,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { p++; highhalf++; - } else if (! UTF8_IS_INVARIANT(c)) { - _force_out_malformed_utf8_message((U8 *) p, (U8 *) e, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } + } else assert(UTF8_IS_INVARIANT(c)); } if (!highhalf) goto plain_copy; @@ -1274,6 +1264,24 @@ Perl_lex_discard_to(pTHX_ char *ptr) PL_parser->last_lop -= discard_len; } +void +Perl_notify_parser_that_changed_to_utf8(pTHX) +{ + /* Called when $^H is changed to indicate that HINT_UTF8 has changed from + * off to on. At compile time, this has the effect of entering a 'use + * utf8' section. This means that any input was not previously checked for + * UTF-8 (because it was off), but now we do need to check it, or our + * assumptions about the input being sane could be wrong, and we could + * segfault. This routine just sets a flag so that the next time we look + * at the input we do the well-formed UTF-8 check. If we aren't in the + * proper phase, there may not be a parser object, but if there is, setting + * the flag is harmless */ + + if (PL_parser) { + PL_parser->recheck_utf8_validity = TRUE; + } +} + /* =for apidoc Amx|bool|lex_next_chunk|U32 flags @@ -1615,7 +1623,7 @@ Note that C is a valid C and will always return C. */ bool -Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) { STRLEN len, origlen; char *p; @@ -1677,6 +1685,13 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) origlen, UNI_DISPLAY_ISPRINT) : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { + SV *name2 = sv_2mortal(newSVsv(PL_curstname)); + sv_catpvs(name2, "::"); + sv_catsv(name2, (SV *)name); + name = name2; + } + if (proto_after_greedy_proto) Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), "Prototype after '%c' for %" SVf " : %s", @@ -2575,29 +2590,14 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) SV *cv; SV *rv; HV *stash; - const U8* first_bad_char_loc; const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; if (!SvCUR(res)) { - deprecate_fatal_in("5.28", "Unknown charname '' is deprecated"); - return res; - } - - if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, - e - backslash_ptr, - &first_bad_char_loc)) - { - _force_out_malformed_utf8_message(first_bad_char_loc, - (U8 *) PL_parser->bufend, - 0, - 0 /* 0 means don't die */ ); - yyerror_pv(Perl_form(aTHX_ - "Malformed UTF-8 character immediately after '%.*s'", - (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr), - SVf_UTF8); - return NULL; + /* diag_listed_as: Unknown charname '%s' */ + yyerror("Unknown charname ''"); + return NULL; } res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, @@ -2709,6 +2709,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } } if (*(s-1) == ' ') { + /* diag_listed_as: charnames alias definitions may not contain + trailing white-space; marked by <-- HERE in %s + */ yyerror_pv( Perl_form(aTHX_ "charnames alias definitions may not contain trailing " @@ -2724,11 +2727,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) const U8* first_bad_char_loc; STRLEN len; const char* const str = SvPV_const(res, len); - if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { + if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, + &first_bad_char_loc))) + { _force_out_malformed_utf8_message(first_bad_char_loc, (U8 *) PL_parser->bufend, 0, 0 /* 0 means don't die */ ); + /* diag_listed_as: Malformed UTF-8 returned by \N{%s} + immediately after '%s' */ yyerror_pv( Perl_form(aTHX_ "Malformed UTF-8 returned by %.*s immediately after '%.*s'", @@ -2746,6 +2753,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) /* The final %.*s makes sure that should the trailing NUL be missing * that this print won't run off the end of the string */ + /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE + in \N{%s} */ yyerror_pv( Perl_form(aTHX_ "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", @@ -2757,6 +2766,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } multi_spaces: + /* diag_listed_as: charnames alias definitions may not contain a + sequence of multiple spaces; marked by <-- HERE + in %s */ yyerror_pv( Perl_form(aTHX_ "charnames alias definitions may not contain a sequence of " @@ -2957,9 +2969,9 @@ S_scan_const(pTHX_ char *start) /* Here, we don't think we're in a range. If the new character * is not a hyphen; or if it is a hyphen, but it's too close to - * either edge to indicate a range, then it's a regular - * character. */ - if (*s != '-' || s >= send - 1 || s == start) { + * either edge to indicate a range, or if we haven't output any + * characters yet then it's a regular character. */ + if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) { /* A regular character. Process like any other, but first * clear any flags */ @@ -3274,18 +3286,18 @@ S_scan_const(pTHX_ char *start) #endif /* Always gets run for ASCII, and sometimes for EBCDIC. */ { - SSize_t i; - /* Here, no conversions are necessary, which means that the * first character in the range is already in 'd' and * valid, so we can skip overwriting it */ if (has_utf8) { + SSize_t i; d += UTF8SKIP(d); for (i = range_min + 1; i <= range_max; i++) { append_utf8_from_native_byte((U8) i, (U8 **) &d); } } else { + SSize_t i; d++; assert(range_min + 1 <= range_max); for (i = range_min + 1; i < range_max; i++) { @@ -3615,11 +3627,12 @@ S_scan_const(pTHX_ char *start) * For non-patterns, the named characters are converted to * their string equivalents. In patterns, named characters are * not converted to their ultimate forms for the same reasons - * that other escapes aren't. Instead, they are converted to - * the \N{U+...} form to get the value from the charnames that - * is in effect right now, while preserving the fact that it - * was a named character, so that the regex compiler knows - * this. + * that other escapes aren't (mainly that the ultimate + * character could be considered a meta-symbol by the regex + * compiler). Instead, they are converted to the \N{U+...} + * form to get the value from the charnames that is in effect + * right now, while preserving the fact that it was a named + * character, so that the regex compiler knows this. * * The structure of this section of code (besides checking for * errors and upgrading to utf8) is: @@ -3640,6 +3653,7 @@ S_scan_const(pTHX_ char *start) s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); + *d++ = '\0'; continue; } s++; @@ -3651,7 +3665,7 @@ S_scan_const(pTHX_ char *start) } else { yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); } - continue; + yyquit(); /* Have exhausted the input. */ } /* Here it looks like a named character */ @@ -3670,6 +3684,7 @@ S_scan_const(pTHX_ char *start) "Invalid hexadecimal number in \\N{U+...}" ); s = e + 1; + *d++ = '\0'; continue; } while (++s < e) { @@ -3868,6 +3883,7 @@ S_scan_const(pTHX_ char *start) " in transliteration operator", /* +1 to include the "}" */ (int) (e + 1 - start), start)); + *d++ = '\0'; goto end_backslash_N; } @@ -3933,15 +3949,16 @@ S_scan_const(pTHX_ char *start) case 'c': s++; if (s < send) { - *d++ = grok_bslash_c(*s++, 1); + *d++ = grok_bslash_c(*s, 1); } else { yyerror("Missing control char name in \\c"); + yyquit(); /* Are at end of input, no sense continuing */ } #ifdef EBCDIC non_portable_endpoint++; #endif - continue; + break; /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': @@ -4427,8 +4444,8 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) PL_parser->last_lop = buf + last_lop_pos; - SvLEN(linestr) = SvCUR(linestr); - SvCUR(linestr) = s-SvPVX(linestr); + SvLEN_set(linestr, SvCUR(linestr)); + SvCUR_set(linestr, s - SvPVX(linestr)); PL_parser->filtered = 1; break; } @@ -4614,6 +4631,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) { PERL_ARGS_ASSERT_TOKENIZE_USE; if (PL_expect != XSTATE) + /* diag_listed_as: "use" not allowed in expression */ yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? "use" : "no")); PL_expect = XTERM; @@ -4759,6 +4777,20 @@ Perl_yylex(pTHX) GV *gv = NULL; GV **gvp = NULL; + if (UNLIKELY(PL_parser->recheck_utf8_validity)) { + const U8* first_bad_char_loc; + if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, + PL_bufend - PL_bufptr, + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) PL_bufend, + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } + PL_parser->recheck_utf8_validity = FALSE; + } DEBUG_T( { SV* tmp = newSVpvs(""); PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", @@ -5006,7 +5038,16 @@ Perl_yylex(pTHX) s = PL_bufend; } else { + int save_error_count = PL_error_count; + s = scan_const(PL_bufptr); + + /* Set flag if this was a pattern and there were errors. op.c will + * refuse to compile a pattern with this flag set. Otherwise, we + * could get segfaults, etc. */ + if (PL_lex_inpat && PL_error_count > save_error_count) { + ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; + } if (*s == '\\') PL_lex_state = LEX_INTERPCASEMOD; else @@ -5085,12 +5126,43 @@ Perl_yylex(pTHX) 0, cBOOL(UTF), FALSE); *dest = '\0'; assert(PL_tokenbuf[1]); /* we have a variable name */ + } + else { + *PL_tokenbuf = 0; + PL_in_my = 0; + } + + s = skipspace(s); + /* parse the = for the default ourselves to avoid '+=' etc being accepted here + * as the ASSIGNOP, and exclude other tokens that start with = + */ + if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) { + /* save now to report with the same context as we did when + * all ASSIGNOPS were accepted */ + PL_oldbufptr = s; + + ++s; + NEXTVAL_NEXTTOKE.ival = 0; + force_next(ASSIGNOP); + PL_expect = XTERM; + } + else if (*s == ',' || *s == ')') { + PL_expect = XOPERATOR; + } + else { + /* make sure the context shows the unexpected character and + * hopefully a bit more */ + if (*s) ++s; + while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') + s++; + PL_bufptr = s; /* for error reporting */ + yyerror("Illegal operator following parameter in a subroutine signature"); + PL_in_my = 0; + } + if (*PL_tokenbuf) { NEXTVAL_NEXTTOKE.ival = sigil; force_next('p'); /* force a signature pending identifier */ } - else - PL_in_my = 0; - PL_expect = XOPERATOR; break; case ')': @@ -5115,12 +5187,6 @@ Perl_yylex(pTHX) switch (*s) { default: if (UTF) { - if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { - _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } if (isIDFIRST_utf8_safe(s, PL_bufend)) { goto keylookup; } @@ -5142,12 +5208,23 @@ Perl_yylex(pTHX) else { c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); } - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - if (len > UNRECOGNIZED_PRECEDE_COUNT) { - d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT; - } else { + + if (s >= PL_linestart) { d = PL_linestart; } + else { + /* somehow (probably due to a parse failure), PL_linestart has advanced + * pass PL_bufptr, get a reasonable beginning of line + */ + d = s; + while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') + --d; + } + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; + } + Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, UTF8fARG(UTF, (s - d), d), (int) len + 1); @@ -5374,8 +5451,6 @@ Perl_yylex(pTHX) d = instr(s,"perl -"); if (!d) { d = instr(s,"perl"); - if (d && d[4] == '6') - d = NULL; #if defined(DOSISH) /* avoid getting into infinite loops when shebang * line contains "Perl" rather than "perl" */ @@ -5877,27 +5952,12 @@ Perl_yylex(pTHX) PL_lex_stuff = NULL; } else { - if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { - sv_free(sv); - if (PL_in_my == KEY_our) { - deprecate_disappears_in("5.28", - "Attribute \"unique\" is deprecated"); - } - else - Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); - } - /* NOTE: any CV attrs applied here need to be part of the CVf_BUILTIN_ATTRS define in cv.h! */ - else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { + if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { sv_free(sv); CvLVALUE_on(PL_compcv); } - else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { - sv_free(sv); - deprecate_disappears_in("5.28", - "Attribute \"locked\" is deprecated"); - } else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { sv_free(sv); CvMETHOD_on(PL_compcv); @@ -6201,8 +6261,10 @@ Perl_yylex(pTHX) break; } if (strEQs(s, "sub")) { + PL_bufptr = s; d = s + 3; d = skipspace(d); + s = PL_bufptr; if (*d == ':') { PL_expect = XTERM; break; @@ -6543,12 +6605,7 @@ Perl_yylex(pTHX) case '$': CLINE; - if (PL_expect == XOPERATOR) { - if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - return deprecate_commaless_var_list(); - } - } - else if (PL_expect == XPOSTDEREF) { + if (PL_expect == XPOSTDEREF) { if (s[1] == '#') { s++; POSTDEREF(DOLSHARP); @@ -6838,10 +6895,6 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - if ( PL_expect == XOPERATOR - && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack)) - return deprecate_commaless_var_list(); - s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) missingterm(NULL); @@ -6854,10 +6907,6 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - if ( PL_expect == XOPERATOR - && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack)) - return deprecate_commaless_var_list(); - s = scan_str(s,FALSE,FALSE,FALSE,NULL); DEBUG_T( { if (s) @@ -7183,6 +7232,7 @@ Perl_yylex(pTHX) orig_keyword = 0; lex = 0; off = 0; + /* FALLTHROUGH */ default: /* not a keyword */ just_a_word: { int pkgname = 0; @@ -7891,6 +7941,7 @@ Perl_yylex(pTHX) && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { char *p = s; + SSize_t s_off = s - SvPVX(PL_linestr); if ((PL_bufend - p) >= 3 && strEQs(p, "my") && isSPACE(*(p + 2))) @@ -7908,6 +7959,9 @@ Perl_yylex(pTHX) } if (*p != '$' && *p != '\\') Perl_croak(aTHX_ "Missing $ on loop variable"); + + /* The buffer may have been reallocated, update s */ + s = SvPVX(PL_linestr) + s_off; } OPERATOR(FOR); @@ -8622,7 +8676,8 @@ Perl_yylex(pTHX) COPLINE_SET_FROM_MULTI_END; if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); + (void)validate_proto(PL_subname, PL_lex_stuff, + ckWARN(WARN_ILLEGALPROTO), 0); have_proto = TRUE; s = skipspace(s); @@ -8857,8 +8912,11 @@ S_pending_ident(pTHX) if (PL_in_my) { if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ if (has_colon) + /* diag_listed_as: No package name allowed for variable %s + in "our" */ yyerror_pv(Perl_form(aTHX_ "No package name allowed for " - "variable %s in \"our\"", + "%se %s in \"our\"", + *PL_tokenbuf=='&' ?"subroutin":"variabl", PL_tokenbuf), UTF ? SVf_UTF8 : 0); tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); } @@ -9018,7 +9076,6 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; - PADOFFSET off; if (keyword(w, s - w, 0)) return; @@ -9026,6 +9083,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) if (gv && GvCVu(gv)) return; if (s - w <= 254) { + PADOFFSET off; char tmpbuf[256]; Copy(w, tmpbuf+1, s - w, char); *tmpbuf = '&'; @@ -9362,19 +9420,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) bool skip; char *s2; /* If we were processing {...} notation then... */ - if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) { - /* if it starts as a valid identifier, assume that it is one. - (the later check for } being at the expected point will trap - cases where this doesn't pan out.) */ - d += is_utf8 ? UTF8SKIP(d) : 1; - parse_ident(&s, &d, e, 1, is_utf8, TRUE); - *d = '\0'; + if (isIDFIRST_lazy_if_safe(d, e, is_utf8) + || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ + && isWORDCHAR(*s)) + ) { + /* note we have to check for a normal identifier first, + * as it handles utf8 symbols, and only after that has + * been ruled out can we look at the caret words */ + if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { + /* if it starts as a valid identifier, assume that it is one. + (the later check for } being at the expected point will trap + cases where this doesn't pan out.) */ + d += is_utf8 ? UTF8SKIP(d) : 1; + parse_ident(&s, &d, e, 1, is_utf8, TRUE); + *d = '\0'; + } + else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ + d++; + while (isWORDCHAR(*s) && d < e) { + *d++ = *s++; + } + if (d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + *d = '\0'; + } tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { s = skipspace(s); } if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { - /* ${foo[0]} and ${foo{bar}} notation. */ + /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { const char * const brack = (const char *) @@ -9393,26 +9468,16 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) return s; } } - /* Handle extended ${^Foo} variables - * 1999-02-27 mjd-perl-patch@plover.com */ - else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ - && isWORDCHAR(*s)) - { - d++; - while (isWORDCHAR(*s) && d < e) { - *d++ = *s++; - } - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - *d = '\0'; - } if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); - if ((skip = s < PL_bufend && isSPACE(*s))) + if ((skip = s < PL_bufend && isSPACE(*s))) { /* Avoid incrementing line numbers or resetting PL_linestart, in case we have to back up. */ + STRLEN s_off = s - SvPVX(PL_linestr); s2 = peekspace(s); + s = SvPVX(PL_linestr) + s_off; + } else s2 = s; @@ -9689,18 +9754,14 @@ S_scan_subst(pTHX_ char *start) PL_multi_end = 0; pm->op_pmflags |= PMf_EVAL; - while (es-- > 0) { - if (es) - sv_catpvs(repl, "eval "); - else - sv_catpvs(repl, "do "); - } - sv_catpvs(repl, "{"); + for (; es > 1; es--) { + sv_catpvs(repl, "eval "); + } + sv_catpvs(repl, "do {"); sv_catsv(repl, PL_parser->lex_sub_repl); sv_catpvs(repl, "}"); SvREFCNT_dec(PL_parser->lex_sub_repl); PL_parser->lex_sub_repl = repl; - es = 1; } @@ -9856,7 +9917,7 @@ S_scan_heredoc(pTHX_ char *s) else term = '"'; if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) - deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated"); + Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); peek = s; while ( isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) @@ -11187,9 +11248,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) && strchr("+-0123456789_", s[1])) { - floatit = TRUE; + int exp_digits = 0; + const char *save_s = s; + char * save_d = d; - /* regardless of whether user said 3E5 or 3e5, use lower 'e', + /* regardless of whether user said 3E5 or 3e5, use lower 'e', ditto for p (hexfloats) */ if ((isALPHA_FOLD_EQ(*s, 'e'))) { /* At least some Mach atof()s don't grok 'E' */ @@ -11221,6 +11284,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* read digits of exponent */ while (isDIGIT(*s) || *s == '_') { if (isDIGIT(*s)) { + ++exp_digits; if (d >= e) Perl_croak(aTHX_ "%s", number_too_long); *d++ = *s++; @@ -11232,6 +11296,20 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) lastub = s++; } } + + if (!exp_digits) { + /* no exponent digits, the [eEpP] could be for something else, + * though in practice we don't get here for p since that's preparsed + * earlier, and results in only the 0xX being consumed, so behave similarly + * for decimal floats and consume only the D.DD, leaving the [eE] to the + * next token. + */ + s = save_s; + d = save_d; + } + else { + floatit = TRUE; + } } @@ -11313,8 +11391,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STATIC char * S_scan_formline(pTHX_ char *s) { - char *eol; - char *t; SV * const stuff = newSVpvs(""); bool needargs = FALSE; bool eofmt = FALSE; @@ -11322,8 +11398,9 @@ S_scan_formline(pTHX_ char *s) PERL_ARGS_ASSERT_SCAN_FORMLINE; while (!needargs) { + char *eol; if (*s == '.') { - t = s+1; + char *t = s+1; #ifdef PERL_STRICT_CR while (SPACE_OR_TAB(*t)) t++; @@ -11340,6 +11417,7 @@ S_scan_formline(pTHX_ char *s) if (!eol++) eol = PL_bufend; if (*s != '#') { + char *t; for (t = s; t < eol; t++) { if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { needargs = FALSE; @@ -11441,6 +11519,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags) return 0; } +void +Perl_abort_execution(pTHX_ const char * const msg, const char * const name) +{ + PERL_ARGS_ASSERT_ABORT_EXECUTION; + + if (PL_minus_c) + Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name); + else { + Perl_croak(aTHX_ + "%sExecution of %s aborted due to compilation errors.\n", msg, name); + } + NOT_REACHED; /* NOTREACHED */ +} + +void +Perl_yyquit(pTHX) +{ + /* Called, after at least one error has been found, to abort the parse now, + * instead of trying to forge ahead */ + + yyerror_pvn(NULL, 0, 0); +} + int Perl_yyerror(pTHX_ const char *const s) { @@ -11464,101 +11565,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) SV * const where_sv = newSVpvs_flags("", SVs_TEMP); int yychar = PL_parser->yychar; - PERL_ARGS_ASSERT_YYERROR_PVN; - - if (!yychar || (yychar == ';' && !PL_rsfp)) - sv_catpvs(where_sv, "at EOF"); - else if ( PL_oldoldbufptr - && PL_bufptr > PL_oldoldbufptr - && PL_bufptr - PL_oldoldbufptr < 200 - && PL_oldoldbufptr != PL_oldbufptr - && PL_oldbufptr != PL_bufptr) - { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + /* Output error message 's' with length 'len'. 'flags' are SV flags that + * apply. If the number of errors found is large enough, it abandons + * parsing. If 's' is NULL, there is no message, and it abandons + * processing unconditionally */ + + if (s != NULL) { + if (!yychar || (yychar == ';' && !PL_rsfp)) + sv_catpvs(where_sv, "at EOF"); + else if ( PL_oldoldbufptr + && PL_bufptr > PL_oldoldbufptr + && PL_bufptr - PL_oldoldbufptr < 200 + && PL_oldoldbufptr != PL_oldbufptr + && PL_oldbufptr != PL_bufptr) + { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldoldbufptr)) - PL_oldoldbufptr++; + while (isSPACE(*PL_oldoldbufptr)) + PL_oldoldbufptr++; #endif - context = PL_oldoldbufptr; - contlen = PL_bufptr - PL_oldoldbufptr; - } - else if ( PL_oldbufptr - && PL_bufptr > PL_oldbufptr - && PL_bufptr - PL_oldbufptr < 200 - && PL_oldbufptr != PL_bufptr) { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + context = PL_oldoldbufptr; + contlen = PL_bufptr - PL_oldoldbufptr; + } + else if ( PL_oldbufptr + && PL_bufptr > PL_oldbufptr + && PL_bufptr - PL_oldbufptr < 200 + && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldbufptr)) - PL_oldbufptr++; + while (isSPACE(*PL_oldbufptr)) + PL_oldbufptr++; #endif - context = PL_oldbufptr; - contlen = PL_bufptr - PL_oldbufptr; - } - else if (yychar > 255) - sv_catpvs(where_sv, "next token ???"); - else if (yychar == YYEMPTY) { - if (PL_lex_state == LEX_NORMAL) - sv_catpvs(where_sv, "at end of line"); - else if (PL_lex_inpat) - sv_catpvs(where_sv, "within pattern"); - else - sv_catpvs(where_sv, "within string"); - } - else { - sv_catpvs(where_sv, "next char "); - if (yychar < 32) - Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); - else if (isPRINT_LC(yychar)) { - const char string = yychar; - sv_catpvn(where_sv, &string, 1); - } - else - Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); - } - msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); - Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", - OutCopFILE(PL_curcop), - (IV)(PL_parser->preambling == NOLINE - ? CopLINE(PL_curcop) - : PL_parser->preambling)); - if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", - UTF8fARG(UTF, contlen, context)); - else - Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); - if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { - Perl_sv_catpvf(aTHX_ msg, - " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n", - (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); - PL_multi_end = 0; - } - if (PL_in_eval & EVAL_WARNONLY) { - PL_in_eval &= ~EVAL_WARNONLY; - Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); - } - else { - qerror(msg); + context = PL_oldbufptr; + contlen = PL_bufptr - PL_oldbufptr; + } + else if (yychar > 255) + sv_catpvs(where_sv, "next token ???"); + else if (yychar == YYEMPTY) { + if (PL_lex_state == LEX_NORMAL) + sv_catpvs(where_sv, "at end of line"); + else if (PL_lex_inpat) + sv_catpvs(where_sv, "within pattern"); + else + sv_catpvs(where_sv, "within string"); + } + else { + sv_catpvs(where_sv, "next char "); + if (yychar < 32) + Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); + else if (isPRINT_LC(yychar)) { + const char string = yychar; + sv_catpvn(where_sv, &string, 1); + } + else + Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); + } + msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); + Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", + OutCopFILE(PL_curcop), + (IV)(PL_parser->preambling == NOLINE + ? CopLINE(PL_curcop) + : PL_parser->preambling)); + if (context) + Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", + UTF8fARG(UTF, contlen, context)); + else + Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); + if ( PL_multi_start < PL_multi_end + && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) + { + Perl_sv_catpvf(aTHX_ msg, + " (Might be a runaway multi-line %c%c string starting on" + " line %" IVdf ")\n", + (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); + PL_multi_end = 0; + } + if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); + } + else { + qerror(msg); + } } - if (PL_error_count >= 10) { - SV * errsv; - if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) - Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n", - SVfARG(errsv), OutCopFILE(PL_curcop)); - else - Perl_croak(aTHX_ "%s has too many errors.\n", - OutCopFILE(PL_curcop)); + if (s == NULL || PL_error_count >= 10) { + const char * msg = ""; + const char * const name = OutCopFILE(PL_curcop); + + if (PL_in_eval) { + SV * errsv = ERRSV; + if (SvCUR(errsv)) { + msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); + } + } + + if (s == NULL) { + abort_execution(msg, name); + } + else { + Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name); + } } PL_in_my = 0; PL_in_my_stash = NULL; @@ -11580,7 +11700,9 @@ S_swallow_bom(pTHX_ U8 *s) /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); +#endif s += 2; if (PL_bufend > (char*)s) { s = add_utf16_textfilter(s, TRUE); @@ -11594,7 +11716,9 @@ S_swallow_bom(pTHX_ U8 *s) case 0xFE: if (s[1] == 0xFF) { /* UTF-16 big-endian? */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); +#endif s += 2; if (PL_bufend > (char *)s) { s = add_utf16_textfilter(s, FALSE); @@ -11608,7 +11732,9 @@ S_swallow_bom(pTHX_ U8 *s) case BOM_UTF8_FIRST_BYTE: { const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); +#endif s += len + 1; /* UTF-8 */ } break; @@ -11627,7 +11753,9 @@ S_swallow_bom(pTHX_ U8 *s) * 00 xx 00 xx * are a good indicator of UTF-16BE. */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); +#endif s = add_utf16_textfilter(s, FALSE); #else /* diag_listed_as: Unsupported script encoding %s */ @@ -11643,7 +11771,9 @@ S_swallow_bom(pTHX_ U8 *s) * xx 00 xx 00 * are a good indicator of UTF-16LE. */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); +#endif s = add_utf16_textfilter(s, TRUE); #else /* diag_listed_as: Unsupported script encoding %s */