X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/3dd4eaeb8ac39e08179145b86aedda36584a3509..c0040b7e8ed06f80decc703246e417668aa3a7c1:/toke.c diff --git a/toke.c b/toke.c index c60d32d..fc87252 100644 --- a/toke.c +++ b/toke.c @@ -39,6 +39,7 @@ Individual members of C have their own documentation. #define PERL_IN_TOKE_C #include "perl.h" #include "dquote_inline.h" +#include "invlist_inline.h" #define new_constant(a,b,c,d,e,f,g) \ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) @@ -310,6 +311,7 @@ static struct debug_tokens { { ANDAND, TOKENTYPE_NONE, "ANDAND" }, { ANDOP, TOKENTYPE_NONE, "ANDOP" }, { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, + { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" }, { ARROW, TOKENTYPE_NONE, "ARROW" }, { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, @@ -367,6 +369,7 @@ static struct debug_tokens { { RELOP, TOKENTYPE_OPNUM, "RELOP" }, { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, + { SIGSUB, TOKENTYPE_NONE, "SIGSUB" }, { SUB, TOKENTYPE_NONE, "SUB" }, { THING, TOKENTYPE_OPVAL, "THING" }, { UMINUS, TOKENTYPE_NONE, "UMINUS" }, @@ -456,21 +459,14 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s) PERL_ARGS_ASSERT_PRINTBUF; - GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; SvREFCNT_dec(tmp); } #endif -static int -S_deprecate_commaless_var_list(pTHX) { - PL_expect = XTERM; - deprecate_fatal_in("5.28", "Use of comma-less variable list is deprecated"); - return REPORT(','); /* grandfather non-comma-format format */ -} - /* * S_ao * @@ -563,16 +559,18 @@ S_no_op(pTHX_ const char *const what, char *s) */ STATIC void -S_missingterm(pTHX_ char *s) +S_missingterm(pTHX_ char *s, STRLEN len) { char tmpbuf[UTF8_MAXBYTES + 1]; char q; bool uni = FALSE; SV *sv; if (s) { - char * const nl = strrchr(s,'\n'); - if (nl) - *nl = '\0'; + char * const nl = (char *) my_memrchr(s, '\n', len); + if (nl) { + *nl = '\0'; + len = nl - s; + } uni = UTF; } else if (PL_multi_close < 32) { @@ -580,24 +578,28 @@ S_missingterm(pTHX_ char *s) tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; s = tmpbuf; + len = 2; } else { if (LIKELY(PL_multi_close < 256)) { *tmpbuf = (char)PL_multi_close; tmpbuf[1] = '\0'; + len = 1; } else { + char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); + *end = '\0'; + len = end - tmpbuf; uni = TRUE; - *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0; } s = tmpbuf; } - q = strchr(s,'"') ? '\'' : '"'; - sv = sv_2mortal(newSVpv(s,0)); + q = memchr(s, '"', len) ? '\'' : '"'; + sv = sv_2mortal(newSVpvn(s, len)); if (uni) SvUTF8_on(sv); - Perl_croak(aTHX_ "Can't find string terminator %c%" SVf - "%c anywhere before EOF",q,SVfARG(sv),q); + Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c" + " anywhere before EOF", q, SVfARG(sv), q); } #include "feature.h" @@ -669,7 +671,7 @@ S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen) Creates and initialises a new lexer/parser state object, supplying a context in which to lex and parse from a new source of Perl code. A pointer to the new state object is placed in L. An entry -is made on the save stack so that upon unwinding the new state object +is made on the save stack so that upon unwinding, the new state object will be destroyed and the former value of L will be restored. Nothing else need be done to clean up the parsing context. @@ -701,6 +703,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) { const char *s = NULL; yy_parser *parser, *oparser; + if (flags && flags & ~LEX_START_FLAGS) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start"); @@ -726,6 +729,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->lex_state = LEX_NORMAL; parser->expect = XSTATE; parser->rsfp = rsfp; + parser->recheck_utf8_validity = FALSE; parser->rsfp_filters = !(flags & LEX_START_SAME_FILTER) || !oparser ? NULL @@ -742,7 +746,22 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) if (line) { STRLEN len; + const U8* first_bad_char_loc; + s = SvPV_const(line, len); + + if ( SvUTF8(line) + && UNLIKELY(! is_utf8_string_loc((U8 *) s, + SvCUR(line), + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) s + SvCUR(line), + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } + parser->linestr = flags & LEX_START_COPIED ? SvREFCNT_inc_simple_NN(line) : newSVpvn_flags(s, len, SvUTF8(line)); @@ -751,6 +770,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) } else { parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2); } + parser->oldoldbufptr = parser->oldbufptr = parser->bufptr = @@ -1018,13 +1038,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) SvCUR(PL_parser->linestr) + len+highhalf); PL_parser->bufend += len+highhalf; for (p = pv; p != e; p++) { - U8 c = (U8)*p; - if (! UTF8_IS_INVARIANT(c)) { - *bufptr++ = UTF8_TWO_BYTE_HI(c); - *bufptr++ = UTF8_TWO_BYTE_LO(c); - } else { - *bufptr++ = (char)c; - } + append_utf8_from_native_byte(*p, (U8 **) &bufptr); } } } else { @@ -1039,12 +1053,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) { p++; highhalf++; - } else if (! UTF8_IS_INVARIANT(c)) { - _force_out_malformed_utf8_message((U8 *) p, (U8 *) e, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } + } else assert(UTF8_IS_INVARIANT(c)); } if (!highhalf) goto plain_copy; @@ -1258,6 +1267,24 @@ Perl_lex_discard_to(pTHX_ char *ptr) PL_parser->last_lop -= discard_len; } +void +Perl_notify_parser_that_changed_to_utf8(pTHX) +{ + /* Called when $^H is changed to indicate that HINT_UTF8 has changed from + * off to on. At compile time, this has the effect of entering a 'use + * utf8' section. This means that any input was not previously checked for + * UTF-8 (because it was off), but now we do need to check it, or our + * assumptions about the input being sane could be wrong, and we could + * segfault. This routine just sets a flag so that the next time we look + * at the input we do the well-formed UTF-8 check. If we aren't in the + * proper phase, there may not be a parser object, but if there is, setting + * the flag is harmless */ + + if (PL_parser) { + PL_parser->recheck_utf8_validity = TRUE; + } +} + /* =for apidoc Amx|bool|lex_next_chunk|U32 flags @@ -1293,7 +1320,6 @@ Perl_lex_next_chunk(pTHX_ U32 flags) STRLEN linestart_pos, last_uni_pos, last_lop_pos; bool got_some_for_debugger = 0; bool got_some; - const U8* first_bad_char_loc; if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM)) Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk"); @@ -1360,15 +1386,19 @@ Perl_lex_next_chunk(pTHX_ U32 flags) PL_parser->bufend = buf + new_bufend_pos; PL_parser->bufptr = buf + bufptr_pos; - if (UTF && ! is_utf8_string_loc((U8 *) PL_parser->bufptr, - PL_parser->bufend - PL_parser->bufptr, - &first_bad_char_loc)) - { - _force_out_malformed_utf8_message(first_bad_char_loc, - (U8 *) PL_parser->bufend, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ + if (UTF) { + const U8* first_bad_char_loc; + if (UNLIKELY(! is_utf8_string_loc( + (U8 *) PL_parser->bufptr, + PL_parser->bufend - PL_parser->bufptr, + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) PL_parser->bufend, + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } } PL_parser->oldbufptr = buf + oldbufptr_pos; @@ -1545,7 +1575,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (s == bufend) need_incline = 1; else - incline(s); + incline(s, bufend); } } else if (isSPACE(c)) { s++; @@ -1564,7 +1594,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (!got_more) break; if (can_incline && need_incline && PL_parser->rsfp) { - incline(s); + incline(s, bufend); need_incline = 0; } } else if (!c) { @@ -1596,7 +1626,7 @@ Note that C is a valid C and will always return C. */ bool -Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) +Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) { STRLEN len, origlen; char *p; @@ -1658,6 +1688,13 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) origlen, UNI_DISPLAY_ISPRINT) : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII); + if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) { + SV *name2 = sv_2mortal(newSVsv(PL_curstname)); + sv_catpvs(name2, "::"); + sv_catsv(name2, (SV *)name); + name = name2; + } + if (proto_after_greedy_proto) Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO), "Prototype after '%c' for %" SVf " : %s", @@ -1690,7 +1727,7 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn) */ STATIC void -S_incline(pTHX_ const char *s) +S_incline(pTHX_ const char *s, const char *end) { const char *t; const char *n; @@ -1700,6 +1737,8 @@ S_incline(pTHX_ const char *s) PERL_ARGS_ASSERT_INCLINE; + assert(end >= s); + COPLINE_INC_WITH_HERELINES; if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL && s+1 == PL_bufend && *s == ';') { @@ -1711,8 +1750,8 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (strEQs(s, "line")) - s += 4; + if (memBEGINs(s, (STRLEN) (end - s), "line")) + s += sizeof("line") - 1; else return; if (SPACE_OR_TAB(*s)) @@ -1731,7 +1770,7 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (*s == '"' && (t = strchr(s+1, '"'))) { + if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { s++; e = t + 1; } @@ -1885,7 +1924,6 @@ STATIC void S_check_uni(pTHX) { const char *s; - const char *t; if (PL_oldoldbufptr != PL_last_uni) return; @@ -1894,7 +1932,7 @@ S_check_uni(pTHX) s = PL_last_uni; while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') s += UTF ? UTF8SKIP(s) : 1; - if ((t = strchr(s, '(')) && t < PL_bufptr) + if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) return; Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), @@ -1973,7 +2011,7 @@ S_force_next(pTHX_ I32 type) * S_postderef * * This subroutine handles postfix deref syntax after the arrow has already - * been emitted. @* $* etc. are emitted as two separate token right here. + * been emitted. @* $* etc. are emitted as two separate tokens right here. * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits * only the first, leaving yylex to find the next. */ @@ -2028,10 +2066,9 @@ STATIC SV * S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { SV * const sv = newSVpvn_utf8(start, len, - !IN_BYTES - && UTF - && !is_utf8_invariant_string((const U8*)start, len) - && is_utf8_string((const U8*)start, len)); + ! IN_BYTES + && UTF + && is_utf8_non_invariant_string((const U8*)start, len)); return sv; } @@ -2069,8 +2106,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) if (check_keyword) { char *s2 = PL_tokenbuf; STRLEN len2 = len; - if (allow_pack && len > 6 && strEQs(s2, "CORE::")) - s2 += 6, len2 -= 6; + if (allow_pack && memBEGINPs(s2, len, "CORE::")) { + s2 += sizeof("CORE::") - 1; + len2 -= sizeof("CORE::") - 1; + } if (keyword(s2, len2, 0)) return start; } @@ -2143,7 +2182,7 @@ Perl_str_to_version(pTHX_ SV *sv) STRLEN len; const char *start = SvPV_const(sv,len); const char * const end = start + len; - const bool utf = SvUTF8(sv) ? TRUE : FALSE; + const bool utf = cBOOL(SvUTF8(sv)); PERL_ARGS_ASSERT_STR_TO_VERSION; @@ -2249,10 +2288,9 @@ S_force_strict_version(pTHX_ char *s) /* * S_tokeq - * Tokenize a quoted string passed in as an SV. It finds the next - * chunk, up to end of string or a backslash. It may make a new - * SV containing that chunk (if HINT_NEW_STRING is on). It also - * turns \\ into \. + * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv', + * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is + * unchanged, and a new SV containing the modified input is returned. */ STATIC SV * @@ -2355,6 +2393,8 @@ S_sublex_start(pTHX) PL_parser->lex_super_state = PL_lex_state; PL_parser->lex_sub_inwhat = (U16)op_type; PL_parser->lex_sub_op = PL_lex_op; + PL_parser->sub_no_recover = FALSE; + PL_parser->sub_error_count = PL_error_count; PL_lex_state = LEX_INTERPPUSH; PL_expect = XTERM; @@ -2534,6 +2574,20 @@ S_sublex_done(pTHX) else { const line_t l = CopLINE(PL_curcop); LEAVE; + if (PL_parser->sub_error_count != PL_error_count) { + const char * const name = OutCopFILE(PL_curcop); + if (PL_parser->sub_no_recover) { + const char * msg = ""; + if (PL_in_eval) { + SV *errsv = ERRSV; + if (SvCUR(ERRSV)) { + msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); + } + } + abort_execution(msg, name); + NOT_REACHED; + } + } if (PL_multi_close == '<') PL_parser->herelines += l - PL_multi_end; PL_bufend = SvPVX(PL_linestr); @@ -2557,29 +2611,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) SV *cv; SV *rv; HV *stash; - const U8* first_bad_char_loc; const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; if (!SvCUR(res)) { - deprecate_fatal_in("5.28", "Unknown charname '' is deprecated"); - return res; - } - - if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr, - e - backslash_ptr, - &first_bad_char_loc)) - { - _force_out_malformed_utf8_message(first_bad_char_loc, - (U8 *) PL_parser->bufend, - 0, - 0 /* 0 means don't die */ ); - yyerror_pv(Perl_form(aTHX_ - "Malformed UTF-8 character immediately after '%.*s'", - (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr), - SVf_UTF8); - return NULL; + SvREFCNT_dec_NN(res); + /* diag_listed_as: Unknown charname '%s' */ + yyerror("Unknown charname ''"); + return NULL; } res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, @@ -2599,8 +2639,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL)) { const char * const name = HvNAME(stash); - if (HvNAMELEN(stash) == sizeof("_charnames")-1 - && strEQ(name, "_charnames")) { + if (memEQs(name, HvNAMELEN(stash), "_charnames")) { return res; } } @@ -2645,14 +2684,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_begin) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_begin = _core_swash_init("utf8", - "_Perl_Charname_Begin", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_begin, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); @@ -2676,14 +2712,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_continue) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_continue = _core_swash_init("utf8", - "_Perl_Charname_Continue", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_continue, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); @@ -2691,6 +2724,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } } if (*(s-1) == ' ') { + /* diag_listed_as: charnames alias definitions may not contain + trailing white-space; marked by <-- HERE in %s + */ yyerror_pv( Perl_form(aTHX_ "charnames alias definitions may not contain trailing " @@ -2706,11 +2742,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) const U8* first_bad_char_loc; STRLEN len; const char* const str = SvPV_const(res, len); - if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) { + if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len, + &first_bad_char_loc))) + { _force_out_malformed_utf8_message(first_bad_char_loc, (U8 *) PL_parser->bufend, 0, 0 /* 0 means don't die */ ); + /* diag_listed_as: Malformed UTF-8 returned by \N{%s} + immediately after '%s' */ yyerror_pv( Perl_form(aTHX_ "Malformed UTF-8 returned by %.*s immediately after '%.*s'", @@ -2728,6 +2768,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) /* The final %.*s makes sure that should the trailing NUL be missing * that this print won't run off the end of the string */ + /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE + in \N{%s} */ yyerror_pv( Perl_form(aTHX_ "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s", @@ -2739,6 +2781,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } multi_spaces: + /* diag_listed_as: charnames alias definitions may not contain a + sequence of multiple spaces; marked by <-- HERE + in %s */ yyerror_pv( Perl_form(aTHX_ "charnames alias definitions may not contain a sequence of " @@ -2837,7 +2882,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) } (end if backslash) handle regular character } (end while character to read) - + */ STATIC char * @@ -2852,8 +2897,6 @@ S_scan_const(pTHX_ char *start) bool didrange = FALSE; /* did we just finish a range? */ bool in_charclass = FALSE; /* within /[...]/ */ bool has_utf8 = FALSE; /* Output constant is UTF8 */ - bool has_above_latin1 = FALSE; /* does something require special - handling in tr/// ? */ bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be UTF8? But, this can show as true when the source isn't utf8, as for @@ -2865,8 +2908,16 @@ S_scan_const(pTHX_ char *start) should we have to convert to UTF-8) */ SV *res; /* result from charnames */ - STRLEN offset_to_max; /* The offset in the output to where the range - high-end character is temporarily placed */ + STRLEN offset_to_max = 0; /* The offset in the output to where the range + high-end character is temporarily placed */ + + /* Does something require special handling in tr/// ? This avoids extra + * work in a less likely case. As such, khw didn't feel it was worth + * adding any branches to the more mainline code to handle this, which + * means that this doesn't get set in some circumstances when things like + * \x{100} get expanded out. As a result there needs to be extra testing + * done in the tr code */ + bool has_above_latin1 = FALSE; /* Note on sizing: The scanned constant is placed into sv, which is * initialized by newSV() assuming one byte of output for every byte of @@ -2912,8 +2963,8 @@ S_scan_const(pTHX_ char *start) * range, so for most cases we just drop down and handle the value * as any other. There are two exceptions. * - * 1. A minus sign indicates that we are actually going to have - * a range. In this case, skip the '-', set a flag, then drop + * 1. A hyphen indicates that we are actually going to have a + * range. In this case, skip the '-', set a flag, then drop * down to handle what should be the end range value. * 2. After we've handled that value, the next time through, that * flag is set and we fix up the range. @@ -2931,12 +2982,11 @@ S_scan_const(pTHX_ char *start) if (! dorange) { - /* Here, we don't think we're in a range. If we've processed - * at least one character, then see if this next one is a '-', - * indicating the previous one was the start of a range. But - * don't bother if we're too close to the end for the minus to - * mean that. */ - if (*s != '-' || s >= send - 1 || s == start) { + /* Here, we don't think we're in a range. If the new character + * is not a hyphen; or if it is a hyphen, but it's too close to + * either edge to indicate a range, or if we haven't output any + * characters yet then it's a regular character. */ + if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) { /* A regular character. Process like any other, but first * clear any flags */ @@ -2946,25 +2996,26 @@ S_scan_const(pTHX_ char *start) non_portable_endpoint = 0; backslash_N = 0; #endif - /* The tests here and the following 'else' for being above - * Latin1 suffice to find all such occurences in the - * constant, except those added by a backslash escape - * sequence, like \x{100}. And all those set - * 'has_above_latin1' as appropriate */ + /* The tests here for being above Latin1 and similar ones + * in the following 'else' suffice to find all such + * occurences in the constant, except those added by a + * backslash escape sequence, like \x{100}. Mostly, those + * set 'has_above_latin1' as appropriate */ if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { has_above_latin1 = TRUE; } /* Drops down to generic code to process current byte */ } - else { + else { /* Is a '-' in the context where it means a range */ if (didrange) { /* Something like y/A-C-Z// */ - Perl_croak(aTHX_ "Ambiguous range in transliteration operator"); + Perl_croak(aTHX_ "Ambiguous range in transliteration" + " operator"); } dorange = TRUE; - s++; /* Skip past the minus */ + s++; /* Skip past the hyphen */ /* d now points to where the end-range character will be * placed. Save it so won't have to go finding it later, @@ -2978,6 +3029,8 @@ S_scan_const(pTHX_ char *start) if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) { has_above_latin1 = TRUE; } + + /* Drops down to generic code to process current byte */ } } /* End of not a range */ else { @@ -2989,26 +3042,33 @@ S_scan_const(pTHX_ char *start) * 'd' points to just beyond the range end in the 'sv' string, * where we would next place something * 'offset_to_max' is the offset in 'sv' at which the character - * before 'd' begins. + * (the range's maximum end point) before 'd' begins. */ - const char * max_ptr = SvPVX_const(sv) + offset_to_max; - const char * min_ptr; + char * max_ptr = SvPVX(sv) + offset_to_max; + char * min_ptr; IV range_min; IV range_max; /* last character in range */ - STRLEN save_offset; STRLEN grow; + Size_t offset_to_min = 0; + Size_t extras = 0; #ifdef EBCDIC bool convert_unicode; IV real_range_max = 0; #endif - - /* Get the range-ends code point values. */ + /* Get the code point values of the range ends. */ if (has_utf8) { /* We know the utf8 is valid, because we just constructed * it ourselves in previous loop iterations */ min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1); range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL); range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL); + + /* This compensates for not all code setting + * 'has_above_latin1', so that we don't skip stuff that + * should be executed */ + if (range_max > 255) { + has_above_latin1 = TRUE; + } } else { min_ptr = max_ptr - 1; @@ -3016,6 +3076,23 @@ S_scan_const(pTHX_ char *start) range_max = * (U8*) max_ptr; } + /* If the range is just a single code point, like tr/a-a/.../, + * that code point is already in the output, twice. We can + * just back up over the second instance and avoid all the rest + * of the work. But if it is a variant character, it's been + * counted twice, so decrement. (This unlikely scenario is + * special cased, like the one for a range of 2 code points + * below, only because the main-line code below needs a range + * of 3 or more to work without special casing. Might as well + * get it out of the way now.) */ + if (UNLIKELY(range_max == range_min)) { + d = max_ptr; + if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) { + utf8_variant_count--; + } + goto range_done; + } + #ifdef EBCDIC /* On EBCDIC platforms, we may have to deal with portable * ranges. These happen if at least one range endpoint is a @@ -3023,16 +3100,16 @@ S_scan_const(pTHX_ char *start) * [A-Z] or [a-z], and both ends are literal characters, * like 'A', and not like \x{C1} */ convert_unicode = - cBOOL(backslash_N) /* \N{} forces Unicode, hence - portable range */ - || ( ! non_portable_endpoint - && (( isLOWER_A(range_min) && isLOWER_A(range_max)) - || (isUPPER_A(range_min) && isUPPER_A(range_max)))); + cBOOL(backslash_N) /* \N{} forces Unicode, + hence portable range */ + || ( ! non_portable_endpoint + && (( isLOWER_A(range_min) && isLOWER_A(range_max)) + || (isUPPER_A(range_min) && isUPPER_A(range_max)))); if (convert_unicode) { /* Special handling is needed for these portable ranges. - * They are defined to all be in Unicode terms, which - * include all Unicode code points between the end points. + * They are defined to be in Unicode terms, which includes + * all the Unicode code points between the end points. * Convert to Unicode to get the Unicode range. Later we * will convert each code point in the range back to * native. */ @@ -3050,7 +3127,6 @@ S_scan_const(pTHX_ char *start) range_max = UNI_TO_NATIVE(range_max); } #endif - /* Use the characters themselves for the error message if * ASCII printables; otherwise some visible representation * of them */ @@ -3061,32 +3137,40 @@ S_scan_const(pTHX_ char *start) } #ifdef EBCDIC else if (convert_unicode) { - /* diag_listed_as: Invalid range "%s" in transliteration operator */ + /* diag_listed_as: Invalid range "%s" in transliteration operator */ Perl_croak(aTHX_ - "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\"" - " in transliteration operator", - range_min, range_max); + "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" + UVXf "}\" in transliteration operator", + range_min, range_max); } #endif else { - /* diag_listed_as: Invalid range "%s" in transliteration operator */ + /* diag_listed_as: Invalid range "%s" in transliteration operator */ Perl_croak(aTHX_ - "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" - " in transliteration operator", - range_min, range_max); + "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\"" + " in transliteration operator", + range_min, range_max); } } + /* If the range is exactly two code points long, they are + * already both in the output */ + if (UNLIKELY(range_min + 1 == range_max)) { + goto range_done; + } + + /* Here the range contains at least 3 code points */ + if (has_utf8) { /* If everything in the transliteration is below 256, we * can avoid special handling later. A translation table - * of each of those bytes is created. And so we expand out - * all ranges to their constituent code points. But if - * we've encountered something above 255, the expanding - * won't help, so skip doing that. But if it's EBCDIC, we - * may have to look at each character below 256 if we have - * to convert to/from Unicode values */ + * for each of those bytes is created by op.c. So we + * expand out all ranges to their constituent code points. + * But if we've encountered something above 255, the + * expanding won't help, so skip doing that. But if it's + * EBCDIC, we may have to look at each character below 256 + * if we have to convert to/from Unicode values */ if ( has_above_latin1 #ifdef EBCDIC && (range_min > 255 || ! convert_unicode) @@ -3095,7 +3179,7 @@ S_scan_const(pTHX_ char *start) /* Move the high character one byte to the right; then * insert between it and the range begin, an illegal * byte which serves to indicate this is a range (using - * a '-' could be ambiguous). */ + * a '-' would be ambiguous). */ char *e = d++; while (e-- > max_ptr) { *(e + 1) = *e; @@ -3117,51 +3201,94 @@ S_scan_const(pTHX_ char *start) } /* Here we need to expand out the string to contain each - * character in the range. Grow the output to handle this */ + * character in the range. Grow the output to handle this. + * For non-UTF8, we need a byte for each code point in the + * range, minus the three that we've already allocated for: the + * hyphen, the min, and the max. For UTF-8, we need this + * plus an extra byte for each code point that occupies two + * bytes (is variant) when in UTF-8 (except we've already + * allocated for the end points, including if they are + * variants). For ASCII platforms and Unicode ranges on EBCDIC + * platforms, it's easy to calculate a precise number. To + * start, we count the variants in the range, which we need + * elsewhere in this function anyway. (For the case where it + * isn't easy to calculate, 'extras' has been initialized to 0, + * and the calculation is done in a loop further down.) */ +#ifdef EBCDIC + if (convert_unicode) +#endif + { + /* This is executed unconditionally on ASCII, and for + * Unicode ranges on EBCDIC. Under these conditions, all + * code points above a certain value are variant; and none + * under that value are. We just need to find out how much + * of the range is above that value. We don't count the + * end points here, as they will already have been counted + * as they were parsed. */ + if (range_min >= UTF_CONTINUATION_MARK) { + + /* The whole range is made up of variants */ + extras = (range_max - 1) - (range_min + 1) + 1; + } + else if (range_max >= UTF_CONTINUATION_MARK) { + + /* Only the higher portion of the range is variants */ + extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1; + } - save_offset = min_ptr - SvPVX_const(sv); + utf8_variant_count += extras; + } - /* The base growth is the number of code points in the range */ - grow = range_max - range_min + 1; - if (has_utf8) { + /* The base growth is the number of code points in the range, + * not including the endpoints, which have already been sized + * for (and output). We don't subtract for the hyphen, as it + * has been parsed but not output, and the SvGROW below is + * based only on what's been output plus what's left to parse. + * */ + grow = (range_max - 1) - (range_min + 1) + 1; - /* But if the output is UTF-8, some of those characters may - * need two bytes (since the maximum range value here is - * 255, the max bytes per character is two). On ASCII - * platforms, it's not much trouble to get an accurate - * count of what's needed. But on EBCDIC, the ones that - * need 2 bytes are scattered around, so just use a worst - * case value instead of calculating for that platform. */ + if (has_utf8) { #ifdef EBCDIC - grow *= 2; -#else - /* Only those above 127 require 2 bytes. This may be - * everything in the range, or not */ - if (range_min > 127) { + /* In some cases in EBCDIC, we haven't yet calculated a + * precise amount needed for the UTF-8 variants. Just + * assume the worst case, that everything will expand by a + * byte */ + if (! convert_unicode) { grow *= 2; } - else if (range_max > 127) { - grow += range_max - 127; - } + else #endif + { + /* Otherwise we know exactly how many variants there + * are in the range. */ + grow += extras; + } } - /* Subtract 3 for the bytes that were already accounted for - * (min, max, and the hyphen) */ - d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3); + /* Grow, but position the output to overwrite the range min end + * point, because in some cases we overwrite that */ + SvCUR_set(sv, d - SvPVX_const(sv)); + offset_to_min = min_ptr - SvPVX_const(sv); + /* See Note on sizing above. */ + d = offset_to_min + SvGROW(sv, SvCUR(sv) + + (send - s) + + grow + + 1 /* Trailing NUL */ ); + + /* Now, we can expand out the range. */ #ifdef EBCDIC - /* Here, we expand out the range. */ if (convert_unicode) { - IV i; + SSize_t i; /* Recall that the min and max are now in Unicode terms, so * we have to convert each character to its native * equivalent */ if (has_utf8) { for (i = range_min; i <= range_max; i++) { - append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i), - (U8 **) &d); + append_utf8_from_native_byte( + LATIN1_TO_NATIVE((U8) i), + (U8 **) &d); } } else { @@ -3174,34 +3301,51 @@ S_scan_const(pTHX_ char *start) #endif /* Always gets run for ASCII, and sometimes for EBCDIC. */ { - IV i; - /* Here, no conversions are necessary, which means that the * first character in the range is already in 'd' and * valid, so we can skip overwriting it */ if (has_utf8) { + SSize_t i; d += UTF8SKIP(d); for (i = range_min + 1; i <= range_max; i++) { append_utf8_from_native_byte((U8) i, (U8 **) &d); } } else { + SSize_t i; d++; - for (i = range_min + 1; i <= range_max; i++) { + assert(range_min + 1 <= range_max); + for (i = range_min + 1; i < range_max; i++) { +#ifdef EBCDIC + /* In this case on EBCDIC, we haven't calculated + * the variants. Do it here, as we go along */ + if (! UVCHR_IS_INVARIANT(i)) { + utf8_variant_count++; + } +#endif *d++ = (char)i; } + + /* The range_max is done outside the loop so as to + * avoid having to special case not incrementing + * 'utf8_variant_count' on EBCDIC (it's already been + * counted when originally parsed) */ + *d++ = (char) range_max; } } #ifdef EBCDIC - /* If the original range extended above 255, add in that portion. */ + /* If the original range extended above 255, add in that + * portion. */ if (real_range_max) { *d++ = (char) UTF8_TWO_BYTE_HI(0x100); *d++ = (char) UTF8_TWO_BYTE_LO(0x100); - if (real_range_max > 0x101) - *d++ = (char) ILLEGAL_UTF8_BYTE; - if (real_range_max > 0x100) + if (real_range_max > 0x100) { + if (real_range_max > 0x101) { + *d++ = (char) ILLEGAL_UTF8_BYTE; + } d = (char*)uvchr_to_utf8((U8*)d, real_range_max); + } } #endif @@ -3232,11 +3376,9 @@ S_scan_const(pTHX_ char *start) if (!esc) in_charclass = FALSE; } - - /* skip for regexp comments /(?#comment)/, except for the last - * char, which will be done separately. - * Stop on (?{..}) and friends */ - + /* skip for regexp comments /(?#comment)/, except for the last + * char, which will be done separately. Stop on (?{..}) and + * friends */ else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) { if (s[2] == '#') { while (s+1 < send && *s != ')') @@ -3249,8 +3391,7 @@ S_scan_const(pTHX_ char *start) break; } } - - /* likewise skip #-initiated comments in //x patterns */ + /* likewise skip #-initiated comments in //x patterns */ else if (*s == '#' && PL_lex_inpat && !in_charclass @@ -3259,14 +3400,13 @@ S_scan_const(pTHX_ char *start) while (s < send && *s != '\n') *d++ = *s++; } - - /* no further processing of single-quoted regex */ + /* no further processing of single-quoted regex */ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') goto default_action; - /* check for embedded arrays - (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) - */ + /* check for embedded arrays + * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) + */ else if (*s == '@' && s[1]) { if (UTF ? isIDFIRST_utf8_safe(s+1, send) @@ -3279,10 +3419,8 @@ S_scan_const(pTHX_ char *start) if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) break; /* in regexp, neither @+ nor @- are interpolated */ } - - /* check for embedded scalars. only stop if we're sure it's a - variable. - */ + /* check for embedded scalars. only stop if we're sure it's a + * variable. */ else if (*s == '$') { if (!PL_lex_inpat) /* not a regexp, so $ must be var */ break; @@ -3383,7 +3521,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_o(&s, &uv, &error, + bool valid = grok_bslash_o(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3401,7 +3540,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_x(&s, &uv, &error, + bool valid = grok_bslash_x(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3415,7 +3555,7 @@ S_scan_const(pTHX_ char *start) NUM_ESCAPE_INSERT: /* Insert oct or hex escaped character. */ - + /* Here uv is the ordinal of the next character being added */ if (UVCHR_IS_INVARIANT(uv)) { *d++ = (char) uv; @@ -3504,11 +3644,12 @@ S_scan_const(pTHX_ char *start) * For non-patterns, the named characters are converted to * their string equivalents. In patterns, named characters are * not converted to their ultimate forms for the same reasons - * that other escapes aren't. Instead, they are converted to - * the \N{U+...} form to get the value from the charnames that - * is in effect right now, while preserving the fact that it - * was a named character, so that the regex compiler knows - * this. + * that other escapes aren't (mainly that the ultimate + * character could be considered a meta-symbol by the regex + * compiler). Instead, they are converted to the \N{U+...} + * form to get the value from the charnames that is in effect + * right now, while preserving the fact that it was a named + * character, so that the regex compiler knows this. * * The structure of this section of code (besides checking for * errors and upgrading to utf8) is: @@ -3529,18 +3670,19 @@ S_scan_const(pTHX_ char *start) s++; if (*s != '{') { yyerror("Missing braces on \\N{}"); + *d++ = '\0'; continue; } s++; /* If there is no matching '}', it is an error. */ - if (! (e = strchr(s, '}'))) { + if (! (e = (char *) memchr(s, '}', send - s))) { if (! PL_lex_inpat) { yyerror("Missing right brace on \\N{}"); } else { yyerror("Missing right brace on \\N{} or unescaped left brace after \\N"); } - continue; + yyquit(); /* Have exhausted the input. */ } /* Here it looks like a named character */ @@ -3559,6 +3701,7 @@ S_scan_const(pTHX_ char *start) "Invalid hexadecimal number in \\N{U+...}" ); s = e + 1; + *d++ = '\0'; continue; } while (++s < e) { @@ -3757,6 +3900,7 @@ S_scan_const(pTHX_ char *start) " in transliteration operator", /* +1 to include the "}" */ (int) (e + 1 - start), start)); + *d++ = '\0'; goto end_backslash_N; } @@ -3822,15 +3966,16 @@ S_scan_const(pTHX_ char *start) case 'c': s++; if (s < send) { - *d++ = grok_bslash_c(*s++, 1); + *d++ = grok_bslash_c(*s, 1); } else { yyerror("Missing control char name in \\c"); + yyquit(); /* Are at end of input, no sense continuing */ } #ifdef EBCDIC non_portable_endpoint++; #endif - continue; + break; /* printf-style backslashes, formfeeds, newlines, etc */ case 'b': @@ -4010,7 +4155,7 @@ S_scan_const(pTHX_ char *start) /* This is the one truly awful dwimmer necessary to conflate C and sed. */ STATIC int -S_intuit_more(pTHX_ char *s) +S_intuit_more(pTHX_ char *s, char *e) { PERL_ARGS_ASSERT_INTUIT_MORE; @@ -4025,6 +4170,7 @@ S_intuit_more(pTHX_ char *s) return TRUE; if (*s != '{' && *s != '[') return FALSE; + PL_parser->sub_no_recover = TRUE; if (!PL_lex_inpat) return TRUE; @@ -4045,7 +4191,7 @@ S_intuit_more(pTHX_ char *s) /* this is terrifying, and it works */ int weight; char seen[256]; - const char * const send = strchr(s,']'); + const char * const send = (char *) memchr(s, ']', e - s); unsigned char un_char, last_un_char; char tmpbuf[sizeof PL_tokenbuf * 4]; @@ -4075,10 +4221,7 @@ S_intuit_more(pTHX_ char *s) weight -= seen[un_char] * 10; if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) { int len; - char *tmp = PL_bufend; - PL_bufend = (char*)send; - scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); - PL_bufend = tmp; + scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE); len = (int)strlen(tmpbuf); if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0, SVt_PV)) @@ -4199,11 +4342,14 @@ S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv) } if (*start == '$') { + SSize_t start_off = start - SvPVX(PL_linestr); if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY || isUPPER(*PL_tokenbuf)) return 0; - s = skipspace(s); - PL_bufptr = start; + /* this could be $# */ + if (isSPACE(*s)) + s = skipspace(s); + PL_bufptr = SvPVX(PL_linestr) + start_off; PL_expect = XREF; return *s == '(' ? FUNCMETH : METHOD; } @@ -4316,8 +4462,8 @@ Perl_filter_add(pTHX_ filter_t funcp, SV *datasv) PL_parser->last_uni = buf + last_uni_pos; if (PL_parser->last_lop) PL_parser->last_lop = buf + last_lop_pos; - SvLEN(linestr) = SvCUR(linestr); - SvCUR(linestr) = s-SvPVX(linestr); + SvLEN_set(linestr, SvCUR(linestr)); + SvCUR_set(linestr, s - SvPVX(linestr)); PL_parser->filtered = 1; break; } @@ -4360,6 +4506,7 @@ I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) { filter_t funcp; + I32 ret; SV *datasv = NULL; /* This API is bad. It should have been using unsigned int for maxlen. Not sure if we want to change the API, but if not we should sanity @@ -4442,7 +4589,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ /* Return: <0:error, =0:eof, >0:not eof */ - return (*funcp)(aTHX_ idx, buf_sv, correct_length); + ENTER; + save_scalar(PL_errgv); + ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); + LEAVE; + return ret; } STATIC char * @@ -4474,7 +4625,7 @@ S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len) PERL_ARGS_ASSERT_FIND_IN_MY_STASH; - if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__")) + if (memEQs(pkgname, len, "__PACKAGE__")) return PL_curstash; if (len > 2 @@ -4503,6 +4654,7 @@ S_tokenize_use(pTHX_ int is_use, char *s) { PERL_ARGS_ASSERT_TOKENIZE_USE; if (PL_expect != XSTATE) + /* diag_listed_as: "use" not allowed in expression */ yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression", is_use ? "use" : "no")); PL_expect = XTERM; @@ -4648,6 +4800,20 @@ Perl_yylex(pTHX) GV *gv = NULL; GV **gvp = NULL; + if (UNLIKELY(PL_parser->recheck_utf8_validity)) { + const U8* first_bad_char_loc; + if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr, + PL_bufend - PL_bufptr, + &first_bad_char_loc))) + { + _force_out_malformed_utf8_message(first_bad_char_loc, + (U8 *) PL_bufend, + 0, + 1 /* 1 means die */ ); + NOT_REACHED; /* NOTREACHED */ + } + PL_parser->recheck_utf8_validity = FALSE; + } DEBUG_T( { SV* tmp = newSVpvs(""); PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n", @@ -4734,8 +4900,11 @@ Perl_yylex(pTHX) } else { I32 tmp; - if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) + if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") + || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) + { tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ + } if ((*s == 'L' || *s == 'U' || *s == 'F') && (strpbrk(PL_lex_casestack, "LUF"))) { @@ -4826,7 +4995,7 @@ Perl_yylex(pTHX) return yylex(); case LEX_INTERPENDMAYBE: - if (intuit_more(PL_bufptr)) { + if (intuit_more(PL_bufptr, PL_bufend)) { PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ break; } @@ -4895,7 +5064,16 @@ Perl_yylex(pTHX) s = PL_bufend; } else { + int save_error_count = PL_error_count; + s = scan_const(PL_bufptr); + + /* Set flag if this was a pattern and there were errors. op.c will + * refuse to compile a pattern with this flag set. Otherwise, we + * could get segfaults, etc. */ + if (PL_lex_inpat && PL_error_count > save_error_count) { + ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR; + } if (*s == '\\') PL_lex_state = LEX_INTERPCASEMOD; else @@ -4921,6 +5099,7 @@ Perl_yylex(pTHX) return yylex(); case LEX_FORMLINE: + assert(PL_lex_formbrack); s = scan_formline(PL_bufptr); if (!PL_lex_formbrack) { @@ -4946,7 +5125,7 @@ Perl_yylex(pTHX) * as a var; e.g. ($, ...) would be seen as the var '$,' */ - char sigil; + U8 sigil; s = skipspace(s); sigil = *s++; @@ -4971,15 +5150,46 @@ Perl_yylex(pTHX) /* read var name, including sigil, into PL_tokenbuf */ PL_tokenbuf[0] = sigil; parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, - 0, cBOOL(UTF), FALSE); + 0, cBOOL(UTF), FALSE, FALSE); *dest = '\0'; assert(PL_tokenbuf[1]); /* we have a variable name */ + } + else { + *PL_tokenbuf = 0; + PL_in_my = 0; + } + + s = skipspace(s); + /* parse the = for the default ourselves to avoid '+=' etc being accepted here + * as the ASSIGNOP, and exclude other tokens that start with = + */ + if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) { + /* save now to report with the same context as we did when + * all ASSIGNOPS were accepted */ + PL_oldbufptr = s; + + ++s; + NEXTVAL_NEXTTOKE.ival = 0; + force_next(ASSIGNOP); + PL_expect = XTERM; + } + else if (*s == ',' || *s == ')') { + PL_expect = XOPERATOR; + } + else { + /* make sure the context shows the unexpected character and + * hopefully a bit more */ + if (*s) ++s; + while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')') + s++; + PL_bufptr = s; /* for error reporting */ + yyerror("Illegal operator following parameter in a subroutine signature"); + PL_in_my = 0; + } + if (*PL_tokenbuf) { NEXTVAL_NEXTTOKE.ival = sigil; force_next('p'); /* force a signature pending identifier */ } - else - PL_in_my = 0; - PL_expect = XOPERATOR; break; case ')': @@ -5004,12 +5214,6 @@ Perl_yylex(pTHX) switch (*s) { default: if (UTF) { - if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) { - _force_out_malformed_utf8_message((U8 *) s, (U8 *) PL_bufend, - 0, - 1 /* 1 means die */ ); - NOT_REACHED; /* NOTREACHED */ - } if (isIDFIRST_utf8_safe(s, PL_bufend)) { goto keylookup; } @@ -5031,12 +5235,23 @@ Perl_yylex(pTHX) else { c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); } - len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); - if (len > UNRECOGNIZED_PRECEDE_COUNT) { - d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT; - } else { + + if (s >= PL_linestart) { d = PL_linestart; } + else { + /* somehow (probably due to a parse failure), PL_linestart has advanced + * pass PL_bufptr, get a reasonable beginning of line + */ + d = s; + while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n') + --d; + } + len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d); + if (len > UNRECOGNIZED_PRECEDE_COUNT) { + d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT; + } + Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c, UTF8fARG(UTF, (s - d), d), (int) len + 1); @@ -5105,10 +5320,15 @@ Perl_yylex(pTHX) sv_catpvs(PL_linestr,"chomp;"); if (PL_minus_a) { if (PL_minus_F) { - if ((*PL_splitstr == '/' || *PL_splitstr == '\'' - || *PL_splitstr == '"') - && strchr(PL_splitstr + 1, *PL_splitstr)) + if ( ( *PL_splitstr == '/' + || *PL_splitstr == '\'' + || *PL_splitstr == '"') + && strchr(PL_splitstr + 1, *PL_splitstr)) + { + /* strchr is ok, because -F pattern can't contain + * embeddded NULs */ Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + } else { /* "q\0${splitstr}\0" is legal perl. Yes, even NUL bytes can be used as quoting characters. :-) */ @@ -5140,7 +5360,7 @@ Perl_yylex(pTHX) } do { fake_eof = 0; - bof = PL_rsfp ? TRUE : FALSE; + bof = cBOOL(PL_rsfp); if (0) { fake_eof: fake_eof = LEX_FAKE_EOF; @@ -5157,10 +5377,10 @@ Perl_yylex(pTHX) /* If it looks like the start of a BOM or raw UTF-16, * check if it in fact is. */ if (bof && PL_rsfp - && (*s == 0 + && ( *s == 0 || *(U8*)s == BOM_UTF8_FIRST_BYTE - || *(U8*)s >= 0xFE - || s[1] == 0)) + || *(U8*)s >= 0xFE + || s[1] == 0)) { Off_t offset = (IV)PerlIO_tell(PL_rsfp); bof = (offset == (Off_t)SvCUR(PL_linestr)); @@ -5176,7 +5396,9 @@ Perl_yylex(pTHX) } if (PL_parser->in_pod) { /* Incest with pod. */ - if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) { + if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") + && !isALPHA(s[4])) + { SvPVCLEAR(PL_linestr); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -5185,7 +5407,7 @@ Perl_yylex(pTHX) } } if (PL_rsfp || PL_parser->filtered) - incline(s); + incline(s, PL_bufend); } while (PL_parser->in_pod); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -5263,8 +5485,6 @@ Perl_yylex(pTHX) d = instr(s,"perl -"); if (!d) { d = instr(s,"perl"); - if (d && d[4] == '6') - d = NULL; #if defined(DOSISH) /* avoid getting into infinite loops when shebang * line contains "Perl" rather than "perl" */ @@ -5412,24 +5632,20 @@ Perl_yylex(pTHX) && !PL_rsfp && !PL_parser->filtered) { /* handle eval qq[#line 1 "foo"\n ...] */ CopLINE_dec(PL_curcop); - incline(s); + incline(s, PL_bufend); } d = s; while (d < PL_bufend && *d != '\n') d++; if (d < PL_bufend) d++; - else if (d > PL_bufend) - /* Found by Ilya: feed random input to Perl. */ - Perl_croak(aTHX_ "panic: input overflow, %p > %p", - d, PL_bufend); s = d; if (in_comment && d == PL_bufend && PL_lex_state == LEX_INTERPNORMAL && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; else - incline(s); + incline(s, PL_bufend); if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; force_next(FORMRBRACK); @@ -5443,11 +5659,8 @@ Perl_yylex(pTHX) { s++; if (s < PL_bufend) - incline(s); + incline(s, PL_bufend); } - else if (s > PL_bufend) - /* Found by Ilya: feed random input to Perl. */ - Perl_croak(aTHX_ "panic: input overflow"); } goto retry; case '-': @@ -5462,7 +5675,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; - if (strEQs(s,"=>")) { + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); OPERATOR('-'); /* unary minus */ @@ -5642,13 +5855,13 @@ Perl_yylex(pTHX) } else if (PL_expect == XPOSTDEREF) POSTDEREF('%'); PL_tokenbuf[0] = '%'; - s = scan_ident(s, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); pl_yylval.ival = 0; if (!PL_tokenbuf[1]) { PREREF('%'); } - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') PL_tokenbuf[0] = '@'; } @@ -5726,9 +5939,17 @@ Perl_yylex(pTHX) case XATTRTERM: PL_expect = XTERMBLOCK; grabattrs: + /* NB: as well as parsing normal attributes, we also end up + * here if there is something looking like attributes + * following a signature (which is illegal, but used to be + * legal in 5.20..5.26). If the latter, we still parse the + * attributes so that error messages(s) are less confusing, + * but ignore them (parser->sig_seen). + */ s = skipspace(s); attrs = NULL; while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { + bool sig = PL_parser->sig_seen; I32 tmp; SV *sv; d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); @@ -5767,43 +5988,31 @@ Perl_yylex(pTHX) PL_lex_stuff = NULL; } else { - if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) { - sv_free(sv); - if (PL_in_my == KEY_our) { - deprecate_disappears_in("5.28", - "Attribute \"unique\" is deprecated"); - } - else - Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables"); - } - /* NOTE: any CV attrs applied here need to be part of the CVf_BUILTIN_ATTRS define in cv.h! */ - else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) { + if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) { sv_free(sv); - CvLVALUE_on(PL_compcv); + if (!sig) + CvLVALUE_on(PL_compcv); } - else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) { + else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) { sv_free(sv); - deprecate_disappears_in("5.28", - "Attribute \"locked\" is deprecated"); + if (!sig) + CvMETHOD_on(PL_compcv); } - else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) { - sv_free(sv); - CvMETHOD_on(PL_compcv); - } - else if (!PL_in_my && len == 5 - && strnEQ(SvPVX(sv), "const", len)) + else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) { sv_free(sv); - Perl_ck_warner_d(aTHX_ - packWARN(WARN_EXPERIMENTAL__CONST_ATTR), - ":const is experimental" - ); - CvANONCONST_on(PL_compcv); - if (!CvANON(PL_compcv)) - yyerror(":const is not permitted on named " - "subroutines"); + if (!sig) { + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__CONST_ATTR), + ":const is experimental" + ); + CvANONCONST_on(PL_compcv); + if (!CvANON(PL_compcv)) + yyerror(":const is not permitted on named " + "subroutines"); + } } /* After we've set the flags, it could be argued that we don't need to do the attributes.pm-based setting @@ -5856,6 +6065,14 @@ Perl_yylex(pTHX) } } got_attrs: + if (PL_parser->sig_seen) { + /* see comment about about sig_seen and parser error + * handling */ + if (attrs) + op_free(attrs); + Perl_croak(aTHX_ "Subroutine attributes must come " + "before the signature"); + } if (attrs) { NEXTVAL_NEXTTOKE.opval = attrs; force_next(THING); @@ -6090,9 +6307,11 @@ Perl_yylex(pTHX) PL_expect = XTERM; break; } - if (strEQs(s, "sub")) { + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { + PL_bufptr = s; d = s + 3; d = skipspace(d); + s = PL_bufptr; if (*d == ':') { PL_expect = XTERM; break; @@ -6114,6 +6333,7 @@ Perl_yylex(pTHX) if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF) TOKEN(0); rightbracket: + assert(s != PL_bufend); s++; if (PL_lex_brackets <= 0) /* diag_listed_as: Unmatched right %s bracket */ @@ -6144,7 +6364,7 @@ Perl_yylex(pTHX) return yylex(); /* ignore fake brackets */ } force_next(formbrack ? '.' : '}'); - if (formbrack) LEAVE; + if (formbrack) LEAVE_with_name("lex_format"); if (formbrack == 2) { /* means . where arguments were expected */ force_next(';'); TOKEN(FORMRBRACK); @@ -6189,8 +6409,7 @@ Perl_yylex(pTHX) } PL_tokenbuf[0] = '&'; - s = scan_ident(s - 1, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, TRUE); + s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE); pl_yylval.ival = (OPpENTERSUB_AMPER<<8); if (PL_tokenbuf[1]) { force_ident_maybe_lex('&'); @@ -6224,7 +6443,9 @@ Perl_yylex(pTHX) { const char tmp = *s++; if (tmp == '=') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), "=====")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6256,19 +6477,21 @@ Perl_yylex(pTHX) && isALPHA(tmp) && (s == PL_linestart+1 || s[-2] == '\n') ) { - if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered) - || PL_lex_state != LEX_NORMAL) { + if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered) + || PL_lex_state != LEX_NORMAL) + { d = PL_bufend; while (s < d) { if (*s++ == '\n') { - incline(s); - if (strEQs(s,"=cut")) { - s = strchr(s,'\n'); + incline(s, PL_bufend); + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) + { + s = (char *) memchr(s,'\n', d - s); if (s) s++; else s = d; - incline(s); + incline(s, PL_bufend); goto retry; } } @@ -6290,7 +6513,7 @@ Perl_yylex(pTHX) t++; if (*t == '\n' || *t == '#') { formbrack = 1; - ENTER; + ENTER_with_name("lex_format"); SAVEI8(PL_parser->form_lex_state); SAVEI32(PL_lex_formbrack); PL_parser->form_lex_state = PL_lex_state; @@ -6340,10 +6563,12 @@ Perl_yylex(pTHX) OPERATOR('!'); case '<': if (PL_expect != XOPERATOR) { - if (s[1] != '<' && !strchr(s,'>')) + if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) check_uni(); if (s[1] == '<' && s[2] != '>') { - if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) { + if ( (s == PL_linestart || s[-1] == '\n') + && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) + { s = vcs_conflict_marker(s + 7); goto retry; } @@ -6358,7 +6583,9 @@ Perl_yylex(pTHX) { char tmp = *s++; if (tmp == '<') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6402,7 +6629,9 @@ Perl_yylex(pTHX) { const char tmp = *s++; if (tmp == '>') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6434,12 +6663,7 @@ Perl_yylex(pTHX) case '$': CLINE; - if (PL_expect == XOPERATOR) { - if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { - return deprecate_commaless_var_list(); - } - } - else if (PL_expect == XPOSTDEREF) { + if (PL_expect == XPOSTDEREF) { if (s[1] == '#') { s++; POSTDEREF(DOLSHARP); @@ -6470,8 +6694,7 @@ Perl_yylex(pTHX) } PL_tokenbuf[0] = '$'; - s = scan_ident(s, PL_tokenbuf + 1, - sizeof PL_tokenbuf - 1, FALSE); + s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE); if (PL_expect == XOPERATOR) { d = s; if (PL_bufptr > s) { @@ -6492,8 +6715,8 @@ Perl_yylex(pTHX) if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) - && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') { PL_tokenbuf[0] = '@'; if (ckWARN(WARN_SYNTAX)) { @@ -6518,30 +6741,32 @@ Perl_yylex(pTHX) else if (*s == '{') { char *t; PL_tokenbuf[0] = '%'; - if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) - && (t = strchr(s, '}')) && (t = strchr(t, '='))) - { - char tmpbuf[sizeof PL_tokenbuf]; - do { - t++; - } while (isSPACE(*t)); - if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { - STRLEN len; - t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, - &len); - while (isSPACE(*t)) - t++; - if ( *t == ';' - && get_cvn_flags(tmpbuf, len, UTF - ? SVf_UTF8 - : 0)) - { - Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "You need to quote \"%" UTF8f "\"", - UTF8fARG(UTF, len, tmpbuf)); - } - } - } + if ( strEQ(PL_tokenbuf+1, "SIG") + && ckWARN(WARN_SYNTAX) + && (t = (char *) memchr(s, '}', PL_bufend - s)) + && (t = (char *) memchr(t, '=', PL_bufend - t))) + { + char tmpbuf[sizeof PL_tokenbuf]; + do { + t++; + } while (isSPACE(*t)); + if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) { + STRLEN len; + t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, + &len); + while (isSPACE(*t)) + t++; + if ( *t == ';' + && get_cvn_flags(tmpbuf, len, UTF + ? SVf_UTF8 + : 0)) + { + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "You need to quote \"%" UTF8f "\"", + UTF8fARG(UTF, len, tmpbuf)); + } + } + } } } @@ -6621,7 +6846,9 @@ Perl_yylex(pTHX) } if (PL_lex_state == LEX_NORMAL) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) + { if (*s == '{') PL_tokenbuf[0] = '%'; @@ -6730,13 +6957,9 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - if ( PL_expect == XOPERATOR - && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack)) - return deprecate_commaless_var_list(); - s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { @@ -6746,10 +6969,6 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - if ( PL_expect == XOPERATOR - && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack)) - return deprecate_commaless_var_list(); - s = scan_str(s,FALSE,FALSE,FALSE,NULL); DEBUG_T( { if (s) @@ -6762,7 +6981,7 @@ Perl_yylex(pTHX) no_op("String",s); } if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_CONST; /* FIXME. I think that this can be const if char *d is replaced by more localised variables. */ @@ -6788,7 +7007,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) no_op("Backticks",s); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -6893,7 +7112,7 @@ Perl_yylex(pTHX) /* x::* is just a word, unless x is "CORE" */ if (!anydelim && *s == ':' && s[1] == ':') { - if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE; + if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE; goto just_a_word; } @@ -7031,7 +7250,7 @@ Perl_yylex(pTHX) if (tmp == KEY_dump) { Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED), "dump() better written as CORE::dump(). " - "dump() will no longer be available " + "dump() will no longer be available " "in Perl 5.30"); } gv = NULL; @@ -7075,12 +7294,26 @@ Perl_yylex(pTHX) orig_keyword = 0; lex = 0; off = 0; + /* FALLTHROUGH */ default: /* not a keyword */ just_a_word: { int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); bool safebw; + bool no_op_error = FALSE; + if (PL_expect == XOPERATOR) { + if (PL_bufptr == PL_linestart) { + CopLINE_dec(PL_curcop); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); + CopLINE_inc(PL_curcop); + } + else + /* We want to call no_op with s pointing after the + bareword, so defer it. But we want it to come + before the Bad name croak. */ + no_op_error = TRUE; + } /* Get the rest if it looks like a package qualifier */ @@ -7088,6 +7321,10 @@ Perl_yylex(pTHX) STRLEN morelen; s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); + if (no_op_error) { + no_op("Bareword",s); + no_op_error = FALSE; + } if (!morelen) Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", UTF8fARG(UTF, len, PL_tokenbuf), @@ -7096,15 +7333,8 @@ Perl_yylex(pTHX) pkgname = 1; } - if (PL_expect == XOPERATOR) { - if (PL_bufptr == PL_linestart) { - CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); - CopLINE_inc(PL_curcop); - } - else + if (no_op_error) no_op("Bareword",s); - } /* See if the name is "Foo::", in which case Foo is a bareword @@ -7183,17 +7413,24 @@ Perl_yylex(pTHX) == OA_FILEREF)) { bool immediate_paren = *s == '('; + SSize_t s_off; /* (Now we can afford to cross potential line boundary.) */ s = skipspace(s); + /* intuit_method() can indirectly call lex_next_chunk(), + * invalidating s + */ + s_off = s - SvPVX(PL_linestr); /* Two barewords in a row may indicate method call. */ if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$') && (tmp = intuit_method(s, lex ? NULL : sv, cv))) { + /* the code at method: doesn't use s */ goto method; } + s = SvPVX(PL_linestr) + s_off; /* If not a declared subroutine, it's an indirect object. */ /* (But it's an indir obj regardless for sort.) */ @@ -7408,10 +7645,10 @@ Perl_yylex(pTHX) if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { /* PL_warn_reserved is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, PL_tokenbuf); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } } } @@ -7466,14 +7703,6 @@ Perl_yylex(pTHX) if (!GvIO(gv)) GvIOp(gv) = newIO(); IoIFP(GvIOp(gv)) = PL_rsfp; -#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC) - { - const int fd = PerlIO_fileno(PL_rsfp); - if (fd >= 3) { - fcntl(fd,F_SETFD, FD_CLOEXEC); - } - } -#endif /* Mark this internal pseudo-handle as clean */ IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; if ((PerlIO*)PL_rsfp == PerlIO_stdin()) @@ -7658,7 +7887,7 @@ Perl_yylex(pTHX) *PL_tokenbuf = '&'; d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, 1, &len); - if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE")) + if (len && memNEs(PL_tokenbuf+1, len, "CORE") && !keyword(PL_tokenbuf + 1, len, 0)) { SSize_t off = s-SvPVX(PL_linestr); d = skipspace(d); @@ -7717,7 +7946,7 @@ Perl_yylex(pTHX) case KEY_exists: UNI(OP_EXISTS); - + case KEY_exit: UNI(OP_EXIT); @@ -7776,15 +8005,19 @@ Perl_yylex(pTHX) && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { char *p = s; + SSize_t s_off = s - SvPVX(PL_linestr); - if ((PL_bufend - p) >= 3 - && strEQs(p, "my") && isSPACE(*(p + 2))) + if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") + && isSPACE(*(p + 2))) { - p += 2; + p += 2; + } + else if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") + && isSPACE(*(p + 3))) + { + p += 3; } - else if ((PL_bufend - p) >= 4 - && strEQs(p, "our") && isSPACE(*(p + 3))) - p += 3; + p = skipspace(p); /* skip optional package name, as in "for my abc $x (..)" */ if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) { @@ -7793,6 +8026,9 @@ Perl_yylex(pTHX) } if (*p != '$' && *p != '\\') Perl_croak(aTHX_ "Missing $ on loop variable"); + + /* The buffer may have been reallocated, update s */ + s = SvPVX(PL_linestr) + s_off; } OPERATOR(FOR); @@ -7953,7 +8189,7 @@ Perl_yylex(pTHX) case KEY_last: LOOPX(OP_LAST); - + case KEY_lc: UNI(OP_LC); @@ -8032,7 +8268,7 @@ Perl_yylex(pTHX) s = skipspace(s); if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len); - if (len == 3 && strEQs(PL_tokenbuf, "sub")) + if (memEQs(PL_tokenbuf, len, "sub")) goto really_sub; PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len); if (!PL_in_my_stash) { @@ -8133,7 +8369,7 @@ Perl_yylex(pTHX) case KEY_pos: UNIDOR(OP_POS); - + case KEY_pack: LOP(OP_PACK,XTERM); @@ -8149,7 +8385,7 @@ Perl_yylex(pTHX) case KEY_q: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; pl_yylval.ival = OP_CONST; TERM(sublex_start()); @@ -8161,7 +8397,7 @@ Perl_yylex(pTHX) OP *words = NULL; s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; PL_expect = XOPERATOR; if (SvCUR(PL_lex_stuff)) { @@ -8210,7 +8446,7 @@ Perl_yylex(pTHX) case KEY_qq: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_STRINGIFY; if (SvIVX(PL_lex_stuff) == '\'') SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ @@ -8223,7 +8459,7 @@ Perl_yylex(pTHX) case KEY_qx: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -8321,7 +8557,7 @@ Perl_yylex(pTHX) case KEY_chomp: UNI(OP_CHOMP); - + case KEY_scalar: UNI(OP_SCALAR); @@ -8439,22 +8675,24 @@ Perl_yylex(pTHX) really_sub: { char * const tmpbuf = PL_tokenbuf + 1; - expectation attrful; bool have_name, have_proto; const int key = tmp; SV *format_name = NULL; + bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED; SSize_t off = s-SvPVX(PL_linestr); s = skipspace(s); d = SvPVX(PL_linestr)+off; + SAVEBOOL(PL_parser->sig_seen); + PL_parser->sig_seen = FALSE; + if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '\'' || (*s == ':' && s[1] == ':')) { - PL_expect = XBLOCK; - attrful = XATTRBLOCK; + PL_expect = XATTRBLOCK; d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); if (key == KEY_format) @@ -8485,8 +8723,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Missing name in \"%s\"", PL_bufptr); } - PL_expect = XTERMBLOCK; - attrful = XATTRTERM; + PL_expect = XATTRTERM; sv_setpvs(PL_subname,"?"); have_name = FALSE; } @@ -8502,12 +8739,13 @@ Perl_yylex(pTHX) } /* Look for a prototype */ - if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { + if (*s == '(' && !is_sigsub) { s = scan_str(s,FALSE,FALSE,FALSE,NULL); COPLINE_SET_FROM_MULTI_END; if (!s) Perl_croak(aTHX_ "Prototype not terminated"); - (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO)); + (void)validate_proto(PL_subname, PL_lex_stuff, + ckWARN(WARN_ILLEGALPROTO), 0); have_proto = TRUE; s = skipspace(s); @@ -8515,9 +8753,9 @@ Perl_yylex(pTHX) else have_proto = FALSE; - if (*s == ':' && s[1] != ':') - PL_expect = attrful; - else if ((*s != '{' && *s != '(') && key != KEY_format) { + if ( !(*s == ':' && s[1] != ':') + && (*s != '{' && *s != '(') && key != KEY_format) + { assert(key == KEY_sub || key == KEY_AUTOLOAD || key == KEY_DESTROY || key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK || @@ -8541,10 +8779,16 @@ Perl_yylex(pTHX) sv_setpvs(PL_subname, "__ANON__"); else sv_setpvs(PL_subname, "__ANON__::__ANON__"); - TOKEN(ANONSUB); + if (is_sigsub) + TOKEN(ANON_SIGSUB); + else + TOKEN(ANONSUB); } force_ident_maybe_lex('&'); - TOKEN(SUB); + if (is_sigsub) + TOKEN(SIGSUB); + else + TOKEN(SUB); } case KEY_system: @@ -8732,6 +8976,7 @@ S_pending_ident(pTHX) DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Pending identifier '%s'\n", PL_tokenbuf); }); + assert(tokenbuf_len >= 2); /* if we're in a my(), we can't allow dynamics here. $foo'bar has already been turned into $foo::bar, so @@ -8742,8 +8987,11 @@ S_pending_ident(pTHX) if (PL_in_my) { if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ if (has_colon) + /* diag_listed_as: No package name allowed for variable %s + in "our" */ yyerror_pv(Perl_form(aTHX_ "No package name allowed for " - "variable %s in \"our\"", + "%se %s in \"our\"", + *PL_tokenbuf=='&' ?"subroutin":"variabl", PL_tokenbuf), UTF ? SVf_UTF8 : 0); tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); } @@ -8752,13 +9000,13 @@ S_pending_ident(pTHX) if (has_colon) { /* "my" variable %s can't be in a package */ /* PL_no_myglob is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); yyerror_pv(Perl_form(aTHX_ PL_no_myglob, PL_in_my == KEY_my ? "my" : "state", *PL_tokenbuf == '&' ? "subroutin" : "variabl", PL_tokenbuf), UTF ? SVf_UTF8 : 0); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } if (PL_in_my == KEY_sigvar) { @@ -8804,7 +9052,7 @@ S_pending_ident(pTHX) HEK * const stashname = HvNAME_HEK(stash); SV * const sym = newSVhek(stashname); sv_catpvs(sym, "::"); - sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); + sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); pl_yylval.opval = newSVOP(OP_CONST, 0, sym); pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') @@ -8832,7 +9080,7 @@ S_pending_ident(pTHX) && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { - GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, + GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) @@ -8849,11 +9097,11 @@ S_pending_ident(pTHX) /* build ops for a bareword */ pl_yylval.opval = newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1, - tokenbuf_len - 1, + tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, UTF ? SVf_UTF8 : 0 )); pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') - gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, + gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), ((PL_tokenbuf[0] == '$') ? SVt_PV @@ -8903,7 +9151,6 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) s++; if (*s == ',') { GV* gv; - PADOFFSET off; if (keyword(w, s - w, 0)) return; @@ -8911,6 +9158,7 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) if (gv && GvCVu(gv)) return; if (s - w <= 254) { + PADOFFSET off; char tmpbuf[256]; Copy(w, tmpbuf+1, s - w, char); *tmpbuf = '&'; @@ -8961,7 +9209,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, || ! SvOK(*cvp)) { char *msg; - + /* Here haven't found what we're looking for. If it is charnames, * perhaps it needs to be loaded. Try doing that before giving up */ if (*key == 'c') { @@ -9070,8 +9318,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, PERL_STATIC_INLINE void S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, - bool is_utf8, bool check_dollar) + bool is_utf8, bool check_dollar, bool tick_warn) { + int saw_tick = 0; + const char *olds = *s; PERL_ARGS_ASSERT_PARSE_IDENT; while (*s < PL_bufend) { @@ -9105,6 +9355,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, *(*d)++ = ':'; *(*d)++ = ':'; (*s)++; + saw_tick++; } else if (allow_package && **s == ':' && (*s)[1] == ':' /* Disallow things like Foo::$bar. For the curious, this is @@ -9118,6 +9369,30 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, else break; } + if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) { + char *d; + char *d2; + Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */ + d2 = d; + SAVEFREEPV(d); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Old package separator used in string"); + if (olds[-1] == '#') + *d2++ = olds[-2]; + *d2++ = olds[-1]; + while (olds < *s) { + if (*olds == '\'') { + *d2++ = '\\'; + *d2++ = *olds++; + } + else + *d2++ = *olds++; + } + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Did you mean \"%" UTF8f "\" instead?)\n", + UTF8fARG(is_utf8, d2-d, d)); + } return; } @@ -9133,7 +9408,7 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN PERL_ARGS_ASSERT_SCAN_WORD; - parse_ident(&s, &d, e, allow_package, is_utf8, TRUE); + parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE); *d = '\0'; *slp = d - dest; return s; @@ -9181,7 +9456,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } } else { /* See if it is a "normal" identifier */ - parse_ident(&s, &d, e, 1, is_utf8, FALSE); + parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); } *d = '\0'; d = dest; @@ -9200,7 +9475,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) || isDIGIT_A((U8)s[1]) || s[1] == '$' || s[1] == '{' - || strEQs(s+1,"::")) ) + || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) { /* Dereferencing a value in a scalar variable. The alternatives are different syntaxes for a scalar variable. @@ -9247,19 +9522,36 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) bool skip; char *s2; /* If we were processing {...} notation then... */ - if (isIDFIRST_lazy_if_safe(d, e, is_utf8)) { - /* if it starts as a valid identifier, assume that it is one. - (the later check for } being at the expected point will trap - cases where this doesn't pan out.) */ - d += is_utf8 ? UTF8SKIP(d) : 1; - parse_ident(&s, &d, e, 1, is_utf8, TRUE); - *d = '\0'; + if (isIDFIRST_lazy_if_safe(d, e, is_utf8) + || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ + && isWORDCHAR(*s)) + ) { + /* note we have to check for a normal identifier first, + * as it handles utf8 symbols, and only after that has + * been ruled out can we look at the caret words */ + if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) { + /* if it starts as a valid identifier, assume that it is one. + (the later check for } being at the expected point will trap + cases where this doesn't pan out.) */ + d += is_utf8 ? UTF8SKIP(d) : 1; + parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); + *d = '\0'; + } + else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ + d++; + while (isWORDCHAR(*s) && d < e) { + *d++ = *s++; + } + if (d >= e) + Perl_croak(aTHX_ "%s", ident_too_long); + *d = '\0'; + } tmp_copline = CopLINE(PL_curcop); if (s < PL_bufend && isSPACE(*s)) { s = skipspace(s); } if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) { - /* ${foo[0]} and ${foo{bar}} notation. */ + /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */ if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) { const char * const brack = (const char *) @@ -9278,26 +9570,16 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) return s; } } - /* Handle extended ${^Foo} variables - * 1999-02-27 mjd-perl-patch@plover.com */ - else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */ - && isWORDCHAR(*s)) - { - d++; - while (isWORDCHAR(*s) && d < e) { - *d++ = *s++; - } - if (d >= e) - Perl_croak(aTHX_ "%s", ident_too_long); - *d = '\0'; - } if ( !tmp_copline ) tmp_copline = CopLINE(PL_curcop); - if ((skip = s < PL_bufend && isSPACE(*s))) + if ((skip = s < PL_bufend && isSPACE(*s))) { /* Avoid incrementing line numbers or resetting PL_linestart, in case we have to back up. */ + STRLEN s_off = s - SvPVX(PL_linestr); s2 = peekspace(s); + s = SvPVX(PL_linestr) + s_off; + } else s2 = s; @@ -9339,9 +9621,12 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) CopLINE_set(PL_curcop, orig_copline); PL_parser->herelines = herelines; *dest = '\0'; + PL_parser->sub_no_recover = TRUE; } } - else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) + else if ( PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets + && !intuit_more(s, PL_bufend)) PL_lex_state = LEX_INTERPEND; return s; } @@ -9574,18 +9859,14 @@ S_scan_subst(pTHX_ char *start) PL_multi_end = 0; pm->op_pmflags |= PMf_EVAL; - while (es-- > 0) { - if (es) - sv_catpvs(repl, "eval "); - else - sv_catpvs(repl, "do "); - } - sv_catpvs(repl, "{"); + for (; es > 1; es--) { + sv_catpvs(repl, "eval "); + } + sv_catpvs(repl, "do {"); sv_catsv(repl, PL_parser->lex_sub_repl); sv_catpvs(repl, "}"); SvREFCNT_dec(PL_parser->lex_sub_repl); PL_parser->lex_sub_repl = repl; - es = 1; } @@ -9598,7 +9879,7 @@ S_scan_subst(pTHX_ char *start) * the NVX field indicates how many src code lines the replacement * spreads over */ sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); - ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0; + ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = cBOOL(es); } @@ -9741,11 +10022,9 @@ S_scan_heredoc(pTHX_ char *s) else term = '"'; if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)) - deprecate_fatal_in("5.28", "Use of bare << to mean <<\"\" is deprecated"); + Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden"); peek = s; - while ( - isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) - { + while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) { peek += UTF ? UTF8SKIP(peek) : 1; } len = (peek - s >= e - d) ? (e - d) : (peek - s); @@ -9760,7 +10039,7 @@ S_scan_heredoc(pTHX_ char *s) len = d - PL_tokenbuf; #ifndef PERL_STRICT_CR - d = strchr(s, '\r'); + d = (char *) memchr(s, '\r', PL_bufend - s); if (d) { char * const olds = s; s = d; @@ -9869,8 +10148,9 @@ S_scan_heredoc(pTHX_ char *s) /* No whitespace or all! */ if (backup == s || *backup == '\n') { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup + 1, indent_len); + indent[indent_len] = 0; s--; /* before our delimiter */ PL_parser->herelines--; /* this line doesn't count */ break; @@ -10004,8 +10284,9 @@ S_scan_heredoc(pTHX_ char *s) /* All whitespace or none! */ if (backup == found || SPACE_OR_TAB(*backup)) { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup, indent_len); + indent[indent_len] = 0; SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -10096,7 +10377,7 @@ S_scan_heredoc(pTHX_ char *s) interminable: SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, origline); - missingterm(PL_tokenbuf + 1); + missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); } /* scan_inputsymbol @@ -10129,7 +10410,7 @@ S_scan_inputsymbol(pTHX_ char *start) PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; - end = strchr(s, '\n'); + end = (char *) memchr(s, '\n', PL_bufend - s); if (!end) end = PL_bufend; if (s[1] == '<' && s[2] == '>' && s[3] == '>') { @@ -10291,7 +10572,7 @@ S_scan_inputsymbol(pTHX_ char *start) ($*@) sub prototypes sub foo ($) (stuff) sub attr parameters sub foo : attr(stuff) <> readline or globs , <>, <$fh>, or <*.c> - + In most of these cases (all but <>, patterns and transliterate) yylex() calls scan_str(). m// makes yylex() call scan_pat() which calls scan_str(). s/// makes yylex() call scan_subst() which calls @@ -10324,7 +10605,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ IV termcode; /* terminating char. code */ - U8 termstr[UTF8_MAXBYTES]; /* terminating string */ + U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */ STRLEN termlen; /* length of terminating string */ line_t herelines; @@ -10335,7 +10616,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re const char * non_grapheme_msg = "Use of unassigned code point or" " non-standalone grapheme for a delimiter" " will be a fatal error starting in Perl" - " v5.30"; + " 5.30"; /* The only non-UTF character that isn't a stand alone grapheme is * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */ bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED); @@ -10434,10 +10715,15 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re } /* terminate when run out of buffer (the for() condition), or have found the terminator */ - else if (*s == term) { - if (termlen == 1) + else if (*s == term) { /* First byte of terminator matches */ + if (termlen == 1) /* If is the only byte, are done */ break; - if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen)) + + /* If the remainder of the terminator matches, also are + * done, after checking that is a separate grapheme */ + if ( s + termlen <= PL_bufend + && memEQ(s + 1, (char*)termstr + 1, termlen - 1)) + { if ( check_grapheme && UNLIKELY(! _is_grapheme((U8 *) start, (U8 *) s, @@ -10448,13 +10734,16 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re "%s", non_grapheme_msg); } break; + } } - else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) + else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) { has_utf8 = TRUE; + } + *to = *s; } } - + /* if the terminator isn't the same as the start character (e.g., matched brackets), we have to allow more in the quoting, and be prepared for nested brackets. @@ -10512,7 +10801,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r') to[-1] = '\n'; #endif - + /* if we're out of file, or a read fails, bail and reset the current line marker so we can report where the unterminated string began */ @@ -10743,6 +11032,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) digit: just_zero = FALSE; if (!overflowed) { + assert(shift >= 0); x = u << shift; /* make room for the digit */ total_bits += shift; @@ -10823,19 +11113,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) NV nv_mult = 1.0; #endif bool accumulate = TRUE; - for (h++; (isXDIGIT(*h) || *h == '_'); h++) { + U8 b; + int lim = 1 << shift; + for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || + *h == '_'); h++) { if (isXDIGIT(*h)) { - U8 b = XDIGIT_VALUE(*h); significant_bits += shift; #ifdef HEXFP_UQUAD if (accumulate) { if (significant_bits < NV_MANT_DIG) { /* We are in the long "run" of xdigits, * accumulate the full four bits. */ + assert(shift >= 0); hexfp_uquad <<= shift; hexfp_uquad |= b; hexfp_frac_bits += shift; - } else { + } else if (significant_bits - shift < NV_MANT_DIG) { /* We are at a hexdigit either at, * or straddling, the edge of mantissa. * We will try grabbing as many as @@ -10844,7 +11137,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) significant_bits - NV_MANT_DIG; if (tail <= 0) tail += shift; + assert(tail >= 0); hexfp_uquad <<= tail; + assert((shift - tail) >= 0); hexfp_uquad |= b >> (shift - tail); hexfp_frac_bits += tail; @@ -10883,7 +11178,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } #else /* HEXFP_NV */ if (accumulate) { - nv_mult /= 16.0; + nv_mult /= nvshift[shift]; if (nv_mult > 0.0) hexfp_nv += b * nv_mult; else @@ -11064,9 +11359,11 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p'))) && strchr("+-0123456789_", s[1])) { - floatit = TRUE; + int exp_digits = 0; + const char *save_s = s; + char * save_d = d; - /* regardless of whether user said 3E5 or 3e5, use lower 'e', + /* regardless of whether user said 3E5 or 3e5, use lower 'e', ditto for p (hexfloats) */ if ((isALPHA_FOLD_EQ(*s, 'e'))) { /* At least some Mach atof()s don't grok 'E' */ @@ -11098,6 +11395,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) /* read digits of exponent */ while (isDIGIT(*s) || *s == '_') { if (isDIGIT(*s)) { + ++exp_digits; if (d >= e) Perl_croak(aTHX_ "%s", number_too_long); *d++ = *s++; @@ -11109,6 +11407,20 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) lastub = s++; } } + + if (!exp_digits) { + /* no exponent digits, the [eEpP] could be for something else, + * though in practice we don't get here for p since that's preparsed + * earlier, and results in only the 0xX being consumed, so behave similarly + * for decimal floats and consume only the D.DD, leaving the [eE] to the + * next token. + */ + s = save_s; + d = save_d; + } + else { + floatit = TRUE; + } } @@ -11135,7 +11447,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) floatit = TRUE; } if (floatit) { - STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); /* terminate the string */ *d = '\0'; if (UNLIKELY(hexfp)) { @@ -11152,7 +11463,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } else { nv = Atof(PL_tokenbuf); } - RESTORE_LC_NUMERIC_UNDERLYING(); sv = newSVnv(nv); } @@ -11190,8 +11500,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) STATIC char * S_scan_formline(pTHX_ char *s) { - char *eol; - char *t; SV * const stuff = newSVpvs(""); bool needargs = FALSE; bool eofmt = FALSE; @@ -11199,8 +11507,9 @@ S_scan_formline(pTHX_ char *s) PERL_ARGS_ASSERT_SCAN_FORMLINE; while (!needargs) { + char *eol; if (*s == '.') { - t = s+1; + char *t = s+1; #ifdef PERL_STRICT_CR while (SPACE_OR_TAB(*t)) t++; @@ -11217,6 +11526,7 @@ S_scan_formline(pTHX_ char *s) if (!eol++) eol = PL_bufend; if (*s != '#') { + char *t; for (t = s; t < eol; t++) { if (*t == '~' && t[1] == '~' && SvCUR(stuff)) { needargs = FALSE; @@ -11251,7 +11561,7 @@ S_scan_formline(pTHX_ char *s) if (!got_some) break; } - incline(s); + incline(s, PL_bufend); } enough: if (!SvCUR(stuff) || needargs) @@ -11308,6 +11618,39 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) return oldsavestack_ix; } + +/* Do extra initialisation of a CV (typically one just created by + * start_subparse()) if that CV is for a named sub + */ + +void +Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) +{ + PERL_ARGS_ASSERT_INIT_NAMED_CV; + + if (nameop->op_type == OP_CONST) { + const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); + if ( strEQ(name, "BEGIN") + || strEQ(name, "END") + || strEQ(name, "INIT") + || strEQ(name, "CHECK") + || strEQ(name, "UNITCHECK") + ) + CvSPECIAL_on(cv); + } + else + /* State subs inside anonymous subs need to be + clonable themselves. */ + if ( CvANON(CvOUTSIDE(cv)) + || CvCLONE(CvOUTSIDE(cv)) + || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( + CvOUTSIDE(cv) + ))[nameop->op_targ]) + ) + CvCLONE_on(cv); +} + + static int S_yywarn(pTHX_ const char *const s, U32 flags) { @@ -11318,6 +11661,29 @@ S_yywarn(pTHX_ const char *const s, U32 flags) return 0; } +void +Perl_abort_execution(pTHX_ const char * const msg, const char * const name) +{ + PERL_ARGS_ASSERT_ABORT_EXECUTION; + + if (PL_minus_c) + Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name); + else { + Perl_croak(aTHX_ + "%sExecution of %s aborted due to compilation errors.\n", msg, name); + } + NOT_REACHED; /* NOTREACHED */ +} + +void +Perl_yyquit(pTHX) +{ + /* Called, after at least one error has been found, to abort the parse now, + * instead of trying to forge ahead */ + + yyerror_pvn(NULL, 0, 0); +} + int Perl_yyerror(pTHX_ const char *const s) { @@ -11341,100 +11707,120 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) SV * const where_sv = newSVpvs_flags("", SVs_TEMP); int yychar = PL_parser->yychar; - PERL_ARGS_ASSERT_YYERROR_PVN; - - if (!yychar || (yychar == ';' && !PL_rsfp)) - sv_catpvs(where_sv, "at EOF"); - else if ( PL_oldoldbufptr - && PL_bufptr > PL_oldoldbufptr - && PL_bufptr - PL_oldoldbufptr < 200 - && PL_oldoldbufptr != PL_oldbufptr - && PL_oldbufptr != PL_bufptr) - { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + /* Output error message 's' with length 'len'. 'flags' are SV flags that + * apply. If the number of errors found is large enough, it abandons + * parsing. If 's' is NULL, there is no message, and it abandons + * processing unconditionally */ + + if (s != NULL) { + if (!yychar || (yychar == ';' && !PL_rsfp)) + sv_catpvs(where_sv, "at EOF"); + else if ( PL_oldoldbufptr + && PL_bufptr > PL_oldoldbufptr + && PL_bufptr - PL_oldoldbufptr < 200 + && PL_oldoldbufptr != PL_oldbufptr + && PL_oldbufptr != PL_bufptr) + { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldoldbufptr)) - PL_oldoldbufptr++; + while (isSPACE(*PL_oldoldbufptr)) + PL_oldoldbufptr++; #endif - context = PL_oldoldbufptr; - contlen = PL_bufptr - PL_oldoldbufptr; - } - else if ( PL_oldbufptr - && PL_bufptr > PL_oldbufptr - && PL_bufptr - PL_oldbufptr < 200 - && PL_oldbufptr != PL_bufptr) { - /* - Only for NetWare: - The code below is removed for NetWare because it abends/crashes on NetWare - when the script has error such as not having the closing quotes like: - if ($var eq "value) - Checking of white spaces is anyway done in NetWare code. - */ + context = PL_oldoldbufptr; + contlen = PL_bufptr - PL_oldoldbufptr; + } + else if ( PL_oldbufptr + && PL_bufptr > PL_oldbufptr + && PL_bufptr - PL_oldbufptr < 200 + && PL_oldbufptr != PL_bufptr) { + /* + Only for NetWare: + The code below is removed for NetWare because it + abends/crashes on NetWare when the script has error such as + not having the closing quotes like: + if ($var eq "value) + Checking of white spaces is anyway done in NetWare code. + */ #ifndef NETWARE - while (isSPACE(*PL_oldbufptr)) - PL_oldbufptr++; + while (isSPACE(*PL_oldbufptr)) + PL_oldbufptr++; #endif - context = PL_oldbufptr; - contlen = PL_bufptr - PL_oldbufptr; - } - else if (yychar > 255) - sv_catpvs(where_sv, "next token ???"); - else if (yychar == YYEMPTY) { - if (PL_lex_state == LEX_NORMAL) - sv_catpvs(where_sv, "at end of line"); - else if (PL_lex_inpat) - sv_catpvs(where_sv, "within pattern"); - else - sv_catpvs(where_sv, "within string"); - } - else { - sv_catpvs(where_sv, "next char "); - if (yychar < 32) - Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); - else if (isPRINT_LC(yychar)) { - const char string = yychar; - sv_catpvn(where_sv, &string, 1); - } - else - Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); - } - msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); - Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", - OutCopFILE(PL_curcop), - (IV)(PL_parser->preambling == NOLINE - ? CopLINE(PL_curcop) - : PL_parser->preambling)); - if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", - UTF8fARG(UTF, contlen, context)); - else - Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); - if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { - Perl_sv_catpvf(aTHX_ msg, - " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n", - (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); - PL_multi_end = 0; - } - if (PL_in_eval & EVAL_WARNONLY) { - PL_in_eval &= ~EVAL_WARNONLY; - Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); + context = PL_oldbufptr; + contlen = PL_bufptr - PL_oldbufptr; + } + else if (yychar > 255) + sv_catpvs(where_sv, "next token ???"); + else if (yychar == YYEMPTY) { + if (PL_lex_state == LEX_NORMAL) + sv_catpvs(where_sv, "at end of line"); + else if (PL_lex_inpat) + sv_catpvs(where_sv, "within pattern"); + else + sv_catpvs(where_sv, "within string"); + } + else { + sv_catpvs(where_sv, "next char "); + if (yychar < 32) + Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); + else if (isPRINT_LC(yychar)) { + const char string = yychar; + sv_catpvn(where_sv, &string, 1); + } + else + Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); + } + msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP); + Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ", + OutCopFILE(PL_curcop), + (IV)(PL_parser->preambling == NOLINE + ? CopLINE(PL_curcop) + : PL_parser->preambling)); + if (context) + Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n", + UTF8fARG(UTF, contlen, context)); + else + Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv)); + if ( PL_multi_start < PL_multi_end + && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) + { + Perl_sv_catpvf(aTHX_ msg, + " (Might be a runaway multi-line %c%c string starting on" + " line %" IVdf ")\n", + (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start); + PL_multi_end = 0; + } + if (PL_in_eval & EVAL_WARNONLY) { + PL_in_eval &= ~EVAL_WARNONLY; + Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg)); + } + else { + qerror(msg); + } } - else - qerror(msg); - if (PL_error_count >= 10) { - SV * errsv; - if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv))) - Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n", - SVfARG(errsv), OutCopFILE(PL_curcop)); - else - Perl_croak(aTHX_ "%s has too many errors.\n", - OutCopFILE(PL_curcop)); + if (s == NULL || PL_error_count >= 10) { + const char * msg = ""; + const char * const name = OutCopFILE(PL_curcop); + + if (PL_in_eval) { + SV * errsv = ERRSV; + if (SvCUR(errsv)) { + msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv)); + } + } + + if (s == NULL) { + abort_execution(msg, name); + } + else { + Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name); + } } PL_in_my = 0; PL_in_my_stash = NULL; @@ -11456,7 +11842,9 @@ S_swallow_bom(pTHX_ U8 *s) /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); +#endif s += 2; if (PL_bufend > (char*)s) { s = add_utf16_textfilter(s, TRUE); @@ -11470,7 +11858,9 @@ S_swallow_bom(pTHX_ U8 *s) case 0xFE: if (s[1] == 0xFF) { /* UTF-16 big-endian? */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n"); +#endif s += 2; if (PL_bufend > (char *)s) { s = add_utf16_textfilter(s, FALSE); @@ -11482,10 +11872,11 @@ S_swallow_bom(pTHX_ U8 *s) } break; case BOM_UTF8_FIRST_BYTE: { - const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ - if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { + if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); - s += len + 1; /* UTF-8 */ +#endif + s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ } break; } @@ -11503,7 +11894,9 @@ S_swallow_bom(pTHX_ U8 *s) * 00 xx 00 xx * are a good indicator of UTF-16BE. */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); +#endif s = add_utf16_textfilter(s, FALSE); #else /* diag_listed_as: Unsupported script encoding %s */ @@ -11519,7 +11912,9 @@ S_swallow_bom(pTHX_ U8 *s) * xx 00 xx 00 * are a good indicator of UTF-16LE. */ #ifndef PERL_NO_UTF16_FILTER +#ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); +#endif s = add_utf16_textfilter(s, TRUE); #else /* diag_listed_as: Unsupported script encoding %s */ @@ -11620,9 +12015,14 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) } } + /* 'chars' isn't quite the right name, as code points above 0xFFFF + * require 4 bytes per char */ chars = SvCUR(utf16_buffer) >> 1; have = SvCUR(utf8_buffer); - SvGROW(utf8_buffer, have + chars * 3 + 1); + + /* Assume the worst case size as noted by the functions: twice the + * number of input bytes */ + SvGROW(utf8_buffer, have + chars * 4 + 1); if (reverse) { end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), @@ -11781,6 +12181,79 @@ Perl_keyword_plugin_standard(pTHX_ return KEYWORD_PLUGIN_DECLINE; } +/* +=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p + +Puts a C function into the chain of keyword plugins. This is the +preferred way to manipulate the L variable. +C is a pointer to the C function that is to be added to the +keyword plugin chain, and C points to the storage location +where a pointer to the next function in the chain will be stored. The +value of C is written into the L variable, +while the value previously stored there is written to C<*old_plugin_p>. + +L is global to an entire process, and a module wishing +to hook keyword parsing may find itself invoked more than once per +process, typically in different threads. To handle that situation, this +function is idempotent. The location C<*old_plugin_p> must initially +(once per process) contain a null pointer. A C variable of static +duration (declared at file scope, typically also marked C to give +it internal linkage) will be implicitly initialised appropriately, if it +does not have an explicit initialiser. This function will only actually +modify the plugin chain if it finds C<*old_plugin_p> to be null. This +function is also thread safe on the small scale. It uses appropriate +locking to avoid race conditions in accessing L. + +When this function is called, the function referenced by C +must be ready to be called, except for C<*old_plugin_p> being unfilled. +In a threading situation, C may be called immediately, even +before this function has returned. C<*old_plugin_p> will always be +appropriately set before C is called. If C +decides not to do anything special with the identifier that it is given +(which is the usual case for most calls to a keyword plugin), it must +chain the plugin function referenced by C<*old_plugin_p>. + +Taken all together, XS code to install a keyword plugin should typically +look something like this: + + static Perl_keyword_plugin_t next_keyword_plugin; + static OP *my_keyword_plugin(pTHX_ + char *keyword_plugin, STRLEN keyword_len, OP **op_ptr) + { + if (memEQs(keyword_ptr, keyword_len, + "my_new_keyword")) { + ... + } else { + return next_keyword_plugin(aTHX_ + keyword_ptr, keyword_len, op_ptr); + } + } + BOOT: + wrap_keyword_plugin(my_keyword_plugin, + &next_keyword_plugin); + +Direct access to L should be avoided. + +=cut +*/ + +void +Perl_wrap_keyword_plugin(pTHX_ + Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) +{ + dVAR; + + PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; + if (*old_plugin_p) return; + KEYWORD_PLUGIN_MUTEX_LOCK; + if (!*old_plugin_p) { + *old_plugin_p = PL_keyword_plugin; + PL_keyword_plugin = new_plugin; + } + KEYWORD_PLUGIN_MUTEX_UNLOCK; +} + #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) static void S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)