X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/b59bf0b2884b21b6f3ce5eca607ab7a6096d87f5..9b2983ca78e5369d17559ca0aa5af58e9da3724a:/toke.c diff --git a/toke.c b/toke.c index c9e0f8b..5a3fe78 100644 --- a/toke.c +++ b/toke.c @@ -39,6 +39,7 @@ Individual members of C have their own documentation. #define PERL_IN_TOKE_C #include "perl.h" #include "dquote_inline.h" +#include "invlist_inline.h" #define new_constant(a,b,c,d,e,f,g) \ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g) @@ -310,6 +311,7 @@ static struct debug_tokens { { ANDAND, TOKENTYPE_NONE, "ANDAND" }, { ANDOP, TOKENTYPE_NONE, "ANDOP" }, { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" }, + { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" }, { ARROW, TOKENTYPE_NONE, "ARROW" }, { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" }, { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" }, @@ -367,6 +369,7 @@ static struct debug_tokens { { RELOP, TOKENTYPE_OPNUM, "RELOP" }, { REQUIRE, TOKENTYPE_NONE, "REQUIRE" }, { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" }, + { SIGSUB, TOKENTYPE_NONE, "SIGSUB" }, { SUB, TOKENTYPE_NONE, "SUB" }, { THING, TOKENTYPE_OPVAL, "THING" }, { UMINUS, TOKENTYPE_NONE, "UMINUS" }, @@ -456,9 +459,9 @@ S_printbuf(pTHX_ const char *const fmt, const char *const s) PERL_ARGS_ASSERT_PRINTBUF; - GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */ + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */ PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60)); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; SvREFCNT_dec(tmp); } @@ -556,16 +559,18 @@ S_no_op(pTHX_ const char *const what, char *s) */ STATIC void -S_missingterm(pTHX_ char *s) +S_missingterm(pTHX_ char *s, STRLEN len) { char tmpbuf[UTF8_MAXBYTES + 1]; char q; bool uni = FALSE; SV *sv; if (s) { - char * const nl = strrchr(s,'\n'); - if (nl) - *nl = '\0'; + char * const nl = (char *) my_memrchr(s, '\n', len); + if (nl) { + *nl = '\0'; + len = nl - s; + } uni = UTF; } else if (PL_multi_close < 32) { @@ -573,24 +578,28 @@ S_missingterm(pTHX_ char *s) tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; s = tmpbuf; + len = 2; } else { if (LIKELY(PL_multi_close < 256)) { *tmpbuf = (char)PL_multi_close; tmpbuf[1] = '\0'; + len = 1; } else { + char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); + *end = '\0'; + len = end - tmpbuf; uni = TRUE; - *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0; } s = tmpbuf; } - q = strchr(s,'"') ? '\'' : '"'; - sv = sv_2mortal(newSVpv(s,0)); + q = memchr(s, '"', len) ? '\'' : '"'; + sv = sv_2mortal(newSVpvn(s, len)); if (uni) SvUTF8_on(sv); - Perl_croak(aTHX_ "Can't find string terminator %c%" SVf - "%c anywhere before EOF",q,SVfARG(sv),q); + Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c" + " anywhere before EOF", q, SVfARG(sv), q); } #include "feature.h" @@ -1029,13 +1038,7 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags) SvCUR(PL_parser->linestr) + len+highhalf); PL_parser->bufend += len+highhalf; for (p = pv; p != e; p++) { - U8 c = (U8)*p; - if (! UTF8_IS_INVARIANT(c)) { - *bufptr++ = UTF8_TWO_BYTE_HI(c); - *bufptr++ = UTF8_TWO_BYTE_LO(c); - } else { - *bufptr++ = (char)c; - } + append_utf8_from_native_byte(*p, (U8 **) &bufptr); } } } else { @@ -1572,7 +1575,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (s == bufend) need_incline = 1; else - incline(s); + incline(s, bufend); } } else if (isSPACE(c)) { s++; @@ -1591,7 +1594,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (!got_more) break; if (can_incline && need_incline && PL_parser->rsfp) { - incline(s); + incline(s, bufend); need_incline = 0; } } else if (!c) { @@ -1724,7 +1727,7 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) */ STATIC void -S_incline(pTHX_ const char *s) +S_incline(pTHX_ const char *s, const char *end) { const char *t; const char *n; @@ -1734,6 +1737,8 @@ S_incline(pTHX_ const char *s) PERL_ARGS_ASSERT_INCLINE; + assert(end >= s); + COPLINE_INC_WITH_HERELINES; if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL && s+1 == PL_bufend && *s == ';') { @@ -1745,8 +1750,8 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (strBEGINs(s, "line")) - s += 4; + if (memBEGINs(s, (STRLEN) (end - s), "line")) + s += sizeof("line") - 1; else return; if (SPACE_OR_TAB(*s)) @@ -1765,7 +1770,7 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (*s == '"' && (t = strchr(s+1, '"'))) { + if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { s++; e = t + 1; } @@ -1824,14 +1829,14 @@ S_incline(pTHX_ const char *s) } else if (GvAV(cfgv)) { AV * const av = GvAV(cfgv); - const I32 start = CopLINE(PL_curcop)+1; - I32 items = AvFILLp(av) - start; + const line_t start = CopLINE(PL_curcop)+1; + SSize_t items = AvFILLp(av) - start; if (items > 0) { AV * const av2 = GvAVn(gv2); SV **svp = AvARRAY(av) + start; - I32 l = (I32)line_num+1; - while (items--) - av_store(av2, l++, SvREFCNT_inc(*svp++)); + Size_t l = line_num+1; + while (items-- && l < SSize_t_MAX && l == (line_t)l) + av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++)); } } } @@ -1919,7 +1924,6 @@ STATIC void S_check_uni(pTHX) { const char *s; - const char *t; if (PL_oldoldbufptr != PL_last_uni) return; @@ -1928,7 +1932,7 @@ S_check_uni(pTHX) s = PL_last_uni; while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') s += UTF ? UTF8SKIP(s) : 1; - if ((t = strchr(s, '(')) && t < PL_bufptr) + if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) return; Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), @@ -2007,7 +2011,7 @@ S_force_next(pTHX_ I32 type) * S_postderef * * This subroutine handles postfix deref syntax after the arrow has already - * been emitted. @* $* etc. are emitted as two separate token right here. + * been emitted. @* $* etc. are emitted as two separate tokens right here. * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits * only the first, leaving yylex to find the next. */ @@ -2062,10 +2066,9 @@ STATIC SV * S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) { SV * const sv = newSVpvn_utf8(start, len, - !IN_BYTES - && UTF - && !is_utf8_invariant_string((const U8*)start, len) - && is_utf8_string((const U8*)start, len)); + ! IN_BYTES + && UTF + && is_utf8_non_invariant_string((const U8*)start, len)); return sv; } @@ -2103,8 +2106,10 @@ S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack) if (check_keyword) { char *s2 = PL_tokenbuf; STRLEN len2 = len; - if (allow_pack && len > 6 && strBEGINs(s2, "CORE::")) - s2 += 6, len2 -= 6; + if (allow_pack && memBEGINPs(s2, len, "CORE::")) { + s2 += sizeof("CORE::") - 1; + len2 -= sizeof("CORE::") - 1; + } if (keyword(s2, len2, 0)) return start; } @@ -2388,6 +2393,8 @@ S_sublex_start(pTHX) PL_parser->lex_super_state = PL_lex_state; PL_parser->lex_sub_inwhat = (U16)op_type; PL_parser->lex_sub_op = PL_lex_op; + PL_parser->sub_no_recover = FALSE; + PL_parser->sub_error_count = PL_error_count; PL_lex_state = LEX_INTERPPUSH; PL_expect = XTERM; @@ -2567,6 +2574,12 @@ S_sublex_done(pTHX) else { const line_t l = CopLINE(PL_curcop); LEAVE; + if (PL_parser->sub_error_count != PL_error_count) { + if (PL_parser->sub_no_recover) { + yyquit(); + NOT_REACHED; + } + } if (PL_multi_close == '<') PL_parser->herelines += l - PL_multi_end; PL_bufend = SvPVX(PL_linestr); @@ -2663,14 +2676,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_begin) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_begin = _core_swash_init("utf8", - "_Perl_Charname_Begin", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_begin, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); @@ -2694,14 +2704,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) s += 2; } else { - if (! PL_utf8_charname_continue) { - U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST; - PL_utf8_charname_continue = _core_swash_init("utf8", - "_Perl_Charname_Continue", - &PL_sv_undef, - 1, 0, NULL, &flags); - } - if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) { + if (! _invlist_contains_cp(PL_utf8_charname_continue, + utf8_to_uvchr_buf((U8 *) s, + (U8 *) e, + NULL))) + { goto bad_charname; } s += UTF8SKIP(s); @@ -2893,8 +2900,8 @@ S_scan_const(pTHX_ char *start) should we have to convert to UTF-8) */ SV *res; /* result from charnames */ - STRLEN offset_to_max; /* The offset in the output to where the range - high-end character is temporarily placed */ + STRLEN offset_to_max = 0; /* The offset in the output to where the range + high-end character is temporarily placed */ /* Does something require special handling in tr/// ? This avoids extra * work in a less likely case. As such, khw didn't feel it was worth @@ -3506,7 +3513,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_o(&s, &uv, &error, + bool valid = grok_bslash_o(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3524,7 +3532,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_x(&s, &uv, &error, + bool valid = grok_bslash_x(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3659,7 +3668,7 @@ S_scan_const(pTHX_ char *start) s++; /* If there is no matching '}', it is an error. */ - if (! (e = strchr(s, '}'))) { + if (! (e = (char *) memchr(s, '}', send - s))) { if (! PL_lex_inpat) { yyerror("Missing right brace on \\N{}"); } else { @@ -4138,7 +4147,7 @@ S_scan_const(pTHX_ char *start) /* This is the one truly awful dwimmer necessary to conflate C and sed. */ STATIC int -S_intuit_more(pTHX_ char *s) +S_intuit_more(pTHX_ char *s, char *e) { PERL_ARGS_ASSERT_INTUIT_MORE; @@ -4153,6 +4162,7 @@ S_intuit_more(pTHX_ char *s) return TRUE; if (*s != '{' && *s != '[') return FALSE; + PL_parser->sub_no_recover = TRUE; if (!PL_lex_inpat) return TRUE; @@ -4173,7 +4183,7 @@ S_intuit_more(pTHX_ char *s) /* this is terrifying, and it works */ int weight; char seen[256]; - const char * const send = strchr(s,']'); + const char * const send = (char *) memchr(s, ']', e - s); unsigned char un_char, last_un_char; char tmpbuf[sizeof PL_tokenbuf * 4]; @@ -4488,6 +4498,7 @@ I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) { filter_t funcp; + I32 ret; SV *datasv = NULL; /* This API is bad. It should have been using unsigned int for maxlen. Not sure if we want to change the API, but if not we should sanity @@ -4570,7 +4581,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. */ /* Return: <0:error, =0:eof, >0:not eof */ - return (*funcp)(aTHX_ idx, buf_sv, correct_length); + ENTER; + save_scalar(PL_errgv); + ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); + LEAVE; + return ret; } STATIC char * @@ -4877,8 +4892,11 @@ Perl_yylex(pTHX) } else { I32 tmp; - if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) + if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u") + || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l")) + { tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ + } if ((*s == 'L' || *s == 'U' || *s == 'F') && (strpbrk(PL_lex_casestack, "LUF"))) { @@ -4969,7 +4987,7 @@ Perl_yylex(pTHX) return yylex(); case LEX_INTERPENDMAYBE: - if (intuit_more(PL_bufptr)) { + if (intuit_more(PL_bufptr, PL_bufend)) { PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ break; } @@ -5073,6 +5091,14 @@ Perl_yylex(pTHX) return yylex(); case LEX_FORMLINE: + if (PL_parser->sub_error_count != PL_error_count) { + /* There was an error parsing a formline, which tends to + mess up the parser. + Unlike interpolated sub-parsing, we can't treat any of + these as recoverable, so no need to check sub_no_recover. + */ + yyquit(); + } assert(PL_lex_formbrack); s = scan_formline(PL_bufptr); if (!PL_lex_formbrack) @@ -5124,7 +5150,7 @@ Perl_yylex(pTHX) /* read var name, including sigil, into PL_tokenbuf */ PL_tokenbuf[0] = sigil; parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1, - 0, cBOOL(UTF), FALSE); + 0, cBOOL(UTF), FALSE, FALSE); *dest = '\0'; assert(PL_tokenbuf[1]); /* we have a variable name */ } @@ -5298,7 +5324,11 @@ Perl_yylex(pTHX) || *PL_splitstr == '\'' || *PL_splitstr == '"') && strchr(PL_splitstr + 1, *PL_splitstr)) + { + /* strchr is ok, because -F pattern can't contain + * embeddded NULs */ Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + } else { /* "q\0${splitstr}\0" is legal perl. Yes, even NUL bytes can be used as quoting characters. :-) */ @@ -5366,7 +5396,9 @@ Perl_yylex(pTHX) } if (PL_parser->in_pod) { /* Incest with pod. */ - if (*s == '=' && strBEGINs(s, "=cut") && !isALPHA(s[4])) { + if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut") + && !isALPHA(s[4])) + { SvPVCLEAR(PL_linestr); PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr); PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -5375,7 +5407,7 @@ Perl_yylex(pTHX) } } if (PL_rsfp || PL_parser->filtered) - incline(s); + incline(s, PL_bufend); } while (PL_parser->in_pod); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -5600,7 +5632,7 @@ Perl_yylex(pTHX) && !PL_rsfp && !PL_parser->filtered) { /* handle eval qq[#line 1 "foo"\n ...] */ CopLINE_dec(PL_curcop); - incline(s); + incline(s, PL_bufend); } d = s; while (d < PL_bufend && *d != '\n') @@ -5613,7 +5645,7 @@ Perl_yylex(pTHX) && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; else - incline(s); + incline(s, PL_bufend); if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; force_next(FORMRBRACK); @@ -5627,7 +5659,7 @@ Perl_yylex(pTHX) { s++; if (s < PL_bufend) - incline(s); + incline(s, PL_bufend); } } goto retry; @@ -5643,7 +5675,7 @@ Perl_yylex(pTHX) while (s < PL_bufend && SPACE_OR_TAB(*s)) s++; - if (strBEGINs(s,"=>")) { + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) { s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE); DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } ); OPERATOR('-'); /* unary minus */ @@ -5828,7 +5860,8 @@ Perl_yylex(pTHX) if (!PL_tokenbuf[1]) { PREREF('%'); } - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') PL_tokenbuf[0] = '@'; } @@ -5906,9 +5939,17 @@ Perl_yylex(pTHX) case XATTRTERM: PL_expect = XTERMBLOCK; grabattrs: + /* NB: as well as parsing normal attributes, we also end up + * here if there is something looking like attributes + * following a signature (which is illegal, but used to be + * legal in 5.20..5.26). If the latter, we still parse the + * attributes so that error messages(s) are less confusing, + * but ignore them (parser->sig_seen). + */ s = skipspace(s); attrs = NULL; while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) { + bool sig = PL_parser->sig_seen; I32 tmp; SV *sv; d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); @@ -5951,23 +5992,27 @@ Perl_yylex(pTHX) the CVf_BUILTIN_ATTRS define in cv.h! */ if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) { sv_free(sv); - CvLVALUE_on(PL_compcv); + if (!sig) + CvLVALUE_on(PL_compcv); } else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) { sv_free(sv); - CvMETHOD_on(PL_compcv); + if (!sig) + CvMETHOD_on(PL_compcv); } else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) { sv_free(sv); - Perl_ck_warner_d(aTHX_ - packWARN(WARN_EXPERIMENTAL__CONST_ATTR), - ":const is experimental" - ); - CvANONCONST_on(PL_compcv); - if (!CvANON(PL_compcv)) - yyerror(":const is not permitted on named " - "subroutines"); + if (!sig) { + Perl_ck_warner_d(aTHX_ + packWARN(WARN_EXPERIMENTAL__CONST_ATTR), + ":const is experimental" + ); + CvANONCONST_on(PL_compcv); + if (!CvANON(PL_compcv)) + yyerror(":const is not permitted on named " + "subroutines"); + } } /* After we've set the flags, it could be argued that we don't need to do the attributes.pm-based setting @@ -6020,6 +6065,14 @@ Perl_yylex(pTHX) } } got_attrs: + if (PL_parser->sig_seen) { + /* see comment about about sig_seen and parser error + * handling */ + if (attrs) + op_free(attrs); + Perl_croak(aTHX_ "Subroutine attributes must come " + "before the signature"); + } if (attrs) { NEXTVAL_NEXTTOKE.opval = attrs; force_next(THING); @@ -6254,7 +6307,7 @@ Perl_yylex(pTHX) PL_expect = XTERM; break; } - if (strBEGINs(s, "sub")) { + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) { PL_bufptr = s; d = s + 3; d = skipspace(d); @@ -6390,7 +6443,9 @@ Perl_yylex(pTHX) { const char tmp = *s++; if (tmp == '=') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, "=====")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), "=====")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6428,14 +6483,15 @@ Perl_yylex(pTHX) d = PL_bufend; while (s < d) { if (*s++ == '\n') { - incline(s); - if (strBEGINs(s,"=cut")) { - s = strchr(s,'\n'); + incline(s, PL_bufend); + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) + { + s = (char *) memchr(s,'\n', d - s); if (s) s++; else s = d; - incline(s); + incline(s, PL_bufend); goto retry; } } @@ -6462,6 +6518,7 @@ Perl_yylex(pTHX) SAVEI32(PL_lex_formbrack); PL_parser->form_lex_state = PL_lex_state; PL_lex_formbrack = PL_lex_brackets + 1; + PL_parser->sub_error_count = PL_error_count; goto leftbracket; } } @@ -6507,10 +6564,12 @@ Perl_yylex(pTHX) OPERATOR('!'); case '<': if (PL_expect != XOPERATOR) { - if (s[1] != '<' && !strchr(s,'>')) + if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) check_uni(); if (s[1] == '<' && s[2] != '>') { - if ((s == PL_linestart || s[-1] == '\n') && strBEGINs(s+2, "<<<<<")) { + if ( (s == PL_linestart || s[-1] == '\n') + && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<")) + { s = vcs_conflict_marker(s + 7); goto retry; } @@ -6525,7 +6584,9 @@ Perl_yylex(pTHX) { char tmp = *s++; if (tmp == '<') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, "<<<<<")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6569,7 +6630,9 @@ Perl_yylex(pTHX) { const char tmp = *s++; if (tmp == '>') { - if ((s == PL_linestart+2 || s[-3] == '\n') && strBEGINs(s, ">>>>>")) { + if ( (s == PL_linestart+2 || s[-3] == '\n') + && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>")) + { s = vcs_conflict_marker(s + 5); goto retry; } @@ -6653,8 +6716,8 @@ Perl_yylex(pTHX) if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) - && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') { PL_tokenbuf[0] = '@'; if (ckWARN(WARN_SYNTAX)) { @@ -6679,8 +6742,10 @@ Perl_yylex(pTHX) else if (*s == '{') { char *t; PL_tokenbuf[0] = '%'; - if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) - && (t = strchr(s, '}')) && (t = strchr(t, '='))) + if ( strEQ(PL_tokenbuf+1, "SIG") + && ckWARN(WARN_SYNTAX) + && (t = (char *) memchr(s, '}', PL_bufend - s)) + && (t = (char *) memchr(t, '=', PL_bufend - t))) { char tmpbuf[sizeof PL_tokenbuf]; do { @@ -6782,7 +6847,9 @@ Perl_yylex(pTHX) } if (PL_lex_state == LEX_NORMAL) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) + { if (*s == '{') PL_tokenbuf[0] = '%'; @@ -6853,7 +6920,7 @@ Perl_yylex(pTHX) } if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { s += 3; - TERM(YADAYADA); + OPERATOR(YADAYADA); } if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { char tmp = *s++; @@ -6893,7 +6960,7 @@ Perl_yylex(pTHX) case '\'': s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { @@ -6915,7 +6982,7 @@ Perl_yylex(pTHX) no_op("String",s); } if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_CONST; /* FIXME. I think that this can be const if char *d is replaced by more localised variables. */ @@ -6941,7 +7008,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) no_op("Backticks",s); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -7182,10 +7249,7 @@ Perl_yylex(pTHX) else { /* no override */ tmp = -tmp; if (tmp == KEY_dump) { - Perl_ck_warner_d(aTHX_ packWARN2(WARN_MISC,WARN_DEPRECATED), - "dump() better written as CORE::dump(). " - "dump() will no longer be available " - "in Perl 5.30"); + Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30"); } gv = NULL; gvp = 0; @@ -7234,7 +7298,20 @@ Perl_yylex(pTHX) int pkgname = 0; const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]); bool safebw; + bool no_op_error = FALSE; + if (PL_expect == XOPERATOR) { + if (PL_bufptr == PL_linestart) { + CopLINE_dec(PL_curcop); + Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); + CopLINE_inc(PL_curcop); + } + else + /* We want to call no_op with s pointing after the + bareword, so defer it. But we want it to come + before the Bad name croak. */ + no_op_error = TRUE; + } /* Get the rest if it looks like a package qualifier */ @@ -7242,6 +7319,10 @@ Perl_yylex(pTHX) STRLEN morelen; s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); + if (no_op_error) { + no_op("Bareword",s); + no_op_error = FALSE; + } if (!morelen) Perl_croak(aTHX_ "Bad name after %" UTF8f "%s", UTF8fARG(UTF, len, PL_tokenbuf), @@ -7250,15 +7331,8 @@ Perl_yylex(pTHX) pkgname = 1; } - if (PL_expect == XOPERATOR) { - if (PL_bufptr == PL_linestart) { - CopLINE_dec(PL_curcop); - Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi); - CopLINE_inc(PL_curcop); - } - else + if (no_op_error) no_op("Bareword",s); - } /* See if the name is "Foo::", in which case Foo is a bareword @@ -7569,10 +7643,10 @@ Perl_yylex(pTHX) if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) { /* PL_warn_reserved is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved, PL_tokenbuf); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } } } @@ -7627,14 +7701,6 @@ Perl_yylex(pTHX) if (!GvIO(gv)) GvIOp(gv) = newIO(); IoIFP(GvIOp(gv)) = PL_rsfp; -#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC) - { - const int fd = PerlIO_fileno(PL_rsfp); - if (fd >= 3) { - fcntl(fd,F_SETFD, FD_CLOEXEC); - } - } -#endif /* Mark this internal pseudo-handle as clean */ IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT; if ((PerlIO*)PL_rsfp == PerlIO_stdin()) @@ -7939,14 +8005,17 @@ Perl_yylex(pTHX) char *p = s; SSize_t s_off = s - SvPVX(PL_linestr); - if ((PL_bufend - p) >= 3 - && strBEGINs(p, "my") && isSPACE(*(p + 2))) + if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") + && isSPACE(*(p + 2))) { - p += 2; + p += 2; + } + else if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") + && isSPACE(*(p + 3))) + { + p += 3; } - else if ((PL_bufend - p) >= 4 - && strBEGINs(p, "our") && isSPACE(*(p + 3))) - p += 3; + p = skipspace(p); /* skip optional package name, as in "for my abc $x (..)" */ if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) { @@ -8314,7 +8383,7 @@ Perl_yylex(pTHX) case KEY_q: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; pl_yylval.ival = OP_CONST; TERM(sublex_start()); @@ -8326,7 +8395,7 @@ Perl_yylex(pTHX) OP *words = NULL; s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; PL_expect = XOPERATOR; if (SvCUR(PL_lex_stuff)) { @@ -8375,7 +8444,7 @@ Perl_yylex(pTHX) case KEY_qq: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_STRINGIFY; if (SvIVX(PL_lex_stuff) == '\'') SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ @@ -8388,7 +8457,7 @@ Perl_yylex(pTHX) case KEY_qx: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -8604,22 +8673,24 @@ Perl_yylex(pTHX) really_sub: { char * const tmpbuf = PL_tokenbuf + 1; - expectation attrful; bool have_name, have_proto; const int key = tmp; SV *format_name = NULL; + bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED; SSize_t off = s-SvPVX(PL_linestr); s = skipspace(s); d = SvPVX(PL_linestr)+off; + SAVEBOOL(PL_parser->sig_seen); + PL_parser->sig_seen = FALSE; + if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '\'' || (*s == ':' && s[1] == ':')) { - PL_expect = XBLOCK; - attrful = XATTRBLOCK; + PL_expect = XATTRBLOCK; d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE, &len); if (key == KEY_format) @@ -8650,8 +8721,7 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Missing name in \"%s\"", PL_bufptr); } - PL_expect = XTERMBLOCK; - attrful = XATTRTERM; + PL_expect = XATTRTERM; sv_setpvs(PL_subname,"?"); have_name = FALSE; } @@ -8667,11 +8737,11 @@ Perl_yylex(pTHX) } /* Look for a prototype */ - if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) { + if (*s == '(' && !is_sigsub) { s = scan_str(s,FALSE,FALSE,FALSE,NULL); - COPLINE_SET_FROM_MULTI_END; if (!s) Perl_croak(aTHX_ "Prototype not terminated"); + COPLINE_SET_FROM_MULTI_END; (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO), 0); have_proto = TRUE; @@ -8681,9 +8751,9 @@ Perl_yylex(pTHX) else have_proto = FALSE; - if (*s == ':' && s[1] != ':') - PL_expect = attrful; - else if ((*s != '{' && *s != '(') && key != KEY_format) { + if ( !(*s == ':' && s[1] != ':') + && (*s != '{' && *s != '(') && key != KEY_format) + { assert(key == KEY_sub || key == KEY_AUTOLOAD || key == KEY_DESTROY || key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK || @@ -8707,10 +8777,16 @@ Perl_yylex(pTHX) sv_setpvs(PL_subname, "__ANON__"); else sv_setpvs(PL_subname, "__ANON__::__ANON__"); - TOKEN(ANONSUB); + if (is_sigsub) + TOKEN(ANON_SIGSUB); + else + TOKEN(ANONSUB); } force_ident_maybe_lex('&'); - TOKEN(SUB); + if (is_sigsub) + TOKEN(SIGSUB); + else + TOKEN(SUB); } case KEY_system: @@ -8898,6 +8974,7 @@ S_pending_ident(pTHX) DEBUG_T({ PerlIO_printf(Perl_debug_log, "### Pending identifier '%s'\n", PL_tokenbuf); }); + assert(tokenbuf_len >= 2); /* if we're in a my(), we can't allow dynamics here. $foo'bar has already been turned into $foo::bar, so @@ -8921,13 +8998,13 @@ S_pending_ident(pTHX) if (has_colon) { /* "my" variable %s can't be in a package */ /* PL_no_myglob is constant */ - GCC_DIAG_IGNORE(-Wformat-nonliteral); + GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); yyerror_pv(Perl_form(aTHX_ PL_no_myglob, PL_in_my == KEY_my ? "my" : "state", *PL_tokenbuf == '&' ? "subroutin" : "variabl", PL_tokenbuf), UTF ? SVf_UTF8 : 0); - GCC_DIAG_RESTORE; + GCC_DIAG_RESTORE_STMT; } if (PL_in_my == KEY_sigvar) { @@ -8973,7 +9050,7 @@ S_pending_ident(pTHX) HEK * const stashname = HvNAME_HEK(stash); SV * const sym = newSVhek(stashname); sv_catpvs(sym, "::"); - sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES )); + sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES )); pl_yylval.opval = newSVOP(OP_CONST, 0, sym); pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') @@ -9001,7 +9078,7 @@ S_pending_ident(pTHX) && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) { - GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, + GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG, SVt_PVAV); if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv))) @@ -9018,11 +9095,11 @@ S_pending_ident(pTHX) /* build ops for a bareword */ pl_yylval.opval = newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1, - tokenbuf_len - 1, + tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, UTF ? SVf_UTF8 : 0 )); pl_yylval.opval->op_private = OPpCONST_ENTERED; if (pit != '&') - gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1, + gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (PL_in_eval ? GV_ADDMULTI : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ), ((PL_tokenbuf[0] == '$') ? SVt_PV @@ -9239,8 +9316,10 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, PERL_STATIC_INLINE void S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, - bool is_utf8, bool check_dollar) + bool is_utf8, bool check_dollar, bool tick_warn) { + int saw_tick = 0; + const char *olds = *s; PERL_ARGS_ASSERT_PARSE_IDENT; while (*s < PL_bufend) { @@ -9274,6 +9353,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, *(*d)++ = ':'; *(*d)++ = ':'; (*s)++; + saw_tick++; } else if (allow_package && **s == ':' && (*s)[1] == ':' /* Disallow things like Foo::$bar. For the curious, this is @@ -9287,6 +9367,30 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, else break; } + if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) { + char *d; + char *d2; + Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */ + d2 = d; + SAVEFREEPV(d); + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "Old package separator used in string"); + if (olds[-1] == '#') + *d2++ = olds[-2]; + *d2++ = olds[-1]; + while (olds < *s) { + if (*olds == '\'') { + *d2++ = '\\'; + *d2++ = *olds++; + } + else + *d2++ = *olds++; + } + Perl_warner(aTHX_ packWARN(WARN_SYNTAX), + "\t(Did you mean \"%" UTF8f "\" instead?)\n", + UTF8fARG(is_utf8, d2-d, d)); + } return; } @@ -9302,7 +9406,7 @@ S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN PERL_ARGS_ASSERT_SCAN_WORD; - parse_ident(&s, &d, e, allow_package, is_utf8, TRUE); + parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE); *d = '\0'; *slp = d - dest; return s; @@ -9350,7 +9454,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) } } else { /* See if it is a "normal" identifier */ - parse_ident(&s, &d, e, 1, is_utf8, FALSE); + parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE); } *d = '\0'; d = dest; @@ -9369,7 +9473,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) || isDIGIT_A((U8)s[1]) || s[1] == '$' || s[1] == '{' - || strBEGINs(s+1,"::")) ) + || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) ) { /* Dereferencing a value in a scalar variable. The alternatives are different syntaxes for a scalar variable. @@ -9428,7 +9532,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) (the later check for } being at the expected point will trap cases where this doesn't pan out.) */ d += is_utf8 ? UTF8SKIP(d) : 1; - parse_ident(&s, &d, e, 1, is_utf8, TRUE); + parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE); *d = '\0'; } else { /* caret word: ${^Foo} ${^CAPTURE[0]} */ @@ -9515,9 +9619,12 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) CopLINE_set(PL_curcop, orig_copline); PL_parser->herelines = herelines; *dest = '\0'; + PL_parser->sub_no_recover = TRUE; } } - else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) + else if ( PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets + && !intuit_more(s, PL_bufend)) PL_lex_state = LEX_INTERPEND; return s; } @@ -9770,7 +9877,7 @@ S_scan_subst(pTHX_ char *start) * the NVX field indicates how many src code lines the replacement * spreads over */ sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV); - ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0; + ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff; ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = cBOOL(es); } @@ -9930,7 +10037,7 @@ S_scan_heredoc(pTHX_ char *s) len = d - PL_tokenbuf; #ifndef PERL_STRICT_CR - d = strchr(s, '\r'); + d = (char *) memchr(s, '\r', PL_bufend - s); if (d) { char * const olds = s; s = d; @@ -10039,8 +10146,9 @@ S_scan_heredoc(pTHX_ char *s) /* No whitespace or all! */ if (backup == s || *backup == '\n') { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup + 1, indent_len); + indent[indent_len] = 0; s--; /* before our delimiter */ PL_parser->herelines--; /* this line doesn't count */ break; @@ -10174,8 +10282,9 @@ S_scan_heredoc(pTHX_ char *s) /* All whitespace or none! */ if (backup == found || SPACE_OR_TAB(*backup)) { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup, indent_len); + indent[indent_len] = 0; SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -10220,7 +10329,7 @@ S_scan_heredoc(pTHX_ char *s) while (ss < se) { /* newline only? Copy and move on */ if (*ss == '\n') { - sv_catpv(newstr,"\n"); + sv_catpvs(newstr,"\n"); ss++; linecount++; @@ -10266,7 +10375,7 @@ S_scan_heredoc(pTHX_ char *s) interminable: SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, origline); - missingterm(PL_tokenbuf + 1); + missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); } /* scan_inputsymbol @@ -10299,7 +10408,7 @@ S_scan_inputsymbol(pTHX_ char *start) PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; - end = strchr(s, '\n'); + end = (char *) memchr(s, '\n', PL_bufend - s); if (!end) end = PL_bufend; if (s[1] == '<' && s[2] == '>' && s[3] == '>') { @@ -10494,7 +10603,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re I32 brackets = 1; /* bracket nesting level */ bool has_utf8 = FALSE; /* is there any utf8 content? */ IV termcode; /* terminating char. code */ - U8 termstr[UTF8_MAXBYTES]; /* terminating string */ + U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */ STRLEN termlen; /* length of terminating string */ line_t herelines; @@ -10502,14 +10611,11 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re const char * opening_delims = "([{<"; const char * closing_delims = ")]}>"; + /* The only non-UTF character that isn't a stand alone grapheme is + * white-space, hence can't be a delimiter. */ const char * non_grapheme_msg = "Use of unassigned code point or" " non-standalone grapheme for a delimiter" - " will be a fatal error starting in Perl" - " 5.30"; - /* The only non-UTF character that isn't a stand alone grapheme is - * white-space, hence can't be a delimiter. So can skip for non-UTF-8 */ - bool check_grapheme = UTF && ckWARN_d(WARN_DEPRECATED); - + " is not allowed"; PERL_ARGS_ASSERT_SCAN_STR; /* skip space before the delimiter */ @@ -10528,26 +10634,12 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re } else { termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); - if (check_grapheme) { - if ( UNLIKELY(UNICODE_IS_SUPER(termcode)) - || UNLIKELY(UNICODE_IS_NONCHAR(termcode))) - { - /* These are considered graphemes, and since the ending - * delimiter will be the same, we don't have to check the other - * end */ - check_grapheme = FALSE; - } - else if (UNLIKELY(! _is_grapheme((U8 *) start, - (U8 *) s, - (U8 *) PL_bufend, - termcode))) - { - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "%s", non_grapheme_msg); - - /* Don't have to check the other end, as have already warned at - * this one */ - check_grapheme = FALSE; - } + if (UTF && UNLIKELY(! _is_grapheme((U8 *) start, + (U8 *) s, + (U8 *) PL_bufend, + termcode))) + { + yyerror(non_grapheme_msg); } Copy(s, termstr, termlen, U8); @@ -10613,14 +10705,13 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re if ( s + termlen <= PL_bufend && memEQ(s + 1, (char*)termstr + 1, termlen - 1)) { - if ( check_grapheme + if ( UTF && UNLIKELY(! _is_grapheme((U8 *) start, - (U8 *) s, - (U8 *) PL_bufend, + (U8 *) s, + (U8 *) PL_bufend, termcode))) { - Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), - "%s", non_grapheme_msg); + yyerror(non_grapheme_msg); } break; } @@ -10921,6 +11012,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) digit: just_zero = FALSE; if (!overflowed) { + assert(shift >= 0); x = u << shift; /* make room for the digit */ total_bits += shift; @@ -11001,19 +11093,22 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) NV nv_mult = 1.0; #endif bool accumulate = TRUE; - for (h++; (isXDIGIT(*h) || *h == '_'); h++) { + U8 b; + int lim = 1 << shift; + for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) || + *h == '_'); h++) { if (isXDIGIT(*h)) { - U8 b = XDIGIT_VALUE(*h); significant_bits += shift; #ifdef HEXFP_UQUAD if (accumulate) { if (significant_bits < NV_MANT_DIG) { /* We are in the long "run" of xdigits, * accumulate the full four bits. */ + assert(shift >= 0); hexfp_uquad <<= shift; hexfp_uquad |= b; hexfp_frac_bits += shift; - } else { + } else if (significant_bits - shift < NV_MANT_DIG) { /* We are at a hexdigit either at, * or straddling, the edge of mantissa. * We will try grabbing as many as @@ -11022,7 +11117,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) significant_bits - NV_MANT_DIG; if (tail <= 0) tail += shift; + assert(tail >= 0); hexfp_uquad <<= tail; + assert((shift - tail) >= 0); hexfp_uquad |= b >> (shift - tail); hexfp_frac_bits += tail; @@ -11061,7 +11158,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } #else /* HEXFP_NV */ if (accumulate) { - nv_mult /= 16.0; + nv_mult /= nvshift[shift]; if (nv_mult > 0.0) hexfp_nv += b * nv_mult; else @@ -11330,7 +11427,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) floatit = TRUE; } if (floatit) { - STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD(); /* terminate the string */ *d = '\0'; if (UNLIKELY(hexfp)) { @@ -11347,7 +11443,6 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) } else { nv = Atof(PL_tokenbuf); } - RESTORE_LC_NUMERIC_UNDERLYING(); sv = newSVnv(nv); } @@ -11446,7 +11541,7 @@ S_scan_formline(pTHX_ char *s) if (!got_some) break; } - incline(s); + incline(s, PL_bufend); } enough: if (!SvCUR(stuff) || needargs) @@ -11503,6 +11598,39 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) return oldsavestack_ix; } + +/* Do extra initialisation of a CV (typically one just created by + * start_subparse()) if that CV is for a named sub + */ + +void +Perl_init_named_cv(pTHX_ CV *cv, OP *nameop) +{ + PERL_ARGS_ASSERT_INIT_NAMED_CV; + + if (nameop->op_type == OP_CONST) { + const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv); + if ( strEQ(name, "BEGIN") + || strEQ(name, "END") + || strEQ(name, "INIT") + || strEQ(name, "CHECK") + || strEQ(name, "UNITCHECK") + ) + CvSPECIAL_on(cv); + } + else + /* State subs inside anonymous subs need to be + clonable themselves. */ + if ( CvANON(CvOUTSIDE(cv)) + || CvCLONE(CvOUTSIDE(cv)) + || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST( + CvOUTSIDE(cv) + ))[nameop->op_targ]) + ) + CvCLONE_on(cv); +} + + static int S_yywarn(pTHX_ const char *const s, U32 flags) { @@ -11724,12 +11852,11 @@ S_swallow_bom(pTHX_ U8 *s) } break; case BOM_UTF8_FIRST_BYTE: { - const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */ - if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) { + if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) { #ifdef DEBUGGING if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n"); #endif - s += len + 1; /* UTF-8 */ + s += sizeof(BOM_UTF8) - 1; /* UTF-8 */ } break; } @@ -11868,9 +11995,14 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) } } + /* 'chars' isn't quite the right name, as code points above 0xFFFF + * require 4 bytes per char */ chars = SvCUR(utf16_buffer) >> 1; have = SvCUR(utf8_buffer); - SvGROW(utf8_buffer, have + chars * 3 + 1); + + /* Assume the worst case size as noted by the functions: twice the + * number of input bytes */ + SvGROW(utf8_buffer, have + chars * 4 + 1); if (reverse) { end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), @@ -12029,6 +12161,79 @@ Perl_keyword_plugin_standard(pTHX_ return KEYWORD_PLUGIN_DECLINE; } +/* +=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p + +Puts a C function into the chain of keyword plugins. This is the +preferred way to manipulate the L variable. +C is a pointer to the C function that is to be added to the +keyword plugin chain, and C points to the storage location +where a pointer to the next function in the chain will be stored. The +value of C is written into the L variable, +while the value previously stored there is written to C<*old_plugin_p>. + +L is global to an entire process, and a module wishing +to hook keyword parsing may find itself invoked more than once per +process, typically in different threads. To handle that situation, this +function is idempotent. The location C<*old_plugin_p> must initially +(once per process) contain a null pointer. A C variable of static +duration (declared at file scope, typically also marked C to give +it internal linkage) will be implicitly initialised appropriately, if it +does not have an explicit initialiser. This function will only actually +modify the plugin chain if it finds C<*old_plugin_p> to be null. This +function is also thread safe on the small scale. It uses appropriate +locking to avoid race conditions in accessing L. + +When this function is called, the function referenced by C +must be ready to be called, except for C<*old_plugin_p> being unfilled. +In a threading situation, C may be called immediately, even +before this function has returned. C<*old_plugin_p> will always be +appropriately set before C is called. If C +decides not to do anything special with the identifier that it is given +(which is the usual case for most calls to a keyword plugin), it must +chain the plugin function referenced by C<*old_plugin_p>. + +Taken all together, XS code to install a keyword plugin should typically +look something like this: + + static Perl_keyword_plugin_t next_keyword_plugin; + static OP *my_keyword_plugin(pTHX_ + char *keyword_plugin, STRLEN keyword_len, OP **op_ptr) + { + if (memEQs(keyword_ptr, keyword_len, + "my_new_keyword")) { + ... + } else { + return next_keyword_plugin(aTHX_ + keyword_ptr, keyword_len, op_ptr); + } + } + BOOT: + wrap_keyword_plugin(my_keyword_plugin, + &next_keyword_plugin); + +Direct access to L should be avoided. + +=cut +*/ + +void +Perl_wrap_keyword_plugin(pTHX_ + Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) +{ + dVAR; + + PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; + if (*old_plugin_p) return; + KEYWORD_PLUGIN_MUTEX_LOCK; + if (!*old_plugin_p) { + *old_plugin_p = PL_keyword_plugin; + PL_keyword_plugin = new_plugin; + } + KEYWORD_PLUGIN_MUTEX_UNLOCK; +} + #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) static void S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)