X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/de62715824b6848c0aabf63dab4e7bcdc9945f9f..93cd6fca2453b14be3c49ba8708aa01b7dab5829:/toke.c diff --git a/toke.c b/toke.c index 734b02c..f94c0d5 100644 --- a/toke.c +++ b/toke.c @@ -556,16 +556,18 @@ S_no_op(pTHX_ const char *const what, char *s) */ STATIC void -S_missingterm(pTHX_ char *s) +S_missingterm(pTHX_ char *s, STRLEN len) { char tmpbuf[UTF8_MAXBYTES + 1]; char q; bool uni = FALSE; SV *sv; if (s) { - char * const nl = strrchr(s,'\n'); - if (nl) - *nl = '\0'; + char * const nl = (char *) my_memrchr(s, '\n', len); + if (nl) { + *nl = '\0'; + len = nl - s; + } uni = UTF; } else if (PL_multi_close < 32) { @@ -573,24 +575,28 @@ S_missingterm(pTHX_ char *s) tmpbuf[1] = (char)toCTRL(PL_multi_close); tmpbuf[2] = '\0'; s = tmpbuf; + len = 2; } else { if (LIKELY(PL_multi_close < 256)) { *tmpbuf = (char)PL_multi_close; tmpbuf[1] = '\0'; + len = 1; } else { + char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close); + *end = '\0'; + len = end - tmpbuf; uni = TRUE; - *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0; } s = tmpbuf; } - q = strchr(s,'"') ? '\'' : '"'; - sv = sv_2mortal(newSVpv(s,0)); + q = memchr(s, '"', len) ? 
'\'' : '"'; + sv = sv_2mortal(newSVpvn(s, len)); if (uni) SvUTF8_on(sv); - Perl_croak(aTHX_ "Can't find string terminator %c%" SVf - "%c anywhere before EOF",q,SVfARG(sv),q); + Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c" + " anywhere before EOF", q, SVfARG(sv), q); } #include "feature.h" @@ -1572,7 +1578,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (s == bufend) need_incline = 1; else - incline(s); + incline(s, bufend); } } else if (isSPACE(c)) { s++; @@ -1591,7 +1597,7 @@ Perl_lex_read_space(pTHX_ U32 flags) if (!got_more) break; if (can_incline && need_incline && PL_parser->rsfp) { - incline(s); + incline(s, bufend); need_incline = 0; } } else if (!c) { @@ -1724,7 +1730,7 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) */ STATIC void -S_incline(pTHX_ const char *s) +S_incline(pTHX_ const char *s, const char *end) { const char *t; const char *n; @@ -1734,6 +1740,8 @@ S_incline(pTHX_ const char *s) PERL_ARGS_ASSERT_INCLINE; + assert(end >= s); + COPLINE_INC_WITH_HERELINES; if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL && s+1 == PL_bufend && *s == ';') { @@ -1745,8 +1753,8 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (strBEGINs(s, "line")) - s += 4; + if (memBEGINs(s, (STRLEN) (end - s), "line")) + s += sizeof("line") - 1; else return; if (SPACE_OR_TAB(*s)) @@ -1765,7 +1773,7 @@ S_incline(pTHX_ const char *s) return; while (SPACE_OR_TAB(*s)) s++; - if (*s == '"' && (t = strchr(s+1, '"'))) { + if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) { s++; e = t + 1; } @@ -1919,7 +1927,6 @@ STATIC void S_check_uni(pTHX) { const char *s; - const char *t; if (PL_oldoldbufptr != PL_last_uni) return; @@ -1928,7 +1935,7 @@ S_check_uni(pTHX) s = PL_last_uni; while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-') s += UTF ? 
UTF8SKIP(s) : 1; - if ((t = strchr(s, '(')) && t < PL_bufptr) + if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s)) return; Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), @@ -3508,7 +3515,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_o(&s, &uv, &error, + bool valid = grok_bslash_o(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3526,7 +3534,8 @@ S_scan_const(pTHX_ char *start) { const char* error; - bool valid = grok_bslash_x(&s, &uv, &error, + bool valid = grok_bslash_x(&s, PL_bufend, + &uv, &error, TRUE, /* Output warning */ FALSE, /* Not strict */ TRUE, /* Output warnings for @@ -3661,7 +3670,7 @@ S_scan_const(pTHX_ char *start) s++; /* If there is no matching '}', it is an error. */ - if (! (e = strchr(s, '}'))) { + if (! (e = (char *) memchr(s, '}', send - s))) { if (! PL_lex_inpat) { yyerror("Missing right brace on \\N{}"); } else { @@ -4140,7 +4149,7 @@ S_scan_const(pTHX_ char *start) /* This is the one truly awful dwimmer necessary to conflate C and sed. */ STATIC int -S_intuit_more(pTHX_ char *s) +S_intuit_more(pTHX_ char *s, char *e) { PERL_ARGS_ASSERT_INTUIT_MORE; @@ -4175,7 +4184,7 @@ S_intuit_more(pTHX_ char *s) /* this is terrifying, and it works */ int weight; char seen[256]; - const char * const send = strchr(s,']'); + const char * const send = (char *) memchr(s, ']', e - s); unsigned char un_char, last_un_char; char tmpbuf[sizeof PL_tokenbuf * 4]; @@ -4490,6 +4499,7 @@ I32 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) { filter_t funcp; + I32 ret; SV *datasv = NULL; /* This API is bad. It should have been using unsigned int for maxlen. Not sure if we want to change the API, but if not we should sanity @@ -4572,7 +4582,11 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen) /* Call function. The function is expected to */ /* call "FILTER_READ(idx+1, buf_sv)" first. 
*/ /* Return: <0:error, =0:eof, >0:not eof */ - return (*funcp)(aTHX_ idx, buf_sv, correct_length); + ENTER; + save_scalar(PL_errgv); + ret = (*funcp)(aTHX_ idx, buf_sv, correct_length); + LEAVE; + return ret; } STATIC char * @@ -4974,7 +4988,7 @@ Perl_yylex(pTHX) return yylex(); case LEX_INTERPENDMAYBE: - if (intuit_more(PL_bufptr)) { + if (intuit_more(PL_bufptr, PL_bufend)) { PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */ break; } @@ -5303,7 +5317,11 @@ Perl_yylex(pTHX) || *PL_splitstr == '\'' || *PL_splitstr == '"') && strchr(PL_splitstr + 1, *PL_splitstr)) + { + /* strchr is ok, because -F pattern can't contain + * embeddded NULs */ Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr); + } else { /* "q\0${splitstr}\0" is legal perl. Yes, even NUL bytes can be used as quoting characters. :-) */ @@ -5382,7 +5400,7 @@ Perl_yylex(pTHX) } } if (PL_rsfp || PL_parser->filtered) - incline(s); + incline(s, PL_bufend); } while (PL_parser->in_pod); PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s; PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); @@ -5607,7 +5625,7 @@ Perl_yylex(pTHX) && !PL_rsfp && !PL_parser->filtered) { /* handle eval qq[#line 1 "foo"\n ...] 
*/ CopLINE_dec(PL_curcop); - incline(s); + incline(s, PL_bufend); } d = s; while (d < PL_bufend && *d != '\n') @@ -5620,7 +5638,7 @@ Perl_yylex(pTHX) && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && SvEVALED(PL_lex_repl) && d[-1] == '}') s--; else - incline(s); + incline(s, PL_bufend); if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) { PL_lex_state = LEX_FORMLINE; force_next(FORMRBRACK); @@ -5634,7 +5652,7 @@ Perl_yylex(pTHX) { s++; if (s < PL_bufend) - incline(s); + incline(s, PL_bufend); } } goto retry; @@ -5835,7 +5853,8 @@ Perl_yylex(pTHX) if (!PL_tokenbuf[1]) { PREREF('%'); } - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') PL_tokenbuf[0] = '@'; } @@ -6437,14 +6456,15 @@ Perl_yylex(pTHX) d = PL_bufend; while (s < d) { if (*s++ == '\n') { - incline(s); - if (strBEGINs(s,"=cut")) { - s = strchr(s,'\n'); + incline(s, PL_bufend); + if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut")) + { + s = (char *) memchr(s,'\n', d - s); if (s) s++; else s = d; - incline(s); + incline(s, PL_bufend); goto retry; } } @@ -6516,7 +6536,7 @@ Perl_yylex(pTHX) OPERATOR('!'); case '<': if (PL_expect != XOPERATOR) { - if (s[1] != '<' && !strchr(s,'>')) + if (s[1] != '<' && !memchr(s,'>', PL_bufend - s)) check_uni(); if (s[1] == '<' && s[2] != '>') { if ( (s == PL_linestart || s[-1] == '\n') @@ -6668,8 +6688,8 @@ Perl_yylex(pTHX) if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) - && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) { if (*s == '[') { PL_tokenbuf[0] = '@'; if (ckWARN(WARN_SYNTAX)) { @@ -6694,8 +6714,10 @@ Perl_yylex(pTHX) else if (*s == '{') { char *t; PL_tokenbuf[0] = '%'; - if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX) - && (t = strchr(s, '}')) && (t = 
strchr(t, '='))) + if ( strEQ(PL_tokenbuf+1, "SIG") + && ckWARN(WARN_SYNTAX) + && (t = (char *) memchr(s, '}', PL_bufend - s)) + && (t = (char *) memchr(t, '=', PL_bufend - t))) { char tmpbuf[sizeof PL_tokenbuf]; do { @@ -6797,7 +6819,9 @@ Perl_yylex(pTHX) } if (PL_lex_state == LEX_NORMAL) s = skipspace(s); - if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) { + if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) + && intuit_more(s, PL_bufend)) + { if (*s == '{') PL_tokenbuf[0] = '%'; @@ -6868,7 +6892,7 @@ Perl_yylex(pTHX) } if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') { s += 3; - TERM(YADAYADA); + OPERATOR(YADAYADA); } if (PL_expect == XOPERATOR || !isDIGIT(s[1])) { char tmp = *s++; @@ -6908,7 +6932,7 @@ Perl_yylex(pTHX) case '\'': s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { @@ -6930,7 +6954,7 @@ Perl_yylex(pTHX) no_op("String",s); } if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_CONST; /* FIXME. I think that this can be const if char *d is replaced by more localised variables. 
*/ @@ -6956,7 +6980,7 @@ Perl_yylex(pTHX) if (PL_expect == XOPERATOR) no_op("Backticks",s); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -8332,7 +8356,7 @@ Perl_yylex(pTHX) case KEY_q: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; pl_yylval.ival = OP_CONST; TERM(sublex_start()); @@ -8344,7 +8368,7 @@ Perl_yylex(pTHX) OP *words = NULL; s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); COPLINE_SET_FROM_MULTI_END; PL_expect = XOPERATOR; if (SvCUR(PL_lex_stuff)) { @@ -8393,7 +8417,7 @@ Perl_yylex(pTHX) case KEY_qq: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_STRINGIFY; if (SvIVX(PL_lex_stuff) == '\'') SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */ @@ -8406,7 +8430,7 @@ Perl_yylex(pTHX) case KEY_qx: s = scan_str(s,FALSE,FALSE,FALSE,NULL); if (!s) - missingterm(NULL); + missingterm(NULL, 0); pl_yylval.ival = OP_BACKTICK; TERM(sublex_start()); @@ -9535,7 +9559,9 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni) *dest = '\0'; } } - else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s)) + else if ( PL_lex_state == LEX_INTERPNORMAL + && !PL_lex_brackets + && !intuit_more(s, PL_bufend)) PL_lex_state = LEX_INTERPEND; return s; } @@ -9948,7 +9974,7 @@ S_scan_heredoc(pTHX_ char *s) len = d - PL_tokenbuf; #ifndef PERL_STRICT_CR - d = strchr(s, '\r'); + d = (char *) memchr(s, '\r', PL_bufend - s); if (d) { char * const olds = s; s = d; @@ -10057,8 +10083,9 @@ S_scan_heredoc(pTHX_ char *s) /* No whitespace or all! 
*/ if (backup == s || *backup == '\n') { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup + 1, indent_len); + indent[indent_len] = 0; s--; /* before our delimiter */ PL_parser->herelines--; /* this line doesn't count */ break; @@ -10192,8 +10219,9 @@ S_scan_heredoc(pTHX_ char *s) /* All whitespace or none! */ if (backup == found || SPACE_OR_TAB(*backup)) { - Newxz(indent, indent_len + 1, char); + Newx(indent, indent_len + 1, char); memcpy(indent, backup, indent_len); + indent[indent_len] = 0; SvREFCNT_dec(PL_linestr); PL_linestr = linestr_save; PL_linestart = SvPVX(linestr_save); @@ -10284,7 +10312,7 @@ S_scan_heredoc(pTHX_ char *s) interminable: SvREFCNT_dec(tmpstr); CopLINE_set(PL_curcop, origline); - missingterm(PL_tokenbuf + 1); + missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1); } /* scan_inputsymbol @@ -10317,7 +10345,7 @@ S_scan_inputsymbol(pTHX_ char *start) PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL; - end = strchr(s, '\n'); + end = (char *) memchr(s, '\n', PL_bufend - s); if (!end) end = PL_bufend; if (s[1] == '<' && s[2] == '>' && s[3] == '>') { @@ -11464,7 +11492,7 @@ S_scan_formline(pTHX_ char *s) if (!got_some) break; } - incline(s); + incline(s, PL_bufend); } enough: if (!SvCUR(stuff) || needargs) @@ -11885,9 +11913,14 @@ S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen) } } + /* 'chars' isn't quite the right name, as code points above 0xFFFF + * require 4 bytes per char */ chars = SvCUR(utf16_buffer) >> 1; have = SvCUR(utf8_buffer); - SvGROW(utf8_buffer, have + chars * 3 + 1); + + /* Assume the worst case size as noted by the functions: twice the + * number of input bytes */ + SvGROW(utf8_buffer, have + chars * 4 + 1); if (reverse) { end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer), @@ -12046,6 +12079,79 @@ Perl_keyword_plugin_standard(pTHX_ return KEYWORD_PLUGIN_DECLINE; } +/* +=for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p + +Puts 
a C function into the chain of keyword plugins. This is the +preferred way to manipulate the L</PL_keyword_plugin> variable. +C<new_plugin> is a pointer to the C function that is to be added to the +keyword plugin chain, and C<old_plugin_p> points to the storage location +where a pointer to the next function in the chain will be stored. The +value of C<new_plugin> is written into the L</PL_keyword_plugin> variable, +while the value previously stored there is written to C<*old_plugin_p>. + +L</PL_keyword_plugin> is global to an entire process, and a module wishing +to hook keyword parsing may find itself invoked more than once per +process, typically in different threads. To handle that situation, this +function is idempotent. The location C<*old_plugin_p> must initially +(once per process) contain a null pointer. A C variable of static +duration (declared at file scope, typically also marked C<static> to give +it internal linkage) will be implicitly initialised appropriately, if it +does not have an explicit initialiser. This function will only actually +modify the plugin chain if it finds C<*old_plugin_p> to be null. This +function is also thread safe on the small scale. It uses appropriate +locking to avoid race conditions in accessing L</PL_keyword_plugin>. + +When this function is called, the function referenced by C<new_plugin> +must be ready to be called, except for C<*old_plugin_p> being unfilled. +In a threading situation, C<new_plugin> may be called immediately, even +before this function has returned. C<*old_plugin_p> will always be +appropriately set before C<new_plugin> is called. If C<new_plugin> +decides not to do anything special with the identifier that it is given +(which is the usual case for most calls to a keyword plugin), it must +chain the plugin function referenced by C<*old_plugin_p>. + +Taken all together, XS code to install a keyword plugin should typically +look something like this: + + static Perl_keyword_plugin_t next_keyword_plugin; + static OP *my_keyword_plugin(pTHX_ + char *keyword_ptr, STRLEN keyword_len, OP **op_ptr) + { + if (memEQs(keyword_ptr, keyword_len, + "my_new_keyword")) { + ...
+ } else { + return next_keyword_plugin(aTHX_ + keyword_ptr, keyword_len, op_ptr); + } + } + BOOT: + wrap_keyword_plugin(my_keyword_plugin, + &next_keyword_plugin); + +Direct access to L</PL_keyword_plugin> should be avoided. + +=cut +*/ + +void +Perl_wrap_keyword_plugin(pTHX_ + Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p) +{ + dVAR; + + PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN; + if (*old_plugin_p) return; + KEYWORD_PLUGIN_MUTEX_LOCK; + if (!*old_plugin_p) { + *old_plugin_p = PL_keyword_plugin; + PL_keyword_plugin = new_plugin; + } + KEYWORD_PLUGIN_MUTEX_UNLOCK; +} + #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p) static void S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)