X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/1a35f9ffbd3ab4c89b6f5cad456b2b317c85e96e..6429869b8e4c941462c3b00b0e44dcccb91d5564:/toke.c diff --git a/toke.c b/toke.c index e5da941..df73b88 100644 --- a/toke.c +++ b/toke.c @@ -148,6 +148,9 @@ static const char ident_too_long[] = "Identifier too long"; /* LEX_* are values for PL_lex_state, the state of the lexer. * They are arranged oddly so that the guard on the switch statement * can get by with a single comparison (if the compiler is smart enough). + * + * These values refer to the various states within a sublex parse, + * i.e. within a double quotish string */ /* #define LEX_NOTPARSING 11 is done in perl.h. */ @@ -359,7 +362,7 @@ static struct debug_tokens { { GIVEN, TOKENTYPE_IVAL, "GIVEN" }, { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" }, { IF, TOKENTYPE_IVAL, "IF" }, - { LABEL, TOKENTYPE_PVAL, "LABEL" }, + { LABEL, TOKENTYPE_OPVAL, "LABEL" }, { LOCAL, TOKENTYPE_IVAL, "LOCAL" }, { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" }, { LSTOP, TOKENTYPE_OPNUM, "LSTOP" }, @@ -537,24 +540,28 @@ S_no_op(pTHX_ const char *const what, char *s) s = oldbp; else PL_bufptr = s; - yywarn(Perl_form(aTHX_ "%s found where operator expected", what)); + yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0); if (ckWARN_d(WARN_SYNTAX)) { if (is_first) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\t(Missing semicolon on previous line?)\n"); else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) { const char *t; - for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) + for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); + t += UTF ? UTF8SKIP(t) : 1) NOOP; if (t < PL_bufptr && isSPACE(*t)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Do you need to predeclare %.*s?)\n", - (int)(t - PL_oldoldbufptr), PL_oldoldbufptr); + "\t(Do you need to predeclare %"SVf"?)\n", + SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr), + SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); } else { assert(s >= oldbp); Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp); + "\t(Missing operator before %"SVf"?)\n", + SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp), + SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); } } PL_bufptr = oldbp; @@ -595,6 +602,8 @@ S_missingterm(pTHX_ char *s) Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q); } +#include "feature.h" + /* * Check whether the named feature is enabled. */ @@ -602,16 +611,18 @@ bool Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen) { dVAR; - HV * const hinthv = GvHV(PL_hintgv); char he_name[8 + MAX_FEATURE_LEN] = "feature_"; PERL_ARGS_ASSERT_FEATURE_IS_ENABLED; + assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM); + if (namelen > MAX_FEATURE_LEN) return FALSE; memcpy(&he_name[8], name, namelen); - return (hinthv && hv_exists(hinthv, he_name, 8 + namelen)); + return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0, + REFCOUNTED_HE_EXISTS)); } /* @@ -680,7 +691,13 @@ used by perl internally, so extensions should always pass zero. */ /* LEX_START_SAME_FILTER indicates that this is not a new file, so it - can share filters with the current parser. */ + can share filters with the current parser. + LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the + caller, hence isn't owned by the parser, so shouldn't be closed on parser + destruction. This is used to handle the case of defaulting to reading the + script from the standard input because no filename was given on the command + line (without getting confused by situation where STDIN has been closed, so + the script handle is opened on fd 0) */ void Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) @@ -747,7 +764,8 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags) parser->linestart = SvPVX(parser->linestr); parser->bufend = parser->bufptr + SvCUR(parser->linestr); parser->last_lop = parser->last_uni = NULL; - parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES); + parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES + |LEX_DONT_CLOSE_RSFP); parser->in_pod = parser->filtered = 0; } @@ -763,7 +781,7 @@ Perl_parser_free(pTHX_ const yy_parser *parser) PL_curcop = parser->saved_curcop; SvREFCNT_dec(parser->linestr); - if (parser->rsfp == PerlIO_stdin()) + if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) PerlIO_clearerr(parser->rsfp); else if (parser->rsfp && (!parser->old_parser || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp))) @@ -1279,7 +1297,7 @@ Perl_lex_next_chunk(pTHX_ U32 flags) /* End of real input. Close filehandle (unless it was STDIN), * then add implicit termination. */ - if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin()) + if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP) PerlIO_clearerr(PL_parser->rsfp); else if (PL_parser->rsfp) (void)PerlIO_close(PL_parser->rsfp); @@ -2185,11 +2203,13 @@ S_force_version(pTHX_ char *s, int guessing) if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) { SV *ver; #ifdef USE_LOCALE_NUMERIC - char *loc = setlocale(LC_NUMERIC, "C"); + char *loc = savepv(setlocale(LC_NUMERIC, NULL)); + setlocale(LC_NUMERIC, "C"); #endif s = scan_num(s, &pl_yylval); #ifdef USE_LOCALE_NUMERIC setlocale(LC_NUMERIC, loc); + Safefree(loc); #endif version = pl_yylval.opval; ver = cSVOPx(version)->op_sv; @@ -2332,14 +2352,10 @@ S_tokeq(pTHX_ SV *sv) * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They * interact with PL_lex_state, and create fake ( ... ) argument lists * to handle functions and concatenation. - * They assume that whoever calls them will be setting up a fake - * join call, because each subthing puts a ',' after it. This lets - * "lower \luPpEr" - * become - * join($, , 'lower ', lcfirst( 'uPpEr', ) ,) - * - * (I'm not sure whether the spurious commas at the end of lcfirst's - * arguments and join's arguments are created or not). + * For example, + * "foo\lbar" + * is tokenised as + * stringify ( const[foo] concat lcfirst ( const[bar] ) ) */ /* @@ -2433,6 +2449,7 @@ S_sublex_push(pTHX) SAVEI32(PL_lex_casemods); SAVEI32(PL_lex_starts); SAVEI8(PL_lex_state); + SAVEPPTR(PL_sublex_info.re_eval_start); SAVEVPTR(PL_lex_inpat); SAVEI16(PL_lex_inwhat); SAVECOPLINE(PL_curcop); @@ -2449,6 +2466,7 @@ S_sublex_push(pTHX) PL_linestr = PL_lex_stuff; PL_lex_stuff = NULL; + PL_sublex_info.re_eval_start = NULL; PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr); @@ -2558,8 +2576,11 @@ S_sublex_done(pTHX) /* scan_const - Extracts a pattern, double-quoted string, or transliteration. This - is terrifying code. + Extracts the next constant part of a pattern, double-quoted string, + or transliteration. This is terrifying code. + + For example, in parsing the double-quoted string "ab\x63$d", it would + stop at the '$' and return an OP_CONST containing 'abc'. It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's processing a pattern (PL_lex_inpat is true), a transliteration @@ -2567,15 +2588,22 @@ S_sublex_done(pTHX) Returns a pointer to the character scanned up to. If this is advanced from the start pointer supplied (i.e. if anything was - successfully parsed), will leave an OP for the substring scanned + successfully parsed), will leave an OP_CONST for the substring scanned in pl_yylval. Caller must intuit reason for not parsing further by looking at the next characters herself. In patterns: - backslashes: - constants: \N{NAME} only - case and quoting: \U \Q \E - stops on @ and $, but not for $ as tail anchor + expand: + \N{ABC} => \N{U+41.42.43} + + pass through: + all other \-char, including \N and \N{ apart from \N{ABC} + + stops on: + @ and $ where it appears to be a var, but not for $ as tail anchor + \l \L \u \U \Q \E + (?{ or (??{ + In transliterations: characters are VERY literal, except for - not at the start or end @@ -2605,7 +2633,7 @@ S_sublex_done(pTHX) it's a tail anchor if $ is the last thing in the string, or if it's followed by one of "()| \r\n\t" - \1 (backreferences) are turned into $1 + \1 (backreferences) are turned into $1 in substitutions The structure of the code is while (there's a character to process) { @@ -2644,6 +2672,7 @@ S_scan_const(pTHX_ char *start) register char *d = SvPVX(sv); /* destination for copies */ bool dorange = FALSE; /* are we in a translit range? */ bool didrange = FALSE; /* did we just finish a range? */ + bool in_charclass = FALSE; /* within /[...]/ */ bool has_utf8 = FALSE; /* Output constant is UTF8 */ bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be UTF8? But, this can @@ -2833,33 +2862,38 @@ S_scan_const(pTHX_ char *start) /* if we get here, we're not doing a transliteration */ - /* skip for regexp comments /(?#comment)/ and code /(?{code})/, - except for the last char, which will be done separately. */ + else if (*s == '[' && PL_lex_inpat && !in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = TRUE; + } + + else if (*s == ']' && PL_lex_inpat && in_charclass) { + char *s1 = s-1; + int esc = 0; + while (s1 >= start && *s1-- == '\\') + esc = !esc; + if (!esc) + in_charclass = FALSE; + } + + /* skip for regexp comments /(?#comment)/, except for the last + * char, which will be done separately. + * Stop on (?{..}) and friends */ + else if (*s == '(' && PL_lex_inpat && s[1] == '?') { if (s[2] == '#') { while (s+1 < send && *s != ')') *d++ = NATIVE_TO_NEED(has_utf8,*s++); } - else if (s[2] == '{' /* This should match regcomp.c */ - || (s[2] == '?' && s[3] == '{')) + else if (!PL_lex_casemods && !in_charclass && + ( s[2] == '{' /* This should match regcomp.c */ + || (s[2] == '?' && s[3] == '{'))) { - I32 count = 1; - char *regparse = s + (s[2] == '{' ? 3 : 4); - char c; - - while (count && (c = *regparse)) { - if (c == '\\' && regparse[1]) - regparse++; - else if (c == '{') - count++; - else if (c == '}') - count--; - regparse++; - } - if (*regparse != ')') - regparse--; /* Leave one char for continuation. */ - while (s < regparse) - *d++ = NATIVE_TO_NEED(has_utf8,*s++); + break; } } @@ -2870,6 +2904,10 @@ S_scan_const(pTHX_ char *start) *d++ = NATIVE_TO_NEED(has_utf8,*s++); } + /* no further processing of single-quoted regex */ + else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') + goto default_action; + /* check for embedded arrays (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) */ @@ -2916,7 +2954,7 @@ S_scan_const(pTHX_ char *start) } /* string-change backslash escapes */ - if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) { + if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) { --s; break; } @@ -2953,7 +2991,7 @@ S_scan_const(pTHX_ char *start) /* FALL THROUGH */ default: { - if ((isALPHA(*s) || isDIGIT(*s))) + if ((isALNUMC(*s))) Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Unrecognized escape \\%c passed through", *s); @@ -2989,29 +3027,16 @@ S_scan_const(pTHX_ char *start) /* eg. \x24 indicates the hex constant 0x24 */ case 'x': - ++s; - if (*s == '{') { - char* const e = strchr(s, '}'); - I32 flags = PERL_SCAN_ALLOW_UNDERSCORES | - PERL_SCAN_DISALLOW_PREFIX; + { STRLEN len; + const char* error; - ++s; - if (!e) { - yyerror("Missing right brace on \\x{}"); + bool valid = grok_bslash_x(s, &uv, &len, &error, 1); + s += len; + if (! valid) { + yyerror(error); continue; } - len = e - s; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s = e + 1; - } - else { - { - STRLEN len = 2; - I32 flags = PERL_SCAN_DISALLOW_PREFIX; - uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL)); - s += len; - } } NUM_ESCAPE_INSERT: @@ -3503,7 +3528,8 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX_const(sv)); if (SvCUR(sv) >= SvLEN(sv)) - Perl_croak(aTHX_ "panic: constant overflowed allocated space"); + Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf + " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv)); SvPOK_on(sv); if (PL_encoding && !has_utf8) { @@ -3538,6 +3564,9 @@ S_scan_const(pTHX_ char *start) } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) { type = "s"; typelen = 1; + } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') { + type = "q"; + typelen = 1; } else { type = "qq"; typelen = 2; @@ -4213,6 +4242,7 @@ Perl_madlex(pTHX) case FUNC0SUB: case UNIOPSUB: case LSTOPSUB: + case LABEL: if (pl_yylval.opval) append_madprops(PL_thismad, pl_yylval.opval, 0); PL_thismad = 0; @@ -4273,10 +4303,6 @@ Perl_madlex(pTHX) } break; - /* pval */ - case LABEL: - break; - /* ival */ default: break; @@ -4470,7 +4496,9 @@ Perl_yylex(pTHX) case LEX_INTERPCASEMOD: #ifdef DEBUGGING if (PL_bufptr != PL_bufend && *PL_bufptr != '\\') - Perl_croak(aTHX_ "panic: INTERPCASEMOD"); + Perl_croak(aTHX_ + "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u", + PL_bufptr, PL_bufend, *PL_bufptr); #endif /* handle \E or end of string */ if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') { @@ -4480,7 +4508,8 @@ Perl_yylex(pTHX) PL_lex_casestack[PL_lex_casemods] = '\0'; if (PL_bufptr != PL_bufend - && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) { + && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q' + || oldmod == 'F')) { PL_bufptr += 2; PL_lex_state = LEX_INTERPCONCAT; #ifdef PERL_MAD @@ -4491,6 +4520,11 @@ Perl_yylex(pTHX) PL_lex_allbrackets--; return REPORT(')'); } + else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) { + /* Got an unpaired \E */ + Perl_ck_warner(aTHX_ packWARN(WARN_MISC), + "Useless use of \\E"); + } #ifdef PERL_MAD while (PL_bufptr != PL_bufend && PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') { @@ -4525,8 +4559,10 @@ Perl_yylex(pTHX) if (!PL_madskills) /* when just compiling don't need correct */ if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3)) tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */ - if ((*s == 'L' || *s == 'U') && - (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) { + if ((*s == 'L' || *s == 'U' || *s == 'F') && + (strchr(PL_lex_casestack, 'L') + || strchr(PL_lex_casestack, 'U') + || strchr(PL_lex_casestack, 'F'))) { PL_lex_casestack[--PL_lex_casemods] = '\0'; PL_lex_allbrackets--; return REPORT(')'); @@ -4550,8 +4586,10 @@ Perl_yylex(pTHX) NEXTVAL_NEXTTOKE.ival = OP_UC; else if (*s == 'Q') NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA; + else if (*s == 'F') + NEXTVAL_NEXTTOKE.ival = OP_FC; else - Perl_croak(aTHX_ "panic: yylex"); + Perl_croak(aTHX_ "panic: yylex, *s=%u", *s); if (PL_madskills) { SV* const tmpsv = newSVpvs("\\ "); /* replace the space with the character we want to escape @@ -4588,7 +4626,7 @@ Perl_yylex(pTHX) case LEX_INTERPSTART: if (PL_bufptr == PL_bufend) return REPORT(sublex_done()); - DEBUG_T({ PerlIO_printf(Perl_debug_log, + DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); }); PL_expect = XTERM; PL_lex_dojoin = (*PL_bufptr == '@'); @@ -4609,6 +4647,18 @@ Perl_yylex(pTHX) NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */ force_next(FUNC); } + /* Convert (?{...}) and friends to 'do {...}' */ + if (PL_lex_inpat && *PL_bufptr == '(') { + PL_sublex_info.re_eval_start = PL_bufptr; + PL_bufptr += 2; + if (*PL_bufptr != '{') + PL_bufptr++; + start_force(PL_curforce); + /* XXX probably need a CURMAD(something) here */ + PL_expect = XTERMBLOCK; + force_next(DO); + } + if (PL_lex_starts++) { s = PL_bufptr; #ifdef PERL_MAD @@ -4654,21 +4704,38 @@ Perl_yylex(pTHX) Perl_croak(aTHX_ "Bad evalled substitution pattern"); PL_lex_repl = NULL; } + if (PL_sublex_info.re_eval_start) { + if (*PL_bufptr != ')') + Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'"); + PL_bufptr++; + /* having compiled a (?{..}) expression, return the original + * text too, as a const */ + start_force(PL_curforce); + /* XXX probably need a CURMAD(something) here */ + NEXTVAL_NEXTTOKE.opval = + (OP*)newSVOP(OP_CONST, 0, + newSVpvn(PL_sublex_info.re_eval_start, + PL_bufptr - PL_sublex_info.re_eval_start)); + force_next(THING); + PL_sublex_info.re_eval_start = NULL; + PL_expect = XTERM; + return REPORT(','); + } + /* FALLTHROUGH */ case LEX_INTERPCONCAT: #ifdef DEBUGGING if (PL_lex_brackets) - Perl_croak(aTHX_ "panic: INTERPCONCAT"); + Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld", + (long) PL_lex_brackets); #endif if (PL_bufptr == PL_bufend) return REPORT(sublex_done()); - if (SvIVX(PL_linestr) == '\'') { + /* m'foo' still needs to be parsed for possible (?{...}) */ + if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) { SV *sv = newSVsv(PL_linestr); - if (!PL_lex_inpat) - sv = tokeq(sv); - else if ( PL_hints & HINT_NEW_RE ) - sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1); + sv = tokeq(sv); pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv); s = PL_bufend; } @@ -4734,7 +4801,12 @@ Perl_yylex(pTHX) if (isIDFIRST_lazy_if(s,UTF)) goto keylookup; { - unsigned char c = *s; + SV *dsv = newSVpvs_flags("", SVs_TEMP); + const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s, + UTF8SKIP(s), + SVs_TEMP | SVf_UTF8), + 10, UNI_DISPLAY_ISPRINT)) + : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s); len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart); if (len > UNRECOGNIZED_PRECEDE_COUNT) { d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT; @@ -4742,7 +4814,10 @@ Perl_yylex(pTHX) d = PL_linestart; } *s = '\0'; - Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1); + sv_setpv(dsv, d); + if (UTF) + SvUTF8_on(dsv); + Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1); } case 4: case 26: @@ -5145,7 +5220,8 @@ Perl_yylex(pTHX) if (d < PL_bufend) d++; else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */ - Perl_croak(aTHX_ "panic: input overflow"); + Perl_croak(aTHX_ "panic: input overflow, %p > %p", + d, PL_bufend); #ifdef PERL_MAD if (PL_madskills) PL_thiswhite = newSVpvn(s, d - s); @@ -5458,7 +5534,7 @@ Perl_yylex(pTHX) } sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0); if (*d == '(') { - d = scan_str(d,TRUE,TRUE); + d = scan_str(d,TRUE,TRUE,FALSE); if (!d) { /* MUST advance bufptr here to avoid bogus "at end of line" context messages from yyerror(). @@ -6144,10 +6220,12 @@ Perl_yylex(pTHX) &len); while (isSPACE(*t)) t++; - if (*t == ';' && get_cvn_flags(tmpbuf, len, 0)) + if (*t == ';' + && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0)) Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "You need to quote \"%s\"", - tmpbuf); + "You need to quote \"%"SVf"\"", + SVfARG(newSVpvn_flags(tmpbuf, len, + SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); } } } @@ -6226,14 +6304,17 @@ Perl_yylex(pTHX) if (ckWARN(WARN_SYNTAX)) { const char *t = s + 1; while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t))) - t++; + t += UTF ? UTF8SKIP(t) : 1; if (*t == '}' || *t == ']') { t++; PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */ + /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), - "Scalar value %.*s better written as $%.*s", - (int)(t-PL_bufptr), PL_bufptr, - (int)(t-PL_bufptr-1), PL_bufptr+1); + "Scalar value %"SVf" better written as $%"SVf, + SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr), + SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))), + SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1), + SVs_TEMP | (UTF ? SVf_UTF8 : 0 )))); } } } @@ -6349,7 +6430,7 @@ Perl_yylex(pTHX) TERM(THING); case '\'': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6364,7 +6445,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '"': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw string before %s\n", s); } ); if (PL_expect == XOPERATOR) { if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) { @@ -6387,7 +6468,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case '`': - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } ); if (PL_expect == XOPERATOR) no_op("Backticks",s); @@ -6528,7 +6609,9 @@ Perl_yylex(pTHX) if (!anydelim && PL_expect == XSTATE && d < PL_bufend && *d == ':' && *(d + 1) != ':') { s = d + 1; - pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf); + pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, + newSVpvn_flags(PL_tokenbuf, + len, UTF ? SVf_UTF8 : 0)); CLINE; TOKEN(LABEL); } @@ -6614,7 +6697,9 @@ Perl_yylex(pTHX) s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len, TRUE, &morelen); if (!morelen) - Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf, + Perl_croak(aTHX_ "Bad name after %"SVf"%s", + SVfARG(newSVpvn_flags(PL_tokenbuf, len, + (UTF ? SVf_UTF8 : 0) | SVs_TEMP )), *s == '\'' ? "'" : "::"); len += morelen; pkgname = 1; @@ -6640,8 +6725,9 @@ Perl_yylex(pTHX) if (ckWARN(WARN_BAREWORD) && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV)) Perl_warner(aTHX_ packWARN(WARN_BAREWORD), - "Bareword \"%s\" refers to nonexistent package", - PL_tokenbuf); + "Bareword \"%"SVf"\" refers to nonexistent package", + SVfARG(newSVpvn_flags(PL_tokenbuf, len, + (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); len -= 2; PL_tokenbuf[len] = '\0'; gv = NULL; @@ -6822,10 +6908,12 @@ Perl_yylex(pTHX) /* Not a method, so call it a subroutine (if defined) */ if (cv) { - if (lastchar == '-') - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of -%s resolved as -&%s()", - PL_tokenbuf, PL_tokenbuf); + if (lastchar == '-') { + const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP ); + Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), + "Ambiguous use of -%"SVf" resolved as -&%"SVf"()", + SVfARG(tmpsv), SVfARG(tmpsv)); + } /* Check for a constant sub */ if ((sv = cv_const_sv(cv))) { its_constant: @@ -6997,8 +7085,10 @@ Perl_yylex(pTHX) safe_bareword: if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) { Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Operator or semicolon missing before %c%s", - lastchar, PL_tokenbuf); + "Operator or semicolon missing before %c%"SVf, + lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf, + strlen(PL_tokenbuf), + SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), "Ambiguous use of %c resolved as operator %c", lastchar, lastchar); @@ -7149,7 +7239,9 @@ Perl_yylex(pTHX) d = s; s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len); if (!(tmp = keyword(PL_tokenbuf, len, 1))) - Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf); + Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword", + SVfARG(newSVpvn_flags(PL_tokenbuf, len, + (UTF ? SVf_UTF8 : 0) | SVs_TEMP))); if (tmp < 0) tmp = -tmp; else if (tmp == KEY_require || tmp == KEY_do @@ -7397,6 +7489,9 @@ Perl_yylex(pTHX) case KEY_fork: FUN0(OP_FORK); + case KEY_fc: + UNI(OP_FC); + case KEY_fcntl: LOP(OP_FCNTL,XTERM); @@ -7625,7 +7720,7 @@ Perl_yylex(pTHX) char tmpbuf[1024]; PL_bufptr = s; my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf); - yyerror(tmpbuf); + yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0); } #ifdef PERL_MAD if (PL_madskills) { /* just add type to declarator token */ @@ -7665,18 +7760,27 @@ Perl_yylex(pTHX) s = SKIPSPACE1(s); if (isIDFIRST_lazy_if(s,UTF)) { const char *t; - for (d = s; isALNUM_lazy_if(d,UTF);) - d++; + for (d = s; isALNUM_lazy_if(d,UTF);) { + d += UTF ? UTF8SKIP(d) : 1; + if (UTF) { + while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) { + d += UTF ? UTF8SKIP(d) : 1; + } + } + } for (t=d; isSPACE(*t);) t++; if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE) /* [perl #16184] */ && !(t[0] == '=' && t[1] == '>') + && !(t[0] == ':' && t[1] == ':') + && !keyword(s, d-s, 0) ) { - int parms_len = (int)(d-s); + SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s), + SVs_TEMP | (UTF ? SVf_UTF8 : 0)); Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE), - "Precedence problem: open %.*s should be open(%.*s)", - parms_len, s, parms_len, s); + "Precedence problem: open %"SVf" should be open(%"SVf")", + SVfARG(tmpsv), SVfARG(tmpsv)); } } LOP(OP_OPEN,XTERM); @@ -7730,7 +7834,7 @@ Perl_yylex(pTHX) LOP(OP_PIPE_OP,XTERM); case KEY_q: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_CONST; @@ -7741,7 +7845,7 @@ Perl_yylex(pTHX) case KEY_qw: { OP *words = NULL; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); PL_expect = XOPERATOR; @@ -7791,7 +7895,7 @@ Perl_yylex(pTHX) } case KEY_qq: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); pl_yylval.ival = OP_STRINGIFY; @@ -7804,7 +7908,7 @@ Perl_yylex(pTHX) TERM(sublex_start()); case KEY_qx: - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) missingterm(NULL); readpipe_override(); @@ -7986,8 +8090,6 @@ Perl_yylex(pTHX) case KEY_sort: checkcomma(s,PL_tokenbuf,"subroutine name"); s = SKIPSPACE1(s); - if (*s == ';' || *s == ')') /* probably a close */ - Perl_croak(aTHX_ "sort is now a reserved word"); PL_expect = XTERM; s = force_word(s,WORD,TRUE,TRUE,FALSE); LOP(OP_SORT,XREF); @@ -8115,7 +8217,7 @@ Perl_yylex(pTHX) const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO); STRLEN tmplen; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Prototype not terminated"); /* strip spaces and check for bad characters */ @@ -8133,7 +8235,7 @@ Perl_yylex(pTHX) } else { if ( underscore ) { - if ( *p != ';' ) + if ( !strchr(";@%", *p) ) bad_proto = TRUE; underscore = FALSE; } @@ -8167,9 +8269,13 @@ Perl_yylex(pTHX) "Illegal character %sin prototype for %"SVf" : %s", seen_underscore ? "after '_' " : "", SVfARG(PL_subname), - sv_uni_display(dsv, - newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)), - tmp, UNI_DISPLAY_ISPRINT)); + SvUTF8(PL_lex_stuff) + ? sv_uni_display(dsv, + newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8), + tmp, + UNI_DISPLAY_ISPRINT) + : pv_pretty(dsv, d, tmp, 60, NULL, NULL, + PERL_PV_ESCAPE_NONASCII)); } SvCUR_set(PL_lex_stuff, tmp); have_proto = TRUE; @@ -8422,15 +8528,16 @@ S_pending_ident(pTHX) if (PL_in_my) { if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */ if (has_colon) - yyerror(Perl_form(aTHX_ "No package name allowed for " + yyerror_pv(Perl_form(aTHX_ "No package name allowed for " "variable %s in \"our\"", - PL_tokenbuf)); + PL_tokenbuf), UTF ? SVf_UTF8 : 0); tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0); } else { if (has_colon) - yyerror(Perl_form(aTHX_ PL_no_myglob, - PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf)); + yyerror_pv(Perl_form(aTHX_ PL_no_myglob, + PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf), + UTF ? SVf_UTF8 : 0); pl_yylval.opval = newOP(OP_PADANY, 0); pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, @@ -8517,8 +8624,9 @@ S_pending_ident(pTHX) { /* Downgraded from fatal to warning 20000522 mjd */ Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Possible unintended interpolation of %s in string", - PL_tokenbuf); + "Possible unintended interpolation of %"SVf" in string", + SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len, + SVs_TEMP | ( UTF ? SVf_UTF8 : 0 )))); } } @@ -8571,9 +8679,10 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) while (s < PL_bufend && isSPACE(*s)) s++; if (isIDFIRST_lazy_if(s,UTF)) { - const char * const w = s++; + const char * const w = s; + s += UTF ? UTF8SKIP(s) : 1; while (isALNUM_lazy_if(s,UTF)) - s++; + s += UTF ? UTF8SKIP(s) : 1; while (s < PL_bufend && isSPACE(*s)) s++; if (*s == ',') { @@ -8599,7 +8708,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, SV *sv, SV *pv, const char *type, STRLEN typelen) { dVAR; dSP; - HV * const table = GvHV(PL_hintgv); /* ^H */ + HV * table = GvHV(PL_hintgv); /* ^H */ SV *res; SV **cvp; SV *cv, *typesv; @@ -8607,43 +8716,57 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, PERL_ARGS_ASSERT_NEW_CONSTANT; - if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { + /* charnames doesn't work well if there have been errors found */ + if (PL_error_count > 0 && strEQ(key,"charnames")) + return &PL_sv_undef; + + if (!table + || ! (PL_hints & HINT_LOCALIZE_HH) + || ! (cvp = hv_fetch(table, key, keylen, FALSE)) + || ! SvOK(*cvp)) + { SV *msg; - why2 = (const char *) - (strEQ(key,"charnames") - ? "(possibly a missing \"use charnames ...\")" - : ""); - msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s", - (type ? type: "undef"), why2); - - /* This is convoluted and evil ("goto considered harmful") - * but I do not understand the intricacies of all the different - * failure modes of %^H in here. The goal here is to make - * the most probable error message user-friendly. --jhi */ - - goto msgdone; - + /* Here haven't found what we're looking for. If it is charnames, + * perhaps it needs to be loaded. Try doing that before giving up */ + if (strEQ(key,"charnames")) { + Perl_load_module(aTHX_ + 0, + newSVpvs("_charnames"), + /* version parameter; no need to specify it, as if + * we get too early a version, will fail anyway, + * not being able to find '_charnames' */ + NULL, + newSVpvs(":full"), + newSVpvs(":short"), + NULL); + SPAGAIN; + table = GvHV(PL_hintgv); + if (table + && (PL_hints & HINT_LOCALIZE_HH) + && (cvp = hv_fetch(table, key, keylen, FALSE)) + && SvOK(*cvp)) + { + goto now_ok; + } + } + if (!table || !(PL_hints & HINT_LOCALIZE_HH)) { + msg = Perl_newSVpvf(aTHX_ + "Constant(%s) unknown", (type ? type: "undef")); + } + else { + why1 = "$^H{"; + why2 = key; + why3 = "} is not defined"; report: msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s", (type ? type: "undef"), why1, why2, why3); - msgdone: + } yyerror(SvPVX_const(msg)); SvREFCNT_dec(msg); return sv; } - - /* charnames doesn't work well if there have been errors found */ - if (PL_error_count > 0 && strEQ(key,"charnames")) - return &PL_sv_undef; - - cvp = hv_fetch(table, key, keylen, FALSE); - if (!cvp || !SvOK(*cvp)) { - why1 = "$^H{"; - why2 = key; - why3 = "} is not defined"; - goto report; - } +now_ok: sv_2mortal(sv); /* Parent created it permanently */ cv = *cvp; if (!pv && s) @@ -8712,7 +8835,7 @@ S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_packag for (;;) { if (d >= e) Perl_croak(aTHX_ ident_too_long); - if (isALNUM(*s)) /* UTF handled below */ + if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */ *d++ = *s++; else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) { *d++ = ':'; @@ -8808,8 +8931,6 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL bracket = s; s++; } - else if (ck_uni) - check_uni(); if (s < send) { if (UTF) { const STRLEN skip = UTF8SKIP(s); @@ -8827,6 +8948,8 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL *d = toCTRL(*s); s++; } + else if (ck_uni && !bracket) + check_uni(); if (bracket) { if (isSPACE(s[-1])) { while (s < send) { @@ -8897,13 +9020,15 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL if (PL_lex_state == LEX_NORMAL) { if (ckWARN(WARN_AMBIGUOUS) && (keyword(dest, d - dest, 0) - || get_cvn_flags(dest, d - dest, 0))) + || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0))) { + SV *tmp = newSVpvn_flags( dest, d - dest, + SVs_TEMP | (UTF ? SVf_UTF8 : 0) ); if (funny == '#') funny = '@'; Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of %c{%s} resolved to %c%s", - funny, dest, funny, dest); + "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf, + funny, tmp, funny, tmp); } } } @@ -8923,18 +9048,24 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in * the parse starting at 's', based on the subset that are valid in this * context input to this routine in 'valid_flags'. Advances s. Returns - * TRUE if the input was a valid flag, so the next char may be as well; - * otherwise FALSE. 'charset' should point to a NUL upon first call on the - * current regex. This routine will set it to any charset modifier found. - * The caller shouldn't change it. This way, another charset modifier - * encountered in the parse can be detected as an error, as we have decided - * allow only one */ + * TRUE if the input should be treated as a valid flag, so the next char + * may be as well; otherwise FALSE. 'charset' should point to a NUL upon + * first call on the current regex. This routine will set it to any + * charset modifier found. The caller shouldn't change it. This way, + * another charset modifier encountered in the parse can be detected as an + * error, as we have decided to allow only one */ const char c = **s; - - if (! strchr(valid_flags, c)) { - if (isALNUM(c)) { - goto deprecate; + STRLEN charlen = UTF ? UTF8SKIP(*s) : 1; + + if ( charlen != 1 || ! strchr(valid_flags, c) ) { + if (isALNUM_lazy_if(*s, UTF)) { + yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", charlen, *s), + UTF ? SVf_UTF8 : 0); + (*s) += charlen; + /* Pretend that it worked, so will continue processing before + * dieing */ + return TRUE; } return FALSE; } @@ -8948,34 +9079,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break; case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break; case LOCALE_PAT_MOD: - - /* In 5.14, qr//lt is legal but deprecated; the 't' means they - * can't be regex modifiers. - * In 5.14, s///le is legal and ambiguous. Try to disambiguate as - * much as easily done. s///lei, for example, has to mean regex - * modifiers if it's not an error (as does any word character - * following the 'e'). Otherwise, we resolve to the backwards- - * compatible, but less likely 's/// le ...', i.e. as meaning - * less-than-or-equal. The reason it's not likely is that s// - * returns a number for code in the field (/r returns a string, but - * that wasn't added until the 5.13 series), and so '<=' should be - * used for comparing, not 'le'. */ - if (*((*s) + 1) == 't') { - goto deprecate; - } - else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) { - - /* 'e' is valid only for substitutes, s///e. If it is not - * valid in the current context, then 'm//le' must mean the - * comparison operator, so use the regular deprecation message. - */ - if (! strchr(valid_flags, 'e')) { - goto deprecate; - } - Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS), - "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way"); - return FALSE; - } if (*charset) { goto multiple_charsets; } @@ -8983,11 +9086,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse *charset = c; break; case UNICODE_PAT_MOD: - /* In 5.14, qr//unless and qr//until are legal but deprecated; the - * 'n' means they can't be regex modifiers */ - if (*((*s) + 1) == 'n') { - goto deprecate; - } if (*charset) { goto multiple_charsets; } @@ -8995,12 +9093,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse *charset = c; break; case ASCII_RESTRICT_PAT_MOD: - /* In 5.14, qr//and is legal but deprecated; the 'n' means they - * can't be regex modifiers */ - if (*((*s) + 1) == 'n') { - goto deprecate; - } - if (! *charset) { set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET); } @@ -9030,11 +9122,6 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse (*s)++; return TRUE; - deprecate: - Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), - "Having no space between pattern and following word is deprecated"); - return FALSE; - multiple_charsets: if (*charset != c) { yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c)); @@ -9056,7 +9143,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE); + char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing); const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9066,6 +9153,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; + /* this was only needed for the initial scan_str; set it to false + * so that any (?{}) code blocks etc are parsed normally */ + PL_reg_state.re_reparsing = FALSE; if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ @@ -9101,6 +9191,25 @@ S_scan_pat(pTHX_ char *start, I32 type) #ifdef PERL_MAD modstart = s; #endif + + /* if qr/...(?{..}).../, then need to parse the pattern within a new + * anon CV. False positives like qr/[(?{]/ are harmless */ + + if (type == OP_QR) { + STRLEN len; + char *e, *p = SvPV(PL_lex_stuff, len); + e = p + len; + for (; p < e; p++) { + if (p[0] == '(' && p[1] == '?' + && (p[2] == '{' || (p[2] == '?' && p[3] == '{'))) + { + pm->op_pmflags |= PMf_HAS_CV; + break; + } + } + pm->op_pmflags |= PMf_IS_QR; + } + while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {}; #ifdef PERL_MAD if (PL_madskills && modstart != s) { @@ -9137,7 +9246,7 @@ S_scan_subst(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Substitution pattern not terminated"); @@ -9155,7 +9264,7 @@ S_scan_subst(pTHX_ char *start) #endif first_start = PL_multi_start; - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9233,7 +9342,6 @@ S_scan_trans(pTHX_ char *start) dVAR; register char* s; OP *o; - short *tbl; U8 squash; U8 del; U8 complement; @@ -9246,7 +9354,7 @@ S_scan_trans(pTHX_ char *start) pl_yylval.ival = OP_NULL; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Transliteration pattern not terminated"); @@ -9262,7 +9370,7 @@ S_scan_trans(pTHX_ char *start) } #endif - s = scan_str(s,!!PL_madskills,FALSE); + s = scan_str(s,!!PL_madskills,FALSE,FALSE); if (!s) { if (PL_lex_stuff) { SvREFCNT_dec(PL_lex_stuff); @@ -9301,8 +9409,7 @@ S_scan_trans(pTHX_ char *start) } no_more: - tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short)); - o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl); + o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL); o->op_private &= ~OPpTRANS_ALL; o->op_private |= del|squash|complement| (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| @@ -9655,7 +9762,7 @@ S_scan_inputsymbol(pTHX_ char *start) if (d - PL_tokenbuf != len) { pl_yylval.ival = OP_GLOB; - s = scan_str(start,!!PL_madskills,FALSE); + s = scan_str(start,!!PL_madskills,FALSE,FALSE); if (!s) Perl_croak(aTHX_ "Glob not terminated"); return s; @@ -9755,6 +9862,8 @@ intro_sym: takes: start position in buffer keep_quoted preserve \ on the embedded delimiter(s) keep_delims preserve the delimiters around the string + re_reparse compiling a run-time /(?{})/: + collapse // to /, and skip encoding src returns: position to continue reading from buffer side-effects: multi_start, multi_close, lex_repl or lex_stuff, and updates the read buffer. @@ -9795,7 +9904,7 @@ intro_sym: */ STATIC char * -S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) +S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse) { dVAR; SV *sv; /* scalar value: string */ @@ -9839,7 +9948,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) termlen = 1; } else { - termcode = utf8_to_uvchr((U8*)s, &termlen); + termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen); Copy(s, termstr, termlen, U8); if (!UTF8_IS_INVARIANT(term)) has_utf8 = TRUE; @@ -9874,7 +9983,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } #endif for (;;) { - if (PL_encoding && !UTF) { + if (PL_encoding && !UTF && !re_reparse) { bool cont = TRUE; while (cont) { @@ -9956,9 +10065,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) CopLINE_inc(PL_curcop); /* handle quoted delimiters */ if (*s == '\\' && s+1 < PL_bufend && term != '\\') { - if (!keep_quoted && s[1] == term) + if (!keep_quoted + && (s[1] == term + || (re_reparse && s[1] == '\\')) + ) s++; - /* any other quotes are simply copied straight through */ + /* any other quotes are simply copied straight through */ else *to++ = *s++; } @@ -10059,7 +10171,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) /* at this point, we have successfully read the delimited string */ - if (!PL_encoding || UTF) { + if (!PL_encoding || UTF || re_reparse) { #ifdef PERL_MAD if (PL_madskills) { char * const tstart = SvPVX(PL_linestr) + stuffstart; @@ -10091,7 +10203,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims) } } #endif - if (has_utf8 || PL_encoding) + if (has_utf8 || (PL_encoding && !re_reparse)) SvUTF8_on(sv); PL_multi_end = CopLINE(PL_curcop); @@ -10154,7 +10266,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp) switch (*s) { default: - Perl_croak(aTHX_ "panic: scan_num"); + Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s); /* if it starts with a 0, it could be an octal number, a decimal in 0.13 disguise, or a hexadecimal number, or a binary number. */ @@ -10669,14 +10781,14 @@ Perl_start_subparse(pTHX_ I32 is_format, U32 flags) #pragma segment Perl_yylex #endif static int -S_yywarn(pTHX_ const char *const s) +S_yywarn(pTHX_ const char *const s, U32 flags) { dVAR; PERL_ARGS_ASSERT_YYWARN; PL_in_eval |= EVAL_WARNONLY; - yyerror(s); + yyerror_pv(s, flags); PL_in_eval &= ~EVAL_WARNONLY; return 0; } @@ -10684,17 +10796,32 @@ S_yywarn(pTHX_ const char *const s) int Perl_yyerror(pTHX_ const char *const s) { + PERL_ARGS_ASSERT_YYERROR; + return yyerror_pvn(s, strlen(s), 0); +} + +int +Perl_yyerror_pv(pTHX_ const char *const s, U32 flags) +{ + PERL_ARGS_ASSERT_YYERROR_PV; + return yyerror_pvn(s, strlen(s), flags); +} + +int +Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags) +{ dVAR; - const char *where = NULL; const char *context = NULL; int contlen = -1; SV *msg; + SV * const where_sv = newSVpvs_flags("", SVs_TEMP); int yychar = PL_parser->yychar; + U32 is_utf8 = flags & SVf_UTF8; - PERL_ARGS_ASSERT_YYERROR; + PERL_ARGS_ASSERT_YYERROR_PVN; if (!yychar || (yychar == ';' && !PL_rsfp)) - where = "at EOF"; + sv_catpvs(where_sv, "at EOF"); else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) { @@ -10729,18 +10856,18 @@ Perl_yyerror(pTHX_ const char *const s) contlen = PL_bufptr - PL_oldbufptr; } else if (yychar > 255) - where = "next token ???"; + sv_catpvs(where_sv, "next token ???"); else if (yychar == -2) { /* YYEMPTY */ if (PL_lex_state == LEX_NORMAL || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL)) - where = "at end of line"; + sv_catpvs(where_sv, "at end of line"); else if (PL_lex_inpat) - where = "within pattern"; + sv_catpvs(where_sv, "within pattern"); else - where = "within string"; + sv_catpvs(where_sv, "within string"); } else { - SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP); + sv_catpvs(where_sv, "next char "); if (yychar < 32) Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar)); else if (isPRINT_LC(yychar)) { @@ -10749,15 +10876,16 @@ Perl_yyerror(pTHX_ const char *const s) } else Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255); - where = SvPVX_const(where_sv); } - msg = sv_2mortal(newSVpv(s, 0)); + msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8)); Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ", OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop)); if (context) - Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context); + Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n", + SVfARG(newSVpvn_flags(context, contlen, + SVs_TEMP | (UTF ? SVf_UTF8 : 0)))); else - Perl_sv_catpvf(aTHX_ msg, "%s\n", where); + Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv)); if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) { Perl_sv_catpvf(aTHX_ msg, " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n", @@ -10798,6 +10926,7 @@ S_swallow_bom(pTHX_ U8 *s) if (s[1] == 0xFE) { /* UTF-16 little-endian? (or UTF-32LE?) */ if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */ + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE"); #ifndef PERL_NO_UTF16_FILTER if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n"); @@ -10806,6 +10935,7 @@ S_swallow_bom(pTHX_ U8 *s) s = add_utf16_textfilter(s, TRUE); } #else + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); #endif } @@ -10819,6 +10949,7 @@ S_swallow_bom(pTHX_ U8 *s) s = add_utf16_textfilter(s, FALSE); } #else + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); #endif } @@ -10834,6 +10965,7 @@ S_swallow_bom(pTHX_ U8 *s) if (s[1] == 0) { if (s[2] == 0xFE && s[3] == 0xFF) { /* UTF-32 big-endian */ + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE"); } } @@ -10845,6 +10977,7 @@ S_swallow_bom(pTHX_ U8 *s) if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n"); s = add_utf16_textfilter(s, FALSE); #else + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE"); #endif } @@ -10867,6 +11000,7 @@ S_swallow_bom(pTHX_ U8 *s) if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n"); s = add_utf16_textfilter(s, TRUE); #else + /* diag_listed_as: Unsupported script encoding %s */ Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE"); #endif } @@ -11083,6 +11217,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv) rev += (*end - '0') * mult; mult *= 10; if (orev > rev) + /* diag_listed_as: Integer overflow in %s number */ Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW), "Integer overflow in decimal number"); } @@ -11401,15 +11536,10 @@ Perl_parse_label(pTHX_ U32 flags) if (PL_lex_state == LEX_KNOWNEXT) { PL_parser->yychar = yylex(); if (PL_parser->yychar == LABEL) { - char *lpv = pl_yylval.pval; - STRLEN llen = strlen(lpv); SV *lsv; PL_parser->yychar = YYEMPTY; lsv = newSV_type(SVt_PV); - SvPV_set(lsv, lpv); - SvCUR_set(lsv, llen); - SvLEN_set(lsv, llen+1); - SvPOK_on(lsv); + sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv); return lsv; } else { yyunlex(); @@ -11417,17 +11547,12 @@ Perl_parse_label(pTHX_ U32 flags) } } else { char *s, *t; - U8 c; STRLEN wlen, bufptr_pos; lex_read_space(0); t = s = PL_bufptr; - c = (U8)*s; - if (!isIDFIRST_A(c)) + if (!isIDFIRST_lazy_if(s, UTF)) goto no_label; - do { - c = (U8)*++t; - } while(isWORDCHAR_A(c)); - wlen = t - s; + t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen); if (word_takes_any_delimeter(s, wlen)) goto no_label; bufptr_pos = s - SvPVX(PL_linestr); @@ -11439,7 +11564,7 @@ Perl_parse_label(pTHX_ U32 flags) PL_oldoldbufptr = PL_oldbufptr; PL_oldbufptr = s; PL_bufptr = t+1; - return newSVpvn(s, wlen); + return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0); } else { PL_bufptr = s; no_label: @@ -11532,29 +11657,12 @@ Perl_parse_stmtseq(pTHX_ U32 flags) return stmtseqop; } -void -Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist) -{ - PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST; - deprecate("qw(...) as parentheses"); - force_next((4<<24)|')'); - if (qwlist->op_type == OP_STUB) { - op_free(qwlist); - } - else { - start_force(PL_curforce); - NEXTVAL_NEXTTOKE.opval = qwlist; - force_next(THING); - } - force_next((2<<24)|'('); -} - /* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 - * indent-tabs-mode: t + * indent-tabs-mode: nil * End: * - * ex: set ts=8 sts=4 sw=4 noet: + * ex: set ts=8 sts=4 sw=4 et: */