3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_pending_ident (PL_parser->pending_ident)
70 #define PL_preambled (PL_parser->preambled)
71 #define PL_sublex_info (PL_parser->sublex_info)
72 #define PL_linestr (PL_parser->linestr)
73 #define PL_expect (PL_parser->expect)
74 #define PL_copline (PL_parser->copline)
75 #define PL_bufptr (PL_parser->bufptr)
76 #define PL_oldbufptr (PL_parser->oldbufptr)
77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78 #define PL_linestart (PL_parser->linestart)
79 #define PL_bufend (PL_parser->bufend)
80 #define PL_last_uni (PL_parser->last_uni)
81 #define PL_last_lop (PL_parser->last_lop)
82 #define PL_last_lop_op (PL_parser->last_lop_op)
83 #define PL_lex_state (PL_parser->lex_state)
84 #define PL_rsfp (PL_parser->rsfp)
85 #define PL_rsfp_filters (PL_parser->rsfp_filters)
86 #define PL_in_my (PL_parser->in_my)
87 #define PL_in_my_stash (PL_parser->in_my_stash)
88 #define PL_tokenbuf (PL_parser->tokenbuf)
89 #define PL_multi_end (PL_parser->multi_end)
90 #define PL_error_count (PL_parser->error_count)
93 # define PL_endwhite (PL_parser->endwhite)
94 # define PL_faketokens (PL_parser->faketokens)
95 # define PL_lasttoke (PL_parser->lasttoke)
96 # define PL_nextwhite (PL_parser->nextwhite)
97 # define PL_realtokenstart (PL_parser->realtokenstart)
98 # define PL_skipwhite (PL_parser->skipwhite)
99 # define PL_thisclose (PL_parser->thisclose)
100 # define PL_thismad (PL_parser->thismad)
101 # define PL_thisopen (PL_parser->thisopen)
102 # define PL_thisstuff (PL_parser->thisstuff)
103 # define PL_thistoken (PL_parser->thistoken)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_thiswhite (PL_parser->thiswhite)
106 # define PL_nexttoke (PL_parser->nexttoke)
107 # define PL_curforce (PL_parser->curforce)
109 # define PL_nexttoke (PL_parser->nexttoke)
110 # define PL_nexttype (PL_parser->nexttype)
111 # define PL_nextval (PL_parser->nextval)
114 /* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
117 S_pending_ident(pTHX);
119 static const char ident_too_long[] = "Identifier too long";
122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
125 # define CURMAD(slot,sv)
126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
129 #define XENUMMASK 0x3f
130 #define XFAKEEOF 0x40
131 #define XFAKEBRACK 0x80
133 #ifdef USE_UTF8_SCRIPTS
134 # define UTF (!IN_BYTES)
136 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
139 /* The maximum number of characters preceding the unrecognized one to display */
140 #define UNRECOGNIZED_PRECEDE_COUNT 10
142 /* In variables named $^X, these are the legal values for X.
143 * 1999-02-27 mjd-perl-patch@plover.com */
144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
148 /* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
150 * can get by with a single comparison (if the compiler is smart enough).
152 * These values refer to the various states within a sublex parse,
153 * i.e. within a double quotish string
156 /* #define LEX_NOTPARSING 11 is done in perl.h. */
158 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
159 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
160 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
161 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
162 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
164 /* at end of code, eg "$x" followed by: */
165 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
166 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
168 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
169 string or after \E, $foo, etc */
170 #define LEX_INTERPCONST 2 /* NOT USED */
171 #define LEX_FORMLINE 1 /* expecting a format line */
172 #define LEX_KNOWNEXT 0 /* next token known; just return it */
176 static const char* const lex_state_names[] = {
195 #include "keywords.h"
197 /* CLINE is a macro that ensures PL_copline has a sane value */
202 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
205 # define SKIPSPACE0(s) skipspace0(s)
206 # define SKIPSPACE1(s) skipspace1(s)
207 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
208 # define PEEKSPACE(s) skipspace2(s,0)
210 # define SKIPSPACE0(s) skipspace(s)
211 # define SKIPSPACE1(s) skipspace(s)
212 # define SKIPSPACE2(s,tsv) skipspace(s)
213 # define PEEKSPACE(s) skipspace(s)
217 * Convenience functions to return different tokens and prime the
218 * lexer for the next token. They all take an argument.
220 * TOKEN : generic token (used for '(', DOLSHARP, etc)
221 * OPERATOR : generic operator
222 * AOPERATOR : assignment operator
223 * PREBLOCK : beginning the block after an if, while, foreach, ...
224 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
225 * PREREF : *EXPR where EXPR is not a simple identifier
226 * TERM : expression term
227 * LOOPX : loop exiting command (goto, last, dump, etc)
228 * FTST : file test operator
229 * FUN0 : zero-argument function
230 * FUN0OP : zero-argument function, with its op created in this file
231 * FUN1 : not used, except for not, which isn't a UNIOP
232 * BOop : bitwise or or xor
234 * SHop : shift operator
235 * PWop : power operator
236 * PMop : pattern-matching operator
237 * Aop : addition-level operator
238 * Mop : multiplication-level operator
239 * Eop : equality-testing operator
240 * Rop : relational operator <= != gt
242 * Also see LOP and lop() below.
245 #ifdef DEBUGGING /* Serve -DT. */
246 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
248 # define REPORT(retval) (retval)
251 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
252 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
253 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
254 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
255 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
256 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
257 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
258 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
259 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
260 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
261 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
262 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
263 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
264 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
265 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
266 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
267 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
268 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
269 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
270 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
271 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
273 /* This bit of chicanery makes a unary function followed by
274 * a parenthesis into a function with one argument, highest precedence.
275 * The UNIDOR macro is for unary functions that can be followed by the //
276 * operator (such as C<shift // 0>).
278 #define UNI3(f,x,have_x) { \
279 pl_yylval.ival = f; \
280 if (have_x) PL_expect = x; \
282 PL_last_uni = PL_oldbufptr; \
283 PL_last_lop_op = f; \
285 return REPORT( (int)FUNC1 ); \
287 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
289 #define UNI(f) UNI3(f,XTERM,1)
290 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
291 #define UNIPROTO(f,optional) { \
292 if (optional) PL_last_uni = PL_oldbufptr; \
296 #define UNIBRACK(f) UNI3(f,0,0)
298 /* grandfather return to old style */
301 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
302 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
303 pl_yylval.ival = (f); \
311 /* how to interpret the pl_yylval associated with the token */
315 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
320 static struct debug_tokens {
322 enum token_type type;
324 } const debug_tokens[] =
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
348 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
373 { PEG, TOKENTYPE_NONE, "PEG" },
374 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
375 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
376 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
377 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
378 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
379 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
380 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
381 { PREINC, TOKENTYPE_NONE, "PREINC" },
382 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
383 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
384 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
385 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
386 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
387 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
388 { SUB, TOKENTYPE_NONE, "SUB" },
389 { THING, TOKENTYPE_OPVAL, "THING" },
390 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
391 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
392 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
393 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
394 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
395 { USE, TOKENTYPE_IVAL, "USE" },
396 { WHEN, TOKENTYPE_IVAL, "WHEN" },
397 { WHILE, TOKENTYPE_IVAL, "WHILE" },
398 { WORD, TOKENTYPE_OPVAL, "WORD" },
399 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
400 { 0, TOKENTYPE_NONE, NULL }
403 /* dump the returned token in rv, plus any optional arg in pl_yylval */
406 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
410 PERL_ARGS_ASSERT_TOKEREPORT;
413 const char *name = NULL;
414 enum token_type type = TOKENTYPE_NONE;
415 const struct debug_tokens *p;
416 SV* const report = newSVpvs("<== ");
418 for (p = debug_tokens; p->token; p++) {
419 if (p->token == (int)rv) {
426 Perl_sv_catpv(aTHX_ report, name);
427 else if ((char)rv > ' ' && (char)rv < '~')
428 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
430 sv_catpvs(report, "EOF");
432 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
437 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
439 case TOKENTYPE_OPNUM:
440 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
441 PL_op_name[lvalp->ival]);
444 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
446 case TOKENTYPE_OPVAL:
448 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
449 PL_op_name[lvalp->opval->op_type]);
450 if (lvalp->opval->op_type == OP_CONST) {
451 Perl_sv_catpvf(aTHX_ report, " %s",
452 SvPEEK(cSVOPx_sv(lvalp->opval)));
457 sv_catpvs(report, "(opval=null)");
460 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
466 /* print the buffer with suitable escapes */
469 S_printbuf(pTHX_ const char *const fmt, const char *const s)
471 SV* const tmp = newSVpvs("");
473 PERL_ARGS_ASSERT_PRINTBUF;
475 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
482 S_deprecate_commaless_var_list(pTHX) {
484 deprecate("comma-less variable list");
485 return REPORT(','); /* grandfather non-comma-format format */
491 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
492 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
496 S_ao(pTHX_ int toketype)
499 if (*PL_bufptr == '=') {
501 if (toketype == ANDAND)
502 pl_yylval.ival = OP_ANDASSIGN;
503 else if (toketype == OROR)
504 pl_yylval.ival = OP_ORASSIGN;
505 else if (toketype == DORDOR)
506 pl_yylval.ival = OP_DORASSIGN;
514 * When Perl expects an operator and finds something else, no_op
515 * prints the warning. It always prints "<something> found where
516 * operator expected. It prints "Missing semicolon on previous line?"
517 * if the surprise occurs at the start of the line. "do you need to
518 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
519 * where the compiler doesn't know if foo is a method call or a function.
520 * It prints "Missing operator before end of line" if there's nothing
521 * after the missing operator, or "... before <...>" if there is something
522 * after the missing operator.
526 S_no_op(pTHX_ const char *const what, char *s)
529 char * const oldbp = PL_bufptr;
530 const bool is_first = (PL_oldbufptr == PL_linestart);
532 PERL_ARGS_ASSERT_NO_OP;
538 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
539 if (ckWARN_d(WARN_SYNTAX)) {
541 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
542 "\t(Missing semicolon on previous line?)\n");
543 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
545 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
546 t += UTF ? UTF8SKIP(t) : 1)
548 if (t < PL_bufptr && isSPACE(*t))
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
550 "\t(Do you need to predeclare %"SVf"?)\n",
551 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
552 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
556 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
557 "\t(Missing operator before %"SVf"?)\n",
558 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
559 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
567 * Complain about missing quote/regexp/heredoc terminator.
568 * If it's called with NULL then it cauterizes the line buffer.
569 * If we're in a delimited string and the delimiter is a control
570 * character, it's reformatted into a two-char sequence like ^C.
575 S_missingterm(pTHX_ char *s)
581 char * const nl = strrchr(s,'\n');
585 else if (isCNTRL(PL_multi_close)) {
587 tmpbuf[1] = (char)toCTRL(PL_multi_close);
592 *tmpbuf = (char)PL_multi_close;
596 q = strchr(s,'"') ? '\'' : '"';
597 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
603 * Check whether the named feature is enabled.
606 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
609 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
611 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
613 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
615 if (namelen > MAX_FEATURE_LEN)
617 memcpy(&he_name[8], name, namelen);
619 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
620 REFCOUNTED_HE_EXISTS));
624 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
625 * utf16-to-utf8-reversed.
628 #ifdef PERL_CR_FILTER
632 const char *s = SvPVX_const(sv);
633 const char * const e = s + SvCUR(sv);
635 PERL_ARGS_ASSERT_STRIP_RETURN;
637 /* outer loop optimized to do nothing if there are no CR-LFs */
639 if (*s++ == '\r' && *s == '\n') {
640 /* hit a CR-LF, need to copy the rest */
644 if (*s == '\r' && s[1] == '\n')
655 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
657 const I32 count = FILTER_READ(idx+1, sv, maxlen);
658 if (count > 0 && !maxlen)
665 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
667 Creates and initialises a new lexer/parser state object, supplying
668 a context in which to lex and parse from a new source of Perl code.
669 A pointer to the new state object is placed in L</PL_parser>. An entry
670 is made on the save stack so that upon unwinding the new state object
671 will be destroyed and the former value of L</PL_parser> will be restored.
672 Nothing else need be done to clean up the parsing context.
674 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
675 non-null, provides a string (in SV form) containing code to be parsed.
676 A copy of the string is made, so subsequent modification of I<line>
677 does not affect parsing. I<rsfp>, if non-null, provides an input stream
678 from which code will be read to be parsed. If both are non-null, the
679 code in I<line> comes first and must consist of complete lines of input,
680 and I<rsfp> supplies the remainder of the source.
682 The I<flags> parameter is reserved for future use. Currently it is only
683 used by perl internally, so extensions should always pass zero.
688 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
689 can share filters with the current parser.
690 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
691 caller, hence isn't owned by the parser, so shouldn't be closed on parser
692 destruction. This is used to handle the case of defaulting to reading the
693 script from the standard input because no filename was given on the command
694 line (without getting confused by situation where STDIN has been closed, so
695 the script handle is opened on fd 0) */
698 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
701 const char *s = NULL;
702 yy_parser *parser, *oparser;
703 if (flags && flags & ~LEX_START_FLAGS)
704 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
706 /* create and initialise a parser */
708 Newxz(parser, 1, yy_parser);
709 parser->old_parser = oparser = PL_parser;
712 parser->stack = NULL;
714 parser->stack_size = 0;
716 /* on scope exit, free this parser and restore any outer one */
718 parser->saved_curcop = PL_curcop;
720 /* initialise lexer state */
723 parser->curforce = -1;
725 parser->nexttoke = 0;
727 parser->error_count = oparser ? oparser->error_count : 0;
728 parser->copline = NOLINE;
729 parser->lex_state = LEX_NORMAL;
730 parser->expect = XSTATE;
732 parser->rsfp_filters =
733 !(flags & LEX_START_SAME_FILTER) || !oparser
735 : MUTABLE_AV(SvREFCNT_inc(
736 oparser->rsfp_filters
737 ? oparser->rsfp_filters
738 : (oparser->rsfp_filters = newAV())
741 Newx(parser->lex_brackstack, 120, char);
742 Newx(parser->lex_casestack, 12, char);
743 *parser->lex_casestack = '\0';
747 s = SvPV_const(line, len);
748 parser->linestr = flags & LEX_START_COPIED
749 ? SvREFCNT_inc_simple_NN(line)
750 : newSVpvn_flags(s, len, SvUTF8(line));
751 if (!len || s[len-1] != ';')
752 sv_catpvs(parser->linestr, "\n;");
754 parser->linestr = newSVpvs("\n;");
756 parser->oldoldbufptr =
759 parser->linestart = SvPVX(parser->linestr);
760 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
761 parser->last_lop = parser->last_uni = NULL;
762 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
763 |LEX_DONT_CLOSE_RSFP);
765 parser->in_pod = parser->filtered = 0;
769 /* delete a parser object */
772 Perl_parser_free(pTHX_ const yy_parser *parser)
774 PERL_ARGS_ASSERT_PARSER_FREE;
776 PL_curcop = parser->saved_curcop;
777 SvREFCNT_dec(parser->linestr);
779 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
780 PerlIO_clearerr(parser->rsfp);
781 else if (parser->rsfp && (!parser->old_parser ||
782 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
783 PerlIO_close(parser->rsfp);
784 SvREFCNT_dec(parser->rsfp_filters);
786 Safefree(parser->lex_brackstack);
787 Safefree(parser->lex_casestack);
788 PL_parser = parser->old_parser;
794 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
796 Buffer scalar containing the chunk currently under consideration of the
797 text currently being lexed. This is always a plain string scalar (for
798 which C<SvPOK> is true). It is not intended to be used as a scalar by
799 normal scalar means; instead refer to the buffer directly by the pointer
800 variables described below.
802 The lexer maintains various C<char*> pointers to things in the
803 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
804 reallocated, all of these pointers must be updated. Don't attempt to
805 do this manually, but rather use L</lex_grow_linestr> if you need to
806 reallocate the buffer.
808 The content of the text chunk in the buffer is commonly exactly one
809 complete line of input, up to and including a newline terminator,
810 but there are situations where it is otherwise. The octets of the
811 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
812 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
813 flag on this scalar, which may disagree with it.
815 For direct examination of the buffer, the variable
816 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
817 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
818 of these pointers is usually preferable to examination of the scalar
819 through normal scalar means.
821 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
823 Direct pointer to the end of the chunk of text currently being lexed, the
824 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
825 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
826 always located at the end of the buffer, and does not count as part of
827 the buffer's contents.
829 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
831 Points to the current position of lexing inside the lexer buffer.
832 Characters around this point may be freely examined, within
833 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
834 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
835 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
837 Lexing code (whether in the Perl core or not) moves this pointer past
838 the characters that it consumes. It is also expected to perform some
839 bookkeeping whenever a newline character is consumed. This movement
840 can be more conveniently performed by the function L</lex_read_to>,
841 which handles newlines appropriately.
843 Interpretation of the buffer's octets can be abstracted out by
844 using the slightly higher-level functions L</lex_peek_unichar> and
845 L</lex_read_unichar>.
847 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
849 Points to the start of the current line inside the lexer buffer.
850 This is useful for indicating at which column an error occurred, and
851 not much else. This must be updated by any lexing code that consumes
852 a newline; the function L</lex_read_to> handles this detail.
858 =for apidoc Amx|bool|lex_bufutf8
860 Indicates whether the octets in the lexer buffer
861 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
862 of Unicode characters. If not, they should be interpreted as Latin-1
863 characters. This is analogous to the C<SvUTF8> flag for scalars.
865 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
866 contains valid UTF-8. Lexing code must be robust in the face of invalid
869 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
870 is significant, but not the whole story regarding the input character
871 encoding. Normally, when a file is being read, the scalar contains octets
872 and its C<SvUTF8> flag is off, but the octets should be interpreted as
873 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
874 however, the scalar may have the C<SvUTF8> flag on, and in this case its
875 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
876 is in effect. This logic may change in the future; use this function
877 instead of implementing the logic yourself.
883 Perl_lex_bufutf8(pTHX)
889 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
891 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
892 at least I<len> octets (including terminating NUL). Returns a
893 pointer to the reallocated buffer. This is necessary before making
894 any direct modification of the buffer that would increase its length.
895 L</lex_stuff_pvn> provides a more convenient way to insert text into
898 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
899 this function updates all of the lexer's variables that point directly
906 Perl_lex_grow_linestr(pTHX_ STRLEN len)
910 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
911 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
912 linestr = PL_parser->linestr;
913 buf = SvPVX(linestr);
914 if (len <= SvLEN(linestr))
916 bufend_pos = PL_parser->bufend - buf;
917 bufptr_pos = PL_parser->bufptr - buf;
918 oldbufptr_pos = PL_parser->oldbufptr - buf;
919 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
920 linestart_pos = PL_parser->linestart - buf;
921 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
922 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
923 re_eval_start_pos = PL_sublex_info.re_eval_start ?
924 PL_sublex_info.re_eval_start - buf : 0;
926 buf = sv_grow(linestr, len);
928 PL_parser->bufend = buf + bufend_pos;
929 PL_parser->bufptr = buf + bufptr_pos;
930 PL_parser->oldbufptr = buf + oldbufptr_pos;
931 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
932 PL_parser->linestart = buf + linestart_pos;
933 if (PL_parser->last_uni)
934 PL_parser->last_uni = buf + last_uni_pos;
935 if (PL_parser->last_lop)
936 PL_parser->last_lop = buf + last_lop_pos;
937 if (PL_sublex_info.re_eval_start)
938 PL_sublex_info.re_eval_start = buf + re_eval_start_pos;
943 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
945 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
946 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
947 reallocating the buffer if necessary. This means that lexing code that
948 runs later will see the characters as if they had appeared in the input.
949 It is not recommended to do this as part of normal parsing, and most
950 uses of this facility run the risk of the inserted characters being
951 interpreted in an unintended manner.
953 The string to be inserted is represented by I<len> octets starting
954 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
955 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
956 The characters are recoded for the lexer buffer, according to how the
957 buffer is currently being interpreted (L</lex_bufutf8>). If a string
958 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
959 function is more convenient.
965 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
969 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
970 if (flags & ~(LEX_STUFF_UTF8))
971 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
973 if (flags & LEX_STUFF_UTF8) {
977 const char *p, *e = pv+len;
978 for (p = pv; p != e; p++)
979 highhalf += !!(((U8)*p) & 0x80);
982 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
983 bufptr = PL_parser->bufptr;
984 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
985 SvCUR_set(PL_parser->linestr,
986 SvCUR(PL_parser->linestr) + len+highhalf);
987 PL_parser->bufend += len+highhalf;
988 for (p = pv; p != e; p++) {
991 *bufptr++ = (char)(0xc0 | (c >> 6));
992 *bufptr++ = (char)(0x80 | (c & 0x3f));
999 if (flags & LEX_STUFF_UTF8) {
1000 STRLEN highhalf = 0;
1001 const char *p, *e = pv+len;
1002 for (p = pv; p != e; p++) {
1005 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1006 "non-Latin-1 character into Latin-1 input");
1007 } else if (c >= 0xc2 && p+1 != e &&
1008 (((U8)p[1]) & 0xc0) == 0x80) {
1011 } else if (c >= 0x80) {
1012 /* malformed UTF-8 */
1014 SAVESPTR(PL_warnhook);
1015 PL_warnhook = PERL_WARNHOOK_FATAL;
1016 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1022 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023 bufptr = PL_parser->bufptr;
1024 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025 SvCUR_set(PL_parser->linestr,
1026 SvCUR(PL_parser->linestr) + len-highhalf);
1027 PL_parser->bufend += len-highhalf;
1028 for (p = pv; p != e; p++) {
1031 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1034 *bufptr++ = (char)c;
1039 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040 bufptr = PL_parser->bufptr;
1041 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1042 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1043 PL_parser->bufend += len;
1044 Copy(pv, bufptr, len, char);
1050 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1052 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054 reallocating the buffer if necessary. This means that lexing code that
1055 runs later will see the characters as if they had appeared in the input.
1056 It is not recommended to do this as part of normal parsing, and most
1057 uses of this facility run the risk of the inserted characters being
1058 interpreted in an unintended manner.
1060 The string to be inserted is represented by octets starting at I<pv>
1061 and continuing to the first nul. These octets are interpreted as either
1062 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063 in I<flags>. The characters are recoded for the lexer buffer, according
1064 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065 If it is not convenient to nul-terminate a string to be inserted, the
1066 L</lex_stuff_pvn> function is more appropriate.
1072 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1074 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075 lex_stuff_pvn(pv, strlen(pv), flags);
1079 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1081 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083 reallocating the buffer if necessary. This means that lexing code that
1084 runs later will see the characters as if they had appeared in the input.
1085 It is not recommended to do this as part of normal parsing, and most
1086 uses of this facility run the risk of the inserted characters being
1087 interpreted in an unintended manner.
1089 The string to be inserted is the string value of I<sv>. The characters
1090 are recoded for the lexer buffer, according to how the buffer is currently
1091 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1092 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093 need to construct a scalar.
1099 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1107 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 =for apidoc Amx|void|lex_unstuff|char *ptr
1113 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1115 This hides the discarded text from any lexing code that runs later,
1116 as if the text had never appeared.
1118 This is not the normal way to consume lexed text. For that, use
1125 Perl_lex_unstuff(pTHX_ char *ptr)
1129 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130 buf = PL_parser->bufptr;
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1135 bufend = PL_parser->bufend;
1137 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 unstuff_len = ptr - buf;
1139 Move(ptr, buf, bufend+1-ptr, char);
1140 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141 PL_parser->bufend = bufend - unstuff_len;
1145 =for apidoc Amx|void|lex_read_to|char *ptr
1147 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149 performing the correct bookkeeping whenever a newline character is passed.
1150 This is the normal way to consume lexed text.
1152 Interpretation of the buffer's octets can be abstracted out by
1153 using the slightly higher-level functions L</lex_peek_unichar> and
1154 L</lex_read_unichar>.
1160 Perl_lex_read_to(pTHX_ char *ptr)
1163 PERL_ARGS_ASSERT_LEX_READ_TO;
1164 s = PL_parser->bufptr;
1165 if (ptr < s || ptr > PL_parser->bufend)
1166 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167 for (; s != ptr; s++)
1169 CopLINE_inc(PL_curcop);
1170 PL_parser->linestart = s+1;
1172 PL_parser->bufptr = ptr;
1176 =for apidoc Amx|void|lex_discard_to|char *ptr
1178 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179 up to I<ptr>. The remaining content of the buffer will be moved, and
1180 all pointers into the buffer updated appropriately. I<ptr> must not
1181 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182 it is not permitted to discard text that has yet to be lexed.
1184 Normally it is not necessarily to do this directly, because it suffices to
1185 use the implicit discarding behaviour of L</lex_next_chunk> and things
1186 based on it. However, if a token stretches across multiple lines,
1187 and the lexing code has kept multiple lines of text in the buffer for
1188 that purpose, then after completion of the token it would be wise to
1189 explicitly discard the now-unneeded earlier lines, to avoid future
1190 multi-line tokens growing the buffer without bound.
1196 Perl_lex_discard_to(pTHX_ char *ptr)
1200 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201 buf = SvPVX(PL_parser->linestr);
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1206 if (ptr > PL_parser->bufptr)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208 discard_len = ptr - buf;
1209 if (PL_parser->oldbufptr < ptr)
1210 PL_parser->oldbufptr = ptr;
1211 if (PL_parser->oldoldbufptr < ptr)
1212 PL_parser->oldoldbufptr = ptr;
1213 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214 PL_parser->last_uni = NULL;
1215 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216 PL_parser->last_lop = NULL;
1217 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219 PL_parser->bufend -= discard_len;
1220 PL_parser->bufptr -= discard_len;
1221 PL_parser->oldbufptr -= discard_len;
1222 PL_parser->oldoldbufptr -= discard_len;
1223 if (PL_parser->last_uni)
1224 PL_parser->last_uni -= discard_len;
1225 if (PL_parser->last_lop)
1226 PL_parser->last_lop -= discard_len;
1230 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1232 Reads in the next chunk of text to be lexed, appending it to
1233 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1234 looked to the end of the current chunk and wants to know more. It is
1235 usual, but not necessary, for lexing to have consumed the entirety of
1236 the current chunk at this time.
1238 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239 chunk (i.e., the current chunk has been entirely consumed), normally the
1240 current chunk will be discarded at the same time that the new chunk is
1241 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242 will not be discarded. If the current chunk has not been entirely
1243 consumed, then it will not be discarded regardless of the flag.
1245 Returns true if some new text was added to the buffer, or false if the
1246 buffer has reached the end of the input text.
1251 #define LEX_FAKE_EOF 0x80000000
1252 #define LEX_NO_TERM 0x40000000
1255 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 STRLEN old_bufend_pos, new_bufend_pos;
1260 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1262 bool got_some_for_debugger = 0;
1264 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1265 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1266 linestr = PL_parser->linestr;
1267 buf = SvPVX(linestr);
1268 if (!(flags & LEX_KEEP_PREVIOUS) &&
1269 PL_parser->bufptr == PL_parser->bufend) {
1270 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1272 if (PL_parser->last_uni != PL_parser->bufend)
1273 PL_parser->last_uni = NULL;
1274 if (PL_parser->last_lop != PL_parser->bufend)
1275 PL_parser->last_lop = NULL;
1276 last_uni_pos = last_lop_pos = 0;
1280 old_bufend_pos = PL_parser->bufend - buf;
1281 bufptr_pos = PL_parser->bufptr - buf;
1282 oldbufptr_pos = PL_parser->oldbufptr - buf;
1283 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284 linestart_pos = PL_parser->linestart - buf;
1285 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1288 if (flags & LEX_FAKE_EOF) {
1290 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1292 } else if (filter_gets(linestr, old_bufend_pos)) {
1294 got_some_for_debugger = 1;
1295 } else if (flags & LEX_NO_TERM) {
1298 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1299 sv_setpvs(linestr, "");
1301 /* End of real input. Close filehandle (unless it was STDIN),
1302 * then add implicit termination.
1304 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1305 PerlIO_clearerr(PL_parser->rsfp);
1306 else if (PL_parser->rsfp)
1307 (void)PerlIO_close(PL_parser->rsfp);
1308 PL_parser->rsfp = NULL;
1309 PL_parser->in_pod = PL_parser->filtered = 0;
1311 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1314 if (!PL_in_eval && PL_minus_p) {
1316 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1317 PL_minus_n = PL_minus_p = 0;
1318 } else if (!PL_in_eval && PL_minus_n) {
1319 sv_catpvs(linestr, /*{*/";}");
1322 sv_catpvs(linestr, ";");
1325 buf = SvPVX(linestr);
1326 new_bufend_pos = SvCUR(linestr);
1327 PL_parser->bufend = buf + new_bufend_pos;
1328 PL_parser->bufptr = buf + bufptr_pos;
1329 PL_parser->oldbufptr = buf + oldbufptr_pos;
1330 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1331 PL_parser->linestart = buf + linestart_pos;
1332 if (PL_parser->last_uni)
1333 PL_parser->last_uni = buf + last_uni_pos;
1334 if (PL_parser->last_lop)
1335 PL_parser->last_lop = buf + last_lop_pos;
1336 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1337 PL_curstash != PL_debstash) {
1338 /* debugger active and we're not compiling the debugger code,
1339 * so store the line into the debugger's array of lines
1341 update_debugger_info(NULL, buf+old_bufend_pos,
1342 new_bufend_pos-old_bufend_pos);
1348 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1350 Looks ahead one (Unicode) character in the text currently being lexed.
1351 Returns the codepoint (unsigned integer value) of the next character,
1352 or -1 if lexing has reached the end of the input text. To consume the
1353 peeked character, use L</lex_read_unichar>.
1355 If the next character is in (or extends into) the next chunk of input
1356 text, the next chunk will be read in. Normally the current chunk will be
1357 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358 then the current chunk will not be discarded.
1360 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361 is encountered, an exception is generated.
1367 Perl_lex_peek_unichar(pTHX_ U32 flags)
1371 if (flags & ~(LEX_KEEP_PREVIOUS))
1372 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373 s = PL_parser->bufptr;
1374 bufend = PL_parser->bufend;
1380 if (!lex_next_chunk(flags))
1382 s = PL_parser->bufptr;
1383 bufend = PL_parser->bufend;
1389 len = PL_utf8skip[head];
1390 while ((STRLEN)(bufend-s) < len) {
1391 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1393 s = PL_parser->bufptr;
1394 bufend = PL_parser->bufend;
1397 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398 if (retlen == (STRLEN)-1) {
1399 /* malformed UTF-8 */
1401 SAVESPTR(PL_warnhook);
1402 PL_warnhook = PERL_WARNHOOK_FATAL;
1403 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1409 if (!lex_next_chunk(flags))
1411 s = PL_parser->bufptr;
1418 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1420 Reads the next (Unicode) character in the text currently being lexed.
1421 Returns the codepoint (unsigned integer value) of the character read,
1422 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423 if lexing has reached the end of the input text. To non-destructively
1424 examine the next character, use L</lex_peek_unichar> instead.
1426 If the next character is in (or extends into) the next chunk of input
1427 text, the next chunk will be read in. Normally the current chunk will be
1428 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429 then the current chunk will not be discarded.
1431 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432 is encountered, an exception is generated.
1438 Perl_lex_read_unichar(pTHX_ U32 flags)
1441 if (flags & ~(LEX_KEEP_PREVIOUS))
1442 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443 c = lex_peek_unichar(flags);
1446 CopLINE_inc(PL_curcop);
1448 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1450 ++(PL_parser->bufptr);
1456 =for apidoc Amx|void|lex_read_space|U32 flags
1458 Reads optional spaces, in Perl style, in the text currently being
1459 lexed. The spaces may include ordinary whitespace characters and
1460 Perl-style comments. C<#line> directives are processed if encountered.
1461 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462 at a non-space character (or the end of the input text).
1464 If spaces extend into the next chunk of input text, the next chunk will
1465 be read in. Normally the current chunk will be discarded at the same
1466 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467 chunk will not be discarded.
1472 #define LEX_NO_NEXT_CHUNK 0x80000000
1475 Perl_lex_read_space(pTHX_ U32 flags)
1478 bool need_incline = 0;
1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1483 sv_free(PL_skipwhite);
1484 PL_skipwhite = NULL;
1487 PL_skipwhite = newSVpvs("");
1488 #endif /* PERL_MAD */
1489 s = PL_parser->bufptr;
1490 bufend = PL_parser->bufend;
1496 } while (!(c == '\n' || (c == 0 && s == bufend)));
1497 } else if (c == '\n') {
1499 PL_parser->linestart = s;
1504 } else if (isSPACE(c)) {
1506 } else if (c == 0 && s == bufend) {
1510 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1511 #endif /* PERL_MAD */
1512 if (flags & LEX_NO_NEXT_CHUNK)
1514 PL_parser->bufptr = s;
1515 CopLINE_inc(PL_curcop);
1516 got_more = lex_next_chunk(flags);
1517 CopLINE_dec(PL_curcop);
1518 s = PL_parser->bufptr;
1519 bufend = PL_parser->bufend;
1522 if (need_incline && PL_parser->rsfp) {
1532 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1533 #endif /* PERL_MAD */
1534 PL_parser->bufptr = s;
1539 * This subroutine has nothing to do with tilting, whether at windmills
1540 * or pinball tables. Its name is short for "increment line". It
1541 * increments the current line number in CopLINE(PL_curcop) and checks
1542 * to see whether the line starts with a comment of the form
1543 * # line 500 "foo.pm"
1544 * If so, it sets the current line number and file to the values in the comment.
1548 S_incline(pTHX_ const char *s)
1556 PERL_ARGS_ASSERT_INCLINE;
1558 CopLINE_inc(PL_curcop);
1561 while (SPACE_OR_TAB(*s))
1563 if (strnEQ(s, "line", 4))
1567 if (SPACE_OR_TAB(*s))
1571 while (SPACE_OR_TAB(*s))
1579 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1581 while (SPACE_OR_TAB(*s))
1583 if (*s == '"' && (t = strchr(s+1, '"'))) {
1589 while (!isSPACE(*t))
1593 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1595 if (*e != '\n' && *e != '\0')
1596 return; /* false alarm */
1598 line_num = atoi(n)-1;
1601 const STRLEN len = t - s;
1602 SV *const temp_sv = CopFILESV(PL_curcop);
1607 cf = SvPVX(temp_sv);
1608 tmplen = SvCUR(temp_sv);
1614 if (!PL_rsfp && !PL_parser->filtered) {
1615 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1616 * to *{"::_<newfilename"} */
1617 /* However, the long form of evals is only turned on by the
1618 debugger - usually they're "(eval %lu)" */
1622 STRLEN tmplen2 = len;
1623 if (tmplen + 2 <= sizeof smallbuf)
1626 Newx(tmpbuf, tmplen + 2, char);
1629 memcpy(tmpbuf + 2, cf, tmplen);
1631 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1636 if (tmplen2 + 2 <= sizeof smallbuf)
1639 Newx(tmpbuf2, tmplen2 + 2, char);
1641 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1642 /* Either they malloc'd it, or we malloc'd it,
1643 so no prefix is present in ours. */
1648 memcpy(tmpbuf2 + 2, s, tmplen2);
1651 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1653 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1654 /* adjust ${"::_<newfilename"} to store the new file name */
1655 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1656 /* The line number may differ. If that is the case,
1657 alias the saved lines that are in the array.
1658 Otherwise alias the whole array. */
1659 if (CopLINE(PL_curcop) == line_num) {
1660 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1661 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1663 else if (GvAV(*gvp)) {
1664 AV * const av = GvAV(*gvp);
1665 const I32 start = CopLINE(PL_curcop)+1;
1666 I32 items = AvFILLp(av) - start;
1668 AV * const av2 = GvAVn(gv2);
1669 SV **svp = AvARRAY(av) + start;
1670 I32 l = (I32)line_num+1;
1672 av_store(av2, l++, SvREFCNT_inc(*svp++));
1677 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1679 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1681 CopFILE_free(PL_curcop);
1682 CopFILE_setn(PL_curcop, s, len);
1684 CopLINE_set(PL_curcop, line_num);
1688 /* skip space before PL_thistoken */
1691 S_skipspace0(pTHX_ register char *s)
1693 PERL_ARGS_ASSERT_SKIPSPACE0;
1700 PL_thiswhite = newSVpvs("");
1701 sv_catsv(PL_thiswhite, PL_skipwhite);
1702 sv_free(PL_skipwhite);
1705 PL_realtokenstart = s - SvPVX(PL_linestr);
1709 /* skip space after PL_thistoken */
1712 S_skipspace1(pTHX_ register char *s)
1714 const char *start = s;
1715 I32 startoff = start - SvPVX(PL_linestr);
1717 PERL_ARGS_ASSERT_SKIPSPACE1;
1722 start = SvPVX(PL_linestr) + startoff;
1723 if (!PL_thistoken && PL_realtokenstart >= 0) {
1724 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1725 PL_thistoken = newSVpvn(tstart, start - tstart);
1727 PL_realtokenstart = -1;
1730 PL_nextwhite = newSVpvs("");
1731 sv_catsv(PL_nextwhite, PL_skipwhite);
1732 sv_free(PL_skipwhite);
1739 S_skipspace2(pTHX_ register char *s, SV **svp)
1742 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1743 const I32 startoff = s - SvPVX(PL_linestr);
1745 PERL_ARGS_ASSERT_SKIPSPACE2;
1748 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1749 if (!PL_madskills || !svp)
1751 start = SvPVX(PL_linestr) + startoff;
1752 if (!PL_thistoken && PL_realtokenstart >= 0) {
1753 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1754 PL_thistoken = newSVpvn(tstart, start - tstart);
1755 PL_realtokenstart = -1;
1759 *svp = newSVpvs("");
1760 sv_setsv(*svp, PL_skipwhite);
1761 sv_free(PL_skipwhite);
1770 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1772 AV *av = CopFILEAVx(PL_curcop);
1774 SV * const sv = newSV_type(SVt_PVMG);
1776 sv_setsv(sv, orig_sv);
1778 sv_setpvn(sv, buf, len);
1781 av_store(av, (I32)CopLINE(PL_curcop), sv);
1787 * Called to gobble the appropriate amount and type of whitespace.
1788 * Skips comments as well.
1792 S_skipspace(pTHX_ register char *s)
1796 #endif /* PERL_MAD */
1797 PERL_ARGS_ASSERT_SKIPSPACE;
1800 sv_free(PL_skipwhite);
1801 PL_skipwhite = NULL;
1803 #endif /* PERL_MAD */
1804 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1805 while (s < PL_bufend && SPACE_OR_TAB(*s))
1808 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1810 lex_read_space(LEX_KEEP_PREVIOUS |
1811 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1812 LEX_NO_NEXT_CHUNK : 0));
1814 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1815 if (PL_linestart > PL_bufptr)
1816 PL_bufptr = PL_linestart;
1821 PL_skipwhite = newSVpvn(start, s-start);
1822 #endif /* PERL_MAD */
1828 * Check the unary operators to ensure there's no ambiguity in how they're
1829 * used. An ambiguous piece of code would be:
1831 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1832 * the +5 is its argument.
1842 if (PL_oldoldbufptr != PL_last_uni)
1844 while (isSPACE(*PL_last_uni))
1847 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1849 if ((t = strchr(s, '(')) && t < PL_bufptr)
1852 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1853 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1854 (int)(s - PL_last_uni), PL_last_uni);
1858 * LOP : macro to build a list operator. Its behaviour has been replaced
1859 * with a subroutine, S_lop() for which LOP is just another name.
1862 #define LOP(f,x) return lop(f,x,s)
1866 * Build a list operator (or something that might be one). The rules:
1867 * - if we have a next token, then it's a list operator [why?]
1868 * - if the next thing is an opening paren, then it's a function
1869 * - else it's a list operator
1873 S_lop(pTHX_ I32 f, int x, char *s)
1877 PERL_ARGS_ASSERT_LOP;
1883 PL_last_lop = PL_oldbufptr;
1884 PL_last_lop_op = (OPCODE)f;
1893 return REPORT(FUNC);
1896 return REPORT(FUNC);
1899 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1900 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1901 return REPORT(LSTOP);
1908 * Sets up for an eventual force_next(). start_force(0) basically does
1909 * an unshift, while start_force(-1) does a push. yylex removes items
1914 S_start_force(pTHX_ int where)
1918 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1919 where = PL_lasttoke;
1920 assert(PL_curforce < 0 || PL_curforce == where);
1921 if (PL_curforce != where) {
1922 for (i = PL_lasttoke; i > where; --i) {
1923 PL_nexttoke[i] = PL_nexttoke[i-1];
1927 if (PL_curforce < 0) /* in case of duplicate start_force() */
1928 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1929 PL_curforce = where;
1932 curmad('^', newSVpvs(""));
1933 CURMAD('_', PL_nextwhite);
1938 S_curmad(pTHX_ char slot, SV *sv)
1944 if (PL_curforce < 0)
1945 where = &PL_thismad;
1947 where = &PL_nexttoke[PL_curforce].next_mad;
1953 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1955 else if (PL_encoding) {
1956 sv_recode_to_utf8(sv, PL_encoding);
1961 /* keep a slot open for the head of the list? */
1962 if (slot != '_' && *where && (*where)->mad_key == '^') {
1963 (*where)->mad_key = slot;
1964 sv_free(MUTABLE_SV(((*where)->mad_val)));
1965 (*where)->mad_val = (void*)sv;
1968 addmad(newMADsv(slot, sv), where, 0);
1971 # define start_force(where) NOOP
1972 # define curmad(slot, sv) NOOP
1977 * When the lexer realizes it knows the next token (for instance,
1978 * it is reordering tokens for the parser) then it can call S_force_next
1979 * to know what token to return the next time the lexer is called. Caller
1980 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1981 * and possibly PL_expect to ensure the lexer handles the token correctly.
1985 S_force_next(pTHX_ I32 type)
1990 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1991 tokereport(type, &NEXTVAL_NEXTTOKE);
1994 /* Don’t let opslab_force_free snatch it */
1995 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
1996 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
1997 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2000 if (PL_curforce < 0)
2001 start_force(PL_lasttoke);
2002 PL_nexttoke[PL_curforce].next_type = type;
2003 if (PL_lex_state != LEX_KNOWNEXT)
2004 PL_lex_defer = PL_lex_state;
2005 PL_lex_state = LEX_KNOWNEXT;
2006 PL_lex_expect = PL_expect;
2009 PL_nexttype[PL_nexttoke] = type;
2011 if (PL_lex_state != LEX_KNOWNEXT) {
2012 PL_lex_defer = PL_lex_state;
2013 PL_lex_expect = PL_expect;
2014 PL_lex_state = LEX_KNOWNEXT;
2022 int yyc = PL_parser->yychar;
2023 if (yyc != YYEMPTY) {
2026 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2027 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2028 PL_lex_allbrackets--;
2030 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2031 } else if (yyc == '('/*)*/) {
2032 PL_lex_allbrackets--;
2037 PL_parser->yychar = YYEMPTY;
2042 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2045 SV * const sv = newSVpvn_utf8(start, len,
2048 && !is_ascii_string((const U8*)start, len)
2049 && is_utf8_string((const U8*)start, len));
2055 * When the lexer knows the next thing is a word (for instance, it has
2056 * just seen -> and it knows that the next char is a word char, then
2057 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2061 * char *start : buffer position (must be within PL_linestr)
2062 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2063 * int check_keyword : if true, Perl checks to make sure the word isn't
2064 * a keyword (do this if the word is a label, e.g. goto FOO)
2065 * int allow_pack : if true, : characters will also be allowed (require,
2066 * use, etc. do this)
2067 * int allow_initial_tick : used by the "sub" lexer only.
2071 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2077 PERL_ARGS_ASSERT_FORCE_WORD;
2079 start = SKIPSPACE1(start);
2081 if (isIDFIRST_lazy_if(s,UTF) ||
2082 (allow_pack && *s == ':') ||
2083 (allow_initial_tick && *s == '\'') )
2085 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2086 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2088 start_force(PL_curforce);
2090 curmad('X', newSVpvn(start,s-start));
2091 if (token == METHOD) {
2096 PL_expect = XOPERATOR;
2100 curmad('g', newSVpvs( "forced" ));
2101 NEXTVAL_NEXTTOKE.opval
2102 = (OP*)newSVOP(OP_CONST,0,
2103 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2104 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2112 * Called when the lexer wants $foo *foo &foo etc, but the program
2113 * text only contains the "foo" portion. The first argument is a pointer
2114 * to the "foo", and the second argument is the type symbol to prefix.
2115 * Forces the next token to be a "WORD".
2116 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2120 S_force_ident(pTHX_ register const char *s, int kind)
2124 PERL_ARGS_ASSERT_FORCE_IDENT;
2127 const STRLEN len = strlen(s);
2128 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2129 UTF ? SVf_UTF8 : 0));
2130 start_force(PL_curforce);
2131 NEXTVAL_NEXTTOKE.opval = o;
2134 o->op_private = OPpCONST_ENTERED;
2135 /* XXX see note in pp_entereval() for why we forgo typo
2136 warnings if the symbol must be introduced in an eval.
2138 gv_fetchpvn_flags(s, len,
2139 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2140 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2141 kind == '$' ? SVt_PV :
2142 kind == '@' ? SVt_PVAV :
2143 kind == '%' ? SVt_PVHV :
2151 Perl_str_to_version(pTHX_ SV *sv)
2156 const char *start = SvPV_const(sv,len);
2157 const char * const end = start + len;
2158 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2160 PERL_ARGS_ASSERT_STR_TO_VERSION;
2162 while (start < end) {
2166 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2171 retval += ((NV)n)/nshift;
2180 * Forces the next token to be a version number.
2181 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2182 * and if "guessing" is TRUE, then no new token is created (and the caller
2183 * must use an alternative parsing method).
2187 S_force_version(pTHX_ char *s, int guessing)
2193 I32 startoff = s - SvPVX(PL_linestr);
2196 PERL_ARGS_ASSERT_FORCE_VERSION;
2204 while (isDIGIT(*d) || *d == '_' || *d == '.')
2208 start_force(PL_curforce);
2209 curmad('X', newSVpvn(s,d-s));
2212 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2214 #ifdef USE_LOCALE_NUMERIC
2215 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2216 setlocale(LC_NUMERIC, "C");
2218 s = scan_num(s, &pl_yylval);
2219 #ifdef USE_LOCALE_NUMERIC
2220 setlocale(LC_NUMERIC, loc);
2223 version = pl_yylval.opval;
2224 ver = cSVOPx(version)->op_sv;
2225 if (SvPOK(ver) && !SvNIOK(ver)) {
2226 SvUPGRADE(ver, SVt_PVNV);
2227 SvNV_set(ver, str_to_version(ver));
2228 SvNOK_on(ver); /* hint that it is a version */
2231 else if (guessing) {
2234 sv_free(PL_nextwhite); /* let next token collect whitespace */
2236 s = SvPVX(PL_linestr) + startoff;
2244 if (PL_madskills && !version) {
2245 sv_free(PL_nextwhite); /* let next token collect whitespace */
2247 s = SvPVX(PL_linestr) + startoff;
2250 /* NOTE: The parser sees the package name and the VERSION swapped */
2251 start_force(PL_curforce);
2252 NEXTVAL_NEXTTOKE.opval = version;
2259 * S_force_strict_version
2260 * Forces the next token to be a version number using strict syntax rules.
2264 S_force_strict_version(pTHX_ char *s)
2269 I32 startoff = s - SvPVX(PL_linestr);
2271 const char *errstr = NULL;
2273 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2275 while (isSPACE(*s)) /* leading whitespace */
2278 if (is_STRICT_VERSION(s,&errstr)) {
2280 s = (char *)scan_version(s, ver, 0);
2281 version = newSVOP(OP_CONST, 0, ver);
2283 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2284 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2288 yyerror(errstr); /* version required */
2293 if (PL_madskills && !version) {
2294 sv_free(PL_nextwhite); /* let next token collect whitespace */
2296 s = SvPVX(PL_linestr) + startoff;
2299 /* NOTE: The parser sees the package name and the VERSION swapped */
2300 start_force(PL_curforce);
2301 NEXTVAL_NEXTTOKE.opval = version;
2309 * Tokenize a quoted string passed in as an SV. It finds the next
2310 * chunk, up to end of string or a backslash. It may make a new
2311 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2316 S_tokeq(pTHX_ SV *sv)
2325 PERL_ARGS_ASSERT_TOKEQ;
2330 s = SvPV_force(sv, len);
2331 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2334 /* This is relying on the SV being "well formed" with a trailing '\0' */
2335 while (s < send && !(*s == '\\' && s[1] == '\\'))
2340 if ( PL_hints & HINT_NEW_STRING ) {
2341 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2345 if (s + 1 < send && (s[1] == '\\'))
2346 s++; /* all that, just for this */
2351 SvCUR_set(sv, d - SvPVX_const(sv));
2353 if ( PL_hints & HINT_NEW_STRING )
2354 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2359 * Now come three functions related to double-quote context,
2360 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2361 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2362 * interact with PL_lex_state, and create fake ( ... ) argument lists
2363 * to handle functions and concatenation.
2367 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2372 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2374 * Pattern matching will set PL_lex_op to the pattern-matching op to
2375 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2377 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2379 * Everything else becomes a FUNC.
2381 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2382 * had an OP_CONST or OP_READLINE). This just sets us up for a
2383 * call to S_sublex_push().
2387 S_sublex_start(pTHX)
2390 const I32 op_type = pl_yylval.ival;
2392 if (op_type == OP_NULL) {
2393 pl_yylval.opval = PL_lex_op;
2397 if (op_type == OP_CONST || op_type == OP_READLINE) {
2398 SV *sv = tokeq(PL_lex_stuff);
2400 if (SvTYPE(sv) == SVt_PVIV) {
2401 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2403 const char * const p = SvPV_const(sv, len);
2404 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2408 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2409 PL_lex_stuff = NULL;
2410 /* Allow <FH> // "foo" */
2411 if (op_type == OP_READLINE)
2412 PL_expect = XTERMORDORDOR;
2415 else if (op_type == OP_BACKTICK && PL_lex_op) {
2416 /* readpipe() vas overriden */
2417 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2418 pl_yylval.opval = PL_lex_op;
2420 PL_lex_stuff = NULL;
2424 PL_sublex_info.super_state = PL_lex_state;
2425 PL_sublex_info.sub_inwhat = (U16)op_type;
2426 PL_sublex_info.sub_op = PL_lex_op;
2427 PL_lex_state = LEX_INTERPPUSH;
2431 pl_yylval.opval = PL_lex_op;
2441 * Create a new scope to save the lexing state. The scope will be
2442 * ended in S_sublex_done. Returns a '(', starting the function arguments
2443 * to the uc, lc, etc. found before.
2444 * Sets PL_lex_state to LEX_INTERPCONCAT.
2453 PL_lex_state = PL_sublex_info.super_state;
2454 SAVEBOOL(PL_lex_dojoin);
2455 SAVEI32(PL_lex_brackets);
2456 SAVEI32(PL_lex_allbrackets);
2457 SAVEI32(PL_lex_formbrack);
2458 SAVEI8(PL_lex_fakeeof);
2459 SAVEI32(PL_lex_casemods);
2460 SAVEI32(PL_lex_starts);
2461 SAVEI8(PL_lex_state);
2462 SAVESPTR(PL_lex_repl);
2463 SAVEPPTR(PL_sublex_info.re_eval_start);
2464 SAVESPTR(PL_sublex_info.re_eval_str);
2465 SAVEPPTR(PL_sublex_info.super_bufptr);
2466 SAVEVPTR(PL_lex_inpat);
2467 SAVEI16(PL_lex_inwhat);
2468 SAVECOPLINE(PL_curcop);
2469 SAVEPPTR(PL_bufptr);
2470 SAVEPPTR(PL_bufend);
2471 SAVEPPTR(PL_oldbufptr);
2472 SAVEPPTR(PL_oldoldbufptr);
2473 SAVEPPTR(PL_last_lop);
2474 SAVEPPTR(PL_last_uni);
2475 SAVEPPTR(PL_linestart);
2476 SAVESPTR(PL_linestr);
2477 SAVEGENERICPV(PL_lex_brackstack);
2478 SAVEGENERICPV(PL_lex_casestack);
2480 /* The here-doc parser needs to be able to peek into outer lexing
2481 scopes to find the body of the here-doc. We use SvIVX(PL_linestr)
2482 to store the outer PL_bufptr and SvNVX to store the outer
2483 PL_linestr. Since SvIVX already means something else, we use
2484 PL_sublex_info.super_bufptr for the innermost scope (the one we are
2485 now entering), and a localised SvIVX for outer scopes.
2487 SvUPGRADE(PL_linestr, SVt_PVIV);
2488 /* A null super_bufptr means the outer lexing scope is not peekable,
2489 because it is a single line from an input stream. */
2490 SAVEIV(SvIVX(PL_linestr));
2491 SvIVX(PL_linestr) = PTR2IV(PL_sublex_info.super_bufptr);
2492 PL_sublex_info.super_bufptr =
2493 (SvTYPE(PL_linestr) < SVt_PVNV || !SvNVX(PL_linestr))
2494 && (PL_rsfp || PL_parser->filtered)
2497 SvUPGRADE(PL_lex_stuff, SVt_PVNV);
2498 SvNVX(PL_lex_stuff) = PTR2NV(PL_linestr);
2500 PL_linestr = PL_lex_stuff;
2501 PL_lex_repl = PL_sublex_info.repl;
2502 PL_lex_stuff = NULL;
2503 PL_sublex_info.repl = NULL;
2504 PL_sublex_info.re_eval_start = NULL;
2505 PL_sublex_info.re_eval_str = NULL;
2507 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2508 = SvPVX(PL_linestr);
2509 PL_bufend += SvCUR(PL_linestr);
2510 PL_last_lop = PL_last_uni = NULL;
2511 SAVEFREESV(PL_linestr);
2513 PL_lex_dojoin = FALSE;
2514 PL_lex_brackets = PL_lex_formbrack = 0;
2515 PL_lex_allbrackets = 0;
2516 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2517 Newx(PL_lex_brackstack, 120, char);
2518 Newx(PL_lex_casestack, 12, char);
2519 PL_lex_casemods = 0;
2520 *PL_lex_casestack = '\0';
2522 PL_lex_state = LEX_INTERPCONCAT;
2523 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2525 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2526 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2527 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2528 PL_lex_inpat = PL_sublex_info.sub_op;
2530 PL_lex_inpat = NULL;
2537 * Restores lexer state after a S_sublex_push.
2544 if (!PL_lex_starts++) {
2545 SV * const sv = newSVpvs("");
2546 if (SvUTF8(PL_linestr))
2548 PL_expect = XOPERATOR;
2549 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2553 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2554 PL_lex_state = LEX_INTERPCASEMOD;
2558 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2559 assert(PL_lex_inwhat != OP_TRANSR);
2560 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2561 SvUPGRADE(PL_lex_repl, SVt_PVNV);
2562 SvNVX(PL_lex_repl) = SvNVX(PL_linestr);
2563 PL_linestr = PL_lex_repl;
2565 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2566 PL_bufend += SvCUR(PL_linestr);
2567 PL_last_lop = PL_last_uni = NULL;
2568 SAVEFREESV(PL_linestr);
2569 PL_lex_dojoin = FALSE;
2570 PL_lex_brackets = 0;
2571 PL_lex_allbrackets = 0;
2572 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2573 PL_lex_casemods = 0;
2574 *PL_lex_casestack = '\0';
2576 if (SvEVALED(PL_lex_repl)) {
2577 PL_lex_state = LEX_INTERPNORMAL;
2579 /* we don't clear PL_lex_repl here, so that we can check later
2580 whether this is an evalled subst; that means we rely on the
2581 logic to ensure sublex_done() is called again only via the
2582 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2585 PL_lex_state = LEX_INTERPCONCAT;
2595 PL_endwhite = newSVpvs("");
2596 sv_catsv(PL_endwhite, PL_thiswhite);
2600 sv_setpvs(PL_thistoken,"");
2602 PL_realtokenstart = -1;
2606 PL_bufend = SvPVX(PL_linestr);
2607 PL_bufend += SvCUR(PL_linestr);
2608 PL_expect = XOPERATOR;
2609 PL_sublex_info.sub_inwhat = 0;
2617 Extracts the next constant part of a pattern, double-quoted string,
2618 or transliteration. This is terrifying code.
2620 For example, in parsing the double-quoted string "ab\x63$d", it would
2621 stop at the '$' and return an OP_CONST containing 'abc'.
2623 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2624 processing a pattern (PL_lex_inpat is true), a transliteration
2625 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2627 Returns a pointer to the character scanned up to. If this is
2628 advanced from the start pointer supplied (i.e. if anything was
2629 successfully parsed), will leave an OP_CONST for the substring scanned
2630 in pl_yylval. Caller must intuit reason for not parsing further
2631 by looking at the next characters herself.
2635 \N{ABC} => \N{U+41.42.43}
2638 all other \-char, including \N and \N{ apart from \N{ABC}
2641 @ and $ where it appears to be a var, but not for $ as tail anchor
2646 In transliterations:
2647 characters are VERY literal, except for - not at the start or end
2648 of the string, which indicates a range. If the range is in bytes,
2649 scan_const expands the range to the full set of intermediate
2650 characters. If the range is in utf8, the hyphen is replaced with
2651 a certain range mark which will be handled by pmtrans() in op.c.
2653 In double-quoted strings:
2655 double-quoted style: \r and \n
2656 constants: \x31, etc.
2657 deprecated backrefs: \1 (in substitution replacements)
2658 case and quoting: \U \Q \E
2661 scan_const does *not* construct ops to handle interpolated strings.
2662 It stops processing as soon as it finds an embedded $ or @ variable
2663 and leaves it to the caller to work out what's going on.
2665 embedded arrays (whether in pattern or not) could be:
2666 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2668 $ in double-quoted strings must be the symbol of an embedded scalar.
2670 $ in pattern could be $foo or could be tail anchor. Assumption:
2671 it's a tail anchor if $ is the last thing in the string, or if it's
2672 followed by one of "()| \r\n\t"
2674 \1 (backreferences) are turned into $1 in substitutions
2676 The structure of the code is
2677 while (there's a character to process) {
2678 handle transliteration ranges
2679 skip regexp comments /(?#comment)/ and codes /(?{code})/
2680 skip #-initiated comments in //x patterns
2681 check for embedded arrays
2682 check for embedded scalars
2684 deprecate \1 in substitution replacements
2685 handle string-changing backslashes \l \U \Q \E, etc.
2686 switch (what was escaped) {
2687 handle \- in a transliteration (becomes a literal -)
2688 if a pattern and not \N{, go treat as regular character
2689 handle \132 (octal characters)
2690 handle \x15 and \x{1234} (hex characters)
2691 handle \N{name} (named characters, also \N{3,5} in a pattern)
2692 handle \cV (control characters)
2693 handle printf-style backslashes (\f, \r, \n, etc)
2696 } (end if backslash)
2697 handle regular character
2698 } (end while character to read)
2703 S_scan_const(pTHX_ char *start)
2706 char *send = PL_bufend; /* end of the constant */
2707 SV *sv = newSV(send - start); /* sv for the constant. See
2708 note below on sizing. */
2709 char *s = start; /* start of the constant */
2710 char *d = SvPVX(sv); /* destination for copies */
2711 bool dorange = FALSE; /* are we in a translit range? */
2712 bool didrange = FALSE; /* did we just finish a range? */
2713 bool in_charclass = FALSE; /* within /[...]/ */
2714 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2715 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2716 to be UTF8? But, this can
2717 show as true when the source
2718 isn't utf8, as for example
2719 when it is entirely composed
2722 /* Note on sizing: The scanned constant is placed into sv, which is
2723 * initialized by newSV() assuming one byte of output for every byte of
2724 * input. This routine expects newSV() to allocate an extra byte for a
2725 * trailing NUL, which this routine will append if it gets to the end of
2726 * the input. There may be more bytes of input than output (eg., \N{LATIN
2727 * CAPITAL LETTER A}), or more output than input if the constant ends up
2728 * recoded to utf8, but each time a construct is found that might increase
2729 * the needed size, SvGROW() is called. Its size parameter each time is
2730 * based on the best guess estimate at the time, namely the length used so
2731 * far, plus the length the current construct will occupy, plus room for
2732 * the trailing NUL, plus one byte for every input byte still unscanned */
2736 UV literal_endpoint = 0;
2737 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2740 PERL_ARGS_ASSERT_SCAN_CONST;
2742 assert(PL_lex_inwhat != OP_TRANSR);
2743 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2744 /* If we are doing a trans and we know we want UTF8 set expectation */
2745 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2746 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2750 while (s < send || dorange) {
2752 /* get transliterations out of the way (they're most literal) */
2753 if (PL_lex_inwhat == OP_TRANS) {
2754 /* expand a range A-Z to the full set of characters. AIE! */
2756 I32 i; /* current expanded character */
2757 I32 min; /* first character in range */
2758 I32 max; /* last character in range */
2769 char * const c = (char*)utf8_hop((U8*)d, -1);
2773 *c = (char)UTF_TO_NATIVE(0xff);
2774 /* mark the range as done, and continue */
2780 i = d - SvPVX_const(sv); /* remember current offset */
2783 SvLEN(sv) + (has_utf8 ?
2784 (512 - UTF_CONTINUATION_MARK +
2787 /* How many two-byte within 0..255: 128 in UTF-8,
2788 * 96 in UTF-8-mod. */
2790 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2792 d = SvPVX(sv) + i; /* refresh d after realloc */
2796 for (j = 0; j <= 1; j++) {
2797 char * const c = (char*)utf8_hop((U8*)d, -1);
2798 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2804 max = (U8)0xff; /* only to \xff */
2805 uvmax = uv; /* \x{100} to uvmax */
2807 d = c; /* eat endpoint chars */
2812 d -= 2; /* eat the first char and the - */
2813 min = (U8)*d; /* first char in range */
2814 max = (U8)d[1]; /* last char in range */
2821 "Invalid range \"%c-%c\" in transliteration operator",
2822 (char)min, (char)max);
2826 if (literal_endpoint == 2 &&
2827 ((isLOWER(min) && isLOWER(max)) ||
2828 (isUPPER(min) && isUPPER(max)))) {
2830 for (i = min; i <= max; i++)
2832 *d++ = NATIVE_TO_NEED(has_utf8,i);
2834 for (i = min; i <= max; i++)
2836 *d++ = NATIVE_TO_NEED(has_utf8,i);
2841 for (i = min; i <= max; i++)
2844 const U8 ch = (U8)NATIVE_TO_UTF(i);
2845 if (UNI_IS_INVARIANT(ch))
2848 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2849 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2858 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2860 *d++ = (char)UTF_TO_NATIVE(0xff);
2862 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2866 /* mark the range as done, and continue */
2870 literal_endpoint = 0;
2875 /* range begins (ignore - as first or last char) */
2876 else if (*s == '-' && s+1 < send && s != start) {
2878 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2885 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2895 literal_endpoint = 0;
2896 native_range = TRUE;
2901 /* if we get here, we're not doing a transliteration */
2903 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2906 while (s1 >= start && *s1-- == '\\')
2909 in_charclass = TRUE;
2912 else if (*s == ']' && PL_lex_inpat && in_charclass) {
2915 while (s1 >= start && *s1-- == '\\')
2918 in_charclass = FALSE;
2921 /* skip for regexp comments /(?#comment)/, except for the last
2922 * char, which will be done separately.
2923 * Stop on (?{..}) and friends */
2925 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2927 while (s+1 < send && *s != ')')
2928 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2930 else if (!PL_lex_casemods && !in_charclass &&
2931 ( s[2] == '{' /* This should match regcomp.c */
2932 || (s[2] == '?' && s[3] == '{')))
2938 /* likewise skip #-initiated comments in //x patterns */
2939 else if (*s == '#' && PL_lex_inpat &&
2940 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2941 while (s+1 < send && *s != '\n')
2942 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2945 /* no further processing of single-quoted regex */
2946 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2947 goto default_action;
2949 /* check for embedded arrays
2950 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2952 else if (*s == '@' && s[1]) {
2953 if (isALNUM_lazy_if(s+1,UTF))
2955 if (strchr(":'{$", s[1]))
2957 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2958 break; /* in regexp, neither @+ nor @- are interpolated */
2961 /* check for embedded scalars. only stop if we're sure it's a
2964 else if (*s == '$') {
2965 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2967 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2969 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2970 "Possible unintended interpolation of $\\ in regex");
2972 break; /* in regexp, $ might be tail anchor */
2976 /* End of else if chain - OP_TRANS rejoin rest */
2979 if (*s == '\\' && s+1 < send) {
2980 char* e; /* Can be used for ending '}', etc. */
2984 /* warn on \1 - \9 in substitution replacements, but note that \11
2985 * is an octal; and \19 is \1 followed by '9' */
2986 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2987 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2989 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2994 /* string-change backslash escapes */
2995 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
2999 /* In a pattern, process \N, but skip any other backslash escapes.
3000 * This is because we don't want to translate an escape sequence
3001 * into a meta symbol and have the regex compiler use the meta
3002 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3003 * in spite of this, we do have to process \N here while the proper
3004 * charnames handler is in scope. See bugs #56444 and #62056.
3005 * There is a complication because \N in a pattern may also stand
3006 * for 'match a non-nl', and not mean a charname, in which case its
3007 * processing should be deferred to the regex compiler. To be a
3008 * charname it must be followed immediately by a '{', and not look
3009 * like \N followed by a curly quantifier, i.e., not something like
3010 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3012 else if (PL_lex_inpat
3015 || regcurly(s + 1)))
3017 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3018 goto default_action;
3023 /* quoted - in transliterations */
3025 if (PL_lex_inwhat == OP_TRANS) {
3033 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3034 "Unrecognized escape \\%c passed through",
3036 /* default action is to copy the quoted character */
3037 goto default_action;
3040 /* eg. \132 indicates the octal constant 0132 */
3041 case '0': case '1': case '2': case '3':
3042 case '4': case '5': case '6': case '7':
3046 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3049 goto NUM_ESCAPE_INSERT;
3051 /* eg. \o{24} indicates the octal constant \024 */
3057 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3063 goto NUM_ESCAPE_INSERT;
3066 /* eg. \x24 indicates the hex constant 0x24 */
3072 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3081 /* Insert oct or hex escaped character. There will always be
3082 * enough room in sv since such escapes will be longer than any
3083 * UTF-8 sequence they can end up as, except if they force us
3084 * to recode the rest of the string into utf8 */
3086 /* Here uv is the ordinal of the next character being added in
3087 * unicode (converted from native). */
3088 if (!UNI_IS_INVARIANT(uv)) {
3089 if (!has_utf8 && uv > 255) {
3090 /* Might need to recode whatever we have accumulated so
3091 * far if it contains any chars variant in utf8 or
3094 SvCUR_set(sv, d - SvPVX_const(sv));
3097 /* See Note on sizing above. */
3098 sv_utf8_upgrade_flags_grow(sv,
3099 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3100 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3101 d = SvPVX(sv) + SvCUR(sv);
3106 d = (char*)uvuni_to_utf8((U8*)d, uv);
3107 if (PL_lex_inwhat == OP_TRANS &&
3108 PL_sublex_info.sub_op) {
3109 PL_sublex_info.sub_op->op_private |=
3110 (PL_lex_repl ? OPpTRANS_FROM_UTF
3114 if (uv > 255 && !dorange)
3115 native_range = FALSE;
3128 /* In a non-pattern \N must be a named character, like \N{LATIN
3129 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3130 * mean to match a non-newline. For non-patterns, named
3131 * characters are converted to their string equivalents. In
3132 * patterns, named characters are not converted to their
3133 * ultimate forms for the same reasons that other escapes
3134 * aren't. Instead, they are converted to the \N{U+...} form
3135 * to get the value from the charnames that is in effect right
3136 * now, while preserving the fact that it was a named character
3137 * so that the regex compiler knows this */
3139 /* This section of code doesn't generally use the
3140 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3141 * a close examination of this macro and determined it is a
3142 * no-op except on utfebcdic variant characters. Every
3143 * character generated by this that would normally need to be
3144 * enclosed by this macro is invariant, so the macro is not
3145 * needed, and would complicate use of copy(). XXX There are
3146 * other parts of this file where the macro is used
3147 * inconsistently, but are saved by it being a no-op */
3149 /* The structure of this section of code (besides checking for
3150 * errors and upgrading to utf8) is:
3151 * Further disambiguate between the two meanings of \N, and if
3152 * not a charname, go process it elsewhere
3153 * If of form \N{U+...}, pass it through if a pattern;
3154 * otherwise convert to utf8
3155 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3156 * pattern; otherwise convert to utf8 */
3158 /* Here, s points to the 'N'; the test below is guaranteed to
3159 * succeed if we are being called on a pattern as we already
3160 * know from a test above that the next character is a '{'.
3161 * On a non-pattern \N must mean 'named sequence, which
3162 * requires braces */
3165 yyerror("Missing braces on \\N{}");
3170 /* If there is no matching '}', it is an error. */
3171 if (! (e = strchr(s, '}'))) {
3172 if (! PL_lex_inpat) {
3173 yyerror("Missing right brace on \\N{}");
3175 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3180 /* Here it looks like a named character */
3184 /* XXX This block is temporary code. \N{} implies that the
3185 * pattern is to have Unicode semantics, and therefore
3186 * currently has to be encoded in utf8. By putting it in
3187 * utf8 now, we save a whole pass in the regular expression
3188 * compiler. Once that code is changed so Unicode
3189 * semantics doesn't necessarily have to be in utf8, this
3190 * block should be removed. However, the code that parses
3191 * the output of this would have to be changed to not
3192 * necessarily expect utf8 */
3194 SvCUR_set(sv, d - SvPVX_const(sv));
3197 /* See Note on sizing above. */
3198 sv_utf8_upgrade_flags_grow(sv,
3199 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3200 /* 5 = '\N{' + cur char + NUL */
3201 (STRLEN)(send - s) + 5);
3202 d = SvPVX(sv) + SvCUR(sv);
3207 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3208 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3209 | PERL_SCAN_DISALLOW_PREFIX;
3212 /* For \N{U+...}, the '...' is a unicode value even on
3213 * EBCDIC machines */
3214 s += 2; /* Skip to next char after the 'U+' */
3216 uv = grok_hex(s, &len, &flags, NULL);
3217 if (len == 0 || len != (STRLEN)(e - s)) {
3218 yyerror("Invalid hexadecimal number in \\N{U+...}");
3225 /* On non-EBCDIC platforms, pass through to the regex
3226 * compiler unchanged. The reason we evaluated the
3227 * number above is to make sure there wasn't a syntax
3228 * error. But on EBCDIC we convert to native so
3229 * downstream code can continue to assume it's native
3231 s -= 5; /* Include the '\N{U+' */
3233 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3236 (unsigned int) UNI_TO_NATIVE(uv));
3238 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3242 else { /* Not a pattern: convert the hex to string */
3244 /* If destination is not in utf8, unconditionally
3245 * recode it to be so. This is because \N{} implies
3246 * Unicode semantics, and scalars have to be in utf8
3247 * to guarantee those semantics */
3249 SvCUR_set(sv, d - SvPVX_const(sv));
3252 /* See Note on sizing above. */
3253 sv_utf8_upgrade_flags_grow(
3255 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3256 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3257 d = SvPVX(sv) + SvCUR(sv);
3261 /* Add the string to the output */
3262 if (UNI_IS_INVARIANT(uv)) {
3265 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3268 else { /* Here is \N{NAME} but not \N{U+...}. */
3270 SV *res; /* result from charnames */
3271 const char *str; /* the string in 'res' */
3272 STRLEN len; /* its length */
3274 /* Get the value for NAME */
3275 res = newSVpvn(s, e - s);
3276 res = new_constant( NULL, 0, "charnames",
3277 /* includes all of: \N{...} */
3278 res, NULL, s - 3, e - s + 4 );
3280 /* Most likely res will be in utf8 already since the
3281 * standard charnames uses pack U, but a custom translator
3282 * can leave it otherwise, so make sure. XXX This can be
3283 * revisited to not have charnames use utf8 for characters
3284 * that don't need it when regexes don't have to be in utf8
3285 * for Unicode semantics. If doing so, remember EBCDIC */
3286 sv_utf8_upgrade(res);
3287 str = SvPV_const(res, len);
3289 /* Don't accept malformed input */
3290 if (! is_utf8_string((U8 *) str, len)) {
3291 yyerror("Malformed UTF-8 returned by \\N");
3293 else if (PL_lex_inpat) {
3295 if (! len) { /* The name resolved to an empty string */
3296 Copy("\\N{}", d, 4, char);
3300 /* In order to not lose information for the regex
3301 * compiler, pass the result in the specially made
3302 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3303 * the code points in hex of each character
3304 * returned by charnames */
3306 const char *str_end = str + len;
3307 STRLEN char_length; /* cur char's byte length */
3308 STRLEN output_length; /* and the number of bytes
3309 after this is translated
3311 const STRLEN off = d - SvPVX_const(sv);
3313 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3314 * max('U+', '.'); and 1 for NUL */
3315 char hex_string[2 * UTF8_MAXBYTES + 5];
3317 /* Get the first character of the result. */
3318 U32 uv = utf8n_to_uvuni((U8 *) str,
3323 /* The call to is_utf8_string() above hopefully
3324 * guarantees that there won't be an error. But
3325 * it's easy here to make sure. The function just
3326 * above warns and returns 0 if invalid utf8, but
3327 * it can also return 0 if the input is validly a
3328 * NUL. Disambiguate */
3329 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3330 uv = UNICODE_REPLACEMENT;
3333 /* Convert first code point to hex, including the
3334 * boiler plate before it. For all these, we
3335 * convert to native format so that downstream code
3336 * can continue to assume the input is native */
3338 my_snprintf(hex_string, sizeof(hex_string),
3340 (unsigned int) UNI_TO_NATIVE(uv));
3342 /* Make sure there is enough space to hold it */
3343 d = off + SvGROW(sv, off
3345 + (STRLEN)(send - e)
3346 + 2); /* '}' + NUL */
3348 Copy(hex_string, d, output_length, char);
3351 /* For each subsequent character, append dot and
3352 * its ordinal in hex */
3353 while ((str += char_length) < str_end) {
3354 const STRLEN off = d - SvPVX_const(sv);
3355 U32 uv = utf8n_to_uvuni((U8 *) str,
3359 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3360 uv = UNICODE_REPLACEMENT;
3364 my_snprintf(hex_string, sizeof(hex_string),
3366 (unsigned int) UNI_TO_NATIVE(uv));
3368 d = off + SvGROW(sv, off
3370 + (STRLEN)(send - e)
3371 + 2); /* '}' + NUL */
3372 Copy(hex_string, d, output_length, char);
3376 *d++ = '}'; /* Done. Add the trailing brace */
3379 else { /* Here, not in a pattern. Convert the name to a
3382 /* If destination is not in utf8, unconditionally
3383 * recode it to be so. This is because \N{} implies
3384 * Unicode semantics, and scalars have to be in utf8
3385 * to guarantee those semantics */
3387 SvCUR_set(sv, d - SvPVX_const(sv));
3390 /* See Note on sizing above. */
3391 sv_utf8_upgrade_flags_grow(sv,
3392 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3393 len + (STRLEN)(send - s) + 1);
3394 d = SvPVX(sv) + SvCUR(sv);
3396 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3398 /* See Note on sizing above. (NOTE: SvCUR() is not
3399 * set correctly here). */
3400 const STRLEN off = d - SvPVX_const(sv);
3401 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3403 Copy(str, d, len, char);
3408 /* Deprecate non-approved name syntax */
3409 if (ckWARN_d(WARN_DEPRECATED)) {
3410 bool problematic = FALSE;
3413 /* For non-ut8 input, look to see that the first
3414 * character is an alpha, then loop through the rest
3415 * checking that each is a continuation */
3417 if (! isALPHAU(*i)) problematic = TRUE;
3418 else for (i = s + 1; i < e; i++) {
3419 if (isCHARNAME_CONT(*i)) continue;
3425 /* Similarly for utf8. For invariants can check
3426 * directly. We accept anything above the latin1
3427 * range because it is immaterial to Perl if it is
3428 * correct or not, and is expensive to check. But
3429 * it is fairly easy in the latin1 range to convert
3430 * the variants into a single character and check
3432 if (UTF8_IS_INVARIANT(*i)) {
3433 if (! isALPHAU(*i)) problematic = TRUE;
3434 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3435 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3441 if (! problematic) for (i = s + UTF8SKIP(s);
3445 if (UTF8_IS_INVARIANT(*i)) {
3446 if (isCHARNAME_CONT(*i)) continue;
3447 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3449 } else if (isCHARNAME_CONT(
3451 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3460 /* The e-i passed to the final %.*s makes sure that
3461 * should the trailing NUL be missing that this
3462 * print won't run off the end of the string */
3463 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3464 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3465 (int)(i - s + 1), s, (int)(e - i), i + 1);
3468 } /* End \N{NAME} */
3471 native_range = FALSE; /* \N{} is defined to be Unicode */
3473 s = e + 1; /* Point to just after the '}' */
3476 /* \c is a control character */
3480 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3483 yyerror("Missing control char name in \\c");
3487 /* printf-style backslashes, formfeeds, newlines, etc */
3489 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3492 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3495 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3498 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3501 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3504 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3507 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3513 } /* end if (backslash) */
3520 /* If we started with encoded form, or already know we want it,
3521 then encode the next character */
3522 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3526 /* One might think that it is wasted effort in the case of the
3527 * source being utf8 (this_utf8 == TRUE) to take the next character
3528 * in the source, convert it to an unsigned value, and then convert
3529 * it back again. But the source has not been validated here. The
3530 * routine that does the conversion checks for errors like
3533 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3534 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3536 SvCUR_set(sv, d - SvPVX_const(sv));
3539 /* See Note on sizing above. */
3540 sv_utf8_upgrade_flags_grow(sv,
3541 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3542 need + (STRLEN)(send - s) + 1);
3543 d = SvPVX(sv) + SvCUR(sv);
3545 } else if (need > len) {
3546 /* encoded value larger than old, may need extra space (NOTE:
3547 * SvCUR() is not set correctly here). See Note on sizing
3549 const STRLEN off = d - SvPVX_const(sv);
3550 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3554 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3556 if (uv > 255 && !dorange)
3557 native_range = FALSE;
3561 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3563 } /* while loop to process each character */
3565 /* terminate the string and set up the sv */
3567 SvCUR_set(sv, d - SvPVX_const(sv));
3568 if (SvCUR(sv) >= SvLEN(sv))
3569 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3570 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3573 if (PL_encoding && !has_utf8) {
3574 sv_recode_to_utf8(sv, PL_encoding);
3580 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3581 PL_sublex_info.sub_op->op_private |=
3582 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3586 /* shrink the sv if we allocated more than we used */
3587 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3588 SvPV_shrink_to_cur(sv);
3591 /* return the substring (via pl_yylval) only if we parsed anything */
3592 if (s > PL_bufptr) {
3593 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3594 const char *const key = PL_lex_inpat ? "qr" : "q";
3595 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3599 if (PL_lex_inwhat == OP_TRANS) {
3602 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3605 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3613 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3616 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3623 * Returns TRUE if there's more to the expression (e.g., a subscript),
3626 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3628 * ->[ and ->{ return TRUE
3629 * { and [ outside a pattern are always subscripts, so return TRUE
3630 * if we're outside a pattern and it's not { or [, then return FALSE
3631 * if we're in a pattern and the first char is a {
3632 * {4,5} (any digits around the comma) returns FALSE
3633 * if we're in a pattern and the first char is a [
3635 * [SOMETHING] has a funky algorithm to decide whether it's a
3636 * character class or not. It has to deal with things like
3637 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3638 * anything else returns TRUE
3641 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3644 S_intuit_more(pTHX_ register char *s)
3648 PERL_ARGS_ASSERT_INTUIT_MORE;
3650 if (PL_lex_brackets)
3652 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3654 if (*s != '{' && *s != '[')
3659 /* In a pattern, so maybe we have {n,m}. */
3667 /* On the other hand, maybe we have a character class */
3670 if (*s == ']' || *s == '^')
3673 /* this is terrifying, and it works */
3674 int weight = 2; /* let's weigh the evidence */
3676 unsigned char un_char = 255, last_un_char;
3677 const char * const send = strchr(s,']');
3678 char tmpbuf[sizeof PL_tokenbuf * 4];
3680 if (!send) /* has to be an expression */
3683 Zero(seen,256,char);
3686 else if (isDIGIT(*s)) {
3688 if (isDIGIT(s[1]) && s[2] == ']')
3694 for (; s < send; s++) {
3695 last_un_char = un_char;
3696 un_char = (unsigned char)*s;
3701 weight -= seen[un_char] * 10;
3702 if (isALNUM_lazy_if(s+1,UTF)) {
3704 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3705 len = (int)strlen(tmpbuf);
3706 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3707 UTF ? SVf_UTF8 : 0, SVt_PV))
3712 else if (*s == '$' && s[1] &&
3713 strchr("[#!%*<>()-=",s[1])) {
3714 if (/*{*/ strchr("])} =",s[2]))
3723 if (strchr("wds]",s[1]))
3725 else if (seen[(U8)'\''] || seen[(U8)'"'])
3727 else if (strchr("rnftbxcav",s[1]))
3729 else if (isDIGIT(s[1])) {
3731 while (s[1] && isDIGIT(s[1]))
3741 if (strchr("aA01! ",last_un_char))
3743 if (strchr("zZ79~",s[1]))
3745 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3746 weight -= 5; /* cope with negative subscript */
3749 if (!isALNUM(last_un_char)
3750 && !(last_un_char == '$' || last_un_char == '@'
3751 || last_un_char == '&')
3752 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3757 if (keyword(tmpbuf, d - tmpbuf, 0))
3760 if (un_char == last_un_char + 1)
3762 weight -= seen[un_char];
3767 if (weight >= 0) /* probably a character class */
3777 * Does all the checking to disambiguate
3779 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3780 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3782 * First argument is the stuff after the first token, e.g. "bar".
3784 * Not a method if foo is a filehandle.
3785 * Not a method if foo is a subroutine prototyped to take a filehandle.
3786 * Not a method if it's really "Foo $bar"
3787 * Method if it's "foo $bar"
3788 * Not a method if it's really "print foo $bar"
3789 * Method if it's really "foo package::" (interpreted as package->foo)
3790 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3791 * Not a method if bar is a filehandle or package, but is quoted with
3796 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3799 char *s = start + (*start == '$');
3800 char tmpbuf[sizeof PL_tokenbuf];
3807 PERL_ARGS_ASSERT_INTUIT_METHOD;
3809 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3811 if (cv && SvPOK(cv)) {
3812 const char *proto = CvPROTO(cv);
3820 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3821 /* start is the beginning of the possible filehandle/object,
3822 * and s is the end of it
3823 * tmpbuf is a copy of it
3826 if (*start == '$') {
3827 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3828 isUPPER(*PL_tokenbuf))
3831 len = start - SvPVX(PL_linestr);
3835 start = SvPVX(PL_linestr) + len;
3839 return *s == '(' ? FUNCMETH : METHOD;
3841 if (!keyword(tmpbuf, len, 0)) {
3842 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3846 soff = s - SvPVX(PL_linestr);
3850 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3851 if (indirgv && GvCVu(indirgv))
3853 /* filehandle or package name makes it a method */
3854 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3856 soff = s - SvPVX(PL_linestr);
3859 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3860 return 0; /* no assumptions -- "=>" quotes bareword */
3862 start_force(PL_curforce);
3863 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3864 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3865 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3867 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3868 ( UTF ? SVf_UTF8 : 0 )));
3873 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3875 return *s == '(' ? FUNCMETH : METHOD;
3881 /* Encoded script support. filter_add() effectively inserts a
3882 * 'pre-processing' function into the current source input stream.
3883 * Note that the filter function only applies to the current source file
3884 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3886 * The datasv parameter (which may be NULL) can be used to pass
3887 * private data to this instance of the filter. The filter function
3888 * can recover the SV using the FILTER_DATA macro and use it to
3889 * store private buffers and state information.
3891 * The supplied datasv parameter is upgraded to a PVIO type
3892 * and the IoDIRP/IoANY field is used to store the function pointer,
3893 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3894 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3895 * private use must be set using malloc'd pointers.
3899 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3908 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3909 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3911 if (!PL_rsfp_filters)
3912 PL_rsfp_filters = newAV();
3915 SvUPGRADE(datasv, SVt_PVIO);
3916 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3917 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3918 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3919 FPTR2DPTR(void *, IoANY(datasv)),
3920 SvPV_nolen(datasv)));
3921 av_unshift(PL_rsfp_filters, 1);
3922 av_store(PL_rsfp_filters, 0, datasv) ;
3924 !PL_parser->filtered
3925 && PL_parser->lex_flags & LEX_EVALBYTES
3926 && PL_bufptr < PL_bufend
3928 const char *s = PL_bufptr;
3929 while (s < PL_bufend) {
3931 SV *linestr = PL_parser->linestr;
3932 char *buf = SvPVX(linestr);
3933 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3934 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3935 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3936 STRLEN const linestart_pos = PL_parser->linestart - buf;
3937 STRLEN const last_uni_pos =
3938 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3939 STRLEN const last_lop_pos =
3940 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3941 av_push(PL_rsfp_filters, linestr);
3942 PL_parser->linestr =
3943 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3944 buf = SvPVX(PL_parser->linestr);
3945 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3946 PL_parser->bufptr = buf + bufptr_pos;
3947 PL_parser->oldbufptr = buf + oldbufptr_pos;
3948 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3949 PL_parser->linestart = buf + linestart_pos;
3950 if (PL_parser->last_uni)
3951 PL_parser->last_uni = buf + last_uni_pos;
3952 if (PL_parser->last_lop)
3953 PL_parser->last_lop = buf + last_lop_pos;
3954 SvLEN(linestr) = SvCUR(linestr);
3955 SvCUR(linestr) = s-SvPVX(linestr);
3956 PL_parser->filtered = 1;
3966 /* Delete most recently added instance of this filter function. */
3968 Perl_filter_del(pTHX_ filter_t funcp)
3973 PERL_ARGS_ASSERT_FILTER_DEL;
3976 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3977 FPTR2DPTR(void*, funcp)));
3979 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3981 /* if filter is on top of stack (usual case) just pop it off */
3982 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3983 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3984 sv_free(av_pop(PL_rsfp_filters));
3988 /* we need to search for the correct entry and clear it */
3989 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3993 /* Invoke the idxth filter function for the current rsfp. */
3994 /* maxlen 0 = read one text line */
3996 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4001 /* This API is bad. It should have been using unsigned int for maxlen.
4002 Not sure if we want to change the API, but if not we should sanity
4003 check the value here. */
4004 unsigned int correct_length
4013 PERL_ARGS_ASSERT_FILTER_READ;
4015 if (!PL_parser || !PL_rsfp_filters)
4017 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4018 /* Provide a default input filter to make life easy. */
4019 /* Note that we append to the line. This is handy. */
4020 DEBUG_P(PerlIO_printf(Perl_debug_log,
4021 "filter_read %d: from rsfp\n", idx));
4022 if (correct_length) {
4025 const int old_len = SvCUR(buf_sv);
4027 /* ensure buf_sv is large enough */
4028 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4029 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4030 correct_length)) <= 0) {
4031 if (PerlIO_error(PL_rsfp))
4032 return -1; /* error */
4034 return 0 ; /* end of file */
4036 SvCUR_set(buf_sv, old_len + len) ;
4037 SvPVX(buf_sv)[old_len + len] = '\0';
4040 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4041 if (PerlIO_error(PL_rsfp))
4042 return -1; /* error */
4044 return 0 ; /* end of file */
4047 return SvCUR(buf_sv);
4049 /* Skip this filter slot if filter has been deleted */
4050 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4051 DEBUG_P(PerlIO_printf(Perl_debug_log,
4052 "filter_read %d: skipped (filter deleted)\n",
4054 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4056 if (SvTYPE(datasv) != SVt_PVIO) {
4057 if (correct_length) {
4059 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4060 if (!remainder) return 0; /* eof */
4061 if (correct_length > remainder) correct_length = remainder;
4062 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4063 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4066 const char *s = SvEND(datasv);
4067 const char *send = SvPVX(datasv) + SvLEN(datasv);
4075 if (s == send) return 0; /* eof */
4076 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4077 SvCUR_set(datasv, s-SvPVX(datasv));
4079 return SvCUR(buf_sv);
4081 /* Get function pointer hidden within datasv */
4082 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4083 DEBUG_P(PerlIO_printf(Perl_debug_log,
4084 "filter_read %d: via function %p (%s)\n",
4085 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4086 /* Call function. The function is expected to */
4087 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4088 /* Return: <0:error, =0:eof, >0:not eof */
4089 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4093 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4097 PERL_ARGS_ASSERT_FILTER_GETS;
4099 #ifdef PERL_CR_FILTER
4100 if (!PL_rsfp_filters) {
4101 filter_add(S_cr_textfilter,NULL);
4104 if (PL_rsfp_filters) {
4106 SvCUR_set(sv, 0); /* start with empty line */
4107 if (FILTER_READ(0, sv, 0) > 0)
4108 return ( SvPVX(sv) ) ;
4113 return (sv_gets(sv, PL_rsfp, append));
4117 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4122 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4124 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4128 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4129 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4131 return GvHV(gv); /* Foo:: */
4134 /* use constant CLASS => 'MyClass' */
4135 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4136 if (gv && GvCV(gv)) {
4137 SV * const sv = cv_const_sv(GvCV(gv));
4139 pkgname = SvPV_const(sv, len);
4142 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4146 * S_readpipe_override
4147 * Check whether readpipe() is overridden, and generates the appropriate
4148 * optree, provided sublex_start() is called afterwards.
4151 S_readpipe_override(pTHX)
4154 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4155 pl_yylval.ival = OP_BACKTICK;
4157 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4159 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4160 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4161 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4163 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4164 op_append_elem(OP_LIST,
4165 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4166 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4173 * The intent of this yylex wrapper is to minimize the changes to the
4174 * tokener when we aren't interested in collecting madprops. It remains
4175 * to be seen how successful this strategy will be...
4182 char *s = PL_bufptr;
4184 /* make sure PL_thiswhite is initialized */
4188 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4189 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4190 return S_pending_ident(aTHX);
4192 /* previous token ate up our whitespace? */
4193 if (!PL_lasttoke && PL_nextwhite) {
4194 PL_thiswhite = PL_nextwhite;
4198 /* isolate the token, and figure out where it is without whitespace */
4199 PL_realtokenstart = -1;
4203 assert(PL_curforce < 0);
4205 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4206 if (!PL_thistoken) {
4207 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4208 PL_thistoken = newSVpvs("");
4210 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4211 PL_thistoken = newSVpvn(tstart, s - tstart);
4214 if (PL_thismad) /* install head */
4215 CURMAD('X', PL_thistoken);
4218 /* last whitespace of a sublex? */
4219 if (optype == ')' && PL_endwhite) {
4220 CURMAD('X', PL_endwhite);
4225 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4226 if (!PL_thiswhite && !PL_endwhite && !optype) {
4227 sv_free(PL_thistoken);
4232 /* put off final whitespace till peg */
4233 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4234 PL_nextwhite = PL_thiswhite;
4237 else if (PL_thisopen) {
4238 CURMAD('q', PL_thisopen);
4240 sv_free(PL_thistoken);
4244 /* Store actual token text as madprop X */
4245 CURMAD('X', PL_thistoken);
4249 /* add preceding whitespace as madprop _ */
4250 CURMAD('_', PL_thiswhite);
4254 /* add quoted material as madprop = */
4255 CURMAD('=', PL_thisstuff);
4259 /* add terminating quote as madprop Q */
4260 CURMAD('Q', PL_thisclose);
4264 /* special processing based on optype */
4268 /* opval doesn't need a TOKEN since it can already store mp */
4279 if (pl_yylval.opval)
4280 append_madprops(PL_thismad, pl_yylval.opval, 0);
4288 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4297 /* remember any fake bracket that lexer is about to discard */
4298 if (PL_lex_brackets == 1 &&
4299 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4302 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4305 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4306 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4309 break; /* don't bother looking for trailing comment */
4318 /* attach a trailing comment to its statement instead of next token */
4322 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4324 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4326 if (*s == '\n' || *s == '#') {
4327 while (s < PL_bufend && *s != '\n')
4331 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4332 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4345 /* Create new token struct. Note: opvals return early above. */
4346 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4353 S_tokenize_use(pTHX_ int is_use, char *s) {
4356 PERL_ARGS_ASSERT_TOKENIZE_USE;
4358 if (PL_expect != XSTATE)
4359 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4360 is_use ? "use" : "no"));
4363 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4364 s = force_version(s, TRUE);
4365 if (*s == ';' || *s == '}'
4366 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4367 start_force(PL_curforce);
4368 NEXTVAL_NEXTTOKE.opval = NULL;
4371 else if (*s == 'v') {
4372 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4373 s = force_version(s, FALSE);
4377 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4378 s = force_version(s, FALSE);
4380 pl_yylval.ival = is_use;
4384 static const char* const exp_name[] =
4385 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4386 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4390 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4392 S_word_takes_any_delimeter(char *p, STRLEN len)
4394 return (len == 1 && strchr("msyq", p[0])) ||
4396 (p[0] == 't' && p[1] == 'r') ||
4397 (p[0] == 'q' && strchr("qwxr", p[1]))));
4403 Works out what to call the token just pulled out of the input
4404 stream. The yacc parser takes care of taking the ops we return and
4405 stitching them into a tree.
4411 if read an identifier
4412 if we're in a my declaration
4413 croak if they tried to say my($foo::bar)
4414 build the ops for a my() declaration
4415 if it's an access to a my() variable
4416 are we in a sort block?
4417 croak if my($a); $a <=> $b
4418 build ops for access to a my() variable
4419 if in a dq string, and they've said @foo and we can't find @foo
4421 build ops for a bareword
4422 if we already built the token before, use it.
4427 #pragma segment Perl_yylex
4433 char *s = PL_bufptr;
4440 /* orig_keyword, gvp, and gv are initialized here because
4441 * jump to the label just_a_word_zero can bypass their
4442 * initialization later. */
4443 I32 orig_keyword = 0;
4448 SV* tmp = newSVpvs("");
4449 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4450 (IV)CopLINE(PL_curcop),
4451 lex_state_names[PL_lex_state],
4452 exp_name[PL_expect],
4453 pv_display(tmp, s, strlen(s), 0, 60));
4456 /* check if there's an identifier for us to look at */
4457 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4458 return REPORT(S_pending_ident(aTHX));
4460 /* no identifier pending identification */
4462 switch (PL_lex_state) {
4464 case LEX_NORMAL: /* Some compilers will produce faster */
4465 case LEX_INTERPNORMAL: /* code if we comment these out. */
4469 /* when we've already built the next token, just pull it out of the queue */
4473 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4475 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4476 PL_nexttoke[PL_lasttoke].next_mad = 0;
4477 if (PL_thismad && PL_thismad->mad_key == '_') {
4478 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4479 PL_thismad->mad_val = 0;
4480 mad_free(PL_thismad);
4485 PL_lex_state = PL_lex_defer;
4486 PL_expect = PL_lex_expect;
4487 PL_lex_defer = LEX_NORMAL;
4488 if (!PL_nexttoke[PL_lasttoke].next_type)
4493 pl_yylval = PL_nextval[PL_nexttoke];
4495 PL_lex_state = PL_lex_defer;
4496 PL_expect = PL_lex_expect;
4497 PL_lex_defer = LEX_NORMAL;
4503 next_type = PL_nexttoke[PL_lasttoke].next_type;
4505 next_type = PL_nexttype[PL_nexttoke];
4507 if (next_type & (7<<24)) {
4508 if (next_type & (1<<24)) {
4509 if (PL_lex_brackets > 100)
4510 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4511 PL_lex_brackstack[PL_lex_brackets++] =
4512 (char) ((next_type >> 16) & 0xff);
4514 if (next_type & (2<<24))
4515 PL_lex_allbrackets++;
4516 if (next_type & (4<<24))
4517 PL_lex_allbrackets--;
4518 next_type &= 0xffff;
4520 if (S_is_opval_token(next_type) && pl_yylval.opval)
4521 pl_yylval.opval->op_savefree = 0; /* release */
4522 return REPORT(next_type);
4525 /* interpolated case modifiers like \L \U, including \Q and \E.
4526 when we get here, PL_bufptr is at the \
4528 case LEX_INTERPCASEMOD:
4530 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4532 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4533 PL_bufptr, PL_bufend, *PL_bufptr);
4535 /* handle \E or end of string */
4536 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4538 if (PL_lex_casemods) {
4539 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4540 PL_lex_casestack[PL_lex_casemods] = '\0';
4542 if (PL_bufptr != PL_bufend
4543 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4544 || oldmod == 'F')) {
4546 PL_lex_state = LEX_INTERPCONCAT;
4549 PL_thistoken = newSVpvs("\\E");
4552 PL_lex_allbrackets--;
4555 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4556 /* Got an unpaired \E */
4557 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4558 "Useless use of \\E");
4561 while (PL_bufptr != PL_bufend &&
4562 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4564 PL_thiswhite = newSVpvs("");
4565 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4569 if (PL_bufptr != PL_bufend)
4572 PL_lex_state = LEX_INTERPCONCAT;
4576 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4577 "### Saw case modifier\n"); });
4579 if (s[1] == '\\' && s[2] == 'E') {
4582 PL_thiswhite = newSVpvs("");
4583 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4586 PL_lex_state = LEX_INTERPCONCAT;
4591 if (!PL_madskills) /* when just compiling don't need correct */
4592 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4593 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4594 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4595 (strchr(PL_lex_casestack, 'L')
4596 || strchr(PL_lex_casestack, 'U')
4597 || strchr(PL_lex_casestack, 'F'))) {
4598 PL_lex_casestack[--PL_lex_casemods] = '\0';
4599 PL_lex_allbrackets--;
4602 if (PL_lex_casemods > 10)
4603 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4604 PL_lex_casestack[PL_lex_casemods++] = *s;
4605 PL_lex_casestack[PL_lex_casemods] = '\0';
4606 PL_lex_state = LEX_INTERPCONCAT;
4607 start_force(PL_curforce);
4608 NEXTVAL_NEXTTOKE.ival = 0;
4609 force_next((2<<24)|'(');
4610 start_force(PL_curforce);
4612 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4614 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4616 NEXTVAL_NEXTTOKE.ival = OP_LC;
4618 NEXTVAL_NEXTTOKE.ival = OP_UC;
4620 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4622 NEXTVAL_NEXTTOKE.ival = OP_FC;
4624 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4626 SV* const tmpsv = newSVpvs("\\ ");
4627 /* replace the space with the character we want to escape
4629 SvPVX(tmpsv)[1] = *s;
4635 if (PL_lex_starts) {
4641 sv_free(PL_thistoken);
4642 PL_thistoken = newSVpvs("");
4645 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4646 if (PL_lex_casemods == 1 && PL_lex_inpat)
4655 case LEX_INTERPPUSH:
4656 return REPORT(sublex_push());
4658 case LEX_INTERPSTART:
4659 if (PL_bufptr == PL_bufend)
4660 return REPORT(sublex_done());
4661 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4662 "### Interpolated variable\n"); });
4664 PL_lex_dojoin = (*PL_bufptr == '@');
4665 PL_lex_state = LEX_INTERPNORMAL;
4666 if (PL_lex_dojoin) {
4667 start_force(PL_curforce);
4668 NEXTVAL_NEXTTOKE.ival = 0;
4670 start_force(PL_curforce);
4671 force_ident("\"", '$');
4672 start_force(PL_curforce);
4673 NEXTVAL_NEXTTOKE.ival = 0;
4675 start_force(PL_curforce);
4676 NEXTVAL_NEXTTOKE.ival = 0;
4677 force_next((2<<24)|'(');
4678 start_force(PL_curforce);
4679 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4682 /* Convert (?{...}) and friends to 'do {...}' */
4683 if (PL_lex_inpat && *PL_bufptr == '(') {
4684 PL_sublex_info.re_eval_start = PL_bufptr;
4686 if (*PL_bufptr != '{')
4688 start_force(PL_curforce);
4689 /* XXX probably need a CURMAD(something) here */
4690 PL_expect = XTERMBLOCK;
4694 if (PL_lex_starts++) {
4699 sv_free(PL_thistoken);
4700 PL_thistoken = newSVpvs("");
4703 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4704 if (!PL_lex_casemods && PL_lex_inpat)
4711 case LEX_INTERPENDMAYBE:
4712 if (intuit_more(PL_bufptr)) {
4713 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4719 if (PL_lex_dojoin) {
4720 PL_lex_dojoin = FALSE;
4721 PL_lex_state = LEX_INTERPCONCAT;
4725 sv_free(PL_thistoken);
4726 PL_thistoken = newSVpvs("");
4729 PL_lex_allbrackets--;
4732 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4733 && SvEVALED(PL_lex_repl))
4735 if (PL_bufptr != PL_bufend)
4736 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4739 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4740 re_eval_str. If the here-doc body’s length equals the previous
4741 value of re_eval_start, re_eval_start will now be null. So
4742 check re_eval_str as well. */
4743 if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
4745 if (*PL_bufptr != ')')
4746 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4748 /* having compiled a (?{..}) expression, return the original
4749 * text too, as a const */
4750 if (PL_sublex_info.re_eval_str) {
4751 sv = PL_sublex_info.re_eval_str;
4752 PL_sublex_info.re_eval_str = NULL;
4753 SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start);
4754 SvPV_shrink_to_cur(sv);
4756 else sv = newSVpvn(PL_sublex_info.re_eval_start,
4757 PL_bufptr - PL_sublex_info.re_eval_start);
4758 start_force(PL_curforce);
4759 /* XXX probably need a CURMAD(something) here */
4760 NEXTVAL_NEXTTOKE.opval =
4761 (OP*)newSVOP(OP_CONST, 0,
4764 PL_sublex_info.re_eval_start = NULL;
4770 case LEX_INTERPCONCAT:
4772 if (PL_lex_brackets)
4773 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4774 (long) PL_lex_brackets);
4776 if (PL_bufptr == PL_bufend)
4777 return REPORT(sublex_done());
4779 /* m'foo' still needs to be parsed for possible (?{...}) */
4780 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4781 SV *sv = newSVsv(PL_linestr);
4783 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4787 s = scan_const(PL_bufptr);
4789 PL_lex_state = LEX_INTERPCASEMOD;
4791 PL_lex_state = LEX_INTERPSTART;
4794 if (s != PL_bufptr) {
4795 start_force(PL_curforce);
4797 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4799 NEXTVAL_NEXTTOKE = pl_yylval;
4802 if (PL_lex_starts++) {
4806 sv_free(PL_thistoken);
4807 PL_thistoken = newSVpvs("");
4810 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4811 if (!PL_lex_casemods && PL_lex_inpat)
4824 s = scan_formline(PL_bufptr);
4825 if (!PL_lex_formbrack)
4835 PL_oldoldbufptr = PL_oldbufptr;
4841 sv_free(PL_thistoken);
4844 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4848 if (isIDFIRST_lazy_if(s,UTF))
4851 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4852 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4854 SVs_TEMP | SVf_UTF8),
4855 10, UNI_DISPLAY_ISPRINT))
4856 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4857 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4858 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4859 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4867 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4871 goto fake_eof; /* emulate EOF on ^D or ^Z */
4877 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4880 if (PL_lex_brackets &&
4881 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4882 yyerror((const char *)
4884 ? "Format not terminated"
4885 : "Missing right curly or square bracket"));
4887 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4888 "### Tokener got EOF\n");
4892 if (s++ < PL_bufend)
4893 goto retry; /* ignore stray nulls */
4896 if (!PL_in_eval && !PL_preambled) {
4897 PL_preambled = TRUE;
4903 /* Generate a string of Perl code to load the debugger.
4904 * If PERL5DB is set, it will return the contents of that,
4905 * otherwise a compile-time require of perl5db.pl. */
4907 const char * const pdb = PerlEnv_getenv("PERL5DB");
4910 sv_setpv(PL_linestr, pdb);
4911 sv_catpvs(PL_linestr,";");
4913 SETERRNO(0,SS_NORMAL);
4914 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4917 sv_setpvs(PL_linestr,"");
4918 if (PL_preambleav) {
4919 SV **svp = AvARRAY(PL_preambleav);
4920 SV **const end = svp + AvFILLp(PL_preambleav);
4922 sv_catsv(PL_linestr, *svp);
4924 sv_catpvs(PL_linestr, ";");
4926 sv_free(MUTABLE_SV(PL_preambleav));
4927 PL_preambleav = NULL;
4930 sv_catpvs(PL_linestr,
4931 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4932 if (PL_minus_n || PL_minus_p) {
4933 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4935 sv_catpvs(PL_linestr,"chomp;");
4938 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4939 || *PL_splitstr == '"')
4940 && strchr(PL_splitstr + 1, *PL_splitstr))
4941 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4943 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4944 bytes can be used as quoting characters. :-) */
4945 const char *splits = PL_splitstr;
4946 sv_catpvs(PL_linestr, "our @F=split(q\0");
4949 if (*splits == '\\')
4950 sv_catpvn(PL_linestr, splits, 1);
4951 sv_catpvn(PL_linestr, splits, 1);
4952 } while (*splits++);
4953 /* This loop will embed the trailing NUL of
4954 PL_linestr as the last thing it does before
4956 sv_catpvs(PL_linestr, ");");
4960 sv_catpvs(PL_linestr,"our @F=split(' ');");
4963 sv_catpvs(PL_linestr, "\n");
4964 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4965 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4966 PL_last_lop = PL_last_uni = NULL;
4967 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4968 update_debugger_info(PL_linestr, NULL, 0);
4973 bof = PL_rsfp ? TRUE : FALSE;
4976 fake_eof = LEX_FAKE_EOF;
4978 PL_bufptr = PL_bufend;
4979 CopLINE_inc(PL_curcop);
4980 if (!lex_next_chunk(fake_eof)) {
4981 CopLINE_dec(PL_curcop);
4983 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4985 CopLINE_dec(PL_curcop);
4988 PL_realtokenstart = -1;
4991 /* If it looks like the start of a BOM or raw UTF-16,
4992 * check if it in fact is. */
4993 if (bof && PL_rsfp &&
4998 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4999 bof = (offset == (Off_t)SvCUR(PL_linestr));
5000 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5001 /* offset may include swallowed CR */
5003 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5006 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5007 s = swallow_bom((U8*)s);
5010 if (PL_parser->in_pod) {
5011 /* Incest with pod. */
5014 sv_catsv(PL_thiswhite, PL_linestr);
5016 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5017 sv_setpvs(PL_linestr, "");
5018 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5019 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5020 PL_last_lop = PL_last_uni = NULL;
5021 PL_parser->in_pod = 0;
5024 if (PL_rsfp || PL_parser->filtered)
5026 } while (PL_parser->in_pod);
5027 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5028 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5029 PL_last_lop = PL_last_uni = NULL;
5030 if (CopLINE(PL_curcop) == 1) {
5031 while (s < PL_bufend && isSPACE(*s))
5033 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5037 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5041 if (*s == '#' && *(s+1) == '!')
5043 #ifdef ALTERNATE_SHEBANG
5045 static char const as[] = ALTERNATE_SHEBANG;
5046 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5047 d = s + (sizeof(as) - 1);
5049 #endif /* ALTERNATE_SHEBANG */
5058 while (*d && !isSPACE(*d))
5062 #ifdef ARG_ZERO_IS_SCRIPT
5063 if (ipathend > ipath) {
5065 * HP-UX (at least) sets argv[0] to the script name,
5066 * which makes $^X incorrect. And Digital UNIX and Linux,
5067 * at least, set argv[0] to the basename of the Perl
5068 * interpreter. So, having found "#!", we'll set it right.
5070 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5072 assert(SvPOK(x) || SvGMAGICAL(x));
5073 if (sv_eq(x, CopFILESV(PL_curcop))) {
5074 sv_setpvn(x, ipath, ipathend - ipath);
5080 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5081 const char * const lstart = SvPV_const(x,llen);
5083 bstart += blen - llen;
5084 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5085 sv_setpvn(x, ipath, ipathend - ipath);
5090 TAINT_NOT; /* $^X is always tainted, but that's OK */
5092 #endif /* ARG_ZERO_IS_SCRIPT */
5097 d = instr(s,"perl -");
5099 d = instr(s,"perl");
5101 /* avoid getting into infinite loops when shebang
5102 * line contains "Perl" rather than "perl" */
5104 for (d = ipathend-4; d >= ipath; --d) {
5105 if ((*d == 'p' || *d == 'P')
5106 && !ibcmp(d, "perl", 4))
5116 #ifdef ALTERNATE_SHEBANG
5118 * If the ALTERNATE_SHEBANG on this system starts with a
5119 * character that can be part of a Perl expression, then if
5120 * we see it but not "perl", we're probably looking at the
5121 * start of Perl code, not a request to hand off to some
5122 * other interpreter. Similarly, if "perl" is there, but
5123 * not in the first 'word' of the line, we assume the line
5124 * contains the start of the Perl program.
5126 if (d && *s != '#') {
5127 const char *c = ipath;
5128 while (*c && !strchr("; \t\r\n\f\v#", *c))
5131 d = NULL; /* "perl" not in first word; ignore */
5133 *s = '#'; /* Don't try to parse shebang line */
5135 #endif /* ALTERNATE_SHEBANG */
5140 !instr(s,"indir") &&
5141 instr(PL_origargv[0],"perl"))
5148 while (s < PL_bufend && isSPACE(*s))
5150 if (s < PL_bufend) {
5151 Newx(newargv,PL_origargc+3,char*);
5153 while (s < PL_bufend && !isSPACE(*s))
5156 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5159 newargv = PL_origargv;
5162 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5164 Perl_croak(aTHX_ "Can't exec %s", ipath);
5167 while (*d && !isSPACE(*d))
5169 while (SPACE_OR_TAB(*d))
5173 const bool switches_done = PL_doswitches;
5174 const U32 oldpdb = PL_perldb;
5175 const bool oldn = PL_minus_n;
5176 const bool oldp = PL_minus_p;
5180 bool baduni = FALSE;
5182 const char *d2 = d1 + 1;
5183 if (parse_unicode_opts((const char **)&d2)
5187 if (baduni || *d1 == 'M' || *d1 == 'm') {
5188 const char * const m = d1;
5189 while (*d1 && !isSPACE(*d1))
5191 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5194 d1 = moreswitches(d1);
5196 if (PL_doswitches && !switches_done) {
5197 int argc = PL_origargc;
5198 char **argv = PL_origargv;
5201 } while (argc && argv[0][0] == '-' && argv[0][1]);
5202 init_argv_symbols(argc,argv);
5204 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5205 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5206 /* if we have already added "LINE: while (<>) {",
5207 we must not do it again */
5209 sv_setpvs(PL_linestr, "");
5210 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5211 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5212 PL_last_lop = PL_last_uni = NULL;
5213 PL_preambled = FALSE;
5214 if (PERLDB_LINE || PERLDB_SAVESRC)
5215 (void)gv_fetchfile(PL_origfilename);
5222 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5223 PL_lex_state = LEX_FORMLINE;
5224 start_force(PL_curforce);
5225 NEXTVAL_NEXTTOKE.ival = 0;
5226 force_next(FORMRBRACK);
5231 #ifdef PERL_STRICT_CR
5232 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5234 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5236 case ' ': case '\t': case '\f': case 013:
5238 PL_realtokenstart = -1;
5240 PL_thiswhite = newSVpvs("");
5241 sv_catpvn(PL_thiswhite, s, 1);
5248 PL_realtokenstart = -1;
5252 if (PL_lex_state != LEX_NORMAL ||
5253 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5254 if (*s == '#' && s == PL_linestart && PL_in_eval
5255 && !PL_rsfp && !PL_parser->filtered) {
5256 /* handle eval qq[#line 1 "foo"\n ...] */
5257 CopLINE_dec(PL_curcop);
5260 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5262 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5267 while (d < PL_bufend && *d != '\n')
5271 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5272 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5276 PL_thiswhite = newSVpvn(s, d - s);
5281 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5282 PL_lex_state = LEX_FORMLINE;
5283 start_force(PL_curforce);
5284 NEXTVAL_NEXTTOKE.ival = 0;
5285 force_next(FORMRBRACK);
5291 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5292 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5295 TOKEN(PEG); /* make sure any #! line is accessible */
5300 /* if (PL_madskills && PL_lex_formbrack) { */
5302 while (d < PL_bufend && *d != '\n')
5306 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5307 Perl_croak(aTHX_ "panic: input overflow");
5308 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5310 PL_thiswhite = newSVpvs("");
5311 if (CopLINE(PL_curcop) == 1) {
5312 sv_setpvs(PL_thiswhite, "");
5315 sv_catpvn(PL_thiswhite, s, d - s);
5329 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5337 while (s < PL_bufend && SPACE_OR_TAB(*s))
5340 if (strnEQ(s,"=>",2)) {
5341 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5342 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5343 OPERATOR('-'); /* unary minus */
5345 PL_last_uni = PL_oldbufptr;
5347 case 'r': ftst = OP_FTEREAD; break;
5348 case 'w': ftst = OP_FTEWRITE; break;
5349 case 'x': ftst = OP_FTEEXEC; break;
5350 case 'o': ftst = OP_FTEOWNED; break;
5351 case 'R': ftst = OP_FTRREAD; break;
5352 case 'W': ftst = OP_FTRWRITE; break;
5353 case 'X': ftst = OP_FTREXEC; break;
5354 case 'O': ftst = OP_FTROWNED; break;
5355 case 'e': ftst = OP_FTIS; break;
5356 case 'z': ftst = OP_FTZERO; break;
5357 case 's': ftst = OP_FTSIZE; break;
5358 case 'f': ftst = OP_FTFILE; break;
5359 case 'd': ftst = OP_FTDIR; break;
5360 case 'l': ftst = OP_FTLINK; break;
5361 case 'p': ftst = OP_FTPIPE; break;
5362 case 'S': ftst = OP_FTSOCK; break;
5363 case 'u': ftst = OP_FTSUID; break;
5364 case 'g': ftst = OP_FTSGID; break;
5365 case 'k': ftst = OP_FTSVTX; break;
5366 case 'b': ftst = OP_FTBLK; break;
5367 case 'c': ftst = OP_FTCHR; break;
5368 case 't': ftst = OP_FTTTY; break;
5369 case 'T': ftst = OP_FTTEXT; break;
5370 case 'B': ftst = OP_FTBINARY; break;
5371 case 'M': case 'A': case 'C':
5372 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5374 case 'M': ftst = OP_FTMTIME; break;
5375 case 'A': ftst = OP_FTATIME; break;
5376 case 'C': ftst = OP_FTCTIME; break;
5384 PL_last_lop_op = (OPCODE)ftst;
5385 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5386 "### Saw file test %c\n", (int)tmp);
5391 /* Assume it was a minus followed by a one-letter named
5392 * subroutine call (or a -bareword), then. */
5393 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5394 "### '-%c' looked like a file test but was not\n",
5401 const char tmp = *s++;
5404 if (PL_expect == XOPERATOR)
5409 else if (*s == '>') {
5412 if (isIDFIRST_lazy_if(s,UTF)) {
5413 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5421 if (PL_expect == XOPERATOR) {
5422 if (*s == '=' && !PL_lex_allbrackets &&
5423 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5430 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5432 OPERATOR('-'); /* unary minus */
5438 const char tmp = *s++;
5441 if (PL_expect == XOPERATOR)
5446 if (PL_expect == XOPERATOR) {
5447 if (*s == '=' && !PL_lex_allbrackets &&
5448 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5455 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5462 if (PL_expect != XOPERATOR) {
5463 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5464 PL_expect = XOPERATOR;
5465 force_ident(PL_tokenbuf, '*');
5473 if (*s == '=' && !PL_lex_allbrackets &&
5474 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5480 if (*s == '=' && !PL_lex_allbrackets &&
5481 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5488 if (PL_expect == XOPERATOR) {
5489 if (s[1] == '=' && !PL_lex_allbrackets &&
5490 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5495 PL_tokenbuf[0] = '%';
5496 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5497 sizeof PL_tokenbuf - 1, FALSE);
5498 if (!PL_tokenbuf[1]) {
5501 PL_pending_ident = '%';
5505 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5506 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5511 if (PL_lex_brackets > 100)
5512 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5513 PL_lex_brackstack[PL_lex_brackets++] = 0;
5514 PL_lex_allbrackets++;
5516 const char tmp = *s++;
5521 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5523 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5531 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5538 goto just_a_word_zero_gv;
5541 switch (PL_expect) {
5547 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5549 PL_bufptr = s; /* update in case we back off */
5552 "Use of := for an empty attribute list is not allowed");
5559 PL_expect = XTERMBLOCK;
5562 stuffstart = s - SvPVX(PL_linestr) - 1;
5566 while (isIDFIRST_lazy_if(s,UTF)) {
5569 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5570 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5571 if (tmp < 0) tmp = -tmp;
5586 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5588 d = scan_str(d,TRUE,TRUE,FALSE);
5590 /* MUST advance bufptr here to avoid bogus
5591 "at end of line" context messages from yyerror().
5593 PL_bufptr = s + len;
5594 yyerror("Unterminated attribute parameter in attribute list");
5598 return REPORT(0); /* EOF indicator */
5602 sv_catsv(sv, PL_lex_stuff);
5603 attrs = op_append_elem(OP_LIST, attrs,
5604 newSVOP(OP_CONST, 0, sv));
5605 SvREFCNT_dec(PL_lex_stuff);
5606 PL_lex_stuff = NULL;
5609 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5611 if (PL_in_my == KEY_our) {
5612 deprecate(":unique");
5615 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5618 /* NOTE: any CV attrs applied here need to be part of
5619 the CVf_BUILTIN_ATTRS define in cv.h! */
5620 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5622 CvLVALUE_on(PL_compcv);
5624 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5626 deprecate(":locked");
5628 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5630 CvMETHOD_on(PL_compcv);
5632 /* After we've set the flags, it could be argued that
5633 we don't need to do the attributes.pm-based setting
5634 process, and shouldn't bother appending recognized
5635 flags. To experiment with that, uncomment the
5636 following "else". (Note that's already been
5637 uncommented. That keeps the above-applied built-in
5638 attributes from being intercepted (and possibly
5639 rejected) by a package's attribute routines, but is
5640 justified by the performance win for the common case
5641 of applying only built-in attributes.) */
5643 attrs = op_append_elem(OP_LIST, attrs,
5644 newSVOP(OP_CONST, 0,
5648 if (*s == ':' && s[1] != ':')
5651 break; /* require real whitespace or :'s */
5652 /* XXX losing whitespace on sequential attributes here */
5656 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5657 if (*s != ';' && *s != '}' && *s != tmp
5658 && (tmp != '=' || *s != ')')) {
5659 const char q = ((*s == '\'') ? '"' : '\'');
5660 /* If here for an expression, and parsed no attrs, back
5662 if (tmp == '=' && !attrs) {
5666 /* MUST advance bufptr here to avoid bogus "at end of line"
5667 context messages from yyerror().
5670 yyerror( (const char *)
5672 ? Perl_form(aTHX_ "Invalid separator character "
5673 "%c%c%c in attribute list", q, *s, q)
5674 : "Unterminated attribute list" ) );
5682 start_force(PL_curforce);
5683 NEXTVAL_NEXTTOKE.opval = attrs;
5684 CURMAD('_', PL_nextwhite);
5689 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5690 (s - SvPVX(PL_linestr)) - stuffstart);
5695 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5699 PL_lex_allbrackets--;
5703 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5704 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5708 PL_lex_allbrackets++;
5711 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5717 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5720 PL_lex_allbrackets--;
5726 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5729 if (PL_lex_brackets <= 0)
5730 yyerror("Unmatched right square bracket");
5733 PL_lex_allbrackets--;
5734 if (PL_lex_state == LEX_INTERPNORMAL) {
5735 if (PL_lex_brackets == 0) {
5736 if (*s == '-' && s[1] == '>')
5737 PL_lex_state = LEX_INTERPENDMAYBE;
5738 else if (*s != '[' && *s != '{')
5739 PL_lex_state = LEX_INTERPEND;
5746 if (PL_lex_brackets > 100) {
5747 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5749 switch (PL_expect) {
5751 if (PL_oldoldbufptr == PL_last_lop)
5752 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5754 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5755 PL_lex_allbrackets++;
5756 OPERATOR(HASHBRACK);
5758 while (s < PL_bufend && SPACE_OR_TAB(*s))
5761 PL_tokenbuf[0] = '\0';
5762 if (d < PL_bufend && *d == '-') {
5763 PL_tokenbuf[0] = '-';
5765 while (d < PL_bufend && SPACE_OR_TAB(*d))
5768 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5769 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5771 while (d < PL_bufend && SPACE_OR_TAB(*d))
5774 const char minus = (PL_tokenbuf[0] == '-');
5775 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5783 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5784 PL_lex_allbrackets++;
5789 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5790 PL_lex_allbrackets++;
5795 if (PL_oldoldbufptr == PL_last_lop)
5796 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5798 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5799 PL_lex_allbrackets++;
5802 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5804 /* This hack is to get the ${} in the message. */
5806 yyerror("syntax error");
5809 OPERATOR(HASHBRACK);
5811 /* This hack serves to disambiguate a pair of curlies
5812 * as being a block or an anon hash. Normally, expectation
5813 * determines that, but in cases where we're not in a
5814 * position to expect anything in particular (like inside
5815 * eval"") we have to resolve the ambiguity. This code
5816 * covers the case where the first term in the curlies is a
5817 * quoted string. Most other cases need to be explicitly
5818 * disambiguated by prepending a "+" before the opening
5819 * curly in order to force resolution as an anon hash.
5821 * XXX should probably propagate the outer expectation
5822 * into eval"" to rely less on this hack, but that could
5823 * potentially break current behavior of eval"".
5827 if (*s == '\'' || *s == '"' || *s == '`') {
5828 /* common case: get past first string, handling escapes */
5829 for (t++; t < PL_bufend && *t != *s;)
5830 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5834 else if (*s == 'q') {
5837 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5840 /* skip q//-like construct */
5842 char open, close, term;
5845 while (t < PL_bufend && isSPACE(*t))
5847 /* check for q => */
5848 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5849 OPERATOR(HASHBRACK);
5853 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5857 for (t++; t < PL_bufend; t++) {
5858 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5860 else if (*t == open)
5864 for (t++; t < PL_bufend; t++) {
5865 if (*t == '\\' && t+1 < PL_bufend)
5867 else if (*t == close && --brackets <= 0)
5869 else if (*t == open)
5876 /* skip plain q word */
5877 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5880 else if (isALNUM_lazy_if(t,UTF)) {
5882 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5885 while (t < PL_bufend && isSPACE(*t))
5887 /* if comma follows first term, call it an anon hash */
5888 /* XXX it could be a comma expression with loop modifiers */
5889 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5890 || (*t == '=' && t[1] == '>')))
5891 OPERATOR(HASHBRACK);
5892 if (PL_expect == XREF)
5895 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5901 pl_yylval.ival = CopLINE(PL_curcop);
5902 if (isSPACE(*s) || *s == '#')
5903 PL_copline = NOLINE; /* invalidate current command line number */
5904 TOKEN(formbrack ? '=' : '{');
5906 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5910 if (PL_lex_brackets <= 0)
5911 yyerror("Unmatched right curly bracket");
5913 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5914 PL_lex_allbrackets--;
5915 if (PL_lex_state == LEX_INTERPNORMAL) {
5916 if (PL_lex_brackets == 0) {
5917 if (PL_expect & XFAKEBRACK) {
5918 PL_expect &= XENUMMASK;
5919 PL_lex_state = LEX_INTERPEND;
5924 PL_thiswhite = newSVpvs("");
5925 sv_catpvs(PL_thiswhite,"}");
5928 return yylex(); /* ignore fake brackets */
5930 if (*s == '-' && s[1] == '>')
5931 PL_lex_state = LEX_INTERPENDMAYBE;
5932 else if (*s != '[' && *s != '{')
5933 PL_lex_state = LEX_INTERPEND;
5936 if (PL_expect & XFAKEBRACK) {
5937 PL_expect &= XENUMMASK;
5939 return yylex(); /* ignore fake brackets */
5941 start_force(PL_curforce);
5943 curmad('X', newSVpvn(s-1,1));
5944 CURMAD('_', PL_thiswhite);
5946 force_next(formbrack ? '.' : '}');
5947 if (formbrack) LEAVE;
5950 PL_thistoken = newSVpvs("");
5952 if (formbrack == 2) { /* means . where arguments were expected */
5953 start_force(PL_curforce);
5961 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5962 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5969 if (PL_expect == XOPERATOR) {
5970 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5971 && isIDFIRST_lazy_if(s,UTF))
5973 CopLINE_dec(PL_curcop);
5974 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5975 CopLINE_inc(PL_curcop);
5977 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5978 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5985 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5987 PL_expect = XOPERATOR;
5988 force_ident(PL_tokenbuf, '&');
5992 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5998 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5999 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6006 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6007 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6015 const char tmp = *s++;
6017 if (!PL_lex_allbrackets &&
6018 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6025 if (!PL_lex_allbrackets &&
6026 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6034 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6035 && strchr("+-*/%.^&|<",tmp))
6036 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6037 "Reversed %c= operator",(int)tmp);
6039 if (PL_expect == XSTATE && isALPHA(tmp) &&
6040 (s == PL_linestart+1 || s[-2] == '\n') )
6042 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6043 || PL_lex_state != LEX_NORMAL) {
6048 if (strnEQ(s,"=cut",4)) {
6064 PL_thiswhite = newSVpvs("");
6065 sv_catpvn(PL_thiswhite, PL_linestart,
6066 PL_bufend - PL_linestart);
6070 PL_parser->in_pod = 1;
6074 if (PL_expect == XBLOCK) {
6076 #ifdef PERL_STRICT_CR
6077 while (SPACE_OR_TAB(*t))
6079 while (SPACE_OR_TAB(*t) || *t == '\r')
6082 if (*t == '\n' || *t == '#') {
6085 SAVEI8(PL_parser->form_lex_state);
6086 SAVEI32(PL_lex_formbrack);
6087 PL_parser->form_lex_state = PL_lex_state;
6088 PL_lex_formbrack = PL_lex_brackets + 1;
6092 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6101 const char tmp = *s++;
6103 /* was this !=~ where !~ was meant?
6104 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6106 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6107 const char *t = s+1;
6109 while (t < PL_bufend && isSPACE(*t))
6112 if (*t == '/' || *t == '?' ||
6113 ((*t == 'm' || *t == 's' || *t == 'y')
6114 && !isALNUM(t[1])) ||
6115 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6117 "!=~ should be !~");
6119 if (!PL_lex_allbrackets &&
6120 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6132 if (PL_expect != XOPERATOR) {
6133 if (s[1] != '<' && !strchr(s,'>'))
6136 s = scan_heredoc(s);
6138 s = scan_inputsymbol(s);
6139 TERM(sublex_start());
6145 if (*s == '=' && !PL_lex_allbrackets &&
6146 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6150 SHop(OP_LEFT_SHIFT);
6155 if (!PL_lex_allbrackets &&
6156 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6163 if (!PL_lex_allbrackets &&
6164 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6172 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6180 const char tmp = *s++;
6182 if (*s == '=' && !PL_lex_allbrackets &&
6183 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6187 SHop(OP_RIGHT_SHIFT);
6189 else if (tmp == '=') {
6190 if (!PL_lex_allbrackets &&
6191 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6199 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6208 if (PL_expect == XOPERATOR) {
6209 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6210 return deprecate_commaless_var_list();
6214 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6215 PL_tokenbuf[0] = '@';
6216 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6217 sizeof PL_tokenbuf - 1, FALSE);
6218 if (PL_expect == XOPERATOR)
6219 no_op("Array length", s);
6220 if (!PL_tokenbuf[1])
6222 PL_expect = XOPERATOR;
6223 PL_pending_ident = '#';
6227 PL_tokenbuf[0] = '$';
6228 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6229 sizeof PL_tokenbuf - 1, FALSE);
6230 if (PL_expect == XOPERATOR)
6232 if (!PL_tokenbuf[1]) {
6234 yyerror("Final $ should be \\$ or $name");
6240 const char tmp = *s;
6241 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6244 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6245 && intuit_more(s)) {
6247 PL_tokenbuf[0] = '@';
6248 if (ckWARN(WARN_SYNTAX)) {
6251 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6254 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6255 while (t < PL_bufend && *t != ']')
6257 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6258 "Multidimensional syntax %.*s not supported",
6259 (int)((t - PL_bufptr) + 1), PL_bufptr);
6263 else if (*s == '{') {
6265 PL_tokenbuf[0] = '%';
6266 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6267 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6269 char tmpbuf[sizeof PL_tokenbuf];
6272 } while (isSPACE(*t));
6273 if (isIDFIRST_lazy_if(t,UTF)) {
6275 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6280 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6281 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6282 "You need to quote \"%"SVf"\"",
6283 SVfARG(newSVpvn_flags(tmpbuf, len,
6284 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6290 PL_expect = XOPERATOR;
6291 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6292 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6293 if (!islop || PL_last_lop_op == OP_GREPSTART)
6294 PL_expect = XOPERATOR;
6295 else if (strchr("$@\"'`q", *s))
6296 PL_expect = XTERM; /* e.g. print $fh "foo" */
6297 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6298 PL_expect = XTERM; /* e.g. print $fh &sub */
6299 else if (isIDFIRST_lazy_if(s,UTF)) {
6300 char tmpbuf[sizeof PL_tokenbuf];
6302 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6303 if ((t2 = keyword(tmpbuf, len, 0))) {
6304 /* binary operators exclude handle interpretations */
6316 PL_expect = XTERM; /* e.g. print $fh length() */
6321 PL_expect = XTERM; /* e.g. print $fh subr() */
6324 else if (isDIGIT(*s))
6325 PL_expect = XTERM; /* e.g. print $fh 3 */
6326 else if (*s == '.' && isDIGIT(s[1]))
6327 PL_expect = XTERM; /* e.g. print $fh .3 */
6328 else if ((*s == '?' || *s == '-' || *s == '+')
6329 && !isSPACE(s[1]) && s[1] != '=')
6330 PL_expect = XTERM; /* e.g. print $fh -1 */
6331 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6333 PL_expect = XTERM; /* e.g. print $fh /.../
6334 XXX except DORDOR operator
6336 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6338 PL_expect = XTERM; /* print $fh <<"EOF" */
6341 PL_pending_ident = '$';
6345 if (PL_expect == XOPERATOR)
6347 PL_tokenbuf[0] = '@';
6348 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6349 if (!PL_tokenbuf[1]) {
6352 if (PL_lex_state == LEX_NORMAL)
6354 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6356 PL_tokenbuf[0] = '%';
6358 /* Warn about @ where they meant $. */
6359 if (*s == '[' || *s == '{') {
6360 if (ckWARN(WARN_SYNTAX)) {
6361 const char *t = s + 1;
6362 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6363 t += UTF ? UTF8SKIP(t) : 1;
6364 if (*t == '}' || *t == ']') {
6366 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6367 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6368 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6369 "Scalar value %"SVf" better written as $%"SVf,
6370 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6371 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6372 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6373 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6378 PL_pending_ident = '@';
6381 case '/': /* may be division, defined-or, or pattern */
6382 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6383 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6384 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6389 case '?': /* may either be conditional or pattern */
6390 if (PL_expect == XOPERATOR) {
6393 if (!PL_lex_allbrackets &&
6394 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6398 PL_lex_allbrackets++;
6404 /* A // operator. */
6405 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6406 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6407 LEX_FAKEEOF_LOGIC)) {
6415 if (*s == '=' && !PL_lex_allbrackets &&
6416 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6425 /* Disable warning on "study /blah/" */
6426 if (PL_oldoldbufptr == PL_last_uni
6427 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6428 || memNE(PL_last_uni, "study", 5)
6429 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6433 deprecate("?PATTERN? without explicit operator");
6434 s = scan_pat(s,OP_MATCH);
6435 TERM(sublex_start());
6439 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6440 #ifdef PERL_STRICT_CR
6443 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6445 && (s == PL_linestart || s[-1] == '\n') )
6448 formbrack = 2; /* dot seen where arguments expected */
6451 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6455 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6458 if (!PL_lex_allbrackets &&
6459 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6466 pl_yylval.ival = OPf_SPECIAL;
6472 if (*s == '=' && !PL_lex_allbrackets &&
6473 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6480 case '0': case '1': case '2': case '3': case '4':
6481 case '5': case '6': case '7': case '8': case '9':
6482 s = scan_num(s, &pl_yylval);
6483 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6484 if (PL_expect == XOPERATOR)
6489 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6490 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6491 if (PL_expect == XOPERATOR) {
6492 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6493 return deprecate_commaless_var_list();
6500 pl_yylval.ival = OP_CONST;
6501 TERM(sublex_start());
6504 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6505 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6506 if (PL_expect == XOPERATOR) {
6507 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6508 return deprecate_commaless_var_list();
6515 pl_yylval.ival = OP_CONST;
6516 /* FIXME. I think that this can be const if char *d is replaced by
6517 more localised variables. */
6518 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6519 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6520 pl_yylval.ival = OP_STRINGIFY;
6524 TERM(sublex_start());
6527 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6528 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6529 if (PL_expect == XOPERATOR)
6530 no_op("Backticks",s);
6533 readpipe_override();
6534 TERM(sublex_start());
6538 if (PL_lex_inwhat && isDIGIT(*s))
6539 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6541 if (PL_expect == XOPERATOR)
6542 no_op("Backslash",s);
6546 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6547 char *start = s + 2;
6548 while (isDIGIT(*start) || *start == '_')
6550 if (*start == '.' && isDIGIT(start[1])) {
6551 s = scan_num(s, &pl_yylval);
6554 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6555 else if (!isALPHA(*start) && (PL_expect == XTERM
6556 || PL_expect == XREF || PL_expect == XSTATE
6557 || PL_expect == XTERMORDORDOR)) {
6558 GV *const gv = gv_fetchpvn_flags(s, start - s,
6559 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6561 s = scan_num(s, &pl_yylval);
6568 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6611 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6613 /* Some keywords can be followed by any delimiter, including ':' */
6614 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6616 /* x::* is just a word, unless x is "CORE" */
6617 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6621 while (d < PL_bufend && isSPACE(*d))
6622 d++; /* no comments skipped here, or s### is misparsed */
6624 /* Is this a word before a => operator? */
6625 if (*d == '=' && d[1] == '>') {
6628 = (OP*)newSVOP(OP_CONST, 0,
6629 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6630 pl_yylval.opval->op_private = OPpCONST_BARE;
6634 /* Check for plugged-in keyword */
6638 char *saved_bufptr = PL_bufptr;
6640 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6642 if (result == KEYWORD_PLUGIN_DECLINE) {
6643 /* not a plugged-in keyword */
6644 PL_bufptr = saved_bufptr;
6645 } else if (result == KEYWORD_PLUGIN_STMT) {
6646 pl_yylval.opval = o;
6649 return REPORT(PLUGSTMT);
6650 } else if (result == KEYWORD_PLUGIN_EXPR) {
6651 pl_yylval.opval = o;
6653 PL_expect = XOPERATOR;
6654 return REPORT(PLUGEXPR);
6656 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6661 /* Check for built-in keyword */
6662 tmp = keyword(PL_tokenbuf, len, 0);
6664 /* Is this a label? */
6665 if (!anydelim && PL_expect == XSTATE
6666 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6668 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6669 newSVpvn_flags(PL_tokenbuf,
6670 len, UTF ? SVf_UTF8 : 0));
6675 if (tmp < 0) { /* second-class keyword? */
6676 GV *ogv = NULL; /* override (winner) */
6677 GV *hgv = NULL; /* hidden (loser) */
6678 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6680 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6681 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6684 if (GvIMPORTED_CV(gv))
6686 else if (! CvMETHOD(cv))
6690 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6691 UTF ? -(I32)len : (I32)len, FALSE)) &&
6692 (gv = *gvp) && isGV_with_GP(gv) &&
6693 GvCVu(gv) && GvIMPORTED_CV(gv))
6700 tmp = 0; /* overridden by import or by GLOBAL */
6703 && -tmp==KEY_lock /* XXX generalizable kludge */
6706 tmp = 0; /* any sub overrides "weak" keyword */
6708 else { /* no override */
6710 if (tmp == KEY_dump) {
6711 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6712 "dump() better written as CORE::dump()");
6716 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6717 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6718 "Ambiguous call resolved as CORE::%s(), "
6719 "qualify as such or use &",
6727 default: /* not a keyword */
6728 /* Trade off - by using this evil construction we can pull the
6729 variable gv into the block labelled keylookup. If not, then
6730 we have to give it function scope so that the goto from the
6731 earlier ':' case doesn't bypass the initialisation. */
6733 just_a_word_zero_gv:
6741 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6745 SV *nextPL_nextwhite = 0;
6749 /* Get the rest if it looks like a package qualifier */
6751 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6753 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6756 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6757 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6758 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6759 *s == '\'' ? "'" : "::");
6764 if (PL_expect == XOPERATOR) {
6765 if (PL_bufptr == PL_linestart) {
6766 CopLINE_dec(PL_curcop);
6767 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6768 CopLINE_inc(PL_curcop);
6771 no_op("Bareword",s);
6774 /* Look for a subroutine with this name in current package,
6775 unless name is "Foo::", in which case Foo is a bareword
6776 (and a package name). */
6778 if (len > 2 && !PL_madskills &&
6779 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6781 if (ckWARN(WARN_BAREWORD)
6782 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6783 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6784 "Bareword \"%"SVf"\" refers to nonexistent package",
6785 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6786 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6788 PL_tokenbuf[len] = '\0';
6794 /* Mustn't actually add anything to a symbol table.
6795 But also don't want to "initialise" any placeholder
6796 constants that might already be there into full
6797 blown PVGVs with attached PVCV. */
6798 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6799 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6805 /* if we saw a global override before, get the right name */
6807 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6808 len ? len : strlen(PL_tokenbuf));
6810 SV * const tmp_sv = sv;
6811 sv = newSVpvs("CORE::GLOBAL::");
6812 sv_catsv(sv, tmp_sv);
6813 SvREFCNT_dec(tmp_sv);
6817 if (PL_madskills && !PL_thistoken) {
6818 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6819 PL_thistoken = newSVpvn(start,s - start);
6820 PL_realtokenstart = s - SvPVX(PL_linestr);
6824 /* Presume this is going to be a bareword of some sort. */
6826 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6827 pl_yylval.opval->op_private = OPpCONST_BARE;
6829 /* And if "Foo::", then that's what it certainly is. */
6834 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6835 const_op->op_private = OPpCONST_BARE;
6836 rv2cv_op = newCVREF(0, const_op);
6838 cv = rv2cv_op_cv(rv2cv_op, 0);
6840 /* See if it's the indirect object for a list operator. */
6842 if (PL_oldoldbufptr &&
6843 PL_oldoldbufptr < PL_bufptr &&
6844 (PL_oldoldbufptr == PL_last_lop
6845 || PL_oldoldbufptr == PL_last_uni) &&
6846 /* NO SKIPSPACE BEFORE HERE! */
6847 (PL_expect == XREF ||
6848 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6850 bool immediate_paren = *s == '(';
6852 /* (Now we can afford to cross potential line boundary.) */
6853 s = SKIPSPACE2(s,nextPL_nextwhite);
6855 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6858 /* Two barewords in a row may indicate method call. */
6860 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6861 (tmp = intuit_method(s, gv, cv))) {
6863 if (tmp == METHOD && !PL_lex_allbrackets &&
6864 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6865 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6869 /* If not a declared subroutine, it's an indirect object. */
6870 /* (But it's an indir obj regardless for sort.) */
6871 /* Also, if "_" follows a filetest operator, it's a bareword */
6874 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6876 (PL_last_lop_op != OP_MAPSTART &&
6877 PL_last_lop_op != OP_GREPSTART))))
6878 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6879 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6882 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6887 PL_expect = XOPERATOR;
6890 s = SKIPSPACE2(s,nextPL_nextwhite);
6891 PL_nextwhite = nextPL_nextwhite;
6896 /* Is this a word before a => operator? */
6897 if (*s == '=' && s[1] == '>' && !pkgname) {
6900 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6901 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6902 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6906 /* If followed by a paren, it's certainly a subroutine. */
6911 while (SPACE_OR_TAB(*d))
6913 if (*d == ')' && (sv = cv_const_sv(cv))) {
6920 PL_nextwhite = PL_thiswhite;
6923 start_force(PL_curforce);
6925 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6926 PL_expect = XOPERATOR;
6929 PL_nextwhite = nextPL_nextwhite;
6930 curmad('X', PL_thistoken);
6931 PL_thistoken = newSVpvs("");
6940 /* If followed by var or block, call it a method (unless sub) */
6942 if ((*s == '$' || *s == '{') && !cv) {
6944 PL_last_lop = PL_oldbufptr;
6945 PL_last_lop_op = OP_METHOD;
6946 if (!PL_lex_allbrackets &&
6947 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6948 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6952 /* If followed by a bareword, see if it looks like indir obj. */
6955 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6956 && (tmp = intuit_method(s, gv, cv))) {
6958 if (tmp == METHOD && !PL_lex_allbrackets &&
6959 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6960 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6964 /* Not a method, so call it a subroutine (if defined) */
6967 if (lastchar == '-') {
6968 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6969 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6970 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6971 SVfARG(tmpsv), SVfARG(tmpsv));
6973 /* Check for a constant sub */
6974 if ((sv = cv_const_sv(cv))) {
6977 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6978 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6979 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6980 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6984 op_free(pl_yylval.opval);
6985 pl_yylval.opval = rv2cv_op;
6986 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6987 PL_last_lop = PL_oldbufptr;
6988 PL_last_lop_op = OP_ENTERSUB;
6989 /* Is there a prototype? */
6996 STRLEN protolen = CvPROTOLEN(cv);
6997 const char *proto = CvPROTO(cv);
7001 if ((optional = *proto == ';'))
7004 while (*proto == ';');
7008 *proto == '$' || *proto == '_'
7009 || *proto == '*' || *proto == '+'
7014 *proto == '\\' && proto[1] && proto[2] == '\0'
7017 UNIPROTO(UNIOPSUB,optional);
7018 if (*proto == '\\' && proto[1] == '[') {
7019 const char *p = proto + 2;
7020 while(*p && *p != ']')
7022 if(*p == ']' && !p[1])
7023 UNIPROTO(UNIOPSUB,optional);
7025 if (*proto == '&' && *s == '{') {
7027 sv_setpvs(PL_subname, "__ANON__");
7029 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7030 if (!PL_lex_allbrackets &&
7031 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7032 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7039 PL_nextwhite = PL_thiswhite;
7042 start_force(PL_curforce);
7043 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7046 PL_nextwhite = nextPL_nextwhite;
7047 curmad('X', PL_thistoken);
7048 PL_thistoken = newSVpvs("");
7051 if (!PL_lex_allbrackets &&
7052 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7053 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7058 /* Guess harder when madskills require "best effort". */
7059 if (PL_madskills && (!gv || !GvCVu(gv))) {
7060 int probable_sub = 0;
7061 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7063 else if (isALPHA(*s)) {
7067 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7068 if (!keyword(tmpbuf, tmplen, 0))
7071 while (d < PL_bufend && isSPACE(*d))
7073 if (*d == '=' && d[1] == '>')
7078 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7080 op_free(pl_yylval.opval);
7081 pl_yylval.opval = rv2cv_op;
7082 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7083 PL_last_lop = PL_oldbufptr;
7084 PL_last_lop_op = OP_ENTERSUB;
7085 PL_nextwhite = PL_thiswhite;
7087 start_force(PL_curforce);
7088 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7090 PL_nextwhite = nextPL_nextwhite;
7091 curmad('X', PL_thistoken);
7092 PL_thistoken = newSVpvs("");
7094 if (!PL_lex_allbrackets &&
7095 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7096 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7100 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7103 if (!PL_lex_allbrackets &&
7104 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7105 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7110 /* Call it a bare word */
7112 if (PL_hints & HINT_STRICT_SUBS)
7113 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7116 /* after "print" and similar functions (corresponding to
7117 * "F? L" in opcode.pl), whatever wasn't already parsed as
7118 * a filehandle should be subject to "strict subs".
7119 * Likewise for the optional indirect-object argument to system
7120 * or exec, which can't be a bareword */
7121 if ((PL_last_lop_op == OP_PRINT
7122 || PL_last_lop_op == OP_PRTF
7123 || PL_last_lop_op == OP_SAY
7124 || PL_last_lop_op == OP_SYSTEM
7125 || PL_last_lop_op == OP_EXEC)
7126 && (PL_hints & HINT_STRICT_SUBS))
7127 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7128 if (lastchar != '-') {
7129 if (ckWARN(WARN_RESERVED)) {
7133 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7134 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7142 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7143 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7144 "Operator or semicolon missing before %c%"SVf,
7145 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7146 strlen(PL_tokenbuf),
7147 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7148 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7149 "Ambiguous use of %c resolved as operator %c",
7150 lastchar, lastchar);
7157 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7162 (OP*)newSVOP(OP_CONST, 0,
7163 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7166 case KEY___PACKAGE__:
7168 (OP*)newSVOP(OP_CONST, 0,
7170 ? newSVhek(HvNAME_HEK(PL_curstash))
7177 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7178 const char *pname = "main";
7181 if (PL_tokenbuf[2] == 'D')
7184 PL_curstash ? PL_curstash : PL_defstash;
7185 pname = HvNAME_get(stash);
7186 plen = HvNAMELEN (stash);
7187 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7189 gv = gv_fetchpvn_flags(
7190 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7191 plen+6, GV_ADD|putf8, SVt_PVIO
7195 GvIOp(gv) = newIO();
7196 IoIFP(GvIOp(gv)) = PL_rsfp;
7197 #if defined(HAS_FCNTL) && defined(F_SETFD)
7199 const int fd = PerlIO_fileno(PL_rsfp);
7200 fcntl(fd,F_SETFD,fd >= 3);
7203 /* Mark this internal pseudo-handle as clean */
7204 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7205 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7206 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7208 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7209 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7210 /* if the script was opened in binmode, we need to revert
7211 * it to text mode for compatibility; but only iff it has CRs
7212 * XXX this is a questionable hack at best. */
7213 if (PL_bufend-PL_bufptr > 2
7214 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7217 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7218 loc = PerlIO_tell(PL_rsfp);
7219 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7222 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7224 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7225 #endif /* NETWARE */
7227 PerlIO_seek(PL_rsfp, loc, 0);
7231 #ifdef PERLIO_LAYERS
7234 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7235 else if (PL_encoding) {
7242 XPUSHs(PL_encoding);
7244 call_method("name", G_SCALAR);
7248 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7249 Perl_form(aTHX_ ":encoding(%"SVf")",
7258 if (PL_realtokenstart >= 0) {
7259 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7261 PL_endwhite = newSVpvs("");
7262 sv_catsv(PL_endwhite, PL_thiswhite);
7264 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7265 PL_realtokenstart = -1;
7267 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7277 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7286 if (PL_expect == XSTATE) {
7293 if (*s == ':' && s[1] == ':') {
7297 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7298 if ((*s == ':' && s[1] == ':')
7299 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7303 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7307 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7308 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7309 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7312 else if (tmp == KEY_require || tmp == KEY_do
7314 /* that's a way to remember we saw "CORE::" */
7327 LOP(OP_ACCEPT,XTERM);
7330 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7335 LOP(OP_ATAN2,XTERM);
7341 LOP(OP_BINMODE,XTERM);
7344 LOP(OP_BLESS,XTERM);
7353 /* We have to disambiguate the two senses of
7354 "continue". If the next token is a '{' then
7355 treat it as the start of a continue block;
7356 otherwise treat it as a control operator.
7366 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7376 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7385 if (!PL_cryptseen) {
7386 PL_cryptseen = TRUE;
7390 LOP(OP_CRYPT,XTERM);
7393 LOP(OP_CHMOD,XTERM);
7396 LOP(OP_CHOWN,XTERM);
7399 LOP(OP_CONNECT,XTERM);
7418 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
7421 if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
7424 if (orig_keyword == KEY_do) {
7433 PL_hints |= HINT_BLOCK_SCOPE;
7443 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7444 STR_WITH_LEN("NDBM_File::"),
7445 STR_WITH_LEN("DB_File::"),
7446 STR_WITH_LEN("GDBM_File::"),
7447 STR_WITH_LEN("SDBM_File::"),
7448 STR_WITH_LEN("ODBM_File::"),
7450 LOP(OP_DBMOPEN,XTERM);
7456 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7463 pl_yylval.ival = CopLINE(PL_curcop);
7467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7481 if (*s == '{') { /* block eval */
7482 PL_expect = XTERMBLOCK;
7483 UNIBRACK(OP_ENTERTRY);
7485 else { /* string eval */
7487 UNIBRACK(OP_ENTEREVAL);
7492 UNIBRACK(-OP_ENTEREVAL);
7506 case KEY_endhostent:
7512 case KEY_endservent:
7515 case KEY_endprotoent:
7526 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7528 pl_yylval.ival = CopLINE(PL_curcop);
7530 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7533 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7536 if ((PL_bufend - p) >= 3 &&
7537 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7539 else if ((PL_bufend - p) >= 4 &&
7540 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7543 if (isIDFIRST_lazy_if(p,UTF)) {
7544 p = scan_ident(p, PL_bufend,
7545 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7549 Perl_croak(aTHX_ "Missing $ on loop variable");
7551 s = SvPVX(PL_linestr) + soff;
7557 LOP(OP_FORMLINE,XTERM);
7566 LOP(OP_FCNTL,XTERM);
7572 LOP(OP_FLOCK,XTERM);
7575 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7580 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7585 LOP(OP_GREPSTART, XREF);
7588 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7603 case KEY_getpriority:
7604 LOP(OP_GETPRIORITY,XTERM);
7606 case KEY_getprotobyname:
7609 case KEY_getprotobynumber:
7610 LOP(OP_GPBYNUMBER,XTERM);
7612 case KEY_getprotoent:
7624 case KEY_getpeername:
7625 UNI(OP_GETPEERNAME);
7627 case KEY_gethostbyname:
7630 case KEY_gethostbyaddr:
7631 LOP(OP_GHBYADDR,XTERM);
7633 case KEY_gethostent:
7636 case KEY_getnetbyname:
7639 case KEY_getnetbyaddr:
7640 LOP(OP_GNBYADDR,XTERM);
7645 case KEY_getservbyname:
7646 LOP(OP_GSBYNAME,XTERM);
7648 case KEY_getservbyport:
7649 LOP(OP_GSBYPORT,XTERM);
7651 case KEY_getservent:
7654 case KEY_getsockname:
7655 UNI(OP_GETSOCKNAME);
7657 case KEY_getsockopt:
7658 LOP(OP_GSOCKOPT,XTERM);
7673 pl_yylval.ival = CopLINE(PL_curcop);
7678 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7686 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7688 pl_yylval.ival = CopLINE(PL_curcop);
7692 LOP(OP_INDEX,XTERM);
7698 LOP(OP_IOCTL,XTERM);
7710 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7727 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7732 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7746 LOP(OP_LISTEN,XTERM);
7755 s = scan_pat(s,OP_MATCH);
7756 TERM(sublex_start());
7759 LOP(OP_MAPSTART, XREF);
7762 LOP(OP_MKDIR,XTERM);
7765 LOP(OP_MSGCTL,XTERM);
7768 LOP(OP_MSGGET,XTERM);
7771 LOP(OP_MSGRCV,XTERM);
7774 LOP(OP_MSGSND,XTERM);
7779 PL_in_my = (U16)tmp;
7781 if (isIDFIRST_lazy_if(s,UTF)) {
7785 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7786 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7788 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7789 if (!PL_in_my_stash) {
7792 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7793 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7796 if (PL_madskills) { /* just add type to declarator token */
7797 sv_catsv(PL_thistoken, PL_nextwhite);
7799 sv_catpvn(PL_thistoken, start, s - start);
7807 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7811 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7816 s = tokenize_use(0, s);
7820 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7823 if (!PL_lex_allbrackets &&
7824 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7825 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7831 if (isIDFIRST_lazy_if(s,UTF)) {
7833 for (d = s; isALNUM_lazy_if(d,UTF);) {
7834 d += UTF ? UTF8SKIP(d) : 1;
7836 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7837 d += UTF ? UTF8SKIP(d) : 1;
7841 for (t=d; isSPACE(*t);)
7843 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7845 && !(t[0] == '=' && t[1] == '>')
7846 && !(t[0] == ':' && t[1] == ':')
7847 && !keyword(s, d-s, 0)
7849 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7850 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7851 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7852 "Precedence problem: open %"SVf" should be open(%"SVf")",
7853 SVfARG(tmpsv), SVfARG(tmpsv));
7859 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7861 pl_yylval.ival = OP_OR;
7871 LOP(OP_OPEN_DIR,XTERM);
7874 checkcomma(s,PL_tokenbuf,"filehandle");
7878 checkcomma(s,PL_tokenbuf,"filehandle");
7897 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7899 s = force_strict_version(s);
7900 PL_lex_expect = XBLOCK;
7904 LOP(OP_PIPE_OP,XTERM);
7907 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7910 pl_yylval.ival = OP_CONST;
7911 TERM(sublex_start());
7918 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7921 PL_expect = XOPERATOR;
7922 if (SvCUR(PL_lex_stuff)) {
7923 int warned_comma = !ckWARN(WARN_QW);
7924 int warned_comment = warned_comma;
7925 d = SvPV_force(PL_lex_stuff, len);
7927 for (; isSPACE(*d) && len; --len, ++d)
7932 if (!warned_comma || !warned_comment) {
7933 for (; !isSPACE(*d) && len; --len, ++d) {
7934 if (!warned_comma && *d == ',') {
7935 Perl_warner(aTHX_ packWARN(WARN_QW),
7936 "Possible attempt to separate words with commas");
7939 else if (!warned_comment && *d == '#') {
7940 Perl_warner(aTHX_ packWARN(WARN_QW),
7941 "Possible attempt to put comments in qw() list");
7947 for (; !isSPACE(*d) && len; --len, ++d)
7950 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7951 words = op_append_elem(OP_LIST, words,
7952 newSVOP(OP_CONST, 0, tokeq(sv)));
7957 words = newNULLLIST();
7959 SvREFCNT_dec(PL_lex_stuff);
7960 PL_lex_stuff = NULL;
7962 PL_expect = XOPERATOR;
7963 pl_yylval.opval = sawparens(words);
7968 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7971 pl_yylval.ival = OP_STRINGIFY;
7972 if (SvIVX(PL_lex_stuff) == '\'')
7973 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7974 TERM(sublex_start());
7977 s = scan_pat(s,OP_QR);
7978 TERM(sublex_start());
7981 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7984 readpipe_override();
7985 TERM(sublex_start());
7993 s = force_version(s, FALSE);
7995 else if (*s != 'v' || !isDIGIT(s[1])
7996 || (s = force_version(s, TRUE), *s == 'v'))
7998 *PL_tokenbuf = '\0';
7999 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8000 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8001 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8002 GV_ADD | (UTF ? SVf_UTF8 : 0));
8004 yyerror("<> should be quotes");
8006 if (orig_keyword == KEY_require) {
8014 PL_last_uni = PL_oldbufptr;
8015 PL_last_lop_op = OP_REQUIRE;
8017 return REPORT( (int)REQUIRE );
8023 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8027 LOP(OP_RENAME,XTERM);
8036 LOP(OP_RINDEX,XTERM);
8045 UNIDOR(OP_READLINE);
8048 UNIDOR(OP_BACKTICK);
8057 LOP(OP_REVERSE,XTERM);
8060 UNIDOR(OP_READLINK);
8067 if (pl_yylval.opval)
8068 TERM(sublex_start());
8070 TOKEN(1); /* force error */
8073 checkcomma(s,PL_tokenbuf,"filehandle");
8083 LOP(OP_SELECT,XTERM);
8089 LOP(OP_SEMCTL,XTERM);
8092 LOP(OP_SEMGET,XTERM);
8095 LOP(OP_SEMOP,XTERM);
8101 LOP(OP_SETPGRP,XTERM);
8103 case KEY_setpriority:
8104 LOP(OP_SETPRIORITY,XTERM);
8106 case KEY_sethostent:
8112 case KEY_setservent:
8115 case KEY_setprotoent:
8125 LOP(OP_SEEKDIR,XTERM);
8127 case KEY_setsockopt:
8128 LOP(OP_SSOCKOPT,XTERM);
8134 LOP(OP_SHMCTL,XTERM);
8137 LOP(OP_SHMGET,XTERM);
8140 LOP(OP_SHMREAD,XTERM);
8143 LOP(OP_SHMWRITE,XTERM);
8146 LOP(OP_SHUTDOWN,XTERM);
8155 LOP(OP_SOCKET,XTERM);
8157 case KEY_socketpair:
8158 LOP(OP_SOCKPAIR,XTERM);
8161 checkcomma(s,PL_tokenbuf,"subroutine name");
8164 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8168 LOP(OP_SPLIT,XTERM);
8171 LOP(OP_SPRINTF,XTERM);
8174 LOP(OP_SPLICE,XTERM);
8189 LOP(OP_SUBSTR,XTERM);
8195 char tmpbuf[sizeof PL_tokenbuf];
8196 SSize_t tboffset = 0;
8197 expectation attrful;
8198 bool have_name, have_proto;
8199 const int key = tmp;
8204 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8205 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8209 s = SKIPSPACE2(s,tmpwhite);
8214 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8215 (*s == ':' && s[1] == ':'))
8218 SV *nametoke = NULL;
8222 attrful = XATTRBLOCK;
8223 /* remember buffer pos'n for later force_word */
8224 tboffset = s - PL_oldbufptr;
8225 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8228 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8230 if (memchr(tmpbuf, ':', len))
8231 sv_setpvn(PL_subname, tmpbuf, len);
8233 sv_setsv(PL_subname,PL_curstname);
8234 sv_catpvs(PL_subname,"::");
8235 sv_catpvn(PL_subname,tmpbuf,len);
8237 if (SvUTF8(PL_linestr))
8238 SvUTF8_on(PL_subname);
8244 CURMAD('X', nametoke);
8245 CURMAD('_', tmpwhite);
8246 (void) force_word(PL_oldbufptr + tboffset, WORD,
8249 s = SKIPSPACE2(d,tmpwhite);
8256 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8257 PL_expect = XTERMBLOCK;
8258 attrful = XATTRTERM;
8259 sv_setpvs(PL_subname,"?");
8263 if (key == KEY_format) {
8265 PL_thistoken = subtoken;
8269 (void) force_word(PL_oldbufptr + tboffset, WORD,
8275 /* Look for a prototype */
8278 bool bad_proto = FALSE;
8279 bool in_brackets = FALSE;
8280 char greedy_proto = ' ';
8281 bool proto_after_greedy_proto = FALSE;
8282 bool must_be_last = FALSE;
8283 bool underscore = FALSE;
8284 bool seen_underscore = FALSE;
8285 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8288 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8290 Perl_croak(aTHX_ "Prototype not terminated");
8291 /* strip spaces and check for bad characters */
8292 d = SvPV(PL_lex_stuff, tmplen);
8294 for (p = d; tmplen; tmplen--, ++p) {
8298 if (warnillegalproto) {
8300 proto_after_greedy_proto = TRUE;
8301 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8306 if ( !strchr(";@%", *p) )
8313 else if ( *p == ']' ) {
8314 in_brackets = FALSE;
8316 else if ( (*p == '@' || *p == '%') &&
8317 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8319 must_be_last = TRUE;
8322 else if ( *p == '_' ) {
8323 underscore = seen_underscore = TRUE;
8330 if (proto_after_greedy_proto)
8331 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8332 "Prototype after '%c' for %"SVf" : %s",
8333 greedy_proto, SVfARG(PL_subname), d);
8335 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8336 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8337 "Illegal character %sin prototype for %"SVf" : %s",
8338 seen_underscore ? "after '_' " : "",
8340 SvUTF8(PL_lex_stuff)
8341 ? sv_uni_display(dsv,
8342 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8344 UNI_DISPLAY_ISPRINT)
8345 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8346 PERL_PV_ESCAPE_NONASCII));
8348 SvCUR_set(PL_lex_stuff, tmp);
8353 CURMAD('q', PL_thisopen);
8354 CURMAD('_', tmpwhite);
8355 CURMAD('=', PL_thisstuff);
8356 CURMAD('Q', PL_thisclose);
8357 NEXTVAL_NEXTTOKE.opval =
8358 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8359 PL_lex_stuff = NULL;
8362 s = SKIPSPACE2(s,tmpwhite);
8370 if (*s == ':' && s[1] != ':')
8371 PL_expect = attrful;
8372 else if (*s != '{' && key == KEY_sub) {
8374 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8375 else if (*s != ';' && *s != '}')
8376 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8383 curmad('^', newSVpvs(""));
8384 CURMAD('_', tmpwhite);
8388 PL_thistoken = subtoken;
8391 NEXTVAL_NEXTTOKE.opval =
8392 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8393 PL_lex_stuff = NULL;
8399 sv_setpvs(PL_subname, "__ANON__");
8401 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8405 (void) force_word(PL_oldbufptr + tboffset, WORD,
8414 LOP(OP_SYSTEM,XREF);
8417 LOP(OP_SYMLINK,XTERM);
8420 LOP(OP_SYSCALL,XTERM);
8423 LOP(OP_SYSOPEN,XTERM);
8426 LOP(OP_SYSSEEK,XTERM);
8429 LOP(OP_SYSREAD,XTERM);
8432 LOP(OP_SYSWRITE,XTERM);
8436 TERM(sublex_start());
8457 LOP(OP_TRUNCATE,XTERM);
8469 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8471 pl_yylval.ival = CopLINE(PL_curcop);
8475 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8477 pl_yylval.ival = CopLINE(PL_curcop);
8481 LOP(OP_UNLINK,XTERM);
8487 LOP(OP_UNPACK,XTERM);
8490 LOP(OP_UTIME,XTERM);
8496 LOP(OP_UNSHIFT,XTERM);
8499 s = tokenize_use(1, s);
8509 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8511 pl_yylval.ival = CopLINE(PL_curcop);
8515 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8517 pl_yylval.ival = CopLINE(PL_curcop);
8521 PL_hints |= HINT_BLOCK_SCOPE;
8528 LOP(OP_WAITPID,XTERM);
8537 ctl_l[0] = toCTRL('L');
8539 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8542 /* Make sure $^L is defined */
8543 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8548 if (PL_expect == XOPERATOR) {
8549 if (*s == '=' && !PL_lex_allbrackets &&
8550 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8558 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8560 pl_yylval.ival = OP_XOR;
8565 TERM(sublex_start());
8570 #pragma segment Main
8574 S_pending_ident(pTHX)
8578 /* pit holds the identifier we read and pending_ident is reset */
8579 char pit = PL_pending_ident;
8580 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8581 /* All routes through this function want to know if there is a colon. */
8582 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8583 PL_pending_ident = 0;
8585 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8586 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8587 "### Pending identifier '%s'\n", PL_tokenbuf); });
8589 /* if we're in a my(), we can't allow dynamics here.
8590 $foo'bar has already been turned into $foo::bar, so
8591 just check for colons.
8593 if it's a legal name, the OP is a PADANY.
8596 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8598 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8599 "variable %s in \"our\"",
8600 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8601 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8605 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8606 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8607 UTF ? SVf_UTF8 : 0);
8609 pl_yylval.opval = newOP(OP_PADANY, 0);
8610 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8611 UTF ? SVf_UTF8 : 0);
8617 build the ops for accesses to a my() variable.
8622 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8623 UTF ? SVf_UTF8 : 0);
8624 if (tmp != NOT_IN_PAD) {
8625 /* might be an "our" variable" */
8626 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8627 /* build ops for a bareword */
8628 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8629 HEK * const stashname = HvNAME_HEK(stash);
8630 SV * const sym = newSVhek(stashname);
8631 sv_catpvs(sym, "::");
8632 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8633 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8634 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8637 ? (GV_ADDMULTI | GV_ADDINEVAL)
8640 ((PL_tokenbuf[0] == '$') ? SVt_PV
8641 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8646 pl_yylval.opval = newOP(OP_PADANY, 0);
8647 pl_yylval.opval->op_targ = tmp;
8653 Whine if they've said @foo in a doublequoted string,
8654 and @foo isn't a variable we can find in the symbol
8657 if (ckWARN(WARN_AMBIGUOUS) &&
8658 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8659 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8660 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8661 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8662 /* DO NOT warn for @- and @+ */
8663 && !( PL_tokenbuf[2] == '\0' &&
8664 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8667 /* Downgraded from fatal to warning 20000522 mjd */
8668 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8669 "Possible unintended interpolation of %"SVf" in string",
8670 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8671 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8675 /* build ops for a bareword */
8676 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8678 UTF ? SVf_UTF8 : 0 ));
8679 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8680 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8681 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8682 | ( UTF ? SVf_UTF8 : 0 ),
8683 ((PL_tokenbuf[0] == '$') ? SVt_PV
8684 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8690 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8694 PERL_ARGS_ASSERT_CHECKCOMMA;
8696 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8697 if (ckWARN(WARN_SYNTAX)) {
8700 for (w = s+2; *w && level; w++) {
8708 /* the list of chars below is for end of statements or
8709 * block / parens, boolean operators (&&, ||, //) and branch
8710 * constructs (or, and, if, until, unless, while, err, for).
8711 * Not a very solid hack... */
8712 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8713 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8714 "%s (...) interpreted as function",name);
8717 while (s < PL_bufend && isSPACE(*s))
8721 while (s < PL_bufend && isSPACE(*s))
8723 if (isIDFIRST_lazy_if(s,UTF)) {
8724 const char * const w = s;
8725 s += UTF ? UTF8SKIP(s) : 1;
8726 while (isALNUM_lazy_if(s,UTF))
8727 s += UTF ? UTF8SKIP(s) : 1;
8728 while (s < PL_bufend && isSPACE(*s))
8732 if (keyword(w, s - w, 0))
8735 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8736 if (gv && GvCVu(gv))
8738 Perl_croak(aTHX_ "No comma allowed after %s", what);
8743 /* Either returns sv, or mortalizes sv and returns a new SV*.
8744 Best used as sv=new_constant(..., sv, ...).
8745 If s, pv are NULL, calls subroutine with one argument,
8746 and type is used with error messages only. */
8749 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8750 SV *sv, SV *pv, const char *type, STRLEN typelen)
8753 HV * table = GvHV(PL_hintgv); /* ^H */
8757 const char *why1 = "", *why2 = "", *why3 = "";
8759 PERL_ARGS_ASSERT_NEW_CONSTANT;
8761 /* charnames doesn't work well if there have been errors found */
8762 if (PL_error_count > 0 && strEQ(key,"charnames"))
8763 return &PL_sv_undef;
8766 || ! (PL_hints & HINT_LOCALIZE_HH)
8767 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8772 /* Here haven't found what we're looking for. If it is charnames,
8773 * perhaps it needs to be loaded. Try doing that before giving up */
8774 if (strEQ(key,"charnames")) {
8775 Perl_load_module(aTHX_
8777 newSVpvs("_charnames"),
8778 /* version parameter; no need to specify it, as if
8779 * we get too early a version, will fail anyway,
8780 * not being able to find '_charnames' */
8786 table = GvHV(PL_hintgv);
8788 && (PL_hints & HINT_LOCALIZE_HH)
8789 && (cvp = hv_fetch(table, key, keylen, FALSE))
8795 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8796 msg = Perl_newSVpvf(aTHX_
8797 "Constant(%s) unknown", (type ? type: "undef"));
8802 why3 = "} is not defined";
8804 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8805 (type ? type: "undef"), why1, why2, why3);
8807 yyerror(SvPVX_const(msg));
8812 sv_2mortal(sv); /* Parent created it permanently */
8815 pv = newSVpvn_flags(s, len, SVs_TEMP);
8817 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8819 typesv = &PL_sv_undef;
8821 PUSHSTACKi(PERLSI_OVERLOAD);
8833 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8837 /* Check the eval first */
8838 if (!PL_in_eval && SvTRUE(ERRSV)) {
8839 sv_catpvs(ERRSV, "Propagated");
8840 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8842 res = SvREFCNT_inc_simple(sv);
8846 SvREFCNT_inc_simple_void(res);
8855 why1 = "Call to &{$^H{";
8857 why3 = "}} did not return a defined value";
8865 /* Returns a NUL terminated string, with the length of the string written to
8869 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8873 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8875 PERL_ARGS_ASSERT_SCAN_WORD;
8879 Perl_croak(aTHX_ ident_too_long);
8880 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
8882 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8887 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8891 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8892 char *t = s + UTF8SKIP(s);
8894 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8898 Perl_croak(aTHX_ ident_too_long);
8899 Copy(s, d, len, char);
8912 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8915 char *bracket = NULL;
8918 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8920 PERL_ARGS_ASSERT_SCAN_IDENT;
8925 while (isDIGIT(*s)) {
8927 Perl_croak(aTHX_ ident_too_long);
8934 Perl_croak(aTHX_ ident_too_long);
8935 if (isALNUM(*s)) /* UTF handled below */
8937 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8942 else if (*s == ':' && s[1] == ':') {
8946 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8947 char *t = s + UTF8SKIP(s);
8948 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8950 if (d + (t - s) > e)
8951 Perl_croak(aTHX_ ident_too_long);
8952 Copy(s, d, t - s, char);
8963 if (PL_lex_state != LEX_NORMAL)
8964 PL_lex_state = LEX_INTERPENDMAYBE;
8967 if (*s == '$' && s[1] &&
8968 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8978 const STRLEN skip = UTF8SKIP(s);
8981 for ( i = 0; i < skip; i++ )
8989 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8993 else if (ck_uni && !bracket)
8996 if (isSPACE(s[-1])) {
8998 const char ch = *s++;
8999 if (!SPACE_OR_TAB(ch)) {
9005 if (isIDFIRST_lazy_if(d,UTF)) {
9009 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9010 end += UTF8SKIP(end);
9011 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9012 end += UTF8SKIP(end);
9014 Copy(s, d, end - s, char);
9019 while ((isALNUM(*s) || *s == ':') && d < e)
9022 Perl_croak(aTHX_ ident_too_long);
9025 while (s < send && SPACE_OR_TAB(*s))
9027 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9028 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9029 const char * const brack =
9031 ((*s == '[') ? "[...]" : "{...}");
9032 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9033 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9034 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9035 funny, dest, brack, funny, dest, brack);
9038 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9039 PL_lex_allbrackets++;
9043 /* Handle extended ${^Foo} variables
9044 * 1999-02-27 mjd-perl-patch@plover.com */
9045 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9049 while (isALNUM(*s) && d < e) {
9053 Perl_croak(aTHX_ ident_too_long);
9058 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9059 PL_lex_state = LEX_INTERPEND;
9062 if (PL_lex_state == LEX_NORMAL) {
9063 if (ckWARN(WARN_AMBIGUOUS) &&
9064 (keyword(dest, d - dest, 0)
9065 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9067 SV *tmp = newSVpvn_flags( dest, d - dest,
9068 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9071 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9072 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9073 funny, tmp, funny, tmp);
9078 s = bracket; /* let the parser handle it */
9082 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9083 PL_lex_state = LEX_INTERPEND;
9088 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9090 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9091 * the parse starting at 's', based on the subset that are valid in this
9092 * context input to this routine in 'valid_flags'. Advances s. Returns
9093 * TRUE if the input should be treated as a valid flag, so the next char
9094 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9095 * first call on the current regex. This routine will set it to any
9096 * charset modifier found. The caller shouldn't change it. This way,
9097 * another charset modifier encountered in the parse can be detected as an
9098 * error, as we have decided to allow only one */
9101 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9103 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9104 if (isALNUM_lazy_if(*s, UTF)) {
9105 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9106 UTF ? SVf_UTF8 : 0);
9108 /* Pretend that it worked, so will continue processing before
9117 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9118 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9119 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9120 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9121 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9122 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9123 case LOCALE_PAT_MOD:
9125 goto multiple_charsets;
9127 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9130 case UNICODE_PAT_MOD:
9132 goto multiple_charsets;
9134 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9137 case ASCII_RESTRICT_PAT_MOD:
9139 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9143 /* Error if previous modifier wasn't an 'a', but if it was, see
9144 * if, and accept, a second occurrence (only) */
9146 || get_regex_charset(*pmfl)
9147 != REGEX_ASCII_RESTRICTED_CHARSET)
9149 goto multiple_charsets;
9151 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9155 case DEPENDS_PAT_MOD:
9157 goto multiple_charsets;
9159 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9168 if (*charset != c) {
9169 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9171 else if (c == 'a') {
9172 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9175 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9178 /* Pretend that it worked, so will continue processing before dieing */
9184 S_scan_pat(pTHX_ char *start, I32 type)
9188 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9189 const char * const valid_flags =
9190 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9191 char charset = '\0'; /* character set modifier */
9196 PERL_ARGS_ASSERT_SCAN_PAT;
9198 /* this was only needed for the initial scan_str; set it to false
9199 * so that any (?{}) code blocks etc are parsed normally */
9200 PL_reg_state.re_reparsing = FALSE;
9202 const char * const delimiter = skipspace(start);
9206 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9207 : "Search pattern not terminated" ));
9210 pm = (PMOP*)newPMOP(type, 0);
9211 if (PL_multi_open == '?') {
9212 /* This is the only point in the code that sets PMf_ONCE: */
9213 pm->op_pmflags |= PMf_ONCE;
9215 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9216 allows us to restrict the list needed by reset to just the ??
9218 assert(type != OP_TRANS);
9220 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9223 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9226 elements = mg->mg_len / sizeof(PMOP**);
9227 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9228 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9229 mg->mg_len = elements * sizeof(PMOP**);
9230 PmopSTASH_set(pm,PL_curstash);
9237 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9238 * anon CV. False positives like qr/[(?{]/ are harmless */
9240 if (type == OP_QR) {
9242 char *e, *p = SvPV(PL_lex_stuff, len);
9244 for (; p < e; p++) {
9245 if (p[0] == '(' && p[1] == '?'
9246 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9248 pm->op_pmflags |= PMf_HAS_CV;
9252 pm->op_pmflags |= PMf_IS_QR;
9255 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9257 if (PL_madskills && modstart != s) {
9258 SV* tmptoken = newSVpvn(modstart, s - modstart);
9259 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9262 /* issue a warning if /c is specified,but /g is not */
9263 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9265 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9266 "Use of /c modifier is meaningless without /g" );
9269 PL_lex_op = (OP*)pm;
9270 pl_yylval.ival = OP_MATCH;
9275 S_scan_subst(pTHX_ char *start)
9282 char charset = '\0'; /* character set modifier */
9287 PERL_ARGS_ASSERT_SCAN_SUBST;
9289 pl_yylval.ival = OP_NULL;
9291 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9294 Perl_croak(aTHX_ "Substitution pattern not terminated");
9296 if (s[-1] == PL_multi_open)
9300 CURMAD('q', PL_thisopen);
9301 CURMAD('_', PL_thiswhite);
9302 CURMAD('E', PL_thisstuff);
9303 CURMAD('Q', PL_thisclose);
9304 PL_realtokenstart = s - SvPVX(PL_linestr);
9308 first_start = PL_multi_start;
9309 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9312 SvREFCNT_dec(PL_lex_stuff);
9313 PL_lex_stuff = NULL;
9315 Perl_croak(aTHX_ "Substitution replacement not terminated");
9317 PL_multi_start = first_start; /* so whole substitution is taken together */
9319 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9323 CURMAD('z', PL_thisopen);
9324 CURMAD('R', PL_thisstuff);
9325 CURMAD('Z', PL_thisclose);
9331 if (*s == EXEC_PAT_MOD) {
9335 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9344 curmad('m', newSVpvn(modstart, s - modstart));
9345 append_madprops(PL_thismad, (OP*)pm, 0);
9349 if ((pm->op_pmflags & PMf_CONTINUE)) {
9350 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9354 SV * const repl = newSVpvs("");
9357 pm->op_pmflags |= PMf_EVAL;
9360 sv_catpvs(repl, "eval ");
9362 sv_catpvs(repl, "do ");
9364 sv_catpvs(repl, "{");
9365 sv_catsv(repl, PL_sublex_info.repl);
9366 if (strchr(SvPVX(PL_sublex_info.repl), '#'))
9367 sv_catpvs(repl, "\n");
9368 sv_catpvs(repl, "}");
9370 SvREFCNT_dec(PL_sublex_info.repl);
9371 PL_sublex_info.repl = repl;
9374 PL_lex_op = (OP*)pm;
9375 pl_yylval.ival = OP_SUBST;
9380 S_scan_trans(pTHX_ char *start)
9388 bool nondestruct = 0;
9393 PERL_ARGS_ASSERT_SCAN_TRANS;
9395 pl_yylval.ival = OP_NULL;
9397 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9399 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9401 if (s[-1] == PL_multi_open)
9405 CURMAD('q', PL_thisopen);
9406 CURMAD('_', PL_thiswhite);
9407 CURMAD('E', PL_thisstuff);
9408 CURMAD('Q', PL_thisclose);
9409 PL_realtokenstart = s - SvPVX(PL_linestr);
9413 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9416 SvREFCNT_dec(PL_lex_stuff);
9417 PL_lex_stuff = NULL;
9419 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9422 CURMAD('z', PL_thisopen);
9423 CURMAD('R', PL_thisstuff);
9424 CURMAD('Z', PL_thisclose);
9427 complement = del = squash = 0;
9434 complement = OPpTRANS_COMPLEMENT;
9437 del = OPpTRANS_DELETE;
9440 squash = OPpTRANS_SQUASH;
9452 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9453 o->op_private &= ~OPpTRANS_ALL;
9454 o->op_private |= del|squash|complement|
9455 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9456 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9459 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9464 curmad('m', newSVpvn(modstart, s - modstart));
9465 append_madprops(PL_thismad, o, 0);
9474 Takes a pointer to the first < in <<FOO.
9475 Returns a pointer to the byte following <<FOO.
9477 This function scans a heredoc, which involves different methods
9478 depending on whether we are in a string eval, quoted construct, etc.
9479 This is because PL_linestr could containing a single line of input, or
9480 a whole string being evalled, or the contents of the current quote-
9483 The three methods are:
9484 - Steal lines from the input stream (stream)
9485 - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
9486 - Peek at the PL_linestr of outer lexing scopes (peek)
9488 They are used in these cases:
9489 file scope or filtered eval stream
9491 multiline quoted construct linestr
9492 single-line quoted construct in file stream
9493 single-line quoted construct in eval or quote peek
9495 Single-line also applies to heredocs that begin on the last line of a
9496 quote-like operator.
9498 Peeking within a quote also involves falling back to the stream method,
9499 if the outer quote-like operators are all on one line (or the heredoc
9500 marker is on the last line).
9504 S_scan_heredoc(pTHX_ register char *s)
9508 I32 op_type = OP_SCALAR;
9512 const char *found_newline = 0;
9516 const bool infile = PL_rsfp || PL_parser->filtered;
9518 I32 stuffstart = s - SvPVX(PL_linestr);
9521 PL_realtokenstart = -1;
9524 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9527 d = PL_tokenbuf + 1;
9528 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9529 *PL_tokenbuf = '\n';
9531 while (SPACE_OR_TAB(*peek))
9533 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9536 s = delimcpy(d, e, s, PL_bufend, term, &len);
9538 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9544 /* <<\FOO is equivalent to <<'FOO' */
9548 if (!isALNUM_lazy_if(s,UTF))
9549 deprecate("bare << to mean <<\"\"");
9550 for (; isALNUM_lazy_if(s,UTF); s++) {
9555 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9556 Perl_croak(aTHX_ "Delimiter for here document is too long");
9559 len = d - PL_tokenbuf;
9563 tstart = PL_tokenbuf + 1;
9564 PL_thisclose = newSVpvn(tstart, len - 1);
9565 tstart = SvPVX(PL_linestr) + stuffstart;
9566 PL_thisopen = newSVpvn(tstart, s - tstart);
9567 stuffstart = s - SvPVX(PL_linestr);
9570 #ifndef PERL_STRICT_CR
9571 d = strchr(s, '\r');
9573 char * const olds = s;
9575 while (s < PL_bufend) {
9581 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9590 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9594 if ((infile && !PL_lex_inwhat)
9595 || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
9596 herewas = newSVpvn(s,PL_bufend-s);
9600 herewas = newSVpvn(s-1,found_newline-s+1);
9603 herewas = newSVpvn(s,found_newline-s);
9608 tstart = SvPVX(PL_linestr) + stuffstart;
9610 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9612 PL_thisstuff = newSVpvn(tstart, s - tstart);
9615 s += SvCUR(herewas);
9618 stuffstart = s - SvPVX(PL_linestr);
9624 tmpstr = newSV_type(SVt_PVIV);
9628 SvIV_set(tmpstr, -1);
9630 else if (term == '`') {
9631 op_type = OP_BACKTICK;
9632 SvIV_set(tmpstr, '\\');
9636 PL_multi_start = CopLINE(PL_curcop);
9637 PL_multi_open = PL_multi_close = '<';
9638 if (PL_lex_inwhat && !found_newline) {
9639 /* Peek into the line buffer of the parent lexing scope, going up
9640 as many levels as necessary to find one with a newline after
9641 bufptr. See the comments in sublex_push for how IVX and NVX
9644 SV *linestr = NUM2PTR(SV *, SvNVX(PL_linestr));
9645 char *bufptr = PL_sublex_info.super_bufptr;
9646 char *bufend = SvEND(linestr);
9647 char * const olds = s - SvCUR(herewas);
9648 char * const real_olds = s;
9653 while (!(s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr))){
9654 if (SvIVX(linestr)) {
9655 bufptr = INT2PTR(char *, SvIVX(linestr));
9656 linestr = NUM2PTR(SV *, SvNVX(linestr));
9657 bufend = SvEND(linestr);
9669 while (s < bufend &&
9670 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9672 CopLINE_inc(PL_curcop);
9675 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9676 missingterm(PL_tokenbuf + 1);
9678 sv_setpvn(herewas,bufptr,d-bufptr+1);
9679 sv_setpvn(tmpstr,d+1,s-d);
9681 sv_catpvn(herewas,s,bufend-s);
9682 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9684 bufptr-SvPVX_const(linestr)
9690 else if (!infile || found_newline) {
9691 char * const olds = s - SvCUR(herewas);
9693 while (s < PL_bufend &&
9694 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9696 CopLINE_inc(PL_curcop);
9698 if (s >= PL_bufend) {
9699 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9700 missingterm(PL_tokenbuf + 1);
9702 sv_setpvn(tmpstr,d+1,s-d);
9706 sv_catpvn(PL_thisstuff, d + 1, s - d);
9708 PL_thisstuff = newSVpvn(d + 1, s - d);
9709 stuffstart = s - SvPVX(PL_linestr);
9713 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9715 /* s now points to the newline after the heredoc terminator.
9716 d points to the newline before the body of the heredoc.
9718 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9719 check PL_sublex_info.re_eval_str. */
9720 if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
9721 /* Set aside the rest of the regexp */
9722 if (!PL_sublex_info.re_eval_str)
9723 PL_sublex_info.re_eval_str =
9724 newSVpvn(PL_sublex_info.re_eval_start,
9725 PL_bufend - PL_sublex_info.re_eval_start);
9726 PL_sublex_info.re_eval_start -= s-d;
9728 /* Copy everything from s onwards back to d. */
9729 Move(s,d,PL_bufend-s + 1,char);
9730 SvCUR_set(PL_linestr, SvCUR(PL_linestr) - (s-d));
9731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9735 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9737 term = PL_tokenbuf[1];
9739 while (s >= PL_bufend) { /* multiple line string? */
9742 tstart = SvPVX(PL_linestr) + stuffstart;
9744 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9746 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9750 CopLINE_inc(PL_curcop);
9751 if (!lex_next_chunk(LEX_NO_TERM)
9752 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9753 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9754 missingterm(PL_tokenbuf + 1);
9756 CopLINE_dec(PL_curcop);
9757 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9758 lex_grow_linestr(SvCUR(PL_linestr) + 2);
9759 sv_catpvs(PL_linestr, "\n\0");
9763 stuffstart = s - SvPVX(PL_linestr);
9765 CopLINE_inc(PL_curcop);
9766 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9767 PL_last_lop = PL_last_uni = NULL;
9768 #ifndef PERL_STRICT_CR
9769 if (PL_bufend - PL_linestart >= 2) {
9770 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9771 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9773 PL_bufend[-2] = '\n';
9775 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9777 else if (PL_bufend[-1] == '\r')
9778 PL_bufend[-1] = '\n';
9780 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9781 PL_bufend[-1] = '\n';
9783 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9784 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9785 *(SvPVX(PL_linestr) + off ) = ' ';
9786 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9787 sv_catsv(PL_linestr,herewas);
9788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9789 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9793 sv_catsv(tmpstr,PL_linestr);
9798 PL_multi_end = CopLINE(PL_curcop);
9799 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9800 SvPV_shrink_to_cur(tmpstr);
9802 SvREFCNT_dec(herewas);
9804 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9806 else if (PL_encoding)
9807 sv_recode_to_utf8(tmpstr, PL_encoding);
9809 PL_lex_stuff = tmpstr;
9810 pl_yylval.ival = op_type;
9815 takes: current position in input buffer
9816 returns: new position in input buffer
9817 side-effects: pl_yylval and lex_op are set.
9822 <FH> read from filehandle
9823 <pkg::FH> read from package qualified filehandle
9824 <pkg'FH> read from package qualified filehandle
9825 <$fh> read from filehandle in $fh
9831 S_scan_inputsymbol(pTHX_ char *start)
9834 char *s = start; /* current position in buffer */
9837 char *d = PL_tokenbuf; /* start of temp holding space */
9838 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9840 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9842 end = strchr(s, '\n');
9845 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9847 /* die if we didn't have space for the contents of the <>,
9848 or if it didn't end, or if we see a newline
9851 if (len >= (I32)sizeof PL_tokenbuf)
9852 Perl_croak(aTHX_ "Excessively long <> operator");
9854 Perl_croak(aTHX_ "Unterminated <> operator");
9859 Remember, only scalar variables are interpreted as filehandles by
9860 this code. Anything more complex (e.g., <$fh{$num}>) will be
9861 treated as a glob() call.
9862 This code makes use of the fact that except for the $ at the front,
9863 a scalar variable and a filehandle look the same.
9865 if (*d == '$' && d[1]) d++;
9867 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9868 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9869 d += UTF ? UTF8SKIP(d) : 1;
9871 /* If we've tried to read what we allow filehandles to look like, and
9872 there's still text left, then it must be a glob() and not a getline.
9873 Use scan_str to pull out the stuff between the <> and treat it
9874 as nothing more than a string.
9877 if (d - PL_tokenbuf != len) {
9878 pl_yylval.ival = OP_GLOB;
9879 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9881 Perl_croak(aTHX_ "Glob not terminated");
9885 bool readline_overriden = FALSE;
9888 /* we're in a filehandle read situation */
9891 /* turn <> into <ARGV> */
9893 Copy("ARGV",d,5,char);
9895 /* Check whether readline() is overriden */
9896 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9898 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9900 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9901 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9902 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9903 readline_overriden = TRUE;
9905 /* if <$fh>, create the ops to turn the variable into a
9909 /* try to find it in the pad for this block, otherwise find
9910 add symbol table ops
9912 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9913 if (tmp != NOT_IN_PAD) {
9914 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9915 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9916 HEK * const stashname = HvNAME_HEK(stash);
9917 SV * const sym = sv_2mortal(newSVhek(stashname));
9918 sv_catpvs(sym, "::");
9924 OP * const o = newOP(OP_PADSV, 0);
9926 PL_lex_op = readline_overriden
9927 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9928 op_append_elem(OP_LIST, o,
9929 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9930 : (OP*)newUNOP(OP_READLINE, 0, o);
9939 ? (GV_ADDMULTI | GV_ADDINEVAL)
9940 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9942 PL_lex_op = readline_overriden
9943 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9944 op_append_elem(OP_LIST,
9945 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9946 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9947 : (OP*)newUNOP(OP_READLINE, 0,
9948 newUNOP(OP_RV2SV, 0,
9949 newGVOP(OP_GV, 0, gv)));
9951 if (!readline_overriden)
9952 PL_lex_op->op_flags |= OPf_SPECIAL;
9953 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9954 pl_yylval.ival = OP_NULL;
9957 /* If it's none of the above, it must be a literal filehandle
9958 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9960 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9961 PL_lex_op = readline_overriden
9962 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9963 op_append_elem(OP_LIST,
9964 newGVOP(OP_GV, 0, gv),
9965 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9966 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9967 pl_yylval.ival = OP_NULL;
9976 takes: start position in buffer
9977 keep_quoted preserve \ on the embedded delimiter(s)
9978 keep_delims preserve the delimiters around the string
9979 re_reparse compiling a run-time /(?{})/:
9980 collapse // to /, and skip encoding src
9981 returns: position to continue reading from buffer
9982 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9983 updates the read buffer.
9985 This subroutine pulls a string out of the input. It is called for:
9986 q single quotes q(literal text)
9987 ' single quotes 'literal text'
9988 qq double quotes qq(interpolate $here please)
9989 " double quotes "interpolate $here please"
9990 qx backticks qx(/bin/ls -l)
9991 ` backticks `/bin/ls -l`
9992 qw quote words @EXPORT_OK = qw( func() $spam )
9993 m// regexp match m/this/
9994 s/// regexp substitute s/this/that/
9995 tr/// string transliterate tr/this/that/
9996 y/// string transliterate y/this/that/
9997 ($*@) sub prototypes sub foo ($)
9998 (stuff) sub attr parameters sub foo : attr(stuff)
9999 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10001 In most of these cases (all but <>, patterns and transliterate)
10002 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10003 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10004 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10007 It skips whitespace before the string starts, and treats the first
10008 character as the delimiter. If the delimiter is one of ([{< then
10009 the corresponding "close" character )]}> is used as the closing
10010 delimiter. It allows quoting of delimiters, and if the string has
10011 balanced delimiters ([{<>}]) it allows nesting.
10013 On success, the SV with the resulting string is put into lex_stuff or,
10014 if that is already non-NULL, into lex_repl. The second case occurs only
10015 when parsing the RHS of the special constructs s/// and tr/// (y///).
10016 For convenience, the terminating delimiter character is stuffed into
10021 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10024 SV *sv; /* scalar value: string */
10025 const char *tmps; /* temp string, used for delimiter matching */
10026 char *s = start; /* current position in the buffer */
10027 char term; /* terminating character */
10028 char *to; /* current position in the sv's data */
10029 I32 brackets = 1; /* bracket nesting level */
10030 bool has_utf8 = FALSE; /* is there any utf8 content? */
10031 I32 termcode; /* terminating char. code */
10032 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10033 STRLEN termlen; /* length of terminating string */
10034 int last_off = 0; /* last position for nesting bracket */
10040 PERL_ARGS_ASSERT_SCAN_STR;
10042 /* skip space before the delimiter */
10048 if (PL_realtokenstart >= 0) {
10049 stuffstart = PL_realtokenstart;
10050 PL_realtokenstart = -1;
10053 stuffstart = start - SvPVX(PL_linestr);
10055 /* mark where we are, in case we need to report errors */
10058 /* after skipping whitespace, the next character is the terminator */
10061 termcode = termstr[0] = term;
10065 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10066 Copy(s, termstr, termlen, U8);
10067 if (!UTF8_IS_INVARIANT(term))
10071 /* mark where we are */
10072 PL_multi_start = CopLINE(PL_curcop);
10073 PL_multi_open = term;
10075 /* find corresponding closing delimiter */
10076 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10077 termcode = termstr[0] = term = tmps[5];
10079 PL_multi_close = term;
10081 /* create a new SV to hold the contents. 79 is the SV's initial length.
10082 What a random number. */
10083 sv = newSV_type(SVt_PVIV);
10085 SvIV_set(sv, termcode);
10086 (void)SvPOK_only(sv); /* validate pointer */
10088 /* move past delimiter and try to read a complete string */
10090 sv_catpvn(sv, s, termlen);
10093 tstart = SvPVX(PL_linestr) + stuffstart;
10094 if (!PL_thisopen && !keep_delims) {
10095 PL_thisopen = newSVpvn(tstart, s - tstart);
10096 stuffstart = s - SvPVX(PL_linestr);
10100 if (PL_encoding && !UTF && !re_reparse) {
10104 int offset = s - SvPVX_const(PL_linestr);
10105 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10106 &offset, (char*)termstr, termlen);
10107 const char * const ns = SvPVX_const(PL_linestr) + offset;
10108 char * const svlast = SvEND(sv) - 1;
10110 for (; s < ns; s++) {
10111 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10112 CopLINE_inc(PL_curcop);
10115 goto read_more_line;
10117 /* handle quoted delimiters */
10118 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10120 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10122 if ((svlast-1 - t) % 2) {
10123 if (!keep_quoted) {
10124 *(svlast-1) = term;
10126 SvCUR_set(sv, SvCUR(sv) - 1);
10131 if (PL_multi_open == PL_multi_close) {
10137 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10138 /* At here, all closes are "was quoted" one,
10139 so we don't check PL_multi_close. */
10141 if (!keep_quoted && *(t+1) == PL_multi_open)
10146 else if (*t == PL_multi_open)
10154 SvCUR_set(sv, w - SvPVX_const(sv));
10156 last_off = w - SvPVX(sv);
10157 if (--brackets <= 0)
10162 if (!keep_delims) {
10163 SvCUR_set(sv, SvCUR(sv) - 1);
10169 /* extend sv if need be */
10170 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10171 /* set 'to' to the next character in the sv's string */
10172 to = SvPVX(sv)+SvCUR(sv);
10174 /* if open delimiter is the close delimiter read unbridle */
10175 if (PL_multi_open == PL_multi_close) {
10176 for (; s < PL_bufend; s++,to++) {
10177 /* embedded newlines increment the current line number */
10178 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10179 CopLINE_inc(PL_curcop);
10180 /* handle quoted delimiters */
10181 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10184 || (re_reparse && s[1] == '\\'))
10187 /* any other quotes are simply copied straight through */
10191 /* terminate when run out of buffer (the for() condition), or
10192 have found the terminator */
10193 else if (*s == term) {
10196 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10199 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10205 /* if the terminator isn't the same as the start character (e.g.,
10206 matched brackets), we have to allow more in the quoting, and
10207 be prepared for nested brackets.
10210 /* read until we run out of string, or we find the terminator */
10211 for (; s < PL_bufend; s++,to++) {
10212 /* embedded newlines increment the line count */
10213 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10214 CopLINE_inc(PL_curcop);
10215 /* backslashes can escape the open or closing characters */
10216 if (*s == '\\' && s+1 < PL_bufend) {
10217 if (!keep_quoted &&
10218 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10223 /* allow nested opens and closes */
10224 else if (*s == PL_multi_close && --brackets <= 0)
10226 else if (*s == PL_multi_open)
10228 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10233 /* terminate the copied string and update the sv's end-of-string */
10235 SvCUR_set(sv, to - SvPVX_const(sv));
10238 * this next chunk reads more into the buffer if we're not done yet
10242 break; /* handle case where we are done yet :-) */
10244 #ifndef PERL_STRICT_CR
10245 if (to - SvPVX_const(sv) >= 2) {
10246 if ((to[-2] == '\r' && to[-1] == '\n') ||
10247 (to[-2] == '\n' && to[-1] == '\r'))
10251 SvCUR_set(sv, to - SvPVX_const(sv));
10253 else if (to[-1] == '\r')
10256 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10261 /* if we're out of file, or a read fails, bail and reset the current
10262 line marker so we can report where the unterminated string began
10265 if (PL_madskills) {
10266 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10268 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10270 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10273 CopLINE_inc(PL_curcop);
10274 PL_bufptr = PL_bufend;
10275 if (!lex_next_chunk(0)) {
10277 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10286 /* at this point, we have successfully read the delimited string */
10288 if (!PL_encoding || UTF || re_reparse) {
10290 if (PL_madskills) {
10291 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10292 const int len = s - tstart;
10294 sv_catpvn(PL_thisstuff, tstart, len);
10296 PL_thisstuff = newSVpvn(tstart, len);
10297 if (!PL_thisclose && !keep_delims)
10298 PL_thisclose = newSVpvn(s,termlen);
10303 sv_catpvn(sv, s, termlen);
10308 if (PL_madskills) {
10309 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10310 const int len = s - tstart - termlen;
10312 sv_catpvn(PL_thisstuff, tstart, len);
10314 PL_thisstuff = newSVpvn(tstart, len);
10315 if (!PL_thisclose && !keep_delims)
10316 PL_thisclose = newSVpvn(s - termlen,termlen);
10320 if (has_utf8 || (PL_encoding && !re_reparse))
10323 PL_multi_end = CopLINE(PL_curcop);
10325 /* if we allocated too much space, give some back */
10326 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10327 SvLEN_set(sv, SvCUR(sv) + 1);
10328 SvPV_renew(sv, SvLEN(sv));
10331 /* decide whether this is the first or second quoted string we've read
10336 PL_sublex_info.repl = sv;
10344 takes: pointer to position in buffer
10345 returns: pointer to new position in buffer
10346 side-effects: builds ops for the constant in pl_yylval.op
10348 Read a number in any of the formats that Perl accepts:
10350 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10351 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10354 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10356 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10359 If it reads a number without a decimal point or an exponent, it will
10360 try converting the number to an integer and see if it can do so
10361 without loss of precision.
10365 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10368 const char *s = start; /* current position in buffer */
10369 char *d; /* destination in temp buffer */
10370 char *e; /* end of temp buffer */
10371 NV nv; /* number read, as a double */
10372 SV *sv = NULL; /* place to put the converted number */
10373 bool floatit; /* boolean: int or float? */
10374 const char *lastub = NULL; /* position of last underbar */
10375 static char const number_too_long[] = "Number too long";
10377 PERL_ARGS_ASSERT_SCAN_NUM;
10379 /* We use the first character to decide what type of number this is */
10383 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10385 /* if it starts with a 0, it could be an octal number, a decimal in
10386 0.13 disguise, or a hexadecimal number, or a binary number. */
10390 u holds the "number so far"
10391 shift the power of 2 of the base
10392 (hex == 4, octal == 3, binary == 1)
10393 overflowed was the number more than we can hold?
10395 Shift is used when we add a digit. It also serves as an "are
10396 we in octal/hex/binary?" indicator to disallow hex characters
10397 when in octal mode.
10402 bool overflowed = FALSE;
10403 bool just_zero = TRUE; /* just plain 0 or binary number? */
10404 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10405 static const char* const bases[5] =
10406 { "", "binary", "", "octal", "hexadecimal" };
10407 static const char* const Bases[5] =
10408 { "", "Binary", "", "Octal", "Hexadecimal" };
10409 static const char* const maxima[5] =
10411 "0b11111111111111111111111111111111",
10415 const char *base, *Base, *max;
10417 /* check for hex */
10418 if (s[1] == 'x' || s[1] == 'X') {
10422 } else if (s[1] == 'b' || s[1] == 'B') {
10427 /* check for a decimal in disguise */
10428 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10430 /* so it must be octal */
10437 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10438 "Misplaced _ in number");
10442 base = bases[shift];
10443 Base = Bases[shift];
10444 max = maxima[shift];
10446 /* read the rest of the number */
10448 /* x is used in the overflow test,
10449 b is the digit we're adding on. */
10454 /* if we don't mention it, we're done */
10458 /* _ are ignored -- but warned about if consecutive */
10460 if (lastub && s == lastub + 1)
10461 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10462 "Misplaced _ in number");
10466 /* 8 and 9 are not octal */
10467 case '8': case '9':
10469 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10473 case '2': case '3': case '4':
10474 case '5': case '6': case '7':
10476 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10479 case '0': case '1':
10480 b = *s++ & 15; /* ASCII digit -> value of digit */
10484 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10485 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10486 /* make sure they said 0x */
10489 b = (*s++ & 7) + 9;
10491 /* Prepare to put the digit we have onto the end
10492 of the number so far. We check for overflows.
10498 x = u << shift; /* make room for the digit */
10500 if ((x >> shift) != u
10501 && !(PL_hints & HINT_NEW_BINARY)) {
10504 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10505 "Integer overflow in %s number",
10508 u = x | b; /* add the digit to the end */
10511 n *= nvshift[shift];
10512 /* If an NV has not enough bits in its
10513 * mantissa to represent an UV this summing of
10514 * small low-order numbers is a waste of time
10515 * (because the NV cannot preserve the
10516 * low-order bits anyway): we could just
10517 * remember when did we overflow and in the
10518 * end just multiply n by the right
10526 /* if we get here, we had success: make a scalar value from
10531 /* final misplaced underbar check */
10532 if (s[-1] == '_') {
10533 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10537 if (n > 4294967295.0)
10538 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10539 "%s number > %s non-portable",
10545 if (u > 0xffffffff)
10546 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10547 "%s number > %s non-portable",
10552 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10553 sv = new_constant(start, s - start, "integer",
10554 sv, NULL, NULL, 0);
10555 else if (PL_hints & HINT_NEW_BINARY)
10556 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10561 handle decimal numbers.
10562 we're also sent here when we read a 0 as the first digit
10564 case '1': case '2': case '3': case '4': case '5':
10565 case '6': case '7': case '8': case '9': case '.':
10568 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10571 /* read next group of digits and _ and copy into d */
10572 while (isDIGIT(*s) || *s == '_') {
10573 /* skip underscores, checking for misplaced ones
10577 if (lastub && s == lastub + 1)
10578 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10579 "Misplaced _ in number");
10583 /* check for end of fixed-length buffer */
10585 Perl_croak(aTHX_ number_too_long);
10586 /* if we're ok, copy the character */
10591 /* final misplaced underbar check */
10592 if (lastub && s == lastub + 1) {
10593 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10596 /* read a decimal portion if there is one. avoid
10597 3..5 being interpreted as the number 3. followed
10600 if (*s == '.' && s[1] != '.') {
10605 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10606 "Misplaced _ in number");
10610 /* copy, ignoring underbars, until we run out of digits.
10612 for (; isDIGIT(*s) || *s == '_'; s++) {
10613 /* fixed length buffer check */
10615 Perl_croak(aTHX_ number_too_long);
10617 if (lastub && s == lastub + 1)
10618 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10619 "Misplaced _ in number");
10625 /* fractional part ending in underbar? */
10626 if (s[-1] == '_') {
10627 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10628 "Misplaced _ in number");
10630 if (*s == '.' && isDIGIT(s[1])) {
10631 /* oops, it's really a v-string, but without the "v" */
10637 /* read exponent part, if present */
10638 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10642 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10643 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10645 /* stray preinitial _ */
10647 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10648 "Misplaced _ in number");
10652 /* allow positive or negative exponent */
10653 if (*s == '+' || *s == '-')
10656 /* stray initial _ */
10658 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10659 "Misplaced _ in number");
10663 /* read digits of exponent */
10664 while (isDIGIT(*s) || *s == '_') {
10667 Perl_croak(aTHX_ number_too_long);
10671 if (((lastub && s == lastub + 1) ||
10672 (!isDIGIT(s[1]) && s[1] != '_')))
10673 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10674 "Misplaced _ in number");
10682 We try to do an integer conversion first if no characters
10683 indicating "float" have been found.
10688 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10690 if (flags == IS_NUMBER_IN_UV) {
10692 sv = newSViv(uv); /* Prefer IVs over UVs. */
10695 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10696 if (uv <= (UV) IV_MIN)
10697 sv = newSViv(-(IV)uv);
10704 /* terminate the string */
10706 nv = Atof(PL_tokenbuf);
10711 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10712 const char *const key = floatit ? "float" : "integer";
10713 const STRLEN keylen = floatit ? 5 : 7;
10714 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10715 key, keylen, sv, NULL, NULL, 0);
10719 /* if it starts with a v, it could be a v-string */
10722 sv = newSV(5); /* preallocate storage space */
10723 s = scan_vstring(s, PL_bufend, sv);
10727 /* make the op for the constant and return */
10730 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10732 lvalp->opval = NULL;
10738 S_scan_formline(pTHX_ register char *s)
10743 SV * const stuff = newSVpvs("");
10744 bool needargs = FALSE;
10745 bool eofmt = FALSE;
10747 char *tokenstart = s;
10748 SV* savewhite = NULL;
10750 if (PL_madskills) {
10751 savewhite = PL_thiswhite;
10756 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10758 while (!needargs) {
10761 #ifdef PERL_STRICT_CR
10762 while (SPACE_OR_TAB(*t))
10765 while (SPACE_OR_TAB(*t) || *t == '\r')
10768 if (*t == '\n' || t == PL_bufend) {
10773 eol = (char *) memchr(s,'\n',PL_bufend-s);
10777 for (t = s; t < eol; t++) {
10778 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10780 goto enough; /* ~~ must be first line in formline */
10782 if (*t == '@' || *t == '^')
10786 sv_catpvn(stuff, s, eol-s);
10787 #ifndef PERL_STRICT_CR
10788 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10789 char *end = SvPVX(stuff) + SvCUR(stuff);
10792 SvCUR_set(stuff, SvCUR(stuff) - 1);
10800 if ((PL_rsfp || PL_parser->filtered)
10801 && PL_parser->form_lex_state == LEX_NORMAL) {
10804 if (PL_madskills) {
10806 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10808 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10811 PL_bufptr = PL_bufend;
10812 CopLINE_inc(PL_curcop);
10813 got_some = lex_next_chunk(0);
10814 CopLINE_dec(PL_curcop);
10817 tokenstart = PL_bufptr;
10825 if (!SvCUR(stuff) || needargs)
10826 PL_lex_state = PL_parser->form_lex_state;
10827 if (SvCUR(stuff)) {
10828 PL_expect = XSTATE;
10830 start_force(PL_curforce);
10831 NEXTVAL_NEXTTOKE.ival = 0;
10832 force_next(FORMLBRACK);
10835 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10837 else if (PL_encoding)
10838 sv_recode_to_utf8(stuff, PL_encoding);
10840 start_force(PL_curforce);
10841 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10845 SvREFCNT_dec(stuff);
10847 PL_lex_formbrack = 0;
10850 if (PL_madskills) {
10852 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10854 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10855 PL_thiswhite = savewhite;
10862 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10865 const I32 oldsavestack_ix = PL_savestack_ix;
10866 CV* const outsidecv = PL_compcv;
10868 SAVEI32(PL_subline);
10869 save_item(PL_subname);
10870 SAVESPTR(PL_compcv);
10872 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10873 CvFLAGS(PL_compcv) |= flags;
10875 PL_subline = CopLINE(PL_curcop);
10876 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10877 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10878 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10879 if (outsidecv && CvPADLIST(outsidecv))
10880 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10882 return oldsavestack_ix;
10886 #pragma segment Perl_yylex
10889 S_yywarn(pTHX_ const char *const s, U32 flags)
10893 PERL_ARGS_ASSERT_YYWARN;
10895 PL_in_eval |= EVAL_WARNONLY;
10896 yyerror_pv(s, flags);
10897 PL_in_eval &= ~EVAL_WARNONLY;
10902 Perl_yyerror(pTHX_ const char *const s)
10904 PERL_ARGS_ASSERT_YYERROR;
10905 return yyerror_pvn(s, strlen(s), 0);
10909 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10911 PERL_ARGS_ASSERT_YYERROR_PV;
10912 return yyerror_pvn(s, strlen(s), flags);
10916 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10919 const char *context = NULL;
10922 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10923 int yychar = PL_parser->yychar;
10924 U32 is_utf8 = flags & SVf_UTF8;
10926 PERL_ARGS_ASSERT_YYERROR_PVN;
10928 if (!yychar || (yychar == ';' && !PL_rsfp))
10929 sv_catpvs(where_sv, "at EOF");
10930 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10931 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10932 PL_oldbufptr != PL_bufptr) {
10935 The code below is removed for NetWare because it abends/crashes on NetWare
10936 when the script has error such as not having the closing quotes like:
10937 if ($var eq "value)
10938 Checking of white spaces is anyway done in NetWare code.
10941 while (isSPACE(*PL_oldoldbufptr))
10944 context = PL_oldoldbufptr;
10945 contlen = PL_bufptr - PL_oldoldbufptr;
10947 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10948 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10951 The code below is removed for NetWare because it abends/crashes on NetWare
10952 when the script has error such as not having the closing quotes like:
10953 if ($var eq "value)
10954 Checking of white spaces is anyway done in NetWare code.
10957 while (isSPACE(*PL_oldbufptr))
10960 context = PL_oldbufptr;
10961 contlen = PL_bufptr - PL_oldbufptr;
10963 else if (yychar > 255)
10964 sv_catpvs(where_sv, "next token ???");
10965 else if (yychar == -2) { /* YYEMPTY */
10966 if (PL_lex_state == LEX_NORMAL ||
10967 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10968 sv_catpvs(where_sv, "at end of line");
10969 else if (PL_lex_inpat)
10970 sv_catpvs(where_sv, "within pattern");
10972 sv_catpvs(where_sv, "within string");
10975 sv_catpvs(where_sv, "next char ");
10977 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10978 else if (isPRINT_LC(yychar)) {
10979 const char string = yychar;
10980 sv_catpvn(where_sv, &string, 1);
10983 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10985 msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
10986 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10987 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10989 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10990 SVfARG(newSVpvn_flags(context, contlen,
10991 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
10993 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10994 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10995 Perl_sv_catpvf(aTHX_ msg,
10996 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10997 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11000 if (PL_in_eval & EVAL_WARNONLY) {
11001 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11005 if (PL_error_count >= 10) {
11006 if (PL_in_eval && SvCUR(ERRSV))
11007 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11008 SVfARG(ERRSV), OutCopFILE(PL_curcop));
11010 Perl_croak(aTHX_ "%s has too many errors.\n",
11011 OutCopFILE(PL_curcop));
11014 PL_in_my_stash = NULL;
11018 #pragma segment Main
11022 S_swallow_bom(pTHX_ U8 *s)
11025 const STRLEN slen = SvCUR(PL_linestr);
11027 PERL_ARGS_ASSERT_SWALLOW_BOM;
11031 if (s[1] == 0xFE) {
11032 /* UTF-16 little-endian? (or UTF-32LE?) */
11033 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11034 /* diag_listed_as: Unsupported script encoding %s */
11035 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11036 #ifndef PERL_NO_UTF16_FILTER
11037 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11039 if (PL_bufend > (char*)s) {
11040 s = add_utf16_textfilter(s, TRUE);
11043 /* diag_listed_as: Unsupported script encoding %s */
11044 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11049 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11050 #ifndef PERL_NO_UTF16_FILTER
11051 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11053 if (PL_bufend > (char *)s) {
11054 s = add_utf16_textfilter(s, FALSE);
11057 /* diag_listed_as: Unsupported script encoding %s */
11058 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11063 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11064 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11065 s += 3; /* UTF-8 */
11071 if (s[2] == 0xFE && s[3] == 0xFF) {
11072 /* UTF-32 big-endian */
11073 /* diag_listed_as: Unsupported script encoding %s */
11074 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11077 else if (s[2] == 0 && s[3] != 0) {
11080 * are a good indicator of UTF-16BE. */
11081 #ifndef PERL_NO_UTF16_FILTER
11082 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11083 s = add_utf16_textfilter(s, FALSE);
11085 /* diag_listed_as: Unsupported script encoding %s */
11086 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11092 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11093 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11094 s += 4; /* UTF-8 */
11100 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11103 * are a good indicator of UTF-16LE. */
11104 #ifndef PERL_NO_UTF16_FILTER
11105 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11106 s = add_utf16_textfilter(s, TRUE);
11108 /* diag_listed_as: Unsupported script encoding %s */
11109 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11117 #ifndef PERL_NO_UTF16_FILTER
11119 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11122 SV *const filter = FILTER_DATA(idx);
11123 /* We re-use this each time round, throwing the contents away before we
11125 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11126 SV *const utf8_buffer = filter;
11127 IV status = IoPAGE(filter);
11128 const bool reverse = cBOOL(IoLINES(filter));
11131 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11133 /* As we're automatically added, at the lowest level, and hence only called
11134 from this file, we can be sure that we're not called in block mode. Hence
11135 don't bother writing code to deal with block mode. */
11137 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11140 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11142 DEBUG_P(PerlIO_printf(Perl_debug_log,
11143 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11144 FPTR2DPTR(void *, S_utf16_textfilter),
11145 reverse ? 'l' : 'b', idx, maxlen, status,
11146 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11153 /* First, look in our buffer of existing UTF-8 data: */
11154 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11158 } else if (status == 0) {
11160 IoPAGE(filter) = 0;
11161 nl = SvEND(utf8_buffer);
11164 STRLEN got = nl - SvPVX(utf8_buffer);
11165 /* Did we have anything to append? */
11167 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11168 /* Everything else in this code works just fine if SVp_POK isn't
11169 set. This, however, needs it, and we need it to work, else
11170 we loop infinitely because the buffer is never consumed. */
11171 sv_chop(utf8_buffer, nl);
11175 /* OK, not a complete line there, so need to read some more UTF-16.
11176 Read an extra octect if the buffer currently has an odd number. */
11180 if (SvCUR(utf16_buffer) >= 2) {
11181 /* Location of the high octet of the last complete code point.
11182 Gosh, UTF-16 is a pain. All the benefits of variable length,
11183 *coupled* with all the benefits of partial reads and
11185 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11186 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11188 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11192 /* We have the first half of a surrogate. Read more. */
11193 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11196 status = FILTER_READ(idx + 1, utf16_buffer,
11197 160 + (SvCUR(utf16_buffer) & 1));
11198 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11199 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11202 IoPAGE(filter) = status;
11207 chars = SvCUR(utf16_buffer) >> 1;
11208 have = SvCUR(utf8_buffer);
11209 SvGROW(utf8_buffer, have + chars * 3 + 1);
11212 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11213 (U8*)SvPVX_const(utf8_buffer) + have,
11214 chars * 2, &newlen);
11216 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11217 (U8*)SvPVX_const(utf8_buffer) + have,
11218 chars * 2, &newlen);
11220 SvCUR_set(utf8_buffer, have + newlen);
11223 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11224 it's private to us, and utf16_to_utf8{,reversed} take a
11225 (pointer,length) pair, rather than a NUL-terminated string. */
11226 if(SvCUR(utf16_buffer) & 1) {
11227 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11228 SvCUR_set(utf16_buffer, 1);
11230 SvCUR_set(utf16_buffer, 0);
11233 DEBUG_P(PerlIO_printf(Perl_debug_log,
11234 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11236 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11237 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11242 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11244 SV *filter = filter_add(S_utf16_textfilter, NULL);
11246 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11248 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11249 sv_setpvs(filter, "");
11250 IoLINES(filter) = reversed;
11251 IoPAGE(filter) = 1; /* Not EOF */
11253 /* Sadly, we have to return a valid pointer, come what may, so we have to
11254 ignore any error return from this. */
11255 SvCUR_set(PL_linestr, 0);
11256 if (FILTER_READ(0, PL_linestr, 0)) {
11257 SvUTF8_on(PL_linestr);
11259 SvUTF8_on(PL_linestr);
11261 PL_bufend = SvEND(PL_linestr);
11262 return (U8*)SvPVX(PL_linestr);
11267 Returns a pointer to the next character after the parsed
11268 vstring, as well as updating the passed in sv.
11270 Function must be called like
11273 s = scan_vstring(s,e,sv);
11275 where s and e are the start and end of the string.
11276 The sv should already be large enough to store the vstring
11277 passed in, for performance reasons.
11282 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11285 const char *pos = s;
11286 const char *start = s;
11288 PERL_ARGS_ASSERT_SCAN_VSTRING;
11290 if (*pos == 'v') pos++; /* get past 'v' */
11291 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11293 if ( *pos != '.') {
11294 /* this may not be a v-string if followed by => */
11295 const char *next = pos;
11296 while (next < e && isSPACE(*next))
11298 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11299 /* return string not v-string */
11300 sv_setpvn(sv,(char *)s,pos-s);
11301 return (char *)pos;
11305 if (!isALPHA(*pos)) {
11306 U8 tmpbuf[UTF8_MAXBYTES+1];
11309 s++; /* get past 'v' */
11314 /* this is atoi() that tolerates underscores */
11317 const char *end = pos;
11319 while (--end >= s) {
11321 const UV orev = rev;
11322 rev += (*end - '0') * mult;
11325 /* diag_listed_as: Integer overflow in %s number */
11326 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11327 "Integer overflow in decimal number");
11331 if (rev > 0x7FFFFFFF)
11332 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11334 /* Append native character for the rev point */
11335 tmpend = uvchr_to_utf8(tmpbuf, rev);
11336 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11337 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11339 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11345 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11349 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11356 Perl_keyword_plugin_standard(pTHX_
11357 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11359 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11360 PERL_UNUSED_CONTEXT;
11361 PERL_UNUSED_ARG(keyword_ptr);
11362 PERL_UNUSED_ARG(keyword_len);
11363 PERL_UNUSED_ARG(op_ptr);
11364 return KEYWORD_PLUGIN_DECLINE;
11367 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11369 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11371 SAVEI32(PL_lex_brackets);
11372 if (PL_lex_brackets > 100)
11373 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11374 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11375 SAVEI32(PL_lex_allbrackets);
11376 PL_lex_allbrackets = 0;
11377 SAVEI8(PL_lex_fakeeof);
11378 PL_lex_fakeeof = (U8)fakeeof;
11379 if(yyparse(gramtype) && !PL_parser->error_count)
11380 qerror(Perl_mess(aTHX_ "Parse error"));
11383 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11385 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11389 SAVEVPTR(PL_eval_root);
11390 PL_eval_root = NULL;
11391 parse_recdescent(gramtype, fakeeof);
11397 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11399 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11402 if (flags & ~PARSE_OPTIONAL)
11403 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11404 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11405 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11406 if (!PL_parser->error_count)
11407 qerror(Perl_mess(aTHX_ "Parse error"));
11408 exprop = newOP(OP_NULL, 0);
11414 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11416 Parse a Perl arithmetic expression. This may contain operators of precedence
11417 down to the bit shift operators. The expression must be followed (and thus
11418 terminated) either by a comparison or lower-precedence operator or by
11419 something that would normally terminate an expression such as semicolon.
11420 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11421 otherwise it is mandatory. It is up to the caller to ensure that the
11422 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11423 the source of the code to be parsed and the lexical context for the
11426 The op tree representing the expression is returned. If an optional
11427 expression is absent, a null pointer is returned, otherwise the pointer
11430 If an error occurs in parsing or compilation, in most cases a valid op
11431 tree is returned anyway. The error is reflected in the parser state,
11432 normally resulting in a single exception at the top level of parsing
11433 which covers all the compilation errors that occurred. Some compilation
11434 errors, however, will throw an exception immediately.
11440 Perl_parse_arithexpr(pTHX_ U32 flags)
11442 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11446 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11448 Parse a Perl term expression. This may contain operators of precedence
11449 down to the assignment operators. The expression must be followed (and thus
11450 terminated) either by a comma or lower-precedence operator or by
11451 something that would normally terminate an expression such as semicolon.
11452 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11453 otherwise it is mandatory. It is up to the caller to ensure that the
11454 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11455 the source of the code to be parsed and the lexical context for the
11458 The op tree representing the expression is returned. If an optional
11459 expression is absent, a null pointer is returned, otherwise the pointer
11462 If an error occurs in parsing or compilation, in most cases a valid op
11463 tree is returned anyway. The error is reflected in the parser state,
11464 normally resulting in a single exception at the top level of parsing
11465 which covers all the compilation errors that occurred. Some compilation
11466 errors, however, will throw an exception immediately.
11472 Perl_parse_termexpr(pTHX_ U32 flags)
11474 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11478 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11480 Parse a Perl list expression. This may contain operators of precedence
11481 down to the comma operator. The expression must be followed (and thus
11482 terminated) either by a low-precedence logic operator such as C<or> or by
11483 something that would normally terminate an expression such as semicolon.
11484 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11485 otherwise it is mandatory. It is up to the caller to ensure that the
11486 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11487 the source of the code to be parsed and the lexical context for the
11490 The op tree representing the expression is returned. If an optional
11491 expression is absent, a null pointer is returned, otherwise the pointer
11494 If an error occurs in parsing or compilation, in most cases a valid op
11495 tree is returned anyway. The error is reflected in the parser state,
11496 normally resulting in a single exception at the top level of parsing
11497 which covers all the compilation errors that occurred. Some compilation
11498 errors, however, will throw an exception immediately.
11504 Perl_parse_listexpr(pTHX_ U32 flags)
11506 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11510 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11512 Parse a single complete Perl expression. This allows the full
11513 expression grammar, including the lowest-precedence operators such
11514 as C<or>. The expression must be followed (and thus terminated) by a
11515 token that an expression would normally be terminated by: end-of-file,
11516 closing bracketing punctuation, semicolon, or one of the keywords that
11517 signals a postfix expression-statement modifier. If I<flags> includes
11518 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11519 mandatory. It is up to the caller to ensure that the dynamic parser
11520 state (L</PL_parser> et al) is correctly set to reflect the source of
11521 the code to be parsed and the lexical context for the expression.
11523 The op tree representing the expression is returned. If an optional
11524 expression is absent, a null pointer is returned, otherwise the pointer
11527 If an error occurs in parsing or compilation, in most cases a valid op
11528 tree is returned anyway. The error is reflected in the parser state,
11529 normally resulting in a single exception at the top level of parsing
11530 which covers all the compilation errors that occurred. Some compilation
11531 errors, however, will throw an exception immediately.
11537 Perl_parse_fullexpr(pTHX_ U32 flags)
11539 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11543 =for apidoc Amx|OP *|parse_block|U32 flags
11545 Parse a single complete Perl code block. This consists of an opening
11546 brace, a sequence of statements, and a closing brace. The block
11547 constitutes a lexical scope, so C<my> variables and various compile-time
11548 effects can be contained within it. It is up to the caller to ensure
11549 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11550 reflect the source of the code to be parsed and the lexical context for
11553 The op tree representing the code block is returned. This is always a
11554 real op, never a null pointer. It will normally be a C<lineseq> list,
11555 including C<nextstate> or equivalent ops. No ops to construct any kind
11556 of runtime scope are included by virtue of it being a block.
11558 If an error occurs in parsing or compilation, in most cases a valid op
11559 tree (most likely null) is returned anyway. The error is reflected in
11560 the parser state, normally resulting in a single exception at the top
11561 level of parsing which covers all the compilation errors that occurred.
11562 Some compilation errors, however, will throw an exception immediately.
11564 The I<flags> parameter is reserved for future use, and must always
11571 Perl_parse_block(pTHX_ U32 flags)
11574 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11575 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11579 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11581 Parse a single unadorned Perl statement. This may be a normal imperative
11582 statement or a declaration that has compile-time effect. It does not
11583 include any label or other affixture. It is up to the caller to ensure
11584 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11585 reflect the source of the code to be parsed and the lexical context for
11588 The op tree representing the statement is returned. This may be a
11589 null pointer if the statement is null, for example if it was actually
11590 a subroutine definition (which has compile-time side effects). If not
11591 null, it will be ops directly implementing the statement, suitable to
11592 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11593 equivalent op (except for those embedded in a scope contained entirely
11594 within the statement).
11596 If an error occurs in parsing or compilation, in most cases a valid op
11597 tree (most likely null) is returned anyway. The error is reflected in
11598 the parser state, normally resulting in a single exception at the top
11599 level of parsing which covers all the compilation errors that occurred.
11600 Some compilation errors, however, will throw an exception immediately.
11602 The I<flags> parameter is reserved for future use, and must always
11609 Perl_parse_barestmt(pTHX_ U32 flags)
11612 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11613 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11617 =for apidoc Amx|SV *|parse_label|U32 flags
11619 Parse a single label, possibly optional, of the type that may prefix a
11620 Perl statement. It is up to the caller to ensure that the dynamic parser
11621 state (L</PL_parser> et al) is correctly set to reflect the source of
11622 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11623 label is optional, otherwise it is mandatory.
11625 The name of the label is returned in the form of a fresh scalar. If an
11626 optional label is absent, a null pointer is returned.
11628 If an error occurs in parsing, which can only occur if the label is
11629 mandatory, a valid label is returned anyway. The error is reflected in
11630 the parser state, normally resulting in a single exception at the top
11631 level of parsing which covers all the compilation errors that occurred.
11637 Perl_parse_label(pTHX_ U32 flags)
11639 if (flags & ~PARSE_OPTIONAL)
11640 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11641 if (PL_lex_state == LEX_KNOWNEXT) {
11642 PL_parser->yychar = yylex();
11643 if (PL_parser->yychar == LABEL) {
11645 PL_parser->yychar = YYEMPTY;
11646 lsv = newSV_type(SVt_PV);
11647 sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11655 STRLEN wlen, bufptr_pos;
11658 if (!isIDFIRST_lazy_if(s, UTF))
11660 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11661 if (word_takes_any_delimeter(s, wlen))
11663 bufptr_pos = s - SvPVX(PL_linestr);
11665 lex_read_space(LEX_KEEP_PREVIOUS);
11667 s = SvPVX(PL_linestr) + bufptr_pos;
11668 if (t[0] == ':' && t[1] != ':') {
11669 PL_oldoldbufptr = PL_oldbufptr;
11672 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11676 if (flags & PARSE_OPTIONAL) {
11679 qerror(Perl_mess(aTHX_ "Parse error"));
11680 return newSVpvs("x");
11687 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11689 Parse a single complete Perl statement. This may be a normal imperative
11690 statement or a declaration that has compile-time effect, and may include
11691 optional labels. It is up to the caller to ensure that the dynamic
11692 parser state (L</PL_parser> et al) is correctly set to reflect the source
11693 of the code to be parsed and the lexical context for the statement.
11695 The op tree representing the statement is returned. This may be a
11696 null pointer if the statement is null, for example if it was actually
11697 a subroutine definition (which has compile-time side effects). If not
11698 null, it will be the result of a L</newSTATEOP> call, normally including
11699 a C<nextstate> or equivalent op.
11701 If an error occurs in parsing or compilation, in most cases a valid op
11702 tree (most likely null) is returned anyway. The error is reflected in
11703 the parser state, normally resulting in a single exception at the top
11704 level of parsing which covers all the compilation errors that occurred.
11705 Some compilation errors, however, will throw an exception immediately.
11707 The I<flags> parameter is reserved for future use, and must always
11714 Perl_parse_fullstmt(pTHX_ U32 flags)
11717 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11718 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11722 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11724 Parse a sequence of zero or more Perl statements. These may be normal
11725 imperative statements, including optional labels, or declarations
11726 that have compile-time effect, or any mixture thereof. The statement
11727 sequence ends when a closing brace or end-of-file is encountered in a
11728 place where a new statement could have validly started. It is up to
11729 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11730 is correctly set to reflect the source of the code to be parsed and the
11731 lexical context for the statements.
11733 The op tree representing the statement sequence is returned. This may
11734 be a null pointer if the statements were all null, for example if there
11735 were no statements or if there were only subroutine definitions (which
11736 have compile-time side effects). If not null, it will be a C<lineseq>
11737 list, normally including C<nextstate> or equivalent ops.
11739 If an error occurs in parsing or compilation, in most cases a valid op
11740 tree is returned anyway. The error is reflected in the parser state,
11741 normally resulting in a single exception at the top level of parsing
11742 which covers all the compilation errors that occurred. Some compilation
11743 errors, however, will throw an exception immediately.
11745 The I<flags> parameter is reserved for future use, and must always
11752 Perl_parse_stmtseq(pTHX_ U32 flags)
11757 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11758 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11759 c = lex_peek_unichar(0);
11760 if (c != -1 && c != /*{*/'}')
11761 qerror(Perl_mess(aTHX_ "Parse error"));
11767 * c-indentation-style: bsd
11768 * c-basic-offset: 4
11769 * indent-tabs-mode: nil
11772 * ex: set ts=8 sts=4 sw=4 et: