3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char ident_too_long[] = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
314 /* how to interpret the pl_yylval associated with the token */
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
323 static struct debug_tokens {
325 enum token_type type;
327 } const debug_tokens[] =
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
412 PERL_ARGS_ASSERT_TOKEREPORT;
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
432 sv_catpvs(report, "EOF");
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
443 PL_op_name[lvalp->ival]);
446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
448 case TOKENTYPE_OPVAL:
450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
453 Perl_sv_catpvf(aTHX_ report, " %s",
454 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 sv_catpvs(report, "(opval=null)");
462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
468 /* print the buffer with suitable escapes */
471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
473 SV* const tmp = newSVpvs("");
475 PERL_ARGS_ASSERT_PRINTBUF;
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
484 S_deprecate_commaless_var_list(pTHX) {
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
498 S_ao(pTHX_ int toketype)
501 if (*PL_bufptr == '=') {
503 if (toketype == ANDAND)
504 pl_yylval.ival = OP_ANDASSIGN;
505 else if (toketype == OROR)
506 pl_yylval.ival = OP_ORASSIGN;
507 else if (toketype == DORDOR)
508 pl_yylval.ival = OP_DORASSIGN;
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
528 S_no_op(pTHX_ const char *const what, char *s)
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
534 PERL_ARGS_ASSERT_NO_OP;
540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
541 if (ckWARN_d(WARN_SYNTAX)) {
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
569 * Complain about missing quote/regexp/heredoc terminator.
570 * If it's called with NULL then it cauterizes the line buffer.
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
577 S_missingterm(pTHX_ char *s)
583 char * const nl = strrchr(s,'\n');
587 else if (isCNTRL(PL_multi_close)) {
589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
594 *tmpbuf = (char)PL_multi_close;
598 q = strchr(s,'"') ? '\'' : '"';
599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
605 * Check whether the named feature is enabled.
608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
617 if (namelen > MAX_FEATURE_LEN)
619 memcpy(&he_name[8], name, namelen);
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
630 #ifdef PERL_CR_FILTER
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
637 PERL_ARGS_ASSERT_STRIP_RETURN;
639 /* outer loop optimized to do nothing if there are no CR-LFs */
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
646 if (*s == '\r' && s[1] == '\n')
657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
660 if (count > 0 && !maxlen)
667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
669 Creates and initialises a new lexer/parser state object, supplying
670 a context in which to lex and parse from a new source of Perl code.
671 A pointer to the new state object is placed in L</PL_parser>. An entry
672 is made on the save stack so that upon unwinding the new state object
673 will be destroyed and the former value of L</PL_parser> will be restored.
674 Nothing else need be done to clean up the parsing context.
676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677 non-null, provides a string (in SV form) containing code to be parsed.
678 A copy of the string is made, so subsequent modification of I<line>
679 does not affect parsing. I<rsfp>, if non-null, provides an input stream
680 from which code will be read to be parsed. If both are non-null, the
681 code in I<line> comes first and must consist of complete lines of input,
682 and I<rsfp> supplies the remainder of the source.
684 The I<flags> parameter is reserved for future use. Currently it is only
685 used by perl internally, so extensions should always pass zero.
690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
703 const char *s = NULL;
704 yy_parser *parser, *oparser;
705 if (flags && flags & ~LEX_START_FLAGS)
706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
708 /* create and initialise a parser */
710 Newxz(parser, 1, yy_parser);
711 parser->old_parser = oparser = PL_parser;
714 parser->stack = NULL;
716 parser->stack_size = 0;
718 /* on scope exit, free this parser and restore any outer one */
720 parser->saved_curcop = PL_curcop;
722 /* initialise lexer state */
725 parser->curforce = -1;
727 parser->nexttoke = 0;
729 parser->error_count = oparser ? oparser->error_count : 0;
730 parser->copline = NOLINE;
731 parser->lex_state = LEX_NORMAL;
732 parser->expect = XSTATE;
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
746 Newxz(parser->lex_shared, 1, LEXSHARED);
750 s = SvPV_const(line, len);
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
754 sv_catpvs(parser->linestr, "\n;");
756 parser->linestr = newSVpvs("\n;");
758 parser->oldoldbufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
767 parser->in_pod = parser->filtered = 0;
771 /* delete a parser object */
774 Perl_parser_free(pTHX_ const yy_parser *parser)
776 PERL_ARGS_ASSERT_PARSER_FREE;
778 PL_curcop = parser->saved_curcop;
779 SvREFCNT_dec(parser->linestr);
781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
782 PerlIO_clearerr(parser->rsfp);
783 else if (parser->rsfp && (!parser->old_parser ||
784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
785 PerlIO_close(parser->rsfp);
786 SvREFCNT_dec(parser->rsfp_filters);
788 Safefree(parser->lex_brackstack);
789 Safefree(parser->lex_casestack);
790 Safefree(parser->lex_shared);
791 PL_parser = parser->old_parser;
797 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
799 Buffer scalar containing the chunk currently under consideration of the
800 text currently being lexed. This is always a plain string scalar (for
801 which C<SvPOK> is true). It is not intended to be used as a scalar by
802 normal scalar means; instead refer to the buffer directly by the pointer
803 variables described below.
805 The lexer maintains various C<char*> pointers to things in the
806 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
807 reallocated, all of these pointers must be updated. Don't attempt to
808 do this manually, but rather use L</lex_grow_linestr> if you need to
809 reallocate the buffer.
811 The content of the text chunk in the buffer is commonly exactly one
812 complete line of input, up to and including a newline terminator,
813 but there are situations where it is otherwise. The octets of the
814 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
815 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
816 flag on this scalar, which may disagree with it.
818 For direct examination of the buffer, the variable
819 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
820 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
821 of these pointers is usually preferable to examination of the scalar
822 through normal scalar means.
824 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
826 Direct pointer to the end of the chunk of text currently being lexed, the
827 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
828 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
829 always located at the end of the buffer, and does not count as part of
830 the buffer's contents.
832 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
834 Points to the current position of lexing inside the lexer buffer.
835 Characters around this point may be freely examined, within
836 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
837 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
838 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
840 Lexing code (whether in the Perl core or not) moves this pointer past
841 the characters that it consumes. It is also expected to perform some
842 bookkeeping whenever a newline character is consumed. This movement
843 can be more conveniently performed by the function L</lex_read_to>,
844 which handles newlines appropriately.
846 Interpretation of the buffer's octets can be abstracted out by
847 using the slightly higher-level functions L</lex_peek_unichar> and
848 L</lex_read_unichar>.
850 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
852 Points to the start of the current line inside the lexer buffer.
853 This is useful for indicating at which column an error occurred, and
854 not much else. This must be updated by any lexing code that consumes
855 a newline; the function L</lex_read_to> handles this detail.
861 =for apidoc Amx|bool|lex_bufutf8
863 Indicates whether the octets in the lexer buffer
864 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
865 of Unicode characters. If not, they should be interpreted as Latin-1
866 characters. This is analogous to the C<SvUTF8> flag for scalars.
868 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
869 contains valid UTF-8. Lexing code must be robust in the face of invalid
872 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
873 is significant, but not the whole story regarding the input character
874 encoding. Normally, when a file is being read, the scalar contains octets
875 and its C<SvUTF8> flag is off, but the octets should be interpreted as
876 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
877 however, the scalar may have the C<SvUTF8> flag on, and in this case its
878 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
879 is in effect. This logic may change in the future; use this function
880 instead of implementing the logic yourself.
886 Perl_lex_bufutf8(pTHX)
892 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
894 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
895 at least I<len> octets (including terminating NUL). Returns a
896 pointer to the reallocated buffer. This is necessary before making
897 any direct modification of the buffer that would increase its length.
898 L</lex_stuff_pvn> provides a more convenient way to insert text into
901 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
902 this function updates all of the lexer's variables that point directly
909 Perl_lex_grow_linestr(pTHX_ STRLEN len)
913 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
914 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
915 linestr = PL_parser->linestr;
916 buf = SvPVX(linestr);
917 if (len <= SvLEN(linestr))
919 bufend_pos = PL_parser->bufend - buf;
920 bufptr_pos = PL_parser->bufptr - buf;
921 oldbufptr_pos = PL_parser->oldbufptr - buf;
922 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
923 linestart_pos = PL_parser->linestart - buf;
924 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
925 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
926 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
927 PL_parser->lex_shared->re_eval_start - buf : 0;
929 buf = sv_grow(linestr, len);
931 PL_parser->bufend = buf + bufend_pos;
932 PL_parser->bufptr = buf + bufptr_pos;
933 PL_parser->oldbufptr = buf + oldbufptr_pos;
934 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
935 PL_parser->linestart = buf + linestart_pos;
936 if (PL_parser->last_uni)
937 PL_parser->last_uni = buf + last_uni_pos;
938 if (PL_parser->last_lop)
939 PL_parser->last_lop = buf + last_lop_pos;
940 if (PL_parser->lex_shared->re_eval_start)
941 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
946 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
948 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
949 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
950 reallocating the buffer if necessary. This means that lexing code that
951 runs later will see the characters as if they had appeared in the input.
952 It is not recommended to do this as part of normal parsing, and most
953 uses of this facility run the risk of the inserted characters being
954 interpreted in an unintended manner.
956 The string to be inserted is represented by I<len> octets starting
957 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
958 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
959 The characters are recoded for the lexer buffer, according to how the
960 buffer is currently being interpreted (L</lex_bufutf8>). If a string
961 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
962 function is more convenient.
968 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
972 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
973 if (flags & ~(LEX_STUFF_UTF8))
974 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
976 if (flags & LEX_STUFF_UTF8) {
980 const char *p, *e = pv+len;
981 for (p = pv; p != e; p++)
982 highhalf += !!(((U8)*p) & 0x80);
985 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
986 bufptr = PL_parser->bufptr;
987 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
988 SvCUR_set(PL_parser->linestr,
989 SvCUR(PL_parser->linestr) + len+highhalf);
990 PL_parser->bufend += len+highhalf;
991 for (p = pv; p != e; p++) {
994 *bufptr++ = (char)(0xc0 | (c >> 6));
995 *bufptr++ = (char)(0x80 | (c & 0x3f));
1002 if (flags & LEX_STUFF_UTF8) {
1003 STRLEN highhalf = 0;
1004 const char *p, *e = pv+len;
1005 for (p = pv; p != e; p++) {
1008 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1009 "non-Latin-1 character into Latin-1 input");
1010 } else if (c >= 0xc2 && p+1 != e &&
1011 (((U8)p[1]) & 0xc0) == 0x80) {
1014 } else if (c >= 0x80) {
1015 /* malformed UTF-8 */
1017 SAVESPTR(PL_warnhook);
1018 PL_warnhook = PERL_WARNHOOK_FATAL;
1019 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1025 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1026 bufptr = PL_parser->bufptr;
1027 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1028 SvCUR_set(PL_parser->linestr,
1029 SvCUR(PL_parser->linestr) + len-highhalf);
1030 PL_parser->bufend += len-highhalf;
1031 for (p = pv; p != e; p++) {
1034 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1037 *bufptr++ = (char)c;
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046 PL_parser->bufend += len;
1047 Copy(pv, bufptr, len, char);
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary. This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul. These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>. The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1077 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078 lex_stuff_pvn(pv, strlen(pv), flags);
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary. This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1092 The string to be inserted is the string value of I<sv>. The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1106 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1110 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1121 This is not the normal way to consume lexed text. For that, use
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1132 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133 buf = PL_parser->bufptr;
1135 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 bufend = PL_parser->bufend;
1140 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141 unstuff_len = ptr - buf;
1142 Move(ptr, buf, bufend+1-ptr, char);
1143 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144 PL_parser->bufend = bufend - unstuff_len;
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1163 Perl_lex_read_to(pTHX_ char *ptr)
1166 PERL_ARGS_ASSERT_LEX_READ_TO;
1167 s = PL_parser->bufptr;
1168 if (ptr < s || ptr > PL_parser->bufend)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170 for (; s != ptr; s++)
1172 COPLINE_INC_WITH_HERELINES;
1173 PL_parser->linestart = s+1;
1175 PL_parser->bufptr = ptr;
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>. The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately. I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it. However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1203 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204 buf = SvPVX(PL_parser->linestr);
1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1209 if (ptr > PL_parser->bufptr)
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211 discard_len = ptr - buf;
1212 if (PL_parser->oldbufptr < ptr)
1213 PL_parser->oldbufptr = ptr;
1214 if (PL_parser->oldoldbufptr < ptr)
1215 PL_parser->oldoldbufptr = ptr;
1216 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219 PL_parser->last_lop = NULL;
1220 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222 PL_parser->bufend -= discard_len;
1223 PL_parser->bufptr -= discard_len;
1224 PL_parser->oldbufptr -= discard_len;
1225 PL_parser->oldoldbufptr -= discard_len;
1226 if (PL_parser->last_uni)
1227 PL_parser->last_uni -= discard_len;
1228 if (PL_parser->last_lop)
1229 PL_parser->last_lop -= discard_len;
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more. It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded. If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM 0x40000000
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1262 STRLEN old_bufend_pos, new_bufend_pos;
1263 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265 bool got_some_for_debugger = 0;
1267 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269 linestr = PL_parser->linestr;
1270 buf = SvPVX(linestr);
1271 if (!(flags & LEX_KEEP_PREVIOUS) &&
1272 PL_parser->bufptr == PL_parser->bufend) {
1273 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1275 if (PL_parser->last_uni != PL_parser->bufend)
1276 PL_parser->last_uni = NULL;
1277 if (PL_parser->last_lop != PL_parser->bufend)
1278 PL_parser->last_lop = NULL;
1279 last_uni_pos = last_lop_pos = 0;
1283 old_bufend_pos = PL_parser->bufend - buf;
1284 bufptr_pos = PL_parser->bufptr - buf;
1285 oldbufptr_pos = PL_parser->oldbufptr - buf;
1286 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1287 linestart_pos = PL_parser->linestart - buf;
1288 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1289 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1291 if (flags & LEX_FAKE_EOF) {
1293 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1295 } else if (filter_gets(linestr, old_bufend_pos)) {
1297 got_some_for_debugger = 1;
1298 } else if (flags & LEX_NO_TERM) {
1301 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1302 sv_setpvs(linestr, "");
1304 /* End of real input. Close filehandle (unless it was STDIN),
1305 * then add implicit termination.
1307 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1308 PerlIO_clearerr(PL_parser->rsfp);
1309 else if (PL_parser->rsfp)
1310 (void)PerlIO_close(PL_parser->rsfp);
1311 PL_parser->rsfp = NULL;
1312 PL_parser->in_pod = PL_parser->filtered = 0;
1314 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1317 if (!PL_in_eval && PL_minus_p) {
1319 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1320 PL_minus_n = PL_minus_p = 0;
1321 } else if (!PL_in_eval && PL_minus_n) {
1322 sv_catpvs(linestr, /*{*/";}");
1325 sv_catpvs(linestr, ";");
1328 buf = SvPVX(linestr);
1329 new_bufend_pos = SvCUR(linestr);
1330 PL_parser->bufend = buf + new_bufend_pos;
1331 PL_parser->bufptr = buf + bufptr_pos;
1332 PL_parser->oldbufptr = buf + oldbufptr_pos;
1333 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1334 PL_parser->linestart = buf + linestart_pos;
1335 if (PL_parser->last_uni)
1336 PL_parser->last_uni = buf + last_uni_pos;
1337 if (PL_parser->last_lop)
1338 PL_parser->last_lop = buf + last_lop_pos;
1339 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1340 PL_curstash != PL_debstash) {
1341 /* debugger active and we're not compiling the debugger code,
1342 * so store the line into the debugger's array of lines
1344 update_debugger_info(NULL, buf+old_bufend_pos,
1345 new_bufend_pos-old_bufend_pos);
1351 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1353 Looks ahead one (Unicode) character in the text currently being lexed.
1354 Returns the codepoint (unsigned integer value) of the next character,
1355 or -1 if lexing has reached the end of the input text. To consume the
1356 peeked character, use L</lex_read_unichar>.
1358 If the next character is in (or extends into) the next chunk of input
1359 text, the next chunk will be read in. Normally the current chunk will be
1360 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1361 then the current chunk will not be discarded.
1363 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1364 is encountered, an exception is generated.
1370 Perl_lex_peek_unichar(pTHX_ U32 flags)
1374 if (flags & ~(LEX_KEEP_PREVIOUS))
1375 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1376 s = PL_parser->bufptr;
1377 bufend = PL_parser->bufend;
1383 if (!lex_next_chunk(flags))
1385 s = PL_parser->bufptr;
1386 bufend = PL_parser->bufend;
1392 len = PL_utf8skip[head];
1393 while ((STRLEN)(bufend-s) < len) {
1394 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1396 s = PL_parser->bufptr;
1397 bufend = PL_parser->bufend;
1400 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1401 if (retlen == (STRLEN)-1) {
1402 /* malformed UTF-8 */
1404 SAVESPTR(PL_warnhook);
1405 PL_warnhook = PERL_WARNHOOK_FATAL;
1406 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1412 if (!lex_next_chunk(flags))
1414 s = PL_parser->bufptr;
1421 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1423 Reads the next (Unicode) character in the text currently being lexed.
1424 Returns the codepoint (unsigned integer value) of the character read,
1425 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1426 if lexing has reached the end of the input text. To non-destructively
1427 examine the next character, use L</lex_peek_unichar> instead.
1429 If the next character is in (or extends into) the next chunk of input
1430 text, the next chunk will be read in. Normally the current chunk will be
1431 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1432 then the current chunk will not be discarded.
1434 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1435 is encountered, an exception is generated.
1441 Perl_lex_read_unichar(pTHX_ U32 flags)
1444 if (flags & ~(LEX_KEEP_PREVIOUS))
1445 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1446 c = lex_peek_unichar(flags);
1449 COPLINE_INC_WITH_HERELINES;
1451 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1453 ++(PL_parser->bufptr);
1459 =for apidoc Amx|void|lex_read_space|U32 flags
1461 Reads optional spaces, in Perl style, in the text currently being
1462 lexed. The spaces may include ordinary whitespace characters and
1463 Perl-style comments. C<#line> directives are processed if encountered.
1464 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1465 at a non-space character (or the end of the input text).
1467 If spaces extend into the next chunk of input text, the next chunk will
1468 be read in. Normally the current chunk will be discarded at the same
1469 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1470 chunk will not be discarded.
1475 #define LEX_NO_NEXT_CHUNK 0x80000000
1478 Perl_lex_read_space(pTHX_ U32 flags)
1481 bool need_incline = 0;
1482 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1486 sv_free(PL_skipwhite);
1487 PL_skipwhite = NULL;
1490 PL_skipwhite = newSVpvs("");
1491 #endif /* PERL_MAD */
1492 s = PL_parser->bufptr;
1493 bufend = PL_parser->bufend;
1499 } while (!(c == '\n' || (c == 0 && s == bufend)));
1500 } else if (c == '\n') {
1502 PL_parser->linestart = s;
1507 } else if (isSPACE(c)) {
1509 } else if (c == 0 && s == bufend) {
1513 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1514 #endif /* PERL_MAD */
1515 if (flags & LEX_NO_NEXT_CHUNK)
1517 PL_parser->bufptr = s;
1518 COPLINE_INC_WITH_HERELINES;
1519 got_more = lex_next_chunk(flags);
1520 CopLINE_dec(PL_curcop);
1521 s = PL_parser->bufptr;
1522 bufend = PL_parser->bufend;
1525 if (need_incline && PL_parser->rsfp) {
1535 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1536 #endif /* PERL_MAD */
1537 PL_parser->bufptr = s;
1542 * This subroutine has nothing to do with tilting, whether at windmills
1543 * or pinball tables. Its name is short for "increment line". It
1544 * increments the current line number in CopLINE(PL_curcop) and checks
1545 * to see whether the line starts with a comment of the form
1546 * # line 500 "foo.pm"
1547 * If so, it sets the current line number and file to the values in the comment.
1551 S_incline(pTHX_ const char *s)
1559 PERL_ARGS_ASSERT_INCLINE;
1561 COPLINE_INC_WITH_HERELINES;
1562 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1563 && s+1 == PL_bufend && *s == ';') {
1564 /* fake newline in string eval */
1565 CopLINE_dec(PL_curcop);
1570 while (SPACE_OR_TAB(*s))
1572 if (strnEQ(s, "line", 4))
1576 if (SPACE_OR_TAB(*s))
1580 while (SPACE_OR_TAB(*s))
1588 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1590 while (SPACE_OR_TAB(*s))
1592 if (*s == '"' && (t = strchr(s+1, '"'))) {
1598 while (!isSPACE(*t))
1602 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1604 if (*e != '\n' && *e != '\0')
1605 return; /* false alarm */
1607 line_num = atoi(n)-1;
1610 const STRLEN len = t - s;
1611 SV *const temp_sv = CopFILESV(PL_curcop);
1616 cf = SvPVX(temp_sv);
1617 tmplen = SvCUR(temp_sv);
1623 if (!PL_rsfp && !PL_parser->filtered) {
1624 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1625 * to *{"::_<newfilename"} */
1626 /* However, the long form of evals is only turned on by the
1627 debugger - usually they're "(eval %lu)" */
1631 STRLEN tmplen2 = len;
1632 if (tmplen + 2 <= sizeof smallbuf)
1635 Newx(tmpbuf, tmplen + 2, char);
1638 memcpy(tmpbuf + 2, cf, tmplen);
1640 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1645 if (tmplen2 + 2 <= sizeof smallbuf)
1648 Newx(tmpbuf2, tmplen2 + 2, char);
1650 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1651 /* Either they malloc'd it, or we malloc'd it,
1652 so no prefix is present in ours. */
1657 memcpy(tmpbuf2 + 2, s, tmplen2);
1660 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1662 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1663 /* adjust ${"::_<newfilename"} to store the new file name */
1664 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1665 /* The line number may differ. If that is the case,
1666 alias the saved lines that are in the array.
1667 Otherwise alias the whole array. */
1668 if (CopLINE(PL_curcop) == line_num) {
1669 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1670 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1672 else if (GvAV(*gvp)) {
1673 AV * const av = GvAV(*gvp);
1674 const I32 start = CopLINE(PL_curcop)+1;
1675 I32 items = AvFILLp(av) - start;
1677 AV * const av2 = GvAVn(gv2);
1678 SV **svp = AvARRAY(av) + start;
1679 I32 l = (I32)line_num+1;
1681 av_store(av2, l++, SvREFCNT_inc(*svp++));
1686 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1688 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1690 CopFILE_free(PL_curcop);
1691 CopFILE_setn(PL_curcop, s, len);
1693 CopLINE_set(PL_curcop, line_num);
1697 /* skip space before PL_thistoken */
1700 S_skipspace0(pTHX_ register char *s)
1702 PERL_ARGS_ASSERT_SKIPSPACE0;
1709 PL_thiswhite = newSVpvs("");
1710 sv_catsv(PL_thiswhite, PL_skipwhite);
1711 sv_free(PL_skipwhite);
1714 PL_realtokenstart = s - SvPVX(PL_linestr);
1718 /* skip space after PL_thistoken */
1721 S_skipspace1(pTHX_ register char *s)
1723 const char *start = s;
1724 I32 startoff = start - SvPVX(PL_linestr);
1726 PERL_ARGS_ASSERT_SKIPSPACE1;
1731 start = SvPVX(PL_linestr) + startoff;
1732 if (!PL_thistoken && PL_realtokenstart >= 0) {
1733 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1734 PL_thistoken = newSVpvn(tstart, start - tstart);
1736 PL_realtokenstart = -1;
1739 PL_nextwhite = newSVpvs("");
1740 sv_catsv(PL_nextwhite, PL_skipwhite);
1741 sv_free(PL_skipwhite);
1748 S_skipspace2(pTHX_ register char *s, SV **svp)
1751 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1752 const I32 startoff = s - SvPVX(PL_linestr);
1754 PERL_ARGS_ASSERT_SKIPSPACE2;
1757 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1758 if (!PL_madskills || !svp)
1760 start = SvPVX(PL_linestr) + startoff;
1761 if (!PL_thistoken && PL_realtokenstart >= 0) {
1762 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1763 PL_thistoken = newSVpvn(tstart, start - tstart);
1764 PL_realtokenstart = -1;
1768 *svp = newSVpvs("");
1769 sv_setsv(*svp, PL_skipwhite);
1770 sv_free(PL_skipwhite);
1779 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1781 AV *av = CopFILEAVx(PL_curcop);
1783 SV * const sv = newSV_type(SVt_PVMG);
1785 sv_setsv(sv, orig_sv);
1787 sv_setpvn(sv, buf, len);
1790 av_store(av, (I32)CopLINE(PL_curcop), sv);
1796 * Called to gobble the appropriate amount and type of whitespace.
1797 * Skips comments as well.
1801 S_skipspace(pTHX_ register char *s)
1805 #endif /* PERL_MAD */
1806 PERL_ARGS_ASSERT_SKIPSPACE;
1809 sv_free(PL_skipwhite);
1810 PL_skipwhite = NULL;
1812 #endif /* PERL_MAD */
1813 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1814 while (s < PL_bufend && SPACE_OR_TAB(*s))
1817 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1819 lex_read_space(LEX_KEEP_PREVIOUS |
1820 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1821 LEX_NO_NEXT_CHUNK : 0));
1823 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1824 if (PL_linestart > PL_bufptr)
1825 PL_bufptr = PL_linestart;
1830 PL_skipwhite = newSVpvn(start, s-start);
1831 #endif /* PERL_MAD */
1837 * Check the unary operators to ensure there's no ambiguity in how they're
1838 * used. An ambiguous piece of code would be:
1840 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1841 * the +5 is its argument.
1851 if (PL_oldoldbufptr != PL_last_uni)
1853 while (isSPACE(*PL_last_uni))
1856 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1858 if ((t = strchr(s, '(')) && t < PL_bufptr)
1861 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1862 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1863 (int)(s - PL_last_uni), PL_last_uni);
1867 * LOP : macro to build a list operator. Its behaviour has been replaced
1868 * with a subroutine, S_lop() for which LOP is just another name.
1871 #define LOP(f,x) return lop(f,x,s)
1875 * Build a list operator (or something that might be one). The rules:
1876 * - if we have a next token, then it's a list operator [why?]
1877 * - if the next thing is an opening paren, then it's a function
1878 * - else it's a list operator
1882 S_lop(pTHX_ I32 f, int x, char *s)
1886 PERL_ARGS_ASSERT_LOP;
1892 PL_last_lop = PL_oldbufptr;
1893 PL_last_lop_op = (OPCODE)f;
1902 return REPORT(FUNC);
1905 return REPORT(FUNC);
1908 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1909 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1910 return REPORT(LSTOP);
1917 * Sets up for an eventual force_next(). start_force(0) basically does
1918 * an unshift, while start_force(-1) does a push. yylex removes items
1923 S_start_force(pTHX_ int where)
1927 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1928 where = PL_lasttoke;
1929 assert(PL_curforce < 0 || PL_curforce == where);
1930 if (PL_curforce != where) {
1931 for (i = PL_lasttoke; i > where; --i) {
1932 PL_nexttoke[i] = PL_nexttoke[i-1];
1936 if (PL_curforce < 0) /* in case of duplicate start_force() */
1937 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1938 PL_curforce = where;
1941 curmad('^', newSVpvs(""));
1942 CURMAD('_', PL_nextwhite);
1947 S_curmad(pTHX_ char slot, SV *sv)
1953 if (PL_curforce < 0)
1954 where = &PL_thismad;
1956 where = &PL_nexttoke[PL_curforce].next_mad;
1962 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1964 else if (PL_encoding) {
1965 sv_recode_to_utf8(sv, PL_encoding);
1970 /* keep a slot open for the head of the list? */
1971 if (slot != '_' && *where && (*where)->mad_key == '^') {
1972 (*where)->mad_key = slot;
1973 sv_free(MUTABLE_SV(((*where)->mad_val)));
1974 (*where)->mad_val = (void*)sv;
1977 addmad(newMADsv(slot, sv), where, 0);
1980 # define start_force(where) NOOP
1981 # define curmad(slot, sv) NOOP
1986 * When the lexer realizes it knows the next token (for instance,
1987 * it is reordering tokens for the parser) then it can call S_force_next
1988 * to know what token to return the next time the lexer is called. Caller
1989 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1990 * and possibly PL_expect to ensure the lexer handles the token correctly.
1994 S_force_next(pTHX_ I32 type)
1999 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2000 tokereport(type, &NEXTVAL_NEXTTOKE);
2003 /* Don’t let opslab_force_free snatch it */
2004 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2005 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2006 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2009 if (PL_curforce < 0)
2010 start_force(PL_lasttoke);
2011 PL_nexttoke[PL_curforce].next_type = type;
2012 if (PL_lex_state != LEX_KNOWNEXT)
2013 PL_lex_defer = PL_lex_state;
2014 PL_lex_state = LEX_KNOWNEXT;
2015 PL_lex_expect = PL_expect;
2018 PL_nexttype[PL_nexttoke] = type;
2020 if (PL_lex_state != LEX_KNOWNEXT) {
2021 PL_lex_defer = PL_lex_state;
2022 PL_lex_expect = PL_expect;
2023 PL_lex_state = LEX_KNOWNEXT;
2031 int yyc = PL_parser->yychar;
2032 if (yyc != YYEMPTY) {
2035 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2036 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2037 PL_lex_allbrackets--;
2039 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2040 } else if (yyc == '('/*)*/) {
2041 PL_lex_allbrackets--;
2046 PL_parser->yychar = YYEMPTY;
2051 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2054 SV * const sv = newSVpvn_utf8(start, len,
2057 && !is_ascii_string((const U8*)start, len)
2058 && is_utf8_string((const U8*)start, len));
2064 * When the lexer knows the next thing is a word (for instance, it has
2065 * just seen -> and it knows that the next char is a word char, then
2066 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2070 * char *start : buffer position (must be within PL_linestr)
2071 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2072 * int check_keyword : if true, Perl checks to make sure the word isn't
2073 * a keyword (do this if the word is a label, e.g. goto FOO)
2074 * int allow_pack : if true, : characters will also be allowed (require,
2075 * use, etc. do this)
2076 * int allow_initial_tick : used by the "sub" lexer only.
2080 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2086 PERL_ARGS_ASSERT_FORCE_WORD;
2088 start = SKIPSPACE1(start);
2090 if (isIDFIRST_lazy_if(s,UTF) ||
2091 (allow_pack && *s == ':') ||
2092 (allow_initial_tick && *s == '\'') )
2094 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2095 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2097 start_force(PL_curforce);
2099 curmad('X', newSVpvn(start,s-start));
2100 if (token == METHOD) {
2105 PL_expect = XOPERATOR;
2109 curmad('g', newSVpvs( "forced" ));
2110 NEXTVAL_NEXTTOKE.opval
2111 = (OP*)newSVOP(OP_CONST,0,
2112 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2113 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2121 * Called when the lexer wants $foo *foo &foo etc, but the program
2122 * text only contains the "foo" portion. The first argument is a pointer
2123 * to the "foo", and the second argument is the type symbol to prefix.
2124 * Forces the next token to be a "WORD".
2125 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2129 S_force_ident(pTHX_ register const char *s, int kind)
2133 PERL_ARGS_ASSERT_FORCE_IDENT;
2136 const STRLEN len = strlen(s);
2137 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2138 UTF ? SVf_UTF8 : 0));
2139 start_force(PL_curforce);
2140 NEXTVAL_NEXTTOKE.opval = o;
2143 o->op_private = OPpCONST_ENTERED;
2144 /* XXX see note in pp_entereval() for why we forgo typo
2145 warnings if the symbol must be introduced in an eval.
2147 gv_fetchpvn_flags(s, len,
2148 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2149 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2150 kind == '$' ? SVt_PV :
2151 kind == '@' ? SVt_PVAV :
2152 kind == '%' ? SVt_PVHV :
2160 S_force_ident_maybe_lex(pTHX_ char pit)
2162 start_force(PL_curforce);
2163 NEXTVAL_NEXTTOKE.ival = pit;
2168 Perl_str_to_version(pTHX_ SV *sv)
2173 const char *start = SvPV_const(sv,len);
2174 const char * const end = start + len;
2175 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2177 PERL_ARGS_ASSERT_STR_TO_VERSION;
2179 while (start < end) {
2183 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2188 retval += ((NV)n)/nshift;
2197 * Forces the next token to be a version number.
2198 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2199 * and if "guessing" is TRUE, then no new token is created (and the caller
2200 * must use an alternative parsing method).
2204 S_force_version(pTHX_ char *s, int guessing)
2210 I32 startoff = s - SvPVX(PL_linestr);
2213 PERL_ARGS_ASSERT_FORCE_VERSION;
2221 while (isDIGIT(*d) || *d == '_' || *d == '.')
2225 start_force(PL_curforce);
2226 curmad('X', newSVpvn(s,d-s));
2229 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2231 #ifdef USE_LOCALE_NUMERIC
2232 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2233 setlocale(LC_NUMERIC, "C");
2235 s = scan_num(s, &pl_yylval);
2236 #ifdef USE_LOCALE_NUMERIC
2237 setlocale(LC_NUMERIC, loc);
2240 version = pl_yylval.opval;
2241 ver = cSVOPx(version)->op_sv;
2242 if (SvPOK(ver) && !SvNIOK(ver)) {
2243 SvUPGRADE(ver, SVt_PVNV);
2244 SvNV_set(ver, str_to_version(ver));
2245 SvNOK_on(ver); /* hint that it is a version */
2248 else if (guessing) {
2251 sv_free(PL_nextwhite); /* let next token collect whitespace */
2253 s = SvPVX(PL_linestr) + startoff;
2261 if (PL_madskills && !version) {
2262 sv_free(PL_nextwhite); /* let next token collect whitespace */
2264 s = SvPVX(PL_linestr) + startoff;
2267 /* NOTE: The parser sees the package name and the VERSION swapped */
2268 start_force(PL_curforce);
2269 NEXTVAL_NEXTTOKE.opval = version;
2276 * S_force_strict_version
2277 * Forces the next token to be a version number using strict syntax rules.
2281 S_force_strict_version(pTHX_ char *s)
2286 I32 startoff = s - SvPVX(PL_linestr);
2288 const char *errstr = NULL;
2290 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2292 while (isSPACE(*s)) /* leading whitespace */
2295 if (is_STRICT_VERSION(s,&errstr)) {
2297 s = (char *)scan_version(s, ver, 0);
2298 version = newSVOP(OP_CONST, 0, ver);
2300 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2301 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2305 yyerror(errstr); /* version required */
2310 if (PL_madskills && !version) {
2311 sv_free(PL_nextwhite); /* let next token collect whitespace */
2313 s = SvPVX(PL_linestr) + startoff;
2316 /* NOTE: The parser sees the package name and the VERSION swapped */
2317 start_force(PL_curforce);
2318 NEXTVAL_NEXTTOKE.opval = version;
2326 * Tokenize a quoted string passed in as an SV. It finds the next
2327 * chunk, up to end of string or a backslash. It may make a new
2328 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2333 S_tokeq(pTHX_ SV *sv)
2342 PERL_ARGS_ASSERT_TOKEQ;
2347 s = SvPV_force(sv, len);
2348 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2351 /* This is relying on the SV being "well formed" with a trailing '\0' */
2352 while (s < send && !(*s == '\\' && s[1] == '\\'))
2357 if ( PL_hints & HINT_NEW_STRING ) {
2358 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2362 if (s + 1 < send && (s[1] == '\\'))
2363 s++; /* all that, just for this */
2368 SvCUR_set(sv, d - SvPVX_const(sv));
2370 if ( PL_hints & HINT_NEW_STRING )
2371 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2376 * Now come three functions related to double-quote context,
2377 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2378 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2379 * interact with PL_lex_state, and create fake ( ... ) argument lists
2380 * to handle functions and concatenation.
2384 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2389 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2391 * Pattern matching will set PL_lex_op to the pattern-matching op to
2392 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2394 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2396 * Everything else becomes a FUNC.
2398 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2399 * had an OP_CONST or OP_READLINE). This just sets us up for a
2400 * call to S_sublex_push().
2404 S_sublex_start(pTHX)
2407 const I32 op_type = pl_yylval.ival;
2409 if (op_type == OP_NULL) {
2410 pl_yylval.opval = PL_lex_op;
2414 if (op_type == OP_CONST || op_type == OP_READLINE) {
2415 SV *sv = tokeq(PL_lex_stuff);
2417 if (SvTYPE(sv) == SVt_PVIV) {
2418 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2420 const char * const p = SvPV_const(sv, len);
2421 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2425 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2426 PL_lex_stuff = NULL;
2427 /* Allow <FH> // "foo" */
2428 if (op_type == OP_READLINE)
2429 PL_expect = XTERMORDORDOR;
2432 else if (op_type == OP_BACKTICK && PL_lex_op) {
2433 /* readpipe() vas overriden */
2434 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2435 pl_yylval.opval = PL_lex_op;
2437 PL_lex_stuff = NULL;
2441 PL_sublex_info.super_state = PL_lex_state;
2442 PL_sublex_info.sub_inwhat = (U16)op_type;
2443 PL_sublex_info.sub_op = PL_lex_op;
2444 PL_lex_state = LEX_INTERPPUSH;
2448 pl_yylval.opval = PL_lex_op;
2458 * Create a new scope to save the lexing state. The scope will be
2459 * ended in S_sublex_done. Returns a '(', starting the function arguments
2460 * to the uc, lc, etc. found before.
2461 * Sets PL_lex_state to LEX_INTERPCONCAT.
2471 PL_lex_state = PL_sublex_info.super_state;
2472 SAVEBOOL(PL_lex_dojoin);
2473 SAVEI32(PL_lex_brackets);
2474 SAVEI32(PL_lex_allbrackets);
2475 SAVEI32(PL_lex_formbrack);
2476 SAVEI8(PL_lex_fakeeof);
2477 SAVEI32(PL_lex_casemods);
2478 SAVEI32(PL_lex_starts);
2479 SAVEI8(PL_lex_state);
2480 SAVESPTR(PL_lex_repl);
2481 SAVEVPTR(PL_lex_inpat);
2482 SAVEI16(PL_lex_inwhat);
2483 SAVECOPLINE(PL_curcop);
2484 SAVEPPTR(PL_bufptr);
2485 SAVEPPTR(PL_bufend);
2486 SAVEPPTR(PL_oldbufptr);
2487 SAVEPPTR(PL_oldoldbufptr);
2488 SAVEPPTR(PL_last_lop);
2489 SAVEPPTR(PL_last_uni);
2490 SAVEPPTR(PL_linestart);
2491 SAVESPTR(PL_linestr);
2492 SAVEGENERICPV(PL_lex_brackstack);
2493 SAVEGENERICPV(PL_lex_casestack);
2494 SAVEGENERICPV(PL_parser->lex_shared);
2496 /* The here-doc parser needs to be able to peek into outer lexing
2497 scopes to find the body of the here-doc. So we put PL_linestr and
2498 PL_bufptr into lex_shared, to ‘share’ those values.
2500 PL_parser->lex_shared->ls_linestr = PL_linestr;
2501 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2503 PL_linestr = PL_lex_stuff;
2504 PL_lex_repl = PL_sublex_info.repl;
2505 PL_lex_stuff = NULL;
2506 PL_sublex_info.repl = NULL;
2508 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2509 = SvPVX(PL_linestr);
2510 PL_bufend += SvCUR(PL_linestr);
2511 PL_last_lop = PL_last_uni = NULL;
2512 SAVEFREESV(PL_linestr);
2513 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2515 PL_lex_dojoin = FALSE;
2516 PL_lex_brackets = PL_lex_formbrack = 0;
2517 PL_lex_allbrackets = 0;
2518 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2519 Newx(PL_lex_brackstack, 120, char);
2520 Newx(PL_lex_casestack, 12, char);
2521 PL_lex_casemods = 0;
2522 *PL_lex_casestack = '\0';
2524 PL_lex_state = LEX_INTERPCONCAT;
2525 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2527 Newxz(shared, 1, LEXSHARED);
2528 shared->ls_prev = PL_parser->lex_shared;
2529 PL_parser->lex_shared = shared;
2531 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2532 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2533 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2534 PL_lex_inpat = PL_sublex_info.sub_op;
2536 PL_lex_inpat = NULL;
2543 * Restores lexer state after a S_sublex_push.
2550 if (!PL_lex_starts++) {
2551 SV * const sv = newSVpvs("");
2552 if (SvUTF8(PL_linestr))
2554 PL_expect = XOPERATOR;
2555 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2559 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2560 PL_lex_state = LEX_INTERPCASEMOD;
2564 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2565 assert(PL_lex_inwhat != OP_TRANSR);
2566 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2567 PL_linestr = PL_lex_repl;
2569 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2570 PL_bufend += SvCUR(PL_linestr);
2571 PL_last_lop = PL_last_uni = NULL;
2572 PL_lex_dojoin = FALSE;
2573 PL_lex_brackets = 0;
2574 PL_lex_allbrackets = 0;
2575 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2576 PL_lex_casemods = 0;
2577 *PL_lex_casestack = '\0';
2579 if (SvEVALED(PL_lex_repl)) {
2580 PL_lex_state = LEX_INTERPNORMAL;
2582 /* we don't clear PL_lex_repl here, so that we can check later
2583 whether this is an evalled subst; that means we rely on the
2584 logic to ensure sublex_done() is called again only via the
2585 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2588 PL_lex_state = LEX_INTERPCONCAT;
2598 PL_endwhite = newSVpvs("");
2599 sv_catsv(PL_endwhite, PL_thiswhite);
2603 sv_setpvs(PL_thistoken,"");
2605 PL_realtokenstart = -1;
2609 PL_bufend = SvPVX(PL_linestr);
2610 PL_bufend += SvCUR(PL_linestr);
2611 PL_expect = XOPERATOR;
2612 PL_sublex_info.sub_inwhat = 0;
2620 Extracts the next constant part of a pattern, double-quoted string,
2621 or transliteration. This is terrifying code.
2623 For example, in parsing the double-quoted string "ab\x63$d", it would
2624 stop at the '$' and return an OP_CONST containing 'abc'.
2626 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2627 processing a pattern (PL_lex_inpat is true), a transliteration
2628 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2630 Returns a pointer to the character scanned up to. If this is
2631 advanced from the start pointer supplied (i.e. if anything was
2632 successfully parsed), will leave an OP_CONST for the substring scanned
2633 in pl_yylval. Caller must intuit reason for not parsing further
2634 by looking at the next characters herself.
2638 \N{ABC} => \N{U+41.42.43}
2641 all other \-char, including \N and \N{ apart from \N{ABC}
2644 @ and $ where it appears to be a var, but not for $ as tail anchor
2649 In transliterations:
2650 characters are VERY literal, except for - not at the start or end
2651 of the string, which indicates a range. If the range is in bytes,
2652 scan_const expands the range to the full set of intermediate
2653 characters. If the range is in utf8, the hyphen is replaced with
2654 a certain range mark which will be handled by pmtrans() in op.c.
2656 In double-quoted strings:
2658 double-quoted style: \r and \n
2659 constants: \x31, etc.
2660 deprecated backrefs: \1 (in substitution replacements)
2661 case and quoting: \U \Q \E
2664 scan_const does *not* construct ops to handle interpolated strings.
2665 It stops processing as soon as it finds an embedded $ or @ variable
2666 and leaves it to the caller to work out what's going on.
2668 embedded arrays (whether in pattern or not) could be:
2669 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2671 $ in double-quoted strings must be the symbol of an embedded scalar.
2673 $ in pattern could be $foo or could be tail anchor. Assumption:
2674 it's a tail anchor if $ is the last thing in the string, or if it's
2675 followed by one of "()| \r\n\t"
2677 \1 (backreferences) are turned into $1 in substitutions
2679 The structure of the code is
2680 while (there's a character to process) {
2681 handle transliteration ranges
2682 skip regexp comments /(?#comment)/ and codes /(?{code})/
2683 skip #-initiated comments in //x patterns
2684 check for embedded arrays
2685 check for embedded scalars
2687 deprecate \1 in substitution replacements
2688 handle string-changing backslashes \l \U \Q \E, etc.
2689 switch (what was escaped) {
2690 handle \- in a transliteration (becomes a literal -)
2691 if a pattern and not \N{, go treat as regular character
2692 handle \132 (octal characters)
2693 handle \x15 and \x{1234} (hex characters)
2694 handle \N{name} (named characters, also \N{3,5} in a pattern)
2695 handle \cV (control characters)
2696 handle printf-style backslashes (\f, \r, \n, etc)
2699 } (end if backslash)
2700 handle regular character
2701 } (end while character to read)
2706 S_scan_const(pTHX_ char *start)
2709 char *send = PL_bufend; /* end of the constant */
2710 SV *sv = newSV(send - start); /* sv for the constant. See
2711 note below on sizing. */
2712 char *s = start; /* start of the constant */
2713 char *d = SvPVX(sv); /* destination for copies */
2714 bool dorange = FALSE; /* are we in a translit range? */
2715 bool didrange = FALSE; /* did we just finish a range? */
2716 bool in_charclass = FALSE; /* within /[...]/ */
2717 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2718 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2719 to be UTF8? But, this can
2720 show as true when the source
2721 isn't utf8, as for example
2722 when it is entirely composed
2725 /* Note on sizing: The scanned constant is placed into sv, which is
2726 * initialized by newSV() assuming one byte of output for every byte of
2727 * input. This routine expects newSV() to allocate an extra byte for a
2728 * trailing NUL, which this routine will append if it gets to the end of
2729 * the input. There may be more bytes of input than output (eg., \N{LATIN
2730 * CAPITAL LETTER A}), or more output than input if the constant ends up
2731 * recoded to utf8, but each time a construct is found that might increase
2732 * the needed size, SvGROW() is called. Its size parameter each time is
2733 * based on the best guess estimate at the time, namely the length used so
2734 * far, plus the length the current construct will occupy, plus room for
2735 * the trailing NUL, plus one byte for every input byte still unscanned */
2739 UV literal_endpoint = 0;
2740 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2743 PERL_ARGS_ASSERT_SCAN_CONST;
2745 assert(PL_lex_inwhat != OP_TRANSR);
2746 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2747 /* If we are doing a trans and we know we want UTF8 set expectation */
2748 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2749 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2753 while (s < send || dorange) {
2755 /* get transliterations out of the way (they're most literal) */
2756 if (PL_lex_inwhat == OP_TRANS) {
2757 /* expand a range A-Z to the full set of characters. AIE! */
2759 I32 i; /* current expanded character */
2760 I32 min; /* first character in range */
2761 I32 max; /* last character in range */
2772 char * const c = (char*)utf8_hop((U8*)d, -1);
2776 *c = (char)UTF_TO_NATIVE(0xff);
2777 /* mark the range as done, and continue */
2783 i = d - SvPVX_const(sv); /* remember current offset */
2786 SvLEN(sv) + (has_utf8 ?
2787 (512 - UTF_CONTINUATION_MARK +
2790 /* How many two-byte within 0..255: 128 in UTF-8,
2791 * 96 in UTF-8-mod. */
2793 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2795 d = SvPVX(sv) + i; /* refresh d after realloc */
2799 for (j = 0; j <= 1; j++) {
2800 char * const c = (char*)utf8_hop((U8*)d, -1);
2801 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2807 max = (U8)0xff; /* only to \xff */
2808 uvmax = uv; /* \x{100} to uvmax */
2810 d = c; /* eat endpoint chars */
2815 d -= 2; /* eat the first char and the - */
2816 min = (U8)*d; /* first char in range */
2817 max = (U8)d[1]; /* last char in range */
2825 "Invalid range \"%c-%c\" in transliteration operator",
2826 (char)min, (char)max);
2830 if (literal_endpoint == 2 &&
2831 ((isLOWER(min) && isLOWER(max)) ||
2832 (isUPPER(min) && isUPPER(max)))) {
2834 for (i = min; i <= max; i++)
2836 *d++ = NATIVE_TO_NEED(has_utf8,i);
2838 for (i = min; i <= max; i++)
2840 *d++ = NATIVE_TO_NEED(has_utf8,i);
2845 for (i = min; i <= max; i++)
2848 const U8 ch = (U8)NATIVE_TO_UTF(i);
2849 if (UNI_IS_INVARIANT(ch))
2852 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2853 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2862 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2864 *d++ = (char)UTF_TO_NATIVE(0xff);
2866 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2870 /* mark the range as done, and continue */
2874 literal_endpoint = 0;
2879 /* range begins (ignore - as first or last char) */
2880 else if (*s == '-' && s+1 < send && s != start) {
2883 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2890 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2900 literal_endpoint = 0;
2901 native_range = TRUE;
2906 /* if we get here, we're not doing a transliteration */
2908 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2911 while (s1 >= start && *s1-- == '\\')
2914 in_charclass = TRUE;
2917 else if (*s == ']' && PL_lex_inpat && in_charclass) {
2920 while (s1 >= start && *s1-- == '\\')
2923 in_charclass = FALSE;
2926 /* skip for regexp comments /(?#comment)/, except for the last
2927 * char, which will be done separately.
2928 * Stop on (?{..}) and friends */
2930 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2932 while (s+1 < send && *s != ')')
2933 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2935 else if (!PL_lex_casemods && !in_charclass &&
2936 ( s[2] == '{' /* This should match regcomp.c */
2937 || (s[2] == '?' && s[3] == '{')))
2943 /* likewise skip #-initiated comments in //x patterns */
2944 else if (*s == '#' && PL_lex_inpat &&
2945 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2946 while (s+1 < send && *s != '\n')
2947 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2950 /* no further processing of single-quoted regex */
2951 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2952 goto default_action;
2954 /* check for embedded arrays
2955 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2957 else if (*s == '@' && s[1]) {
2958 if (isALNUM_lazy_if(s+1,UTF))
2960 if (strchr(":'{$", s[1]))
2962 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2963 break; /* in regexp, neither @+ nor @- are interpolated */
2966 /* check for embedded scalars. only stop if we're sure it's a
2969 else if (*s == '$') {
2970 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2972 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2974 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2975 "Possible unintended interpolation of $\\ in regex");
2977 break; /* in regexp, $ might be tail anchor */
2981 /* End of else if chain - OP_TRANS rejoin rest */
2984 if (*s == '\\' && s+1 < send) {
2985 char* e; /* Can be used for ending '}', etc. */
2989 /* warn on \1 - \9 in substitution replacements, but note that \11
2990 * is an octal; and \19 is \1 followed by '9' */
2991 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2992 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2994 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2999 /* string-change backslash escapes */
3000 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3004 /* In a pattern, process \N, but skip any other backslash escapes.
3005 * This is because we don't want to translate an escape sequence
3006 * into a meta symbol and have the regex compiler use the meta
3007 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3008 * in spite of this, we do have to process \N here while the proper
3009 * charnames handler is in scope. See bugs #56444 and #62056.
3010 * There is a complication because \N in a pattern may also stand
3011 * for 'match a non-nl', and not mean a charname, in which case its
3012 * processing should be deferred to the regex compiler. To be a
3013 * charname it must be followed immediately by a '{', and not look
3014 * like \N followed by a curly quantifier, i.e., not something like
3015 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3017 else if (PL_lex_inpat
3020 || regcurly(s + 1)))
3022 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3023 goto default_action;
3028 /* quoted - in transliterations */
3030 if (PL_lex_inwhat == OP_TRANS) {
3038 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3039 "Unrecognized escape \\%c passed through",
3041 /* default action is to copy the quoted character */
3042 goto default_action;
3045 /* eg. \132 indicates the octal constant 0132 */
3046 case '0': case '1': case '2': case '3':
3047 case '4': case '5': case '6': case '7':
3051 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3054 goto NUM_ESCAPE_INSERT;
3056 /* eg. \o{24} indicates the octal constant \024 */
3062 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3068 goto NUM_ESCAPE_INSERT;
3071 /* eg. \x24 indicates the hex constant 0x24 */
3077 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3086 /* Insert oct or hex escaped character. There will always be
3087 * enough room in sv since such escapes will be longer than any
3088 * UTF-8 sequence they can end up as, except if they force us
3089 * to recode the rest of the string into utf8 */
3091 /* Here uv is the ordinal of the next character being added in
3092 * unicode (converted from native). */
3093 if (!UNI_IS_INVARIANT(uv)) {
3094 if (!has_utf8 && uv > 255) {
3095 /* Might need to recode whatever we have accumulated so
3096 * far if it contains any chars variant in utf8 or
3099 SvCUR_set(sv, d - SvPVX_const(sv));
3102 /* See Note on sizing above. */
3103 sv_utf8_upgrade_flags_grow(sv,
3104 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3105 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3106 d = SvPVX(sv) + SvCUR(sv);
3111 d = (char*)uvuni_to_utf8((U8*)d, uv);
3112 if (PL_lex_inwhat == OP_TRANS &&
3113 PL_sublex_info.sub_op) {
3114 PL_sublex_info.sub_op->op_private |=
3115 (PL_lex_repl ? OPpTRANS_FROM_UTF
3119 if (uv > 255 && !dorange)
3120 native_range = FALSE;
3133 /* In a non-pattern \N must be a named character, like \N{LATIN
3134 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3135 * mean to match a non-newline. For non-patterns, named
3136 * characters are converted to their string equivalents. In
3137 * patterns, named characters are not converted to their
3138 * ultimate forms for the same reasons that other escapes
3139 * aren't. Instead, they are converted to the \N{U+...} form
3140 * to get the value from the charnames that is in effect right
3141 * now, while preserving the fact that it was a named character
3142 * so that the regex compiler knows this */
3144 /* This section of code doesn't generally use the
3145 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3146 * a close examination of this macro and determined it is a
3147 * no-op except on utfebcdic variant characters. Every
3148 * character generated by this that would normally need to be
3149 * enclosed by this macro is invariant, so the macro is not
3150 * needed, and would complicate use of copy(). XXX There are
3151 * other parts of this file where the macro is used
3152 * inconsistently, but are saved by it being a no-op */
3154 /* The structure of this section of code (besides checking for
3155 * errors and upgrading to utf8) is:
3156 * Further disambiguate between the two meanings of \N, and if
3157 * not a charname, go process it elsewhere
3158 * If of form \N{U+...}, pass it through if a pattern;
3159 * otherwise convert to utf8
3160 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3161 * pattern; otherwise convert to utf8 */
3163 /* Here, s points to the 'N'; the test below is guaranteed to
3164 * succeed if we are being called on a pattern as we already
3165 * know from a test above that the next character is a '{'.
3166 * On a non-pattern \N must mean 'named sequence, which
3167 * requires braces */
3170 yyerror("Missing braces on \\N{}");
3175 /* If there is no matching '}', it is an error. */
3176 if (! (e = strchr(s, '}'))) {
3177 if (! PL_lex_inpat) {
3178 yyerror("Missing right brace on \\N{}");
3180 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3185 /* Here it looks like a named character */
3189 /* XXX This block is temporary code. \N{} implies that the
3190 * pattern is to have Unicode semantics, and therefore
3191 * currently has to be encoded in utf8. By putting it in
3192 * utf8 now, we save a whole pass in the regular expression
3193 * compiler. Once that code is changed so Unicode
3194 * semantics doesn't necessarily have to be in utf8, this
3195 * block should be removed. However, the code that parses
3196 * the output of this would have to be changed to not
3197 * necessarily expect utf8 */
3199 SvCUR_set(sv, d - SvPVX_const(sv));
3202 /* See Note on sizing above. */
3203 sv_utf8_upgrade_flags_grow(sv,
3204 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3205 /* 5 = '\N{' + cur char + NUL */
3206 (STRLEN)(send - s) + 5);
3207 d = SvPVX(sv) + SvCUR(sv);
3212 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3213 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3214 | PERL_SCAN_DISALLOW_PREFIX;
3217 /* For \N{U+...}, the '...' is a unicode value even on
3218 * EBCDIC machines */
3219 s += 2; /* Skip to next char after the 'U+' */
3221 uv = grok_hex(s, &len, &flags, NULL);
3222 if (len == 0 || len != (STRLEN)(e - s)) {
3223 yyerror("Invalid hexadecimal number in \\N{U+...}");
3230 /* On non-EBCDIC platforms, pass through to the regex
3231 * compiler unchanged. The reason we evaluated the
3232 * number above is to make sure there wasn't a syntax
3233 * error. But on EBCDIC we convert to native so
3234 * downstream code can continue to assume it's native
3236 s -= 5; /* Include the '\N{U+' */
3238 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3241 (unsigned int) UNI_TO_NATIVE(uv));
3243 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3247 else { /* Not a pattern: convert the hex to string */
3249 /* If destination is not in utf8, unconditionally
3250 * recode it to be so. This is because \N{} implies
3251 * Unicode semantics, and scalars have to be in utf8
3252 * to guarantee those semantics */
3254 SvCUR_set(sv, d - SvPVX_const(sv));
3257 /* See Note on sizing above. */
3258 sv_utf8_upgrade_flags_grow(
3260 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3261 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3262 d = SvPVX(sv) + SvCUR(sv);
3266 /* Add the string to the output */
3267 if (UNI_IS_INVARIANT(uv)) {
3270 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3273 else { /* Here is \N{NAME} but not \N{U+...}. */
3275 SV *res; /* result from charnames */
3276 const char *str; /* the string in 'res' */
3277 STRLEN len; /* its length */
3279 /* Get the value for NAME */
3280 res = newSVpvn(s, e - s);
3281 res = new_constant( NULL, 0, "charnames",
3282 /* includes all of: \N{...} */
3283 res, NULL, s - 3, e - s + 4 );
3285 /* Most likely res will be in utf8 already since the
3286 * standard charnames uses pack U, but a custom translator
3287 * can leave it otherwise, so make sure. XXX This can be
3288 * revisited to not have charnames use utf8 for characters
3289 * that don't need it when regexes don't have to be in utf8
3290 * for Unicode semantics. If doing so, remember EBCDIC */
3291 sv_utf8_upgrade(res);
3292 str = SvPV_const(res, len);
3294 /* Don't accept malformed input */
3295 if (! is_utf8_string((U8 *) str, len)) {
3296 yyerror("Malformed UTF-8 returned by \\N");
3298 else if (PL_lex_inpat) {
3300 if (! len) { /* The name resolved to an empty string */
3301 Copy("\\N{}", d, 4, char);
3305 /* In order to not lose information for the regex
3306 * compiler, pass the result in the specially made
3307 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3308 * the code points in hex of each character
3309 * returned by charnames */
3311 const char *str_end = str + len;
3312 STRLEN char_length; /* cur char's byte length */
3313 STRLEN output_length; /* and the number of bytes
3314 after this is translated
3316 const STRLEN off = d - SvPVX_const(sv);
3318 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3319 * max('U+', '.'); and 1 for NUL */
3320 char hex_string[2 * UTF8_MAXBYTES + 5];
3322 /* Get the first character of the result. */
3323 U32 uv = utf8n_to_uvuni((U8 *) str,
3328 /* The call to is_utf8_string() above hopefully
3329 * guarantees that there won't be an error. But
3330 * it's easy here to make sure. The function just
3331 * above warns and returns 0 if invalid utf8, but
3332 * it can also return 0 if the input is validly a
3333 * NUL. Disambiguate */
3334 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3335 uv = UNICODE_REPLACEMENT;
3338 /* Convert first code point to hex, including the
3339 * boiler plate before it. For all these, we
3340 * convert to native format so that downstream code
3341 * can continue to assume the input is native */
3343 my_snprintf(hex_string, sizeof(hex_string),
3345 (unsigned int) UNI_TO_NATIVE(uv));
3347 /* Make sure there is enough space to hold it */
3348 d = off + SvGROW(sv, off
3350 + (STRLEN)(send - e)
3351 + 2); /* '}' + NUL */
3353 Copy(hex_string, d, output_length, char);
3356 /* For each subsequent character, append dot and
3357 * its ordinal in hex */
3358 while ((str += char_length) < str_end) {
3359 const STRLEN off = d - SvPVX_const(sv);
3360 U32 uv = utf8n_to_uvuni((U8 *) str,
3364 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3365 uv = UNICODE_REPLACEMENT;
3369 my_snprintf(hex_string, sizeof(hex_string),
3371 (unsigned int) UNI_TO_NATIVE(uv));
3373 d = off + SvGROW(sv, off
3375 + (STRLEN)(send - e)
3376 + 2); /* '}' + NUL */
3377 Copy(hex_string, d, output_length, char);
3381 *d++ = '}'; /* Done. Add the trailing brace */
3384 else { /* Here, not in a pattern. Convert the name to a
3387 /* If destination is not in utf8, unconditionally
3388 * recode it to be so. This is because \N{} implies
3389 * Unicode semantics, and scalars have to be in utf8
3390 * to guarantee those semantics */
3392 SvCUR_set(sv, d - SvPVX_const(sv));
3395 /* See Note on sizing above. */
3396 sv_utf8_upgrade_flags_grow(sv,
3397 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3398 len + (STRLEN)(send - s) + 1);
3399 d = SvPVX(sv) + SvCUR(sv);
3401 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3403 /* See Note on sizing above. (NOTE: SvCUR() is not
3404 * set correctly here). */
3405 const STRLEN off = d - SvPVX_const(sv);
3406 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3408 Copy(str, d, len, char);
3413 /* Deprecate non-approved name syntax */
3414 if (ckWARN_d(WARN_DEPRECATED)) {
3415 bool problematic = FALSE;
3418 /* For non-ut8 input, look to see that the first
3419 * character is an alpha, then loop through the rest
3420 * checking that each is a continuation */
3422 if (! isALPHAU(*i)) problematic = TRUE;
3423 else for (i = s + 1; i < e; i++) {
3424 if (isCHARNAME_CONT(*i)) continue;
3430 /* Similarly for utf8. For invariants can check
3431 * directly. We accept anything above the latin1
3432 * range because it is immaterial to Perl if it is
3433 * correct or not, and is expensive to check. But
3434 * it is fairly easy in the latin1 range to convert
3435 * the variants into a single character and check
3437 if (UTF8_IS_INVARIANT(*i)) {
3438 if (! isALPHAU(*i)) problematic = TRUE;
3439 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3440 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3446 if (! problematic) for (i = s + UTF8SKIP(s);
3450 if (UTF8_IS_INVARIANT(*i)) {
3451 if (isCHARNAME_CONT(*i)) continue;
3452 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3454 } else if (isCHARNAME_CONT(
3456 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3465 /* The e-i passed to the final %.*s makes sure that
3466 * should the trailing NUL be missing that this
3467 * print won't run off the end of the string */
3468 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3469 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3470 (int)(i - s + 1), s, (int)(e - i), i + 1);
3473 } /* End \N{NAME} */
3476 native_range = FALSE; /* \N{} is defined to be Unicode */
3478 s = e + 1; /* Point to just after the '}' */
3481 /* \c is a control character */
3485 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3488 yyerror("Missing control char name in \\c");
3492 /* printf-style backslashes, formfeeds, newlines, etc */
3494 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3497 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3500 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3503 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3506 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3509 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3512 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3518 } /* end if (backslash) */
3525 /* If we started with encoded form, or already know we want it,
3526 then encode the next character */
3527 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3531 /* One might think that it is wasted effort in the case of the
3532 * source being utf8 (this_utf8 == TRUE) to take the next character
3533 * in the source, convert it to an unsigned value, and then convert
3534 * it back again. But the source has not been validated here. The
3535 * routine that does the conversion checks for errors like
3538 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3539 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3541 SvCUR_set(sv, d - SvPVX_const(sv));
3544 /* See Note on sizing above. */
3545 sv_utf8_upgrade_flags_grow(sv,
3546 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3547 need + (STRLEN)(send - s) + 1);
3548 d = SvPVX(sv) + SvCUR(sv);
3550 } else if (need > len) {
3551 /* encoded value larger than old, may need extra space (NOTE:
3552 * SvCUR() is not set correctly here). See Note on sizing
3554 const STRLEN off = d - SvPVX_const(sv);
3555 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3559 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3561 if (uv > 255 && !dorange)
3562 native_range = FALSE;
3566 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3568 } /* while loop to process each character */
3570 /* terminate the string and set up the sv */
3572 SvCUR_set(sv, d - SvPVX_const(sv));
3573 if (SvCUR(sv) >= SvLEN(sv))
3574 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3575 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3578 if (PL_encoding && !has_utf8) {
3579 sv_recode_to_utf8(sv, PL_encoding);
3585 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3586 PL_sublex_info.sub_op->op_private |=
3587 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3591 /* shrink the sv if we allocated more than we used */
3592 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3593 SvPV_shrink_to_cur(sv);
3596 /* return the substring (via pl_yylval) only if we parsed anything */
3597 if (s > PL_bufptr) {
3598 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3599 const char *const key = PL_lex_inpat ? "qr" : "q";
3600 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3604 if (PL_lex_inwhat == OP_TRANS) {
3607 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3610 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3618 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3621 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3628 * Returns TRUE if there's more to the expression (e.g., a subscript),
3631 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3633 * ->[ and ->{ return TRUE
3634 * { and [ outside a pattern are always subscripts, so return TRUE
3635 * if we're outside a pattern and it's not { or [, then return FALSE
3636 * if we're in a pattern and the first char is a {
3637 * {4,5} (any digits around the comma) returns FALSE
3638 * if we're in a pattern and the first char is a [
3640 * [SOMETHING] has a funky algorithm to decide whether it's a
3641 * character class or not. It has to deal with things like
3642 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3643 * anything else returns TRUE
3646 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3649 S_intuit_more(pTHX_ register char *s)
3653 PERL_ARGS_ASSERT_INTUIT_MORE;
3655 if (PL_lex_brackets)
3657 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3659 if (*s != '{' && *s != '[')
3664 /* In a pattern, so maybe we have {n,m}. */
3672 /* On the other hand, maybe we have a character class */
3675 if (*s == ']' || *s == '^')
3678 /* this is terrifying, and it works */
3679 int weight = 2; /* let's weigh the evidence */
3681 unsigned char un_char = 255, last_un_char;
3682 const char * const send = strchr(s,']');
3683 char tmpbuf[sizeof PL_tokenbuf * 4];
3685 if (!send) /* has to be an expression */
3688 Zero(seen,256,char);
3691 else if (isDIGIT(*s)) {
3693 if (isDIGIT(s[1]) && s[2] == ']')
3699 for (; s < send; s++) {
3700 last_un_char = un_char;
3701 un_char = (unsigned char)*s;
3706 weight -= seen[un_char] * 10;
3707 if (isALNUM_lazy_if(s+1,UTF)) {
3709 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3710 len = (int)strlen(tmpbuf);
3711 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3712 UTF ? SVf_UTF8 : 0, SVt_PV))
3717 else if (*s == '$' && s[1] &&
3718 strchr("[#!%*<>()-=",s[1])) {
3719 if (/*{*/ strchr("])} =",s[2]))
3728 if (strchr("wds]",s[1]))
3730 else if (seen[(U8)'\''] || seen[(U8)'"'])
3732 else if (strchr("rnftbxcav",s[1]))
3734 else if (isDIGIT(s[1])) {
3736 while (s[1] && isDIGIT(s[1]))
3746 if (strchr("aA01! ",last_un_char))
3748 if (strchr("zZ79~",s[1]))
3750 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3751 weight -= 5; /* cope with negative subscript */
3754 if (!isALNUM(last_un_char)
3755 && !(last_un_char == '$' || last_un_char == '@'
3756 || last_un_char == '&')
3757 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3762 if (keyword(tmpbuf, d - tmpbuf, 0))
3765 if (un_char == last_un_char + 1)
3767 weight -= seen[un_char];
3772 if (weight >= 0) /* probably a character class */
3782 * Does all the checking to disambiguate
3784 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3785 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3787 * First argument is the stuff after the first token, e.g. "bar".
3789 * Not a method if foo is a filehandle.
3790 * Not a method if foo is a subroutine prototyped to take a filehandle.
3791 * Not a method if it's really "Foo $bar"
3792 * Method if it's "foo $bar"
3793 * Not a method if it's really "print foo $bar"
3794 * Method if it's really "foo package::" (interpreted as package->foo)
3795 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3796 * Not a method if bar is a filehandle or package, but is quoted with
3801 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3804 char *s = start + (*start == '$');
3805 char tmpbuf[sizeof PL_tokenbuf];
3812 PERL_ARGS_ASSERT_INTUIT_METHOD;
3814 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3816 if (cv && SvPOK(cv)) {
3817 const char *proto = CvPROTO(cv);
3825 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3826 /* start is the beginning of the possible filehandle/object,
3827 * and s is the end of it
3828 * tmpbuf is a copy of it
3831 if (*start == '$') {
3832 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3833 isUPPER(*PL_tokenbuf))
3836 len = start - SvPVX(PL_linestr);
3840 start = SvPVX(PL_linestr) + len;
3844 return *s == '(' ? FUNCMETH : METHOD;
3846 if (!keyword(tmpbuf, len, 0)) {
3847 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3851 soff = s - SvPVX(PL_linestr);
3855 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3856 if (indirgv && GvCVu(indirgv))
3858 /* filehandle or package name makes it a method */
3859 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3861 soff = s - SvPVX(PL_linestr);
3864 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3865 return 0; /* no assumptions -- "=>" quotes bareword */
3867 start_force(PL_curforce);
3868 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3869 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3870 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3872 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3873 ( UTF ? SVf_UTF8 : 0 )));
3878 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3880 return *s == '(' ? FUNCMETH : METHOD;
3886 /* Encoded script support. filter_add() effectively inserts a
3887 * 'pre-processing' function into the current source input stream.
3888 * Note that the filter function only applies to the current source file
3889 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3891 * The datasv parameter (which may be NULL) can be used to pass
3892 * private data to this instance of the filter. The filter function
3893 * can recover the SV using the FILTER_DATA macro and use it to
3894 * store private buffers and state information.
3896 * The supplied datasv parameter is upgraded to a PVIO type
3897 * and the IoDIRP/IoANY field is used to store the function pointer,
3898 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3899 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3900 * private use must be set using malloc'd pointers.
3904 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3913 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3914 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3916 if (!PL_rsfp_filters)
3917 PL_rsfp_filters = newAV();
3920 SvUPGRADE(datasv, SVt_PVIO);
3921 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3922 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3923 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3924 FPTR2DPTR(void *, IoANY(datasv)),
3925 SvPV_nolen(datasv)));
3926 av_unshift(PL_rsfp_filters, 1);
3927 av_store(PL_rsfp_filters, 0, datasv) ;
3929 !PL_parser->filtered
3930 && PL_parser->lex_flags & LEX_EVALBYTES
3931 && PL_bufptr < PL_bufend
3933 const char *s = PL_bufptr;
3934 while (s < PL_bufend) {
3936 SV *linestr = PL_parser->linestr;
3937 char *buf = SvPVX(linestr);
3938 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3939 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3940 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3941 STRLEN const linestart_pos = PL_parser->linestart - buf;
3942 STRLEN const last_uni_pos =
3943 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3944 STRLEN const last_lop_pos =
3945 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3946 av_push(PL_rsfp_filters, linestr);
3947 PL_parser->linestr =
3948 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3949 buf = SvPVX(PL_parser->linestr);
3950 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3951 PL_parser->bufptr = buf + bufptr_pos;
3952 PL_parser->oldbufptr = buf + oldbufptr_pos;
3953 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3954 PL_parser->linestart = buf + linestart_pos;
3955 if (PL_parser->last_uni)
3956 PL_parser->last_uni = buf + last_uni_pos;
3957 if (PL_parser->last_lop)
3958 PL_parser->last_lop = buf + last_lop_pos;
3959 SvLEN(linestr) = SvCUR(linestr);
3960 SvCUR(linestr) = s-SvPVX(linestr);
3961 PL_parser->filtered = 1;
3971 /* Delete most recently added instance of this filter function. */
3973 Perl_filter_del(pTHX_ filter_t funcp)
3978 PERL_ARGS_ASSERT_FILTER_DEL;
3981 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3982 FPTR2DPTR(void*, funcp)));
3984 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3986 /* if filter is on top of stack (usual case) just pop it off */
3987 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3988 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3989 sv_free(av_pop(PL_rsfp_filters));
3993 /* we need to search for the correct entry and clear it */
3994 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3998 /* Invoke the idxth filter function for the current rsfp. */
3999 /* maxlen 0 = read one text line */
4001 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4006 /* This API is bad. It should have been using unsigned int for maxlen.
4007 Not sure if we want to change the API, but if not we should sanity
4008 check the value here. */
4009 unsigned int correct_length
4018 PERL_ARGS_ASSERT_FILTER_READ;
4020 if (!PL_parser || !PL_rsfp_filters)
4022 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4023 /* Provide a default input filter to make life easy. */
4024 /* Note that we append to the line. This is handy. */
4025 DEBUG_P(PerlIO_printf(Perl_debug_log,
4026 "filter_read %d: from rsfp\n", idx));
4027 if (correct_length) {
4030 const int old_len = SvCUR(buf_sv);
4032 /* ensure buf_sv is large enough */
4033 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4034 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4035 correct_length)) <= 0) {
4036 if (PerlIO_error(PL_rsfp))
4037 return -1; /* error */
4039 return 0 ; /* end of file */
4041 SvCUR_set(buf_sv, old_len + len) ;
4042 SvPVX(buf_sv)[old_len + len] = '\0';
4045 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4046 if (PerlIO_error(PL_rsfp))
4047 return -1; /* error */
4049 return 0 ; /* end of file */
4052 return SvCUR(buf_sv);
4054 /* Skip this filter slot if filter has been deleted */
4055 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4056 DEBUG_P(PerlIO_printf(Perl_debug_log,
4057 "filter_read %d: skipped (filter deleted)\n",
4059 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4061 if (SvTYPE(datasv) != SVt_PVIO) {
4062 if (correct_length) {
4064 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4065 if (!remainder) return 0; /* eof */
4066 if (correct_length > remainder) correct_length = remainder;
4067 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4068 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4071 const char *s = SvEND(datasv);
4072 const char *send = SvPVX(datasv) + SvLEN(datasv);
4080 if (s == send) return 0; /* eof */
4081 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4082 SvCUR_set(datasv, s-SvPVX(datasv));
4084 return SvCUR(buf_sv);
4086 /* Get function pointer hidden within datasv */
4087 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4088 DEBUG_P(PerlIO_printf(Perl_debug_log,
4089 "filter_read %d: via function %p (%s)\n",
4090 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4091 /* Call function. The function is expected to */
4092 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4093 /* Return: <0:error, =0:eof, >0:not eof */
4094 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4098 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4102 PERL_ARGS_ASSERT_FILTER_GETS;
4104 #ifdef PERL_CR_FILTER
4105 if (!PL_rsfp_filters) {
4106 filter_add(S_cr_textfilter,NULL);
4109 if (PL_rsfp_filters) {
4111 SvCUR_set(sv, 0); /* start with empty line */
4112 if (FILTER_READ(0, sv, 0) > 0)
4113 return ( SvPVX(sv) ) ;
4118 return (sv_gets(sv, PL_rsfp, append));
4122 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4127 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4129 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4133 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4134 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4136 return GvHV(gv); /* Foo:: */
4139 /* use constant CLASS => 'MyClass' */
4140 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4141 if (gv && GvCV(gv)) {
4142 SV * const sv = cv_const_sv(GvCV(gv));
4144 pkgname = SvPV_const(sv, len);
4147 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4151 * S_readpipe_override
4152 * Check whether readpipe() is overridden, and generates the appropriate
4153 * optree, provided sublex_start() is called afterwards.
4156 S_readpipe_override(pTHX)
4159 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4160 pl_yylval.ival = OP_BACKTICK;
4162 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4164 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4165 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4166 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4168 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4169 op_append_elem(OP_LIST,
4170 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4171 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4178 * The intent of this yylex wrapper is to minimize the changes to the
4179 * tokener when we aren't interested in collecting madprops. It remains
4180 * to be seen how successful this strategy will be...
4187 char *s = PL_bufptr;
4189 /* make sure PL_thiswhite is initialized */
4193 /* previous token ate up our whitespace? */
4194 if (!PL_lasttoke && PL_nextwhite) {
4195 PL_thiswhite = PL_nextwhite;
4199 /* isolate the token, and figure out where it is without whitespace */
4200 PL_realtokenstart = -1;
4204 assert(PL_curforce < 0);
4206 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4207 if (!PL_thistoken) {
4208 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4209 PL_thistoken = newSVpvs("");
4211 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4212 PL_thistoken = newSVpvn(tstart, s - tstart);
4215 if (PL_thismad) /* install head */
4216 CURMAD('X', PL_thistoken);
4219 /* last whitespace of a sublex? */
4220 if (optype == ')' && PL_endwhite) {
4221 CURMAD('X', PL_endwhite);
4226 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4227 if (!PL_thiswhite && !PL_endwhite && !optype) {
4228 sv_free(PL_thistoken);
4233 /* put off final whitespace till peg */
4234 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4235 PL_nextwhite = PL_thiswhite;
4238 else if (PL_thisopen) {
4239 CURMAD('q', PL_thisopen);
4241 sv_free(PL_thistoken);
4245 /* Store actual token text as madprop X */
4246 CURMAD('X', PL_thistoken);
4250 /* add preceding whitespace as madprop _ */
4251 CURMAD('_', PL_thiswhite);
4255 /* add quoted material as madprop = */
4256 CURMAD('=', PL_thisstuff);
4260 /* add terminating quote as madprop Q */
4261 CURMAD('Q', PL_thisclose);
4265 /* special processing based on optype */
4269 /* opval doesn't need a TOKEN since it can already store mp */
4280 if (pl_yylval.opval)
4281 append_madprops(PL_thismad, pl_yylval.opval, 0);
4289 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4298 /* remember any fake bracket that lexer is about to discard */
4299 if (PL_lex_brackets == 1 &&
4300 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4303 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4306 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4307 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4310 break; /* don't bother looking for trailing comment */
4319 /* attach a trailing comment to its statement instead of next token */
4323 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4325 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4327 if (*s == '\n' || *s == '#') {
4328 while (s < PL_bufend && *s != '\n')
4332 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4333 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4346 /* Create new token struct. Note: opvals return early above. */
4347 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4354 S_tokenize_use(pTHX_ int is_use, char *s) {
4357 PERL_ARGS_ASSERT_TOKENIZE_USE;
4359 if (PL_expect != XSTATE)
4360 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4361 is_use ? "use" : "no"));
4364 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4365 s = force_version(s, TRUE);
4366 if (*s == ';' || *s == '}'
4367 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4368 start_force(PL_curforce);
4369 NEXTVAL_NEXTTOKE.opval = NULL;
4372 else if (*s == 'v') {
4373 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4374 s = force_version(s, FALSE);
4378 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4379 s = force_version(s, FALSE);
4381 pl_yylval.ival = is_use;
4385 static const char* const exp_name[] =
4386 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4387 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4391 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4393 S_word_takes_any_delimeter(char *p, STRLEN len)
4395 return (len == 1 && strchr("msyq", p[0])) ||
4397 (p[0] == 't' && p[1] == 'r') ||
4398 (p[0] == 'q' && strchr("qwxr", p[1]))));
4404 Works out what to call the token just pulled out of the input
4405 stream. The yacc parser takes care of taking the ops we return and
4406 stitching them into a tree.
4412 if read an identifier
4413 if we're in a my declaration
4414 croak if they tried to say my($foo::bar)
4415 build the ops for a my() declaration
4416 if it's an access to a my() variable
4417 are we in a sort block?
4418 croak if my($a); $a <=> $b
4419 build ops for access to a my() variable
4420 if in a dq string, and they've said @foo and we can't find @foo
4422 build ops for a bareword
4423 if we already built the token before, use it.
4428 #pragma segment Perl_yylex
4434 char *s = PL_bufptr;
4441 /* orig_keyword, gvp, and gv are initialized here because
4442 * jump to the label just_a_word_zero can bypass their
4443 * initialization later. */
4444 I32 orig_keyword = 0;
4449 SV* tmp = newSVpvs("");
4450 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4451 (IV)CopLINE(PL_curcop),
4452 lex_state_names[PL_lex_state],
4453 exp_name[PL_expect],
4454 pv_display(tmp, s, strlen(s), 0, 60));
4458 switch (PL_lex_state) {
4460 case LEX_NORMAL: /* Some compilers will produce faster */
4461 case LEX_INTERPNORMAL: /* code if we comment these out. */
4465 /* when we've already built the next token, just pull it out of the queue */
4469 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4471 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4472 PL_nexttoke[PL_lasttoke].next_mad = 0;
4473 if (PL_thismad && PL_thismad->mad_key == '_') {
4474 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4475 PL_thismad->mad_val = 0;
4476 mad_free(PL_thismad);
4481 PL_lex_state = PL_lex_defer;
4482 PL_expect = PL_lex_expect;
4483 PL_lex_defer = LEX_NORMAL;
4484 if (!PL_nexttoke[PL_lasttoke].next_type)
4489 pl_yylval = PL_nextval[PL_nexttoke];
4491 PL_lex_state = PL_lex_defer;
4492 PL_expect = PL_lex_expect;
4493 PL_lex_defer = LEX_NORMAL;
4499 next_type = PL_nexttoke[PL_lasttoke].next_type;
4501 next_type = PL_nexttype[PL_nexttoke];
4503 if (next_type & (7<<24)) {
4504 if (next_type & (1<<24)) {
4505 if (PL_lex_brackets > 100)
4506 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4507 PL_lex_brackstack[PL_lex_brackets++] =
4508 (char) ((next_type >> 16) & 0xff);
4510 if (next_type & (2<<24))
4511 PL_lex_allbrackets++;
4512 if (next_type & (4<<24))
4513 PL_lex_allbrackets--;
4514 next_type &= 0xffff;
4516 if (S_is_opval_token(next_type) && pl_yylval.opval)
4517 pl_yylval.opval->op_savefree = 0; /* release */
4518 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4521 /* interpolated case modifiers like \L \U, including \Q and \E.
4522 when we get here, PL_bufptr is at the \
4524 case LEX_INTERPCASEMOD:
4526 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4528 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4529 PL_bufptr, PL_bufend, *PL_bufptr);
4531 /* handle \E or end of string */
4532 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4534 if (PL_lex_casemods) {
4535 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4536 PL_lex_casestack[PL_lex_casemods] = '\0';
4538 if (PL_bufptr != PL_bufend
4539 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4540 || oldmod == 'F')) {
4542 PL_lex_state = LEX_INTERPCONCAT;
4545 PL_thistoken = newSVpvs("\\E");
4548 PL_lex_allbrackets--;
4551 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4552 /* Got an unpaired \E */
4553 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4554 "Useless use of \\E");
4557 while (PL_bufptr != PL_bufend &&
4558 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4560 PL_thiswhite = newSVpvs("");
4561 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4565 if (PL_bufptr != PL_bufend)
4568 PL_lex_state = LEX_INTERPCONCAT;
4572 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4573 "### Saw case modifier\n"); });
4575 if (s[1] == '\\' && s[2] == 'E') {
4578 PL_thiswhite = newSVpvs("");
4579 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4582 PL_lex_state = LEX_INTERPCONCAT;
4587 if (!PL_madskills) /* when just compiling don't need correct */
4588 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4589 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4590 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4591 (strchr(PL_lex_casestack, 'L')
4592 || strchr(PL_lex_casestack, 'U')
4593 || strchr(PL_lex_casestack, 'F'))) {
4594 PL_lex_casestack[--PL_lex_casemods] = '\0';
4595 PL_lex_allbrackets--;
4598 if (PL_lex_casemods > 10)
4599 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4600 PL_lex_casestack[PL_lex_casemods++] = *s;
4601 PL_lex_casestack[PL_lex_casemods] = '\0';
4602 PL_lex_state = LEX_INTERPCONCAT;
4603 start_force(PL_curforce);
4604 NEXTVAL_NEXTTOKE.ival = 0;
4605 force_next((2<<24)|'(');
4606 start_force(PL_curforce);
4608 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4610 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4612 NEXTVAL_NEXTTOKE.ival = OP_LC;
4614 NEXTVAL_NEXTTOKE.ival = OP_UC;
4616 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4618 NEXTVAL_NEXTTOKE.ival = OP_FC;
4620 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4622 SV* const tmpsv = newSVpvs("\\ ");
4623 /* replace the space with the character we want to escape
4625 SvPVX(tmpsv)[1] = *s;
4631 if (PL_lex_starts) {
4637 sv_free(PL_thistoken);
4638 PL_thistoken = newSVpvs("");
4641 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4642 if (PL_lex_casemods == 1 && PL_lex_inpat)
4651 case LEX_INTERPPUSH:
4652 return REPORT(sublex_push());
4654 case LEX_INTERPSTART:
4655 if (PL_bufptr == PL_bufend)
4656 return REPORT(sublex_done());
4657 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4658 "### Interpolated variable\n"); });
4660 PL_lex_dojoin = (*PL_bufptr == '@');
4661 PL_lex_state = LEX_INTERPNORMAL;
4662 if (PL_lex_dojoin) {
4663 start_force(PL_curforce);
4664 NEXTVAL_NEXTTOKE.ival = 0;
4666 start_force(PL_curforce);
4667 force_ident("\"", '$');
4668 start_force(PL_curforce);
4669 NEXTVAL_NEXTTOKE.ival = 0;
4671 start_force(PL_curforce);
4672 NEXTVAL_NEXTTOKE.ival = 0;
4673 force_next((2<<24)|'(');
4674 start_force(PL_curforce);
4675 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4678 /* Convert (?{...}) and friends to 'do {...}' */
4679 if (PL_lex_inpat && *PL_bufptr == '(') {
4680 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4682 if (*PL_bufptr != '{')
4684 start_force(PL_curforce);
4685 /* XXX probably need a CURMAD(something) here */
4686 PL_expect = XTERMBLOCK;
4690 if (PL_lex_starts++) {
4695 sv_free(PL_thistoken);
4696 PL_thistoken = newSVpvs("");
4699 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4700 if (!PL_lex_casemods && PL_lex_inpat)
4707 case LEX_INTERPENDMAYBE:
4708 if (intuit_more(PL_bufptr)) {
4709 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4715 if (PL_lex_dojoin) {
4716 PL_lex_dojoin = FALSE;
4717 PL_lex_state = LEX_INTERPCONCAT;
4721 sv_free(PL_thistoken);
4722 PL_thistoken = newSVpvs("");
4725 PL_lex_allbrackets--;
4728 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4729 && SvEVALED(PL_lex_repl))
4731 if (PL_bufptr != PL_bufend)
4732 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4735 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4736 re_eval_str. If the here-doc body’s length equals the previous
4737 value of re_eval_start, re_eval_start will now be null. So
4738 check re_eval_str as well. */
4739 if (PL_parser->lex_shared->re_eval_start
4740 || PL_parser->lex_shared->re_eval_str) {
4742 if (*PL_bufptr != ')')
4743 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4745 /* having compiled a (?{..}) expression, return the original
4746 * text too, as a const */
4747 if (PL_parser->lex_shared->re_eval_str) {
4748 sv = PL_parser->lex_shared->re_eval_str;
4749 PL_parser->lex_shared->re_eval_str = NULL;
4751 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4752 SvPV_shrink_to_cur(sv);
4754 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4755 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4756 start_force(PL_curforce);
4757 /* XXX probably need a CURMAD(something) here */
4758 NEXTVAL_NEXTTOKE.opval =
4759 (OP*)newSVOP(OP_CONST, 0,
4762 PL_parser->lex_shared->re_eval_start = NULL;
4768 case LEX_INTERPCONCAT:
4770 if (PL_lex_brackets)
4771 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4772 (long) PL_lex_brackets);
4774 if (PL_bufptr == PL_bufend)
4775 return REPORT(sublex_done());
4777 /* m'foo' still needs to be parsed for possible (?{...}) */
4778 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4779 SV *sv = newSVsv(PL_linestr);
4781 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4785 s = scan_const(PL_bufptr);
4787 PL_lex_state = LEX_INTERPCASEMOD;
4789 PL_lex_state = LEX_INTERPSTART;
4792 if (s != PL_bufptr) {
4793 start_force(PL_curforce);
4795 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4797 NEXTVAL_NEXTTOKE = pl_yylval;
4800 if (PL_lex_starts++) {
4804 sv_free(PL_thistoken);
4805 PL_thistoken = newSVpvs("");
4808 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4809 if (!PL_lex_casemods && PL_lex_inpat)
4822 s = scan_formline(PL_bufptr);
4823 if (!PL_lex_formbrack)
4833 PL_oldoldbufptr = PL_oldbufptr;
4839 sv_free(PL_thistoken);
4842 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4846 if (isIDFIRST_lazy_if(s,UTF))
4849 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4850 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4852 SVs_TEMP | SVf_UTF8),
4853 10, UNI_DISPLAY_ISPRINT))
4854 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4855 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4856 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4857 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4865 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
4869 goto fake_eof; /* emulate EOF on ^D or ^Z */
4875 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4878 if (PL_lex_brackets &&
4879 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4880 yyerror((const char *)
4882 ? "Format not terminated"
4883 : "Missing right curly or square bracket"));
4885 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4886 "### Tokener got EOF\n");
4890 if (s++ < PL_bufend)
4891 goto retry; /* ignore stray nulls */
4894 if (!PL_in_eval && !PL_preambled) {
4895 PL_preambled = TRUE;
4901 /* Generate a string of Perl code to load the debugger.
4902 * If PERL5DB is set, it will return the contents of that,
4903 * otherwise a compile-time require of perl5db.pl. */
4905 const char * const pdb = PerlEnv_getenv("PERL5DB");
4908 sv_setpv(PL_linestr, pdb);
4909 sv_catpvs(PL_linestr,";");
4911 SETERRNO(0,SS_NORMAL);
4912 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4915 sv_setpvs(PL_linestr,"");
4916 if (PL_preambleav) {
4917 SV **svp = AvARRAY(PL_preambleav);
4918 SV **const end = svp + AvFILLp(PL_preambleav);
4920 sv_catsv(PL_linestr, *svp);
4922 sv_catpvs(PL_linestr, ";");
4924 sv_free(MUTABLE_SV(PL_preambleav));
4925 PL_preambleav = NULL;
4928 sv_catpvs(PL_linestr,
4929 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4930 if (PL_minus_n || PL_minus_p) {
4931 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4933 sv_catpvs(PL_linestr,"chomp;");
4936 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4937 || *PL_splitstr == '"')
4938 && strchr(PL_splitstr + 1, *PL_splitstr))
4939 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4941 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4942 bytes can be used as quoting characters. :-) */
4943 const char *splits = PL_splitstr;
4944 sv_catpvs(PL_linestr, "our @F=split(q\0");
4947 if (*splits == '\\')
4948 sv_catpvn(PL_linestr, splits, 1);
4949 sv_catpvn(PL_linestr, splits, 1);
4950 } while (*splits++);
4951 /* This loop will embed the trailing NUL of
4952 PL_linestr as the last thing it does before
4954 sv_catpvs(PL_linestr, ");");
4958 sv_catpvs(PL_linestr,"our @F=split(' ');");
4961 sv_catpvs(PL_linestr, "\n");
4962 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4963 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4964 PL_last_lop = PL_last_uni = NULL;
4965 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4966 update_debugger_info(PL_linestr, NULL, 0);
4971 bof = PL_rsfp ? TRUE : FALSE;
4974 fake_eof = LEX_FAKE_EOF;
4976 PL_bufptr = PL_bufend;
4977 COPLINE_INC_WITH_HERELINES;
4978 if (!lex_next_chunk(fake_eof)) {
4979 CopLINE_dec(PL_curcop);
4981 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4983 CopLINE_dec(PL_curcop);
4986 PL_realtokenstart = -1;
4989 /* If it looks like the start of a BOM or raw UTF-16,
4990 * check if it in fact is. */
4991 if (bof && PL_rsfp &&
4996 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4997 bof = (offset == (Off_t)SvCUR(PL_linestr));
4998 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4999 /* offset may include swallowed CR */
5001 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5004 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5005 s = swallow_bom((U8*)s);
5008 if (PL_parser->in_pod) {
5009 /* Incest with pod. */
5012 sv_catsv(PL_thiswhite, PL_linestr);
5014 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5015 sv_setpvs(PL_linestr, "");
5016 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5017 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5018 PL_last_lop = PL_last_uni = NULL;
5019 PL_parser->in_pod = 0;
5022 if (PL_rsfp || PL_parser->filtered)
5024 } while (PL_parser->in_pod);
5025 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5026 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5027 PL_last_lop = PL_last_uni = NULL;
5028 if (CopLINE(PL_curcop) == 1) {
5029 while (s < PL_bufend && isSPACE(*s))
5031 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5035 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5039 if (*s == '#' && *(s+1) == '!')
5041 #ifdef ALTERNATE_SHEBANG
5043 static char const as[] = ALTERNATE_SHEBANG;
5044 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5045 d = s + (sizeof(as) - 1);
5047 #endif /* ALTERNATE_SHEBANG */
5056 while (*d && !isSPACE(*d))
5060 #ifdef ARG_ZERO_IS_SCRIPT
5061 if (ipathend > ipath) {
5063 * HP-UX (at least) sets argv[0] to the script name,
5064 * which makes $^X incorrect. And Digital UNIX and Linux,
5065 * at least, set argv[0] to the basename of the Perl
5066 * interpreter. So, having found "#!", we'll set it right.
5068 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5070 assert(SvPOK(x) || SvGMAGICAL(x));
5071 if (sv_eq(x, CopFILESV(PL_curcop))) {
5072 sv_setpvn(x, ipath, ipathend - ipath);
5078 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5079 const char * const lstart = SvPV_const(x,llen);
5081 bstart += blen - llen;
5082 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5083 sv_setpvn(x, ipath, ipathend - ipath);
5088 TAINT_NOT; /* $^X is always tainted, but that's OK */
5090 #endif /* ARG_ZERO_IS_SCRIPT */
5095 d = instr(s,"perl -");
5097 d = instr(s,"perl");
5099 /* avoid getting into infinite loops when shebang
5100 * line contains "Perl" rather than "perl" */
5102 for (d = ipathend-4; d >= ipath; --d) {
5103 if ((*d == 'p' || *d == 'P')
5104 && !ibcmp(d, "perl", 4))
5114 #ifdef ALTERNATE_SHEBANG
5116 * If the ALTERNATE_SHEBANG on this system starts with a
5117 * character that can be part of a Perl expression, then if
5118 * we see it but not "perl", we're probably looking at the
5119 * start of Perl code, not a request to hand off to some
5120 * other interpreter. Similarly, if "perl" is there, but
5121 * not in the first 'word' of the line, we assume the line
5122 * contains the start of the Perl program.
5124 if (d && *s != '#') {
5125 const char *c = ipath;
5126 while (*c && !strchr("; \t\r\n\f\v#", *c))
5129 d = NULL; /* "perl" not in first word; ignore */
5131 *s = '#'; /* Don't try to parse shebang line */
5133 #endif /* ALTERNATE_SHEBANG */
5138 !instr(s,"indir") &&
5139 instr(PL_origargv[0],"perl"))
5146 while (s < PL_bufend && isSPACE(*s))
5148 if (s < PL_bufend) {
5149 Newx(newargv,PL_origargc+3,char*);
5151 while (s < PL_bufend && !isSPACE(*s))
5154 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5157 newargv = PL_origargv;
5160 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5162 Perl_croak(aTHX_ "Can't exec %s", ipath);
5165 while (*d && !isSPACE(*d))
5167 while (SPACE_OR_TAB(*d))
5171 const bool switches_done = PL_doswitches;
5172 const U32 oldpdb = PL_perldb;
5173 const bool oldn = PL_minus_n;
5174 const bool oldp = PL_minus_p;
5178 bool baduni = FALSE;
5180 const char *d2 = d1 + 1;
5181 if (parse_unicode_opts((const char **)&d2)
5185 if (baduni || *d1 == 'M' || *d1 == 'm') {
5186 const char * const m = d1;
5187 while (*d1 && !isSPACE(*d1))
5189 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5192 d1 = moreswitches(d1);
5194 if (PL_doswitches && !switches_done) {
5195 int argc = PL_origargc;
5196 char **argv = PL_origargv;
5199 } while (argc && argv[0][0] == '-' && argv[0][1]);
5200 init_argv_symbols(argc,argv);
5202 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5203 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5204 /* if we have already added "LINE: while (<>) {",
5205 we must not do it again */
5207 sv_setpvs(PL_linestr, "");
5208 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5209 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5210 PL_last_lop = PL_last_uni = NULL;
5211 PL_preambled = FALSE;
5212 if (PERLDB_LINE || PERLDB_SAVESRC)
5213 (void)gv_fetchfile(PL_origfilename);
5220 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5221 PL_lex_state = LEX_FORMLINE;
5222 start_force(PL_curforce);
5223 NEXTVAL_NEXTTOKE.ival = 0;
5224 force_next(FORMRBRACK);
5229 #ifdef PERL_STRICT_CR
5230 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5232 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5234 case ' ': case '\t': case '\f': case 013:
5236 PL_realtokenstart = -1;
5238 PL_thiswhite = newSVpvs("");
5239 sv_catpvn(PL_thiswhite, s, 1);
5246 PL_realtokenstart = -1;
5250 if (PL_lex_state != LEX_NORMAL ||
5251 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5252 if (*s == '#' && s == PL_linestart && PL_in_eval
5253 && !PL_rsfp && !PL_parser->filtered) {
5254 /* handle eval qq[#line 1 "foo"\n ...] */
5255 CopLINE_dec(PL_curcop);
5258 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5260 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5264 const bool in_comment = *s == '#';
5266 while (d < PL_bufend && *d != '\n')
5270 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5271 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5275 PL_thiswhite = newSVpvn(s, d - s);
5278 if (in_comment && d == PL_bufend
5279 && PL_lex_state == LEX_INTERPNORMAL
5280 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5281 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5284 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5285 PL_lex_state = LEX_FORMLINE;
5286 start_force(PL_curforce);
5287 NEXTVAL_NEXTTOKE.ival = 0;
5288 force_next(FORMRBRACK);
5294 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5295 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5298 TOKEN(PEG); /* make sure any #! line is accessible */
5303 /* if (PL_madskills && PL_lex_formbrack) { */
5305 while (d < PL_bufend && *d != '\n')
5309 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5310 Perl_croak(aTHX_ "panic: input overflow");
5311 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5313 PL_thiswhite = newSVpvs("");
5314 if (CopLINE(PL_curcop) == 1) {
5315 sv_setpvs(PL_thiswhite, "");
5318 sv_catpvn(PL_thiswhite, s, d - s);
5332 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5340 while (s < PL_bufend && SPACE_OR_TAB(*s))
5343 if (strnEQ(s,"=>",2)) {
5344 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5345 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5346 OPERATOR('-'); /* unary minus */
5348 PL_last_uni = PL_oldbufptr;
5350 case 'r': ftst = OP_FTEREAD; break;
5351 case 'w': ftst = OP_FTEWRITE; break;
5352 case 'x': ftst = OP_FTEEXEC; break;
5353 case 'o': ftst = OP_FTEOWNED; break;
5354 case 'R': ftst = OP_FTRREAD; break;
5355 case 'W': ftst = OP_FTRWRITE; break;
5356 case 'X': ftst = OP_FTREXEC; break;
5357 case 'O': ftst = OP_FTROWNED; break;
5358 case 'e': ftst = OP_FTIS; break;
5359 case 'z': ftst = OP_FTZERO; break;
5360 case 's': ftst = OP_FTSIZE; break;
5361 case 'f': ftst = OP_FTFILE; break;
5362 case 'd': ftst = OP_FTDIR; break;
5363 case 'l': ftst = OP_FTLINK; break;
5364 case 'p': ftst = OP_FTPIPE; break;
5365 case 'S': ftst = OP_FTSOCK; break;
5366 case 'u': ftst = OP_FTSUID; break;
5367 case 'g': ftst = OP_FTSGID; break;
5368 case 'k': ftst = OP_FTSVTX; break;
5369 case 'b': ftst = OP_FTBLK; break;
5370 case 'c': ftst = OP_FTCHR; break;
5371 case 't': ftst = OP_FTTTY; break;
5372 case 'T': ftst = OP_FTTEXT; break;
5373 case 'B': ftst = OP_FTBINARY; break;
5374 case 'M': case 'A': case 'C':
5375 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5377 case 'M': ftst = OP_FTMTIME; break;
5378 case 'A': ftst = OP_FTATIME; break;
5379 case 'C': ftst = OP_FTCTIME; break;
5387 PL_last_lop_op = (OPCODE)ftst;
5388 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5389 "### Saw file test %c\n", (int)tmp);
5394 /* Assume it was a minus followed by a one-letter named
5395 * subroutine call (or a -bareword), then. */
5396 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5397 "### '-%c' looked like a file test but was not\n",
5404 const char tmp = *s++;
5407 if (PL_expect == XOPERATOR)
5412 else if (*s == '>') {
5415 if (isIDFIRST_lazy_if(s,UTF)) {
5416 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5424 if (PL_expect == XOPERATOR) {
5425 if (*s == '=' && !PL_lex_allbrackets &&
5426 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5433 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5435 OPERATOR('-'); /* unary minus */
5441 const char tmp = *s++;
5444 if (PL_expect == XOPERATOR)
5449 if (PL_expect == XOPERATOR) {
5450 if (*s == '=' && !PL_lex_allbrackets &&
5451 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5458 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5465 if (PL_expect != XOPERATOR) {
5466 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5467 PL_expect = XOPERATOR;
5468 force_ident(PL_tokenbuf, '*');
5476 if (*s == '=' && !PL_lex_allbrackets &&
5477 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5483 if (*s == '=' && !PL_lex_allbrackets &&
5484 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5491 if (PL_expect == XOPERATOR) {
5492 if (s[1] == '=' && !PL_lex_allbrackets &&
5493 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5498 PL_tokenbuf[0] = '%';
5499 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5500 sizeof PL_tokenbuf - 1, FALSE);
5501 if (!PL_tokenbuf[1]) {
5504 PL_expect = XOPERATOR;
5505 force_ident_maybe_lex('%');
5509 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5510 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5515 if (PL_lex_brackets > 100)
5516 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5517 PL_lex_brackstack[PL_lex_brackets++] = 0;
5518 PL_lex_allbrackets++;
5520 const char tmp = *s++;
5525 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5527 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5535 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5542 goto just_a_word_zero_gv;
5545 switch (PL_expect) {
5551 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5553 PL_bufptr = s; /* update in case we back off */
5556 "Use of := for an empty attribute list is not allowed");
5563 PL_expect = XTERMBLOCK;
5566 stuffstart = s - SvPVX(PL_linestr) - 1;
5570 while (isIDFIRST_lazy_if(s,UTF)) {
5573 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5574 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5575 if (tmp < 0) tmp = -tmp;
5590 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5592 d = scan_str(d,TRUE,TRUE,FALSE);
5594 /* MUST advance bufptr here to avoid bogus
5595 "at end of line" context messages from yyerror().
5597 PL_bufptr = s + len;
5598 yyerror("Unterminated attribute parameter in attribute list");
5602 return REPORT(0); /* EOF indicator */
5606 sv_catsv(sv, PL_lex_stuff);
5607 attrs = op_append_elem(OP_LIST, attrs,
5608 newSVOP(OP_CONST, 0, sv));
5609 SvREFCNT_dec(PL_lex_stuff);
5610 PL_lex_stuff = NULL;
5613 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5615 if (PL_in_my == KEY_our) {
5616 deprecate(":unique");
5619 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5622 /* NOTE: any CV attrs applied here need to be part of
5623 the CVf_BUILTIN_ATTRS define in cv.h! */
5624 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5626 CvLVALUE_on(PL_compcv);
5628 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5630 deprecate(":locked");
5632 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5634 CvMETHOD_on(PL_compcv);
5636 /* After we've set the flags, it could be argued that
5637 we don't need to do the attributes.pm-based setting
5638 process, and shouldn't bother appending recognized
5639 flags. To experiment with that, uncomment the
5640 following "else". (Note that's already been
5641 uncommented. That keeps the above-applied built-in
5642 attributes from being intercepted (and possibly
5643 rejected) by a package's attribute routines, but is
5644 justified by the performance win for the common case
5645 of applying only built-in attributes.) */
5647 attrs = op_append_elem(OP_LIST, attrs,
5648 newSVOP(OP_CONST, 0,
5652 if (*s == ':' && s[1] != ':')
5655 break; /* require real whitespace or :'s */
5656 /* XXX losing whitespace on sequential attributes here */
5660 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5661 if (*s != ';' && *s != '}' && *s != tmp
5662 && (tmp != '=' || *s != ')')) {
5663 const char q = ((*s == '\'') ? '"' : '\'');
5664 /* If here for an expression, and parsed no attrs, back
5666 if (tmp == '=' && !attrs) {
5670 /* MUST advance bufptr here to avoid bogus "at end of line"
5671 context messages from yyerror().
5674 yyerror( (const char *)
5676 ? Perl_form(aTHX_ "Invalid separator character "
5677 "%c%c%c in attribute list", q, *s, q)
5678 : "Unterminated attribute list" ) );
5686 start_force(PL_curforce);
5687 NEXTVAL_NEXTTOKE.opval = attrs;
5688 CURMAD('_', PL_nextwhite);
5693 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5694 (s - SvPVX(PL_linestr)) - stuffstart);
5699 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5703 PL_lex_allbrackets--;
5707 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5708 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5712 PL_lex_allbrackets++;
5715 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5721 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5724 PL_lex_allbrackets--;
5730 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5733 if (PL_lex_brackets <= 0)
5734 yyerror("Unmatched right square bracket");
5737 PL_lex_allbrackets--;
5738 if (PL_lex_state == LEX_INTERPNORMAL) {
5739 if (PL_lex_brackets == 0) {
5740 if (*s == '-' && s[1] == '>')
5741 PL_lex_state = LEX_INTERPENDMAYBE;
5742 else if (*s != '[' && *s != '{')
5743 PL_lex_state = LEX_INTERPEND;
5750 if (PL_lex_brackets > 100) {
5751 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5753 switch (PL_expect) {
5755 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5756 PL_lex_allbrackets++;
5757 OPERATOR(HASHBRACK);
5759 while (s < PL_bufend && SPACE_OR_TAB(*s))
5762 PL_tokenbuf[0] = '\0';
5763 if (d < PL_bufend && *d == '-') {
5764 PL_tokenbuf[0] = '-';
5766 while (d < PL_bufend && SPACE_OR_TAB(*d))
5769 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5770 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5772 while (d < PL_bufend && SPACE_OR_TAB(*d))
5775 const char minus = (PL_tokenbuf[0] == '-');
5776 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5784 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5785 PL_lex_allbrackets++;
5790 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5791 PL_lex_allbrackets++;
5796 if (PL_oldoldbufptr == PL_last_lop)
5797 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5799 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5800 PL_lex_allbrackets++;
5803 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5805 /* This hack is to get the ${} in the message. */
5807 yyerror("syntax error");
5810 OPERATOR(HASHBRACK);
5812 /* This hack serves to disambiguate a pair of curlies
5813 * as being a block or an anon hash. Normally, expectation
5814 * determines that, but in cases where we're not in a
5815 * position to expect anything in particular (like inside
5816 * eval"") we have to resolve the ambiguity. This code
5817 * covers the case where the first term in the curlies is a
5818 * quoted string. Most other cases need to be explicitly
5819 * disambiguated by prepending a "+" before the opening
5820 * curly in order to force resolution as an anon hash.
5822 * XXX should probably propagate the outer expectation
5823 * into eval"" to rely less on this hack, but that could
5824 * potentially break current behavior of eval"".
5828 if (*s == '\'' || *s == '"' || *s == '`') {
5829 /* common case: get past first string, handling escapes */
5830 for (t++; t < PL_bufend && *t != *s;)
5831 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5835 else if (*s == 'q') {
5838 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5841 /* skip q//-like construct */
5843 char open, close, term;
5846 while (t < PL_bufend && isSPACE(*t))
5848 /* check for q => */
5849 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5850 OPERATOR(HASHBRACK);
5854 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5858 for (t++; t < PL_bufend; t++) {
5859 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5861 else if (*t == open)
5865 for (t++; t < PL_bufend; t++) {
5866 if (*t == '\\' && t+1 < PL_bufend)
5868 else if (*t == close && --brackets <= 0)
5870 else if (*t == open)
5877 /* skip plain q word */
5878 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5881 else if (isALNUM_lazy_if(t,UTF)) {
5883 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5886 while (t < PL_bufend && isSPACE(*t))
5888 /* if comma follows first term, call it an anon hash */
5889 /* XXX it could be a comma expression with loop modifiers */
5890 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5891 || (*t == '=' && t[1] == '>')))
5892 OPERATOR(HASHBRACK);
5893 if (PL_expect == XREF)
5896 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5902 pl_yylval.ival = CopLINE(PL_curcop);
5903 if (isSPACE(*s) || *s == '#')
5904 PL_copline = NOLINE; /* invalidate current command line number */
5905 TOKEN(formbrack ? '=' : '{');
5907 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5911 if (PL_lex_brackets <= 0)
5912 yyerror("Unmatched right curly bracket");
5914 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5915 PL_lex_allbrackets--;
5916 if (PL_lex_state == LEX_INTERPNORMAL) {
5917 if (PL_lex_brackets == 0) {
5918 if (PL_expect & XFAKEBRACK) {
5919 PL_expect &= XENUMMASK;
5920 PL_lex_state = LEX_INTERPEND;
5925 PL_thiswhite = newSVpvs("");
5926 sv_catpvs(PL_thiswhite,"}");
5929 return yylex(); /* ignore fake brackets */
5931 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5932 && SvEVALED(PL_lex_repl))
5933 PL_lex_state = LEX_INTERPEND;
5934 else if (*s == '-' && s[1] == '>')
5935 PL_lex_state = LEX_INTERPENDMAYBE;
5936 else if (*s != '[' && *s != '{')
5937 PL_lex_state = LEX_INTERPEND;
5940 if (PL_expect & XFAKEBRACK) {
5941 PL_expect &= XENUMMASK;
5943 return yylex(); /* ignore fake brackets */
5945 start_force(PL_curforce);
5947 curmad('X', newSVpvn(s-1,1));
5948 CURMAD('_', PL_thiswhite);
5950 force_next(formbrack ? '.' : '}');
5951 if (formbrack) LEAVE;
5954 PL_thistoken = newSVpvs("");
5956 if (formbrack == 2) { /* means . where arguments were expected */
5957 start_force(PL_curforce);
5965 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5966 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5973 if (PL_expect == XOPERATOR) {
5974 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5975 && isIDFIRST_lazy_if(s,UTF))
5977 CopLINE_dec(PL_curcop);
5978 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5979 CopLINE_inc(PL_curcop);
5981 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5982 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5989 PL_tokenbuf[0] = '&';
5990 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
5991 sizeof PL_tokenbuf - 1, TRUE);
5992 if (PL_tokenbuf[1]) {
5993 PL_expect = XOPERATOR;
5994 force_ident_maybe_lex('&');
5998 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6004 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6005 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6012 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6013 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6021 const char tmp = *s++;
6023 if (!PL_lex_allbrackets &&
6024 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6031 if (!PL_lex_allbrackets &&
6032 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6040 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6041 && strchr("+-*/%.^&|<",tmp))
6042 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6043 "Reversed %c= operator",(int)tmp);
6045 if (PL_expect == XSTATE && isALPHA(tmp) &&
6046 (s == PL_linestart+1 || s[-2] == '\n') )
6048 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6049 || PL_lex_state != LEX_NORMAL) {
6054 if (strnEQ(s,"=cut",4)) {
6070 PL_thiswhite = newSVpvs("");
6071 sv_catpvn(PL_thiswhite, PL_linestart,
6072 PL_bufend - PL_linestart);
6076 PL_parser->in_pod = 1;
6080 if (PL_expect == XBLOCK) {
6082 #ifdef PERL_STRICT_CR
6083 while (SPACE_OR_TAB(*t))
6085 while (SPACE_OR_TAB(*t) || *t == '\r')
6088 if (*t == '\n' || *t == '#') {
6091 SAVEI8(PL_parser->form_lex_state);
6092 SAVEI32(PL_lex_formbrack);
6093 PL_parser->form_lex_state = PL_lex_state;
6094 PL_lex_formbrack = PL_lex_brackets + 1;
6098 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6107 const char tmp = *s++;
6109 /* was this !=~ where !~ was meant?
6110 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6112 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6113 const char *t = s+1;
6115 while (t < PL_bufend && isSPACE(*t))
6118 if (*t == '/' || *t == '?' ||
6119 ((*t == 'm' || *t == 's' || *t == 'y')
6120 && !isALNUM(t[1])) ||
6121 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6122 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6123 "!=~ should be !~");
6125 if (!PL_lex_allbrackets &&
6126 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6138 if (PL_expect != XOPERATOR) {
6139 if (s[1] != '<' && !strchr(s,'>'))
6142 s = scan_heredoc(s);
6144 s = scan_inputsymbol(s);
6145 PL_expect = XOPERATOR;
6146 TOKEN(sublex_start());
6152 if (*s == '=' && !PL_lex_allbrackets &&
6153 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6157 SHop(OP_LEFT_SHIFT);
6162 if (!PL_lex_allbrackets &&
6163 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6170 if (!PL_lex_allbrackets &&
6171 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6187 const char tmp = *s++;
6189 if (*s == '=' && !PL_lex_allbrackets &&
6190 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6194 SHop(OP_RIGHT_SHIFT);
6196 else if (tmp == '=') {
6197 if (!PL_lex_allbrackets &&
6198 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6206 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6215 if (PL_expect == XOPERATOR) {
6216 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6217 return deprecate_commaless_var_list();
6221 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6222 PL_tokenbuf[0] = '@';
6223 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6224 sizeof PL_tokenbuf - 1, FALSE);
6225 if (PL_expect == XOPERATOR)
6226 no_op("Array length", s);
6227 if (!PL_tokenbuf[1])
6229 PL_expect = XOPERATOR;
6230 force_ident_maybe_lex('#');
6234 PL_tokenbuf[0] = '$';
6235 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6236 sizeof PL_tokenbuf - 1, FALSE);
6237 if (PL_expect == XOPERATOR)
6239 if (!PL_tokenbuf[1]) {
6241 yyerror("Final $ should be \\$ or $name");
6247 const char tmp = *s;
6248 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6251 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6252 && intuit_more(s)) {
6254 PL_tokenbuf[0] = '@';
6255 if (ckWARN(WARN_SYNTAX)) {
6258 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6261 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6262 while (t < PL_bufend && *t != ']')
6264 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6265 "Multidimensional syntax %.*s not supported",
6266 (int)((t - PL_bufptr) + 1), PL_bufptr);
6270 else if (*s == '{') {
6272 PL_tokenbuf[0] = '%';
6273 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6274 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6276 char tmpbuf[sizeof PL_tokenbuf];
6279 } while (isSPACE(*t));
6280 if (isIDFIRST_lazy_if(t,UTF)) {
6282 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6287 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6288 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6289 "You need to quote \"%"SVf"\"",
6290 SVfARG(newSVpvn_flags(tmpbuf, len,
6291 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6297 PL_expect = XOPERATOR;
6298 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6299 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6300 if (!islop || PL_last_lop_op == OP_GREPSTART)
6301 PL_expect = XOPERATOR;
6302 else if (strchr("$@\"'`q", *s))
6303 PL_expect = XTERM; /* e.g. print $fh "foo" */
6304 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6305 PL_expect = XTERM; /* e.g. print $fh &sub */
6306 else if (isIDFIRST_lazy_if(s,UTF)) {
6307 char tmpbuf[sizeof PL_tokenbuf];
6309 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6310 if ((t2 = keyword(tmpbuf, len, 0))) {
6311 /* binary operators exclude handle interpretations */
6323 PL_expect = XTERM; /* e.g. print $fh length() */
6328 PL_expect = XTERM; /* e.g. print $fh subr() */
6331 else if (isDIGIT(*s))
6332 PL_expect = XTERM; /* e.g. print $fh 3 */
6333 else if (*s == '.' && isDIGIT(s[1]))
6334 PL_expect = XTERM; /* e.g. print $fh .3 */
6335 else if ((*s == '?' || *s == '-' || *s == '+')
6336 && !isSPACE(s[1]) && s[1] != '=')
6337 PL_expect = XTERM; /* e.g. print $fh -1 */
6338 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6340 PL_expect = XTERM; /* e.g. print $fh /.../
6341 XXX except DORDOR operator
6343 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6345 PL_expect = XTERM; /* print $fh <<"EOF" */
6348 force_ident_maybe_lex('$');
6352 if (PL_expect == XOPERATOR)
6354 PL_tokenbuf[0] = '@';
6355 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6356 if (!PL_tokenbuf[1]) {
6359 if (PL_lex_state == LEX_NORMAL)
6361 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6363 PL_tokenbuf[0] = '%';
6365 /* Warn about @ where they meant $. */
6366 if (*s == '[' || *s == '{') {
6367 if (ckWARN(WARN_SYNTAX)) {
6368 const char *t = s + 1;
6369 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6370 t += UTF ? UTF8SKIP(t) : 1;
6371 if (*t == '}' || *t == ']') {
6373 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6374 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6375 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6376 "Scalar value %"SVf" better written as $%"SVf,
6377 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6378 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6379 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6380 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6385 PL_expect = XOPERATOR;
6386 force_ident_maybe_lex('@');
6389 case '/': /* may be division, defined-or, or pattern */
6390 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6391 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6392 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6397 case '?': /* may either be conditional or pattern */
6398 if (PL_expect == XOPERATOR) {
6401 if (!PL_lex_allbrackets &&
6402 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6406 PL_lex_allbrackets++;
6412 /* A // operator. */
6413 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6414 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6415 LEX_FAKEEOF_LOGIC)) {
6423 if (*s == '=' && !PL_lex_allbrackets &&
6424 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6433 /* Disable warning on "study /blah/" */
6434 if (PL_oldoldbufptr == PL_last_uni
6435 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6436 || memNE(PL_last_uni, "study", 5)
6437 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6441 deprecate("?PATTERN? without explicit operator");
6442 s = scan_pat(s,OP_MATCH);
6443 TERM(sublex_start());
6447 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6448 #ifdef PERL_STRICT_CR
6451 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6453 && (s == PL_linestart || s[-1] == '\n') )
6456 formbrack = 2; /* dot seen where arguments expected */
6459 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6463 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6466 if (!PL_lex_allbrackets &&
6467 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6474 pl_yylval.ival = OPf_SPECIAL;
6480 if (*s == '=' && !PL_lex_allbrackets &&
6481 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6488 case '0': case '1': case '2': case '3': case '4':
6489 case '5': case '6': case '7': case '8': case '9':
6490 s = scan_num(s, &pl_yylval);
6491 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6492 if (PL_expect == XOPERATOR)
6497 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6498 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6499 if (PL_expect == XOPERATOR) {
6500 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6501 return deprecate_commaless_var_list();
6508 pl_yylval.ival = OP_CONST;
6509 TERM(sublex_start());
6512 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6513 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6514 if (PL_expect == XOPERATOR) {
6515 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6516 return deprecate_commaless_var_list();
6523 pl_yylval.ival = OP_CONST;
6524 /* FIXME. I think that this can be const if char *d is replaced by
6525 more localised variables. */
6526 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6527 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6528 pl_yylval.ival = OP_STRINGIFY;
6532 TERM(sublex_start());
6535 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6536 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6537 if (PL_expect == XOPERATOR)
6538 no_op("Backticks",s);
6541 readpipe_override();
6542 TERM(sublex_start());
6546 if (PL_lex_inwhat && isDIGIT(*s))
6547 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6549 if (PL_expect == XOPERATOR)
6550 no_op("Backslash",s);
6554 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6555 char *start = s + 2;
6556 while (isDIGIT(*start) || *start == '_')
6558 if (*start == '.' && isDIGIT(start[1])) {
6559 s = scan_num(s, &pl_yylval);
6562 else if ((*start == ':' && start[1] == ':')
6563 || (PL_expect == XSTATE && *start == ':'))
6565 else if (PL_expect == XSTATE) {
6567 while (d < PL_bufend && isSPACE(*d)) d++;
6568 if (*d == ':') goto keylookup;
6570 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6571 if (!isALPHA(*start) && (PL_expect == XTERM
6572 || PL_expect == XREF || PL_expect == XSTATE
6573 || PL_expect == XTERMORDORDOR)) {
6574 GV *const gv = gv_fetchpvn_flags(s, start - s,
6575 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6577 s = scan_num(s, &pl_yylval);
6584 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6637 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6639 /* Some keywords can be followed by any delimiter, including ':' */
6640 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6642 /* x::* is just a word, unless x is "CORE" */
6643 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6647 while (d < PL_bufend && isSPACE(*d))
6648 d++; /* no comments skipped here, or s### is misparsed */
6650 /* Is this a word before a => operator? */
6651 if (*d == '=' && d[1] == '>') {
6654 = (OP*)newSVOP(OP_CONST, 0,
6655 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6656 pl_yylval.opval->op_private = OPpCONST_BARE;
6660 /* Check for plugged-in keyword */
6664 char *saved_bufptr = PL_bufptr;
6666 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6668 if (result == KEYWORD_PLUGIN_DECLINE) {
6669 /* not a plugged-in keyword */
6670 PL_bufptr = saved_bufptr;
6671 } else if (result == KEYWORD_PLUGIN_STMT) {
6672 pl_yylval.opval = o;
6675 return REPORT(PLUGSTMT);
6676 } else if (result == KEYWORD_PLUGIN_EXPR) {
6677 pl_yylval.opval = o;
6679 PL_expect = XOPERATOR;
6680 return REPORT(PLUGEXPR);
6682 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6687 /* Check for built-in keyword */
6688 tmp = keyword(PL_tokenbuf, len, 0);
6690 /* Is this a label? */
6691 if (!anydelim && PL_expect == XSTATE
6692 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6694 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6695 newSVpvn_flags(PL_tokenbuf,
6696 len, UTF ? SVf_UTF8 : 0));
6701 /* Check for lexical sub */
6702 if (PL_expect != XOPERATOR) {
6703 char tmpbuf[sizeof PL_tokenbuf + 1];
6705 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6706 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6707 if (off != NOT_IN_PAD) {
6708 assert(off); /* we assume this is boolean-true below */
6709 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6710 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6711 HEK * const stashname = HvNAME_HEK(stash);
6712 sv = newSVhek(stashname);
6713 sv_catpvs(sv, "::");
6714 sv_catpvn_flags(sv, PL_tokenbuf, len,
6715 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6716 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6721 rv2cv_op = newOP(OP_PADANY, 0);
6722 rv2cv_op->op_targ = off;
6723 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6724 cv = (CV *)PAD_SV(off);
6732 if (tmp < 0) { /* second-class keyword? */
6733 GV *ogv = NULL; /* override (winner) */
6734 GV *hgv = NULL; /* hidden (loser) */
6735 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6737 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6738 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6741 if (GvIMPORTED_CV(gv))
6743 else if (! CvMETHOD(cv))
6747 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6748 UTF ? -(I32)len : (I32)len, FALSE)) &&
6749 (gv = *gvp) && isGV_with_GP(gv) &&
6750 GvCVu(gv) && GvIMPORTED_CV(gv))
6757 tmp = 0; /* overridden by import or by GLOBAL */
6760 && -tmp==KEY_lock /* XXX generalizable kludge */
6763 tmp = 0; /* any sub overrides "weak" keyword */
6765 else { /* no override */
6767 if (tmp == KEY_dump) {
6768 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6769 "dump() better written as CORE::dump()");
6773 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6774 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6775 "Ambiguous call resolved as CORE::%s(), "
6776 "qualify as such or use &",
6784 default: /* not a keyword */
6785 /* Trade off - by using this evil construction we can pull the
6786 variable gv into the block labelled keylookup. If not, then
6787 we have to give it function scope so that the goto from the
6788 earlier ':' case doesn't bypass the initialisation. */
6790 just_a_word_zero_gv:
6802 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6803 const char penultchar =
6804 lastchar && PL_bufptr - 2 >= PL_linestart
6808 SV *nextPL_nextwhite = 0;
6812 /* Get the rest if it looks like a package qualifier */
6814 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6816 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6819 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6820 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6821 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6822 *s == '\'' ? "'" : "::");
6827 if (PL_expect == XOPERATOR) {
6828 if (PL_bufptr == PL_linestart) {
6829 CopLINE_dec(PL_curcop);
6830 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6831 CopLINE_inc(PL_curcop);
6834 no_op("Bareword",s);
6837 /* Look for a subroutine with this name in current package,
6838 unless this is a lexical sub, or name is "Foo::",
6839 in which case Foo is a bareword
6840 (and a package name). */
6842 if (len > 2 && !PL_madskills &&
6843 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6845 if (ckWARN(WARN_BAREWORD)
6846 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6847 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6848 "Bareword \"%"SVf"\" refers to nonexistent package",
6849 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6850 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6852 PL_tokenbuf[len] = '\0';
6858 /* Mustn't actually add anything to a symbol table.
6859 But also don't want to "initialise" any placeholder
6860 constants that might already be there into full
6861 blown PVGVs with attached PVCV. */
6862 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6863 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6869 /* if we saw a global override before, get the right name */
6872 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6873 len ? len : strlen(PL_tokenbuf));
6875 SV * const tmp_sv = sv;
6876 sv = newSVpvs("CORE::GLOBAL::");
6877 sv_catsv(sv, tmp_sv);
6878 SvREFCNT_dec(tmp_sv);
6882 if (PL_madskills && !PL_thistoken) {
6883 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6884 PL_thistoken = newSVpvn(start,s - start);
6885 PL_realtokenstart = s - SvPVX(PL_linestr);
6889 /* Presume this is going to be a bareword of some sort. */
6891 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6892 pl_yylval.opval->op_private = OPpCONST_BARE;
6894 /* And if "Foo::", then that's what it certainly is. */
6900 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6901 const_op->op_private = OPpCONST_BARE;
6902 rv2cv_op = newCVREF(0, const_op);
6903 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
6906 /* See if it's the indirect object for a list operator. */
6908 if (PL_oldoldbufptr &&
6909 PL_oldoldbufptr < PL_bufptr &&
6910 (PL_oldoldbufptr == PL_last_lop
6911 || PL_oldoldbufptr == PL_last_uni) &&
6912 /* NO SKIPSPACE BEFORE HERE! */
6913 (PL_expect == XREF ||
6914 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6916 bool immediate_paren = *s == '(';
6918 /* (Now we can afford to cross potential line boundary.) */
6919 s = SKIPSPACE2(s,nextPL_nextwhite);
6921 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6924 /* Two barewords in a row may indicate method call. */
6926 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6927 (tmp = intuit_method(s, gv, cv))) {
6929 if (tmp == METHOD && !PL_lex_allbrackets &&
6930 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6931 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6935 /* If not a declared subroutine, it's an indirect object. */
6936 /* (But it's an indir obj regardless for sort.) */
6937 /* Also, if "_" follows a filetest operator, it's a bareword */
6940 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6942 (PL_last_lop_op != OP_MAPSTART &&
6943 PL_last_lop_op != OP_GREPSTART))))
6944 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6945 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6948 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6953 PL_expect = XOPERATOR;
6956 s = SKIPSPACE2(s,nextPL_nextwhite);
6957 PL_nextwhite = nextPL_nextwhite;
6962 /* Is this a word before a => operator? */
6963 if (*s == '=' && s[1] == '>' && !pkgname) {
6966 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6967 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6968 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6972 /* If followed by a paren, it's certainly a subroutine. */
6977 while (SPACE_OR_TAB(*d))
6979 if (*d == ')' && (sv = cv_const_sv(cv))) {
6986 PL_nextwhite = PL_thiswhite;
6989 start_force(PL_curforce);
6991 NEXTVAL_NEXTTOKE.opval =
6992 off ? rv2cv_op : pl_yylval.opval;
6993 PL_expect = XOPERATOR;
6996 PL_nextwhite = nextPL_nextwhite;
6997 curmad('X', PL_thistoken);
6998 PL_thistoken = newSVpvs("");
7002 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7003 else op_free(rv2cv_op), force_next(WORD);
7008 /* If followed by var or block, call it a method (unless sub) */
7010 if ((*s == '$' || *s == '{') && !cv) {
7012 PL_last_lop = PL_oldbufptr;
7013 PL_last_lop_op = OP_METHOD;
7014 if (!PL_lex_allbrackets &&
7015 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7016 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7020 /* If followed by a bareword, see if it looks like indir obj. */
7023 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7024 && (tmp = intuit_method(s, gv, cv))) {
7026 if (tmp == METHOD && !PL_lex_allbrackets &&
7027 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7028 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7032 /* Not a method, so call it a subroutine (if defined) */
7035 if (lastchar == '-' && penultchar != '-') {
7036 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7037 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7038 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7039 SVfARG(tmpsv), SVfARG(tmpsv));
7041 /* Check for a constant sub */
7042 if ((sv = cv_const_sv(cv))) {
7045 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7046 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7047 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7048 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7052 op_free(pl_yylval.opval);
7053 pl_yylval.opval = rv2cv_op;
7054 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7055 PL_last_lop = PL_oldbufptr;
7056 PL_last_lop_op = OP_ENTERSUB;
7057 /* Is there a prototype? */
7064 STRLEN protolen = CvPROTOLEN(cv);
7065 const char *proto = CvPROTO(cv);
7069 if ((optional = *proto == ';'))
7072 while (*proto == ';');
7076 *proto == '$' || *proto == '_'
7077 || *proto == '*' || *proto == '+'
7082 *proto == '\\' && proto[1] && proto[2] == '\0'
7085 UNIPROTO(UNIOPSUB,optional);
7086 if (*proto == '\\' && proto[1] == '[') {
7087 const char *p = proto + 2;
7088 while(*p && *p != ']')
7090 if(*p == ']' && !p[1])
7091 UNIPROTO(UNIOPSUB,optional);
7093 if (*proto == '&' && *s == '{') {
7095 sv_setpvs(PL_subname, "__ANON__");
7097 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7098 if (!PL_lex_allbrackets &&
7099 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7100 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7107 PL_nextwhite = PL_thiswhite;
7110 start_force(PL_curforce);
7111 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7114 PL_nextwhite = nextPL_nextwhite;
7115 curmad('X', PL_thistoken);
7116 PL_thistoken = newSVpvs("");
7118 force_next(off ? PRIVATEREF : WORD);
7119 if (!PL_lex_allbrackets &&
7120 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7121 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7126 /* Guess harder when madskills require "best effort". */
7127 if (PL_madskills && (!gv || !GvCVu(gv))) {
7128 int probable_sub = 0;
7129 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7131 else if (isALPHA(*s)) {
7135 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7136 if (!keyword(tmpbuf, tmplen, 0))
7139 while (d < PL_bufend && isSPACE(*d))
7141 if (*d == '=' && d[1] == '>')
7146 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7148 op_free(pl_yylval.opval);
7149 pl_yylval.opval = rv2cv_op;
7150 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7151 PL_last_lop = PL_oldbufptr;
7152 PL_last_lop_op = OP_ENTERSUB;
7153 PL_nextwhite = PL_thiswhite;
7155 start_force(PL_curforce);
7156 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7158 PL_nextwhite = nextPL_nextwhite;
7159 curmad('X', PL_thistoken);
7160 PL_thistoken = newSVpvs("");
7161 force_next(off ? PRIVATEREF : WORD);
7162 if (!PL_lex_allbrackets &&
7163 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7164 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7168 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7170 force_next(off ? PRIVATEREF : WORD);
7171 if (!PL_lex_allbrackets &&
7172 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7173 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7178 /* Call it a bare word */
7180 if (PL_hints & HINT_STRICT_SUBS)
7181 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7184 /* after "print" and similar functions (corresponding to
7185 * "F? L" in opcode.pl), whatever wasn't already parsed as
7186 * a filehandle should be subject to "strict subs".
7187 * Likewise for the optional indirect-object argument to system
7188 * or exec, which can't be a bareword */
7189 if ((PL_last_lop_op == OP_PRINT
7190 || PL_last_lop_op == OP_PRTF
7191 || PL_last_lop_op == OP_SAY
7192 || PL_last_lop_op == OP_SYSTEM
7193 || PL_last_lop_op == OP_EXEC)
7194 && (PL_hints & HINT_STRICT_SUBS))
7195 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7196 if (lastchar != '-') {
7197 if (ckWARN(WARN_RESERVED)) {
7201 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7202 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7210 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7211 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7212 "Operator or semicolon missing before %c%"SVf,
7213 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7214 strlen(PL_tokenbuf),
7215 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7216 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7217 "Ambiguous use of %c resolved as operator %c",
7218 lastchar, lastchar);
7225 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7230 (OP*)newSVOP(OP_CONST, 0,
7231 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7234 case KEY___PACKAGE__:
7236 (OP*)newSVOP(OP_CONST, 0,
7238 ? newSVhek(HvNAME_HEK(PL_curstash))
7245 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7246 const char *pname = "main";
7249 if (PL_tokenbuf[2] == 'D')
7252 PL_curstash ? PL_curstash : PL_defstash;
7253 pname = HvNAME_get(stash);
7254 plen = HvNAMELEN (stash);
7255 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7257 gv = gv_fetchpvn_flags(
7258 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7259 plen+6, GV_ADD|putf8, SVt_PVIO
7263 GvIOp(gv) = newIO();
7264 IoIFP(GvIOp(gv)) = PL_rsfp;
7265 #if defined(HAS_FCNTL) && defined(F_SETFD)
7267 const int fd = PerlIO_fileno(PL_rsfp);
7268 fcntl(fd,F_SETFD,fd >= 3);
7271 /* Mark this internal pseudo-handle as clean */
7272 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7273 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7274 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7276 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7277 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7278 /* if the script was opened in binmode, we need to revert
7279 * it to text mode for compatibility; but only iff it has CRs
7280 * XXX this is a questionable hack at best. */
7281 if (PL_bufend-PL_bufptr > 2
7282 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7285 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7286 loc = PerlIO_tell(PL_rsfp);
7287 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7290 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7292 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7293 #endif /* NETWARE */
7295 PerlIO_seek(PL_rsfp, loc, 0);
7299 #ifdef PERLIO_LAYERS
7302 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7303 else if (PL_encoding) {
7310 XPUSHs(PL_encoding);
7312 call_method("name", G_SCALAR);
7316 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7317 Perl_form(aTHX_ ":encoding(%"SVf")",
7326 if (PL_realtokenstart >= 0) {
7327 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7329 PL_endwhite = newSVpvs("");
7330 sv_catsv(PL_endwhite, PL_thiswhite);
7332 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7333 PL_realtokenstart = -1;
7335 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7345 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7354 if (PL_expect == XSTATE) {
7361 if (*s == ':' && s[1] == ':') {
7365 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7366 if ((*s == ':' && s[1] == ':')
7367 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7371 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7375 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7376 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7377 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7380 else if (tmp == KEY_require || tmp == KEY_do
7382 /* that's a way to remember we saw "CORE::" */
7395 LOP(OP_ACCEPT,XTERM);
7398 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7403 LOP(OP_ATAN2,XTERM);
7409 LOP(OP_BINMODE,XTERM);
7412 LOP(OP_BLESS,XTERM);
7421 /* We have to disambiguate the two senses of
7422 "continue". If the next token is a '{' then
7423 treat it as the start of a continue block;
7424 otherwise treat it as a control operator.
7434 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7444 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7453 if (!PL_cryptseen) {
7454 PL_cryptseen = TRUE;
7458 LOP(OP_CRYPT,XTERM);
7461 LOP(OP_CHMOD,XTERM);
7464 LOP(OP_CHOWN,XTERM);
7467 LOP(OP_CONNECT,XTERM);
7487 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7489 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7492 force_ident_maybe_lex('&');
7497 if (orig_keyword == KEY_do) {
7506 PL_hints |= HINT_BLOCK_SCOPE;
7516 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7517 STR_WITH_LEN("NDBM_File::"),
7518 STR_WITH_LEN("DB_File::"),
7519 STR_WITH_LEN("GDBM_File::"),
7520 STR_WITH_LEN("SDBM_File::"),
7521 STR_WITH_LEN("ODBM_File::"),
7523 LOP(OP_DBMOPEN,XTERM);
7529 PL_expect = XOPERATOR;
7530 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7537 pl_yylval.ival = CopLINE(PL_curcop);
7541 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7555 if (*s == '{') { /* block eval */
7556 PL_expect = XTERMBLOCK;
7557 UNIBRACK(OP_ENTERTRY);
7559 else { /* string eval */
7561 UNIBRACK(OP_ENTEREVAL);
7566 UNIBRACK(-OP_ENTEREVAL);
7580 case KEY_endhostent:
7586 case KEY_endservent:
7589 case KEY_endprotoent:
7600 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7602 pl_yylval.ival = CopLINE(PL_curcop);
7604 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7607 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7610 if ((PL_bufend - p) >= 3 &&
7611 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7613 else if ((PL_bufend - p) >= 4 &&
7614 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7617 if (isIDFIRST_lazy_if(p,UTF)) {
7618 p = scan_ident(p, PL_bufend,
7619 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7623 Perl_croak(aTHX_ "Missing $ on loop variable");
7625 s = SvPVX(PL_linestr) + soff;
7631 LOP(OP_FORMLINE,XTERM);
7640 LOP(OP_FCNTL,XTERM);
7646 LOP(OP_FLOCK,XTERM);
7649 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7654 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7659 LOP(OP_GREPSTART, XREF);
7662 PL_expect = XOPERATOR;
7663 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7678 case KEY_getpriority:
7679 LOP(OP_GETPRIORITY,XTERM);
7681 case KEY_getprotobyname:
7684 case KEY_getprotobynumber:
7685 LOP(OP_GPBYNUMBER,XTERM);
7687 case KEY_getprotoent:
7699 case KEY_getpeername:
7700 UNI(OP_GETPEERNAME);
7702 case KEY_gethostbyname:
7705 case KEY_gethostbyaddr:
7706 LOP(OP_GHBYADDR,XTERM);
7708 case KEY_gethostent:
7711 case KEY_getnetbyname:
7714 case KEY_getnetbyaddr:
7715 LOP(OP_GNBYADDR,XTERM);
7720 case KEY_getservbyname:
7721 LOP(OP_GSBYNAME,XTERM);
7723 case KEY_getservbyport:
7724 LOP(OP_GSBYPORT,XTERM);
7726 case KEY_getservent:
7729 case KEY_getsockname:
7730 UNI(OP_GETSOCKNAME);
7732 case KEY_getsockopt:
7733 LOP(OP_GSOCKOPT,XTERM);
7748 pl_yylval.ival = CopLINE(PL_curcop);
7753 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7761 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7763 pl_yylval.ival = CopLINE(PL_curcop);
7767 LOP(OP_INDEX,XTERM);
7773 LOP(OP_IOCTL,XTERM);
7785 PL_expect = XOPERATOR;
7786 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7803 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7808 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7822 LOP(OP_LISTEN,XTERM);
7831 s = scan_pat(s,OP_MATCH);
7832 TERM(sublex_start());
7835 LOP(OP_MAPSTART, XREF);
7838 LOP(OP_MKDIR,XTERM);
7841 LOP(OP_MSGCTL,XTERM);
7844 LOP(OP_MSGGET,XTERM);
7847 LOP(OP_MSGRCV,XTERM);
7850 LOP(OP_MSGSND,XTERM);
7855 PL_in_my = (U16)tmp;
7857 if (isIDFIRST_lazy_if(s,UTF)) {
7861 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7862 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7864 if (!FEATURE_LEXSUBS_IS_ENABLED)
7866 "Experimental \"%s\" subs not enabled",
7867 tmp == KEY_my ? "my" :
7868 tmp == KEY_state ? "state" : "our");
7871 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7872 if (!PL_in_my_stash) {
7875 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7876 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7879 if (PL_madskills) { /* just add type to declarator token */
7880 sv_catsv(PL_thistoken, PL_nextwhite);
7882 sv_catpvn(PL_thistoken, start, s - start);
7890 PL_expect = XOPERATOR;
7891 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7895 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7900 s = tokenize_use(0, s);
7904 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7907 if (!PL_lex_allbrackets &&
7908 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7909 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7915 if (isIDFIRST_lazy_if(s,UTF)) {
7917 for (d = s; isALNUM_lazy_if(d,UTF);) {
7918 d += UTF ? UTF8SKIP(d) : 1;
7920 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7921 d += UTF ? UTF8SKIP(d) : 1;
7925 for (t=d; isSPACE(*t);)
7927 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7929 && !(t[0] == '=' && t[1] == '>')
7930 && !(t[0] == ':' && t[1] == ':')
7931 && !keyword(s, d-s, 0)
7933 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7934 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
7935 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7936 "Precedence problem: open %"SVf" should be open(%"SVf")",
7937 SVfARG(tmpsv), SVfARG(tmpsv));
7943 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7945 pl_yylval.ival = OP_OR;
7955 LOP(OP_OPEN_DIR,XTERM);
7958 checkcomma(s,PL_tokenbuf,"filehandle");
7962 checkcomma(s,PL_tokenbuf,"filehandle");
7981 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7983 s = force_strict_version(s);
7984 PL_lex_expect = XBLOCK;
7988 LOP(OP_PIPE_OP,XTERM);
7991 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
7994 pl_yylval.ival = OP_CONST;
7995 TERM(sublex_start());
8002 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8005 PL_expect = XOPERATOR;
8006 if (SvCUR(PL_lex_stuff)) {
8007 int warned_comma = !ckWARN(WARN_QW);
8008 int warned_comment = warned_comma;
8009 d = SvPV_force(PL_lex_stuff, len);
8011 for (; isSPACE(*d) && len; --len, ++d)
8016 if (!warned_comma || !warned_comment) {
8017 for (; !isSPACE(*d) && len; --len, ++d) {
8018 if (!warned_comma && *d == ',') {
8019 Perl_warner(aTHX_ packWARN(WARN_QW),
8020 "Possible attempt to separate words with commas");
8023 else if (!warned_comment && *d == '#') {
8024 Perl_warner(aTHX_ packWARN(WARN_QW),
8025 "Possible attempt to put comments in qw() list");
8031 for (; !isSPACE(*d) && len; --len, ++d)
8034 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8035 words = op_append_elem(OP_LIST, words,
8036 newSVOP(OP_CONST, 0, tokeq(sv)));
8041 words = newNULLLIST();
8043 SvREFCNT_dec(PL_lex_stuff);
8044 PL_lex_stuff = NULL;
8046 PL_expect = XOPERATOR;
8047 pl_yylval.opval = sawparens(words);
8052 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8055 pl_yylval.ival = OP_STRINGIFY;
8056 if (SvIVX(PL_lex_stuff) == '\'')
8057 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8058 TERM(sublex_start());
8061 s = scan_pat(s,OP_QR);
8062 TERM(sublex_start());
8065 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8068 readpipe_override();
8069 TERM(sublex_start());
8076 PL_expect = XOPERATOR;
8078 s = force_version(s, FALSE);
8080 else if (*s != 'v' || !isDIGIT(s[1])
8081 || (s = force_version(s, TRUE), *s == 'v'))
8083 *PL_tokenbuf = '\0';
8084 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8085 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8086 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8087 GV_ADD | (UTF ? SVf_UTF8 : 0));
8089 yyerror("<> should be quotes");
8091 if (orig_keyword == KEY_require) {
8099 PL_last_uni = PL_oldbufptr;
8100 PL_last_lop_op = OP_REQUIRE;
8102 return REPORT( (int)REQUIRE );
8108 PL_expect = XOPERATOR;
8109 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8113 LOP(OP_RENAME,XTERM);
8122 LOP(OP_RINDEX,XTERM);
8131 UNIDOR(OP_READLINE);
8134 UNIDOR(OP_BACKTICK);
8143 LOP(OP_REVERSE,XTERM);
8146 UNIDOR(OP_READLINK);
8153 if (pl_yylval.opval)
8154 TERM(sublex_start());
8156 TOKEN(1); /* force error */
8159 checkcomma(s,PL_tokenbuf,"filehandle");
8169 LOP(OP_SELECT,XTERM);
8175 LOP(OP_SEMCTL,XTERM);
8178 LOP(OP_SEMGET,XTERM);
8181 LOP(OP_SEMOP,XTERM);
8187 LOP(OP_SETPGRP,XTERM);
8189 case KEY_setpriority:
8190 LOP(OP_SETPRIORITY,XTERM);
8192 case KEY_sethostent:
8198 case KEY_setservent:
8201 case KEY_setprotoent:
8211 LOP(OP_SEEKDIR,XTERM);
8213 case KEY_setsockopt:
8214 LOP(OP_SSOCKOPT,XTERM);
8220 LOP(OP_SHMCTL,XTERM);
8223 LOP(OP_SHMGET,XTERM);
8226 LOP(OP_SHMREAD,XTERM);
8229 LOP(OP_SHMWRITE,XTERM);
8232 LOP(OP_SHUTDOWN,XTERM);
8241 LOP(OP_SOCKET,XTERM);
8243 case KEY_socketpair:
8244 LOP(OP_SOCKPAIR,XTERM);
8247 checkcomma(s,PL_tokenbuf,"subroutine name");
8250 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8254 LOP(OP_SPLIT,XTERM);
8257 LOP(OP_SPRINTF,XTERM);
8260 LOP(OP_SPLICE,XTERM);
8275 LOP(OP_SUBSTR,XTERM);
8281 char * const tmpbuf = PL_tokenbuf + 1;
8282 SSize_t tboffset = 0;
8283 expectation attrful;
8284 bool have_name, have_proto;
8285 const int key = tmp;
8290 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8291 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8295 s = SKIPSPACE2(s,tmpwhite);
8301 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8302 (*s == ':' && s[1] == ':'))
8305 SV *nametoke = NULL;
8309 attrful = XATTRBLOCK;
8310 /* remember buffer pos'n for later force_word */
8311 tboffset = s - PL_oldbufptr;
8312 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8316 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8319 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8321 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8323 sv_setpvn(PL_subname, tmpbuf, len);
8325 sv_setsv(PL_subname,PL_curstname);
8326 sv_catpvs(PL_subname,"::");
8327 sv_catpvn(PL_subname,tmpbuf,len);
8329 if (SvUTF8(PL_linestr))
8330 SvUTF8_on(PL_subname);
8336 CURMAD('X', nametoke);
8337 CURMAD('_', tmpwhite);
8338 force_ident_maybe_lex('&');
8340 s = SKIPSPACE2(d,tmpwhite);
8346 if (key == KEY_my || key == KEY_our || key==KEY_state)
8349 /* diag_listed_as: Missing name in "%s sub" */
8351 "Missing name in \"%s\"", PL_bufptr);
8353 PL_expect = XTERMBLOCK;
8354 attrful = XATTRTERM;
8355 sv_setpvs(PL_subname,"?");
8359 if (key == KEY_format) {
8361 PL_thistoken = subtoken;
8365 (void) force_word(PL_oldbufptr + tboffset, WORD,
8371 /* Look for a prototype */
8374 bool bad_proto = FALSE;
8375 bool in_brackets = FALSE;
8376 char greedy_proto = ' ';
8377 bool proto_after_greedy_proto = FALSE;
8378 bool must_be_last = FALSE;
8379 bool underscore = FALSE;
8380 bool seen_underscore = FALSE;
8381 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8384 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8386 Perl_croak(aTHX_ "Prototype not terminated");
8387 /* strip spaces and check for bad characters */
8388 d = SvPV(PL_lex_stuff, tmplen);
8390 for (p = d; tmplen; tmplen--, ++p) {
8394 if (warnillegalproto) {
8396 proto_after_greedy_proto = TRUE;
8397 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8402 if ( !strchr(";@%", *p) )
8409 else if ( *p == ']' ) {
8410 in_brackets = FALSE;
8412 else if ( (*p == '@' || *p == '%') &&
8413 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8415 must_be_last = TRUE;
8418 else if ( *p == '_' ) {
8419 underscore = seen_underscore = TRUE;
8426 if (proto_after_greedy_proto)
8427 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8428 "Prototype after '%c' for %"SVf" : %s",
8429 greedy_proto, SVfARG(PL_subname), d);
8431 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8432 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8433 "Illegal character %sin prototype for %"SVf" : %s",
8434 seen_underscore ? "after '_' " : "",
8436 SvUTF8(PL_lex_stuff)
8437 ? sv_uni_display(dsv,
8438 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8440 UNI_DISPLAY_ISPRINT)
8441 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8442 PERL_PV_ESCAPE_NONASCII));
8444 SvCUR_set(PL_lex_stuff, tmp);
8449 CURMAD('q', PL_thisopen);
8450 CURMAD('_', tmpwhite);
8451 CURMAD('=', PL_thisstuff);
8452 CURMAD('Q', PL_thisclose);
8453 NEXTVAL_NEXTTOKE.opval =
8454 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8455 PL_lex_stuff = NULL;
8458 s = SKIPSPACE2(s,tmpwhite);
8466 if (*s == ':' && s[1] != ':')
8467 PL_expect = attrful;
8468 else if (*s != '{' && key == KEY_sub) {
8470 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8471 else if (*s != ';' && *s != '}')
8472 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8479 curmad('^', newSVpvs(""));
8480 CURMAD('_', tmpwhite);
8484 PL_thistoken = subtoken;
8487 NEXTVAL_NEXTTOKE.opval =
8488 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8489 PL_lex_stuff = NULL;
8495 sv_setpvs(PL_subname, "__ANON__");
8497 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8501 force_ident_maybe_lex('&');
8507 LOP(OP_SYSTEM,XREF);
8510 LOP(OP_SYMLINK,XTERM);
8513 LOP(OP_SYSCALL,XTERM);
8516 LOP(OP_SYSOPEN,XTERM);
8519 LOP(OP_SYSSEEK,XTERM);
8522 LOP(OP_SYSREAD,XTERM);
8525 LOP(OP_SYSWRITE,XTERM);
8530 TERM(sublex_start());
8551 LOP(OP_TRUNCATE,XTERM);
8563 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8565 pl_yylval.ival = CopLINE(PL_curcop);
8569 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8571 pl_yylval.ival = CopLINE(PL_curcop);
8575 LOP(OP_UNLINK,XTERM);
8581 LOP(OP_UNPACK,XTERM);
8584 LOP(OP_UTIME,XTERM);
8590 LOP(OP_UNSHIFT,XTERM);
8593 s = tokenize_use(1, s);
8603 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8605 pl_yylval.ival = CopLINE(PL_curcop);
8609 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8611 pl_yylval.ival = CopLINE(PL_curcop);
8615 PL_hints |= HINT_BLOCK_SCOPE;
8622 LOP(OP_WAITPID,XTERM);
8631 ctl_l[0] = toCTRL('L');
8633 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8636 /* Make sure $^L is defined */
8637 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8642 if (PL_expect == XOPERATOR) {
8643 if (*s == '=' && !PL_lex_allbrackets &&
8644 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8652 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8654 pl_yylval.ival = OP_XOR;
8660 #pragma segment Main
8664 S_pending_ident(pTHX)
8668 const char pit = (char)pl_yylval.ival;
8669 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8670 /* All routes through this function want to know if there is a colon. */
8671 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8673 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8674 "### Pending identifier '%s'\n", PL_tokenbuf); });
8676 /* if we're in a my(), we can't allow dynamics here.
8677 $foo'bar has already been turned into $foo::bar, so
8678 just check for colons.
8680 if it's a legal name, the OP is a PADANY.
8683 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8685 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8686 "variable %s in \"our\"",
8687 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8688 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8692 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8693 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8694 UTF ? SVf_UTF8 : 0);
8696 pl_yylval.opval = newOP(OP_PADANY, 0);
8697 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8698 UTF ? SVf_UTF8 : 0);
8704 build the ops for accesses to a my() variable.
8709 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8710 UTF ? SVf_UTF8 : 0);
8711 if (tmp != NOT_IN_PAD) {
8712 /* might be an "our" variable" */
8713 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8714 /* build ops for a bareword */
8715 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8716 HEK * const stashname = HvNAME_HEK(stash);
8717 SV * const sym = newSVhek(stashname);
8718 sv_catpvs(sym, "::");
8719 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8720 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8721 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8725 ? (GV_ADDMULTI | GV_ADDINEVAL)
8728 ((PL_tokenbuf[0] == '$') ? SVt_PV
8729 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8734 pl_yylval.opval = newOP(OP_PADANY, 0);
8735 pl_yylval.opval->op_targ = tmp;
8741 Whine if they've said @foo in a doublequoted string,
8742 and @foo isn't a variable we can find in the symbol
8745 if (ckWARN(WARN_AMBIGUOUS) &&
8746 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8747 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8748 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8749 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8750 /* DO NOT warn for @- and @+ */
8751 && !( PL_tokenbuf[2] == '\0' &&
8752 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8755 /* Downgraded from fatal to warning 20000522 mjd */
8756 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8757 "Possible unintended interpolation of %"SVf" in string",
8758 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8759 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8763 /* build ops for a bareword */
8764 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8765 newSVpvn_flags(PL_tokenbuf + 1,
8767 UTF ? SVf_UTF8 : 0 ));
8768 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8770 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8771 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8772 | ( UTF ? SVf_UTF8 : 0 ),
8773 ((PL_tokenbuf[0] == '$') ? SVt_PV
8774 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8780 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8784 PERL_ARGS_ASSERT_CHECKCOMMA;
8786 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8787 if (ckWARN(WARN_SYNTAX)) {
8790 for (w = s+2; *w && level; w++) {
8798 /* the list of chars below is for end of statements or
8799 * block / parens, boolean operators (&&, ||, //) and branch
8800 * constructs (or, and, if, until, unless, while, err, for).
8801 * Not a very solid hack... */
8802 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8803 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8804 "%s (...) interpreted as function",name);
8807 while (s < PL_bufend && isSPACE(*s))
8811 while (s < PL_bufend && isSPACE(*s))
8813 if (isIDFIRST_lazy_if(s,UTF)) {
8814 const char * const w = s;
8815 s += UTF ? UTF8SKIP(s) : 1;
8816 while (isALNUM_lazy_if(s,UTF))
8817 s += UTF ? UTF8SKIP(s) : 1;
8818 while (s < PL_bufend && isSPACE(*s))
8822 if (keyword(w, s - w, 0))
8825 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8826 if (gv && GvCVu(gv))
8828 Perl_croak(aTHX_ "No comma allowed after %s", what);
8833 /* Either returns sv, or mortalizes sv and returns a new SV*.
8834 Best used as sv=new_constant(..., sv, ...).
8835 If s, pv are NULL, calls subroutine with one argument,
8836 and type is used with error messages only. */
8839 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8840 SV *sv, SV *pv, const char *type, STRLEN typelen)
8843 HV * table = GvHV(PL_hintgv); /* ^H */
8847 const char *why1 = "", *why2 = "", *why3 = "";
8849 PERL_ARGS_ASSERT_NEW_CONSTANT;
8851 /* charnames doesn't work well if there have been errors found */
8852 if (PL_error_count > 0 && strEQ(key,"charnames"))
8853 return &PL_sv_undef;
8856 || ! (PL_hints & HINT_LOCALIZE_HH)
8857 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8862 /* Here haven't found what we're looking for. If it is charnames,
8863 * perhaps it needs to be loaded. Try doing that before giving up */
8864 if (strEQ(key,"charnames")) {
8865 Perl_load_module(aTHX_
8867 newSVpvs("_charnames"),
8868 /* version parameter; no need to specify it, as if
8869 * we get too early a version, will fail anyway,
8870 * not being able to find '_charnames' */
8876 table = GvHV(PL_hintgv);
8878 && (PL_hints & HINT_LOCALIZE_HH)
8879 && (cvp = hv_fetch(table, key, keylen, FALSE))
8885 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8886 msg = Perl_newSVpvf(aTHX_
8887 "Constant(%s) unknown", (type ? type: "undef"));
8892 why3 = "} is not defined";
8894 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8895 (type ? type: "undef"), why1, why2, why3);
8897 yyerror(SvPVX_const(msg));
8902 sv_2mortal(sv); /* Parent created it permanently */
8905 pv = newSVpvn_flags(s, len, SVs_TEMP);
8907 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8909 typesv = &PL_sv_undef;
8911 PUSHSTACKi(PERLSI_OVERLOAD);
8923 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8927 /* Check the eval first */
8928 if (!PL_in_eval && SvTRUE(ERRSV)) {
8929 sv_catpvs(ERRSV, "Propagated");
8930 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8932 res = SvREFCNT_inc_simple(sv);
8936 SvREFCNT_inc_simple_void(res);
8945 why1 = "Call to &{$^H{";
8947 why3 = "}} did not return a defined value";
8955 /* Returns a NUL terminated string, with the length of the string written to
8959 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8963 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8965 PERL_ARGS_ASSERT_SCAN_WORD;
8969 Perl_croak(aTHX_ ident_too_long);
8970 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
8972 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8977 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8981 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8982 char *t = s + UTF8SKIP(s);
8984 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8988 Perl_croak(aTHX_ ident_too_long);
8989 Copy(s, d, len, char);
9002 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9005 char *bracket = NULL;
9008 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9010 PERL_ARGS_ASSERT_SCAN_IDENT;
9015 while (isDIGIT(*s)) {
9017 Perl_croak(aTHX_ ident_too_long);
9024 Perl_croak(aTHX_ ident_too_long);
9025 if (isALNUM(*s)) /* UTF handled below */
9027 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9032 else if (*s == ':' && s[1] == ':') {
9036 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9037 char *t = s + UTF8SKIP(s);
9038 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9040 if (d + (t - s) > e)
9041 Perl_croak(aTHX_ ident_too_long);
9042 Copy(s, d, t - s, char);
9053 if (PL_lex_state != LEX_NORMAL)
9054 PL_lex_state = LEX_INTERPENDMAYBE;
9057 if (*s == '$' && s[1] &&
9058 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9068 const STRLEN skip = UTF8SKIP(s);
9071 for ( i = 0; i < skip; i++ )
9079 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9083 else if (ck_uni && !bracket)
9086 if (isSPACE(s[-1])) {
9088 const char ch = *s++;
9089 if (!SPACE_OR_TAB(ch)) {
9095 if (isIDFIRST_lazy_if(d,UTF)) {
9099 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9100 end += UTF8SKIP(end);
9101 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9102 end += UTF8SKIP(end);
9104 Copy(s, d, end - s, char);
9109 while ((isALNUM(*s) || *s == ':') && d < e)
9112 Perl_croak(aTHX_ ident_too_long);
9115 while (s < send && SPACE_OR_TAB(*s))
9117 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9118 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9119 const char * const brack =
9121 ((*s == '[') ? "[...]" : "{...}");
9122 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9123 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9124 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9125 funny, dest, brack, funny, dest, brack);
9128 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9129 PL_lex_allbrackets++;
9133 /* Handle extended ${^Foo} variables
9134 * 1999-02-27 mjd-perl-patch@plover.com */
9135 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9139 while (isALNUM(*s) && d < e) {
9143 Perl_croak(aTHX_ ident_too_long);
9148 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9149 PL_lex_state = LEX_INTERPEND;
9152 if (PL_lex_state == LEX_NORMAL) {
9153 if (ckWARN(WARN_AMBIGUOUS) &&
9154 (keyword(dest, d - dest, 0)
9155 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9157 SV *tmp = newSVpvn_flags( dest, d - dest,
9158 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9161 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9162 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9163 funny, tmp, funny, tmp);
9168 s = bracket; /* let the parser handle it */
9172 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9173 PL_lex_state = LEX_INTERPEND;
9178 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9180 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9181 * the parse starting at 's', based on the subset that are valid in this
9182 * context input to this routine in 'valid_flags'. Advances s. Returns
9183 * TRUE if the input should be treated as a valid flag, so the next char
9184 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9185 * first call on the current regex. This routine will set it to any
9186 * charset modifier found. The caller shouldn't change it. This way,
9187 * another charset modifier encountered in the parse can be detected as an
9188 * error, as we have decided to allow only one */
9191 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9193 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9194 if (isALNUM_lazy_if(*s, UTF)) {
9195 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9196 UTF ? SVf_UTF8 : 0);
9198 /* Pretend that it worked, so will continue processing before
9207 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9208 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9209 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9210 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9211 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9212 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9213 case LOCALE_PAT_MOD:
9215 goto multiple_charsets;
9217 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9220 case UNICODE_PAT_MOD:
9222 goto multiple_charsets;
9224 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9227 case ASCII_RESTRICT_PAT_MOD:
9229 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9233 /* Error if previous modifier wasn't an 'a', but if it was, see
9234 * if, and accept, a second occurrence (only) */
9236 || get_regex_charset(*pmfl)
9237 != REGEX_ASCII_RESTRICTED_CHARSET)
9239 goto multiple_charsets;
9241 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9245 case DEPENDS_PAT_MOD:
9247 goto multiple_charsets;
9249 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9258 if (*charset != c) {
9259 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9261 else if (c == 'a') {
9262 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9265 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9268 /* Pretend that it worked, so will continue processing before dieing */
9274 S_scan_pat(pTHX_ char *start, I32 type)
9278 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9279 const char * const valid_flags =
9280 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9281 char charset = '\0'; /* character set modifier */
9286 PERL_ARGS_ASSERT_SCAN_PAT;
9288 /* this was only needed for the initial scan_str; set it to false
9289 * so that any (?{}) code blocks etc are parsed normally */
9290 PL_reg_state.re_reparsing = FALSE;
9292 const char * const delimiter = skipspace(start);
9296 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9297 : "Search pattern not terminated" ));
9300 pm = (PMOP*)newPMOP(type, 0);
9301 if (PL_multi_open == '?') {
9302 /* This is the only point in the code that sets PMf_ONCE: */
9303 pm->op_pmflags |= PMf_ONCE;
9305 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9306 allows us to restrict the list needed by reset to just the ??
9308 assert(type != OP_TRANS);
9310 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9313 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9316 elements = mg->mg_len / sizeof(PMOP**);
9317 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9318 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9319 mg->mg_len = elements * sizeof(PMOP**);
9320 PmopSTASH_set(pm,PL_curstash);
9327 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9328 * anon CV. False positives like qr/[(?{]/ are harmless */
9330 if (type == OP_QR) {
9332 char *e, *p = SvPV(PL_lex_stuff, len);
9334 for (; p < e; p++) {
9335 if (p[0] == '(' && p[1] == '?'
9336 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9338 pm->op_pmflags |= PMf_HAS_CV;
9342 pm->op_pmflags |= PMf_IS_QR;
9345 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9347 if (PL_madskills && modstart != s) {
9348 SV* tmptoken = newSVpvn(modstart, s - modstart);
9349 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9352 /* issue a warning if /c is specified,but /g is not */
9353 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9355 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9356 "Use of /c modifier is meaningless without /g" );
9359 PL_lex_op = (OP*)pm;
9360 pl_yylval.ival = OP_MATCH;
9365 S_scan_subst(pTHX_ char *start)
9372 char charset = '\0'; /* character set modifier */
9377 PERL_ARGS_ASSERT_SCAN_SUBST;
9379 pl_yylval.ival = OP_NULL;
9381 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9384 Perl_croak(aTHX_ "Substitution pattern not terminated");
9386 if (s[-1] == PL_multi_open)
9390 CURMAD('q', PL_thisopen);
9391 CURMAD('_', PL_thiswhite);
9392 CURMAD('E', PL_thisstuff);
9393 CURMAD('Q', PL_thisclose);
9394 PL_realtokenstart = s - SvPVX(PL_linestr);
9398 first_start = PL_multi_start;
9399 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9402 SvREFCNT_dec(PL_lex_stuff);
9403 PL_lex_stuff = NULL;
9405 Perl_croak(aTHX_ "Substitution replacement not terminated");
9407 PL_multi_start = first_start; /* so whole substitution is taken together */
9409 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9413 CURMAD('z', PL_thisopen);
9414 CURMAD('R', PL_thisstuff);
9415 CURMAD('Z', PL_thisclose);
9421 if (*s == EXEC_PAT_MOD) {
9425 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9434 curmad('m', newSVpvn(modstart, s - modstart));
9435 append_madprops(PL_thismad, (OP*)pm, 0);
9439 if ((pm->op_pmflags & PMf_CONTINUE)) {
9440 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9444 SV * const repl = newSVpvs("");
9447 pm->op_pmflags |= PMf_EVAL;
9450 sv_catpvs(repl, "eval ");
9452 sv_catpvs(repl, "do ");
9454 sv_catpvs(repl, "{");
9455 sv_catsv(repl, PL_sublex_info.repl);
9456 sv_catpvs(repl, "}");
9458 SvREFCNT_dec(PL_sublex_info.repl);
9459 PL_sublex_info.repl = repl;
9462 PL_lex_op = (OP*)pm;
9463 pl_yylval.ival = OP_SUBST;
9468 S_scan_trans(pTHX_ char *start)
9476 bool nondestruct = 0;
9481 PERL_ARGS_ASSERT_SCAN_TRANS;
9483 pl_yylval.ival = OP_NULL;
9485 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9487 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9489 if (s[-1] == PL_multi_open)
9493 CURMAD('q', PL_thisopen);
9494 CURMAD('_', PL_thiswhite);
9495 CURMAD('E', PL_thisstuff);
9496 CURMAD('Q', PL_thisclose);
9497 PL_realtokenstart = s - SvPVX(PL_linestr);
9501 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9504 SvREFCNT_dec(PL_lex_stuff);
9505 PL_lex_stuff = NULL;
9507 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9510 CURMAD('z', PL_thisopen);
9511 CURMAD('R', PL_thisstuff);
9512 CURMAD('Z', PL_thisclose);
9515 complement = del = squash = 0;
9522 complement = OPpTRANS_COMPLEMENT;
9525 del = OPpTRANS_DELETE;
9528 squash = OPpTRANS_SQUASH;
9540 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9541 o->op_private &= ~OPpTRANS_ALL;
9542 o->op_private |= del|squash|complement|
9543 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9544 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9547 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9552 curmad('m', newSVpvn(modstart, s - modstart));
9553 append_madprops(PL_thismad, o, 0);
9562 Takes a pointer to the first < in <<FOO.
9563 Returns a pointer to the byte following <<FOO.
9565 This function scans a heredoc, which involves different methods
9566 depending on whether we are in a string eval, quoted construct, etc.
9567 This is because PL_linestr could containing a single line of input, or
9568 a whole string being evalled, or the contents of the current quote-
9571 The two basic methods are:
9572 - Steal lines from the input stream
9573 - Scan the heredoc in PL_linestr and remove it therefrom
9575 In a file scope or filtered eval, the first method is used; in a
9576 string eval, the second.
9578 In a quote-like operator, we have to choose between the two,
9579 depending on where we can find a newline. We peek into outer lex-
9580 ing scopes until we find one with a newline in it. If we reach the
9581 outermost lexing scope and it is a file, we use the stream method.
9582 Otherwise it is treated as an eval.
9586 S_scan_heredoc(pTHX_ register char *s)
9589 I32 op_type = OP_SCALAR;
9596 const bool infile = PL_rsfp || PL_parser->filtered;
9597 LEXSHARED *shared = PL_parser->lex_shared;
9599 I32 stuffstart = s - SvPVX(PL_linestr);
9602 PL_realtokenstart = -1;
9605 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9608 d = PL_tokenbuf + 1;
9609 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9610 *PL_tokenbuf = '\n';
9612 while (SPACE_OR_TAB(*peek))
9614 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9617 s = delimcpy(d, e, s, PL_bufend, term, &len);
9619 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9625 /* <<\FOO is equivalent to <<'FOO' */
9629 if (!isALNUM_lazy_if(s,UTF))
9630 deprecate("bare << to mean <<\"\"");
9631 for (; isALNUM_lazy_if(s,UTF); s++) {
9636 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9637 Perl_croak(aTHX_ "Delimiter for here document is too long");
9640 len = d - PL_tokenbuf;
9644 tstart = PL_tokenbuf + 1;
9645 PL_thisclose = newSVpvn(tstart, len - 1);
9646 tstart = SvPVX(PL_linestr) + stuffstart;
9647 PL_thisopen = newSVpvn(tstart, s - tstart);
9648 stuffstart = s - SvPVX(PL_linestr);
9651 #ifndef PERL_STRICT_CR
9652 d = strchr(s, '\r');
9654 char * const olds = s;
9656 while (s < PL_bufend) {
9662 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9671 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9677 tstart = SvPVX(PL_linestr) + stuffstart;
9679 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9681 PL_thisstuff = newSVpvn(tstart, s - tstart);
9684 stuffstart = s - SvPVX(PL_linestr);
9687 tmpstr = newSV_type(SVt_PVIV);
9691 SvIV_set(tmpstr, -1);
9693 else if (term == '`') {
9694 op_type = OP_BACKTICK;
9695 SvIV_set(tmpstr, '\\');
9698 PL_multi_start = CopLINE(PL_curcop) + 1;
9699 PL_multi_open = PL_multi_close = '<';
9700 /* inside a string eval or quote-like operator */
9701 if (!infile || PL_lex_inwhat) {
9704 char * const olds = s;
9705 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9706 /* These two fields are not set until an inner lexing scope is
9707 entered. But we need them set here. */
9708 shared->ls_bufptr = s;
9709 shared->ls_linestr = PL_linestr;
9711 /* Look for a newline. If the current buffer does not have one,
9712 peek into the line buffer of the parent lexing scope, going
9713 up as many levels as necessary to find one with a newline
9716 while (!(s = (char *)memchr(
9717 (void *)shared->ls_bufptr, '\n',
9718 SvEND(shared->ls_linestr)-shared->ls_bufptr
9720 shared = shared->ls_prev;
9721 /* shared is only null if we have gone beyond the outermost
9722 lexing scope. In a file, we will have broken out of the
9723 loop in the previous iteration. In an eval, the string buf-
9724 fer ends with "\n;", so the while condition below will have
9725 evaluated to false. So shared can never be null. */
9727 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9728 most lexing scope. In a file, shared->ls_linestr at that
9729 level is just one line, so there is no body to steal. */
9730 if (infile && !shared->ls_prev) {
9736 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9739 linestr = shared->ls_linestr;
9740 bufend = SvEND(linestr);
9742 while (s < bufend &&
9743 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9745 ++shared->herelines;
9750 sv_setpvn(tmpstr,d+1,s-d);
9754 sv_catpvn(PL_thisstuff, d + 1, s - d);
9756 PL_thisstuff = newSVpvn(d + 1, s - d);
9757 stuffstart = s - SvPVX(PL_linestr);
9761 /* the preceding stmt passes a newline */
9762 shared->herelines++;
9764 /* s now points to the newline after the heredoc terminator.
9765 d points to the newline before the body of the heredoc.
9768 /* We are going to modify linestr in place here, so set
9769 aside copies of the string if necessary for re-evals or
9771 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9772 check shared->re_eval_str. */
9773 if (shared->re_eval_start || shared->re_eval_str) {
9774 /* Set aside the rest of the regexp */
9775 if (!shared->re_eval_str)
9776 shared->re_eval_str =
9777 newSVpvn(shared->re_eval_start,
9778 bufend - shared->re_eval_start);
9779 shared->re_eval_start -= s-d;
9781 if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9782 && cx->blk_eval.cur_text == linestr) {
9783 cx->blk_eval.cur_text = newSVsv(linestr);
9784 SvSCREAM_on(cx->blk_eval.cur_text);
9786 /* Copy everything from s onwards back to d. */
9787 Move(s,d,bufend-s + 1,char);
9788 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9789 /* Setting PL_bufend only applies when we have not dug deeper
9790 into other scopes, because sublex_done sets PL_bufend to
9791 SvEND(PL_linestr). */
9792 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9799 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9800 term = PL_tokenbuf[1];
9802 linestr_save = PL_linestr; /* must restore this afterwards */
9803 d = s; /* and this */
9804 PL_linestr = newSVpvs("");
9805 PL_bufend = SvPVX(PL_linestr);
9809 tstart = SvPVX(PL_linestr) + stuffstart;
9811 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9813 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9816 PL_bufptr = PL_bufend;
9817 CopLINE_set(PL_curcop,
9818 PL_multi_start + shared->herelines);
9819 if (!lex_next_chunk(LEX_NO_TERM)
9820 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9821 SvREFCNT_dec(linestr_save);
9824 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9825 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9826 lex_grow_linestr(SvCUR(PL_linestr) + 2);
9827 sv_catpvs(PL_linestr, "\n\0");
9831 stuffstart = s - SvPVX(PL_linestr);
9833 shared->herelines++;
9834 PL_last_lop = PL_last_uni = NULL;
9835 #ifndef PERL_STRICT_CR
9836 if (PL_bufend - PL_linestart >= 2) {
9837 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9838 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9840 PL_bufend[-2] = '\n';
9842 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9844 else if (PL_bufend[-1] == '\r')
9845 PL_bufend[-1] = '\n';
9847 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9848 PL_bufend[-1] = '\n';
9850 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
9851 SvREFCNT_dec(PL_linestr);
9852 PL_linestr = linestr_save;
9853 PL_linestart = SvPVX(linestr_save);
9854 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9859 sv_catsv(tmpstr,PL_linestr);
9863 PL_multi_end = CopLINE(PL_curcop);
9864 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9865 SvPV_shrink_to_cur(tmpstr);
9868 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9870 else if (PL_encoding)
9871 sv_recode_to_utf8(tmpstr, PL_encoding);
9873 PL_lex_stuff = tmpstr;
9874 pl_yylval.ival = op_type;
9878 SvREFCNT_dec(tmpstr);
9879 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9880 missingterm(PL_tokenbuf + 1);
9884 takes: current position in input buffer
9885 returns: new position in input buffer
9886 side-effects: pl_yylval and lex_op are set.
9891 <FH> read from filehandle
9892 <pkg::FH> read from package qualified filehandle
9893 <pkg'FH> read from package qualified filehandle
9894 <$fh> read from filehandle in $fh
9900 S_scan_inputsymbol(pTHX_ char *start)
9903 char *s = start; /* current position in buffer */
9906 char *d = PL_tokenbuf; /* start of temp holding space */
9907 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9909 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9911 end = strchr(s, '\n');
9914 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9916 /* die if we didn't have space for the contents of the <>,
9917 or if it didn't end, or if we see a newline
9920 if (len >= (I32)sizeof PL_tokenbuf)
9921 Perl_croak(aTHX_ "Excessively long <> operator");
9923 Perl_croak(aTHX_ "Unterminated <> operator");
9928 Remember, only scalar variables are interpreted as filehandles by
9929 this code. Anything more complex (e.g., <$fh{$num}>) will be
9930 treated as a glob() call.
9931 This code makes use of the fact that except for the $ at the front,
9932 a scalar variable and a filehandle look the same.
9934 if (*d == '$' && d[1]) d++;
9936 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9937 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9938 d += UTF ? UTF8SKIP(d) : 1;
9940 /* If we've tried to read what we allow filehandles to look like, and
9941 there's still text left, then it must be a glob() and not a getline.
9942 Use scan_str to pull out the stuff between the <> and treat it
9943 as nothing more than a string.
9946 if (d - PL_tokenbuf != len) {
9947 pl_yylval.ival = OP_GLOB;
9948 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9950 Perl_croak(aTHX_ "Glob not terminated");
9954 bool readline_overriden = FALSE;
9957 /* we're in a filehandle read situation */
9960 /* turn <> into <ARGV> */
9962 Copy("ARGV",d,5,char);
9964 /* Check whether readline() is overriden */
9965 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9967 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9969 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9970 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9971 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9972 readline_overriden = TRUE;
9974 /* if <$fh>, create the ops to turn the variable into a
9978 /* try to find it in the pad for this block, otherwise find
9979 add symbol table ops
9981 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9982 if (tmp != NOT_IN_PAD) {
9983 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9984 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9985 HEK * const stashname = HvNAME_HEK(stash);
9986 SV * const sym = sv_2mortal(newSVhek(stashname));
9987 sv_catpvs(sym, "::");
9993 OP * const o = newOP(OP_PADSV, 0);
9995 PL_lex_op = readline_overriden
9996 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9997 op_append_elem(OP_LIST, o,
9998 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9999 : (OP*)newUNOP(OP_READLINE, 0, o);
10008 ? (GV_ADDMULTI | GV_ADDINEVAL)
10009 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10011 PL_lex_op = readline_overriden
10012 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10013 op_append_elem(OP_LIST,
10014 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10015 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10016 : (OP*)newUNOP(OP_READLINE, 0,
10017 newUNOP(OP_RV2SV, 0,
10018 newGVOP(OP_GV, 0, gv)));
10020 if (!readline_overriden)
10021 PL_lex_op->op_flags |= OPf_SPECIAL;
10022 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10023 pl_yylval.ival = OP_NULL;
10026 /* If it's none of the above, it must be a literal filehandle
10027 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10029 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10030 PL_lex_op = readline_overriden
10031 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10032 op_append_elem(OP_LIST,
10033 newGVOP(OP_GV, 0, gv),
10034 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10035 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10036 pl_yylval.ival = OP_NULL;
10045 takes: start position in buffer
10046 keep_quoted preserve \ on the embedded delimiter(s)
10047 keep_delims preserve the delimiters around the string
10048 re_reparse compiling a run-time /(?{})/:
10049 collapse // to /, and skip encoding src
10050 returns: position to continue reading from buffer
10051 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10052 updates the read buffer.
10054 This subroutine pulls a string out of the input. It is called for:
10055 q single quotes q(literal text)
10056 ' single quotes 'literal text'
10057 qq double quotes qq(interpolate $here please)
10058 " double quotes "interpolate $here please"
10059 qx backticks qx(/bin/ls -l)
10060 ` backticks `/bin/ls -l`
10061 qw quote words @EXPORT_OK = qw( func() $spam )
10062 m// regexp match m/this/
10063 s/// regexp substitute s/this/that/
10064 tr/// string transliterate tr/this/that/
10065 y/// string transliterate y/this/that/
10066 ($*@) sub prototypes sub foo ($)
10067 (stuff) sub attr parameters sub foo : attr(stuff)
10068 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10070 In most of these cases (all but <>, patterns and transliterate)
10071 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10072 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10073 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10076 It skips whitespace before the string starts, and treats the first
10077 character as the delimiter. If the delimiter is one of ([{< then
10078 the corresponding "close" character )]}> is used as the closing
10079 delimiter. It allows quoting of delimiters, and if the string has
10080 balanced delimiters ([{<>}]) it allows nesting.
10082 On success, the SV with the resulting string is put into lex_stuff or,
10083 if that is already non-NULL, into lex_repl. The second case occurs only
10084 when parsing the RHS of the special constructs s/// and tr/// (y///).
10085 For convenience, the terminating delimiter character is stuffed into
10090 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10093 SV *sv; /* scalar value: string */
10094 const char *tmps; /* temp string, used for delimiter matching */
10095 char *s = start; /* current position in the buffer */
10096 char term; /* terminating character */
10097 char *to; /* current position in the sv's data */
10098 I32 brackets = 1; /* bracket nesting level */
10099 bool has_utf8 = FALSE; /* is there any utf8 content? */
10100 I32 termcode; /* terminating char. code */
10101 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10102 STRLEN termlen; /* length of terminating string */
10103 int last_off = 0; /* last position for nesting bracket */
10109 PERL_ARGS_ASSERT_SCAN_STR;
10111 /* skip space before the delimiter */
10117 if (PL_realtokenstart >= 0) {
10118 stuffstart = PL_realtokenstart;
10119 PL_realtokenstart = -1;
10122 stuffstart = start - SvPVX(PL_linestr);
10124 /* mark where we are, in case we need to report errors */
10127 /* after skipping whitespace, the next character is the terminator */
10130 termcode = termstr[0] = term;
10134 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10135 Copy(s, termstr, termlen, U8);
10136 if (!UTF8_IS_INVARIANT(term))
10140 /* mark where we are */
10141 PL_multi_start = CopLINE(PL_curcop);
10142 PL_multi_open = term;
10144 /* find corresponding closing delimiter */
10145 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10146 termcode = termstr[0] = term = tmps[5];
10148 PL_multi_close = term;
10150 /* create a new SV to hold the contents. 79 is the SV's initial length.
10151 What a random number. */
10152 sv = newSV_type(SVt_PVIV);
10154 SvIV_set(sv, termcode);
10155 (void)SvPOK_only(sv); /* validate pointer */
10157 /* move past delimiter and try to read a complete string */
10159 sv_catpvn(sv, s, termlen);
10162 tstart = SvPVX(PL_linestr) + stuffstart;
10163 if (!PL_thisopen && !keep_delims) {
10164 PL_thisopen = newSVpvn(tstart, s - tstart);
10165 stuffstart = s - SvPVX(PL_linestr);
10169 if (PL_encoding && !UTF && !re_reparse) {
10173 int offset = s - SvPVX_const(PL_linestr);
10174 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10175 &offset, (char*)termstr, termlen);
10176 const char * const ns = SvPVX_const(PL_linestr) + offset;
10177 char * const svlast = SvEND(sv) - 1;
10179 for (; s < ns; s++) {
10180 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10181 COPLINE_INC_WITH_HERELINES;
10184 goto read_more_line;
10186 /* handle quoted delimiters */
10187 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10189 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10191 if ((svlast-1 - t) % 2) {
10192 if (!keep_quoted) {
10193 *(svlast-1) = term;
10195 SvCUR_set(sv, SvCUR(sv) - 1);
10200 if (PL_multi_open == PL_multi_close) {
10206 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10207 /* At here, all closes are "was quoted" one,
10208 so we don't check PL_multi_close. */
10210 if (!keep_quoted && *(t+1) == PL_multi_open)
10215 else if (*t == PL_multi_open)
10223 SvCUR_set(sv, w - SvPVX_const(sv));
10225 last_off = w - SvPVX(sv);
10226 if (--brackets <= 0)
10231 if (!keep_delims) {
10232 SvCUR_set(sv, SvCUR(sv) - 1);
10238 /* extend sv if need be */
10239 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10240 /* set 'to' to the next character in the sv's string */
10241 to = SvPVX(sv)+SvCUR(sv);
10243 /* if open delimiter is the close delimiter read unbridle */
10244 if (PL_multi_open == PL_multi_close) {
10245 for (; s < PL_bufend; s++,to++) {
10246 /* embedded newlines increment the current line number */
10247 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10248 COPLINE_INC_WITH_HERELINES;
10249 /* handle quoted delimiters */
10250 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10253 || (re_reparse && s[1] == '\\'))
10256 /* any other quotes are simply copied straight through */
10260 /* terminate when run out of buffer (the for() condition), or
10261 have found the terminator */
10262 else if (*s == term) {
10265 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10268 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10274 /* if the terminator isn't the same as the start character (e.g.,
10275 matched brackets), we have to allow more in the quoting, and
10276 be prepared for nested brackets.
10279 /* read until we run out of string, or we find the terminator */
10280 for (; s < PL_bufend; s++,to++) {
10281 /* embedded newlines increment the line count */
10282 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10283 COPLINE_INC_WITH_HERELINES;
10284 /* backslashes can escape the open or closing characters */
10285 if (*s == '\\' && s+1 < PL_bufend) {
10286 if (!keep_quoted &&
10287 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10292 /* allow nested opens and closes */
10293 else if (*s == PL_multi_close && --brackets <= 0)
10295 else if (*s == PL_multi_open)
10297 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10302 /* terminate the copied string and update the sv's end-of-string */
10304 SvCUR_set(sv, to - SvPVX_const(sv));
10307 * this next chunk reads more into the buffer if we're not done yet
10311 break; /* handle case where we are done yet :-) */
10313 #ifndef PERL_STRICT_CR
10314 if (to - SvPVX_const(sv) >= 2) {
10315 if ((to[-2] == '\r' && to[-1] == '\n') ||
10316 (to[-2] == '\n' && to[-1] == '\r'))
10320 SvCUR_set(sv, to - SvPVX_const(sv));
10322 else if (to[-1] == '\r')
10325 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10330 /* if we're out of file, or a read fails, bail and reset the current
10331 line marker so we can report where the unterminated string began
10334 if (PL_madskills) {
10335 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10337 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10339 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10342 COPLINE_INC_WITH_HERELINES;
10343 PL_bufptr = PL_bufend;
10344 if (!lex_next_chunk(0)) {
10346 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10355 /* at this point, we have successfully read the delimited string */
10357 if (!PL_encoding || UTF || re_reparse) {
10359 if (PL_madskills) {
10360 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10361 const int len = s - tstart;
10363 sv_catpvn(PL_thisstuff, tstart, len);
10365 PL_thisstuff = newSVpvn(tstart, len);
10366 if (!PL_thisclose && !keep_delims)
10367 PL_thisclose = newSVpvn(s,termlen);
10372 sv_catpvn(sv, s, termlen);
10377 if (PL_madskills) {
10378 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10379 const int len = s - tstart - termlen;
10381 sv_catpvn(PL_thisstuff, tstart, len);
10383 PL_thisstuff = newSVpvn(tstart, len);
10384 if (!PL_thisclose && !keep_delims)
10385 PL_thisclose = newSVpvn(s - termlen,termlen);
10389 if (has_utf8 || (PL_encoding && !re_reparse))
10392 PL_multi_end = CopLINE(PL_curcop);
10394 /* if we allocated too much space, give some back */
10395 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10396 SvLEN_set(sv, SvCUR(sv) + 1);
10397 SvPV_renew(sv, SvLEN(sv));
10400 /* decide whether this is the first or second quoted string we've read
10405 PL_sublex_info.repl = sv;
10413 takes: pointer to position in buffer
10414 returns: pointer to new position in buffer
10415 side-effects: builds ops for the constant in pl_yylval.op
10417 Read a number in any of the formats that Perl accepts:
10419 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10420 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10423 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10425 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10428 If it reads a number without a decimal point or an exponent, it will
10429 try converting the number to an integer and see if it can do so
10430 without loss of precision.
10434 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10437 const char *s = start; /* current position in buffer */
10438 char *d; /* destination in temp buffer */
10439 char *e; /* end of temp buffer */
10440 NV nv; /* number read, as a double */
10441 SV *sv = NULL; /* place to put the converted number */
10442 bool floatit; /* boolean: int or float? */
10443 const char *lastub = NULL; /* position of last underbar */
10444 static char const number_too_long[] = "Number too long";
10446 PERL_ARGS_ASSERT_SCAN_NUM;
10448 /* We use the first character to decide what type of number this is */
10452 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10454 /* if it starts with a 0, it could be an octal number, a decimal in
10455 0.13 disguise, or a hexadecimal number, or a binary number. */
10459 u holds the "number so far"
10460 shift the power of 2 of the base
10461 (hex == 4, octal == 3, binary == 1)
10462 overflowed was the number more than we can hold?
10464 Shift is used when we add a digit. It also serves as an "are
10465 we in octal/hex/binary?" indicator to disallow hex characters
10466 when in octal mode.
10471 bool overflowed = FALSE;
10472 bool just_zero = TRUE; /* just plain 0 or binary number? */
10473 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10474 static const char* const bases[5] =
10475 { "", "binary", "", "octal", "hexadecimal" };
10476 static const char* const Bases[5] =
10477 { "", "Binary", "", "Octal", "Hexadecimal" };
10478 static const char* const maxima[5] =
10480 "0b11111111111111111111111111111111",
10484 const char *base, *Base, *max;
10486 /* check for hex */
10487 if (s[1] == 'x' || s[1] == 'X') {
10491 } else if (s[1] == 'b' || s[1] == 'B') {
10496 /* check for a decimal in disguise */
10497 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10499 /* so it must be octal */
10506 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10507 "Misplaced _ in number");
10511 base = bases[shift];
10512 Base = Bases[shift];
10513 max = maxima[shift];
10515 /* read the rest of the number */
10517 /* x is used in the overflow test,
10518 b is the digit we're adding on. */
10523 /* if we don't mention it, we're done */
10527 /* _ are ignored -- but warned about if consecutive */
10529 if (lastub && s == lastub + 1)
10530 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10531 "Misplaced _ in number");
10535 /* 8 and 9 are not octal */
10536 case '8': case '9':
10538 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10542 case '2': case '3': case '4':
10543 case '5': case '6': case '7':
10545 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10548 case '0': case '1':
10549 b = *s++ & 15; /* ASCII digit -> value of digit */
10553 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10554 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10555 /* make sure they said 0x */
10558 b = (*s++ & 7) + 9;
10560 /* Prepare to put the digit we have onto the end
10561 of the number so far. We check for overflows.
10567 x = u << shift; /* make room for the digit */
10569 if ((x >> shift) != u
10570 && !(PL_hints & HINT_NEW_BINARY)) {
10573 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10574 "Integer overflow in %s number",
10577 u = x | b; /* add the digit to the end */
10580 n *= nvshift[shift];
10581 /* If an NV has not enough bits in its
10582 * mantissa to represent an UV this summing of
10583 * small low-order numbers is a waste of time
10584 * (because the NV cannot preserve the
10585 * low-order bits anyway): we could just
10586 * remember when did we overflow and in the
10587 * end just multiply n by the right
10595 /* if we get here, we had success: make a scalar value from
10600 /* final misplaced underbar check */
10601 if (s[-1] == '_') {
10602 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10606 if (n > 4294967295.0)
10607 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10608 "%s number > %s non-portable",
10614 if (u > 0xffffffff)
10615 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10616 "%s number > %s non-portable",
10621 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10622 sv = new_constant(start, s - start, "integer",
10623 sv, NULL, NULL, 0);
10624 else if (PL_hints & HINT_NEW_BINARY)
10625 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10630 handle decimal numbers.
10631 we're also sent here when we read a 0 as the first digit
10633 case '1': case '2': case '3': case '4': case '5':
10634 case '6': case '7': case '8': case '9': case '.':
10637 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10640 /* read next group of digits and _ and copy into d */
10641 while (isDIGIT(*s) || *s == '_') {
10642 /* skip underscores, checking for misplaced ones
10646 if (lastub && s == lastub + 1)
10647 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10648 "Misplaced _ in number");
10652 /* check for end of fixed-length buffer */
10654 Perl_croak(aTHX_ number_too_long);
10655 /* if we're ok, copy the character */
10660 /* final misplaced underbar check */
10661 if (lastub && s == lastub + 1) {
10662 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10665 /* read a decimal portion if there is one. avoid
10666 3..5 being interpreted as the number 3. followed
10669 if (*s == '.' && s[1] != '.') {
10674 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10675 "Misplaced _ in number");
10679 /* copy, ignoring underbars, until we run out of digits.
10681 for (; isDIGIT(*s) || *s == '_'; s++) {
10682 /* fixed length buffer check */
10684 Perl_croak(aTHX_ number_too_long);
10686 if (lastub && s == lastub + 1)
10687 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10688 "Misplaced _ in number");
10694 /* fractional part ending in underbar? */
10695 if (s[-1] == '_') {
10696 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10697 "Misplaced _ in number");
10699 if (*s == '.' && isDIGIT(s[1])) {
10700 /* oops, it's really a v-string, but without the "v" */
10706 /* read exponent part, if present */
10707 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10711 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10712 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10714 /* stray preinitial _ */
10716 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10717 "Misplaced _ in number");
10721 /* allow positive or negative exponent */
10722 if (*s == '+' || *s == '-')
10725 /* stray initial _ */
10727 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10728 "Misplaced _ in number");
10732 /* read digits of exponent */
10733 while (isDIGIT(*s) || *s == '_') {
10736 Perl_croak(aTHX_ number_too_long);
10740 if (((lastub && s == lastub + 1) ||
10741 (!isDIGIT(s[1]) && s[1] != '_')))
10742 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10743 "Misplaced _ in number");
10751 We try to do an integer conversion first if no characters
10752 indicating "float" have been found.
10757 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10759 if (flags == IS_NUMBER_IN_UV) {
10761 sv = newSViv(uv); /* Prefer IVs over UVs. */
10764 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10765 if (uv <= (UV) IV_MIN)
10766 sv = newSViv(-(IV)uv);
10773 /* terminate the string */
10775 nv = Atof(PL_tokenbuf);
10780 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10781 const char *const key = floatit ? "float" : "integer";
10782 const STRLEN keylen = floatit ? 5 : 7;
10783 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10784 key, keylen, sv, NULL, NULL, 0);
10788 /* if it starts with a v, it could be a v-string */
10791 sv = newSV(5); /* preallocate storage space */
10792 s = scan_vstring(s, PL_bufend, sv);
10796 /* make the op for the constant and return */
10799 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10801 lvalp->opval = NULL;
10807 S_scan_formline(pTHX_ register char *s)
10812 SV * const stuff = newSVpvs("");
10813 bool needargs = FALSE;
10814 bool eofmt = FALSE;
10816 char *tokenstart = s;
10817 SV* savewhite = NULL;
10819 if (PL_madskills) {
10820 savewhite = PL_thiswhite;
10825 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10827 while (!needargs) {
10830 #ifdef PERL_STRICT_CR
10831 while (SPACE_OR_TAB(*t))
10834 while (SPACE_OR_TAB(*t) || *t == '\r')
10837 if (*t == '\n' || t == PL_bufend) {
10842 eol = (char *) memchr(s,'\n',PL_bufend-s);
10846 for (t = s; t < eol; t++) {
10847 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10849 goto enough; /* ~~ must be first line in formline */
10851 if (*t == '@' || *t == '^')
10855 sv_catpvn(stuff, s, eol-s);
10856 #ifndef PERL_STRICT_CR
10857 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10858 char *end = SvPVX(stuff) + SvCUR(stuff);
10861 SvCUR_set(stuff, SvCUR(stuff) - 1);
10869 if ((PL_rsfp || PL_parser->filtered)
10870 && PL_parser->form_lex_state == LEX_NORMAL) {
10873 if (PL_madskills) {
10875 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10877 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10880 PL_bufptr = PL_bufend;
10881 COPLINE_INC_WITH_HERELINES;
10882 got_some = lex_next_chunk(0);
10883 CopLINE_dec(PL_curcop);
10886 tokenstart = PL_bufptr;
10894 if (!SvCUR(stuff) || needargs)
10895 PL_lex_state = PL_parser->form_lex_state;
10896 if (SvCUR(stuff)) {
10897 PL_expect = XSTATE;
10899 start_force(PL_curforce);
10900 NEXTVAL_NEXTTOKE.ival = 0;
10901 force_next(FORMLBRACK);
10904 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10906 else if (PL_encoding)
10907 sv_recode_to_utf8(stuff, PL_encoding);
10909 start_force(PL_curforce);
10910 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10914 SvREFCNT_dec(stuff);
10916 PL_lex_formbrack = 0;
10919 if (PL_madskills) {
10921 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10923 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10924 PL_thiswhite = savewhite;
10931 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10934 const I32 oldsavestack_ix = PL_savestack_ix;
10935 CV* const outsidecv = PL_compcv;
10937 SAVEI32(PL_subline);
10938 save_item(PL_subname);
10939 SAVESPTR(PL_compcv);
10941 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10942 CvFLAGS(PL_compcv) |= flags;
10944 PL_subline = CopLINE(PL_curcop);
10945 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10946 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10947 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10948 if (outsidecv && CvPADLIST(outsidecv))
10949 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10951 return oldsavestack_ix;
10955 #pragma segment Perl_yylex
10958 S_yywarn(pTHX_ const char *const s, U32 flags)
10962 PERL_ARGS_ASSERT_YYWARN;
10964 PL_in_eval |= EVAL_WARNONLY;
10965 yyerror_pv(s, flags);
10966 PL_in_eval &= ~EVAL_WARNONLY;
10971 Perl_yyerror(pTHX_ const char *const s)
10973 PERL_ARGS_ASSERT_YYERROR;
10974 return yyerror_pvn(s, strlen(s), 0);
10978 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10980 PERL_ARGS_ASSERT_YYERROR_PV;
10981 return yyerror_pvn(s, strlen(s), flags);
10985 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10988 const char *context = NULL;
10991 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10992 int yychar = PL_parser->yychar;
10993 U32 is_utf8 = flags & SVf_UTF8;
10995 PERL_ARGS_ASSERT_YYERROR_PVN;
10997 if (!yychar || (yychar == ';' && !PL_rsfp))
10998 sv_catpvs(where_sv, "at EOF");
10999 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11000 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11001 PL_oldbufptr != PL_bufptr) {
11004 The code below is removed for NetWare because it abends/crashes on NetWare
11005 when the script has error such as not having the closing quotes like:
11006 if ($var eq "value)
11007 Checking of white spaces is anyway done in NetWare code.
11010 while (isSPACE(*PL_oldoldbufptr))
11013 context = PL_oldoldbufptr;
11014 contlen = PL_bufptr - PL_oldoldbufptr;
11016 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11017 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11020 The code below is removed for NetWare because it abends/crashes on NetWare
11021 when the script has error such as not having the closing quotes like:
11022 if ($var eq "value)
11023 Checking of white spaces is anyway done in NetWare code.
11026 while (isSPACE(*PL_oldbufptr))
11029 context = PL_oldbufptr;
11030 contlen = PL_bufptr - PL_oldbufptr;
11032 else if (yychar > 255)
11033 sv_catpvs(where_sv, "next token ???");
11034 else if (yychar == -2) { /* YYEMPTY */
11035 if (PL_lex_state == LEX_NORMAL ||
11036 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11037 sv_catpvs(where_sv, "at end of line");
11038 else if (PL_lex_inpat)
11039 sv_catpvs(where_sv, "within pattern");
11041 sv_catpvs(where_sv, "within string");
11044 sv_catpvs(where_sv, "next char ");
11046 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11047 else if (isPRINT_LC(yychar)) {
11048 const char string = yychar;
11049 sv_catpvn(where_sv, &string, 1);
11052 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11054 msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
11055 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11056 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11058 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11059 SVfARG(newSVpvn_flags(context, contlen,
11060 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11062 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11063 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11064 Perl_sv_catpvf(aTHX_ msg,
11065 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11066 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11069 if (PL_in_eval & EVAL_WARNONLY) {
11070 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11074 if (PL_error_count >= 10) {
11075 if (PL_in_eval && SvCUR(ERRSV))
11076 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11077 SVfARG(ERRSV), OutCopFILE(PL_curcop));
11079 Perl_croak(aTHX_ "%s has too many errors.\n",
11080 OutCopFILE(PL_curcop));
11083 PL_in_my_stash = NULL;
11087 #pragma segment Main
11091 S_swallow_bom(pTHX_ U8 *s)
11094 const STRLEN slen = SvCUR(PL_linestr);
11096 PERL_ARGS_ASSERT_SWALLOW_BOM;
11100 if (s[1] == 0xFE) {
11101 /* UTF-16 little-endian? (or UTF-32LE?) */
11102 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11103 /* diag_listed_as: Unsupported script encoding %s */
11104 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11105 #ifndef PERL_NO_UTF16_FILTER
11106 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11108 if (PL_bufend > (char*)s) {
11109 s = add_utf16_textfilter(s, TRUE);
11112 /* diag_listed_as: Unsupported script encoding %s */
11113 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11118 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11119 #ifndef PERL_NO_UTF16_FILTER
11120 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11122 if (PL_bufend > (char *)s) {
11123 s = add_utf16_textfilter(s, FALSE);
11126 /* diag_listed_as: Unsupported script encoding %s */
11127 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11132 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11133 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11134 s += 3; /* UTF-8 */
11140 if (s[2] == 0xFE && s[3] == 0xFF) {
11141 /* UTF-32 big-endian */
11142 /* diag_listed_as: Unsupported script encoding %s */
11143 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11146 else if (s[2] == 0 && s[3] != 0) {
11149 * are a good indicator of UTF-16BE. */
11150 #ifndef PERL_NO_UTF16_FILTER
11151 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11152 s = add_utf16_textfilter(s, FALSE);
11154 /* diag_listed_as: Unsupported script encoding %s */
11155 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11161 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11162 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11163 s += 4; /* UTF-8 */
11169 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11172 * are a good indicator of UTF-16LE. */
11173 #ifndef PERL_NO_UTF16_FILTER
11174 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11175 s = add_utf16_textfilter(s, TRUE);
11177 /* diag_listed_as: Unsupported script encoding %s */
11178 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11186 #ifndef PERL_NO_UTF16_FILTER
11188 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11191 SV *const filter = FILTER_DATA(idx);
11192 /* We re-use this each time round, throwing the contents away before we
11194 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11195 SV *const utf8_buffer = filter;
11196 IV status = IoPAGE(filter);
11197 const bool reverse = cBOOL(IoLINES(filter));
11200 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11202 /* As we're automatically added, at the lowest level, and hence only called
11203 from this file, we can be sure that we're not called in block mode. Hence
11204 don't bother writing code to deal with block mode. */
11206 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11209 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11211 DEBUG_P(PerlIO_printf(Perl_debug_log,
11212 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11213 FPTR2DPTR(void *, S_utf16_textfilter),
11214 reverse ? 'l' : 'b', idx, maxlen, status,
11215 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11222 /* First, look in our buffer of existing UTF-8 data: */
11223 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11227 } else if (status == 0) {
11229 IoPAGE(filter) = 0;
11230 nl = SvEND(utf8_buffer);
11233 STRLEN got = nl - SvPVX(utf8_buffer);
11234 /* Did we have anything to append? */
11236 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11237 /* Everything else in this code works just fine if SVp_POK isn't
11238 set. This, however, needs it, and we need it to work, else
11239 we loop infinitely because the buffer is never consumed. */
11240 sv_chop(utf8_buffer, nl);
11244 /* OK, not a complete line there, so need to read some more UTF-16.
11245 Read an extra octect if the buffer currently has an odd number. */
11249 if (SvCUR(utf16_buffer) >= 2) {
11250 /* Location of the high octet of the last complete code point.
11251 Gosh, UTF-16 is a pain. All the benefits of variable length,
11252 *coupled* with all the benefits of partial reads and
11254 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11255 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11257 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11261 /* We have the first half of a surrogate. Read more. */
11262 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11265 status = FILTER_READ(idx + 1, utf16_buffer,
11266 160 + (SvCUR(utf16_buffer) & 1));
11267 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11268 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11271 IoPAGE(filter) = status;
11276 chars = SvCUR(utf16_buffer) >> 1;
11277 have = SvCUR(utf8_buffer);
11278 SvGROW(utf8_buffer, have + chars * 3 + 1);
11281 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11282 (U8*)SvPVX_const(utf8_buffer) + have,
11283 chars * 2, &newlen);
11285 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11286 (U8*)SvPVX_const(utf8_buffer) + have,
11287 chars * 2, &newlen);
11289 SvCUR_set(utf8_buffer, have + newlen);
11292 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11293 it's private to us, and utf16_to_utf8{,reversed} take a
11294 (pointer,length) pair, rather than a NUL-terminated string. */
11295 if(SvCUR(utf16_buffer) & 1) {
11296 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11297 SvCUR_set(utf16_buffer, 1);
11299 SvCUR_set(utf16_buffer, 0);
11302 DEBUG_P(PerlIO_printf(Perl_debug_log,
11303 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11305 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11306 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11311 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11313 SV *filter = filter_add(S_utf16_textfilter, NULL);
11315 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11317 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11318 sv_setpvs(filter, "");
11319 IoLINES(filter) = reversed;
11320 IoPAGE(filter) = 1; /* Not EOF */
11322 /* Sadly, we have to return a valid pointer, come what may, so we have to
11323 ignore any error return from this. */
11324 SvCUR_set(PL_linestr, 0);
11325 if (FILTER_READ(0, PL_linestr, 0)) {
11326 SvUTF8_on(PL_linestr);
11328 SvUTF8_on(PL_linestr);
11330 PL_bufend = SvEND(PL_linestr);
11331 return (U8*)SvPVX(PL_linestr);
11336 Returns a pointer to the next character after the parsed
11337 vstring, as well as updating the passed in sv.
11339 Function must be called like
11342 s = scan_vstring(s,e,sv);
11344 where s and e are the start and end of the string.
11345 The sv should already be large enough to store the vstring
11346 passed in, for performance reasons.
11351 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11354 const char *pos = s;
11355 const char *start = s;
11357 PERL_ARGS_ASSERT_SCAN_VSTRING;
11359 if (*pos == 'v') pos++; /* get past 'v' */
11360 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11362 if ( *pos != '.') {
11363 /* this may not be a v-string if followed by => */
11364 const char *next = pos;
11365 while (next < e && isSPACE(*next))
11367 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11368 /* return string not v-string */
11369 sv_setpvn(sv,(char *)s,pos-s);
11370 return (char *)pos;
11374 if (!isALPHA(*pos)) {
11375 U8 tmpbuf[UTF8_MAXBYTES+1];
11378 s++; /* get past 'v' */
11383 /* this is atoi() that tolerates underscores */
11386 const char *end = pos;
11388 while (--end >= s) {
11390 const UV orev = rev;
11391 rev += (*end - '0') * mult;
11394 /* diag_listed_as: Integer overflow in %s number */
11395 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11396 "Integer overflow in decimal number");
11400 if (rev > 0x7FFFFFFF)
11401 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11403 /* Append native character for the rev point */
11404 tmpend = uvchr_to_utf8(tmpbuf, rev);
11405 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11406 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11408 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11414 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11418 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11425 Perl_keyword_plugin_standard(pTHX_
11426 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11428 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11429 PERL_UNUSED_CONTEXT;
11430 PERL_UNUSED_ARG(keyword_ptr);
11431 PERL_UNUSED_ARG(keyword_len);
11432 PERL_UNUSED_ARG(op_ptr);
11433 return KEYWORD_PLUGIN_DECLINE;
11436 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11438 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11440 SAVEI32(PL_lex_brackets);
11441 if (PL_lex_brackets > 100)
11442 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11443 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11444 SAVEI32(PL_lex_allbrackets);
11445 PL_lex_allbrackets = 0;
11446 SAVEI8(PL_lex_fakeeof);
11447 PL_lex_fakeeof = (U8)fakeeof;
11448 if(yyparse(gramtype) && !PL_parser->error_count)
11449 qerror(Perl_mess(aTHX_ "Parse error"));
11452 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11454 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11458 SAVEVPTR(PL_eval_root);
11459 PL_eval_root = NULL;
11460 parse_recdescent(gramtype, fakeeof);
11466 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11468 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11471 if (flags & ~PARSE_OPTIONAL)
11472 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11473 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11474 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11475 if (!PL_parser->error_count)
11476 qerror(Perl_mess(aTHX_ "Parse error"));
11477 exprop = newOP(OP_NULL, 0);
11483 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11485 Parse a Perl arithmetic expression. This may contain operators of precedence
11486 down to the bit shift operators. The expression must be followed (and thus
11487 terminated) either by a comparison or lower-precedence operator or by
11488 something that would normally terminate an expression such as semicolon.
11489 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11490 otherwise it is mandatory. It is up to the caller to ensure that the
11491 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11492 the source of the code to be parsed and the lexical context for the
11495 The op tree representing the expression is returned. If an optional
11496 expression is absent, a null pointer is returned, otherwise the pointer
11499 If an error occurs in parsing or compilation, in most cases a valid op
11500 tree is returned anyway. The error is reflected in the parser state,
11501 normally resulting in a single exception at the top level of parsing
11502 which covers all the compilation errors that occurred. Some compilation
11503 errors, however, will throw an exception immediately.
11509 Perl_parse_arithexpr(pTHX_ U32 flags)
11511 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11515 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11517 Parse a Perl term expression. This may contain operators of precedence
11518 down to the assignment operators. The expression must be followed (and thus
11519 terminated) either by a comma or lower-precedence operator or by
11520 something that would normally terminate an expression such as semicolon.
11521 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11522 otherwise it is mandatory. It is up to the caller to ensure that the
11523 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11524 the source of the code to be parsed and the lexical context for the
11527 The op tree representing the expression is returned. If an optional
11528 expression is absent, a null pointer is returned, otherwise the pointer
11531 If an error occurs in parsing or compilation, in most cases a valid op
11532 tree is returned anyway. The error is reflected in the parser state,
11533 normally resulting in a single exception at the top level of parsing
11534 which covers all the compilation errors that occurred. Some compilation
11535 errors, however, will throw an exception immediately.
11541 Perl_parse_termexpr(pTHX_ U32 flags)
11543 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11547 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11549 Parse a Perl list expression. This may contain operators of precedence
11550 down to the comma operator. The expression must be followed (and thus
11551 terminated) either by a low-precedence logic operator such as C<or> or by
11552 something that would normally terminate an expression such as semicolon.
11553 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11554 otherwise it is mandatory. It is up to the caller to ensure that the
11555 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11556 the source of the code to be parsed and the lexical context for the
11559 The op tree representing the expression is returned. If an optional
11560 expression is absent, a null pointer is returned, otherwise the pointer
11563 If an error occurs in parsing or compilation, in most cases a valid op
11564 tree is returned anyway. The error is reflected in the parser state,
11565 normally resulting in a single exception at the top level of parsing
11566 which covers all the compilation errors that occurred. Some compilation
11567 errors, however, will throw an exception immediately.
11573 Perl_parse_listexpr(pTHX_ U32 flags)
11575 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11579 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11581 Parse a single complete Perl expression. This allows the full
11582 expression grammar, including the lowest-precedence operators such
11583 as C<or>. The expression must be followed (and thus terminated) by a
11584 token that an expression would normally be terminated by: end-of-file,
11585 closing bracketing punctuation, semicolon, or one of the keywords that
11586 signals a postfix expression-statement modifier. If I<flags> includes
11587 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11588 mandatory. It is up to the caller to ensure that the dynamic parser
11589 state (L</PL_parser> et al) is correctly set to reflect the source of
11590 the code to be parsed and the lexical context for the expression.
11592 The op tree representing the expression is returned. If an optional
11593 expression is absent, a null pointer is returned, otherwise the pointer
11596 If an error occurs in parsing or compilation, in most cases a valid op
11597 tree is returned anyway. The error is reflected in the parser state,
11598 normally resulting in a single exception at the top level of parsing
11599 which covers all the compilation errors that occurred. Some compilation
11600 errors, however, will throw an exception immediately.
11606 Perl_parse_fullexpr(pTHX_ U32 flags)
11608 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11612 =for apidoc Amx|OP *|parse_block|U32 flags
11614 Parse a single complete Perl code block. This consists of an opening
11615 brace, a sequence of statements, and a closing brace. The block
11616 constitutes a lexical scope, so C<my> variables and various compile-time
11617 effects can be contained within it. It is up to the caller to ensure
11618 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11619 reflect the source of the code to be parsed and the lexical context for
11622 The op tree representing the code block is returned. This is always a
11623 real op, never a null pointer. It will normally be a C<lineseq> list,
11624 including C<nextstate> or equivalent ops. No ops to construct any kind
11625 of runtime scope are included by virtue of it being a block.
11627 If an error occurs in parsing or compilation, in most cases a valid op
11628 tree (most likely null) is returned anyway. The error is reflected in
11629 the parser state, normally resulting in a single exception at the top
11630 level of parsing which covers all the compilation errors that occurred.
11631 Some compilation errors, however, will throw an exception immediately.
11633 The I<flags> parameter is reserved for future use, and must always
11640 Perl_parse_block(pTHX_ U32 flags)
11643 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11644 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11648 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11650 Parse a single unadorned Perl statement. This may be a normal imperative
11651 statement or a declaration that has compile-time effect. It does not
11652 include any label or other affixture. It is up to the caller to ensure
11653 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11654 reflect the source of the code to be parsed and the lexical context for
11657 The op tree representing the statement is returned. This may be a
11658 null pointer if the statement is null, for example if it was actually
11659 a subroutine definition (which has compile-time side effects). If not
11660 null, it will be ops directly implementing the statement, suitable to
11661 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11662 equivalent op (except for those embedded in a scope contained entirely
11663 within the statement).
11665 If an error occurs in parsing or compilation, in most cases a valid op
11666 tree (most likely null) is returned anyway. The error is reflected in
11667 the parser state, normally resulting in a single exception at the top
11668 level of parsing which covers all the compilation errors that occurred.
11669 Some compilation errors, however, will throw an exception immediately.
11671 The I<flags> parameter is reserved for future use, and must always
11678 Perl_parse_barestmt(pTHX_ U32 flags)
11681 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11682 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11686 =for apidoc Amx|SV *|parse_label|U32 flags
11688 Parse a single label, possibly optional, of the type that may prefix a
11689 Perl statement. It is up to the caller to ensure that the dynamic parser
11690 state (L</PL_parser> et al) is correctly set to reflect the source of
11691 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11692 label is optional, otherwise it is mandatory.
11694 The name of the label is returned in the form of a fresh scalar. If an
11695 optional label is absent, a null pointer is returned.
11697 If an error occurs in parsing, which can only occur if the label is
11698 mandatory, a valid label is returned anyway. The error is reflected in
11699 the parser state, normally resulting in a single exception at the top
11700 level of parsing which covers all the compilation errors that occurred.
11706 Perl_parse_label(pTHX_ U32 flags)
11708 if (flags & ~PARSE_OPTIONAL)
11709 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11710 if (PL_lex_state == LEX_KNOWNEXT) {
11711 PL_parser->yychar = yylex();
11712 if (PL_parser->yychar == LABEL) {
11714 PL_parser->yychar = YYEMPTY;
11715 lsv = newSV_type(SVt_PV);
11716 sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
11724 STRLEN wlen, bufptr_pos;
11727 if (!isIDFIRST_lazy_if(s, UTF))
11729 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11730 if (word_takes_any_delimeter(s, wlen))
11732 bufptr_pos = s - SvPVX(PL_linestr);
11734 lex_read_space(LEX_KEEP_PREVIOUS);
11736 s = SvPVX(PL_linestr) + bufptr_pos;
11737 if (t[0] == ':' && t[1] != ':') {
11738 PL_oldoldbufptr = PL_oldbufptr;
11741 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11745 if (flags & PARSE_OPTIONAL) {
11748 qerror(Perl_mess(aTHX_ "Parse error"));
11749 return newSVpvs("x");
11756 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11758 Parse a single complete Perl statement. This may be a normal imperative
11759 statement or a declaration that has compile-time effect, and may include
11760 optional labels. It is up to the caller to ensure that the dynamic
11761 parser state (L</PL_parser> et al) is correctly set to reflect the source
11762 of the code to be parsed and the lexical context for the statement.
11764 The op tree representing the statement is returned. This may be a
11765 null pointer if the statement is null, for example if it was actually
11766 a subroutine definition (which has compile-time side effects). If not
11767 null, it will be the result of a L</newSTATEOP> call, normally including
11768 a C<nextstate> or equivalent op.
11770 If an error occurs in parsing or compilation, in most cases a valid op
11771 tree (most likely null) is returned anyway. The error is reflected in
11772 the parser state, normally resulting in a single exception at the top
11773 level of parsing which covers all the compilation errors that occurred.
11774 Some compilation errors, however, will throw an exception immediately.
11776 The I<flags> parameter is reserved for future use, and must always
11783 Perl_parse_fullstmt(pTHX_ U32 flags)
11786 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11787 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11791 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11793 Parse a sequence of zero or more Perl statements. These may be normal
11794 imperative statements, including optional labels, or declarations
11795 that have compile-time effect, or any mixture thereof. The statement
11796 sequence ends when a closing brace or end-of-file is encountered in a
11797 place where a new statement could have validly started. It is up to
11798 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11799 is correctly set to reflect the source of the code to be parsed and the
11800 lexical context for the statements.
11802 The op tree representing the statement sequence is returned. This may
11803 be a null pointer if the statements were all null, for example if there
11804 were no statements or if there were only subroutine definitions (which
11805 have compile-time side effects). If not null, it will be a C<lineseq>
11806 list, normally including C<nextstate> or equivalent ops.
11808 If an error occurs in parsing or compilation, in most cases a valid op
11809 tree is returned anyway. The error is reflected in the parser state,
11810 normally resulting in a single exception at the top level of parsing
11811 which covers all the compilation errors that occurred. Some compilation
11812 errors, however, will throw an exception immediately.
11814 The I<flags> parameter is reserved for future use, and must always
11821 Perl_parse_stmtseq(pTHX_ U32 flags)
11826 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11827 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11828 c = lex_peek_unichar(0);
11829 if (c != -1 && c != /*{*/'}')
11830 qerror(Perl_mess(aTHX_ "Parse error"));
11836 * c-indentation-style: bsd
11837 * c-basic-offset: 4
11838 * indent-tabs-mode: nil
11841 * ex: set ts=8 sts=4 sw=4 et: