3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char ident_too_long[] = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
314 /* how to interpret the pl_yylval associated with the token */
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
323 static struct debug_tokens {
325 enum token_type type;
327 } const debug_tokens[] =
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
412 PERL_ARGS_ASSERT_TOKEREPORT;
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
432 sv_catpvs(report, "EOF");
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
443 PL_op_name[lvalp->ival]);
446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
448 case TOKENTYPE_OPVAL:
450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
453 Perl_sv_catpvf(aTHX_ report, " %s",
454 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 sv_catpvs(report, "(opval=null)");
462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
468 /* print the buffer with suitable escapes */
471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
473 SV* const tmp = newSVpvs("");
475 PERL_ARGS_ASSERT_PRINTBUF;
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
484 S_deprecate_commaless_var_list(pTHX) {
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
498 S_ao(pTHX_ int toketype)
501 if (*PL_bufptr == '=') {
503 if (toketype == ANDAND)
504 pl_yylval.ival = OP_ANDASSIGN;
505 else if (toketype == OROR)
506 pl_yylval.ival = OP_ORASSIGN;
507 else if (toketype == DORDOR)
508 pl_yylval.ival = OP_DORASSIGN;
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
528 S_no_op(pTHX_ const char *const what, char *s)
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
534 PERL_ARGS_ASSERT_NO_OP;
540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
541 if (ckWARN_d(WARN_SYNTAX)) {
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
569 * Complain about missing quote/regexp/heredoc terminator.
570 * If it's called with NULL then it cauterizes the line buffer.
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
577 S_missingterm(pTHX_ char *s)
583 char * const nl = strrchr(s,'\n');
587 else if (isCNTRL(PL_multi_close)) {
589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
594 *tmpbuf = (char)PL_multi_close;
598 q = strchr(s,'"') ? '\'' : '"';
599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
605 * Check whether the named feature is enabled.
608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
617 if (namelen > MAX_FEATURE_LEN)
619 memcpy(&he_name[8], name, namelen);
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
630 #ifdef PERL_CR_FILTER
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
637 PERL_ARGS_ASSERT_STRIP_RETURN;
639 /* outer loop optimized to do nothing if there are no CR-LFs */
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
646 if (*s == '\r' && s[1] == '\n')
657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
660 if (count > 0 && !maxlen)
667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
669 Creates and initialises a new lexer/parser state object, supplying
670 a context in which to lex and parse from a new source of Perl code.
671 A pointer to the new state object is placed in L</PL_parser>. An entry
672 is made on the save stack so that upon unwinding the new state object
673 will be destroyed and the former value of L</PL_parser> will be restored.
674 Nothing else need be done to clean up the parsing context.
676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677 non-null, provides a string (in SV form) containing code to be parsed.
678 A copy of the string is made, so subsequent modification of I<line>
679 does not affect parsing. I<rsfp>, if non-null, provides an input stream
680 from which code will be read to be parsed. If both are non-null, the
681 code in I<line> comes first and must consist of complete lines of input,
682 and I<rsfp> supplies the remainder of the source.
684 The I<flags> parameter is reserved for future use. Currently it is only
685 used by perl internally, so extensions should always pass zero.
690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
703 const char *s = NULL;
704 yy_parser *parser, *oparser;
705 if (flags && flags & ~LEX_START_FLAGS)
706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
708 /* create and initialise a parser */
710 Newxz(parser, 1, yy_parser);
711 parser->old_parser = oparser = PL_parser;
714 parser->stack = NULL;
716 parser->stack_size = 0;
718 /* on scope exit, free this parser and restore any outer one */
720 parser->saved_curcop = PL_curcop;
722 /* initialise lexer state */
725 parser->curforce = -1;
727 parser->nexttoke = 0;
729 parser->error_count = oparser ? oparser->error_count : 0;
730 parser->copline = NOLINE;
731 parser->lex_state = LEX_NORMAL;
732 parser->expect = XSTATE;
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
746 Newxz(parser->lex_shared, 1, LEXSHARED);
750 s = SvPV_const(line, len);
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
754 sv_catpvs(parser->linestr, "\n;");
756 parser->linestr = newSVpvs("\n;");
758 parser->oldoldbufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
767 parser->in_pod = parser->filtered = 0;
771 /* delete a parser object */
774 Perl_parser_free(pTHX_ const yy_parser *parser)
777 I32 nexttoke = parser->lasttoke;
779 I32 nexttoke = parser->nexttoke;
782 PERL_ARGS_ASSERT_PARSER_FREE;
784 PL_curcop = parser->saved_curcop;
785 SvREFCNT_dec(parser->linestr);
787 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
788 PerlIO_clearerr(parser->rsfp);
789 else if (parser->rsfp && (!parser->old_parser ||
790 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
791 PerlIO_close(parser->rsfp);
792 SvREFCNT_dec(parser->rsfp_filters);
793 SvREFCNT_dec(parser->lex_stuff);
794 SvREFCNT_dec(parser->sublex_info.repl);
797 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
799 op_free(parser->nexttoke[nexttoke].next_val.opval);
801 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff))
802 op_free(parser->nextval[nexttoke].opval);
806 Safefree(parser->lex_brackstack);
807 Safefree(parser->lex_casestack);
808 Safefree(parser->lex_shared);
809 PL_parser = parser->old_parser;
815 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
817 Buffer scalar containing the chunk currently under consideration of the
818 text currently being lexed. This is always a plain string scalar (for
819 which C<SvPOK> is true). It is not intended to be used as a scalar by
820 normal scalar means; instead refer to the buffer directly by the pointer
821 variables described below.
823 The lexer maintains various C<char*> pointers to things in the
824 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
825 reallocated, all of these pointers must be updated. Don't attempt to
826 do this manually, but rather use L</lex_grow_linestr> if you need to
827 reallocate the buffer.
829 The content of the text chunk in the buffer is commonly exactly one
830 complete line of input, up to and including a newline terminator,
831 but there are situations where it is otherwise. The octets of the
832 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
833 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
834 flag on this scalar, which may disagree with it.
836 For direct examination of the buffer, the variable
837 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
838 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
839 of these pointers is usually preferable to examination of the scalar
840 through normal scalar means.
842 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
844 Direct pointer to the end of the chunk of text currently being lexed, the
845 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
846 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
847 always located at the end of the buffer, and does not count as part of
848 the buffer's contents.
850 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
852 Points to the current position of lexing inside the lexer buffer.
853 Characters around this point may be freely examined, within
854 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
855 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
856 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
858 Lexing code (whether in the Perl core or not) moves this pointer past
859 the characters that it consumes. It is also expected to perform some
860 bookkeeping whenever a newline character is consumed. This movement
861 can be more conveniently performed by the function L</lex_read_to>,
862 which handles newlines appropriately.
864 Interpretation of the buffer's octets can be abstracted out by
865 using the slightly higher-level functions L</lex_peek_unichar> and
866 L</lex_read_unichar>.
868 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
870 Points to the start of the current line inside the lexer buffer.
871 This is useful for indicating at which column an error occurred, and
872 not much else. This must be updated by any lexing code that consumes
873 a newline; the function L</lex_read_to> handles this detail.
879 =for apidoc Amx|bool|lex_bufutf8
881 Indicates whether the octets in the lexer buffer
882 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
883 of Unicode characters. If not, they should be interpreted as Latin-1
884 characters. This is analogous to the C<SvUTF8> flag for scalars.
886 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
887 contains valid UTF-8. Lexing code must be robust in the face of invalid
890 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
891 is significant, but not the whole story regarding the input character
892 encoding. Normally, when a file is being read, the scalar contains octets
893 and its C<SvUTF8> flag is off, but the octets should be interpreted as
894 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
895 however, the scalar may have the C<SvUTF8> flag on, and in this case its
896 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
897 is in effect. This logic may change in the future; use this function
898 instead of implementing the logic yourself.
904 Perl_lex_bufutf8(pTHX)
910 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
912 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
913 at least I<len> octets (including terminating NUL). Returns a
914 pointer to the reallocated buffer. This is necessary before making
915 any direct modification of the buffer that would increase its length.
916 L</lex_stuff_pvn> provides a more convenient way to insert text into
919 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
920 this function updates all of the lexer's variables that point directly
927 Perl_lex_grow_linestr(pTHX_ STRLEN len)
931 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
932 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
933 linestr = PL_parser->linestr;
934 buf = SvPVX(linestr);
935 if (len <= SvLEN(linestr))
937 bufend_pos = PL_parser->bufend - buf;
938 bufptr_pos = PL_parser->bufptr - buf;
939 oldbufptr_pos = PL_parser->oldbufptr - buf;
940 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
941 linestart_pos = PL_parser->linestart - buf;
942 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
943 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
944 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
945 PL_parser->lex_shared->re_eval_start - buf : 0;
947 buf = sv_grow(linestr, len);
949 PL_parser->bufend = buf + bufend_pos;
950 PL_parser->bufptr = buf + bufptr_pos;
951 PL_parser->oldbufptr = buf + oldbufptr_pos;
952 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
953 PL_parser->linestart = buf + linestart_pos;
954 if (PL_parser->last_uni)
955 PL_parser->last_uni = buf + last_uni_pos;
956 if (PL_parser->last_lop)
957 PL_parser->last_lop = buf + last_lop_pos;
958 if (PL_parser->lex_shared->re_eval_start)
959 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
964 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
966 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
967 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
968 reallocating the buffer if necessary. This means that lexing code that
969 runs later will see the characters as if they had appeared in the input.
970 It is not recommended to do this as part of normal parsing, and most
971 uses of this facility run the risk of the inserted characters being
972 interpreted in an unintended manner.
974 The string to be inserted is represented by I<len> octets starting
975 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
976 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
977 The characters are recoded for the lexer buffer, according to how the
978 buffer is currently being interpreted (L</lex_bufutf8>). If a string
979 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
980 function is more convenient.
986 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
990 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
991 if (flags & ~(LEX_STUFF_UTF8))
992 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
994 if (flags & LEX_STUFF_UTF8) {
997 STRLEN highhalf = 0; /* Count of variants */
998 const char *p, *e = pv+len;
999 for (p = pv; p != e; p++) {
1000 if (! UTF8_IS_INVARIANT(*p)) {
1006 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1007 bufptr = PL_parser->bufptr;
1008 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1009 SvCUR_set(PL_parser->linestr,
1010 SvCUR(PL_parser->linestr) + len+highhalf);
1011 PL_parser->bufend += len+highhalf;
1012 for (p = pv; p != e; p++) {
1014 if (! UTF8_IS_INVARIANT(c)) {
1015 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1016 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1018 *bufptr++ = (char)c;
1023 if (flags & LEX_STUFF_UTF8) {
1024 STRLEN highhalf = 0;
1025 const char *p, *e = pv+len;
1026 for (p = pv; p != e; p++) {
1028 if (UTF8_IS_ABOVE_LATIN1(c)) {
1029 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1030 "non-Latin-1 character into Latin-1 input");
1031 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1034 } else if (! UTF8_IS_INVARIANT(c)) {
1035 /* malformed UTF-8 */
1037 SAVESPTR(PL_warnhook);
1038 PL_warnhook = PERL_WARNHOOK_FATAL;
1039 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1045 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1046 bufptr = PL_parser->bufptr;
1047 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1048 SvCUR_set(PL_parser->linestr,
1049 SvCUR(PL_parser->linestr) + len-highhalf);
1050 PL_parser->bufend += len-highhalf;
1053 if (UTF8_IS_INVARIANT(*p)) {
1059 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1065 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1066 bufptr = PL_parser->bufptr;
1067 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1068 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1069 PL_parser->bufend += len;
1070 Copy(pv, bufptr, len, char);
1076 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1078 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1079 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1080 reallocating the buffer if necessary. This means that lexing code that
1081 runs later will see the characters as if they had appeared in the input.
1082 It is not recommended to do this as part of normal parsing, and most
1083 uses of this facility run the risk of the inserted characters being
1084 interpreted in an unintended manner.
1086 The string to be inserted is represented by octets starting at I<pv>
1087 and continuing to the first nul. These octets are interpreted as either
1088 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1089 in I<flags>. The characters are recoded for the lexer buffer, according
1090 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1091 If it is not convenient to nul-terminate a string to be inserted, the
1092 L</lex_stuff_pvn> function is more appropriate.
1098 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1100 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1101 lex_stuff_pvn(pv, strlen(pv), flags);
1105 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1107 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1108 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1109 reallocating the buffer if necessary. This means that lexing code that
1110 runs later will see the characters as if they had appeared in the input.
1111 It is not recommended to do this as part of normal parsing, and most
1112 uses of this facility run the risk of the inserted characters being
1113 interpreted in an unintended manner.
1115 The string to be inserted is the string value of I<sv>. The characters
1116 are recoded for the lexer buffer, according to how the buffer is currently
1117 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1118 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1119 need to construct a scalar.
1125 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1129 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1131 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1133 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1137 =for apidoc Amx|void|lex_unstuff|char *ptr
1139 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1140 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1141 This hides the discarded text from any lexing code that runs later,
1142 as if the text had never appeared.
1144 This is not the normal way to consume lexed text. For that, use
1151 Perl_lex_unstuff(pTHX_ char *ptr)
1155 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1156 buf = PL_parser->bufptr;
1158 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1161 bufend = PL_parser->bufend;
1163 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1164 unstuff_len = ptr - buf;
1165 Move(ptr, buf, bufend+1-ptr, char);
1166 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1167 PL_parser->bufend = bufend - unstuff_len;
1171 =for apidoc Amx|void|lex_read_to|char *ptr
1173 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1174 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1175 performing the correct bookkeeping whenever a newline character is passed.
1176 This is the normal way to consume lexed text.
1178 Interpretation of the buffer's octets can be abstracted out by
1179 using the slightly higher-level functions L</lex_peek_unichar> and
1180 L</lex_read_unichar>.
1186 Perl_lex_read_to(pTHX_ char *ptr)
1189 PERL_ARGS_ASSERT_LEX_READ_TO;
1190 s = PL_parser->bufptr;
1191 if (ptr < s || ptr > PL_parser->bufend)
1192 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1193 for (; s != ptr; s++)
1195 COPLINE_INC_WITH_HERELINES;
1196 PL_parser->linestart = s+1;
1198 PL_parser->bufptr = ptr;
1202 =for apidoc Amx|void|lex_discard_to|char *ptr
1204 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1205 up to I<ptr>. The remaining content of the buffer will be moved, and
1206 all pointers into the buffer updated appropriately. I<ptr> must not
1207 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1208 it is not permitted to discard text that has yet to be lexed.
1210 Normally it is not necessarily to do this directly, because it suffices to
1211 use the implicit discarding behaviour of L</lex_next_chunk> and things
1212 based on it. However, if a token stretches across multiple lines,
1213 and the lexing code has kept multiple lines of text in the buffer for
1214 that purpose, then after completion of the token it would be wise to
1215 explicitly discard the now-unneeded earlier lines, to avoid future
1216 multi-line tokens growing the buffer without bound.
1222 Perl_lex_discard_to(pTHX_ char *ptr)
1226 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1227 buf = SvPVX(PL_parser->linestr);
1229 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1232 if (ptr > PL_parser->bufptr)
1233 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1234 discard_len = ptr - buf;
1235 if (PL_parser->oldbufptr < ptr)
1236 PL_parser->oldbufptr = ptr;
1237 if (PL_parser->oldoldbufptr < ptr)
1238 PL_parser->oldoldbufptr = ptr;
1239 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1240 PL_parser->last_uni = NULL;
1241 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1242 PL_parser->last_lop = NULL;
1243 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1244 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1245 PL_parser->bufend -= discard_len;
1246 PL_parser->bufptr -= discard_len;
1247 PL_parser->oldbufptr -= discard_len;
1248 PL_parser->oldoldbufptr -= discard_len;
1249 if (PL_parser->last_uni)
1250 PL_parser->last_uni -= discard_len;
1251 if (PL_parser->last_lop)
1252 PL_parser->last_lop -= discard_len;
1256 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1258 Reads in the next chunk of text to be lexed, appending it to
1259 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1260 looked to the end of the current chunk and wants to know more. It is
1261 usual, but not necessary, for lexing to have consumed the entirety of
1262 the current chunk at this time.
1264 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1265 chunk (i.e., the current chunk has been entirely consumed), normally the
1266 current chunk will be discarded at the same time that the new chunk is
1267 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1268 will not be discarded. If the current chunk has not been entirely
1269 consumed, then it will not be discarded regardless of the flag.
1271 Returns true if some new text was added to the buffer, or false if the
1272 buffer has reached the end of the input text.
1277 #define LEX_FAKE_EOF 0x80000000
1278 #define LEX_NO_TERM 0x40000000
1281 Perl_lex_next_chunk(pTHX_ U32 flags)
1285 STRLEN old_bufend_pos, new_bufend_pos;
1286 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1287 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1288 bool got_some_for_debugger = 0;
1290 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1291 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1292 linestr = PL_parser->linestr;
1293 buf = SvPVX(linestr);
1294 if (!(flags & LEX_KEEP_PREVIOUS) &&
1295 PL_parser->bufptr == PL_parser->bufend) {
1296 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1298 if (PL_parser->last_uni != PL_parser->bufend)
1299 PL_parser->last_uni = NULL;
1300 if (PL_parser->last_lop != PL_parser->bufend)
1301 PL_parser->last_lop = NULL;
1302 last_uni_pos = last_lop_pos = 0;
1306 old_bufend_pos = PL_parser->bufend - buf;
1307 bufptr_pos = PL_parser->bufptr - buf;
1308 oldbufptr_pos = PL_parser->oldbufptr - buf;
1309 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1310 linestart_pos = PL_parser->linestart - buf;
1311 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1312 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1314 if (flags & LEX_FAKE_EOF) {
1316 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1318 } else if (filter_gets(linestr, old_bufend_pos)) {
1320 got_some_for_debugger = 1;
1321 } else if (flags & LEX_NO_TERM) {
1324 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1325 sv_setpvs(linestr, "");
1327 /* End of real input. Close filehandle (unless it was STDIN),
1328 * then add implicit termination.
1330 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1331 PerlIO_clearerr(PL_parser->rsfp);
1332 else if (PL_parser->rsfp)
1333 (void)PerlIO_close(PL_parser->rsfp);
1334 PL_parser->rsfp = NULL;
1335 PL_parser->in_pod = PL_parser->filtered = 0;
1337 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1340 if (!PL_in_eval && PL_minus_p) {
1342 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1343 PL_minus_n = PL_minus_p = 0;
1344 } else if (!PL_in_eval && PL_minus_n) {
1345 sv_catpvs(linestr, /*{*/";}");
1348 sv_catpvs(linestr, ";");
1351 buf = SvPVX(linestr);
1352 new_bufend_pos = SvCUR(linestr);
1353 PL_parser->bufend = buf + new_bufend_pos;
1354 PL_parser->bufptr = buf + bufptr_pos;
1355 PL_parser->oldbufptr = buf + oldbufptr_pos;
1356 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1357 PL_parser->linestart = buf + linestart_pos;
1358 if (PL_parser->last_uni)
1359 PL_parser->last_uni = buf + last_uni_pos;
1360 if (PL_parser->last_lop)
1361 PL_parser->last_lop = buf + last_lop_pos;
1362 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1363 PL_curstash != PL_debstash) {
1364 /* debugger active and we're not compiling the debugger code,
1365 * so store the line into the debugger's array of lines
1367 update_debugger_info(NULL, buf+old_bufend_pos,
1368 new_bufend_pos-old_bufend_pos);
1374 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1376 Looks ahead one (Unicode) character in the text currently being lexed.
1377 Returns the codepoint (unsigned integer value) of the next character,
1378 or -1 if lexing has reached the end of the input text. To consume the
1379 peeked character, use L</lex_read_unichar>.
1381 If the next character is in (or extends into) the next chunk of input
1382 text, the next chunk will be read in. Normally the current chunk will be
1383 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1384 then the current chunk will not be discarded.
1386 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1387 is encountered, an exception is generated.
1393 Perl_lex_peek_unichar(pTHX_ U32 flags)
1397 if (flags & ~(LEX_KEEP_PREVIOUS))
1398 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1399 s = PL_parser->bufptr;
1400 bufend = PL_parser->bufend;
1406 if (!lex_next_chunk(flags))
1408 s = PL_parser->bufptr;
1409 bufend = PL_parser->bufend;
1412 if (UTF8_IS_INVARIANT(head))
1414 if (UTF8_IS_START(head)) {
1415 len = UTF8SKIP(&head);
1416 while ((STRLEN)(bufend-s) < len) {
1417 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1419 s = PL_parser->bufptr;
1420 bufend = PL_parser->bufend;
1423 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1424 if (retlen == (STRLEN)-1) {
1425 /* malformed UTF-8 */
1427 SAVESPTR(PL_warnhook);
1428 PL_warnhook = PERL_WARNHOOK_FATAL;
1429 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1435 if (!lex_next_chunk(flags))
1437 s = PL_parser->bufptr;
1444 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1446 Reads the next (Unicode) character in the text currently being lexed.
1447 Returns the codepoint (unsigned integer value) of the character read,
1448 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1449 if lexing has reached the end of the input text. To non-destructively
1450 examine the next character, use L</lex_peek_unichar> instead.
1452 If the next character is in (or extends into) the next chunk of input
1453 text, the next chunk will be read in. Normally the current chunk will be
1454 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1455 then the current chunk will not be discarded.
1457 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1458 is encountered, an exception is generated.
1464 Perl_lex_read_unichar(pTHX_ U32 flags)
1467 if (flags & ~(LEX_KEEP_PREVIOUS))
1468 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1469 c = lex_peek_unichar(flags);
1472 COPLINE_INC_WITH_HERELINES;
1474 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1476 ++(PL_parser->bufptr);
1482 =for apidoc Amx|void|lex_read_space|U32 flags
1484 Reads optional spaces, in Perl style, in the text currently being
1485 lexed. The spaces may include ordinary whitespace characters and
1486 Perl-style comments. C<#line> directives are processed if encountered.
1487 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1488 at a non-space character (or the end of the input text).
1490 If spaces extend into the next chunk of input text, the next chunk will
1491 be read in. Normally the current chunk will be discarded at the same
1492 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1493 chunk will not be discarded.
1498 #define LEX_NO_NEXT_CHUNK 0x80000000
1501 Perl_lex_read_space(pTHX_ U32 flags)
1504 bool need_incline = 0;
1505 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1506 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1509 sv_free(PL_skipwhite);
1510 PL_skipwhite = NULL;
1513 PL_skipwhite = newSVpvs("");
1514 #endif /* PERL_MAD */
1515 s = PL_parser->bufptr;
1516 bufend = PL_parser->bufend;
1522 } while (!(c == '\n' || (c == 0 && s == bufend)));
1523 } else if (c == '\n') {
1525 PL_parser->linestart = s;
1530 } else if (isSPACE(c)) {
1532 } else if (c == 0 && s == bufend) {
1536 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1537 #endif /* PERL_MAD */
1538 if (flags & LEX_NO_NEXT_CHUNK)
1540 PL_parser->bufptr = s;
1541 COPLINE_INC_WITH_HERELINES;
1542 got_more = lex_next_chunk(flags);
1543 CopLINE_dec(PL_curcop);
1544 s = PL_parser->bufptr;
1545 bufend = PL_parser->bufend;
1548 if (need_incline && PL_parser->rsfp) {
1558 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1559 #endif /* PERL_MAD */
1560 PL_parser->bufptr = s;
1565 * This subroutine has nothing to do with tilting, whether at windmills
1566 * or pinball tables. Its name is short for "increment line". It
1567 * increments the current line number in CopLINE(PL_curcop) and checks
1568 * to see whether the line starts with a comment of the form
1569 * # line 500 "foo.pm"
1570 * If so, it sets the current line number and file to the values in the comment.
1574 S_incline(pTHX_ const char *s)
1582 PERL_ARGS_ASSERT_INCLINE;
1584 COPLINE_INC_WITH_HERELINES;
1585 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1586 && s+1 == PL_bufend && *s == ';') {
1587 /* fake newline in string eval */
1588 CopLINE_dec(PL_curcop);
1593 while (SPACE_OR_TAB(*s))
1595 if (strnEQ(s, "line", 4))
1599 if (SPACE_OR_TAB(*s))
1603 while (SPACE_OR_TAB(*s))
1611 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1613 while (SPACE_OR_TAB(*s))
1615 if (*s == '"' && (t = strchr(s+1, '"'))) {
1621 while (!isSPACE(*t))
1625 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1627 if (*e != '\n' && *e != '\0')
1628 return; /* false alarm */
1630 line_num = atoi(n)-1;
1633 const STRLEN len = t - s;
1634 SV *const temp_sv = CopFILESV(PL_curcop);
1639 cf = SvPVX(temp_sv);
1640 tmplen = SvCUR(temp_sv);
1646 if (!PL_rsfp && !PL_parser->filtered) {
1647 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1648 * to *{"::_<newfilename"} */
1649 /* However, the long form of evals is only turned on by the
1650 debugger - usually they're "(eval %lu)" */
1654 STRLEN tmplen2 = len;
1655 if (tmplen + 2 <= sizeof smallbuf)
1658 Newx(tmpbuf, tmplen + 2, char);
1661 memcpy(tmpbuf + 2, cf, tmplen);
1663 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1668 if (tmplen2 + 2 <= sizeof smallbuf)
1671 Newx(tmpbuf2, tmplen2 + 2, char);
1673 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1674 /* Either they malloc'd it, or we malloc'd it,
1675 so no prefix is present in ours. */
1680 memcpy(tmpbuf2 + 2, s, tmplen2);
1683 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1685 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1686 /* adjust ${"::_<newfilename"} to store the new file name */
1687 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1688 /* The line number may differ. If that is the case,
1689 alias the saved lines that are in the array.
1690 Otherwise alias the whole array. */
1691 if (CopLINE(PL_curcop) == line_num) {
1692 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1693 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1695 else if (GvAV(*gvp)) {
1696 AV * const av = GvAV(*gvp);
1697 const I32 start = CopLINE(PL_curcop)+1;
1698 I32 items = AvFILLp(av) - start;
1700 AV * const av2 = GvAVn(gv2);
1701 SV **svp = AvARRAY(av) + start;
1702 I32 l = (I32)line_num+1;
1704 av_store(av2, l++, SvREFCNT_inc(*svp++));
1709 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1711 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1713 CopFILE_free(PL_curcop);
1714 CopFILE_setn(PL_curcop, s, len);
1716 CopLINE_set(PL_curcop, line_num);
1720 /* skip space before PL_thistoken */
1723 S_skipspace0(pTHX_ register char *s)
1725 PERL_ARGS_ASSERT_SKIPSPACE0;
1732 PL_thiswhite = newSVpvs("");
1733 sv_catsv(PL_thiswhite, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1737 PL_realtokenstart = s - SvPVX(PL_linestr);
1741 /* skip space after PL_thistoken */
1744 S_skipspace1(pTHX_ register char *s)
1746 const char *start = s;
1747 I32 startoff = start - SvPVX(PL_linestr);
1749 PERL_ARGS_ASSERT_SKIPSPACE1;
1754 start = SvPVX(PL_linestr) + startoff;
1755 if (!PL_thistoken && PL_realtokenstart >= 0) {
1756 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1757 PL_thistoken = newSVpvn(tstart, start - tstart);
1759 PL_realtokenstart = -1;
1762 PL_nextwhite = newSVpvs("");
1763 sv_catsv(PL_nextwhite, PL_skipwhite);
1764 sv_free(PL_skipwhite);
1771 S_skipspace2(pTHX_ register char *s, SV **svp)
1774 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1775 const I32 startoff = s - SvPVX(PL_linestr);
1777 PERL_ARGS_ASSERT_SKIPSPACE2;
1780 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1781 if (!PL_madskills || !svp)
1783 start = SvPVX(PL_linestr) + startoff;
1784 if (!PL_thistoken && PL_realtokenstart >= 0) {
1785 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1786 PL_thistoken = newSVpvn(tstart, start - tstart);
1787 PL_realtokenstart = -1;
1791 *svp = newSVpvs("");
1792 sv_setsv(*svp, PL_skipwhite);
1793 sv_free(PL_skipwhite);
1802 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1804 AV *av = CopFILEAVx(PL_curcop);
1806 SV * const sv = newSV_type(SVt_PVMG);
1808 sv_setsv(sv, orig_sv);
1810 sv_setpvn(sv, buf, len);
1813 av_store(av, (I32)CopLINE(PL_curcop), sv);
1819 * Called to gobble the appropriate amount and type of whitespace.
1820 * Skips comments as well.
1824 S_skipspace(pTHX_ register char *s)
1828 #endif /* PERL_MAD */
1829 PERL_ARGS_ASSERT_SKIPSPACE;
1832 sv_free(PL_skipwhite);
1833 PL_skipwhite = NULL;
1835 #endif /* PERL_MAD */
1836 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1837 while (s < PL_bufend && SPACE_OR_TAB(*s))
1840 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1842 lex_read_space(LEX_KEEP_PREVIOUS |
1843 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1844 LEX_NO_NEXT_CHUNK : 0));
1846 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1847 if (PL_linestart > PL_bufptr)
1848 PL_bufptr = PL_linestart;
1853 PL_skipwhite = newSVpvn(start, s-start);
1854 #endif /* PERL_MAD */
1860 * Check the unary operators to ensure there's no ambiguity in how they're
1861 * used. An ambiguous piece of code would be:
1863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1864 * the +5 is its argument.
1874 if (PL_oldoldbufptr != PL_last_uni)
1876 while (isSPACE(*PL_last_uni))
1879 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1881 if ((t = strchr(s, '(')) && t < PL_bufptr)
1884 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1885 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1886 (int)(s - PL_last_uni), PL_last_uni);
1890 * LOP : macro to build a list operator. Its behaviour has been replaced
1891 * with a subroutine, S_lop() for which LOP is just another name.
1894 #define LOP(f,x) return lop(f,x,s)
1898 * Build a list operator (or something that might be one). The rules:
1899 * - if we have a next token, then it's a list operator [why?]
1900 * - if the next thing is an opening paren, then it's a function
1901 * - else it's a list operator
1905 S_lop(pTHX_ I32 f, int x, char *s)
1909 PERL_ARGS_ASSERT_LOP;
1915 PL_last_lop = PL_oldbufptr;
1916 PL_last_lop_op = (OPCODE)f;
1925 return REPORT(FUNC);
1928 return REPORT(FUNC);
1931 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1932 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1933 return REPORT(LSTOP);
1940 * Sets up for an eventual force_next(). start_force(0) basically does
1941 * an unshift, while start_force(-1) does a push. yylex removes items
1946 S_start_force(pTHX_ int where)
1950 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1951 where = PL_lasttoke;
1952 assert(PL_curforce < 0 || PL_curforce == where);
1953 if (PL_curforce != where) {
1954 for (i = PL_lasttoke; i > where; --i) {
1955 PL_nexttoke[i] = PL_nexttoke[i-1];
1959 if (PL_curforce < 0) /* in case of duplicate start_force() */
1960 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1961 PL_curforce = where;
1964 curmad('^', newSVpvs(""));
1965 CURMAD('_', PL_nextwhite);
1970 S_curmad(pTHX_ char slot, SV *sv)
1976 if (PL_curforce < 0)
1977 where = &PL_thismad;
1979 where = &PL_nexttoke[PL_curforce].next_mad;
1985 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1987 else if (PL_encoding) {
1988 sv_recode_to_utf8(sv, PL_encoding);
1993 /* keep a slot open for the head of the list? */
1994 if (slot != '_' && *where && (*where)->mad_key == '^') {
1995 (*where)->mad_key = slot;
1996 sv_free(MUTABLE_SV(((*where)->mad_val)));
1997 (*where)->mad_val = (void*)sv;
2000 addmad(newMADsv(slot, sv), where, 0);
2003 # define start_force(where) NOOP
2004 # define curmad(slot, sv) NOOP
2009 * When the lexer realizes it knows the next token (for instance,
2010 * it is reordering tokens for the parser) then it can call S_force_next
2011 * to know what token to return the next time the lexer is called. Caller
2012 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2013 * and possibly PL_expect to ensure the lexer handles the token correctly.
2017 S_force_next(pTHX_ I32 type)
2022 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2023 tokereport(type, &NEXTVAL_NEXTTOKE);
2026 /* Don’t let opslab_force_free snatch it */
2027 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2028 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2029 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2032 if (PL_curforce < 0)
2033 start_force(PL_lasttoke);
2034 PL_nexttoke[PL_curforce].next_type = type;
2035 if (PL_lex_state != LEX_KNOWNEXT)
2036 PL_lex_defer = PL_lex_state;
2037 PL_lex_state = LEX_KNOWNEXT;
2038 PL_lex_expect = PL_expect;
2041 PL_nexttype[PL_nexttoke] = type;
2043 if (PL_lex_state != LEX_KNOWNEXT) {
2044 PL_lex_defer = PL_lex_state;
2045 PL_lex_expect = PL_expect;
2046 PL_lex_state = LEX_KNOWNEXT;
2054 int yyc = PL_parser->yychar;
2055 if (yyc != YYEMPTY) {
2058 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2059 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2060 PL_lex_allbrackets--;
2062 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2063 } else if (yyc == '('/*)*/) {
2064 PL_lex_allbrackets--;
2069 PL_parser->yychar = YYEMPTY;
2074 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2077 SV * const sv = newSVpvn_utf8(start, len,
2080 && !is_ascii_string((const U8*)start, len)
2081 && is_utf8_string((const U8*)start, len));
2087 * When the lexer knows the next thing is a word (for instance, it has
2088 * just seen -> and it knows that the next char is a word char, then
2089 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2093 * char *start : buffer position (must be within PL_linestr)
2094 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2095 * int check_keyword : if true, Perl checks to make sure the word isn't
2096 * a keyword (do this if the word is a label, e.g. goto FOO)
2097 * int allow_pack : if true, : characters will also be allowed (require,
2098 * use, etc. do this)
2099 * int allow_initial_tick : used by the "sub" lexer only.
2103 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2109 PERL_ARGS_ASSERT_FORCE_WORD;
2111 start = SKIPSPACE1(start);
2113 if (isIDFIRST_lazy_if(s,UTF) ||
2114 (allow_pack && *s == ':') ||
2115 (allow_initial_tick && *s == '\'') )
2117 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2118 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2120 start_force(PL_curforce);
2122 curmad('X', newSVpvn(start,s-start));
2123 if (token == METHOD) {
2128 PL_expect = XOPERATOR;
2132 curmad('g', newSVpvs( "forced" ));
2133 NEXTVAL_NEXTTOKE.opval
2134 = (OP*)newSVOP(OP_CONST,0,
2135 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2136 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2144 * Called when the lexer wants $foo *foo &foo etc, but the program
2145 * text only contains the "foo" portion. The first argument is a pointer
2146 * to the "foo", and the second argument is the type symbol to prefix.
2147 * Forces the next token to be a "WORD".
2148 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2152 S_force_ident(pTHX_ register const char *s, int kind)
2156 PERL_ARGS_ASSERT_FORCE_IDENT;
2159 const STRLEN len = strlen(s);
2160 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161 UTF ? SVf_UTF8 : 0));
2162 start_force(PL_curforce);
2163 NEXTVAL_NEXTTOKE.opval = o;
2166 o->op_private = OPpCONST_ENTERED;
2167 /* XXX see note in pp_entereval() for why we forgo typo
2168 warnings if the symbol must be introduced in an eval.
2170 gv_fetchpvn_flags(s, len,
2171 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2172 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2173 kind == '$' ? SVt_PV :
2174 kind == '@' ? SVt_PVAV :
2175 kind == '%' ? SVt_PVHV :
2183 S_force_ident_maybe_lex(pTHX_ char pit)
2185 start_force(PL_curforce);
2186 NEXTVAL_NEXTTOKE.ival = pit;
2191 Perl_str_to_version(pTHX_ SV *sv)
2196 const char *start = SvPV_const(sv,len);
2197 const char * const end = start + len;
2198 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2200 PERL_ARGS_ASSERT_STR_TO_VERSION;
2202 while (start < end) {
2206 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2211 retval += ((NV)n)/nshift;
2220 * Forces the next token to be a version number.
2221 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2222 * and if "guessing" is TRUE, then no new token is created (and the caller
2223 * must use an alternative parsing method).
2227 S_force_version(pTHX_ char *s, int guessing)
2233 I32 startoff = s - SvPVX(PL_linestr);
2236 PERL_ARGS_ASSERT_FORCE_VERSION;
2244 while (isDIGIT(*d) || *d == '_' || *d == '.')
2248 start_force(PL_curforce);
2249 curmad('X', newSVpvn(s,d-s));
2252 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2254 #ifdef USE_LOCALE_NUMERIC
2255 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2256 setlocale(LC_NUMERIC, "C");
2258 s = scan_num(s, &pl_yylval);
2259 #ifdef USE_LOCALE_NUMERIC
2260 setlocale(LC_NUMERIC, loc);
2263 version = pl_yylval.opval;
2264 ver = cSVOPx(version)->op_sv;
2265 if (SvPOK(ver) && !SvNIOK(ver)) {
2266 SvUPGRADE(ver, SVt_PVNV);
2267 SvNV_set(ver, str_to_version(ver));
2268 SvNOK_on(ver); /* hint that it is a version */
2271 else if (guessing) {
2274 sv_free(PL_nextwhite); /* let next token collect whitespace */
2276 s = SvPVX(PL_linestr) + startoff;
2284 if (PL_madskills && !version) {
2285 sv_free(PL_nextwhite); /* let next token collect whitespace */
2287 s = SvPVX(PL_linestr) + startoff;
2290 /* NOTE: The parser sees the package name and the VERSION swapped */
2291 start_force(PL_curforce);
2292 NEXTVAL_NEXTTOKE.opval = version;
2299 * S_force_strict_version
2300 * Forces the next token to be a version number using strict syntax rules.
2304 S_force_strict_version(pTHX_ char *s)
2309 I32 startoff = s - SvPVX(PL_linestr);
2311 const char *errstr = NULL;
2313 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2315 while (isSPACE(*s)) /* leading whitespace */
2318 if (is_STRICT_VERSION(s,&errstr)) {
2320 s = (char *)scan_version(s, ver, 0);
2321 version = newSVOP(OP_CONST, 0, ver);
2323 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2324 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2328 yyerror(errstr); /* version required */
2333 if (PL_madskills && !version) {
2334 sv_free(PL_nextwhite); /* let next token collect whitespace */
2336 s = SvPVX(PL_linestr) + startoff;
2339 /* NOTE: The parser sees the package name and the VERSION swapped */
2340 start_force(PL_curforce);
2341 NEXTVAL_NEXTTOKE.opval = version;
2349 * Tokenize a quoted string passed in as an SV. It finds the next
2350 * chunk, up to end of string or a backslash. It may make a new
2351 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2356 S_tokeq(pTHX_ SV *sv)
2365 PERL_ARGS_ASSERT_TOKEQ;
2370 s = SvPV_force(sv, len);
2371 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2374 /* This is relying on the SV being "well formed" with a trailing '\0' */
2375 while (s < send && !(*s == '\\' && s[1] == '\\'))
2380 if ( PL_hints & HINT_NEW_STRING ) {
2381 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2385 if (s + 1 < send && (s[1] == '\\'))
2386 s++; /* all that, just for this */
2391 SvCUR_set(sv, d - SvPVX_const(sv));
2393 if ( PL_hints & HINT_NEW_STRING )
2394 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2399 * Now come three functions related to double-quote context,
2400 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2401 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2402 * interact with PL_lex_state, and create fake ( ... ) argument lists
2403 * to handle functions and concatenation.
2407 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2412 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2414 * Pattern matching will set PL_lex_op to the pattern-matching op to
2415 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2417 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2419 * Everything else becomes a FUNC.
2421 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2422 * had an OP_CONST or OP_READLINE). This just sets us up for a
2423 * call to S_sublex_push().
2427 S_sublex_start(pTHX)
2430 const I32 op_type = pl_yylval.ival;
2432 if (op_type == OP_NULL) {
2433 pl_yylval.opval = PL_lex_op;
2437 if (op_type == OP_CONST || op_type == OP_READLINE) {
2438 SV *sv = tokeq(PL_lex_stuff);
2440 if (SvTYPE(sv) == SVt_PVIV) {
2441 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2443 const char * const p = SvPV_const(sv, len);
2444 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2448 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2449 PL_lex_stuff = NULL;
2450 /* Allow <FH> // "foo" */
2451 if (op_type == OP_READLINE)
2452 PL_expect = XTERMORDORDOR;
2455 else if (op_type == OP_BACKTICK && PL_lex_op) {
2456 /* readpipe() vas overriden */
2457 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2458 pl_yylval.opval = PL_lex_op;
2460 PL_lex_stuff = NULL;
2464 PL_sublex_info.super_state = PL_lex_state;
2465 PL_sublex_info.sub_inwhat = (U16)op_type;
2466 PL_sublex_info.sub_op = PL_lex_op;
2467 PL_lex_state = LEX_INTERPPUSH;
2471 pl_yylval.opval = PL_lex_op;
2481 * Create a new scope to save the lexing state. The scope will be
2482 * ended in S_sublex_done. Returns a '(', starting the function arguments
2483 * to the uc, lc, etc. found before.
2484 * Sets PL_lex_state to LEX_INTERPCONCAT.
2494 PL_lex_state = PL_sublex_info.super_state;
2495 SAVEBOOL(PL_lex_dojoin);
2496 SAVEI32(PL_lex_brackets);
2497 SAVEI32(PL_lex_allbrackets);
2498 SAVEI32(PL_lex_formbrack);
2499 SAVEI8(PL_lex_fakeeof);
2500 SAVEI32(PL_lex_casemods);
2501 SAVEI32(PL_lex_starts);
2502 SAVEI8(PL_lex_state);
2503 SAVESPTR(PL_lex_repl);
2504 SAVEVPTR(PL_lex_inpat);
2505 SAVEI16(PL_lex_inwhat);
2506 SAVECOPLINE(PL_curcop);
2507 SAVEPPTR(PL_bufptr);
2508 SAVEPPTR(PL_bufend);
2509 SAVEPPTR(PL_oldbufptr);
2510 SAVEPPTR(PL_oldoldbufptr);
2511 SAVEPPTR(PL_last_lop);
2512 SAVEPPTR(PL_last_uni);
2513 SAVEPPTR(PL_linestart);
2514 SAVESPTR(PL_linestr);
2515 SAVEGENERICPV(PL_lex_brackstack);
2516 SAVEGENERICPV(PL_lex_casestack);
2517 SAVEGENERICPV(PL_parser->lex_shared);
2519 /* The here-doc parser needs to be able to peek into outer lexing
2520 scopes to find the body of the here-doc. So we put PL_linestr and
2521 PL_bufptr into lex_shared, to ‘share’ those values.
2523 PL_parser->lex_shared->ls_linestr = PL_linestr;
2524 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2526 PL_linestr = PL_lex_stuff;
2527 PL_lex_repl = PL_sublex_info.repl;
2528 PL_lex_stuff = NULL;
2529 PL_sublex_info.repl = NULL;
2531 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2532 = SvPVX(PL_linestr);
2533 PL_bufend += SvCUR(PL_linestr);
2534 PL_last_lop = PL_last_uni = NULL;
2535 SAVEFREESV(PL_linestr);
2536 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2538 PL_lex_dojoin = FALSE;
2539 PL_lex_brackets = PL_lex_formbrack = 0;
2540 PL_lex_allbrackets = 0;
2541 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2542 Newx(PL_lex_brackstack, 120, char);
2543 Newx(PL_lex_casestack, 12, char);
2544 PL_lex_casemods = 0;
2545 *PL_lex_casestack = '\0';
2547 PL_lex_state = LEX_INTERPCONCAT;
2548 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2550 Newxz(shared, 1, LEXSHARED);
2551 shared->ls_prev = PL_parser->lex_shared;
2552 PL_parser->lex_shared = shared;
2554 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2555 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2556 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2557 PL_lex_inpat = PL_sublex_info.sub_op;
2559 PL_lex_inpat = NULL;
2566 * Restores lexer state after a S_sublex_push.
2573 if (!PL_lex_starts++) {
2574 SV * const sv = newSVpvs("");
2575 if (SvUTF8(PL_linestr))
2577 PL_expect = XOPERATOR;
2578 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2582 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2583 PL_lex_state = LEX_INTERPCASEMOD;
2587 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2588 assert(PL_lex_inwhat != OP_TRANSR);
2589 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2590 PL_linestr = PL_lex_repl;
2592 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2593 PL_bufend += SvCUR(PL_linestr);
2594 PL_last_lop = PL_last_uni = NULL;
2595 PL_lex_dojoin = FALSE;
2596 PL_lex_brackets = 0;
2597 PL_lex_allbrackets = 0;
2598 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2599 PL_lex_casemods = 0;
2600 *PL_lex_casestack = '\0';
2602 if (SvEVALED(PL_lex_repl)) {
2603 PL_lex_state = LEX_INTERPNORMAL;
2605 /* we don't clear PL_lex_repl here, so that we can check later
2606 whether this is an evalled subst; that means we rely on the
2607 logic to ensure sublex_done() is called again only via the
2608 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2611 PL_lex_state = LEX_INTERPCONCAT;
2621 PL_endwhite = newSVpvs("");
2622 sv_catsv(PL_endwhite, PL_thiswhite);
2626 sv_setpvs(PL_thistoken,"");
2628 PL_realtokenstart = -1;
2632 PL_bufend = SvPVX(PL_linestr);
2633 PL_bufend += SvCUR(PL_linestr);
2634 PL_expect = XOPERATOR;
2635 PL_sublex_info.sub_inwhat = 0;
2640 PERL_STATIC_INLINE SV*
2641 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2643 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2644 * interior, hence to the "}". Finds what the name resolves to, returning
2645 * an SV* containing it; NULL if no valid one found */
2647 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2654 const U8* first_bad_char_loc;
2655 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2657 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2659 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2661 &first_bad_char_loc))
2663 /* If warnings are on, this will print a more detailed analysis of what
2664 * is wrong than the error message below */
2665 utf8n_to_uvuni(first_bad_char_loc,
2666 e - ((char *) first_bad_char_loc),
2669 /* We deliberately don't try to print the malformed character, which
2670 * might not print very well; it also may be just the first of many
2671 * malformations, so don't print what comes after it */
2672 yyerror(Perl_form(aTHX_
2673 "Malformed UTF-8 character immediately after '%.*s'",
2674 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2678 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2679 /* include the <}> */
2680 e - backslash_ptr + 1);
2685 /* See if the charnames handler is the Perl core's, and if so, we can skip
2686 * the validation needed for a user-supplied one, as Perl's does its own
2688 table = GvHV(PL_hintgv); /* ^H */
2689 cvp = hv_fetchs(table, "charnames", FALSE);
2691 if (((rv = SvRV(cv)) != NULL)
2692 && ((stash = CvSTASH(rv)) != NULL))
2694 const char * const name = HvNAME(stash);
2695 if strEQ(name, "_charnames") {
2700 /* Here, it isn't Perl's charname handler. We can't rely on a
2701 * user-supplied handler to validate the input name. For non-ut8 input,
2702 * look to see that the first character is legal. Then loop through the
2703 * rest checking that each is a continuation */
2705 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2709 if (! isALPHAU(*s)) {
2714 if (! isCHARNAME_CONT(*s)) {
2721 /* Similarly for utf8. For invariants can check directly; for other
2722 * Latin1, can calculate their code point and check; otherwise use a
2724 if (UTF8_IS_INVARIANT(*s)) {
2725 if (! isALPHAU(*s)) {
2729 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2730 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2736 if (! PL_utf8_charname_begin) {
2737 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2738 PL_utf8_charname_begin = _core_swash_init("utf8",
2739 "_Perl_Charname_Begin",
2741 1, 0, NULL, &flags);
2743 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2750 if (UTF8_IS_INVARIANT(*s)) {
2751 if (! isCHARNAME_CONT(*s)) {
2756 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2757 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2765 if (! PL_utf8_charname_continue) {
2766 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2767 PL_utf8_charname_continue = _core_swash_init("utf8",
2768 "_Perl_Charname_Continue",
2770 1, 0, NULL, &flags);
2772 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2780 if (SvUTF8(res)) { /* Don't accept malformed input */
2781 const U8* first_bad_char_loc;
2783 const char* const str = SvPV_const(res, len);
2784 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2785 /* If warnings are on, this will print a more detailed analysis of
2786 * what is wrong than the error message below */
2787 utf8n_to_uvuni(first_bad_char_loc,
2788 (char *) first_bad_char_loc - str,
2791 /* We deliberately don't try to print the malformed character,
2792 * which might not print very well; it also may be just the first
2793 * of many malformations, so don't print what comes after it */
2796 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2797 (int) (e - backslash_ptr + 1), backslash_ptr,
2798 (int) ((char *) first_bad_char_loc - str), str
2808 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2810 /* The final %.*s makes sure that should the trailing NUL be missing
2811 * that this print won't run off the end of the string */
2814 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2815 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2816 (int)(e - s + bad_char_size), s + bad_char_size
2818 UTF ? SVf_UTF8 : 0);
2826 Extracts the next constant part of a pattern, double-quoted string,
2827 or transliteration. This is terrifying code.
2829 For example, in parsing the double-quoted string "ab\x63$d", it would
2830 stop at the '$' and return an OP_CONST containing 'abc'.
2832 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2833 processing a pattern (PL_lex_inpat is true), a transliteration
2834 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2836 Returns a pointer to the character scanned up to. If this is
2837 advanced from the start pointer supplied (i.e. if anything was
2838 successfully parsed), will leave an OP_CONST for the substring scanned
2839 in pl_yylval. Caller must intuit reason for not parsing further
2840 by looking at the next characters herself.
2844 \N{ABC} => \N{U+41.42.43}
2847 all other \-char, including \N and \N{ apart from \N{ABC}
2850 @ and $ where it appears to be a var, but not for $ as tail anchor
2855 In transliterations:
2856 characters are VERY literal, except for - not at the start or end
2857 of the string, which indicates a range. If the range is in bytes,
2858 scan_const expands the range to the full set of intermediate
2859 characters. If the range is in utf8, the hyphen is replaced with
2860 a certain range mark which will be handled by pmtrans() in op.c.
2862 In double-quoted strings:
2864 double-quoted style: \r and \n
2865 constants: \x31, etc.
2866 deprecated backrefs: \1 (in substitution replacements)
2867 case and quoting: \U \Q \E
2870 scan_const does *not* construct ops to handle interpolated strings.
2871 It stops processing as soon as it finds an embedded $ or @ variable
2872 and leaves it to the caller to work out what's going on.
2874 embedded arrays (whether in pattern or not) could be:
2875 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2877 $ in double-quoted strings must be the symbol of an embedded scalar.
2879 $ in pattern could be $foo or could be tail anchor. Assumption:
2880 it's a tail anchor if $ is the last thing in the string, or if it's
2881 followed by one of "()| \r\n\t"
2883 \1 (backreferences) are turned into $1 in substitutions
2885 The structure of the code is
2886 while (there's a character to process) {
2887 handle transliteration ranges
2888 skip regexp comments /(?#comment)/ and codes /(?{code})/
2889 skip #-initiated comments in //x patterns
2890 check for embedded arrays
2891 check for embedded scalars
2893 deprecate \1 in substitution replacements
2894 handle string-changing backslashes \l \U \Q \E, etc.
2895 switch (what was escaped) {
2896 handle \- in a transliteration (becomes a literal -)
2897 if a pattern and not \N{, go treat as regular character
2898 handle \132 (octal characters)
2899 handle \x15 and \x{1234} (hex characters)
2900 handle \N{name} (named characters, also \N{3,5} in a pattern)
2901 handle \cV (control characters)
2902 handle printf-style backslashes (\f, \r, \n, etc)
2905 } (end if backslash)
2906 handle regular character
2907 } (end while character to read)
2912 S_scan_const(pTHX_ char *start)
2915 char *send = PL_bufend; /* end of the constant */
2916 SV *sv = newSV(send - start); /* sv for the constant. See
2917 note below on sizing. */
2918 char *s = start; /* start of the constant */
2919 char *d = SvPVX(sv); /* destination for copies */
2920 bool dorange = FALSE; /* are we in a translit range? */
2921 bool didrange = FALSE; /* did we just finish a range? */
2922 bool in_charclass = FALSE; /* within /[...]/ */
2923 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2924 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2925 to be UTF8? But, this can
2926 show as true when the source
2927 isn't utf8, as for example
2928 when it is entirely composed
2930 SV *res; /* result from charnames */
2932 /* Note on sizing: The scanned constant is placed into sv, which is
2933 * initialized by newSV() assuming one byte of output for every byte of
2934 * input. This routine expects newSV() to allocate an extra byte for a
2935 * trailing NUL, which this routine will append if it gets to the end of
2936 * the input. There may be more bytes of input than output (eg., \N{LATIN
2937 * CAPITAL LETTER A}), or more output than input if the constant ends up
2938 * recoded to utf8, but each time a construct is found that might increase
2939 * the needed size, SvGROW() is called. Its size parameter each time is
2940 * based on the best guess estimate at the time, namely the length used so
2941 * far, plus the length the current construct will occupy, plus room for
2942 * the trailing NUL, plus one byte for every input byte still unscanned */
2946 UV literal_endpoint = 0;
2947 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2950 PERL_ARGS_ASSERT_SCAN_CONST;
2952 assert(PL_lex_inwhat != OP_TRANSR);
2953 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2954 /* If we are doing a trans and we know we want UTF8 set expectation */
2955 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2956 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2960 while (s < send || dorange) {
2962 /* get transliterations out of the way (they're most literal) */
2963 if (PL_lex_inwhat == OP_TRANS) {
2964 /* expand a range A-Z to the full set of characters. AIE! */
2966 I32 i; /* current expanded character */
2967 I32 min; /* first character in range */
2968 I32 max; /* last character in range */
2979 char * const c = (char*)utf8_hop((U8*)d, -1);
2983 *c = (char)UTF_TO_NATIVE(0xff);
2984 /* mark the range as done, and continue */
2990 i = d - SvPVX_const(sv); /* remember current offset */
2993 SvLEN(sv) + (has_utf8 ?
2994 (512 - UTF_CONTINUATION_MARK +
2997 /* How many two-byte within 0..255: 128 in UTF-8,
2998 * 96 in UTF-8-mod. */
3000 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3002 d = SvPVX(sv) + i; /* refresh d after realloc */
3006 for (j = 0; j <= 1; j++) {
3007 char * const c = (char*)utf8_hop((U8*)d, -1);
3008 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3014 max = (U8)0xff; /* only to \xff */
3015 uvmax = uv; /* \x{100} to uvmax */
3017 d = c; /* eat endpoint chars */
3022 d -= 2; /* eat the first char and the - */
3023 min = (U8)*d; /* first char in range */
3024 max = (U8)d[1]; /* last char in range */
3032 "Invalid range \"%c-%c\" in transliteration operator",
3033 (char)min, (char)max);
3037 if (literal_endpoint == 2 &&
3038 ((isLOWER(min) && isLOWER(max)) ||
3039 (isUPPER(min) && isUPPER(max)))) {
3041 for (i = min; i <= max; i++)
3043 *d++ = NATIVE_TO_NEED(has_utf8,i);
3045 for (i = min; i <= max; i++)
3047 *d++ = NATIVE_TO_NEED(has_utf8,i);
3052 for (i = min; i <= max; i++)
3055 const U8 ch = (U8)NATIVE_TO_UTF(i);
3056 if (UNI_IS_INVARIANT(ch))
3059 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3060 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3069 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3071 *d++ = (char)UTF_TO_NATIVE(0xff);
3073 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3077 /* mark the range as done, and continue */
3081 literal_endpoint = 0;
3086 /* range begins (ignore - as first or last char) */
3087 else if (*s == '-' && s+1 < send && s != start) {
3090 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3097 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
3107 literal_endpoint = 0;
3108 native_range = TRUE;
3113 /* if we get here, we're not doing a transliteration */
3115 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3118 while (s1 >= start && *s1-- == '\\')
3121 in_charclass = TRUE;
3124 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3127 while (s1 >= start && *s1-- == '\\')
3130 in_charclass = FALSE;
3133 /* skip for regexp comments /(?#comment)/, except for the last
3134 * char, which will be done separately.
3135 * Stop on (?{..}) and friends */
3137 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3139 while (s+1 < send && *s != ')')
3140 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3142 else if (!PL_lex_casemods && !in_charclass &&
3143 ( s[2] == '{' /* This should match regcomp.c */
3144 || (s[2] == '?' && s[3] == '{')))
3150 /* likewise skip #-initiated comments in //x patterns */
3151 else if (*s == '#' && PL_lex_inpat &&
3152 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3153 while (s+1 < send && *s != '\n')
3154 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3157 /* no further processing of single-quoted regex */
3158 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3159 goto default_action;
3161 /* check for embedded arrays
3162 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3164 else if (*s == '@' && s[1]) {
3165 if (isALNUM_lazy_if(s+1,UTF))
3167 if (strchr(":'{$", s[1]))
3169 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3170 break; /* in regexp, neither @+ nor @- are interpolated */
3173 /* check for embedded scalars. only stop if we're sure it's a
3176 else if (*s == '$') {
3177 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3179 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3181 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3182 "Possible unintended interpolation of $\\ in regex");
3184 break; /* in regexp, $ might be tail anchor */
3188 /* End of else if chain - OP_TRANS rejoin rest */
3191 if (*s == '\\' && s+1 < send) {
3192 char* e; /* Can be used for ending '}', etc. */
3196 /* warn on \1 - \9 in substitution replacements, but note that \11
3197 * is an octal; and \19 is \1 followed by '9' */
3198 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3199 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3201 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3206 /* string-change backslash escapes */
3207 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3211 /* In a pattern, process \N, but skip any other backslash escapes.
3212 * This is because we don't want to translate an escape sequence
3213 * into a meta symbol and have the regex compiler use the meta
3214 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3215 * in spite of this, we do have to process \N here while the proper
3216 * charnames handler is in scope. See bugs #56444 and #62056.
3217 * There is a complication because \N in a pattern may also stand
3218 * for 'match a non-nl', and not mean a charname, in which case its
3219 * processing should be deferred to the regex compiler. To be a
3220 * charname it must be followed immediately by a '{', and not look
3221 * like \N followed by a curly quantifier, i.e., not something like
3222 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3224 else if (PL_lex_inpat
3227 || regcurly(s + 1)))
3229 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3230 goto default_action;
3235 /* quoted - in transliterations */
3237 if (PL_lex_inwhat == OP_TRANS) {
3245 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3246 "Unrecognized escape \\%c passed through",
3248 /* default action is to copy the quoted character */
3249 goto default_action;
3252 /* eg. \132 indicates the octal constant 0132 */
3253 case '0': case '1': case '2': case '3':
3254 case '4': case '5': case '6': case '7':
3258 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3261 goto NUM_ESCAPE_INSERT;
3263 /* eg. \o{24} indicates the octal constant \024 */
3269 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3275 goto NUM_ESCAPE_INSERT;
3278 /* eg. \x24 indicates the hex constant 0x24 */
3284 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3293 /* Insert oct or hex escaped character. There will always be
3294 * enough room in sv since such escapes will be longer than any
3295 * UTF-8 sequence they can end up as, except if they force us
3296 * to recode the rest of the string into utf8 */
3298 /* Here uv is the ordinal of the next character being added in
3299 * unicode (converted from native). */
3300 if (!UNI_IS_INVARIANT(uv)) {
3301 if (!has_utf8 && uv > 255) {
3302 /* Might need to recode whatever we have accumulated so
3303 * far if it contains any chars variant in utf8 or
3306 SvCUR_set(sv, d - SvPVX_const(sv));
3309 /* See Note on sizing above. */
3310 sv_utf8_upgrade_flags_grow(sv,
3311 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3312 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3313 d = SvPVX(sv) + SvCUR(sv);
3318 d = (char*)uvuni_to_utf8((U8*)d, uv);
3319 if (PL_lex_inwhat == OP_TRANS &&
3320 PL_sublex_info.sub_op) {
3321 PL_sublex_info.sub_op->op_private |=
3322 (PL_lex_repl ? OPpTRANS_FROM_UTF
3326 if (uv > 255 && !dorange)
3327 native_range = FALSE;
3340 /* In a non-pattern \N must be a named character, like \N{LATIN
3341 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3342 * mean to match a non-newline. For non-patterns, named
3343 * characters are converted to their string equivalents. In
3344 * patterns, named characters are not converted to their
3345 * ultimate forms for the same reasons that other escapes
3346 * aren't. Instead, they are converted to the \N{U+...} form
3347 * to get the value from the charnames that is in effect right
3348 * now, while preserving the fact that it was a named character
3349 * so that the regex compiler knows this */
3351 /* This section of code doesn't generally use the
3352 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3353 * a close examination of this macro and determined it is a
3354 * no-op except on utfebcdic variant characters. Every
3355 * character generated by this that would normally need to be
3356 * enclosed by this macro is invariant, so the macro is not
3357 * needed, and would complicate use of copy(). XXX There are
3358 * other parts of this file where the macro is used
3359 * inconsistently, but are saved by it being a no-op */
3361 /* The structure of this section of code (besides checking for
3362 * errors and upgrading to utf8) is:
3363 * Further disambiguate between the two meanings of \N, and if
3364 * not a charname, go process it elsewhere
3365 * If of form \N{U+...}, pass it through if a pattern;
3366 * otherwise convert to utf8
3367 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3368 * pattern; otherwise convert to utf8 */
3370 /* Here, s points to the 'N'; the test below is guaranteed to
3371 * succeed if we are being called on a pattern as we already
3372 * know from a test above that the next character is a '{'.
3373 * On a non-pattern \N must mean 'named sequence, which
3374 * requires braces */
3377 yyerror("Missing braces on \\N{}");
3382 /* If there is no matching '}', it is an error. */
3383 if (! (e = strchr(s, '}'))) {
3384 if (! PL_lex_inpat) {
3385 yyerror("Missing right brace on \\N{}");
3387 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3392 /* Here it looks like a named character */
3394 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3395 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3396 | PERL_SCAN_DISALLOW_PREFIX;
3399 /* For \N{U+...}, the '...' is a unicode value even on
3400 * EBCDIC machines */
3401 s += 2; /* Skip to next char after the 'U+' */
3403 uv = grok_hex(s, &len, &flags, NULL);
3404 if (len == 0 || len != (STRLEN)(e - s)) {
3405 yyerror("Invalid hexadecimal number in \\N{U+...}");
3412 /* On non-EBCDIC platforms, pass through to the regex
3413 * compiler unchanged. The reason we evaluated the
3414 * number above is to make sure there wasn't a syntax
3415 * error. But on EBCDIC we convert to native so
3416 * downstream code can continue to assume it's native
3418 s -= 5; /* Include the '\N{U+' */
3420 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3423 (unsigned int) UNI_TO_NATIVE(uv));
3425 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3429 else { /* Not a pattern: convert the hex to string */
3431 /* If destination is not in utf8, unconditionally
3432 * recode it to be so. This is because \N{} implies
3433 * Unicode semantics, and scalars have to be in utf8
3434 * to guarantee those semantics */
3436 SvCUR_set(sv, d - SvPVX_const(sv));
3439 /* See Note on sizing above. */
3440 sv_utf8_upgrade_flags_grow(
3442 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3443 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3444 d = SvPVX(sv) + SvCUR(sv);
3448 /* Add the string to the output */
3449 if (UNI_IS_INVARIANT(uv)) {
3452 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3455 else /* Here is \N{NAME} but not \N{U+...}. */
3456 if ((res = get_and_check_backslash_N_name(s, e)))
3459 const char *str = SvPV_const(res, len);
3462 if (! len) { /* The name resolved to an empty string */
3463 Copy("\\N{}", d, 4, char);
3467 /* In order to not lose information for the regex
3468 * compiler, pass the result in the specially made
3469 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3470 * the code points in hex of each character
3471 * returned by charnames */
3473 const char *str_end = str + len;
3474 const STRLEN off = d - SvPVX_const(sv);
3476 if (! SvUTF8(res)) {
3477 /* For the non-UTF-8 case, we can determine the
3478 * exact length needed without having to parse
3479 * through the string. Each character takes up
3480 * 2 hex digits plus either a trailing dot or
3482 d = off + SvGROW(sv, off
3484 + 6 /* For the "\N{U+", and
3486 + (STRLEN)(send - e));
3487 Copy("\\N{U+", d, 5, char);
3489 while (str < str_end) {
3491 my_snprintf(hex_string, sizeof(hex_string),
3492 "%02X.", (U8) *str);
3493 Copy(hex_string, d, 3, char);
3497 d--; /* We will overwrite below the final
3498 dot with a right brace */
3501 STRLEN char_length; /* cur char's byte length */
3503 /* and the number of bytes after this is
3504 * translated into hex digits */
3505 STRLEN output_length;
3507 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3508 * for max('U+', '.'); and 1 for NUL */
3509 char hex_string[2 * UTF8_MAXBYTES + 5];
3511 /* Get the first character of the result. */
3512 U32 uv = utf8n_to_uvuni((U8 *) str,
3516 /* Convert first code point to hex, including
3517 * the boiler plate before it. For all these,
3518 * we convert to native format so that
3519 * downstream code can continue to assume the
3520 * input is native */
3522 my_snprintf(hex_string, sizeof(hex_string),
3524 (unsigned int) UNI_TO_NATIVE(uv));
3526 /* Make sure there is enough space to hold it */
3527 d = off + SvGROW(sv, off
3529 + (STRLEN)(send - e)
3530 + 2); /* '}' + NUL */
3532 Copy(hex_string, d, output_length, char);
3535 /* For each subsequent character, append dot and
3536 * its ordinal in hex */
3537 while ((str += char_length) < str_end) {
3538 const STRLEN off = d - SvPVX_const(sv);
3539 U32 uv = utf8n_to_uvuni((U8 *) str,
3544 my_snprintf(hex_string,
3547 (unsigned int) UNI_TO_NATIVE(uv));
3549 d = off + SvGROW(sv, off
3551 + (STRLEN)(send - e)
3552 + 2); /* '}' + NUL */
3553 Copy(hex_string, d, output_length, char);
3558 *d++ = '}'; /* Done. Add the trailing brace */
3561 else { /* Here, not in a pattern. Convert the name to a
3564 /* If destination is not in utf8, unconditionally
3565 * recode it to be so. This is because \N{} implies
3566 * Unicode semantics, and scalars have to be in utf8
3567 * to guarantee those semantics */
3569 SvCUR_set(sv, d - SvPVX_const(sv));
3572 /* See Note on sizing above. */
3573 sv_utf8_upgrade_flags_grow(sv,
3574 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3575 len + (STRLEN)(send - s) + 1);
3576 d = SvPVX(sv) + SvCUR(sv);
3578 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3580 /* See Note on sizing above. (NOTE: SvCUR() is not
3581 * set correctly here). */
3582 const STRLEN off = d - SvPVX_const(sv);
3583 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3585 Copy(str, d, len, char);
3591 } /* End \N{NAME} */
3594 native_range = FALSE; /* \N{} is defined to be Unicode */
3596 s = e + 1; /* Point to just after the '}' */
3599 /* \c is a control character */
3603 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3606 yyerror("Missing control char name in \\c");
3610 /* printf-style backslashes, formfeeds, newlines, etc */
3612 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3615 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3618 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3621 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3624 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3627 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3630 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3636 } /* end if (backslash) */
3643 /* If we started with encoded form, or already know we want it,
3644 then encode the next character */
3645 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3649 /* One might think that it is wasted effort in the case of the
3650 * source being utf8 (this_utf8 == TRUE) to take the next character
3651 * in the source, convert it to an unsigned value, and then convert
3652 * it back again. But the source has not been validated here. The
3653 * routine that does the conversion checks for errors like
3656 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3657 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3659 SvCUR_set(sv, d - SvPVX_const(sv));
3662 /* See Note on sizing above. */
3663 sv_utf8_upgrade_flags_grow(sv,
3664 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3665 need + (STRLEN)(send - s) + 1);
3666 d = SvPVX(sv) + SvCUR(sv);
3668 } else if (need > len) {
3669 /* encoded value larger than old, may need extra space (NOTE:
3670 * SvCUR() is not set correctly here). See Note on sizing
3672 const STRLEN off = d - SvPVX_const(sv);
3673 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3677 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3679 if (uv > 255 && !dorange)
3680 native_range = FALSE;
3684 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3686 } /* while loop to process each character */
3688 /* terminate the string and set up the sv */
3690 SvCUR_set(sv, d - SvPVX_const(sv));
3691 if (SvCUR(sv) >= SvLEN(sv))
3692 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3693 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3696 if (PL_encoding && !has_utf8) {
3697 sv_recode_to_utf8(sv, PL_encoding);
3703 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3704 PL_sublex_info.sub_op->op_private |=
3705 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3709 /* shrink the sv if we allocated more than we used */
3710 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3711 SvPV_shrink_to_cur(sv);
3714 /* return the substring (via pl_yylval) only if we parsed anything */
3715 if (s > PL_bufptr) {
3716 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3717 const char *const key = PL_lex_inpat ? "qr" : "q";
3718 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3722 if (PL_lex_inwhat == OP_TRANS) {
3725 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3728 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3736 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3739 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3746 * Returns TRUE if there's more to the expression (e.g., a subscript),
3749 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3751 * ->[ and ->{ return TRUE
3752 * { and [ outside a pattern are always subscripts, so return TRUE
3753 * if we're outside a pattern and it's not { or [, then return FALSE
3754 * if we're in a pattern and the first char is a {
3755 * {4,5} (any digits around the comma) returns FALSE
3756 * if we're in a pattern and the first char is a [
3758 * [SOMETHING] has a funky algorithm to decide whether it's a
3759 * character class or not. It has to deal with things like
3760 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3761 * anything else returns TRUE
3764 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3767 S_intuit_more(pTHX_ register char *s)
3771 PERL_ARGS_ASSERT_INTUIT_MORE;
3773 if (PL_lex_brackets)
3775 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3777 if (*s != '{' && *s != '[')
3782 /* In a pattern, so maybe we have {n,m}. */
3790 /* On the other hand, maybe we have a character class */
3793 if (*s == ']' || *s == '^')
3796 /* this is terrifying, and it works */
3797 int weight = 2; /* let's weigh the evidence */
3799 unsigned char un_char = 255, last_un_char;
3800 const char * const send = strchr(s,']');
3801 char tmpbuf[sizeof PL_tokenbuf * 4];
3803 if (!send) /* has to be an expression */
3806 Zero(seen,256,char);
3809 else if (isDIGIT(*s)) {
3811 if (isDIGIT(s[1]) && s[2] == ']')
3817 for (; s < send; s++) {
3818 last_un_char = un_char;
3819 un_char = (unsigned char)*s;
3824 weight -= seen[un_char] * 10;
3825 if (isALNUM_lazy_if(s+1,UTF)) {
3827 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3828 len = (int)strlen(tmpbuf);
3829 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3830 UTF ? SVf_UTF8 : 0, SVt_PV))
3835 else if (*s == '$' && s[1] &&
3836 strchr("[#!%*<>()-=",s[1])) {
3837 if (/*{*/ strchr("])} =",s[2]))
3846 if (strchr("wds]",s[1]))
3848 else if (seen[(U8)'\''] || seen[(U8)'"'])
3850 else if (strchr("rnftbxcav",s[1]))
3852 else if (isDIGIT(s[1])) {
3854 while (s[1] && isDIGIT(s[1]))
3864 if (strchr("aA01! ",last_un_char))
3866 if (strchr("zZ79~",s[1]))
3868 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3869 weight -= 5; /* cope with negative subscript */
3872 if (!isALNUM(last_un_char)
3873 && !(last_un_char == '$' || last_un_char == '@'
3874 || last_un_char == '&')
3875 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3880 if (keyword(tmpbuf, d - tmpbuf, 0))
3883 if (un_char == last_un_char + 1)
3885 weight -= seen[un_char];
3890 if (weight >= 0) /* probably a character class */
3900 * Does all the checking to disambiguate
3902 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3903 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3905 * First argument is the stuff after the first token, e.g. "bar".
3907 * Not a method if foo is a filehandle.
3908 * Not a method if foo is a subroutine prototyped to take a filehandle.
3909 * Not a method if it's really "Foo $bar"
3910 * Method if it's "foo $bar"
3911 * Not a method if it's really "print foo $bar"
3912 * Method if it's really "foo package::" (interpreted as package->foo)
3913 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3914 * Not a method if bar is a filehandle or package, but is quoted with
3919 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3922 char *s = start + (*start == '$');
3923 char tmpbuf[sizeof PL_tokenbuf];
3930 PERL_ARGS_ASSERT_INTUIT_METHOD;
3932 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3934 if (cv && SvPOK(cv)) {
3935 const char *proto = CvPROTO(cv);
3943 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3944 /* start is the beginning of the possible filehandle/object,
3945 * and s is the end of it
3946 * tmpbuf is a copy of it
3949 if (*start == '$') {
3950 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3951 isUPPER(*PL_tokenbuf))
3954 len = start - SvPVX(PL_linestr);
3958 start = SvPVX(PL_linestr) + len;
3962 return *s == '(' ? FUNCMETH : METHOD;
3964 if (!keyword(tmpbuf, len, 0)) {
3965 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3969 soff = s - SvPVX(PL_linestr);
3973 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3974 if (indirgv && GvCVu(indirgv))
3976 /* filehandle or package name makes it a method */
3977 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3979 soff = s - SvPVX(PL_linestr);
3982 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3983 return 0; /* no assumptions -- "=>" quotes bareword */
3985 start_force(PL_curforce);
3986 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3987 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3988 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3990 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3991 ( UTF ? SVf_UTF8 : 0 )));
3996 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3998 return *s == '(' ? FUNCMETH : METHOD;
4004 /* Encoded script support. filter_add() effectively inserts a
4005 * 'pre-processing' function into the current source input stream.
4006 * Note that the filter function only applies to the current source file
4007 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4009 * The datasv parameter (which may be NULL) can be used to pass
4010 * private data to this instance of the filter. The filter function
4011 * can recover the SV using the FILTER_DATA macro and use it to
4012 * store private buffers and state information.
4014 * The supplied datasv parameter is upgraded to a PVIO type
4015 * and the IoDIRP/IoANY field is used to store the function pointer,
4016 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4017 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4018 * private use must be set using malloc'd pointers.
4022 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4031 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4032 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4034 if (!PL_rsfp_filters)
4035 PL_rsfp_filters = newAV();
4038 SvUPGRADE(datasv, SVt_PVIO);
4039 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4040 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4041 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4042 FPTR2DPTR(void *, IoANY(datasv)),
4043 SvPV_nolen(datasv)));
4044 av_unshift(PL_rsfp_filters, 1);
4045 av_store(PL_rsfp_filters, 0, datasv) ;
4047 !PL_parser->filtered
4048 && PL_parser->lex_flags & LEX_EVALBYTES
4049 && PL_bufptr < PL_bufend
4051 const char *s = PL_bufptr;
4052 while (s < PL_bufend) {
4054 SV *linestr = PL_parser->linestr;
4055 char *buf = SvPVX(linestr);
4056 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4057 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4058 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4059 STRLEN const linestart_pos = PL_parser->linestart - buf;
4060 STRLEN const last_uni_pos =
4061 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4062 STRLEN const last_lop_pos =
4063 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4064 av_push(PL_rsfp_filters, linestr);
4065 PL_parser->linestr =
4066 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4067 buf = SvPVX(PL_parser->linestr);
4068 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4069 PL_parser->bufptr = buf + bufptr_pos;
4070 PL_parser->oldbufptr = buf + oldbufptr_pos;
4071 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4072 PL_parser->linestart = buf + linestart_pos;
4073 if (PL_parser->last_uni)
4074 PL_parser->last_uni = buf + last_uni_pos;
4075 if (PL_parser->last_lop)
4076 PL_parser->last_lop = buf + last_lop_pos;
4077 SvLEN(linestr) = SvCUR(linestr);
4078 SvCUR(linestr) = s-SvPVX(linestr);
4079 PL_parser->filtered = 1;
4089 /* Delete most recently added instance of this filter function. */
4091 Perl_filter_del(pTHX_ filter_t funcp)
4096 PERL_ARGS_ASSERT_FILTER_DEL;
4099 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4100 FPTR2DPTR(void*, funcp)));
4102 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4104 /* if filter is on top of stack (usual case) just pop it off */
4105 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4106 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4107 sv_free(av_pop(PL_rsfp_filters));
4111 /* we need to search for the correct entry and clear it */
4112 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4116 /* Invoke the idxth filter function for the current rsfp. */
4117 /* maxlen 0 = read one text line */
4119 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4124 /* This API is bad. It should have been using unsigned int for maxlen.
4125 Not sure if we want to change the API, but if not we should sanity
4126 check the value here. */
4127 unsigned int correct_length
4136 PERL_ARGS_ASSERT_FILTER_READ;
4138 if (!PL_parser || !PL_rsfp_filters)
4140 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4141 /* Provide a default input filter to make life easy. */
4142 /* Note that we append to the line. This is handy. */
4143 DEBUG_P(PerlIO_printf(Perl_debug_log,
4144 "filter_read %d: from rsfp\n", idx));
4145 if (correct_length) {
4148 const int old_len = SvCUR(buf_sv);
4150 /* ensure buf_sv is large enough */
4151 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4152 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4153 correct_length)) <= 0) {
4154 if (PerlIO_error(PL_rsfp))
4155 return -1; /* error */
4157 return 0 ; /* end of file */
4159 SvCUR_set(buf_sv, old_len + len) ;
4160 SvPVX(buf_sv)[old_len + len] = '\0';
4163 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4164 if (PerlIO_error(PL_rsfp))
4165 return -1; /* error */
4167 return 0 ; /* end of file */
4170 return SvCUR(buf_sv);
4172 /* Skip this filter slot if filter has been deleted */
4173 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4174 DEBUG_P(PerlIO_printf(Perl_debug_log,
4175 "filter_read %d: skipped (filter deleted)\n",
4177 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4179 if (SvTYPE(datasv) != SVt_PVIO) {
4180 if (correct_length) {
4182 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4183 if (!remainder) return 0; /* eof */
4184 if (correct_length > remainder) correct_length = remainder;
4185 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4186 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4189 const char *s = SvEND(datasv);
4190 const char *send = SvPVX(datasv) + SvLEN(datasv);
4198 if (s == send) return 0; /* eof */
4199 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4200 SvCUR_set(datasv, s-SvPVX(datasv));
4202 return SvCUR(buf_sv);
4204 /* Get function pointer hidden within datasv */
4205 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4206 DEBUG_P(PerlIO_printf(Perl_debug_log,
4207 "filter_read %d: via function %p (%s)\n",
4208 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4209 /* Call function. The function is expected to */
4210 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4211 /* Return: <0:error, =0:eof, >0:not eof */
4212 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4216 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4220 PERL_ARGS_ASSERT_FILTER_GETS;
4222 #ifdef PERL_CR_FILTER
4223 if (!PL_rsfp_filters) {
4224 filter_add(S_cr_textfilter,NULL);
4227 if (PL_rsfp_filters) {
4229 SvCUR_set(sv, 0); /* start with empty line */
4230 if (FILTER_READ(0, sv, 0) > 0)
4231 return ( SvPVX(sv) ) ;
4236 return (sv_gets(sv, PL_rsfp, append));
4240 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4245 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4247 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4251 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4252 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4254 return GvHV(gv); /* Foo:: */
4257 /* use constant CLASS => 'MyClass' */
4258 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4259 if (gv && GvCV(gv)) {
4260 SV * const sv = cv_const_sv(GvCV(gv));
4262 pkgname = SvPV_const(sv, len);
4265 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4269 * S_readpipe_override
4270 * Check whether readpipe() is overridden, and generates the appropriate
4271 * optree, provided sublex_start() is called afterwards.
4274 S_readpipe_override(pTHX)
4277 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4278 pl_yylval.ival = OP_BACKTICK;
4280 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4282 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4283 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4284 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4286 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4287 op_append_elem(OP_LIST,
4288 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4289 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4296 * The intent of this yylex wrapper is to minimize the changes to the
4297 * tokener when we aren't interested in collecting madprops. It remains
4298 * to be seen how successful this strategy will be...
4305 char *s = PL_bufptr;
4307 /* make sure PL_thiswhite is initialized */
4311 /* previous token ate up our whitespace? */
4312 if (!PL_lasttoke && PL_nextwhite) {
4313 PL_thiswhite = PL_nextwhite;
4317 /* isolate the token, and figure out where it is without whitespace */
4318 PL_realtokenstart = -1;
4322 assert(PL_curforce < 0);
4324 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4325 if (!PL_thistoken) {
4326 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4327 PL_thistoken = newSVpvs("");
4329 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4330 PL_thistoken = newSVpvn(tstart, s - tstart);
4333 if (PL_thismad) /* install head */
4334 CURMAD('X', PL_thistoken);
4337 /* last whitespace of a sublex? */
4338 if (optype == ')' && PL_endwhite) {
4339 CURMAD('X', PL_endwhite);
4344 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4345 if (!PL_thiswhite && !PL_endwhite && !optype) {
4346 sv_free(PL_thistoken);
4351 /* put off final whitespace till peg */
4352 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4353 PL_nextwhite = PL_thiswhite;
4356 else if (PL_thisopen) {
4357 CURMAD('q', PL_thisopen);
4359 sv_free(PL_thistoken);
4363 /* Store actual token text as madprop X */
4364 CURMAD('X', PL_thistoken);
4368 /* add preceding whitespace as madprop _ */
4369 CURMAD('_', PL_thiswhite);
4373 /* add quoted material as madprop = */
4374 CURMAD('=', PL_thisstuff);
4378 /* add terminating quote as madprop Q */
4379 CURMAD('Q', PL_thisclose);
4383 /* special processing based on optype */
4387 /* opval doesn't need a TOKEN since it can already store mp */
4397 if (pl_yylval.opval)
4398 append_madprops(PL_thismad, pl_yylval.opval, 0);
4406 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4419 /* remember any fake bracket that lexer is about to discard */
4420 if (PL_lex_brackets == 1 &&
4421 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4424 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4427 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4428 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4431 break; /* don't bother looking for trailing comment */
4440 /* attach a trailing comment to its statement instead of next token */
4444 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4446 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4448 if (*s == '\n' || *s == '#') {
4449 while (s < PL_bufend && *s != '\n')
4453 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4454 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4467 /* Create new token struct. Note: opvals return early above. */
4468 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4475 S_tokenize_use(pTHX_ int is_use, char *s) {
4478 PERL_ARGS_ASSERT_TOKENIZE_USE;
4480 if (PL_expect != XSTATE)
4481 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4482 is_use ? "use" : "no"));
4485 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4486 s = force_version(s, TRUE);
4487 if (*s == ';' || *s == '}'
4488 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4489 start_force(PL_curforce);
4490 NEXTVAL_NEXTTOKE.opval = NULL;
4493 else if (*s == 'v') {
4494 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4495 s = force_version(s, FALSE);
4499 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4500 s = force_version(s, FALSE);
4502 pl_yylval.ival = is_use;
4506 static const char* const exp_name[] =
4507 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4508 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4512 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4514 S_word_takes_any_delimeter(char *p, STRLEN len)
4516 return (len == 1 && strchr("msyq", p[0])) ||
4518 (p[0] == 't' && p[1] == 'r') ||
4519 (p[0] == 'q' && strchr("qwxr", p[1]))));
4525 Works out what to call the token just pulled out of the input
4526 stream. The yacc parser takes care of taking the ops we return and
4527 stitching them into a tree.
4530 The type of the next token
4533 Switch based on the current state:
4534 - if we already built the token before, use it
4535 - if we have a case modifier in a string, deal with that
4536 - handle other cases of interpolation inside a string
4537 - scan the next line if we are inside a format
4538 In the normal state switch on the next character:
4540 if alphabetic, go to key lookup
4541 unrecoginized character - croak
4542 - 0/4/26: handle end-of-line or EOF
4543 - cases for whitespace
4544 - \n and #: handle comments and line numbers
4545 - various operators, brackets and sigils
4548 - 'v': vstrings (or go to key lookup)
4549 - 'x' repetition operator (or go to key lookup)
4550 - other ASCII alphanumerics (key lookup begins here):
4553 scan built-in keyword (but do nothing with it yet)
4554 check for statement label
4555 check for lexical subs
4556 goto just_a_word if there is one
4557 see whether built-in keyword is overridden
4558 switch on keyword number:
4559 - default: just_a_word:
4560 not a built-in keyword; handle bareword lookup
4561 disambiguate between method and sub call
4562 fall back to bareword
4563 - cases for built-in keywords
4568 #pragma segment Perl_yylex
4574 char *s = PL_bufptr;
4581 /* orig_keyword, gvp, and gv are initialized here because
4582 * jump to the label just_a_word_zero can bypass their
4583 * initialization later. */
4584 I32 orig_keyword = 0;
4589 SV* tmp = newSVpvs("");
4590 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4591 (IV)CopLINE(PL_curcop),
4592 lex_state_names[PL_lex_state],
4593 exp_name[PL_expect],
4594 pv_display(tmp, s, strlen(s), 0, 60));
4598 switch (PL_lex_state) {
4600 case LEX_NORMAL: /* Some compilers will produce faster */
4601 case LEX_INTERPNORMAL: /* code if we comment these out. */
4605 /* when we've already built the next token, just pull it out of the queue */
4609 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4611 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4612 PL_nexttoke[PL_lasttoke].next_mad = 0;
4613 if (PL_thismad && PL_thismad->mad_key == '_') {
4614 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4615 PL_thismad->mad_val = 0;
4616 mad_free(PL_thismad);
4621 PL_lex_state = PL_lex_defer;
4622 PL_expect = PL_lex_expect;
4623 PL_lex_defer = LEX_NORMAL;
4624 if (!PL_nexttoke[PL_lasttoke].next_type)
4629 pl_yylval = PL_nextval[PL_nexttoke];
4631 PL_lex_state = PL_lex_defer;
4632 PL_expect = PL_lex_expect;
4633 PL_lex_defer = LEX_NORMAL;
4639 next_type = PL_nexttoke[PL_lasttoke].next_type;
4641 next_type = PL_nexttype[PL_nexttoke];
4643 if (next_type & (7<<24)) {
4644 if (next_type & (1<<24)) {
4645 if (PL_lex_brackets > 100)
4646 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4647 PL_lex_brackstack[PL_lex_brackets++] =
4648 (char) ((next_type >> 16) & 0xff);
4650 if (next_type & (2<<24))
4651 PL_lex_allbrackets++;
4652 if (next_type & (4<<24))
4653 PL_lex_allbrackets--;
4654 next_type &= 0xffff;
4656 if (S_is_opval_token(next_type) && pl_yylval.opval)
4657 pl_yylval.opval->op_savefree = 0; /* release */
4658 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4661 /* interpolated case modifiers like \L \U, including \Q and \E.
4662 when we get here, PL_bufptr is at the \
4664 case LEX_INTERPCASEMOD:
4666 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4668 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4669 PL_bufptr, PL_bufend, *PL_bufptr);
4671 /* handle \E or end of string */
4672 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4674 if (PL_lex_casemods) {
4675 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4676 PL_lex_casestack[PL_lex_casemods] = '\0';
4678 if (PL_bufptr != PL_bufend
4679 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4680 || oldmod == 'F')) {
4682 PL_lex_state = LEX_INTERPCONCAT;
4685 PL_thistoken = newSVpvs("\\E");
4688 PL_lex_allbrackets--;
4691 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4692 /* Got an unpaired \E */
4693 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4694 "Useless use of \\E");
4697 while (PL_bufptr != PL_bufend &&
4698 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4700 PL_thiswhite = newSVpvs("");
4701 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4705 if (PL_bufptr != PL_bufend)
4708 PL_lex_state = LEX_INTERPCONCAT;
4712 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4713 "### Saw case modifier\n"); });
4715 if (s[1] == '\\' && s[2] == 'E') {
4718 PL_thiswhite = newSVpvs("");
4719 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4722 PL_lex_state = LEX_INTERPCONCAT;
4727 if (!PL_madskills) /* when just compiling don't need correct */
4728 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4729 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4730 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4731 (strchr(PL_lex_casestack, 'L')
4732 || strchr(PL_lex_casestack, 'U')
4733 || strchr(PL_lex_casestack, 'F'))) {
4734 PL_lex_casestack[--PL_lex_casemods] = '\0';
4735 PL_lex_allbrackets--;
4738 if (PL_lex_casemods > 10)
4739 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4740 PL_lex_casestack[PL_lex_casemods++] = *s;
4741 PL_lex_casestack[PL_lex_casemods] = '\0';
4742 PL_lex_state = LEX_INTERPCONCAT;
4743 start_force(PL_curforce);
4744 NEXTVAL_NEXTTOKE.ival = 0;
4745 force_next((2<<24)|'(');
4746 start_force(PL_curforce);
4748 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4750 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4752 NEXTVAL_NEXTTOKE.ival = OP_LC;
4754 NEXTVAL_NEXTTOKE.ival = OP_UC;
4756 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4758 NEXTVAL_NEXTTOKE.ival = OP_FC;
4760 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4762 SV* const tmpsv = newSVpvs("\\ ");
4763 /* replace the space with the character we want to escape
4765 SvPVX(tmpsv)[1] = *s;
4771 if (PL_lex_starts) {
4777 sv_free(PL_thistoken);
4778 PL_thistoken = newSVpvs("");
4781 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4782 if (PL_lex_casemods == 1 && PL_lex_inpat)
4791 case LEX_INTERPPUSH:
4792 return REPORT(sublex_push());
4794 case LEX_INTERPSTART:
4795 if (PL_bufptr == PL_bufend)
4796 return REPORT(sublex_done());
4797 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4798 "### Interpolated variable\n"); });
4800 PL_lex_dojoin = (*PL_bufptr == '@');
4801 PL_lex_state = LEX_INTERPNORMAL;
4802 if (PL_lex_dojoin) {
4803 start_force(PL_curforce);
4804 NEXTVAL_NEXTTOKE.ival = 0;
4806 start_force(PL_curforce);
4807 force_ident("\"", '$');
4808 start_force(PL_curforce);
4809 NEXTVAL_NEXTTOKE.ival = 0;
4811 start_force(PL_curforce);
4812 NEXTVAL_NEXTTOKE.ival = 0;
4813 force_next((2<<24)|'(');
4814 start_force(PL_curforce);
4815 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4818 /* Convert (?{...}) and friends to 'do {...}' */
4819 if (PL_lex_inpat && *PL_bufptr == '(') {
4820 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4822 if (*PL_bufptr != '{')
4824 start_force(PL_curforce);
4825 /* XXX probably need a CURMAD(something) here */
4826 PL_expect = XTERMBLOCK;
4830 if (PL_lex_starts++) {
4835 sv_free(PL_thistoken);
4836 PL_thistoken = newSVpvs("");
4839 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4840 if (!PL_lex_casemods && PL_lex_inpat)
4847 case LEX_INTERPENDMAYBE:
4848 if (intuit_more(PL_bufptr)) {
4849 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4855 if (PL_lex_dojoin) {
4856 PL_lex_dojoin = FALSE;
4857 PL_lex_state = LEX_INTERPCONCAT;
4861 sv_free(PL_thistoken);
4862 PL_thistoken = newSVpvs("");
4865 PL_lex_allbrackets--;
4868 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4869 && SvEVALED(PL_lex_repl))
4871 if (PL_bufptr != PL_bufend)
4872 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4875 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4876 re_eval_str. If the here-doc body’s length equals the previous
4877 value of re_eval_start, re_eval_start will now be null. So
4878 check re_eval_str as well. */
4879 if (PL_parser->lex_shared->re_eval_start
4880 || PL_parser->lex_shared->re_eval_str) {
4882 if (*PL_bufptr != ')')
4883 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4885 /* having compiled a (?{..}) expression, return the original
4886 * text too, as a const */
4887 if (PL_parser->lex_shared->re_eval_str) {
4888 sv = PL_parser->lex_shared->re_eval_str;
4889 PL_parser->lex_shared->re_eval_str = NULL;
4891 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4892 SvPV_shrink_to_cur(sv);
4894 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4895 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4896 start_force(PL_curforce);
4897 /* XXX probably need a CURMAD(something) here */
4898 NEXTVAL_NEXTTOKE.opval =
4899 (OP*)newSVOP(OP_CONST, 0,
4902 PL_parser->lex_shared->re_eval_start = NULL;
4908 case LEX_INTERPCONCAT:
4910 if (PL_lex_brackets)
4911 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4912 (long) PL_lex_brackets);
4914 if (PL_bufptr == PL_bufend)
4915 return REPORT(sublex_done());
4917 /* m'foo' still needs to be parsed for possible (?{...}) */
4918 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4919 SV *sv = newSVsv(PL_linestr);
4921 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4925 s = scan_const(PL_bufptr);
4927 PL_lex_state = LEX_INTERPCASEMOD;
4929 PL_lex_state = LEX_INTERPSTART;
4932 if (s != PL_bufptr) {
4933 start_force(PL_curforce);
4935 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4937 NEXTVAL_NEXTTOKE = pl_yylval;
4940 if (PL_lex_starts++) {
4944 sv_free(PL_thistoken);
4945 PL_thistoken = newSVpvs("");
4948 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4949 if (!PL_lex_casemods && PL_lex_inpat)
4962 s = scan_formline(PL_bufptr);
4963 if (!PL_lex_formbrack)
4973 PL_oldoldbufptr = PL_oldbufptr;
4979 sv_free(PL_thistoken);
4982 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4986 if (isIDFIRST_lazy_if(s,UTF))
4989 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4990 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4992 SVs_TEMP | SVf_UTF8),
4993 10, UNI_DISPLAY_ISPRINT))
4994 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4995 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4996 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4997 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5005 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5009 goto fake_eof; /* emulate EOF on ^D or ^Z */
5015 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5018 if (PL_lex_brackets &&
5019 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5020 yyerror((const char *)
5022 ? "Format not terminated"
5023 : "Missing right curly or square bracket"));
5025 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5026 "### Tokener got EOF\n");
5030 if (s++ < PL_bufend)
5031 goto retry; /* ignore stray nulls */
5034 if (!PL_in_eval && !PL_preambled) {
5035 PL_preambled = TRUE;
5041 /* Generate a string of Perl code to load the debugger.
5042 * If PERL5DB is set, it will return the contents of that,
5043 * otherwise a compile-time require of perl5db.pl. */
5045 const char * const pdb = PerlEnv_getenv("PERL5DB");
5048 sv_setpv(PL_linestr, pdb);
5049 sv_catpvs(PL_linestr,";");
5051 SETERRNO(0,SS_NORMAL);
5052 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5055 sv_setpvs(PL_linestr,"");
5056 if (PL_preambleav) {
5057 SV **svp = AvARRAY(PL_preambleav);
5058 SV **const end = svp + AvFILLp(PL_preambleav);
5060 sv_catsv(PL_linestr, *svp);
5062 sv_catpvs(PL_linestr, ";");
5064 sv_free(MUTABLE_SV(PL_preambleav));
5065 PL_preambleav = NULL;
5068 sv_catpvs(PL_linestr,
5069 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5070 if (PL_minus_n || PL_minus_p) {
5071 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5073 sv_catpvs(PL_linestr,"chomp;");
5076 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5077 || *PL_splitstr == '"')
5078 && strchr(PL_splitstr + 1, *PL_splitstr))
5079 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5081 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5082 bytes can be used as quoting characters. :-) */
5083 const char *splits = PL_splitstr;
5084 sv_catpvs(PL_linestr, "our @F=split(q\0");
5087 if (*splits == '\\')
5088 sv_catpvn(PL_linestr, splits, 1);
5089 sv_catpvn(PL_linestr, splits, 1);
5090 } while (*splits++);
5091 /* This loop will embed the trailing NUL of
5092 PL_linestr as the last thing it does before
5094 sv_catpvs(PL_linestr, ");");
5098 sv_catpvs(PL_linestr,"our @F=split(' ');");
5101 sv_catpvs(PL_linestr, "\n");
5102 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5103 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5104 PL_last_lop = PL_last_uni = NULL;
5105 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5106 update_debugger_info(PL_linestr, NULL, 0);
5111 bof = PL_rsfp ? TRUE : FALSE;
5114 fake_eof = LEX_FAKE_EOF;
5116 PL_bufptr = PL_bufend;
5117 COPLINE_INC_WITH_HERELINES;
5118 if (!lex_next_chunk(fake_eof)) {
5119 CopLINE_dec(PL_curcop);
5121 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5123 CopLINE_dec(PL_curcop);
5126 PL_realtokenstart = -1;
5129 /* If it looks like the start of a BOM or raw UTF-16,
5130 * check if it in fact is. */
5131 if (bof && PL_rsfp &&
5136 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5137 bof = (offset == (Off_t)SvCUR(PL_linestr));
5138 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5139 /* offset may include swallowed CR */
5141 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5144 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5145 s = swallow_bom((U8*)s);
5148 if (PL_parser->in_pod) {
5149 /* Incest with pod. */
5152 sv_catsv(PL_thiswhite, PL_linestr);
5154 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5155 sv_setpvs(PL_linestr, "");
5156 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5157 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5158 PL_last_lop = PL_last_uni = NULL;
5159 PL_parser->in_pod = 0;
5162 if (PL_rsfp || PL_parser->filtered)
5164 } while (PL_parser->in_pod);
5165 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5166 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5167 PL_last_lop = PL_last_uni = NULL;
5168 if (CopLINE(PL_curcop) == 1) {
5169 while (s < PL_bufend && isSPACE(*s))
5171 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5175 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5179 if (*s == '#' && *(s+1) == '!')
5181 #ifdef ALTERNATE_SHEBANG
5183 static char const as[] = ALTERNATE_SHEBANG;
5184 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5185 d = s + (sizeof(as) - 1);
5187 #endif /* ALTERNATE_SHEBANG */
5196 while (*d && !isSPACE(*d))
5200 #ifdef ARG_ZERO_IS_SCRIPT
5201 if (ipathend > ipath) {
5203 * HP-UX (at least) sets argv[0] to the script name,
5204 * which makes $^X incorrect. And Digital UNIX and Linux,
5205 * at least, set argv[0] to the basename of the Perl
5206 * interpreter. So, having found "#!", we'll set it right.
5208 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5210 assert(SvPOK(x) || SvGMAGICAL(x));
5211 if (sv_eq(x, CopFILESV(PL_curcop))) {
5212 sv_setpvn(x, ipath, ipathend - ipath);
5218 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5219 const char * const lstart = SvPV_const(x,llen);
5221 bstart += blen - llen;
5222 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5223 sv_setpvn(x, ipath, ipathend - ipath);
5228 TAINT_NOT; /* $^X is always tainted, but that's OK */
5230 #endif /* ARG_ZERO_IS_SCRIPT */
5235 d = instr(s,"perl -");
5237 d = instr(s,"perl");
5239 /* avoid getting into infinite loops when shebang
5240 * line contains "Perl" rather than "perl" */
5242 for (d = ipathend-4; d >= ipath; --d) {
5243 if ((*d == 'p' || *d == 'P')
5244 && !ibcmp(d, "perl", 4))
5254 #ifdef ALTERNATE_SHEBANG
5256 * If the ALTERNATE_SHEBANG on this system starts with a
5257 * character that can be part of a Perl expression, then if
5258 * we see it but not "perl", we're probably looking at the
5259 * start of Perl code, not a request to hand off to some
5260 * other interpreter. Similarly, if "perl" is there, but
5261 * not in the first 'word' of the line, we assume the line
5262 * contains the start of the Perl program.
5264 if (d && *s != '#') {
5265 const char *c = ipath;
5266 while (*c && !strchr("; \t\r\n\f\v#", *c))
5269 d = NULL; /* "perl" not in first word; ignore */
5271 *s = '#'; /* Don't try to parse shebang line */
5273 #endif /* ALTERNATE_SHEBANG */
5278 !instr(s,"indir") &&
5279 instr(PL_origargv[0],"perl"))
5286 while (s < PL_bufend && isSPACE(*s))
5288 if (s < PL_bufend) {
5289 Newx(newargv,PL_origargc+3,char*);
5291 while (s < PL_bufend && !isSPACE(*s))
5294 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5297 newargv = PL_origargv;
5300 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5302 Perl_croak(aTHX_ "Can't exec %s", ipath);
5305 while (*d && !isSPACE(*d))
5307 while (SPACE_OR_TAB(*d))
5311 const bool switches_done = PL_doswitches;
5312 const U32 oldpdb = PL_perldb;
5313 const bool oldn = PL_minus_n;
5314 const bool oldp = PL_minus_p;
5318 bool baduni = FALSE;
5320 const char *d2 = d1 + 1;
5321 if (parse_unicode_opts((const char **)&d2)
5325 if (baduni || *d1 == 'M' || *d1 == 'm') {
5326 const char * const m = d1;
5327 while (*d1 && !isSPACE(*d1))
5329 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5332 d1 = moreswitches(d1);
5334 if (PL_doswitches && !switches_done) {
5335 int argc = PL_origargc;
5336 char **argv = PL_origargv;
5339 } while (argc && argv[0][0] == '-' && argv[0][1]);
5340 init_argv_symbols(argc,argv);
5342 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5343 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5344 /* if we have already added "LINE: while (<>) {",
5345 we must not do it again */
5347 sv_setpvs(PL_linestr, "");
5348 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5349 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5350 PL_last_lop = PL_last_uni = NULL;
5351 PL_preambled = FALSE;
5352 if (PERLDB_LINE || PERLDB_SAVESRC)
5353 (void)gv_fetchfile(PL_origfilename);
5360 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5361 PL_lex_state = LEX_FORMLINE;
5362 start_force(PL_curforce);
5363 NEXTVAL_NEXTTOKE.ival = 0;
5364 force_next(FORMRBRACK);
5369 #ifdef PERL_STRICT_CR
5370 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5372 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5374 case ' ': case '\t': case '\f': case 013:
5376 PL_realtokenstart = -1;
5378 PL_thiswhite = newSVpvs("");
5379 sv_catpvn(PL_thiswhite, s, 1);
5386 PL_realtokenstart = -1;
5390 if (PL_lex_state != LEX_NORMAL ||
5391 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5392 if (*s == '#' && s == PL_linestart && PL_in_eval
5393 && !PL_rsfp && !PL_parser->filtered) {
5394 /* handle eval qq[#line 1 "foo"\n ...] */
5395 CopLINE_dec(PL_curcop);
5398 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5400 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5404 const bool in_comment = *s == '#';
5406 while (d < PL_bufend && *d != '\n')
5410 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5411 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5415 PL_thiswhite = newSVpvn(s, d - s);
5418 if (in_comment && d == PL_bufend
5419 && PL_lex_state == LEX_INTERPNORMAL
5420 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5421 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5424 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5425 PL_lex_state = LEX_FORMLINE;
5426 start_force(PL_curforce);
5427 NEXTVAL_NEXTTOKE.ival = 0;
5428 force_next(FORMRBRACK);
5434 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5435 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5438 TOKEN(PEG); /* make sure any #! line is accessible */
5443 /* if (PL_madskills && PL_lex_formbrack) { */
5445 while (d < PL_bufend && *d != '\n')
5449 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5450 Perl_croak(aTHX_ "panic: input overflow");
5451 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5453 PL_thiswhite = newSVpvs("");
5454 if (CopLINE(PL_curcop) == 1) {
5455 sv_setpvs(PL_thiswhite, "");
5458 sv_catpvn(PL_thiswhite, s, d - s);
5472 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5480 while (s < PL_bufend && SPACE_OR_TAB(*s))
5483 if (strnEQ(s,"=>",2)) {
5484 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5485 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5486 OPERATOR('-'); /* unary minus */
5488 PL_last_uni = PL_oldbufptr;
5490 case 'r': ftst = OP_FTEREAD; break;
5491 case 'w': ftst = OP_FTEWRITE; break;
5492 case 'x': ftst = OP_FTEEXEC; break;
5493 case 'o': ftst = OP_FTEOWNED; break;
5494 case 'R': ftst = OP_FTRREAD; break;
5495 case 'W': ftst = OP_FTRWRITE; break;
5496 case 'X': ftst = OP_FTREXEC; break;
5497 case 'O': ftst = OP_FTROWNED; break;
5498 case 'e': ftst = OP_FTIS; break;
5499 case 'z': ftst = OP_FTZERO; break;
5500 case 's': ftst = OP_FTSIZE; break;
5501 case 'f': ftst = OP_FTFILE; break;
5502 case 'd': ftst = OP_FTDIR; break;
5503 case 'l': ftst = OP_FTLINK; break;
5504 case 'p': ftst = OP_FTPIPE; break;
5505 case 'S': ftst = OP_FTSOCK; break;
5506 case 'u': ftst = OP_FTSUID; break;
5507 case 'g': ftst = OP_FTSGID; break;
5508 case 'k': ftst = OP_FTSVTX; break;
5509 case 'b': ftst = OP_FTBLK; break;
5510 case 'c': ftst = OP_FTCHR; break;
5511 case 't': ftst = OP_FTTTY; break;
5512 case 'T': ftst = OP_FTTEXT; break;
5513 case 'B': ftst = OP_FTBINARY; break;
5514 case 'M': case 'A': case 'C':
5515 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5517 case 'M': ftst = OP_FTMTIME; break;
5518 case 'A': ftst = OP_FTATIME; break;
5519 case 'C': ftst = OP_FTCTIME; break;
5527 PL_last_lop_op = (OPCODE)ftst;
5528 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5529 "### Saw file test %c\n", (int)tmp);
5534 /* Assume it was a minus followed by a one-letter named
5535 * subroutine call (or a -bareword), then. */
5536 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5537 "### '-%c' looked like a file test but was not\n",
5544 const char tmp = *s++;
5547 if (PL_expect == XOPERATOR)
5552 else if (*s == '>') {
5555 if (isIDFIRST_lazy_if(s,UTF)) {
5556 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5564 if (PL_expect == XOPERATOR) {
5565 if (*s == '=' && !PL_lex_allbrackets &&
5566 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5573 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5575 OPERATOR('-'); /* unary minus */
5581 const char tmp = *s++;
5584 if (PL_expect == XOPERATOR)
5589 if (PL_expect == XOPERATOR) {
5590 if (*s == '=' && !PL_lex_allbrackets &&
5591 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5598 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5605 if (PL_expect != XOPERATOR) {
5606 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5607 PL_expect = XOPERATOR;
5608 force_ident(PL_tokenbuf, '*');
5616 if (*s == '=' && !PL_lex_allbrackets &&
5617 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5623 if (*s == '=' && !PL_lex_allbrackets &&
5624 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5631 if (PL_expect == XOPERATOR) {
5632 if (s[1] == '=' && !PL_lex_allbrackets &&
5633 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5638 PL_tokenbuf[0] = '%';
5639 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5640 sizeof PL_tokenbuf - 1, FALSE);
5641 if (!PL_tokenbuf[1]) {
5644 PL_expect = XOPERATOR;
5645 force_ident_maybe_lex('%');
5649 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5650 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5655 if (PL_lex_brackets > 100)
5656 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5657 PL_lex_brackstack[PL_lex_brackets++] = 0;
5658 PL_lex_allbrackets++;
5660 const char tmp = *s++;
5665 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5667 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5675 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5682 goto just_a_word_zero_gv;
5685 switch (PL_expect) {
5691 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5693 PL_bufptr = s; /* update in case we back off */
5696 "Use of := for an empty attribute list is not allowed");
5703 PL_expect = XTERMBLOCK;
5706 stuffstart = s - SvPVX(PL_linestr) - 1;
5710 while (isIDFIRST_lazy_if(s,UTF)) {
5713 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5714 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5715 if (tmp < 0) tmp = -tmp;
5730 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5732 d = scan_str(d,TRUE,TRUE,FALSE);
5734 /* MUST advance bufptr here to avoid bogus
5735 "at end of line" context messages from yyerror().
5737 PL_bufptr = s + len;
5738 yyerror("Unterminated attribute parameter in attribute list");
5742 return REPORT(0); /* EOF indicator */
5746 sv_catsv(sv, PL_lex_stuff);
5747 attrs = op_append_elem(OP_LIST, attrs,
5748 newSVOP(OP_CONST, 0, sv));
5749 SvREFCNT_dec(PL_lex_stuff);
5750 PL_lex_stuff = NULL;
5753 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5755 if (PL_in_my == KEY_our) {
5756 deprecate(":unique");
5759 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5762 /* NOTE: any CV attrs applied here need to be part of
5763 the CVf_BUILTIN_ATTRS define in cv.h! */
5764 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5766 CvLVALUE_on(PL_compcv);
5768 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5770 deprecate(":locked");
5772 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5774 CvMETHOD_on(PL_compcv);
5776 /* After we've set the flags, it could be argued that
5777 we don't need to do the attributes.pm-based setting
5778 process, and shouldn't bother appending recognized
5779 flags. To experiment with that, uncomment the
5780 following "else". (Note that's already been
5781 uncommented. That keeps the above-applied built-in
5782 attributes from being intercepted (and possibly
5783 rejected) by a package's attribute routines, but is
5784 justified by the performance win for the common case
5785 of applying only built-in attributes.) */
5787 attrs = op_append_elem(OP_LIST, attrs,
5788 newSVOP(OP_CONST, 0,
5792 if (*s == ':' && s[1] != ':')
5795 break; /* require real whitespace or :'s */
5796 /* XXX losing whitespace on sequential attributes here */
5800 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5801 if (*s != ';' && *s != '}' && *s != tmp
5802 && (tmp != '=' || *s != ')')) {
5803 const char q = ((*s == '\'') ? '"' : '\'');
5804 /* If here for an expression, and parsed no attrs, back
5806 if (tmp == '=' && !attrs) {
5810 /* MUST advance bufptr here to avoid bogus "at end of line"
5811 context messages from yyerror().
5814 yyerror( (const char *)
5816 ? Perl_form(aTHX_ "Invalid separator character "
5817 "%c%c%c in attribute list", q, *s, q)
5818 : "Unterminated attribute list" ) );
5826 start_force(PL_curforce);
5827 NEXTVAL_NEXTTOKE.opval = attrs;
5828 CURMAD('_', PL_nextwhite);
5833 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5834 (s - SvPVX(PL_linestr)) - stuffstart);
5839 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5843 PL_lex_allbrackets--;
5847 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5848 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5852 PL_lex_allbrackets++;
5855 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5864 PL_lex_allbrackets--;
5870 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5873 if (PL_lex_brackets <= 0)
5874 yyerror("Unmatched right square bracket");
5877 PL_lex_allbrackets--;
5878 if (PL_lex_state == LEX_INTERPNORMAL) {
5879 if (PL_lex_brackets == 0) {
5880 if (*s == '-' && s[1] == '>')
5881 PL_lex_state = LEX_INTERPENDMAYBE;
5882 else if (*s != '[' && *s != '{')
5883 PL_lex_state = LEX_INTERPEND;
5890 if (PL_lex_brackets > 100) {
5891 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5893 switch (PL_expect) {
5895 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5896 PL_lex_allbrackets++;
5897 OPERATOR(HASHBRACK);
5899 while (s < PL_bufend && SPACE_OR_TAB(*s))
5902 PL_tokenbuf[0] = '\0';
5903 if (d < PL_bufend && *d == '-') {
5904 PL_tokenbuf[0] = '-';
5906 while (d < PL_bufend && SPACE_OR_TAB(*d))
5909 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5910 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5912 while (d < PL_bufend && SPACE_OR_TAB(*d))
5915 const char minus = (PL_tokenbuf[0] == '-');
5916 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5924 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5925 PL_lex_allbrackets++;
5930 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5931 PL_lex_allbrackets++;
5936 if (PL_oldoldbufptr == PL_last_lop)
5937 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5939 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5940 PL_lex_allbrackets++;
5943 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5945 /* This hack is to get the ${} in the message. */
5947 yyerror("syntax error");
5950 OPERATOR(HASHBRACK);
5952 /* This hack serves to disambiguate a pair of curlies
5953 * as being a block or an anon hash. Normally, expectation
5954 * determines that, but in cases where we're not in a
5955 * position to expect anything in particular (like inside
5956 * eval"") we have to resolve the ambiguity. This code
5957 * covers the case where the first term in the curlies is a
5958 * quoted string. Most other cases need to be explicitly
5959 * disambiguated by prepending a "+" before the opening
5960 * curly in order to force resolution as an anon hash.
5962 * XXX should probably propagate the outer expectation
5963 * into eval"" to rely less on this hack, but that could
5964 * potentially break current behavior of eval"".
5968 if (*s == '\'' || *s == '"' || *s == '`') {
5969 /* common case: get past first string, handling escapes */
5970 for (t++; t < PL_bufend && *t != *s;)
5971 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5975 else if (*s == 'q') {
5978 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5981 /* skip q//-like construct */
5983 char open, close, term;
5986 while (t < PL_bufend && isSPACE(*t))
5988 /* check for q => */
5989 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5990 OPERATOR(HASHBRACK);
5994 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5998 for (t++; t < PL_bufend; t++) {
5999 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6001 else if (*t == open)
6005 for (t++; t < PL_bufend; t++) {
6006 if (*t == '\\' && t+1 < PL_bufend)
6008 else if (*t == close && --brackets <= 0)
6010 else if (*t == open)
6017 /* skip plain q word */
6018 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
6021 else if (isALNUM_lazy_if(t,UTF)) {
6023 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
6026 while (t < PL_bufend && isSPACE(*t))
6028 /* if comma follows first term, call it an anon hash */
6029 /* XXX it could be a comma expression with loop modifiers */
6030 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6031 || (*t == '=' && t[1] == '>')))
6032 OPERATOR(HASHBRACK);
6033 if (PL_expect == XREF)
6036 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6042 pl_yylval.ival = CopLINE(PL_curcop);
6043 if (isSPACE(*s) || *s == '#')
6044 PL_copline = NOLINE; /* invalidate current command line number */
6045 TOKEN(formbrack ? '=' : '{');
6047 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6051 if (PL_lex_brackets <= 0)
6052 yyerror("Unmatched right curly bracket");
6054 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6055 PL_lex_allbrackets--;
6056 if (PL_lex_state == LEX_INTERPNORMAL) {
6057 if (PL_lex_brackets == 0) {
6058 if (PL_expect & XFAKEBRACK) {
6059 PL_expect &= XENUMMASK;
6060 PL_lex_state = LEX_INTERPEND;
6065 PL_thiswhite = newSVpvs("");
6066 sv_catpvs(PL_thiswhite,"}");
6069 return yylex(); /* ignore fake brackets */
6071 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6072 && SvEVALED(PL_lex_repl))
6073 PL_lex_state = LEX_INTERPEND;
6074 else if (*s == '-' && s[1] == '>')
6075 PL_lex_state = LEX_INTERPENDMAYBE;
6076 else if (*s != '[' && *s != '{')
6077 PL_lex_state = LEX_INTERPEND;
6080 if (PL_expect & XFAKEBRACK) {
6081 PL_expect &= XENUMMASK;
6083 return yylex(); /* ignore fake brackets */
6085 start_force(PL_curforce);
6087 curmad('X', newSVpvn(s-1,1));
6088 CURMAD('_', PL_thiswhite);
6090 force_next(formbrack ? '.' : '}');
6091 if (formbrack) LEAVE;
6094 PL_thistoken = newSVpvs("");
6096 if (formbrack == 2) { /* means . where arguments were expected */
6097 start_force(PL_curforce);
6105 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6106 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6113 if (PL_expect == XOPERATOR) {
6114 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6115 && isIDFIRST_lazy_if(s,UTF))
6117 CopLINE_dec(PL_curcop);
6118 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6119 CopLINE_inc(PL_curcop);
6121 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6122 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6129 PL_tokenbuf[0] = '&';
6130 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6131 sizeof PL_tokenbuf - 1, TRUE);
6132 if (PL_tokenbuf[1]) {
6133 PL_expect = XOPERATOR;
6134 force_ident_maybe_lex('&');
6138 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6144 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6145 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6152 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6153 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6161 const char tmp = *s++;
6163 if (!PL_lex_allbrackets &&
6164 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6171 if (!PL_lex_allbrackets &&
6172 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6180 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6181 && strchr("+-*/%.^&|<",tmp))
6182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6183 "Reversed %c= operator",(int)tmp);
6185 if (PL_expect == XSTATE && isALPHA(tmp) &&
6186 (s == PL_linestart+1 || s[-2] == '\n') )
6188 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6189 || PL_lex_state != LEX_NORMAL) {
6194 if (strnEQ(s,"=cut",4)) {
6210 PL_thiswhite = newSVpvs("");
6211 sv_catpvn(PL_thiswhite, PL_linestart,
6212 PL_bufend - PL_linestart);
6216 PL_parser->in_pod = 1;
6220 if (PL_expect == XBLOCK) {
6222 #ifdef PERL_STRICT_CR
6223 while (SPACE_OR_TAB(*t))
6225 while (SPACE_OR_TAB(*t) || *t == '\r')
6228 if (*t == '\n' || *t == '#') {
6231 SAVEI8(PL_parser->form_lex_state);
6232 SAVEI32(PL_lex_formbrack);
6233 PL_parser->form_lex_state = PL_lex_state;
6234 PL_lex_formbrack = PL_lex_brackets + 1;
6238 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6247 const char tmp = *s++;
6249 /* was this !=~ where !~ was meant?
6250 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6252 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6253 const char *t = s+1;
6255 while (t < PL_bufend && isSPACE(*t))
6258 if (*t == '/' || *t == '?' ||
6259 ((*t == 'm' || *t == 's' || *t == 'y')
6260 && !isALNUM(t[1])) ||
6261 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6262 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6263 "!=~ should be !~");
6265 if (!PL_lex_allbrackets &&
6266 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6278 if (PL_expect != XOPERATOR) {
6279 if (s[1] != '<' && !strchr(s,'>'))
6282 s = scan_heredoc(s);
6284 s = scan_inputsymbol(s);
6285 PL_expect = XOPERATOR;
6286 TOKEN(sublex_start());
6292 if (*s == '=' && !PL_lex_allbrackets &&
6293 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6297 SHop(OP_LEFT_SHIFT);
6302 if (!PL_lex_allbrackets &&
6303 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6310 if (!PL_lex_allbrackets &&
6311 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6319 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6327 const char tmp = *s++;
6329 if (*s == '=' && !PL_lex_allbrackets &&
6330 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6334 SHop(OP_RIGHT_SHIFT);
6336 else if (tmp == '=') {
6337 if (!PL_lex_allbrackets &&
6338 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6346 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6355 if (PL_expect == XOPERATOR) {
6356 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6357 return deprecate_commaless_var_list();
6361 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6362 PL_tokenbuf[0] = '@';
6363 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6364 sizeof PL_tokenbuf - 1, FALSE);
6365 if (PL_expect == XOPERATOR)
6366 no_op("Array length", s);
6367 if (!PL_tokenbuf[1])
6369 PL_expect = XOPERATOR;
6370 force_ident_maybe_lex('#');
6374 PL_tokenbuf[0] = '$';
6375 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6376 sizeof PL_tokenbuf - 1, FALSE);
6377 if (PL_expect == XOPERATOR)
6379 if (!PL_tokenbuf[1]) {
6381 yyerror("Final $ should be \\$ or $name");
6387 const char tmp = *s;
6388 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6391 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6392 && intuit_more(s)) {
6394 PL_tokenbuf[0] = '@';
6395 if (ckWARN(WARN_SYNTAX)) {
6398 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6401 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6402 while (t < PL_bufend && *t != ']')
6404 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6405 "Multidimensional syntax %.*s not supported",
6406 (int)((t - PL_bufptr) + 1), PL_bufptr);
6410 else if (*s == '{') {
6412 PL_tokenbuf[0] = '%';
6413 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6414 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6416 char tmpbuf[sizeof PL_tokenbuf];
6419 } while (isSPACE(*t));
6420 if (isIDFIRST_lazy_if(t,UTF)) {
6422 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6427 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6429 "You need to quote \"%"SVf"\"",
6430 SVfARG(newSVpvn_flags(tmpbuf, len,
6431 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6437 PL_expect = XOPERATOR;
6438 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6439 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6440 if (!islop || PL_last_lop_op == OP_GREPSTART)
6441 PL_expect = XOPERATOR;
6442 else if (strchr("$@\"'`q", *s))
6443 PL_expect = XTERM; /* e.g. print $fh "foo" */
6444 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6445 PL_expect = XTERM; /* e.g. print $fh &sub */
6446 else if (isIDFIRST_lazy_if(s,UTF)) {
6447 char tmpbuf[sizeof PL_tokenbuf];
6449 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6450 if ((t2 = keyword(tmpbuf, len, 0))) {
6451 /* binary operators exclude handle interpretations */
6463 PL_expect = XTERM; /* e.g. print $fh length() */
6468 PL_expect = XTERM; /* e.g. print $fh subr() */
6471 else if (isDIGIT(*s))
6472 PL_expect = XTERM; /* e.g. print $fh 3 */
6473 else if (*s == '.' && isDIGIT(s[1]))
6474 PL_expect = XTERM; /* e.g. print $fh .3 */
6475 else if ((*s == '?' || *s == '-' || *s == '+')
6476 && !isSPACE(s[1]) && s[1] != '=')
6477 PL_expect = XTERM; /* e.g. print $fh -1 */
6478 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6480 PL_expect = XTERM; /* e.g. print $fh /.../
6481 XXX except DORDOR operator
6483 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6485 PL_expect = XTERM; /* print $fh <<"EOF" */
6488 force_ident_maybe_lex('$');
6492 if (PL_expect == XOPERATOR)
6494 PL_tokenbuf[0] = '@';
6495 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6496 if (!PL_tokenbuf[1]) {
6499 if (PL_lex_state == LEX_NORMAL)
6501 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6503 PL_tokenbuf[0] = '%';
6505 /* Warn about @ where they meant $. */
6506 if (*s == '[' || *s == '{') {
6507 if (ckWARN(WARN_SYNTAX)) {
6508 const char *t = s + 1;
6509 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6510 t += UTF ? UTF8SKIP(t) : 1;
6511 if (*t == '}' || *t == ']') {
6513 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6514 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6515 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6516 "Scalar value %"SVf" better written as $%"SVf,
6517 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6518 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6519 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6520 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6525 PL_expect = XOPERATOR;
6526 force_ident_maybe_lex('@');
6529 case '/': /* may be division, defined-or, or pattern */
6530 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6531 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6532 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6537 case '?': /* may either be conditional or pattern */
6538 if (PL_expect == XOPERATOR) {
6541 if (!PL_lex_allbrackets &&
6542 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6546 PL_lex_allbrackets++;
6552 /* A // operator. */
6553 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6554 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6555 LEX_FAKEEOF_LOGIC)) {
6563 if (*s == '=' && !PL_lex_allbrackets &&
6564 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6573 /* Disable warning on "study /blah/" */
6574 if (PL_oldoldbufptr == PL_last_uni
6575 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6576 || memNE(PL_last_uni, "study", 5)
6577 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6581 deprecate("?PATTERN? without explicit operator");
6582 s = scan_pat(s,OP_MATCH);
6583 TERM(sublex_start());
6587 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6588 #ifdef PERL_STRICT_CR
6591 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6593 && (s == PL_linestart || s[-1] == '\n') )
6596 formbrack = 2; /* dot seen where arguments expected */
6599 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6603 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6606 if (!PL_lex_allbrackets &&
6607 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6614 pl_yylval.ival = OPf_SPECIAL;
6620 if (*s == '=' && !PL_lex_allbrackets &&
6621 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6628 case '0': case '1': case '2': case '3': case '4':
6629 case '5': case '6': case '7': case '8': case '9':
6630 s = scan_num(s, &pl_yylval);
6631 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6632 if (PL_expect == XOPERATOR)
6637 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6638 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6639 if (PL_expect == XOPERATOR) {
6640 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6641 return deprecate_commaless_var_list();
6648 pl_yylval.ival = OP_CONST;
6649 TERM(sublex_start());
6652 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6653 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6654 if (PL_expect == XOPERATOR) {
6655 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6656 return deprecate_commaless_var_list();
6663 pl_yylval.ival = OP_CONST;
6664 /* FIXME. I think that this can be const if char *d is replaced by
6665 more localised variables. */
6666 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6667 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6668 pl_yylval.ival = OP_STRINGIFY;
6672 TERM(sublex_start());
6675 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6676 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6677 if (PL_expect == XOPERATOR)
6678 no_op("Backticks",s);
6681 readpipe_override();
6682 TERM(sublex_start());
6686 if (PL_lex_inwhat && isDIGIT(*s))
6687 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6689 if (PL_expect == XOPERATOR)
6690 no_op("Backslash",s);
6694 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6695 char *start = s + 2;
6696 while (isDIGIT(*start) || *start == '_')
6698 if (*start == '.' && isDIGIT(start[1])) {
6699 s = scan_num(s, &pl_yylval);
6702 else if ((*start == ':' && start[1] == ':')
6703 || (PL_expect == XSTATE && *start == ':'))
6705 else if (PL_expect == XSTATE) {
6707 while (d < PL_bufend && isSPACE(*d)) d++;
6708 if (*d == ':') goto keylookup;
6710 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6711 if (!isALPHA(*start) && (PL_expect == XTERM
6712 || PL_expect == XREF || PL_expect == XSTATE
6713 || PL_expect == XTERMORDORDOR)) {
6714 GV *const gv = gv_fetchpvn_flags(s, start - s,
6715 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6717 s = scan_num(s, &pl_yylval);
6724 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6777 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6779 /* Some keywords can be followed by any delimiter, including ':' */
6780 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6782 /* x::* is just a word, unless x is "CORE" */
6783 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6787 while (d < PL_bufend && isSPACE(*d))
6788 d++; /* no comments skipped here, or s### is misparsed */
6790 /* Is this a word before a => operator? */
6791 if (*d == '=' && d[1] == '>') {
6794 = (OP*)newSVOP(OP_CONST, 0,
6795 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6796 pl_yylval.opval->op_private = OPpCONST_BARE;
6800 /* Check for plugged-in keyword */
6804 char *saved_bufptr = PL_bufptr;
6806 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6808 if (result == KEYWORD_PLUGIN_DECLINE) {
6809 /* not a plugged-in keyword */
6810 PL_bufptr = saved_bufptr;
6811 } else if (result == KEYWORD_PLUGIN_STMT) {
6812 pl_yylval.opval = o;
6815 return REPORT(PLUGSTMT);
6816 } else if (result == KEYWORD_PLUGIN_EXPR) {
6817 pl_yylval.opval = o;
6819 PL_expect = XOPERATOR;
6820 return REPORT(PLUGEXPR);
6822 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6827 /* Check for built-in keyword */
6828 tmp = keyword(PL_tokenbuf, len, 0);
6830 /* Is this a label? */
6831 if (!anydelim && PL_expect == XSTATE
6832 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6834 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6835 pl_yylval.pval[len] = '\0';
6836 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6841 /* Check for lexical sub */
6842 if (PL_expect != XOPERATOR) {
6843 char tmpbuf[sizeof PL_tokenbuf + 1];
6845 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6846 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6847 if (off != NOT_IN_PAD) {
6848 assert(off); /* we assume this is boolean-true below */
6849 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6850 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6851 HEK * const stashname = HvNAME_HEK(stash);
6852 sv = newSVhek(stashname);
6853 sv_catpvs(sv, "::");
6854 sv_catpvn_flags(sv, PL_tokenbuf, len,
6855 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6856 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6861 rv2cv_op = newOP(OP_PADANY, 0);
6862 rv2cv_op->op_targ = off;
6863 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6864 cv = (CV *)PAD_SV(off);
6872 if (tmp < 0) { /* second-class keyword? */
6873 GV *ogv = NULL; /* override (winner) */
6874 GV *hgv = NULL; /* hidden (loser) */
6875 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6877 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6878 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6881 if (GvIMPORTED_CV(gv))
6883 else if (! CvMETHOD(cv))
6887 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6888 UTF ? -(I32)len : (I32)len, FALSE)) &&
6889 (gv = *gvp) && isGV_with_GP(gv) &&
6890 GvCVu(gv) && GvIMPORTED_CV(gv))
6897 tmp = 0; /* overridden by import or by GLOBAL */
6900 && -tmp==KEY_lock /* XXX generalizable kludge */
6903 tmp = 0; /* any sub overrides "weak" keyword */
6905 else { /* no override */
6907 if (tmp == KEY_dump) {
6908 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6909 "dump() better written as CORE::dump()");
6913 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6914 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6915 "Ambiguous call resolved as CORE::%s(), "
6916 "qualify as such or use &",
6924 default: /* not a keyword */
6925 /* Trade off - by using this evil construction we can pull the
6926 variable gv into the block labelled keylookup. If not, then
6927 we have to give it function scope so that the goto from the
6928 earlier ':' case doesn't bypass the initialisation. */
6930 just_a_word_zero_gv:
6942 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6943 const char penultchar =
6944 lastchar && PL_bufptr - 2 >= PL_linestart
6948 SV *nextPL_nextwhite = 0;
6952 /* Get the rest if it looks like a package qualifier */
6954 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6956 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6959 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6960 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6961 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6962 *s == '\'' ? "'" : "::");
6967 if (PL_expect == XOPERATOR) {
6968 if (PL_bufptr == PL_linestart) {
6969 CopLINE_dec(PL_curcop);
6970 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6971 CopLINE_inc(PL_curcop);
6974 no_op("Bareword",s);
6977 /* Look for a subroutine with this name in current package,
6978 unless this is a lexical sub, or name is "Foo::",
6979 in which case Foo is a bareword
6980 (and a package name). */
6982 if (len > 2 && !PL_madskills &&
6983 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6985 if (ckWARN(WARN_BAREWORD)
6986 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6987 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6988 "Bareword \"%"SVf"\" refers to nonexistent package",
6989 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6990 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6992 PL_tokenbuf[len] = '\0';
6998 /* Mustn't actually add anything to a symbol table.
6999 But also don't want to "initialise" any placeholder
7000 constants that might already be there into full
7001 blown PVGVs with attached PVCV. */
7002 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7003 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7009 /* if we saw a global override before, get the right name */
7012 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7013 len ? len : strlen(PL_tokenbuf));
7015 SV * const tmp_sv = sv;
7016 sv = newSVpvs("CORE::GLOBAL::");
7017 sv_catsv(sv, tmp_sv);
7018 SvREFCNT_dec(tmp_sv);
7022 if (PL_madskills && !PL_thistoken) {
7023 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7024 PL_thistoken = newSVpvn(start,s - start);
7025 PL_realtokenstart = s - SvPVX(PL_linestr);
7029 /* Presume this is going to be a bareword of some sort. */
7031 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7032 pl_yylval.opval->op_private = OPpCONST_BARE;
7034 /* And if "Foo::", then that's what it certainly is. */
7040 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7041 const_op->op_private = OPpCONST_BARE;
7042 rv2cv_op = newCVREF(0, const_op);
7043 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7046 /* See if it's the indirect object for a list operator. */
7048 if (PL_oldoldbufptr &&
7049 PL_oldoldbufptr < PL_bufptr &&
7050 (PL_oldoldbufptr == PL_last_lop
7051 || PL_oldoldbufptr == PL_last_uni) &&
7052 /* NO SKIPSPACE BEFORE HERE! */
7053 (PL_expect == XREF ||
7054 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7056 bool immediate_paren = *s == '(';
7058 /* (Now we can afford to cross potential line boundary.) */
7059 s = SKIPSPACE2(s,nextPL_nextwhite);
7061 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7064 /* Two barewords in a row may indicate method call. */
7066 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7067 (tmp = intuit_method(s, gv, cv))) {
7069 if (tmp == METHOD && !PL_lex_allbrackets &&
7070 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7071 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7075 /* If not a declared subroutine, it's an indirect object. */
7076 /* (But it's an indir obj regardless for sort.) */
7077 /* Also, if "_" follows a filetest operator, it's a bareword */
7080 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7082 (PL_last_lop_op != OP_MAPSTART &&
7083 PL_last_lop_op != OP_GREPSTART))))
7084 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7085 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7088 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7093 PL_expect = XOPERATOR;
7096 s = SKIPSPACE2(s,nextPL_nextwhite);
7097 PL_nextwhite = nextPL_nextwhite;
7102 /* Is this a word before a => operator? */
7103 if (*s == '=' && s[1] == '>' && !pkgname) {
7106 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7107 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7108 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7112 /* If followed by a paren, it's certainly a subroutine. */
7117 while (SPACE_OR_TAB(*d))
7119 if (*d == ')' && (sv = cv_const_sv(cv))) {
7126 PL_nextwhite = PL_thiswhite;
7129 start_force(PL_curforce);
7131 NEXTVAL_NEXTTOKE.opval =
7132 off ? rv2cv_op : pl_yylval.opval;
7133 PL_expect = XOPERATOR;
7136 PL_nextwhite = nextPL_nextwhite;
7137 curmad('X', PL_thistoken);
7138 PL_thistoken = newSVpvs("");
7142 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7143 else op_free(rv2cv_op), force_next(WORD);
7148 /* If followed by var or block, call it a method (unless sub) */
7150 if ((*s == '$' || *s == '{') && !cv) {
7152 PL_last_lop = PL_oldbufptr;
7153 PL_last_lop_op = OP_METHOD;
7154 if (!PL_lex_allbrackets &&
7155 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7156 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7160 /* If followed by a bareword, see if it looks like indir obj. */
7163 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7164 && (tmp = intuit_method(s, gv, cv))) {
7166 if (tmp == METHOD && !PL_lex_allbrackets &&
7167 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7168 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7172 /* Not a method, so call it a subroutine (if defined) */
7175 if (lastchar == '-' && penultchar != '-') {
7176 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7177 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7178 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7179 SVfARG(tmpsv), SVfARG(tmpsv));
7181 /* Check for a constant sub */
7182 if ((sv = cv_const_sv(cv))) {
7185 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7186 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7187 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7188 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7192 op_free(pl_yylval.opval);
7193 pl_yylval.opval = rv2cv_op;
7194 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7195 PL_last_lop = PL_oldbufptr;
7196 PL_last_lop_op = OP_ENTERSUB;
7197 /* Is there a prototype? */
7204 STRLEN protolen = CvPROTOLEN(cv);
7205 const char *proto = CvPROTO(cv);
7209 if ((optional = *proto == ';'))
7212 while (*proto == ';');
7216 *proto == '$' || *proto == '_'
7217 || *proto == '*' || *proto == '+'
7222 *proto == '\\' && proto[1] && proto[2] == '\0'
7225 UNIPROTO(UNIOPSUB,optional);
7226 if (*proto == '\\' && proto[1] == '[') {
7227 const char *p = proto + 2;
7228 while(*p && *p != ']')
7230 if(*p == ']' && !p[1])
7231 UNIPROTO(UNIOPSUB,optional);
7233 if (*proto == '&' && *s == '{') {
7235 sv_setpvs(PL_subname, "__ANON__");
7237 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7238 if (!PL_lex_allbrackets &&
7239 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7240 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7247 PL_nextwhite = PL_thiswhite;
7250 start_force(PL_curforce);
7251 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7254 PL_nextwhite = nextPL_nextwhite;
7255 curmad('X', PL_thistoken);
7256 PL_thistoken = newSVpvs("");
7258 force_next(off ? PRIVATEREF : WORD);
7259 if (!PL_lex_allbrackets &&
7260 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7261 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7266 /* Guess harder when madskills require "best effort". */
7267 if (PL_madskills && (!gv || !GvCVu(gv))) {
7268 int probable_sub = 0;
7269 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7271 else if (isALPHA(*s)) {
7275 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7276 if (!keyword(tmpbuf, tmplen, 0))
7279 while (d < PL_bufend && isSPACE(*d))
7281 if (*d == '=' && d[1] == '>')
7286 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7288 op_free(pl_yylval.opval);
7289 pl_yylval.opval = rv2cv_op;
7290 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7291 PL_last_lop = PL_oldbufptr;
7292 PL_last_lop_op = OP_ENTERSUB;
7293 PL_nextwhite = PL_thiswhite;
7295 start_force(PL_curforce);
7296 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7298 PL_nextwhite = nextPL_nextwhite;
7299 curmad('X', PL_thistoken);
7300 PL_thistoken = newSVpvs("");
7301 force_next(off ? PRIVATEREF : WORD);
7302 if (!PL_lex_allbrackets &&
7303 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7304 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7308 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7310 force_next(off ? PRIVATEREF : WORD);
7311 if (!PL_lex_allbrackets &&
7312 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7313 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7318 /* Call it a bare word */
7320 if (PL_hints & HINT_STRICT_SUBS)
7321 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7324 /* after "print" and similar functions (corresponding to
7325 * "F? L" in opcode.pl), whatever wasn't already parsed as
7326 * a filehandle should be subject to "strict subs".
7327 * Likewise for the optional indirect-object argument to system
7328 * or exec, which can't be a bareword */
7329 if ((PL_last_lop_op == OP_PRINT
7330 || PL_last_lop_op == OP_PRTF
7331 || PL_last_lop_op == OP_SAY
7332 || PL_last_lop_op == OP_SYSTEM
7333 || PL_last_lop_op == OP_EXEC)
7334 && (PL_hints & HINT_STRICT_SUBS))
7335 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7336 if (lastchar != '-') {
7337 if (ckWARN(WARN_RESERVED)) {
7341 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7342 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7350 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7351 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7352 "Operator or semicolon missing before %c%"SVf,
7353 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7354 strlen(PL_tokenbuf),
7355 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7356 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7357 "Ambiguous use of %c resolved as operator %c",
7358 lastchar, lastchar);
7365 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7370 (OP*)newSVOP(OP_CONST, 0,
7371 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7374 case KEY___PACKAGE__:
7376 (OP*)newSVOP(OP_CONST, 0,
7378 ? newSVhek(HvNAME_HEK(PL_curstash))
7385 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7386 const char *pname = "main";
7389 if (PL_tokenbuf[2] == 'D')
7392 PL_curstash ? PL_curstash : PL_defstash;
7393 pname = HvNAME_get(stash);
7394 plen = HvNAMELEN (stash);
7395 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7397 gv = gv_fetchpvn_flags(
7398 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7399 plen+6, GV_ADD|putf8, SVt_PVIO
7403 GvIOp(gv) = newIO();
7404 IoIFP(GvIOp(gv)) = PL_rsfp;
7405 #if defined(HAS_FCNTL) && defined(F_SETFD)
7407 const int fd = PerlIO_fileno(PL_rsfp);
7408 fcntl(fd,F_SETFD,fd >= 3);
7411 /* Mark this internal pseudo-handle as clean */
7412 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7413 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7414 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7416 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7417 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7418 /* if the script was opened in binmode, we need to revert
7419 * it to text mode for compatibility; but only iff it has CRs
7420 * XXX this is a questionable hack at best. */
7421 if (PL_bufend-PL_bufptr > 2
7422 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7425 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7426 loc = PerlIO_tell(PL_rsfp);
7427 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7430 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7432 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7433 #endif /* NETWARE */
7435 PerlIO_seek(PL_rsfp, loc, 0);
7439 #ifdef PERLIO_LAYERS
7442 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7443 else if (PL_encoding) {
7450 XPUSHs(PL_encoding);
7452 call_method("name", G_SCALAR);
7456 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7457 Perl_form(aTHX_ ":encoding(%"SVf")",
7466 if (PL_realtokenstart >= 0) {
7467 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7469 PL_endwhite = newSVpvs("");
7470 sv_catsv(PL_endwhite, PL_thiswhite);
7472 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7473 PL_realtokenstart = -1;
7475 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7485 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7494 if (PL_expect == XSTATE) {
7501 if (*s == ':' && s[1] == ':') {
7505 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7506 if ((*s == ':' && s[1] == ':')
7507 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7511 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7515 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7516 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7517 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7520 else if (tmp == KEY_require || tmp == KEY_do
7522 /* that's a way to remember we saw "CORE::" */
7535 LOP(OP_ACCEPT,XTERM);
7538 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7543 LOP(OP_ATAN2,XTERM);
7549 LOP(OP_BINMODE,XTERM);
7552 LOP(OP_BLESS,XTERM);
7561 /* We have to disambiguate the two senses of
7562 "continue". If the next token is a '{' then
7563 treat it as the start of a continue block;
7564 otherwise treat it as a control operator.
7574 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7584 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7593 if (!PL_cryptseen) {
7594 PL_cryptseen = TRUE;
7598 LOP(OP_CRYPT,XTERM);
7601 LOP(OP_CHMOD,XTERM);
7604 LOP(OP_CHOWN,XTERM);
7607 LOP(OP_CONNECT,XTERM);
7627 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7629 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7632 force_ident_maybe_lex('&');
7637 if (orig_keyword == KEY_do) {
7646 PL_hints |= HINT_BLOCK_SCOPE;
7656 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7657 STR_WITH_LEN("NDBM_File::"),
7658 STR_WITH_LEN("DB_File::"),
7659 STR_WITH_LEN("GDBM_File::"),
7660 STR_WITH_LEN("SDBM_File::"),
7661 STR_WITH_LEN("ODBM_File::"),
7663 LOP(OP_DBMOPEN,XTERM);
7669 PL_expect = XOPERATOR;
7670 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7677 pl_yylval.ival = CopLINE(PL_curcop);
7681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7695 if (*s == '{') { /* block eval */
7696 PL_expect = XTERMBLOCK;
7697 UNIBRACK(OP_ENTERTRY);
7699 else { /* string eval */
7701 UNIBRACK(OP_ENTEREVAL);
7706 UNIBRACK(-OP_ENTEREVAL);
7720 case KEY_endhostent:
7726 case KEY_endservent:
7729 case KEY_endprotoent:
7740 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7742 pl_yylval.ival = CopLINE(PL_curcop);
7744 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7747 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7750 if ((PL_bufend - p) >= 3 &&
7751 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7753 else if ((PL_bufend - p) >= 4 &&
7754 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7757 if (isIDFIRST_lazy_if(p,UTF)) {
7758 p = scan_ident(p, PL_bufend,
7759 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7763 Perl_croak(aTHX_ "Missing $ on loop variable");
7765 s = SvPVX(PL_linestr) + soff;
7771 LOP(OP_FORMLINE,XTERM);
7780 LOP(OP_FCNTL,XTERM);
7786 LOP(OP_FLOCK,XTERM);
7789 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7794 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7799 LOP(OP_GREPSTART, XREF);
7802 PL_expect = XOPERATOR;
7803 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7818 case KEY_getpriority:
7819 LOP(OP_GETPRIORITY,XTERM);
7821 case KEY_getprotobyname:
7824 case KEY_getprotobynumber:
7825 LOP(OP_GPBYNUMBER,XTERM);
7827 case KEY_getprotoent:
7839 case KEY_getpeername:
7840 UNI(OP_GETPEERNAME);
7842 case KEY_gethostbyname:
7845 case KEY_gethostbyaddr:
7846 LOP(OP_GHBYADDR,XTERM);
7848 case KEY_gethostent:
7851 case KEY_getnetbyname:
7854 case KEY_getnetbyaddr:
7855 LOP(OP_GNBYADDR,XTERM);
7860 case KEY_getservbyname:
7861 LOP(OP_GSBYNAME,XTERM);
7863 case KEY_getservbyport:
7864 LOP(OP_GSBYPORT,XTERM);
7866 case KEY_getservent:
7869 case KEY_getsockname:
7870 UNI(OP_GETSOCKNAME);
7872 case KEY_getsockopt:
7873 LOP(OP_GSOCKOPT,XTERM);
7888 pl_yylval.ival = CopLINE(PL_curcop);
7893 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7901 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7903 pl_yylval.ival = CopLINE(PL_curcop);
7907 LOP(OP_INDEX,XTERM);
7913 LOP(OP_IOCTL,XTERM);
7925 PL_expect = XOPERATOR;
7926 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7943 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7948 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7962 LOP(OP_LISTEN,XTERM);
7971 s = scan_pat(s,OP_MATCH);
7972 TERM(sublex_start());
7975 LOP(OP_MAPSTART, XREF);
7978 LOP(OP_MKDIR,XTERM);
7981 LOP(OP_MSGCTL,XTERM);
7984 LOP(OP_MSGGET,XTERM);
7987 LOP(OP_MSGRCV,XTERM);
7990 LOP(OP_MSGSND,XTERM);
7995 PL_in_my = (U16)tmp;
7997 if (isIDFIRST_lazy_if(s,UTF)) {
8001 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8002 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8004 if (!FEATURE_LEXSUBS_IS_ENABLED)
8006 "Experimental \"%s\" subs not enabled",
8007 tmp == KEY_my ? "my" :
8008 tmp == KEY_state ? "state" : "our");
8011 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8012 if (!PL_in_my_stash) {
8015 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8016 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8019 if (PL_madskills) { /* just add type to declarator token */
8020 sv_catsv(PL_thistoken, PL_nextwhite);
8022 sv_catpvn(PL_thistoken, start, s - start);
8030 PL_expect = XOPERATOR;
8031 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8035 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8040 s = tokenize_use(0, s);
8044 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8047 if (!PL_lex_allbrackets &&
8048 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8049 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8055 if (isIDFIRST_lazy_if(s,UTF)) {
8057 for (d = s; isALNUM_lazy_if(d,UTF);) {
8058 d += UTF ? UTF8SKIP(d) : 1;
8060 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
8061 d += UTF ? UTF8SKIP(d) : 1;
8065 for (t=d; isSPACE(*t);)
8067 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8069 && !(t[0] == '=' && t[1] == '>')
8070 && !(t[0] == ':' && t[1] == ':')
8071 && !keyword(s, d-s, 0)
8073 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8074 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
8075 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8076 "Precedence problem: open %"SVf" should be open(%"SVf")",
8077 SVfARG(tmpsv), SVfARG(tmpsv));
8083 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8085 pl_yylval.ival = OP_OR;
8095 LOP(OP_OPEN_DIR,XTERM);
8098 checkcomma(s,PL_tokenbuf,"filehandle");
8102 checkcomma(s,PL_tokenbuf,"filehandle");
8121 s = force_word(s,WORD,FALSE,TRUE,FALSE);
8123 s = force_strict_version(s);
8124 PL_lex_expect = XBLOCK;
8128 LOP(OP_PIPE_OP,XTERM);
8131 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8134 pl_yylval.ival = OP_CONST;
8135 TERM(sublex_start());
8142 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8145 PL_expect = XOPERATOR;
8146 if (SvCUR(PL_lex_stuff)) {
8147 int warned_comma = !ckWARN(WARN_QW);
8148 int warned_comment = warned_comma;
8149 d = SvPV_force(PL_lex_stuff, len);
8151 for (; isSPACE(*d) && len; --len, ++d)
8156 if (!warned_comma || !warned_comment) {
8157 for (; !isSPACE(*d) && len; --len, ++d) {
8158 if (!warned_comma && *d == ',') {
8159 Perl_warner(aTHX_ packWARN(WARN_QW),
8160 "Possible attempt to separate words with commas");
8163 else if (!warned_comment && *d == '#') {
8164 Perl_warner(aTHX_ packWARN(WARN_QW),
8165 "Possible attempt to put comments in qw() list");
8171 for (; !isSPACE(*d) && len; --len, ++d)
8174 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8175 words = op_append_elem(OP_LIST, words,
8176 newSVOP(OP_CONST, 0, tokeq(sv)));
8181 words = newNULLLIST();
8183 SvREFCNT_dec(PL_lex_stuff);
8184 PL_lex_stuff = NULL;
8186 PL_expect = XOPERATOR;
8187 pl_yylval.opval = sawparens(words);
8192 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8195 pl_yylval.ival = OP_STRINGIFY;
8196 if (SvIVX(PL_lex_stuff) == '\'')
8197 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8198 TERM(sublex_start());
8201 s = scan_pat(s,OP_QR);
8202 TERM(sublex_start());
8205 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8208 readpipe_override();
8209 TERM(sublex_start());
8216 PL_expect = XOPERATOR;
8218 s = force_version(s, FALSE);
8220 else if (*s != 'v' || !isDIGIT(s[1])
8221 || (s = force_version(s, TRUE), *s == 'v'))
8223 *PL_tokenbuf = '\0';
8224 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8225 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8226 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8227 GV_ADD | (UTF ? SVf_UTF8 : 0));
8229 yyerror("<> should be quotes");
8231 if (orig_keyword == KEY_require) {
8239 PL_last_uni = PL_oldbufptr;
8240 PL_last_lop_op = OP_REQUIRE;
8242 return REPORT( (int)REQUIRE );
8248 PL_expect = XOPERATOR;
8249 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8253 LOP(OP_RENAME,XTERM);
8262 LOP(OP_RINDEX,XTERM);
8271 UNIDOR(OP_READLINE);
8274 UNIDOR(OP_BACKTICK);
8283 LOP(OP_REVERSE,XTERM);
8286 UNIDOR(OP_READLINK);
8293 if (pl_yylval.opval)
8294 TERM(sublex_start());
8296 TOKEN(1); /* force error */
8299 checkcomma(s,PL_tokenbuf,"filehandle");
8309 LOP(OP_SELECT,XTERM);
8315 LOP(OP_SEMCTL,XTERM);
8318 LOP(OP_SEMGET,XTERM);
8321 LOP(OP_SEMOP,XTERM);
8327 LOP(OP_SETPGRP,XTERM);
8329 case KEY_setpriority:
8330 LOP(OP_SETPRIORITY,XTERM);
8332 case KEY_sethostent:
8338 case KEY_setservent:
8341 case KEY_setprotoent:
8351 LOP(OP_SEEKDIR,XTERM);
8353 case KEY_setsockopt:
8354 LOP(OP_SSOCKOPT,XTERM);
8360 LOP(OP_SHMCTL,XTERM);
8363 LOP(OP_SHMGET,XTERM);
8366 LOP(OP_SHMREAD,XTERM);
8369 LOP(OP_SHMWRITE,XTERM);
8372 LOP(OP_SHUTDOWN,XTERM);
8381 LOP(OP_SOCKET,XTERM);
8383 case KEY_socketpair:
8384 LOP(OP_SOCKPAIR,XTERM);
8387 checkcomma(s,PL_tokenbuf,"subroutine name");
8390 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8394 LOP(OP_SPLIT,XTERM);
8397 LOP(OP_SPRINTF,XTERM);
8400 LOP(OP_SPLICE,XTERM);
8415 LOP(OP_SUBSTR,XTERM);
8421 char * const tmpbuf = PL_tokenbuf + 1;
8422 SSize_t tboffset = 0;
8423 expectation attrful;
8424 bool have_name, have_proto;
8425 const int key = tmp;
8430 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8431 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8435 s = SKIPSPACE2(s,tmpwhite);
8441 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8442 (*s == ':' && s[1] == ':'))
8445 SV *nametoke = NULL;
8449 attrful = XATTRBLOCK;
8450 /* remember buffer pos'n for later force_word */
8451 tboffset = s - PL_oldbufptr;
8452 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8456 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8459 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8461 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8463 sv_setpvn(PL_subname, tmpbuf, len);
8465 sv_setsv(PL_subname,PL_curstname);
8466 sv_catpvs(PL_subname,"::");
8467 sv_catpvn(PL_subname,tmpbuf,len);
8469 if (SvUTF8(PL_linestr))
8470 SvUTF8_on(PL_subname);
8476 CURMAD('X', nametoke);
8477 CURMAD('_', tmpwhite);
8478 force_ident_maybe_lex('&');
8480 s = SKIPSPACE2(d,tmpwhite);
8486 if (key == KEY_my || key == KEY_our || key==KEY_state)
8489 /* diag_listed_as: Missing name in "%s sub" */
8491 "Missing name in \"%s\"", PL_bufptr);
8493 PL_expect = XTERMBLOCK;
8494 attrful = XATTRTERM;
8495 sv_setpvs(PL_subname,"?");
8499 if (key == KEY_format) {
8501 PL_thistoken = subtoken;
8505 (void) force_word(PL_oldbufptr + tboffset, WORD,
8511 /* Look for a prototype */
8514 bool bad_proto = FALSE;
8515 bool in_brackets = FALSE;
8516 char greedy_proto = ' ';
8517 bool proto_after_greedy_proto = FALSE;
8518 bool must_be_last = FALSE;
8519 bool underscore = FALSE;
8520 bool seen_underscore = FALSE;
8521 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8524 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8526 Perl_croak(aTHX_ "Prototype not terminated");
8527 /* strip spaces and check for bad characters */
8528 d = SvPV(PL_lex_stuff, tmplen);
8530 for (p = d; tmplen; tmplen--, ++p) {
8534 if (warnillegalproto) {
8536 proto_after_greedy_proto = TRUE;
8537 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8542 if ( !strchr(";@%", *p) )
8549 else if ( *p == ']' ) {
8550 in_brackets = FALSE;
8552 else if ( (*p == '@' || *p == '%') &&
8553 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8555 must_be_last = TRUE;
8558 else if ( *p == '_' ) {
8559 underscore = seen_underscore = TRUE;
8566 if (proto_after_greedy_proto)
8567 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8568 "Prototype after '%c' for %"SVf" : %s",
8569 greedy_proto, SVfARG(PL_subname), d);
8571 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8572 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8573 "Illegal character %sin prototype for %"SVf" : %s",
8574 seen_underscore ? "after '_' " : "",
8576 SvUTF8(PL_lex_stuff)
8577 ? sv_uni_display(dsv,
8578 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8580 UNI_DISPLAY_ISPRINT)
8581 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8582 PERL_PV_ESCAPE_NONASCII));
8584 SvCUR_set(PL_lex_stuff, tmp);
8589 CURMAD('q', PL_thisopen);
8590 CURMAD('_', tmpwhite);
8591 CURMAD('=', PL_thisstuff);
8592 CURMAD('Q', PL_thisclose);
8593 NEXTVAL_NEXTTOKE.opval =
8594 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8595 PL_lex_stuff = NULL;
8598 s = SKIPSPACE2(s,tmpwhite);
8606 if (*s == ':' && s[1] != ':')
8607 PL_expect = attrful;
8608 else if (*s != '{' && key == KEY_sub) {
8610 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8611 else if (*s != ';' && *s != '}')
8612 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8619 curmad('^', newSVpvs(""));
8620 CURMAD('_', tmpwhite);
8624 PL_thistoken = subtoken;
8627 NEXTVAL_NEXTTOKE.opval =
8628 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8629 PL_lex_stuff = NULL;
8635 sv_setpvs(PL_subname, "__ANON__");
8637 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8641 force_ident_maybe_lex('&');
8647 LOP(OP_SYSTEM,XREF);
8650 LOP(OP_SYMLINK,XTERM);
8653 LOP(OP_SYSCALL,XTERM);
8656 LOP(OP_SYSOPEN,XTERM);
8659 LOP(OP_SYSSEEK,XTERM);
8662 LOP(OP_SYSREAD,XTERM);
8665 LOP(OP_SYSWRITE,XTERM);
8670 TERM(sublex_start());
8691 LOP(OP_TRUNCATE,XTERM);
8703 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8705 pl_yylval.ival = CopLINE(PL_curcop);
8709 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8711 pl_yylval.ival = CopLINE(PL_curcop);
8715 LOP(OP_UNLINK,XTERM);
8721 LOP(OP_UNPACK,XTERM);
8724 LOP(OP_UTIME,XTERM);
8730 LOP(OP_UNSHIFT,XTERM);
8733 s = tokenize_use(1, s);
8743 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8745 pl_yylval.ival = CopLINE(PL_curcop);
8749 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8751 pl_yylval.ival = CopLINE(PL_curcop);
8755 PL_hints |= HINT_BLOCK_SCOPE;
8762 LOP(OP_WAITPID,XTERM);
8771 ctl_l[0] = toCTRL('L');
8773 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8776 /* Make sure $^L is defined */
8777 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8782 if (PL_expect == XOPERATOR) {
8783 if (*s == '=' && !PL_lex_allbrackets &&
8784 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8792 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8794 pl_yylval.ival = OP_XOR;
8800 #pragma segment Main
8806 Looks up an identifier in the pad or in a package
8809 PRIVATEREF if this is a lexical name.
8810 WORD if this belongs to a package.
8813 if we're in a my declaration
8814 croak if they tried to say my($foo::bar)
8815 build the ops for a my() declaration
8816 if it's an access to a my() variable
8817 build ops for access to a my() variable
8818 if in a dq string, and they've said @foo and we can't find @foo
8820 build ops for a bareword
8824 S_pending_ident(pTHX)
8828 const char pit = (char)pl_yylval.ival;
8829 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8830 /* All routes through this function want to know if there is a colon. */
8831 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8833 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8834 "### Pending identifier '%s'\n", PL_tokenbuf); });
8836 /* if we're in a my(), we can't allow dynamics here.
8837 $foo'bar has already been turned into $foo::bar, so
8838 just check for colons.
8840 if it's a legal name, the OP is a PADANY.
8843 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8845 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8846 "variable %s in \"our\"",
8847 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8848 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8852 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8853 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8854 UTF ? SVf_UTF8 : 0);
8856 pl_yylval.opval = newOP(OP_PADANY, 0);
8857 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8858 UTF ? SVf_UTF8 : 0);
8864 build the ops for accesses to a my() variable.
8869 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8870 UTF ? SVf_UTF8 : 0);
8871 if (tmp != NOT_IN_PAD) {
8872 /* might be an "our" variable" */
8873 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8874 /* build ops for a bareword */
8875 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8876 HEK * const stashname = HvNAME_HEK(stash);
8877 SV * const sym = newSVhek(stashname);
8878 sv_catpvs(sym, "::");
8879 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8880 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8881 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8885 ? (GV_ADDMULTI | GV_ADDINEVAL)
8888 ((PL_tokenbuf[0] == '$') ? SVt_PV
8889 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8894 pl_yylval.opval = newOP(OP_PADANY, 0);
8895 pl_yylval.opval->op_targ = tmp;
8901 Whine if they've said @foo in a doublequoted string,
8902 and @foo isn't a variable we can find in the symbol
8905 if (ckWARN(WARN_AMBIGUOUS) &&
8906 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8907 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8908 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8909 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8910 /* DO NOT warn for @- and @+ */
8911 && !( PL_tokenbuf[2] == '\0' &&
8912 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8915 /* Downgraded from fatal to warning 20000522 mjd */
8916 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8917 "Possible unintended interpolation of %"SVf" in string",
8918 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8919 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8923 /* build ops for a bareword */
8924 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8925 newSVpvn_flags(PL_tokenbuf + 1,
8927 UTF ? SVf_UTF8 : 0 ));
8928 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8930 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8931 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8932 | ( UTF ? SVf_UTF8 : 0 ),
8933 ((PL_tokenbuf[0] == '$') ? SVt_PV
8934 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8940 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8944 PERL_ARGS_ASSERT_CHECKCOMMA;
8946 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8947 if (ckWARN(WARN_SYNTAX)) {
8950 for (w = s+2; *w && level; w++) {
8958 /* the list of chars below is for end of statements or
8959 * block / parens, boolean operators (&&, ||, //) and branch
8960 * constructs (or, and, if, until, unless, while, err, for).
8961 * Not a very solid hack... */
8962 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8963 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8964 "%s (...) interpreted as function",name);
8967 while (s < PL_bufend && isSPACE(*s))
8971 while (s < PL_bufend && isSPACE(*s))
8973 if (isIDFIRST_lazy_if(s,UTF)) {
8974 const char * const w = s;
8975 s += UTF ? UTF8SKIP(s) : 1;
8976 while (isALNUM_lazy_if(s,UTF))
8977 s += UTF ? UTF8SKIP(s) : 1;
8978 while (s < PL_bufend && isSPACE(*s))
8982 if (keyword(w, s - w, 0))
8985 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8986 if (gv && GvCVu(gv))
8988 Perl_croak(aTHX_ "No comma allowed after %s", what);
8993 /* Either returns sv, or mortalizes sv and returns a new SV*.
8994 Best used as sv=new_constant(..., sv, ...).
8995 If s, pv are NULL, calls subroutine with one argument,
8996 and <type> is used with error messages only.
8997 <type> is assumed to be well formed UTF-8 */
9000 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9001 SV *sv, SV *pv, const char *type, STRLEN typelen)
9004 HV * table = GvHV(PL_hintgv); /* ^H */
9008 const char *why1 = "", *why2 = "", *why3 = "";
9010 PERL_ARGS_ASSERT_NEW_CONSTANT;
9012 /* charnames doesn't work well if there have been errors found */
9013 if (PL_error_count > 0 && strEQ(key,"charnames"))
9014 return &PL_sv_undef;
9017 || ! (PL_hints & HINT_LOCALIZE_HH)
9018 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9023 /* Here haven't found what we're looking for. If it is charnames,
9024 * perhaps it needs to be loaded. Try doing that before giving up */
9025 if (strEQ(key,"charnames")) {
9026 Perl_load_module(aTHX_
9028 newSVpvs("_charnames"),
9029 /* version parameter; no need to specify it, as if
9030 * we get too early a version, will fail anyway,
9031 * not being able to find '_charnames' */
9037 table = GvHV(PL_hintgv);
9039 && (PL_hints & HINT_LOCALIZE_HH)
9040 && (cvp = hv_fetch(table, key, keylen, FALSE))
9046 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9047 msg = Perl_newSVpvf(aTHX_
9048 "Constant(%s) unknown", (type ? type: "undef"));
9053 why3 = "} is not defined";
9055 if (strEQ(key,"charnames")) {
9056 yyerror_pv(Perl_form(aTHX_
9057 /* The +3 is for '\N{'; -4 for that, plus '}' */
9058 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9060 UTF ? SVf_UTF8 : 0);
9064 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9065 (type ? type: "undef"), why1, why2, why3);
9068 yyerror(SvPVX_const(msg));
9073 sv_2mortal(sv); /* Parent created it permanently */
9076 pv = newSVpvn_flags(s, len, SVs_TEMP);
9078 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9080 typesv = &PL_sv_undef;
9082 PUSHSTACKi(PERLSI_OVERLOAD);
9094 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9098 /* Check the eval first */
9099 if (!PL_in_eval && SvTRUE(ERRSV)) {
9101 const char * errstr;
9102 sv_catpvs(ERRSV, "Propagated");
9103 errstr = SvPV_const(ERRSV, errlen);
9104 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9106 res = SvREFCNT_inc_simple(sv);
9110 SvREFCNT_inc_simple_void(res);
9119 why1 = "Call to &{$^H{";
9121 why3 = "}} did not return a defined value";
9129 /* Returns a NUL terminated string, with the length of the string written to
9133 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9137 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9139 PERL_ARGS_ASSERT_SCAN_WORD;
9143 Perl_croak(aTHX_ ident_too_long);
9144 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
9146 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9151 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9155 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9156 char *t = s + UTF8SKIP(s);
9158 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9162 Perl_croak(aTHX_ ident_too_long);
9163 Copy(s, d, len, char);
9176 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9179 char *bracket = NULL;
9182 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9184 PERL_ARGS_ASSERT_SCAN_IDENT;
9189 while (isDIGIT(*s)) {
9191 Perl_croak(aTHX_ ident_too_long);
9198 Perl_croak(aTHX_ ident_too_long);
9199 if (isALNUM(*s)) /* UTF handled below */
9201 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9206 else if (*s == ':' && s[1] == ':') {
9210 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9211 char *t = s + UTF8SKIP(s);
9212 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9214 if (d + (t - s) > e)
9215 Perl_croak(aTHX_ ident_too_long);
9216 Copy(s, d, t - s, char);
9227 if (PL_lex_state != LEX_NORMAL)
9228 PL_lex_state = LEX_INTERPENDMAYBE;
9231 if (*s == '$' && s[1] &&
9232 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9242 const STRLEN skip = UTF8SKIP(s);
9245 for ( i = 0; i < skip; i++ )
9253 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9257 else if (ck_uni && !bracket)
9260 if (isSPACE(s[-1])) {
9262 const char ch = *s++;
9263 if (!SPACE_OR_TAB(ch)) {
9269 if (isIDFIRST_lazy_if(d,UTF)) {
9273 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9274 end += UTF8SKIP(end);
9275 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9276 end += UTF8SKIP(end);
9278 Copy(s, d, end - s, char);
9283 while ((isALNUM(*s) || *s == ':') && d < e)
9286 Perl_croak(aTHX_ ident_too_long);
9289 while (s < send && SPACE_OR_TAB(*s))
9291 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9292 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9293 const char * const brack =
9295 ((*s == '[') ? "[...]" : "{...}");
9296 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9297 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9298 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9299 funny, dest, brack, funny, dest, brack);
9302 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9303 PL_lex_allbrackets++;
9307 /* Handle extended ${^Foo} variables
9308 * 1999-02-27 mjd-perl-patch@plover.com */
9309 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9313 while (isALNUM(*s) && d < e) {
9317 Perl_croak(aTHX_ ident_too_long);
9322 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9323 PL_lex_state = LEX_INTERPEND;
9326 if (PL_lex_state == LEX_NORMAL) {
9327 if (ckWARN(WARN_AMBIGUOUS) &&
9328 (keyword(dest, d - dest, 0)
9329 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9331 SV *tmp = newSVpvn_flags( dest, d - dest,
9332 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9335 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9336 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9337 funny, tmp, funny, tmp);
9342 s = bracket; /* let the parser handle it */
9346 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9347 PL_lex_state = LEX_INTERPEND;
9352 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9354 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9355 * the parse starting at 's', based on the subset that are valid in this
9356 * context input to this routine in 'valid_flags'. Advances s. Returns
9357 * TRUE if the input should be treated as a valid flag, so the next char
9358 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9359 * first call on the current regex. This routine will set it to any
9360 * charset modifier found. The caller shouldn't change it. This way,
9361 * another charset modifier encountered in the parse can be detected as an
9362 * error, as we have decided to allow only one */
9365 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9367 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9368 if (isALNUM_lazy_if(*s, UTF)) {
9369 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9370 UTF ? SVf_UTF8 : 0);
9372 /* Pretend that it worked, so will continue processing before
9381 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9382 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9383 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9384 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9385 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9386 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9387 case LOCALE_PAT_MOD:
9389 goto multiple_charsets;
9391 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9394 case UNICODE_PAT_MOD:
9396 goto multiple_charsets;
9398 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9401 case ASCII_RESTRICT_PAT_MOD:
9403 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9407 /* Error if previous modifier wasn't an 'a', but if it was, see
9408 * if, and accept, a second occurrence (only) */
9410 || get_regex_charset(*pmfl)
9411 != REGEX_ASCII_RESTRICTED_CHARSET)
9413 goto multiple_charsets;
9415 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9419 case DEPENDS_PAT_MOD:
9421 goto multiple_charsets;
9423 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9432 if (*charset != c) {
9433 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9435 else if (c == 'a') {
9436 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9439 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9442 /* Pretend that it worked, so will continue processing before dieing */
9448 S_scan_pat(pTHX_ char *start, I32 type)
9452 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9453 const char * const valid_flags =
9454 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9455 char charset = '\0'; /* character set modifier */
9460 PERL_ARGS_ASSERT_SCAN_PAT;
9462 /* this was only needed for the initial scan_str; set it to false
9463 * so that any (?{}) code blocks etc are parsed normally */
9464 PL_reg_state.re_reparsing = FALSE;
9466 const char * const delimiter = skipspace(start);
9470 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9471 : "Search pattern not terminated" ));
9474 pm = (PMOP*)newPMOP(type, 0);
9475 if (PL_multi_open == '?') {
9476 /* This is the only point in the code that sets PMf_ONCE: */
9477 pm->op_pmflags |= PMf_ONCE;
9479 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9480 allows us to restrict the list needed by reset to just the ??
9482 assert(type != OP_TRANS);
9484 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9487 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9490 elements = mg->mg_len / sizeof(PMOP**);
9491 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9492 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9493 mg->mg_len = elements * sizeof(PMOP**);
9494 PmopSTASH_set(pm,PL_curstash);
9501 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9502 * anon CV. False positives like qr/[(?{]/ are harmless */
9504 if (type == OP_QR) {
9506 char *e, *p = SvPV(PL_lex_stuff, len);
9508 for (; p < e; p++) {
9509 if (p[0] == '(' && p[1] == '?'
9510 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9512 pm->op_pmflags |= PMf_HAS_CV;
9516 pm->op_pmflags |= PMf_IS_QR;
9519 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9521 if (PL_madskills && modstart != s) {
9522 SV* tmptoken = newSVpvn(modstart, s - modstart);
9523 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9526 /* issue a warning if /c is specified,but /g is not */
9527 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9529 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9530 "Use of /c modifier is meaningless without /g" );
9533 PL_lex_op = (OP*)pm;
9534 pl_yylval.ival = OP_MATCH;
9539 S_scan_subst(pTHX_ char *start)
9546 char charset = '\0'; /* character set modifier */
9551 PERL_ARGS_ASSERT_SCAN_SUBST;
9553 pl_yylval.ival = OP_NULL;
9555 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9558 Perl_croak(aTHX_ "Substitution pattern not terminated");
9560 if (s[-1] == PL_multi_open)
9564 CURMAD('q', PL_thisopen);
9565 CURMAD('_', PL_thiswhite);
9566 CURMAD('E', PL_thisstuff);
9567 CURMAD('Q', PL_thisclose);
9568 PL_realtokenstart = s - SvPVX(PL_linestr);
9572 first_start = PL_multi_start;
9573 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9576 SvREFCNT_dec(PL_lex_stuff);
9577 PL_lex_stuff = NULL;
9579 Perl_croak(aTHX_ "Substitution replacement not terminated");
9581 PL_multi_start = first_start; /* so whole substitution is taken together */
9583 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9587 CURMAD('z', PL_thisopen);
9588 CURMAD('R', PL_thisstuff);
9589 CURMAD('Z', PL_thisclose);
9595 if (*s == EXEC_PAT_MOD) {
9599 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9608 curmad('m', newSVpvn(modstart, s - modstart));
9609 append_madprops(PL_thismad, (OP*)pm, 0);
9613 if ((pm->op_pmflags & PMf_CONTINUE)) {
9614 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9618 SV * const repl = newSVpvs("");
9621 pm->op_pmflags |= PMf_EVAL;
9624 sv_catpvs(repl, "eval ");
9626 sv_catpvs(repl, "do ");
9628 sv_catpvs(repl, "{");
9629 sv_catsv(repl, PL_sublex_info.repl);
9630 sv_catpvs(repl, "}");
9632 SvREFCNT_dec(PL_sublex_info.repl);
9633 PL_sublex_info.repl = repl;
9636 PL_lex_op = (OP*)pm;
9637 pl_yylval.ival = OP_SUBST;
9642 S_scan_trans(pTHX_ char *start)
9650 bool nondestruct = 0;
9655 PERL_ARGS_ASSERT_SCAN_TRANS;
9657 pl_yylval.ival = OP_NULL;
9659 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9661 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9663 if (s[-1] == PL_multi_open)
9667 CURMAD('q', PL_thisopen);
9668 CURMAD('_', PL_thiswhite);
9669 CURMAD('E', PL_thisstuff);
9670 CURMAD('Q', PL_thisclose);
9671 PL_realtokenstart = s - SvPVX(PL_linestr);
9675 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9678 SvREFCNT_dec(PL_lex_stuff);
9679 PL_lex_stuff = NULL;
9681 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9684 CURMAD('z', PL_thisopen);
9685 CURMAD('R', PL_thisstuff);
9686 CURMAD('Z', PL_thisclose);
9689 complement = del = squash = 0;
9696 complement = OPpTRANS_COMPLEMENT;
9699 del = OPpTRANS_DELETE;
9702 squash = OPpTRANS_SQUASH;
9714 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9715 o->op_private &= ~OPpTRANS_ALL;
9716 o->op_private |= del|squash|complement|
9717 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9718 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9721 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9726 curmad('m', newSVpvn(modstart, s - modstart));
9727 append_madprops(PL_thismad, o, 0);
9736 Takes a pointer to the first < in <<FOO.
9737 Returns a pointer to the byte following <<FOO.
9739 This function scans a heredoc, which involves different methods
9740 depending on whether we are in a string eval, quoted construct, etc.
9741 This is because PL_linestr could containing a single line of input, or
9742 a whole string being evalled, or the contents of the current quote-
9745 The two basic methods are:
9746 - Steal lines from the input stream
9747 - Scan the heredoc in PL_linestr and remove it therefrom
9749 In a file scope or filtered eval, the first method is used; in a
9750 string eval, the second.
9752 In a quote-like operator, we have to choose between the two,
9753 depending on where we can find a newline. We peek into outer lex-
9754 ing scopes until we find one with a newline in it. If we reach the
9755 outermost lexing scope and it is a file, we use the stream method.
9756 Otherwise it is treated as an eval.
9760 S_scan_heredoc(pTHX_ register char *s)
9763 I32 op_type = OP_SCALAR;
9770 const bool infile = PL_rsfp || PL_parser->filtered;
9771 LEXSHARED *shared = PL_parser->lex_shared;
9773 I32 stuffstart = s - SvPVX(PL_linestr);
9776 PL_realtokenstart = -1;
9779 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9782 d = PL_tokenbuf + 1;
9783 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9784 *PL_tokenbuf = '\n';
9786 while (SPACE_OR_TAB(*peek))
9788 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9791 s = delimcpy(d, e, s, PL_bufend, term, &len);
9793 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9799 /* <<\FOO is equivalent to <<'FOO' */
9803 if (!isALNUM_lazy_if(s,UTF))
9804 deprecate("bare << to mean <<\"\"");
9805 for (; isALNUM_lazy_if(s,UTF); s++) {
9810 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9811 Perl_croak(aTHX_ "Delimiter for here document is too long");
9814 len = d - PL_tokenbuf;
9818 tstart = PL_tokenbuf + 1;
9819 PL_thisclose = newSVpvn(tstart, len - 1);
9820 tstart = SvPVX(PL_linestr) + stuffstart;
9821 PL_thisopen = newSVpvn(tstart, s - tstart);
9822 stuffstart = s - SvPVX(PL_linestr);
9825 #ifndef PERL_STRICT_CR
9826 d = strchr(s, '\r');
9828 char * const olds = s;
9830 while (s < PL_bufend) {
9836 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9845 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9851 tstart = SvPVX(PL_linestr) + stuffstart;
9853 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9855 PL_thisstuff = newSVpvn(tstart, s - tstart);
9858 stuffstart = s - SvPVX(PL_linestr);
9861 tmpstr = newSV_type(SVt_PVIV);
9865 SvIV_set(tmpstr, -1);
9867 else if (term == '`') {
9868 op_type = OP_BACKTICK;
9869 SvIV_set(tmpstr, '\\');
9872 PL_multi_start = CopLINE(PL_curcop) + 1;
9873 PL_multi_open = PL_multi_close = '<';
9874 /* inside a string eval or quote-like operator */
9875 if (!infile || PL_lex_inwhat) {
9878 char * const olds = s;
9879 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9880 /* These two fields are not set until an inner lexing scope is
9881 entered. But we need them set here. */
9882 shared->ls_bufptr = s;
9883 shared->ls_linestr = PL_linestr;
9885 /* Look for a newline. If the current buffer does not have one,
9886 peek into the line buffer of the parent lexing scope, going
9887 up as many levels as necessary to find one with a newline
9890 while (!(s = (char *)memchr(
9891 (void *)shared->ls_bufptr, '\n',
9892 SvEND(shared->ls_linestr)-shared->ls_bufptr
9894 shared = shared->ls_prev;
9895 /* shared is only null if we have gone beyond the outermost
9896 lexing scope. In a file, we will have broken out of the
9897 loop in the previous iteration. In an eval, the string buf-
9898 fer ends with "\n;", so the while condition below will have
9899 evaluated to false. So shared can never be null. */
9901 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9902 most lexing scope. In a file, shared->ls_linestr at that
9903 level is just one line, so there is no body to steal. */
9904 if (infile && !shared->ls_prev) {
9910 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9913 linestr = shared->ls_linestr;
9914 bufend = SvEND(linestr);
9916 while (s < bufend &&
9917 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9919 ++shared->herelines;
9924 sv_setpvn(tmpstr,d+1,s-d);
9928 sv_catpvn(PL_thisstuff, d + 1, s - d);
9930 PL_thisstuff = newSVpvn(d + 1, s - d);
9931 stuffstart = s - SvPVX(PL_linestr);
9935 /* the preceding stmt passes a newline */
9936 shared->herelines++;
9938 /* s now points to the newline after the heredoc terminator.
9939 d points to the newline before the body of the heredoc.
9942 /* We are going to modify linestr in place here, so set
9943 aside copies of the string if necessary for re-evals or
9945 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9946 check shared->re_eval_str. */
9947 if (shared->re_eval_start || shared->re_eval_str) {
9948 /* Set aside the rest of the regexp */
9949 if (!shared->re_eval_str)
9950 shared->re_eval_str =
9951 newSVpvn(shared->re_eval_start,
9952 bufend - shared->re_eval_start);
9953 shared->re_eval_start -= s-d;
9955 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9956 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9957 cx->blk_eval.cur_text == linestr)
9959 cx->blk_eval.cur_text = newSVsv(linestr);
9960 SvSCREAM_on(cx->blk_eval.cur_text);
9962 /* Copy everything from s onwards back to d. */
9963 Move(s,d,bufend-s + 1,char);
9964 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9965 /* Setting PL_bufend only applies when we have not dug deeper
9966 into other scopes, because sublex_done sets PL_bufend to
9967 SvEND(PL_linestr). */
9968 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9975 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9976 term = PL_tokenbuf[1];
9978 linestr_save = PL_linestr; /* must restore this afterwards */
9979 d = s; /* and this */
9980 PL_linestr = newSVpvs("");
9981 PL_bufend = SvPVX(PL_linestr);
9985 tstart = SvPVX(PL_linestr) + stuffstart;
9987 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9989 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9992 PL_bufptr = PL_bufend;
9993 CopLINE_set(PL_curcop,
9994 PL_multi_start + shared->herelines);
9995 if (!lex_next_chunk(LEX_NO_TERM)
9996 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9997 SvREFCNT_dec(linestr_save);
10000 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10001 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10002 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10003 sv_catpvs(PL_linestr, "\n\0");
10007 stuffstart = s - SvPVX(PL_linestr);
10009 shared->herelines++;
10010 PL_last_lop = PL_last_uni = NULL;
10011 #ifndef PERL_STRICT_CR
10012 if (PL_bufend - PL_linestart >= 2) {
10013 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10014 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10016 PL_bufend[-2] = '\n';
10018 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10020 else if (PL_bufend[-1] == '\r')
10021 PL_bufend[-1] = '\n';
10023 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10024 PL_bufend[-1] = '\n';
10026 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10027 SvREFCNT_dec(PL_linestr);
10028 PL_linestr = linestr_save;
10029 PL_linestart = SvPVX(linestr_save);
10030 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10035 sv_catsv(tmpstr,PL_linestr);
10039 PL_multi_end = CopLINE(PL_curcop);
10040 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10041 SvPV_shrink_to_cur(tmpstr);
10044 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10046 else if (PL_encoding)
10047 sv_recode_to_utf8(tmpstr, PL_encoding);
10049 PL_lex_stuff = tmpstr;
10050 pl_yylval.ival = op_type;
10054 SvREFCNT_dec(tmpstr);
10055 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10056 missingterm(PL_tokenbuf + 1);
10059 /* scan_inputsymbol
10060 takes: current position in input buffer
10061 returns: new position in input buffer
10062 side-effects: pl_yylval and lex_op are set.
10067 <FH> read from filehandle
10068 <pkg::FH> read from package qualified filehandle
10069 <pkg'FH> read from package qualified filehandle
10070 <$fh> read from filehandle in $fh
10071 <*.h> filename glob
10076 S_scan_inputsymbol(pTHX_ char *start)
10079 char *s = start; /* current position in buffer */
10082 char *d = PL_tokenbuf; /* start of temp holding space */
10083 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10085 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10087 end = strchr(s, '\n');
10090 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10092 /* die if we didn't have space for the contents of the <>,
10093 or if it didn't end, or if we see a newline
10096 if (len >= (I32)sizeof PL_tokenbuf)
10097 Perl_croak(aTHX_ "Excessively long <> operator");
10099 Perl_croak(aTHX_ "Unterminated <> operator");
10104 Remember, only scalar variables are interpreted as filehandles by
10105 this code. Anything more complex (e.g., <$fh{$num}>) will be
10106 treated as a glob() call.
10107 This code makes use of the fact that except for the $ at the front,
10108 a scalar variable and a filehandle look the same.
10110 if (*d == '$' && d[1]) d++;
10112 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10113 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10114 d += UTF ? UTF8SKIP(d) : 1;
10116 /* If we've tried to read what we allow filehandles to look like, and
10117 there's still text left, then it must be a glob() and not a getline.
10118 Use scan_str to pull out the stuff between the <> and treat it
10119 as nothing more than a string.
10122 if (d - PL_tokenbuf != len) {
10123 pl_yylval.ival = OP_GLOB;
10124 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
10126 Perl_croak(aTHX_ "Glob not terminated");
10130 bool readline_overriden = FALSE;
10133 /* we're in a filehandle read situation */
10136 /* turn <> into <ARGV> */
10138 Copy("ARGV",d,5,char);
10140 /* Check whether readline() is overriden */
10141 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10143 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10145 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10146 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10147 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10148 readline_overriden = TRUE;
10150 /* if <$fh>, create the ops to turn the variable into a
10154 /* try to find it in the pad for this block, otherwise find
10155 add symbol table ops
10157 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10158 if (tmp != NOT_IN_PAD) {
10159 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10160 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10161 HEK * const stashname = HvNAME_HEK(stash);
10162 SV * const sym = sv_2mortal(newSVhek(stashname));
10163 sv_catpvs(sym, "::");
10164 sv_catpv(sym, d+1);
10169 OP * const o = newOP(OP_PADSV, 0);
10171 PL_lex_op = readline_overriden
10172 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10173 op_append_elem(OP_LIST, o,
10174 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10175 : (OP*)newUNOP(OP_READLINE, 0, o);
10184 ? (GV_ADDMULTI | GV_ADDINEVAL)
10185 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10187 PL_lex_op = readline_overriden
10188 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10189 op_append_elem(OP_LIST,
10190 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10191 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10192 : (OP*)newUNOP(OP_READLINE, 0,
10193 newUNOP(OP_RV2SV, 0,
10194 newGVOP(OP_GV, 0, gv)));
10196 if (!readline_overriden)
10197 PL_lex_op->op_flags |= OPf_SPECIAL;
10198 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10199 pl_yylval.ival = OP_NULL;
10202 /* If it's none of the above, it must be a literal filehandle
10203 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10205 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10206 PL_lex_op = readline_overriden
10207 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10208 op_append_elem(OP_LIST,
10209 newGVOP(OP_GV, 0, gv),
10210 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10211 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10212 pl_yylval.ival = OP_NULL;
10221 takes: start position in buffer
10222 keep_quoted preserve \ on the embedded delimiter(s)
10223 keep_delims preserve the delimiters around the string
10224 re_reparse compiling a run-time /(?{})/:
10225 collapse // to /, and skip encoding src
10226 returns: position to continue reading from buffer
10227 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10228 updates the read buffer.
10230 This subroutine pulls a string out of the input. It is called for:
10231 q single quotes q(literal text)
10232 ' single quotes 'literal text'
10233 qq double quotes qq(interpolate $here please)
10234 " double quotes "interpolate $here please"
10235 qx backticks qx(/bin/ls -l)
10236 ` backticks `/bin/ls -l`
10237 qw quote words @EXPORT_OK = qw( func() $spam )
10238 m// regexp match m/this/
10239 s/// regexp substitute s/this/that/
10240 tr/// string transliterate tr/this/that/
10241 y/// string transliterate y/this/that/
10242 ($*@) sub prototypes sub foo ($)
10243 (stuff) sub attr parameters sub foo : attr(stuff)
10244 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10246 In most of these cases (all but <>, patterns and transliterate)
10247 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10248 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10249 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10252 It skips whitespace before the string starts, and treats the first
10253 character as the delimiter. If the delimiter is one of ([{< then
10254 the corresponding "close" character )]}> is used as the closing
10255 delimiter. It allows quoting of delimiters, and if the string has
10256 balanced delimiters ([{<>}]) it allows nesting.
10258 On success, the SV with the resulting string is put into lex_stuff or,
10259 if that is already non-NULL, into lex_repl. The second case occurs only
10260 when parsing the RHS of the special constructs s/// and tr/// (y///).
10261 For convenience, the terminating delimiter character is stuffed into
10266 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10269 SV *sv; /* scalar value: string */
10270 const char *tmps; /* temp string, used for delimiter matching */
10271 char *s = start; /* current position in the buffer */
10272 char term; /* terminating character */
10273 char *to; /* current position in the sv's data */
10274 I32 brackets = 1; /* bracket nesting level */
10275 bool has_utf8 = FALSE; /* is there any utf8 content? */
10276 I32 termcode; /* terminating char. code */
10277 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10278 STRLEN termlen; /* length of terminating string */
10279 int last_off = 0; /* last position for nesting bracket */
10285 PERL_ARGS_ASSERT_SCAN_STR;
10287 /* skip space before the delimiter */
10293 if (PL_realtokenstart >= 0) {
10294 stuffstart = PL_realtokenstart;
10295 PL_realtokenstart = -1;
10298 stuffstart = start - SvPVX(PL_linestr);
10300 /* mark where we are, in case we need to report errors */
10303 /* after skipping whitespace, the next character is the terminator */
10306 termcode = termstr[0] = term;
10310 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10311 Copy(s, termstr, termlen, U8);
10312 if (!UTF8_IS_INVARIANT(term))
10316 /* mark where we are */
10317 PL_multi_start = CopLINE(PL_curcop);
10318 PL_multi_open = term;
10320 /* find corresponding closing delimiter */
10321 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10322 termcode = termstr[0] = term = tmps[5];
10324 PL_multi_close = term;
10326 /* create a new SV to hold the contents. 79 is the SV's initial length.
10327 What a random number. */
10328 sv = newSV_type(SVt_PVIV);
10330 SvIV_set(sv, termcode);
10331 (void)SvPOK_only(sv); /* validate pointer */
10333 /* move past delimiter and try to read a complete string */
10335 sv_catpvn(sv, s, termlen);
10338 tstart = SvPVX(PL_linestr) + stuffstart;
10339 if (!PL_thisopen && !keep_delims) {
10340 PL_thisopen = newSVpvn(tstart, s - tstart);
10341 stuffstart = s - SvPVX(PL_linestr);
10345 if (PL_encoding && !UTF && !re_reparse) {
10349 int offset = s - SvPVX_const(PL_linestr);
10350 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10351 &offset, (char*)termstr, termlen);
10352 const char * const ns = SvPVX_const(PL_linestr) + offset;
10353 char * const svlast = SvEND(sv) - 1;
10355 for (; s < ns; s++) {
10356 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10357 COPLINE_INC_WITH_HERELINES;
10360 goto read_more_line;
10362 /* handle quoted delimiters */
10363 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10365 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10367 if ((svlast-1 - t) % 2) {
10368 if (!keep_quoted) {
10369 *(svlast-1) = term;
10371 SvCUR_set(sv, SvCUR(sv) - 1);
10376 if (PL_multi_open == PL_multi_close) {
10382 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10383 /* At here, all closes are "was quoted" one,
10384 so we don't check PL_multi_close. */
10386 if (!keep_quoted && *(t+1) == PL_multi_open)
10391 else if (*t == PL_multi_open)
10399 SvCUR_set(sv, w - SvPVX_const(sv));
10401 last_off = w - SvPVX(sv);
10402 if (--brackets <= 0)
10407 if (!keep_delims) {
10408 SvCUR_set(sv, SvCUR(sv) - 1);
10414 /* extend sv if need be */
10415 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10416 /* set 'to' to the next character in the sv's string */
10417 to = SvPVX(sv)+SvCUR(sv);
10419 /* if open delimiter is the close delimiter read unbridle */
10420 if (PL_multi_open == PL_multi_close) {
10421 for (; s < PL_bufend; s++,to++) {
10422 /* embedded newlines increment the current line number */
10423 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10424 COPLINE_INC_WITH_HERELINES;
10425 /* handle quoted delimiters */
10426 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10429 || (re_reparse && s[1] == '\\'))
10432 /* any other quotes are simply copied straight through */
10436 /* terminate when run out of buffer (the for() condition), or
10437 have found the terminator */
10438 else if (*s == term) {
10441 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10444 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10450 /* if the terminator isn't the same as the start character (e.g.,
10451 matched brackets), we have to allow more in the quoting, and
10452 be prepared for nested brackets.
10455 /* read until we run out of string, or we find the terminator */
10456 for (; s < PL_bufend; s++,to++) {
10457 /* embedded newlines increment the line count */
10458 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10459 COPLINE_INC_WITH_HERELINES;
10460 /* backslashes can escape the open or closing characters */
10461 if (*s == '\\' && s+1 < PL_bufend) {
10462 if (!keep_quoted &&
10463 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10468 /* allow nested opens and closes */
10469 else if (*s == PL_multi_close && --brackets <= 0)
10471 else if (*s == PL_multi_open)
10473 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10478 /* terminate the copied string and update the sv's end-of-string */
10480 SvCUR_set(sv, to - SvPVX_const(sv));
10483 * this next chunk reads more into the buffer if we're not done yet
10487 break; /* handle case where we are done yet :-) */
10489 #ifndef PERL_STRICT_CR
10490 if (to - SvPVX_const(sv) >= 2) {
10491 if ((to[-2] == '\r' && to[-1] == '\n') ||
10492 (to[-2] == '\n' && to[-1] == '\r'))
10496 SvCUR_set(sv, to - SvPVX_const(sv));
10498 else if (to[-1] == '\r')
10501 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10506 /* if we're out of file, or a read fails, bail and reset the current
10507 line marker so we can report where the unterminated string began
10510 if (PL_madskills) {
10511 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10513 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10515 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10518 COPLINE_INC_WITH_HERELINES;
10519 PL_bufptr = PL_bufend;
10520 if (!lex_next_chunk(0)) {
10522 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10531 /* at this point, we have successfully read the delimited string */
10533 if (!PL_encoding || UTF || re_reparse) {
10535 if (PL_madskills) {
10536 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10537 const int len = s - tstart;
10539 sv_catpvn(PL_thisstuff, tstart, len);
10541 PL_thisstuff = newSVpvn(tstart, len);
10542 if (!PL_thisclose && !keep_delims)
10543 PL_thisclose = newSVpvn(s,termlen);
10548 sv_catpvn(sv, s, termlen);
10553 if (PL_madskills) {
10554 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10555 const int len = s - tstart - termlen;
10557 sv_catpvn(PL_thisstuff, tstart, len);
10559 PL_thisstuff = newSVpvn(tstart, len);
10560 if (!PL_thisclose && !keep_delims)
10561 PL_thisclose = newSVpvn(s - termlen,termlen);
10565 if (has_utf8 || (PL_encoding && !re_reparse))
10568 PL_multi_end = CopLINE(PL_curcop);
10570 /* if we allocated too much space, give some back */
10571 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10572 SvLEN_set(sv, SvCUR(sv) + 1);
10573 SvPV_renew(sv, SvLEN(sv));
10576 /* decide whether this is the first or second quoted string we've read
10581 PL_sublex_info.repl = sv;
10589 takes: pointer to position in buffer
10590 returns: pointer to new position in buffer
10591 side-effects: builds ops for the constant in pl_yylval.op
10593 Read a number in any of the formats that Perl accepts:
10595 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10596 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10599 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10601 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10604 If it reads a number without a decimal point or an exponent, it will
10605 try converting the number to an integer and see if it can do so
10606 without loss of precision.
10610 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10613 const char *s = start; /* current position in buffer */
10614 char *d; /* destination in temp buffer */
10615 char *e; /* end of temp buffer */
10616 NV nv; /* number read, as a double */
10617 SV *sv = NULL; /* place to put the converted number */
10618 bool floatit; /* boolean: int or float? */
10619 const char *lastub = NULL; /* position of last underbar */
10620 static char const number_too_long[] = "Number too long";
10622 PERL_ARGS_ASSERT_SCAN_NUM;
10624 /* We use the first character to decide what type of number this is */
10628 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10630 /* if it starts with a 0, it could be an octal number, a decimal in
10631 0.13 disguise, or a hexadecimal number, or a binary number. */
10635 u holds the "number so far"
10636 shift the power of 2 of the base
10637 (hex == 4, octal == 3, binary == 1)
10638 overflowed was the number more than we can hold?
10640 Shift is used when we add a digit. It also serves as an "are
10641 we in octal/hex/binary?" indicator to disallow hex characters
10642 when in octal mode.
10647 bool overflowed = FALSE;
10648 bool just_zero = TRUE; /* just plain 0 or binary number? */
10649 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10650 static const char* const bases[5] =
10651 { "", "binary", "", "octal", "hexadecimal" };
10652 static const char* const Bases[5] =
10653 { "", "Binary", "", "Octal", "Hexadecimal" };
10654 static const char* const maxima[5] =
10656 "0b11111111111111111111111111111111",
10660 const char *base, *Base, *max;
10662 /* check for hex */
10663 if (s[1] == 'x' || s[1] == 'X') {
10667 } else if (s[1] == 'b' || s[1] == 'B') {
10672 /* check for a decimal in disguise */
10673 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10675 /* so it must be octal */
10682 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10683 "Misplaced _ in number");
10687 base = bases[shift];
10688 Base = Bases[shift];
10689 max = maxima[shift];
10691 /* read the rest of the number */
10693 /* x is used in the overflow test,
10694 b is the digit we're adding on. */
10699 /* if we don't mention it, we're done */
10703 /* _ are ignored -- but warned about if consecutive */
10705 if (lastub && s == lastub + 1)
10706 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10707 "Misplaced _ in number");
10711 /* 8 and 9 are not octal */
10712 case '8': case '9':
10714 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10718 case '2': case '3': case '4':
10719 case '5': case '6': case '7':
10721 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10724 case '0': case '1':
10725 b = *s++ & 15; /* ASCII digit -> value of digit */
10729 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10730 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10731 /* make sure they said 0x */
10734 b = (*s++ & 7) + 9;
10736 /* Prepare to put the digit we have onto the end
10737 of the number so far. We check for overflows.
10743 x = u << shift; /* make room for the digit */
10745 if ((x >> shift) != u
10746 && !(PL_hints & HINT_NEW_BINARY)) {
10749 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10750 "Integer overflow in %s number",
10753 u = x | b; /* add the digit to the end */
10756 n *= nvshift[shift];
10757 /* If an NV has not enough bits in its
10758 * mantissa to represent an UV this summing of
10759 * small low-order numbers is a waste of time
10760 * (because the NV cannot preserve the
10761 * low-order bits anyway): we could just
10762 * remember when did we overflow and in the
10763 * end just multiply n by the right
10771 /* if we get here, we had success: make a scalar value from
10776 /* final misplaced underbar check */
10777 if (s[-1] == '_') {
10778 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10782 if (n > 4294967295.0)
10783 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10784 "%s number > %s non-portable",
10790 if (u > 0xffffffff)
10791 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10792 "%s number > %s non-portable",
10797 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10798 sv = new_constant(start, s - start, "integer",
10799 sv, NULL, NULL, 0);
10800 else if (PL_hints & HINT_NEW_BINARY)
10801 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10806 handle decimal numbers.
10807 we're also sent here when we read a 0 as the first digit
10809 case '1': case '2': case '3': case '4': case '5':
10810 case '6': case '7': case '8': case '9': case '.':
10813 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10816 /* read next group of digits and _ and copy into d */
10817 while (isDIGIT(*s) || *s == '_') {
10818 /* skip underscores, checking for misplaced ones
10822 if (lastub && s == lastub + 1)
10823 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10824 "Misplaced _ in number");
10828 /* check for end of fixed-length buffer */
10830 Perl_croak(aTHX_ number_too_long);
10831 /* if we're ok, copy the character */
10836 /* final misplaced underbar check */
10837 if (lastub && s == lastub + 1) {
10838 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10841 /* read a decimal portion if there is one. avoid
10842 3..5 being interpreted as the number 3. followed
10845 if (*s == '.' && s[1] != '.') {
10850 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10851 "Misplaced _ in number");
10855 /* copy, ignoring underbars, until we run out of digits.
10857 for (; isDIGIT(*s) || *s == '_'; s++) {
10858 /* fixed length buffer check */
10860 Perl_croak(aTHX_ number_too_long);
10862 if (lastub && s == lastub + 1)
10863 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10864 "Misplaced _ in number");
10870 /* fractional part ending in underbar? */
10871 if (s[-1] == '_') {
10872 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10873 "Misplaced _ in number");
10875 if (*s == '.' && isDIGIT(s[1])) {
10876 /* oops, it's really a v-string, but without the "v" */
10882 /* read exponent part, if present */
10883 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10887 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10888 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10890 /* stray preinitial _ */
10892 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10893 "Misplaced _ in number");
10897 /* allow positive or negative exponent */
10898 if (*s == '+' || *s == '-')
10901 /* stray initial _ */
10903 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10904 "Misplaced _ in number");
10908 /* read digits of exponent */
10909 while (isDIGIT(*s) || *s == '_') {
10912 Perl_croak(aTHX_ number_too_long);
10916 if (((lastub && s == lastub + 1) ||
10917 (!isDIGIT(s[1]) && s[1] != '_')))
10918 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10919 "Misplaced _ in number");
10927 We try to do an integer conversion first if no characters
10928 indicating "float" have been found.
10933 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10935 if (flags == IS_NUMBER_IN_UV) {
10937 sv = newSViv(uv); /* Prefer IVs over UVs. */
10940 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10941 if (uv <= (UV) IV_MIN)
10942 sv = newSViv(-(IV)uv);
10949 /* terminate the string */
10951 nv = Atof(PL_tokenbuf);
10956 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10957 const char *const key = floatit ? "float" : "integer";
10958 const STRLEN keylen = floatit ? 5 : 7;
10959 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10960 key, keylen, sv, NULL, NULL, 0);
10964 /* if it starts with a v, it could be a v-string */
10967 sv = newSV(5); /* preallocate storage space */
10968 s = scan_vstring(s, PL_bufend, sv);
10972 /* make the op for the constant and return */
10975 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10977 lvalp->opval = NULL;
10983 S_scan_formline(pTHX_ register char *s)
10988 SV * const stuff = newSVpvs("");
10989 bool needargs = FALSE;
10990 bool eofmt = FALSE;
10992 char *tokenstart = s;
10993 SV* savewhite = NULL;
10995 if (PL_madskills) {
10996 savewhite = PL_thiswhite;
11001 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11003 while (!needargs) {
11006 #ifdef PERL_STRICT_CR
11007 while (SPACE_OR_TAB(*t))
11010 while (SPACE_OR_TAB(*t) || *t == '\r')
11013 if (*t == '\n' || t == PL_bufend) {
11018 eol = (char *) memchr(s,'\n',PL_bufend-s);
11022 for (t = s; t < eol; t++) {
11023 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11025 goto enough; /* ~~ must be first line in formline */
11027 if (*t == '@' || *t == '^')
11031 sv_catpvn(stuff, s, eol-s);
11032 #ifndef PERL_STRICT_CR
11033 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11034 char *end = SvPVX(stuff) + SvCUR(stuff);
11037 SvCUR_set(stuff, SvCUR(stuff) - 1);
11045 if ((PL_rsfp || PL_parser->filtered)
11046 && PL_parser->form_lex_state == LEX_NORMAL) {
11049 if (PL_madskills) {
11051 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11053 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11056 PL_bufptr = PL_bufend;
11057 COPLINE_INC_WITH_HERELINES;
11058 got_some = lex_next_chunk(0);
11059 CopLINE_dec(PL_curcop);
11062 tokenstart = PL_bufptr;
11070 if (!SvCUR(stuff) || needargs)
11071 PL_lex_state = PL_parser->form_lex_state;
11072 if (SvCUR(stuff)) {
11073 PL_expect = XSTATE;
11075 start_force(PL_curforce);
11076 NEXTVAL_NEXTTOKE.ival = 0;
11077 force_next(FORMLBRACK);
11080 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11082 else if (PL_encoding)
11083 sv_recode_to_utf8(stuff, PL_encoding);
11085 start_force(PL_curforce);
11086 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11090 SvREFCNT_dec(stuff);
11092 PL_lex_formbrack = 0;
11095 if (PL_madskills) {
11097 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11099 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11100 PL_thiswhite = savewhite;
11107 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11110 const I32 oldsavestack_ix = PL_savestack_ix;
11111 CV* const outsidecv = PL_compcv;
11113 SAVEI32(PL_subline);
11114 save_item(PL_subname);
11115 SAVESPTR(PL_compcv);
11117 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11118 CvFLAGS(PL_compcv) |= flags;
11120 PL_subline = CopLINE(PL_curcop);
11121 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11122 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11123 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11124 if (outsidecv && CvPADLIST(outsidecv))
11125 CvPADLIST(PL_compcv)->xpadl_outid =
11126 PadlistNAMES(CvPADLIST(outsidecv));
11128 return oldsavestack_ix;
11132 #pragma segment Perl_yylex
11135 S_yywarn(pTHX_ const char *const s, U32 flags)
11139 PERL_ARGS_ASSERT_YYWARN;
11141 PL_in_eval |= EVAL_WARNONLY;
11142 yyerror_pv(s, flags);
11143 PL_in_eval &= ~EVAL_WARNONLY;
11148 Perl_yyerror(pTHX_ const char *const s)
11150 PERL_ARGS_ASSERT_YYERROR;
11151 return yyerror_pvn(s, strlen(s), 0);
11155 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11157 PERL_ARGS_ASSERT_YYERROR_PV;
11158 return yyerror_pvn(s, strlen(s), flags);
11162 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11165 const char *context = NULL;
11168 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11169 int yychar = PL_parser->yychar;
11171 PERL_ARGS_ASSERT_YYERROR_PVN;
11173 if (!yychar || (yychar == ';' && !PL_rsfp))
11174 sv_catpvs(where_sv, "at EOF");
11175 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11176 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11177 PL_oldbufptr != PL_bufptr) {
11180 The code below is removed for NetWare because it abends/crashes on NetWare
11181 when the script has error such as not having the closing quotes like:
11182 if ($var eq "value)
11183 Checking of white spaces is anyway done in NetWare code.
11186 while (isSPACE(*PL_oldoldbufptr))
11189 context = PL_oldoldbufptr;
11190 contlen = PL_bufptr - PL_oldoldbufptr;
11192 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11193 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11196 The code below is removed for NetWare because it abends/crashes on NetWare
11197 when the script has error such as not having the closing quotes like:
11198 if ($var eq "value)
11199 Checking of white spaces is anyway done in NetWare code.
11202 while (isSPACE(*PL_oldbufptr))
11205 context = PL_oldbufptr;
11206 contlen = PL_bufptr - PL_oldbufptr;
11208 else if (yychar > 255)
11209 sv_catpvs(where_sv, "next token ???");
11210 else if (yychar == -2) { /* YYEMPTY */
11211 if (PL_lex_state == LEX_NORMAL ||
11212 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11213 sv_catpvs(where_sv, "at end of line");
11214 else if (PL_lex_inpat)
11215 sv_catpvs(where_sv, "within pattern");
11217 sv_catpvs(where_sv, "within string");
11220 sv_catpvs(where_sv, "next char ");
11222 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11223 else if (isPRINT_LC(yychar)) {
11224 const char string = yychar;
11225 sv_catpvn(where_sv, &string, 1);
11228 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11230 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11231 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11232 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11234 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11235 SVfARG(newSVpvn_flags(context, contlen,
11236 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11238 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11239 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11240 Perl_sv_catpvf(aTHX_ msg,
11241 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11242 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11245 if (PL_in_eval & EVAL_WARNONLY) {
11246 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11250 if (PL_error_count >= 10) {
11251 if (PL_in_eval && SvCUR(ERRSV))
11252 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11253 SVfARG(ERRSV), OutCopFILE(PL_curcop));
11255 Perl_croak(aTHX_ "%s has too many errors.\n",
11256 OutCopFILE(PL_curcop));
11259 PL_in_my_stash = NULL;
11263 #pragma segment Main
11267 S_swallow_bom(pTHX_ U8 *s)
11270 const STRLEN slen = SvCUR(PL_linestr);
11272 PERL_ARGS_ASSERT_SWALLOW_BOM;
11276 if (s[1] == 0xFE) {
11277 /* UTF-16 little-endian? (or UTF-32LE?) */
11278 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11279 /* diag_listed_as: Unsupported script encoding %s */
11280 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11281 #ifndef PERL_NO_UTF16_FILTER
11282 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11284 if (PL_bufend > (char*)s) {
11285 s = add_utf16_textfilter(s, TRUE);
11288 /* diag_listed_as: Unsupported script encoding %s */
11289 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11294 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11295 #ifndef PERL_NO_UTF16_FILTER
11296 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11298 if (PL_bufend > (char *)s) {
11299 s = add_utf16_textfilter(s, FALSE);
11302 /* diag_listed_as: Unsupported script encoding %s */
11303 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11308 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11309 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11310 s += 3; /* UTF-8 */
11316 if (s[2] == 0xFE && s[3] == 0xFF) {
11317 /* UTF-32 big-endian */
11318 /* diag_listed_as: Unsupported script encoding %s */
11319 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11322 else if (s[2] == 0 && s[3] != 0) {
11325 * are a good indicator of UTF-16BE. */
11326 #ifndef PERL_NO_UTF16_FILTER
11327 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11328 s = add_utf16_textfilter(s, FALSE);
11330 /* diag_listed_as: Unsupported script encoding %s */
11331 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11337 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11338 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11339 s += 4; /* UTF-8 */
11345 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11348 * are a good indicator of UTF-16LE. */
11349 #ifndef PERL_NO_UTF16_FILTER
11350 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11351 s = add_utf16_textfilter(s, TRUE);
11353 /* diag_listed_as: Unsupported script encoding %s */
11354 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11362 #ifndef PERL_NO_UTF16_FILTER
11364 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11367 SV *const filter = FILTER_DATA(idx);
11368 /* We re-use this each time round, throwing the contents away before we
11370 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11371 SV *const utf8_buffer = filter;
11372 IV status = IoPAGE(filter);
11373 const bool reverse = cBOOL(IoLINES(filter));
11376 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11378 /* As we're automatically added, at the lowest level, and hence only called
11379 from this file, we can be sure that we're not called in block mode. Hence
11380 don't bother writing code to deal with block mode. */
11382 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11385 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11387 DEBUG_P(PerlIO_printf(Perl_debug_log,
11388 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11389 FPTR2DPTR(void *, S_utf16_textfilter),
11390 reverse ? 'l' : 'b', idx, maxlen, status,
11391 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11398 /* First, look in our buffer of existing UTF-8 data: */
11399 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11403 } else if (status == 0) {
11405 IoPAGE(filter) = 0;
11406 nl = SvEND(utf8_buffer);
11409 STRLEN got = nl - SvPVX(utf8_buffer);
11410 /* Did we have anything to append? */
11412 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11413 /* Everything else in this code works just fine if SVp_POK isn't
11414 set. This, however, needs it, and we need it to work, else
11415 we loop infinitely because the buffer is never consumed. */
11416 sv_chop(utf8_buffer, nl);
11420 /* OK, not a complete line there, so need to read some more UTF-16.
11421 Read an extra octect if the buffer currently has an odd number. */
11425 if (SvCUR(utf16_buffer) >= 2) {
11426 /* Location of the high octet of the last complete code point.
11427 Gosh, UTF-16 is a pain. All the benefits of variable length,
11428 *coupled* with all the benefits of partial reads and
11430 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11431 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11433 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11437 /* We have the first half of a surrogate. Read more. */
11438 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11441 status = FILTER_READ(idx + 1, utf16_buffer,
11442 160 + (SvCUR(utf16_buffer) & 1));
11443 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11444 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11447 IoPAGE(filter) = status;
11452 chars = SvCUR(utf16_buffer) >> 1;
11453 have = SvCUR(utf8_buffer);
11454 SvGROW(utf8_buffer, have + chars * 3 + 1);
11457 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11458 (U8*)SvPVX_const(utf8_buffer) + have,
11459 chars * 2, &newlen);
11461 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11462 (U8*)SvPVX_const(utf8_buffer) + have,
11463 chars * 2, &newlen);
11465 SvCUR_set(utf8_buffer, have + newlen);
11468 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11469 it's private to us, and utf16_to_utf8{,reversed} take a
11470 (pointer,length) pair, rather than a NUL-terminated string. */
11471 if(SvCUR(utf16_buffer) & 1) {
11472 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11473 SvCUR_set(utf16_buffer, 1);
11475 SvCUR_set(utf16_buffer, 0);
11478 DEBUG_P(PerlIO_printf(Perl_debug_log,
11479 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11481 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11482 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11487 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11489 SV *filter = filter_add(S_utf16_textfilter, NULL);
11491 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11493 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11494 sv_setpvs(filter, "");
11495 IoLINES(filter) = reversed;
11496 IoPAGE(filter) = 1; /* Not EOF */
11498 /* Sadly, we have to return a valid pointer, come what may, so we have to
11499 ignore any error return from this. */
11500 SvCUR_set(PL_linestr, 0);
11501 if (FILTER_READ(0, PL_linestr, 0)) {
11502 SvUTF8_on(PL_linestr);
11504 SvUTF8_on(PL_linestr);
11506 PL_bufend = SvEND(PL_linestr);
11507 return (U8*)SvPVX(PL_linestr);
11512 Returns a pointer to the next character after the parsed
11513 vstring, as well as updating the passed in sv.
11515 Function must be called like
11518 s = scan_vstring(s,e,sv);
11520 where s and e are the start and end of the string.
11521 The sv should already be large enough to store the vstring
11522 passed in, for performance reasons.
11527 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11530 const char *pos = s;
11531 const char *start = s;
11533 PERL_ARGS_ASSERT_SCAN_VSTRING;
11535 if (*pos == 'v') pos++; /* get past 'v' */
11536 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11538 if ( *pos != '.') {
11539 /* this may not be a v-string if followed by => */
11540 const char *next = pos;
11541 while (next < e && isSPACE(*next))
11543 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11544 /* return string not v-string */
11545 sv_setpvn(sv,(char *)s,pos-s);
11546 return (char *)pos;
11550 if (!isALPHA(*pos)) {
11551 U8 tmpbuf[UTF8_MAXBYTES+1];
11554 s++; /* get past 'v' */
11559 /* this is atoi() that tolerates underscores */
11562 const char *end = pos;
11564 while (--end >= s) {
11566 const UV orev = rev;
11567 rev += (*end - '0') * mult;
11570 /* diag_listed_as: Integer overflow in %s number */
11571 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11572 "Integer overflow in decimal number");
11576 if (rev > 0x7FFFFFFF)
11577 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11579 /* Append native character for the rev point */
11580 tmpend = uvchr_to_utf8(tmpbuf, rev);
11581 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11582 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11584 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11590 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11594 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11601 Perl_keyword_plugin_standard(pTHX_
11602 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11604 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11605 PERL_UNUSED_CONTEXT;
11606 PERL_UNUSED_ARG(keyword_ptr);
11607 PERL_UNUSED_ARG(keyword_len);
11608 PERL_UNUSED_ARG(op_ptr);
11609 return KEYWORD_PLUGIN_DECLINE;
11612 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11614 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11616 SAVEI32(PL_lex_brackets);
11617 if (PL_lex_brackets > 100)
11618 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11619 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11620 SAVEI32(PL_lex_allbrackets);
11621 PL_lex_allbrackets = 0;
11622 SAVEI8(PL_lex_fakeeof);
11623 PL_lex_fakeeof = (U8)fakeeof;
11624 if(yyparse(gramtype) && !PL_parser->error_count)
11625 qerror(Perl_mess(aTHX_ "Parse error"));
11628 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11630 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11634 SAVEVPTR(PL_eval_root);
11635 PL_eval_root = NULL;
11636 parse_recdescent(gramtype, fakeeof);
11642 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11644 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11647 if (flags & ~PARSE_OPTIONAL)
11648 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11649 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11650 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11651 if (!PL_parser->error_count)
11652 qerror(Perl_mess(aTHX_ "Parse error"));
11653 exprop = newOP(OP_NULL, 0);
11659 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11661 Parse a Perl arithmetic expression. This may contain operators of precedence
11662 down to the bit shift operators. The expression must be followed (and thus
11663 terminated) either by a comparison or lower-precedence operator or by
11664 something that would normally terminate an expression such as semicolon.
11665 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11666 otherwise it is mandatory. It is up to the caller to ensure that the
11667 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11668 the source of the code to be parsed and the lexical context for the
11671 The op tree representing the expression is returned. If an optional
11672 expression is absent, a null pointer is returned, otherwise the pointer
11675 If an error occurs in parsing or compilation, in most cases a valid op
11676 tree is returned anyway. The error is reflected in the parser state,
11677 normally resulting in a single exception at the top level of parsing
11678 which covers all the compilation errors that occurred. Some compilation
11679 errors, however, will throw an exception immediately.
11685 Perl_parse_arithexpr(pTHX_ U32 flags)
11687 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11691 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11693 Parse a Perl term expression. This may contain operators of precedence
11694 down to the assignment operators. The expression must be followed (and thus
11695 terminated) either by a comma or lower-precedence operator or by
11696 something that would normally terminate an expression such as semicolon.
11697 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11698 otherwise it is mandatory. It is up to the caller to ensure that the
11699 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11700 the source of the code to be parsed and the lexical context for the
11703 The op tree representing the expression is returned. If an optional
11704 expression is absent, a null pointer is returned, otherwise the pointer
11707 If an error occurs in parsing or compilation, in most cases a valid op
11708 tree is returned anyway. The error is reflected in the parser state,
11709 normally resulting in a single exception at the top level of parsing
11710 which covers all the compilation errors that occurred. Some compilation
11711 errors, however, will throw an exception immediately.
11717 Perl_parse_termexpr(pTHX_ U32 flags)
11719 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11723 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11725 Parse a Perl list expression. This may contain operators of precedence
11726 down to the comma operator. The expression must be followed (and thus
11727 terminated) either by a low-precedence logic operator such as C<or> or by
11728 something that would normally terminate an expression such as semicolon.
11729 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11730 otherwise it is mandatory. It is up to the caller to ensure that the
11731 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11732 the source of the code to be parsed and the lexical context for the
11735 The op tree representing the expression is returned. If an optional
11736 expression is absent, a null pointer is returned, otherwise the pointer
11739 If an error occurs in parsing or compilation, in most cases a valid op
11740 tree is returned anyway. The error is reflected in the parser state,
11741 normally resulting in a single exception at the top level of parsing
11742 which covers all the compilation errors that occurred. Some compilation
11743 errors, however, will throw an exception immediately.
11749 Perl_parse_listexpr(pTHX_ U32 flags)
11751 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11755 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11757 Parse a single complete Perl expression. This allows the full
11758 expression grammar, including the lowest-precedence operators such
11759 as C<or>. The expression must be followed (and thus terminated) by a
11760 token that an expression would normally be terminated by: end-of-file,
11761 closing bracketing punctuation, semicolon, or one of the keywords that
11762 signals a postfix expression-statement modifier. If I<flags> includes
11763 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11764 mandatory. It is up to the caller to ensure that the dynamic parser
11765 state (L</PL_parser> et al) is correctly set to reflect the source of
11766 the code to be parsed and the lexical context for the expression.
11768 The op tree representing the expression is returned. If an optional
11769 expression is absent, a null pointer is returned, otherwise the pointer
11772 If an error occurs in parsing or compilation, in most cases a valid op
11773 tree is returned anyway. The error is reflected in the parser state,
11774 normally resulting in a single exception at the top level of parsing
11775 which covers all the compilation errors that occurred. Some compilation
11776 errors, however, will throw an exception immediately.
11782 Perl_parse_fullexpr(pTHX_ U32 flags)
11784 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11788 =for apidoc Amx|OP *|parse_block|U32 flags
11790 Parse a single complete Perl code block. This consists of an opening
11791 brace, a sequence of statements, and a closing brace. The block
11792 constitutes a lexical scope, so C<my> variables and various compile-time
11793 effects can be contained within it. It is up to the caller to ensure
11794 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11795 reflect the source of the code to be parsed and the lexical context for
11798 The op tree representing the code block is returned. This is always a
11799 real op, never a null pointer. It will normally be a C<lineseq> list,
11800 including C<nextstate> or equivalent ops. No ops to construct any kind
11801 of runtime scope are included by virtue of it being a block.
11803 If an error occurs in parsing or compilation, in most cases a valid op
11804 tree (most likely null) is returned anyway. The error is reflected in
11805 the parser state, normally resulting in a single exception at the top
11806 level of parsing which covers all the compilation errors that occurred.
11807 Some compilation errors, however, will throw an exception immediately.
11809 The I<flags> parameter is reserved for future use, and must always
11816 Perl_parse_block(pTHX_ U32 flags)
11819 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11820 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11824 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11826 Parse a single unadorned Perl statement. This may be a normal imperative
11827 statement or a declaration that has compile-time effect. It does not
11828 include any label or other affixture. It is up to the caller to ensure
11829 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11830 reflect the source of the code to be parsed and the lexical context for
11833 The op tree representing the statement is returned. This may be a
11834 null pointer if the statement is null, for example if it was actually
11835 a subroutine definition (which has compile-time side effects). If not
11836 null, it will be ops directly implementing the statement, suitable to
11837 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11838 equivalent op (except for those embedded in a scope contained entirely
11839 within the statement).
11841 If an error occurs in parsing or compilation, in most cases a valid op
11842 tree (most likely null) is returned anyway. The error is reflected in
11843 the parser state, normally resulting in a single exception at the top
11844 level of parsing which covers all the compilation errors that occurred.
11845 Some compilation errors, however, will throw an exception immediately.
11847 The I<flags> parameter is reserved for future use, and must always
11854 Perl_parse_barestmt(pTHX_ U32 flags)
11857 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11858 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11862 =for apidoc Amx|SV *|parse_label|U32 flags
11864 Parse a single label, possibly optional, of the type that may prefix a
11865 Perl statement. It is up to the caller to ensure that the dynamic parser
11866 state (L</PL_parser> et al) is correctly set to reflect the source of
11867 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11868 label is optional, otherwise it is mandatory.
11870 The name of the label is returned in the form of a fresh scalar. If an
11871 optional label is absent, a null pointer is returned.
11873 If an error occurs in parsing, which can only occur if the label is
11874 mandatory, a valid label is returned anyway. The error is reflected in
11875 the parser state, normally resulting in a single exception at the top
11876 level of parsing which covers all the compilation errors that occurred.
11882 Perl_parse_label(pTHX_ U32 flags)
11884 if (flags & ~PARSE_OPTIONAL)
11885 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11886 if (PL_lex_state == LEX_KNOWNEXT) {
11887 PL_parser->yychar = yylex();
11888 if (PL_parser->yychar == LABEL) {
11889 char * const lpv = pl_yylval.pval;
11890 STRLEN llen = strlen(lpv);
11891 PL_parser->yychar = YYEMPTY;
11892 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11899 STRLEN wlen, bufptr_pos;
11902 if (!isIDFIRST_lazy_if(s, UTF))
11904 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11905 if (word_takes_any_delimeter(s, wlen))
11907 bufptr_pos = s - SvPVX(PL_linestr);
11909 lex_read_space(LEX_KEEP_PREVIOUS);
11911 s = SvPVX(PL_linestr) + bufptr_pos;
11912 if (t[0] == ':' && t[1] != ':') {
11913 PL_oldoldbufptr = PL_oldbufptr;
11916 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11920 if (flags & PARSE_OPTIONAL) {
11923 qerror(Perl_mess(aTHX_ "Parse error"));
11924 return newSVpvs("x");
11931 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11933 Parse a single complete Perl statement. This may be a normal imperative
11934 statement or a declaration that has compile-time effect, and may include
11935 optional labels. It is up to the caller to ensure that the dynamic
11936 parser state (L</PL_parser> et al) is correctly set to reflect the source
11937 of the code to be parsed and the lexical context for the statement.
11939 The op tree representing the statement is returned. This may be a
11940 null pointer if the statement is null, for example if it was actually
11941 a subroutine definition (which has compile-time side effects). If not
11942 null, it will be the result of a L</newSTATEOP> call, normally including
11943 a C<nextstate> or equivalent op.
11945 If an error occurs in parsing or compilation, in most cases a valid op
11946 tree (most likely null) is returned anyway. The error is reflected in
11947 the parser state, normally resulting in a single exception at the top
11948 level of parsing which covers all the compilation errors that occurred.
11949 Some compilation errors, however, will throw an exception immediately.
11951 The I<flags> parameter is reserved for future use, and must always
11958 Perl_parse_fullstmt(pTHX_ U32 flags)
11961 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11962 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11966 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11968 Parse a sequence of zero or more Perl statements. These may be normal
11969 imperative statements, including optional labels, or declarations
11970 that have compile-time effect, or any mixture thereof. The statement
11971 sequence ends when a closing brace or end-of-file is encountered in a
11972 place where a new statement could have validly started. It is up to
11973 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11974 is correctly set to reflect the source of the code to be parsed and the
11975 lexical context for the statements.
11977 The op tree representing the statement sequence is returned. This may
11978 be a null pointer if the statements were all null, for example if there
11979 were no statements or if there were only subroutine definitions (which
11980 have compile-time side effects). If not null, it will be a C<lineseq>
11981 list, normally including C<nextstate> or equivalent ops.
11983 If an error occurs in parsing or compilation, in most cases a valid op
11984 tree is returned anyway. The error is reflected in the parser state,
11985 normally resulting in a single exception at the top level of parsing
11986 which covers all the compilation errors that occurred. Some compilation
11987 errors, however, will throw an exception immediately.
11989 The I<flags> parameter is reserved for future use, and must always
11996 Perl_parse_stmtseq(pTHX_ U32 flags)
12001 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12002 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12003 c = lex_peek_unichar(0);
12004 if (c != -1 && c != /*{*/'}')
12005 qerror(Perl_mess(aTHX_ "Parse error"));
12011 * c-indentation-style: bsd
12012 * c-basic-offset: 4
12013 * indent-tabs-mode: nil
12016 * ex: set ts=8 sts=4 sw=4 et: