3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_static.c"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_defer (PL_parser->lex_defer)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_sublex_info (PL_parser->sublex_info)
69 #define PL_linestr (PL_parser->linestr)
70 #define PL_expect (PL_parser->expect)
71 #define PL_copline (PL_parser->copline)
72 #define PL_bufptr (PL_parser->bufptr)
73 #define PL_oldbufptr (PL_parser->oldbufptr)
74 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75 #define PL_linestart (PL_parser->linestart)
76 #define PL_bufend (PL_parser->bufend)
77 #define PL_last_uni (PL_parser->last_uni)
78 #define PL_last_lop (PL_parser->last_lop)
79 #define PL_last_lop_op (PL_parser->last_lop_op)
80 #define PL_lex_state (PL_parser->lex_state)
81 #define PL_rsfp (PL_parser->rsfp)
82 #define PL_rsfp_filters (PL_parser->rsfp_filters)
83 #define PL_in_my (PL_parser->in_my)
84 #define PL_in_my_stash (PL_parser->in_my_stash)
85 #define PL_tokenbuf (PL_parser->tokenbuf)
86 #define PL_multi_end (PL_parser->multi_end)
87 #define PL_error_count (PL_parser->error_count)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_nexttype (PL_parser->nexttype)
91 # define PL_nextval (PL_parser->nextval)
93 static const char* const ident_too_long = "Identifier too long";
95 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
97 #define XENUMMASK 0x3f
99 #define XFAKEBRACK 0x80
101 #ifdef USE_UTF8_SCRIPTS
102 # define UTF cBOOL(!IN_BYTES)
104 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
107 /* The maximum number of characters preceding the unrecognized one to display */
108 #define UNRECOGNIZED_PRECEDE_COUNT 10
110 /* In variables named $^X, these are the legal values for X.
111 * 1999-02-27 mjd-perl-patch@plover.com */
112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
114 #define SPACE_OR_TAB(c) isBLANK_A(c)
116 #define HEXFP_PEEK(s) \
118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119 isALPHA_FOLD_EQ(s[0], 'p'))
121 /* LEX_* are values for PL_lex_state, the state of the lexer.
122 * They are arranged oddly so that the guard on the switch statement
123 * can get by with a single comparison (if the compiler is smart enough).
125 * These values refer to the various states within a sublex parse,
126 * i.e. within a double quotish string
129 /* #define LEX_NOTPARSING 11 is done in perl.h. */
131 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
132 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
133 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
134 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
135 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
137 /* at end of code, eg "$x" followed by: */
138 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
139 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
141 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
142 string or after \E, $foo, etc */
143 #define LEX_INTERPCONST 2 /* NOT USED */
144 #define LEX_FORMLINE 1 /* expecting a format line */
145 #define LEX_KNOWNEXT 0 /* next token known; just return it */
149 static const char* const lex_state_names[] = {
164 #include "keywords.h"
166 /* CLINE is a macro that ensures PL_copline has a sane value */
168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
171 * Convenience functions to return different tokens and prime the
172 * lexer for the next token. They all take an argument.
174 * TOKEN : generic token (used for '(', DOLSHARP, etc)
175 * OPERATOR : generic operator
176 * AOPERATOR : assignment operator
177 * PREBLOCK : beginning the block after an if, while, foreach, ...
178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179 * PREREF : *EXPR where EXPR is not a simple identifier
180 * TERM : expression term
181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
182 * LOOPX : loop exiting command (goto, last, dump, etc)
183 * FTST : file test operator
184 * FUN0 : zero-argument function
185 * FUN0OP : zero-argument function, with its op created in this file
186 * FUN1 : not used, except for not, which isn't a UNIOP
187 * BOop : bitwise or or xor
189 * BCop : bitwise complement
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * AopNOASSIGN : addition-level operator that is never part of .=
195 * Mop : multiplication-level operator
196 * Eop : equality-testing operator
197 * Rop : relational operator <= != gt
199 * Also see LOP and lop() below.
202 #ifdef DEBUGGING /* Serve -DT. */
203 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
205 # define REPORT(retval) (retval)
208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
218 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
220 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
221 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
222 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
223 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
224 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
225 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
228 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
229 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
230 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
231 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
233 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
234 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
235 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
237 /* This bit of chicanery makes a unary function followed by
238 * a parenthesis into a function with one argument, highest precedence.
239 * The UNIDOR macro is for unary functions that can be followed by the //
240 * operator (such as C<shift // 0>).
242 #define UNI3(f,x,have_x) { \
243 pl_yylval.ival = f; \
244 if (have_x) PL_expect = x; \
246 PL_last_uni = PL_oldbufptr; \
247 PL_last_lop_op = f; \
249 return REPORT( (int)FUNC1 ); \
251 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
253 #define UNI(f) UNI3(f,XTERM,1)
254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
255 #define UNIPROTO(f,optional) { \
256 if (optional) PL_last_uni = PL_oldbufptr; \
260 #define UNIBRACK(f) UNI3(f,0,0)
262 /* grandfather return to old style */
265 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
266 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
267 pl_yylval.ival = (f); \
273 #define COPLINE_INC_WITH_HERELINES \
275 CopLINE_inc(PL_curcop); \
276 if (PL_parser->herelines) \
277 CopLINE(PL_curcop) += PL_parser->herelines, \
278 PL_parser->herelines = 0; \
280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
281 * is no sublex_push to follow. */
282 #define COPLINE_SET_FROM_MULTI_END \
284 CopLINE_set(PL_curcop, PL_multi_end); \
285 if (PL_multi_end != PL_multi_start) \
286 PL_parser->herelines = 0; \
292 /* how to interpret the pl_yylval associated with the token */
296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
301 static struct debug_tokens {
303 enum token_type type;
305 } const debug_tokens[] =
307 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
308 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
309 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
310 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
311 { ARROW, TOKENTYPE_NONE, "ARROW" },
312 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
313 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
314 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
315 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
316 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
317 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
318 { DO, TOKENTYPE_NONE, "DO" },
319 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
320 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
321 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
322 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
323 { ELSE, TOKENTYPE_NONE, "ELSE" },
324 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
325 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
326 { FOR, TOKENTYPE_IVAL, "FOR" },
327 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
328 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
329 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
330 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
331 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
332 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
333 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
334 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
335 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
336 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
337 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
338 { IF, TOKENTYPE_IVAL, "IF" },
339 { LABEL, TOKENTYPE_PVAL, "LABEL" },
340 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
341 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
342 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
343 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
344 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
345 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
346 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
347 { MY, TOKENTYPE_IVAL, "MY" },
348 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
349 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
350 { OROP, TOKENTYPE_IVAL, "OROP" },
351 { OROR, TOKENTYPE_NONE, "OROR" },
352 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
353 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
354 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
355 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
356 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
357 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
358 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
359 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
360 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
361 { PREINC, TOKENTYPE_NONE, "PREINC" },
362 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
363 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
364 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
365 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
366 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
367 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
368 { SUB, TOKENTYPE_NONE, "SUB" },
369 { THING, TOKENTYPE_OPVAL, "THING" },
370 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
371 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
372 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
373 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
374 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
375 { USE, TOKENTYPE_IVAL, "USE" },
376 { WHEN, TOKENTYPE_IVAL, "WHEN" },
377 { WHILE, TOKENTYPE_IVAL, "WHILE" },
378 { WORD, TOKENTYPE_OPVAL, "WORD" },
379 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
380 { 0, TOKENTYPE_NONE, NULL }
383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
388 PERL_ARGS_ASSERT_TOKEREPORT;
391 const char *name = NULL;
392 enum token_type type = TOKENTYPE_NONE;
393 const struct debug_tokens *p;
394 SV* const report = newSVpvs("<== ");
396 for (p = debug_tokens; p->token; p++) {
397 if (p->token == (int)rv) {
404 Perl_sv_catpv(aTHX_ report, name);
405 else if (isGRAPH(rv))
407 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
409 sv_catpvs(report, " (pending identifier)");
412 sv_catpvs(report, "EOF");
414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
419 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
421 case TOKENTYPE_OPNUM:
422 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
423 PL_op_name[lvalp->ival]);
426 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
428 case TOKENTYPE_OPVAL:
430 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
431 PL_op_name[lvalp->opval->op_type]);
432 if (lvalp->opval->op_type == OP_CONST) {
433 Perl_sv_catpvf(aTHX_ report, " %s",
434 SvPEEK(cSVOPx_sv(lvalp->opval)));
439 sv_catpvs(report, "(opval=null)");
442 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
448 /* print the buffer with suitable escapes */
451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
453 SV* const tmp = newSVpvs("");
455 PERL_ARGS_ASSERT_PRINTBUF;
457 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine looks for an '=' next to the operator that has just been
476 * parsed and turns it into an ASSIGNOP if it finds one.
480 S_ao(pTHX_ int toketype)
482 if (*PL_bufptr == '=') {
484 if (toketype == ANDAND)
485 pl_yylval.ival = OP_ANDASSIGN;
486 else if (toketype == OROR)
487 pl_yylval.ival = OP_ORASSIGN;
488 else if (toketype == DORDOR)
489 pl_yylval.ival = OP_DORASSIGN;
492 return REPORT(toketype);
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
507 * PL_bufptr is expected to point to the start of the thing that was found,
508 * and s after the next token or partial token.
512 S_no_op(pTHX_ const char *const what, char *s)
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
517 PERL_ARGS_ASSERT_NO_OP;
523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
524 if (ckWARN_d(WARN_SYNTAX)) {
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
530 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
531 t += UTF ? UTF8SKIP(t) : 1)
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %"UTF8f"?)\n",
536 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 "\t(Missing operator before %"UTF8f"?)\n",
542 UTF8fARG(UTF, s - oldbp, oldbp));
550 * Complain about missing quote/regexp/heredoc terminator.
551 * If it's called with NULL then it cauterizes the line buffer.
552 * If we're in a delimited string and the delimiter is a control
553 * character, it's reformatted into a two-char sequence like ^C.
558 S_missingterm(pTHX_ char *s)
563 char * const nl = strrchr(s,'\n');
567 else if ((U8) PL_multi_close < 32) {
569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
574 *tmpbuf = (char)PL_multi_close;
578 q = strchr(s,'"') ? '\'' : '"';
579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
585 * Check whether the named feature is enabled.
588 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
590 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
592 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
594 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
596 if (namelen > MAX_FEATURE_LEN)
598 memcpy(&he_name[8], name, namelen);
600 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
601 REFCOUNTED_HE_EXISTS));
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
609 #ifdef PERL_CR_FILTER
613 const char *s = SvPVX_const(sv);
614 const char * const e = s + SvCUR(sv);
616 PERL_ARGS_ASSERT_STRIP_RETURN;
618 /* outer loop optimized to do nothing if there are no CR-LFs */
620 if (*s++ == '\r' && *s == '\n') {
621 /* hit a CR-LF, need to copy the rest */
625 if (*s == '\r' && s[1] == '\n')
636 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
638 const I32 count = FILTER_READ(idx+1, sv, maxlen);
639 if (count > 0 && !maxlen)
646 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
648 Creates and initialises a new lexer/parser state object, supplying
649 a context in which to lex and parse from a new source of Perl code.
650 A pointer to the new state object is placed in L</PL_parser>. An entry
651 is made on the save stack so that upon unwinding the new state object
652 will be destroyed and the former value of L</PL_parser> will be restored.
653 Nothing else need be done to clean up the parsing context.
655 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
656 non-null, provides a string (in SV form) containing code to be parsed.
657 A copy of the string is made, so subsequent modification of I<line>
658 does not affect parsing. I<rsfp>, if non-null, provides an input stream
659 from which code will be read to be parsed. If both are non-null, the
660 code in I<line> comes first and must consist of complete lines of input,
661 and I<rsfp> supplies the remainder of the source.
663 The I<flags> parameter is reserved for future use. Currently it is only
664 used by perl internally, so extensions should always pass zero.
669 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
670 can share filters with the current parser.
671 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
672 caller, hence isn't owned by the parser, so shouldn't be closed on parser
673 destruction. This is used to handle the case of defaulting to reading the
674 script from the standard input because no filename was given on the command
675 line (without getting confused by situation where STDIN has been closed, so
676 the script handle is opened on fd 0) */
679 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
681 const char *s = NULL;
682 yy_parser *parser, *oparser;
683 if (flags && flags & ~LEX_START_FLAGS)
684 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
686 /* create and initialise a parser */
688 Newxz(parser, 1, yy_parser);
689 parser->old_parser = oparser = PL_parser;
692 parser->stack = NULL;
694 parser->stack_size = 0;
696 /* on scope exit, free this parser and restore any outer one */
698 parser->saved_curcop = PL_curcop;
700 /* initialise lexer state */
702 parser->nexttoke = 0;
703 parser->error_count = oparser ? oparser->error_count : 0;
704 parser->copline = parser->preambling = NOLINE;
705 parser->lex_state = LEX_NORMAL;
706 parser->expect = XSTATE;
708 parser->rsfp_filters =
709 !(flags & LEX_START_SAME_FILTER) || !oparser
711 : MUTABLE_AV(SvREFCNT_inc(
712 oparser->rsfp_filters
713 ? oparser->rsfp_filters
714 : (oparser->rsfp_filters = newAV())
717 Newx(parser->lex_brackstack, 120, char);
718 Newx(parser->lex_casestack, 12, char);
719 *parser->lex_casestack = '\0';
720 Newxz(parser->lex_shared, 1, LEXSHARED);
724 s = SvPV_const(line, len);
725 parser->linestr = flags & LEX_START_COPIED
726 ? SvREFCNT_inc_simple_NN(line)
727 : newSVpvn_flags(s, len, SvUTF8(line));
728 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
730 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
732 parser->oldoldbufptr =
735 parser->linestart = SvPVX(parser->linestr);
736 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
737 parser->last_lop = parser->last_uni = NULL;
739 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
740 |LEX_DONT_CLOSE_RSFP));
741 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
742 |LEX_DONT_CLOSE_RSFP));
744 parser->in_pod = parser->filtered = 0;
748 /* delete a parser object */
751 Perl_parser_free(pTHX_ const yy_parser *parser)
753 PERL_ARGS_ASSERT_PARSER_FREE;
755 PL_curcop = parser->saved_curcop;
756 SvREFCNT_dec(parser->linestr);
758 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
759 PerlIO_clearerr(parser->rsfp);
760 else if (parser->rsfp && (!parser->old_parser ||
761 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
762 PerlIO_close(parser->rsfp);
763 SvREFCNT_dec(parser->rsfp_filters);
764 SvREFCNT_dec(parser->lex_stuff);
765 SvREFCNT_dec(parser->sublex_info.repl);
767 Safefree(parser->lex_brackstack);
768 Safefree(parser->lex_casestack);
769 Safefree(parser->lex_shared);
770 PL_parser = parser->old_parser;
775 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
777 I32 nexttoke = parser->nexttoke;
778 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
780 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
781 && parser->nextval[nexttoke].opval
782 && parser->nextval[nexttoke].opval->op_slabbed
783 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
784 op_free(parser->nextval[nexttoke].opval);
785 parser->nextval[nexttoke].opval = NULL;
792 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
794 Buffer scalar containing the chunk currently under consideration of the
795 text currently being lexed. This is always a plain string scalar (for
796 which C<SvPOK> is true). It is not intended to be used as a scalar by
797 normal scalar means; instead refer to the buffer directly by the pointer
798 variables described below.
800 The lexer maintains various C<char*> pointers to things in the
801 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
802 reallocated, all of these pointers must be updated. Don't attempt to
803 do this manually, but rather use L</lex_grow_linestr> if you need to
804 reallocate the buffer.
806 The content of the text chunk in the buffer is commonly exactly one
807 complete line of input, up to and including a newline terminator,
808 but there are situations where it is otherwise. The octets of the
809 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
810 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
811 flag on this scalar, which may disagree with it.
813 For direct examination of the buffer, the variable
814 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
815 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
816 of these pointers is usually preferable to examination of the scalar
817 through normal scalar means.
819 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
821 Direct pointer to the end of the chunk of text currently being lexed, the
822 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
823 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
824 always located at the end of the buffer, and does not count as part of
825 the buffer's contents.
827 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
829 Points to the current position of lexing inside the lexer buffer.
830 Characters around this point may be freely examined, within
831 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
832 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
833 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
835 Lexing code (whether in the Perl core or not) moves this pointer past
836 the characters that it consumes. It is also expected to perform some
837 bookkeeping whenever a newline character is consumed. This movement
838 can be more conveniently performed by the function L</lex_read_to>,
839 which handles newlines appropriately.
841 Interpretation of the buffer's octets can be abstracted out by
842 using the slightly higher-level functions L</lex_peek_unichar> and
843 L</lex_read_unichar>.
845 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
847 Points to the start of the current line inside the lexer buffer.
848 This is useful for indicating at which column an error occurred, and
849 not much else. This must be updated by any lexing code that consumes
850 a newline; the function L</lex_read_to> handles this detail.
856 =for apidoc Amx|bool|lex_bufutf8
858 Indicates whether the octets in the lexer buffer
859 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
860 of Unicode characters. If not, they should be interpreted as Latin-1
861 characters. This is analogous to the C<SvUTF8> flag for scalars.
863 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
864 contains valid UTF-8. Lexing code must be robust in the face of invalid
867 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
868 is significant, but not the whole story regarding the input character
869 encoding. Normally, when a file is being read, the scalar contains octets
870 and its C<SvUTF8> flag is off, but the octets should be interpreted as
871 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
872 however, the scalar may have the C<SvUTF8> flag on, and in this case its
873 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
874 is in effect. This logic may change in the future; use this function
875 instead of implementing the logic yourself.
881 Perl_lex_bufutf8(pTHX)
887 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
889 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
890 at least I<len> octets (including terminating C<NUL>). Returns a
891 pointer to the reallocated buffer. This is necessary before making
892 any direct modification of the buffer that would increase its length.
893 L</lex_stuff_pvn> provides a more convenient way to insert text into
896 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
897 this function updates all of the lexer's variables that point directly
904 Perl_lex_grow_linestr(pTHX_ STRLEN len)
908 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
909 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
910 linestr = PL_parser->linestr;
911 buf = SvPVX(linestr);
912 if (len <= SvLEN(linestr))
914 bufend_pos = PL_parser->bufend - buf;
915 bufptr_pos = PL_parser->bufptr - buf;
916 oldbufptr_pos = PL_parser->oldbufptr - buf;
917 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
918 linestart_pos = PL_parser->linestart - buf;
919 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
920 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
921 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
922 PL_parser->lex_shared->re_eval_start - buf : 0;
924 buf = sv_grow(linestr, len);
926 PL_parser->bufend = buf + bufend_pos;
927 PL_parser->bufptr = buf + bufptr_pos;
928 PL_parser->oldbufptr = buf + oldbufptr_pos;
929 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
930 PL_parser->linestart = buf + linestart_pos;
931 if (PL_parser->last_uni)
932 PL_parser->last_uni = buf + last_uni_pos;
933 if (PL_parser->last_lop)
934 PL_parser->last_lop = buf + last_lop_pos;
935 if (PL_parser->lex_shared->re_eval_start)
936 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
941 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
943 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
944 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
945 reallocating the buffer if necessary. This means that lexing code that
946 runs later will see the characters as if they had appeared in the input.
947 It is not recommended to do this as part of normal parsing, and most
948 uses of this facility run the risk of the inserted characters being
949 interpreted in an unintended manner.
951 The string to be inserted is represented by I<len> octets starting
952 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
953 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
954 The characters are recoded for the lexer buffer, according to how the
955 buffer is currently being interpreted (L</lex_bufutf8>). If a string
956 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
957 function is more convenient.
963 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
967 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
968 if (flags & ~(LEX_STUFF_UTF8))
969 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
971 if (flags & LEX_STUFF_UTF8) {
974 STRLEN highhalf = 0; /* Count of variants */
975 const char *p, *e = pv+len;
976 for (p = pv; p != e; p++) {
977 if (! UTF8_IS_INVARIANT(*p)) {
983 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
984 bufptr = PL_parser->bufptr;
985 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
986 SvCUR_set(PL_parser->linestr,
987 SvCUR(PL_parser->linestr) + len+highhalf);
988 PL_parser->bufend += len+highhalf;
989 for (p = pv; p != e; p++) {
991 if (! UTF8_IS_INVARIANT(c)) {
992 *bufptr++ = UTF8_TWO_BYTE_HI(c);
993 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1000 if (flags & LEX_STUFF_UTF8) {
1001 STRLEN highhalf = 0;
1002 const char *p, *e = pv+len;
1003 for (p = pv; p != e; p++) {
1005 if (UTF8_IS_ABOVE_LATIN1(c)) {
1006 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1007 "non-Latin-1 character into Latin-1 input");
1008 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1011 } else if (! UTF8_IS_INVARIANT(c)) {
1012 /* malformed UTF-8 */
1014 SAVESPTR(PL_warnhook);
1015 PL_warnhook = PERL_WARNHOOK_FATAL;
1016 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1022 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023 bufptr = PL_parser->bufptr;
1024 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025 SvCUR_set(PL_parser->linestr,
1026 SvCUR(PL_parser->linestr) + len-highhalf);
1027 PL_parser->bufend += len-highhalf;
1030 if (UTF8_IS_INVARIANT(*p)) {
1036 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046 PL_parser->bufend += len;
1047 Copy(pv, bufptr, len, char);
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary. This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1063 The string to be inserted is represented by octets starting at I<pv>
1064 and continuing to the first nul. These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in I<flags>. The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1077 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078 lex_stuff_pvn(pv, strlen(pv), flags);
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary. This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1092 The string to be inserted is the string value of I<sv>. The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1106 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1110 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1121 This is not the normal way to consume lexed text. For that, use
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1132 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133 buf = PL_parser->bufptr;
1135 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 bufend = PL_parser->bufend;
1140 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141 unstuff_len = ptr - buf;
1142 Move(ptr, buf, bufend+1-ptr, char);
1143 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144 PL_parser->bufend = bufend - unstuff_len;
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1163 Perl_lex_read_to(pTHX_ char *ptr)
1166 PERL_ARGS_ASSERT_LEX_READ_TO;
1167 s = PL_parser->bufptr;
1168 if (ptr < s || ptr > PL_parser->bufend)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170 for (; s != ptr; s++)
1172 COPLINE_INC_WITH_HERELINES;
1173 PL_parser->linestart = s+1;
1175 PL_parser->bufptr = ptr;
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to I<ptr>. The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately. I<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it. However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1203 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204 buf = SvPVX(PL_parser->linestr);
1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1209 if (ptr > PL_parser->bufptr)
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211 discard_len = ptr - buf;
1212 if (PL_parser->oldbufptr < ptr)
1213 PL_parser->oldbufptr = ptr;
1214 if (PL_parser->oldoldbufptr < ptr)
1215 PL_parser->oldoldbufptr = ptr;
1216 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219 PL_parser->last_lop = NULL;
1220 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222 PL_parser->bufend -= discard_len;
1223 PL_parser->bufptr -= discard_len;
1224 PL_parser->oldbufptr -= discard_len;
1225 PL_parser->oldoldbufptr -= discard_len;
1226 if (PL_parser->last_uni)
1227 PL_parser->last_uni -= discard_len;
1228 if (PL_parser->last_lop)
1229 PL_parser->last_lop -= discard_len;
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more. It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245 will not be discarded. If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM 0x40000000 /* here-doc */
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1262 STRLEN old_bufend_pos, new_bufend_pos;
1263 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265 bool got_some_for_debugger = 0;
1267 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1271 linestr = PL_parser->linestr;
1272 buf = SvPVX(linestr);
1273 if (!(flags & LEX_KEEP_PREVIOUS) &&
1274 PL_parser->bufptr == PL_parser->bufend) {
1275 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1277 if (PL_parser->last_uni != PL_parser->bufend)
1278 PL_parser->last_uni = NULL;
1279 if (PL_parser->last_lop != PL_parser->bufend)
1280 PL_parser->last_lop = NULL;
1281 last_uni_pos = last_lop_pos = 0;
1285 old_bufend_pos = PL_parser->bufend - buf;
1286 bufptr_pos = PL_parser->bufptr - buf;
1287 oldbufptr_pos = PL_parser->oldbufptr - buf;
1288 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1289 linestart_pos = PL_parser->linestart - buf;
1290 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1291 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1293 if (flags & LEX_FAKE_EOF) {
1295 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1297 } else if (filter_gets(linestr, old_bufend_pos)) {
1299 got_some_for_debugger = 1;
1300 } else if (flags & LEX_NO_TERM) {
1303 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1304 sv_setpvs(linestr, "");
1306 /* End of real input. Close filehandle (unless it was STDIN),
1307 * then add implicit termination.
1309 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1310 PerlIO_clearerr(PL_parser->rsfp);
1311 else if (PL_parser->rsfp)
1312 (void)PerlIO_close(PL_parser->rsfp);
1313 PL_parser->rsfp = NULL;
1314 PL_parser->in_pod = PL_parser->filtered = 0;
1315 if (!PL_in_eval && PL_minus_p) {
1317 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1318 PL_minus_n = PL_minus_p = 0;
1319 } else if (!PL_in_eval && PL_minus_n) {
1320 sv_catpvs(linestr, /*{*/";}");
1323 sv_catpvs(linestr, ";");
1326 buf = SvPVX(linestr);
1327 new_bufend_pos = SvCUR(linestr);
1328 PL_parser->bufend = buf + new_bufend_pos;
1329 PL_parser->bufptr = buf + bufptr_pos;
1330 PL_parser->oldbufptr = buf + oldbufptr_pos;
1331 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1332 PL_parser->linestart = buf + linestart_pos;
1333 if (PL_parser->last_uni)
1334 PL_parser->last_uni = buf + last_uni_pos;
1335 if (PL_parser->last_lop)
1336 PL_parser->last_lop = buf + last_lop_pos;
1337 if (PL_parser->preambling != NOLINE) {
1338 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1339 PL_parser->preambling = NOLINE;
1341 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1342 PL_curstash != PL_debstash) {
1343 /* debugger active and we're not compiling the debugger code,
1344 * so store the line into the debugger's array of lines
1346 update_debugger_info(NULL, buf+old_bufend_pos,
1347 new_bufend_pos-old_bufend_pos);
1353 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1355 Looks ahead one (Unicode) character in the text currently being lexed.
1356 Returns the codepoint (unsigned integer value) of the next character,
1357 or -1 if lexing has reached the end of the input text. To consume the
1358 peeked character, use L</lex_read_unichar>.
1360 If the next character is in (or extends into) the next chunk of input
1361 text, the next chunk will be read in. Normally the current chunk will be
1362 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1363 then the current chunk will not be discarded.
1365 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1366 is encountered, an exception is generated.
1372 Perl_lex_peek_unichar(pTHX_ U32 flags)
1376 if (flags & ~(LEX_KEEP_PREVIOUS))
1377 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1378 s = PL_parser->bufptr;
1379 bufend = PL_parser->bufend;
1385 if (!lex_next_chunk(flags))
1387 s = PL_parser->bufptr;
1388 bufend = PL_parser->bufend;
1391 if (UTF8_IS_INVARIANT(head))
1393 if (UTF8_IS_START(head)) {
1394 len = UTF8SKIP(&head);
1395 while ((STRLEN)(bufend-s) < len) {
1396 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1398 s = PL_parser->bufptr;
1399 bufend = PL_parser->bufend;
1402 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1403 if (retlen == (STRLEN)-1) {
1404 /* malformed UTF-8 */
1406 SAVESPTR(PL_warnhook);
1407 PL_warnhook = PERL_WARNHOOK_FATAL;
1408 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1414 if (!lex_next_chunk(flags))
1416 s = PL_parser->bufptr;
1423 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1425 Reads the next (Unicode) character in the text currently being lexed.
1426 Returns the codepoint (unsigned integer value) of the character read,
1427 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1428 if lexing has reached the end of the input text. To non-destructively
1429 examine the next character, use L</lex_peek_unichar> instead.
1431 If the next character is in (or extends into) the next chunk of input
1432 text, the next chunk will be read in. Normally the current chunk will be
1433 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1434 then the current chunk will not be discarded.
1436 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1437 is encountered, an exception is generated.
1443 Perl_lex_read_unichar(pTHX_ U32 flags)
1446 if (flags & ~(LEX_KEEP_PREVIOUS))
1447 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1448 c = lex_peek_unichar(flags);
1451 COPLINE_INC_WITH_HERELINES;
1453 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1455 ++(PL_parser->bufptr);
1461 =for apidoc Amx|void|lex_read_space|U32 flags
1463 Reads optional spaces, in Perl style, in the text currently being
1464 lexed. The spaces may include ordinary whitespace characters and
1465 Perl-style comments. C<#line> directives are processed if encountered.
1466 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1467 at a non-space character (or the end of the input text).
1469 If spaces extend into the next chunk of input text, the next chunk will
1470 be read in. Normally the current chunk will be discarded at the same
1471 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1472 chunk will not be discarded.
1477 #define LEX_NO_INCLINE 0x40000000
1478 #define LEX_NO_NEXT_CHUNK 0x80000000
1481 Perl_lex_read_space(pTHX_ U32 flags)
1484 const bool can_incline = !(flags & LEX_NO_INCLINE);
1485 bool need_incline = 0;
1486 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1487 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1488 s = PL_parser->bufptr;
1489 bufend = PL_parser->bufend;
1495 } while (!(c == '\n' || (c == 0 && s == bufend)));
1496 } else if (c == '\n') {
1499 PL_parser->linestart = s;
1505 } else if (isSPACE(c)) {
1507 } else if (c == 0 && s == bufend) {
1510 if (flags & LEX_NO_NEXT_CHUNK)
1512 PL_parser->bufptr = s;
1513 l = CopLINE(PL_curcop);
1514 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1515 got_more = lex_next_chunk(flags);
1516 CopLINE_set(PL_curcop, l);
1517 s = PL_parser->bufptr;
1518 bufend = PL_parser->bufend;
1521 if (can_incline && need_incline && PL_parser->rsfp) {
1531 PL_parser->bufptr = s;
1536 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1538 This function performs syntax checking on a prototype, C<proto>.
1539 If C<warn> is true, any illegal characters or mismatched brackets
1540 will trigger illegalproto warnings, declaring that they were
1541 detected in the prototype for C<name>.
1543 The return value is C<true> if this is a valid prototype, and
1544 C<false> if it is not, regardless of whether C<warn> was C<true> or
1547 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1554 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1556 STRLEN len, origlen;
1557 char *p = proto ? SvPV(proto, len) : NULL;
1558 bool bad_proto = FALSE;
1559 bool in_brackets = FALSE;
1560 bool after_slash = FALSE;
1561 char greedy_proto = ' ';
1562 bool proto_after_greedy_proto = FALSE;
1563 bool must_be_last = FALSE;
1564 bool underscore = FALSE;
1565 bool bad_proto_after_underscore = FALSE;
1567 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1573 for (; len--; p++) {
1576 proto_after_greedy_proto = TRUE;
1578 if (!strchr(";@%", *p))
1579 bad_proto_after_underscore = TRUE;
1582 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1589 in_brackets = FALSE;
1590 else if ((*p == '@' || *p == '%') &&
1593 must_be_last = TRUE;
1602 after_slash = FALSE;
1607 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1610 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1611 origlen, UNI_DISPLAY_ISPRINT)
1612 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1614 if (proto_after_greedy_proto)
1615 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1616 "Prototype after '%c' for %"SVf" : %s",
1617 greedy_proto, SVfARG(name), p);
1619 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1620 "Missing ']' in prototype for %"SVf" : %s",
1623 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1624 "Illegal character in prototype for %"SVf" : %s",
1626 if (bad_proto_after_underscore)
1627 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1628 "Illegal character after '_' in prototype for %"SVf" : %s",
1632 return (! (proto_after_greedy_proto || bad_proto) );
1637 * This subroutine has nothing to do with tilting, whether at windmills
1638 * or pinball tables. Its name is short for "increment line". It
1639 * increments the current line number in CopLINE(PL_curcop) and checks
1640 * to see whether the line starts with a comment of the form
1641 * # line 500 "foo.pm"
1642 * If so, it sets the current line number and file to the values in the comment.
1646 S_incline(pTHX_ const char *s)
1653 PERL_ARGS_ASSERT_INCLINE;
1655 COPLINE_INC_WITH_HERELINES;
1656 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1657 && s+1 == PL_bufend && *s == ';') {
1658 /* fake newline in string eval */
1659 CopLINE_dec(PL_curcop);
1664 while (SPACE_OR_TAB(*s))
1666 if (strnEQ(s, "line", 4))
1670 if (SPACE_OR_TAB(*s))
1674 while (SPACE_OR_TAB(*s))
1682 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1684 while (SPACE_OR_TAB(*s))
1686 if (*s == '"' && (t = strchr(s+1, '"'))) {
1692 while (!isSPACE(*t))
1696 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1698 if (*e != '\n' && *e != '\0')
1699 return; /* false alarm */
1701 line_num = grok_atou(n, &e) - 1;
1704 const STRLEN len = t - s;
1706 if (!PL_rsfp && !PL_parser->filtered) {
1707 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1708 * to *{"::_<newfilename"} */
1709 /* However, the long form of evals is only turned on by the
1710 debugger - usually they're "(eval %lu)" */
1711 GV * const cfgv = CopFILEGV(PL_curcop);
1714 STRLEN tmplen2 = len;
1718 if (tmplen2 + 2 <= sizeof smallbuf)
1721 Newx(tmpbuf2, tmplen2 + 2, char);
1726 memcpy(tmpbuf2 + 2, s, tmplen2);
1729 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1731 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1732 /* adjust ${"::_<newfilename"} to store the new file name */
1733 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1734 /* The line number may differ. If that is the case,
1735 alias the saved lines that are in the array.
1736 Otherwise alias the whole array. */
1737 if (CopLINE(PL_curcop) == line_num) {
1738 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1739 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1741 else if (GvAV(cfgv)) {
1742 AV * const av = GvAV(cfgv);
1743 const I32 start = CopLINE(PL_curcop)+1;
1744 I32 items = AvFILLp(av) - start;
1746 AV * const av2 = GvAVn(gv2);
1747 SV **svp = AvARRAY(av) + start;
1748 I32 l = (I32)line_num+1;
1750 av_store(av2, l++, SvREFCNT_inc(*svp++));
1755 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1758 CopFILE_free(PL_curcop);
1759 CopFILE_setn(PL_curcop, s, len);
1761 CopLINE_set(PL_curcop, line_num);
1764 #define skipspace(s) skipspace_flags(s, 0)
1768 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1770 AV *av = CopFILEAVx(PL_curcop);
1773 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1775 sv = *av_fetch(av, 0, 1);
1776 SvUPGRADE(sv, SVt_PVMG);
1778 if (!SvPOK(sv)) sv_setpvs(sv,"");
1780 sv_catsv(sv, orig_sv);
1782 sv_catpvn(sv, buf, len);
1787 if (PL_parser->preambling == NOLINE)
1788 av_store(av, CopLINE(PL_curcop), sv);
1794 * Called to gobble the appropriate amount and type of whitespace.
1795 * Skips comments as well.
1799 S_skipspace_flags(pTHX_ char *s, U32 flags)
1801 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1802 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1803 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1806 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1808 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1809 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1810 LEX_NO_NEXT_CHUNK : 0));
1812 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1813 if (PL_linestart > PL_bufptr)
1814 PL_bufptr = PL_linestart;
1822 * Check the unary operators to ensure there's no ambiguity in how they're
1823 * used. An ambiguous piece of code would be:
1825 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1826 * the +5 is its argument.
1835 if (PL_oldoldbufptr != PL_last_uni)
1837 while (isSPACE(*PL_last_uni))
1840 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1842 if ((t = strchr(s, '(')) && t < PL_bufptr)
1845 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1846 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1847 (int)(s - PL_last_uni), PL_last_uni);
1851 * LOP : macro to build a list operator. Its behaviour has been replaced
1852 * with a subroutine, S_lop() for which LOP is just another name.
1855 #define LOP(f,x) return lop(f,x,s)
1859 * Build a list operator (or something that might be one). The rules:
1860 * - if we have a next token, then it's a list operator (no parens) for
1861 * which the next token has already been parsed; e.g.,
1864 * - if the next thing is an opening paren, then it's a function
1865 * - else it's a list operator
1869 S_lop(pTHX_ I32 f, int x, char *s)
1871 PERL_ARGS_ASSERT_LOP;
1876 PL_last_lop = PL_oldbufptr;
1877 PL_last_lop_op = (OPCODE)f;
1882 return REPORT(FUNC);
1885 return REPORT(FUNC);
1888 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1889 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1890 return REPORT(LSTOP);
1896 * When the lexer realizes it knows the next token (for instance,
1897 * it is reordering tokens for the parser) then it can call S_force_next
1898 * to know what token to return the next time the lexer is called. Caller
1899 * will need to set PL_nextval[] and possibly PL_expect to ensure
1900 * the lexer handles the token correctly.
1904 S_force_next(pTHX_ I32 type)
1908 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1909 tokereport(type, &NEXTVAL_NEXTTOKE);
1912 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1913 PL_nexttype[PL_nexttoke] = type;
1915 if (PL_lex_state != LEX_KNOWNEXT) {
1916 PL_lex_defer = PL_lex_state;
1917 PL_lex_state = LEX_KNOWNEXT;
1924 * This subroutine handles postfix deref syntax after the arrow has already
1925 * been emitted. @* $* etc. are emitted as two separate token right here.
1926 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1927 * only the first, leaving yylex to find the next.
1931 S_postderef(pTHX_ int const funny, char const next)
1933 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1934 assert(strchr("*[{", next));
1936 PL_expect = XOPERATOR;
1937 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1938 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1939 PL_lex_state = LEX_INTERPEND;
1940 force_next(POSTJOIN);
1946 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1947 && !PL_lex_brackets)
1949 PL_expect = XOPERATOR;
1958 int yyc = PL_parser->yychar;
1959 if (yyc != YYEMPTY) {
1961 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1962 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1963 PL_lex_allbrackets--;
1965 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1966 } else if (yyc == '('/*)*/) {
1967 PL_lex_allbrackets--;
1972 PL_parser->yychar = YYEMPTY;
1977 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1979 SV * const sv = newSVpvn_utf8(start, len,
1982 && !is_invariant_string((const U8*)start, len)
1983 && is_utf8_string((const U8*)start, len));
1989 * When the lexer knows the next thing is a word (for instance, it has
1990 * just seen -> and it knows that the next char is a word char, then
1991 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1995 * char *start : buffer position (must be within PL_linestr)
1996 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1997 * int check_keyword : if true, Perl checks to make sure the word isn't
1998 * a keyword (do this if the word is a label, e.g. goto FOO)
1999 * int allow_pack : if true, : characters will also be allowed (require,
2000 * use, etc. do this)
2004 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2009 PERL_ARGS_ASSERT_FORCE_WORD;
2011 start = skipspace(start);
2013 if (isIDFIRST_lazy_if(s,UTF) ||
2014 (allow_pack && *s == ':') )
2016 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2017 if (check_keyword) {
2018 char *s2 = PL_tokenbuf;
2020 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2022 if (keyword(s2, len2, 0))
2025 if (token == METHOD) {
2030 PL_expect = XOPERATOR;
2033 NEXTVAL_NEXTTOKE.opval
2034 = (OP*)newSVOP(OP_CONST,0,
2035 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2036 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2044 * Called when the lexer wants $foo *foo &foo etc, but the program
2045 * text only contains the "foo" portion. The first argument is a pointer
2046 * to the "foo", and the second argument is the type symbol to prefix.
2047 * Forces the next token to be a "WORD".
2048 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2052 S_force_ident(pTHX_ const char *s, int kind)
2054 PERL_ARGS_ASSERT_FORCE_IDENT;
2057 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2058 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2059 UTF ? SVf_UTF8 : 0));
2060 NEXTVAL_NEXTTOKE.opval = o;
2063 o->op_private = OPpCONST_ENTERED;
2064 /* XXX see note in pp_entereval() for why we forgo typo
2065 warnings if the symbol must be introduced in an eval.
2067 gv_fetchpvn_flags(s, len,
2068 (PL_in_eval ? GV_ADDMULTI
2069 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2070 kind == '$' ? SVt_PV :
2071 kind == '@' ? SVt_PVAV :
2072 kind == '%' ? SVt_PVHV :
2080 S_force_ident_maybe_lex(pTHX_ char pit)
2082 NEXTVAL_NEXTTOKE.ival = pit;
2087 Perl_str_to_version(pTHX_ SV *sv)
2092 const char *start = SvPV_const(sv,len);
2093 const char * const end = start + len;
2094 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2096 PERL_ARGS_ASSERT_STR_TO_VERSION;
2098 while (start < end) {
2102 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2107 retval += ((NV)n)/nshift;
2116 * Forces the next token to be a version number.
2117 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2118 * and if "guessing" is TRUE, then no new token is created (and the caller
2119 * must use an alternative parsing method).
2123 S_force_version(pTHX_ char *s, int guessing)
2128 PERL_ARGS_ASSERT_FORCE_VERSION;
2136 while (isDIGIT(*d) || *d == '_' || *d == '.')
2138 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2140 s = scan_num(s, &pl_yylval);
2141 version = pl_yylval.opval;
2142 ver = cSVOPx(version)->op_sv;
2143 if (SvPOK(ver) && !SvNIOK(ver)) {
2144 SvUPGRADE(ver, SVt_PVNV);
2145 SvNV_set(ver, str_to_version(ver));
2146 SvNOK_on(ver); /* hint that it is a version */
2149 else if (guessing) {
2154 /* NOTE: The parser sees the package name and the VERSION swapped */
2155 NEXTVAL_NEXTTOKE.opval = version;
2162 * S_force_strict_version
2163 * Forces the next token to be a version number using strict syntax rules.
2167 S_force_strict_version(pTHX_ char *s)
2170 const char *errstr = NULL;
2172 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2174 while (isSPACE(*s)) /* leading whitespace */
2177 if (is_STRICT_VERSION(s,&errstr)) {
2179 s = (char *)scan_version(s, ver, 0);
2180 version = newSVOP(OP_CONST, 0, ver);
2182 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2183 (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2187 yyerror(errstr); /* version required */
2191 /* NOTE: The parser sees the package name and the VERSION swapped */
2192 NEXTVAL_NEXTTOKE.opval = version;
2200 * Tokenize a quoted string passed in as an SV. It finds the next
2201 * chunk, up to end of string or a backslash. It may make a new
2202 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2207 S_tokeq(pTHX_ SV *sv)
2214 PERL_ARGS_ASSERT_TOKEQ;
2218 assert (!SvIsCOW(sv));
2219 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2223 /* This is relying on the SV being "well formed" with a trailing '\0' */
2224 while (s < send && !(*s == '\\' && s[1] == '\\'))
2229 if ( PL_hints & HINT_NEW_STRING ) {
2230 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2231 SVs_TEMP | SvUTF8(sv));
2235 if (s + 1 < send && (s[1] == '\\'))
2236 s++; /* all that, just for this */
2241 SvCUR_set(sv, d - SvPVX_const(sv));
2243 if ( PL_hints & HINT_NEW_STRING )
2244 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2249 * Now come three functions related to double-quote context,
2250 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2251 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2252 * interact with PL_lex_state, and create fake ( ... ) argument lists
2253 * to handle functions and concatenation.
2257 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2262 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2264 * Pattern matching will set PL_lex_op to the pattern-matching op to
2265 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2267 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269 * Everything else becomes a FUNC.
2271 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2272 * had an OP_CONST or OP_READLINE). This just sets us up for a
2273 * call to S_sublex_push().
2277 S_sublex_start(pTHX)
2279 const I32 op_type = pl_yylval.ival;
2281 if (op_type == OP_NULL) {
2282 pl_yylval.opval = PL_lex_op;
2286 if (op_type == OP_CONST) {
2287 SV *sv = PL_lex_stuff;
2288 PL_lex_stuff = NULL;
2291 if (SvTYPE(sv) == SVt_PVIV) {
2292 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2294 const char * const p = SvPV_const(sv, len);
2295 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2299 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2303 PL_sublex_info.super_state = PL_lex_state;
2304 PL_sublex_info.sub_inwhat = (U16)op_type;
2305 PL_sublex_info.sub_op = PL_lex_op;
2306 PL_lex_state = LEX_INTERPPUSH;
2310 pl_yylval.opval = PL_lex_op;
2320 * Create a new scope to save the lexing state. The scope will be
2321 * ended in S_sublex_done. Returns a '(', starting the function arguments
2322 * to the uc, lc, etc. found before.
2323 * Sets PL_lex_state to LEX_INTERPCONCAT.
2330 const bool is_heredoc = PL_multi_close == '<';
2333 PL_lex_state = PL_sublex_info.super_state;
2334 SAVEI8(PL_lex_dojoin);
2335 SAVEI32(PL_lex_brackets);
2336 SAVEI32(PL_lex_allbrackets);
2337 SAVEI32(PL_lex_formbrack);
2338 SAVEI8(PL_lex_fakeeof);
2339 SAVEI32(PL_lex_casemods);
2340 SAVEI32(PL_lex_starts);
2341 SAVEI8(PL_lex_state);
2342 SAVESPTR(PL_lex_repl);
2343 SAVEVPTR(PL_lex_inpat);
2344 SAVEI16(PL_lex_inwhat);
2347 SAVECOPLINE(PL_curcop);
2348 SAVEI32(PL_multi_end);
2349 SAVEI32(PL_parser->herelines);
2350 PL_parser->herelines = 0;
2352 SAVEI8(PL_multi_close);
2353 SAVEPPTR(PL_bufptr);
2354 SAVEPPTR(PL_bufend);
2355 SAVEPPTR(PL_oldbufptr);
2356 SAVEPPTR(PL_oldoldbufptr);
2357 SAVEPPTR(PL_last_lop);
2358 SAVEPPTR(PL_last_uni);
2359 SAVEPPTR(PL_linestart);
2360 SAVESPTR(PL_linestr);
2361 SAVEGENERICPV(PL_lex_brackstack);
2362 SAVEGENERICPV(PL_lex_casestack);
2363 SAVEGENERICPV(PL_parser->lex_shared);
2364 SAVEBOOL(PL_parser->lex_re_reparsing);
2365 SAVEI32(PL_copline);
2367 /* The here-doc parser needs to be able to peek into outer lexing
2368 scopes to find the body of the here-doc. So we put PL_linestr and
2369 PL_bufptr into lex_shared, to ‘share’ those values.
2371 PL_parser->lex_shared->ls_linestr = PL_linestr;
2372 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2374 PL_linestr = PL_lex_stuff;
2375 PL_lex_repl = PL_sublex_info.repl;
2376 PL_lex_stuff = NULL;
2377 PL_sublex_info.repl = NULL;
2379 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2380 set for an inner quote-like operator and then an error causes scope-
2381 popping. We must not have a PL_lex_stuff value left dangling, as
2382 that breaks assumptions elsewhere. See bug #123617. */
2383 SAVEGENERICSV(PL_lex_stuff);
2384 SAVEGENERICSV(PL_sublex_info.repl);
2386 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2387 = SvPVX(PL_linestr);
2388 PL_bufend += SvCUR(PL_linestr);
2389 PL_last_lop = PL_last_uni = NULL;
2390 SAVEFREESV(PL_linestr);
2391 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2393 PL_lex_dojoin = FALSE;
2394 PL_lex_brackets = PL_lex_formbrack = 0;
2395 PL_lex_allbrackets = 0;
2396 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2397 Newx(PL_lex_brackstack, 120, char);
2398 Newx(PL_lex_casestack, 12, char);
2399 PL_lex_casemods = 0;
2400 *PL_lex_casestack = '\0';
2402 PL_lex_state = LEX_INTERPCONCAT;
2404 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2405 PL_copline = NOLINE;
2407 Newxz(shared, 1, LEXSHARED);
2408 shared->ls_prev = PL_parser->lex_shared;
2409 PL_parser->lex_shared = shared;
2411 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2412 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2413 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2414 PL_lex_inpat = PL_sublex_info.sub_op;
2416 PL_lex_inpat = NULL;
2418 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2419 PL_in_eval &= ~EVAL_RE_REPARSING;
2426 * Restores lexer state after a S_sublex_push.
2432 if (!PL_lex_starts++) {
2433 SV * const sv = newSVpvs("");
2434 if (SvUTF8(PL_linestr))
2436 PL_expect = XOPERATOR;
2437 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2441 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2442 PL_lex_state = LEX_INTERPCASEMOD;
2446 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2447 assert(PL_lex_inwhat != OP_TRANSR);
2449 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2450 PL_linestr = PL_lex_repl;
2452 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2453 PL_bufend += SvCUR(PL_linestr);
2454 PL_last_lop = PL_last_uni = NULL;
2455 PL_lex_dojoin = FALSE;
2456 PL_lex_brackets = 0;
2457 PL_lex_allbrackets = 0;
2458 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2459 PL_lex_casemods = 0;
2460 *PL_lex_casestack = '\0';
2462 if (SvEVALED(PL_lex_repl)) {
2463 PL_lex_state = LEX_INTERPNORMAL;
2465 /* we don't clear PL_lex_repl here, so that we can check later
2466 whether this is an evalled subst; that means we rely on the
2467 logic to ensure sublex_done() is called again only via the
2468 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2471 PL_lex_state = LEX_INTERPCONCAT;
2474 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2475 CopLINE(PL_curcop) +=
2476 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2477 + PL_parser->herelines;
2478 PL_parser->herelines = 0;
2483 const line_t l = CopLINE(PL_curcop);
2485 if (PL_multi_close == '<')
2486 PL_parser->herelines += l - PL_multi_end;
2487 PL_bufend = SvPVX(PL_linestr);
2488 PL_bufend += SvCUR(PL_linestr);
2489 PL_expect = XOPERATOR;
2494 PERL_STATIC_INLINE SV*
2495 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2497 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2498 * interior, hence to the "}". Finds what the name resolves to, returning
2499 * an SV* containing it; NULL if no valid one found */
2501 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2508 const U8* first_bad_char_loc;
2509 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2511 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2516 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2518 &first_bad_char_loc))
2520 /* If warnings are on, this will print a more detailed analysis of what
2521 * is wrong than the error message below */
2522 utf8n_to_uvchr(first_bad_char_loc,
2523 e - ((char *) first_bad_char_loc),
2526 /* We deliberately don't try to print the malformed character, which
2527 * might not print very well; it also may be just the first of many
2528 * malformations, so don't print what comes after it */
2529 yyerror(Perl_form(aTHX_
2530 "Malformed UTF-8 character immediately after '%.*s'",
2531 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2535 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2536 /* include the <}> */
2537 e - backslash_ptr + 1);
2539 SvREFCNT_dec_NN(res);
2543 /* See if the charnames handler is the Perl core's, and if so, we can skip
2544 * the validation needed for a user-supplied one, as Perl's does its own
2546 table = GvHV(PL_hintgv); /* ^H */
2547 cvp = hv_fetchs(table, "charnames", FALSE);
2548 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2549 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2551 const char * const name = HvNAME(stash);
2552 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2553 && strEQ(name, "_charnames")) {
2558 /* Here, it isn't Perl's charname handler. We can't rely on a
2559 * user-supplied handler to validate the input name. For non-ut8 input,
2560 * look to see that the first character is legal. Then loop through the
2561 * rest checking that each is a continuation */
2563 /* This code makes the reasonable assumption that the only Latin1-range
2564 * characters that begin a character name alias are alphabetic, otherwise
2565 * would have to create a isCHARNAME_BEGIN macro */
2568 if (! isALPHAU(*s)) {
2573 if (! isCHARNAME_CONT(*s)) {
2576 if (*s == ' ' && *(s-1) == ' ') {
2579 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2580 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2581 "NO-BREAK SPACE in a charnames "
2582 "alias definition is deprecated");
2588 /* Similarly for utf8. For invariants can check directly; for other
2589 * Latin1, can calculate their code point and check; otherwise use a
2591 if (UTF8_IS_INVARIANT(*s)) {
2592 if (! isALPHAU(*s)) {
2596 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2597 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2603 if (! PL_utf8_charname_begin) {
2604 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2605 PL_utf8_charname_begin = _core_swash_init("utf8",
2606 "_Perl_Charname_Begin",
2608 1, 0, NULL, &flags);
2610 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2617 if (UTF8_IS_INVARIANT(*s)) {
2618 if (! isCHARNAME_CONT(*s)) {
2621 if (*s == ' ' && *(s-1) == ' ') {
2626 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2627 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2631 if (*s == *NBSP_UTF8
2632 && *(s+1) == *(NBSP_UTF8+1)
2633 && ckWARN_d(WARN_DEPRECATED))
2635 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2636 "NO-BREAK SPACE in a charnames "
2637 "alias definition is deprecated");
2642 if (! PL_utf8_charname_continue) {
2643 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2644 PL_utf8_charname_continue = _core_swash_init("utf8",
2645 "_Perl_Charname_Continue",
2647 1, 0, NULL, &flags);
2649 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2656 if (*(s-1) == ' ') {
2659 "charnames alias definitions may not contain trailing "
2660 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2661 (int)(s - backslash_ptr + 1), backslash_ptr,
2662 (int)(e - s + 1), s + 1
2664 UTF ? SVf_UTF8 : 0);
2668 if (SvUTF8(res)) { /* Don't accept malformed input */
2669 const U8* first_bad_char_loc;
2671 const char* const str = SvPV_const(res, len);
2672 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2673 /* If warnings are on, this will print a more detailed analysis of
2674 * what is wrong than the error message below */
2675 utf8n_to_uvchr(first_bad_char_loc,
2676 (char *) first_bad_char_loc - str,
2679 /* We deliberately don't try to print the malformed character,
2680 * which might not print very well; it also may be just the first
2681 * of many malformations, so don't print what comes after it */
2684 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2685 (int) (e - backslash_ptr + 1), backslash_ptr,
2686 (int) ((char *) first_bad_char_loc - str), str
2697 /* The final %.*s makes sure that should the trailing NUL be missing
2698 * that this print won't run off the end of the string */
2701 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2702 (int)(s - backslash_ptr + 1), backslash_ptr,
2703 (int)(e - s + 1), s + 1
2705 UTF ? SVf_UTF8 : 0);
2712 "charnames alias definitions may not contain a sequence of "
2713 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2714 (int)(s - backslash_ptr + 1), backslash_ptr,
2715 (int)(e - s + 1), s + 1
2717 UTF ? SVf_UTF8 : 0);
2724 Extracts the next constant part of a pattern, double-quoted string,
2725 or transliteration. This is terrifying code.
2727 For example, in parsing the double-quoted string "ab\x63$d", it would
2728 stop at the '$' and return an OP_CONST containing 'abc'.
2730 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2731 processing a pattern (PL_lex_inpat is true), a transliteration
2732 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2734 Returns a pointer to the character scanned up to. If this is
2735 advanced from the start pointer supplied (i.e. if anything was
2736 successfully parsed), will leave an OP_CONST for the substring scanned
2737 in pl_yylval. Caller must intuit reason for not parsing further
2738 by looking at the next characters herself.
2742 \N{FOO} => \N{U+hex_for_character_FOO}
2743 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2746 all other \-char, including \N and \N{ apart from \N{ABC}
2749 @ and $ where it appears to be a var, but not for $ as tail anchor
2754 In transliterations:
2755 characters are VERY literal, except for - not at the start or end
2756 of the string, which indicates a range. If the range is in bytes,
2757 scan_const expands the range to the full set of intermediate
2758 characters. If the range is in utf8, the hyphen is replaced with
2759 a certain range mark which will be handled by pmtrans() in op.c.
2761 In double-quoted strings:
2763 double-quoted style: \r and \n
2764 constants: \x31, etc.
2765 deprecated backrefs: \1 (in substitution replacements)
2766 case and quoting: \U \Q \E
2769 scan_const does *not* construct ops to handle interpolated strings.
2770 It stops processing as soon as it finds an embedded $ or @ variable
2771 and leaves it to the caller to work out what's going on.
2773 embedded arrays (whether in pattern or not) could be:
2774 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2776 $ in double-quoted strings must be the symbol of an embedded scalar.
2778 $ in pattern could be $foo or could be tail anchor. Assumption:
2779 it's a tail anchor if $ is the last thing in the string, or if it's
2780 followed by one of "()| \r\n\t"
2782 \1 (backreferences) are turned into $1 in substitutions
2784 The structure of the code is
2785 while (there's a character to process) {
2786 handle transliteration ranges
2787 skip regexp comments /(?#comment)/ and codes /(?{code})/
2788 skip #-initiated comments in //x patterns
2789 check for embedded arrays
2790 check for embedded scalars
2792 deprecate \1 in substitution replacements
2793 handle string-changing backslashes \l \U \Q \E, etc.
2794 switch (what was escaped) {
2795 handle \- in a transliteration (becomes a literal -)
2796 if a pattern and not \N{, go treat as regular character
2797 handle \132 (octal characters)
2798 handle \x15 and \x{1234} (hex characters)
2799 handle \N{name} (named characters, also \N{3,5} in a pattern)
2800 handle \cV (control characters)
2801 handle printf-style backslashes (\f, \r, \n, etc)
2804 } (end if backslash)
2805 handle regular character
2806 } (end while character to read)
2811 S_scan_const(pTHX_ char *start)
2813 char *send = PL_bufend; /* end of the constant */
2814 SV *sv = newSV(send - start); /* sv for the constant. See note below
2816 char *s = start; /* start of the constant */
2817 char *d = SvPVX(sv); /* destination for copies */
2818 bool dorange = FALSE; /* are we in a translit range? */
2819 bool didrange = FALSE; /* did we just finish a range? */
2820 bool in_charclass = FALSE; /* within /[...]/ */
2821 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2822 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2823 UTF8? But, this can show as true
2824 when the source isn't utf8, as for
2825 example when it is entirely composed
2827 SV *res; /* result from charnames */
2829 /* Note on sizing: The scanned constant is placed into sv, which is
2830 * initialized by newSV() assuming one byte of output for every byte of
2831 * input. This routine expects newSV() to allocate an extra byte for a
2832 * trailing NUL, which this routine will append if it gets to the end of
2833 * the input. There may be more bytes of input than output (eg., \N{LATIN
2834 * CAPITAL LETTER A}), or more output than input if the constant ends up
2835 * recoded to utf8, but each time a construct is found that might increase
2836 * the needed size, SvGROW() is called. Its size parameter each time is
2837 * based on the best guess estimate at the time, namely the length used so
2838 * far, plus the length the current construct will occupy, plus room for
2839 * the trailing NUL, plus one byte for every input byte still unscanned */
2841 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2844 UV literal_endpoint = 0;
2845 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2848 PERL_ARGS_ASSERT_SCAN_CONST;
2850 assert(PL_lex_inwhat != OP_TRANSR);
2851 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2852 /* If we are doing a trans and we know we want UTF8 set expectation */
2853 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2854 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2857 /* Protect sv from errors and fatal warnings. */
2858 ENTER_with_name("scan_const");
2861 while (s < send || dorange) {
2863 /* get transliterations out of the way (they're most literal) */
2864 if (PL_lex_inwhat == OP_TRANS) {
2865 /* expand a range A-Z to the full set of characters. AIE! */
2867 I32 i; /* current expanded character */
2868 I32 min; /* first character in range */
2869 I32 max; /* last character in range */
2880 char * const c = (char*)utf8_hop((U8*)d, -1);
2884 *c = (char) ILLEGAL_UTF8_BYTE;
2885 /* mark the range as done, and continue */
2891 i = d - SvPVX_const(sv); /* remember current offset */
2894 SvLEN(sv) + ((has_utf8)
2895 ? (512 - UTF_CONTINUATION_MARK
2898 /* How many two-byte within 0..255: 128 in UTF-8,
2899 * 96 in UTF-8-mod. */
2901 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2903 d = SvPVX(sv) + i; /* refresh d after realloc */
2907 for (j = 0; j <= 1; j++) {
2908 char * const c = (char*)utf8_hop((U8*)d, -1);
2909 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2915 max = (U8)0xff; /* only to \xff */
2916 uvmax = uv; /* \x{100} to uvmax */
2918 d = c; /* eat endpoint chars */
2923 d -= 2; /* eat the first char and the - */
2924 min = (U8)*d; /* first char in range */
2925 max = (U8)d[1]; /* last char in range */
2932 "Invalid range \"%c-%c\" in transliteration operator",
2933 (char)min, (char)max);
2937 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2938 * any subsets of these ranges into individual characters */
2939 if (literal_endpoint == 2 &&
2940 ((isLOWER_A(min) && isLOWER_A(max)) ||
2941 (isUPPER_A(min) && isUPPER_A(max))))
2943 for (i = min; i <= max; i++) {
2950 for (i = min; i <= max; i++)
2953 append_utf8_from_native_byte(i, &d);
2961 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2963 *d++ = (char) ILLEGAL_UTF8_BYTE;
2965 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2969 /* mark the range as done, and continue */
2973 literal_endpoint = 0;
2978 /* range begins (ignore - as first or last char) */
2979 else if (*s == '-' && s+1 < send && s != start) {
2981 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2988 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
2998 literal_endpoint = 0;
2999 native_range = TRUE;
3004 /* if we get here, we're not doing a transliteration */
3006 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3009 while (s1 >= start && *s1-- == '\\')
3012 in_charclass = TRUE;
3015 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3018 while (s1 >= start && *s1-- == '\\')
3021 in_charclass = FALSE;
3024 /* skip for regexp comments /(?#comment)/, except for the last
3025 * char, which will be done separately.
3026 * Stop on (?{..}) and friends */
3028 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3030 while (s+1 < send && *s != ')')
3033 else if (!PL_lex_casemods &&
3034 ( s[2] == '{' /* This should match regcomp.c */
3035 || (s[2] == '?' && s[3] == '{')))
3041 /* likewise skip #-initiated comments in //x patterns */
3042 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3043 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3044 while (s+1 < send && *s != '\n')
3048 /* no further processing of single-quoted regex */
3049 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3050 goto default_action;
3052 /* check for embedded arrays
3053 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3055 else if (*s == '@' && s[1]) {
3056 if (isWORDCHAR_lazy_if(s+1,UTF))
3058 if (strchr(":'{$", s[1]))
3060 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3061 break; /* in regexp, neither @+ nor @- are interpolated */
3064 /* check for embedded scalars. only stop if we're sure it's a
3067 else if (*s == '$') {
3068 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3070 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3072 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3073 "Possible unintended interpolation of $\\ in regex");
3075 break; /* in regexp, $ might be tail anchor */
3079 /* End of else if chain - OP_TRANS rejoin rest */
3082 if (*s == '\\' && s+1 < send) {
3083 char* e; /* Can be used for ending '}', etc. */
3087 /* warn on \1 - \9 in substitution replacements, but note that \11
3088 * is an octal; and \19 is \1 followed by '9' */
3089 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3090 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3092 /* diag_listed_as: \%d better written as $%d */
3093 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3098 /* string-change backslash escapes */
3099 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3103 /* In a pattern, process \N, but skip any other backslash escapes.
3104 * This is because we don't want to translate an escape sequence
3105 * into a meta symbol and have the regex compiler use the meta
3106 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3107 * in spite of this, we do have to process \N here while the proper
3108 * charnames handler is in scope. See bugs #56444 and #62056.
3110 * There is a complication because \N in a pattern may also stand
3111 * for 'match a non-nl', and not mean a charname, in which case its
3112 * processing should be deferred to the regex compiler. To be a
3113 * charname it must be followed immediately by a '{', and not look
3114 * like \N followed by a curly quantifier, i.e., not something like
3115 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3117 else if (PL_lex_inpat
3120 || regcurly(s + 1)))
3123 goto default_action;
3128 /* quoted - in transliterations */
3130 if (PL_lex_inwhat == OP_TRANS) {
3137 if ((isALPHANUMERIC(*s)))
3138 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3139 "Unrecognized escape \\%c passed through",
3141 /* default action is to copy the quoted character */
3142 goto default_action;
3145 /* eg. \132 indicates the octal constant 0132 */
3146 case '0': case '1': case '2': case '3':
3147 case '4': case '5': case '6': case '7':
3149 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3151 uv = grok_oct(s, &len, &flags, NULL);
3153 if (len < 3 && s < send && isDIGIT(*s)
3154 && ckWARN(WARN_MISC))
3156 Perl_warner(aTHX_ packWARN(WARN_MISC),
3157 "%s", form_short_octal_warning(s, len));
3160 goto NUM_ESCAPE_INSERT;
3162 /* eg. \o{24} indicates the octal constant \024 */
3167 bool valid = grok_bslash_o(&s, &uv, &error,
3168 TRUE, /* Output warning */
3169 FALSE, /* Not strict */
3170 TRUE, /* Output warnings for
3177 goto NUM_ESCAPE_INSERT;
3180 /* eg. \x24 indicates the hex constant 0x24 */
3185 bool valid = grok_bslash_x(&s, &uv, &error,
3186 TRUE, /* Output warning */
3187 FALSE, /* Not strict */
3188 TRUE, /* Output warnings for
3198 /* Insert oct or hex escaped character. There will always be
3199 * enough room in sv since such escapes will be longer than any
3200 * UTF-8 sequence they can end up as, except if they force us
3201 * to recode the rest of the string into utf8 */
3203 /* Here uv is the ordinal of the next character being added */
3204 if (!UVCHR_IS_INVARIANT(uv)) {
3205 if (!has_utf8 && uv > 255) {
3206 /* Might need to recode whatever we have accumulated so
3207 * far if it contains any chars variant in utf8 or
3210 SvCUR_set(sv, d - SvPVX_const(sv));
3213 /* See Note on sizing above. */
3214 sv_utf8_upgrade_flags_grow(
3216 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3217 /* Above-latin1 in string
3218 * implies no encoding */
3219 |SV_UTF8_NO_ENCODING,
3220 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3221 d = SvPVX(sv) + SvCUR(sv);
3226 d = (char*)uvchr_to_utf8((U8*)d, uv);
3227 if (PL_lex_inwhat == OP_TRANS &&
3228 PL_sublex_info.sub_op) {
3229 PL_sublex_info.sub_op->op_private |=
3230 (PL_lex_repl ? OPpTRANS_FROM_UTF
3234 if (uv > 255 && !dorange)
3235 native_range = FALSE;
3248 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3249 * named character, like \N{LATIN SMALL LETTER A}, or a named
3250 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3251 * GRAVE}. For convenience all three forms are referred to as
3252 * "named characters" below.
3254 * For patterns, \N also can mean to match a non-newline. Code
3255 * before this 'switch' statement should already have handled
3256 * this situation, and hence this code only has to deal with
3257 * the named character cases.
3259 * For non-patterns, the named characters are converted to
3260 * their string equivalents. In patterns, named characters are
3261 * not converted to their ultimate forms for the same reasons
3262 * that other escapes aren't. Instead, they are converted to
3263 * the \N{U+...} form to get the value from the charnames that
3264 * is in effect right now, while preserving the fact that it
3265 * was a named character, so that the regex compiler knows
3268 * The structure of this section of code (besides checking for
3269 * errors and upgrading to utf8) is:
3270 * If the named character is of the form \N{U+...}, pass it
3271 * through if a pattern; otherwise convert the code point
3273 * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
3274 * if a pattern; otherwise convert to utf8
3276 * If the regex compiler should ever need to differentiate
3277 * between the \N{U+...} and \N{name} forms, that could easily
3278 * be done here by stripping any leading zeros from the
3279 * \N{U+...} case, and adding them to the other one. */
3281 /* Here, 's' points to the 'N'; the test below is guaranteed to
3282 * succeed if we are being called on a pattern, as we already
3283 * know from a test above that the next character is a '{'. A
3284 * non-pattern \N must mean 'named character', which requires
3288 yyerror("Missing braces on \\N{}");
3293 /* If there is no matching '}', it is an error. */
3294 if (! (e = strchr(s, '}'))) {
3295 if (! PL_lex_inpat) {
3296 yyerror("Missing right brace on \\N{}");
3298 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3303 /* Here it looks like a named character */
3305 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3306 s += 2; /* Skip to next char after the 'U+' */
3309 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3310 /* Check the syntax. */
3313 if (!isXDIGIT(*s)) {
3316 "Invalid hexadecimal number in \\N{U+...}"
3324 else if ((*s == '.' || *s == '_')
3330 /* Pass everything through unchanged.
3331 * +1 is for the '}' */
3332 Copy(orig_s, d, e - orig_s + 1, char);
3333 d += e - orig_s + 1;
3335 else { /* Not a pattern: convert the hex to string */
3336 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3337 | PERL_SCAN_SILENT_ILLDIGIT
3338 | PERL_SCAN_DISALLOW_PREFIX;
3340 uv = grok_hex(s, &len, &flags, NULL);
3341 if (len == 0 || (len != (STRLEN)(e - s)))
3344 /* If the destination is not in utf8, unconditionally
3345 * recode it to be so. This is because \N{} implies
3346 * Unicode semantics, and scalars have to be in utf8
3347 * to guarantee those semantics */
3349 SvCUR_set(sv, d - SvPVX_const(sv));
3352 /* See Note on sizing above. */
3353 sv_utf8_upgrade_flags_grow(
3355 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3356 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3357 d = SvPVX(sv) + SvCUR(sv);
3361 /* Add the (Unicode) code point to the output. */
3362 if (UNI_IS_INVARIANT(uv)) {
3363 *d++ = (char) LATIN1_TO_NATIVE(uv);
3366 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3370 else /* Here is \N{NAME} but not \N{U+...}. */
3371 if ((res = get_and_check_backslash_N_name(s, e)))
3374 const char *str = SvPV_const(res, len);
3377 if (! len) { /* The name resolved to an empty string */
3378 Copy("\\N{}", d, 4, char);
3382 /* In order to not lose information for the regex
3383 * compiler, pass the result in the specially made
3384 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3385 * the code points in hex of each character
3386 * returned by charnames */
3388 const char *str_end = str + len;
3389 const STRLEN off = d - SvPVX_const(sv);
3391 if (! SvUTF8(res)) {
3392 /* For the non-UTF-8 case, we can determine the
3393 * exact length needed without having to parse
3394 * through the string. Each character takes up
3395 * 2 hex digits plus either a trailing dot or
3397 const char initial_text[] = "\\N{U+";
3398 const STRLEN initial_len = sizeof(initial_text)
3400 d = off + SvGROW(sv, off
3403 /* +1 for trailing NUL */
3406 + (STRLEN)(send - e));
3407 Copy(initial_text, d, initial_len, char);
3409 while (str < str_end) {
3412 my_snprintf(hex_string,
3414 "%02X.", (U8) *str);
3415 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3416 Copy(hex_string, d, 3, char);
3420 d--; /* Below, we will overwrite the final
3421 dot with a right brace */
3424 STRLEN char_length; /* cur char's byte length */
3426 /* and the number of bytes after this is
3427 * translated into hex digits */
3428 STRLEN output_length;
3430 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3431 * for max('U+', '.'); and 1 for NUL */
3432 char hex_string[2 * UTF8_MAXBYTES + 5];
3434 /* Get the first character of the result. */
3435 U32 uv = utf8n_to_uvchr((U8 *) str,
3439 /* Convert first code point to hex, including
3440 * the boiler plate before it. */
3442 my_snprintf(hex_string, sizeof(hex_string),
3446 /* Make sure there is enough space to hold it */
3447 d = off + SvGROW(sv, off
3449 + (STRLEN)(send - e)
3450 + 2); /* '}' + NUL */
3452 Copy(hex_string, d, output_length, char);
3455 /* For each subsequent character, append dot and
3456 * its ordinal in hex */
3457 while ((str += char_length) < str_end) {
3458 const STRLEN off = d - SvPVX_const(sv);
3459 U32 uv = utf8n_to_uvchr((U8 *) str,
3464 my_snprintf(hex_string,
3469 d = off + SvGROW(sv, off
3471 + (STRLEN)(send - e)
3472 + 2); /* '}' + NUL */
3473 Copy(hex_string, d, output_length, char);
3478 *d++ = '}'; /* Done. Add the trailing brace */
3481 else { /* Here, not in a pattern. Convert the name to a
3484 /* If destination is not in utf8, unconditionally
3485 * recode it to be so. This is because \N{} implies
3486 * Unicode semantics, and scalars have to be in utf8
3487 * to guarantee those semantics */
3489 SvCUR_set(sv, d - SvPVX_const(sv));
3492 /* See Note on sizing above. */
3493 sv_utf8_upgrade_flags_grow(sv,
3494 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3495 len + (STRLEN)(send - s) + 1);
3496 d = SvPVX(sv) + SvCUR(sv);
3498 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3500 /* See Note on sizing above. (NOTE: SvCUR() is not
3501 * set correctly here). */
3502 const STRLEN off = d - SvPVX_const(sv);
3503 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3505 if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
3506 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3507 str = SvPV_const(res, len);
3509 Copy(str, d, len, char);
3515 } /* End \N{NAME} */
3518 native_range = FALSE; /* \N{} is defined to be Unicode */
3520 s = e + 1; /* Point to just after the '}' */
3523 /* \c is a control character */
3527 *d++ = grok_bslash_c(*s++, 1);
3530 yyerror("Missing control char name in \\c");
3534 /* printf-style backslashes, formfeeds, newlines, etc */
3560 } /* end if (backslash) */
3567 /* If we started with encoded form, or already know we want it,
3568 then encode the next character */
3569 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3573 /* One might think that it is wasted effort in the case of the
3574 * source being utf8 (this_utf8 == TRUE) to take the next character
3575 * in the source, convert it to an unsigned value, and then convert
3576 * it back again. But the source has not been validated here. The
3577 * routine that does the conversion checks for errors like
3580 const UV nextuv = (this_utf8)
3581 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3583 const STRLEN need = UNISKIP(nextuv);
3585 SvCUR_set(sv, d - SvPVX_const(sv));
3588 /* See Note on sizing above. */
3589 sv_utf8_upgrade_flags_grow(sv,
3590 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3591 need + (STRLEN)(send - s) + 1);
3592 d = SvPVX(sv) + SvCUR(sv);
3594 } else if (need > len) {
3595 /* encoded value larger than old, may need extra space (NOTE:
3596 * SvCUR() is not set correctly here). See Note on sizing
3598 const STRLEN off = d - SvPVX_const(sv);
3599 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3603 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3605 if (uv > 255 && !dorange)
3606 native_range = FALSE;
3612 } /* while loop to process each character */
3614 /* terminate the string and set up the sv */
3616 SvCUR_set(sv, d - SvPVX_const(sv));
3617 if (SvCUR(sv) >= SvLEN(sv))
3618 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3619 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3622 if (IN_ENCODING && !has_utf8) {
3623 sv_recode_to_utf8(sv, _get_encoding());
3629 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3630 PL_sublex_info.sub_op->op_private |=
3631 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3635 /* shrink the sv if we allocated more than we used */
3636 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3637 SvPV_shrink_to_cur(sv);
3640 /* return the substring (via pl_yylval) only if we parsed anything */
3643 for (; s2 < s; s2++) {
3645 COPLINE_INC_WITH_HERELINES;
3647 SvREFCNT_inc_simple_void_NN(sv);
3648 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3649 && ! PL_parser->lex_re_reparsing)
3651 const char *const key = PL_lex_inpat ? "qr" : "q";
3652 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3656 if (PL_lex_inwhat == OP_TRANS) {
3659 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3662 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3670 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3673 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3675 LEAVE_with_name("scan_const");
3680 * Returns TRUE if there's more to the expression (e.g., a subscript),
3683 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3685 * ->[ and ->{ return TRUE
3686 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3687 * { and [ outside a pattern are always subscripts, so return TRUE
3688 * if we're outside a pattern and it's not { or [, then return FALSE
3689 * if we're in a pattern and the first char is a {
3690 * {4,5} (any digits around the comma) returns FALSE
3691 * if we're in a pattern and the first char is a [
3693 * [SOMETHING] has a funky algorithm to decide whether it's a
3694 * character class or not. It has to deal with things like
3695 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3696 * anything else returns TRUE
3699 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3702 S_intuit_more(pTHX_ char *s)
3704 PERL_ARGS_ASSERT_INTUIT_MORE;
3706 if (PL_lex_brackets)
3708 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3710 if (*s == '-' && s[1] == '>'
3711 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3712 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3713 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3715 if (*s != '{' && *s != '[')
3720 /* In a pattern, so maybe we have {n,m}. */
3728 /* On the other hand, maybe we have a character class */
3731 if (*s == ']' || *s == '^')
3734 /* this is terrifying, and it works */
3737 const char * const send = strchr(s,']');
3738 unsigned char un_char, last_un_char;
3739 char tmpbuf[sizeof PL_tokenbuf * 4];
3741 if (!send) /* has to be an expression */
3743 weight = 2; /* let's weigh the evidence */
3747 else if (isDIGIT(*s)) {
3749 if (isDIGIT(s[1]) && s[2] == ']')
3755 Zero(seen,256,char);
3757 for (; s < send; s++) {
3758 last_un_char = un_char;
3759 un_char = (unsigned char)*s;
3764 weight -= seen[un_char] * 10;
3765 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3767 char *tmp = PL_bufend;
3768 PL_bufend = (char*)send;
3769 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3771 len = (int)strlen(tmpbuf);
3772 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3773 UTF ? SVf_UTF8 : 0, SVt_PV))
3778 else if (*s == '$' && s[1] &&
3779 strchr("[#!%*<>()-=",s[1])) {
3780 if (/*{*/ strchr("])} =",s[2]))
3789 if (strchr("wds]",s[1]))
3791 else if (seen[(U8)'\''] || seen[(U8)'"'])
3793 else if (strchr("rnftbxcav",s[1]))
3795 else if (isDIGIT(s[1])) {
3797 while (s[1] && isDIGIT(s[1]))
3807 if (strchr("aA01! ",last_un_char))
3809 if (strchr("zZ79~",s[1]))
3811 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3812 weight -= 5; /* cope with negative subscript */
3815 if (!isWORDCHAR(last_un_char)
3816 && !(last_un_char == '$' || last_un_char == '@'
3817 || last_un_char == '&')
3818 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3822 if (keyword(d, s - d, 0))
3825 if (un_char == last_un_char + 1)
3827 weight -= seen[un_char];
3832 if (weight >= 0) /* probably a character class */
3842 * Does all the checking to disambiguate
3844 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3845 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3847 * First argument is the stuff after the first token, e.g. "bar".
3849 * Not a method if foo is a filehandle.
3850 * Not a method if foo is a subroutine prototyped to take a filehandle.
3851 * Not a method if it's really "Foo $bar"
3852 * Method if it's "foo $bar"
3853 * Not a method if it's really "print foo $bar"
3854 * Method if it's really "foo package::" (interpreted as package->foo)
3855 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3856 * Not a method if bar is a filehandle or package, but is quoted with
3861 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
3863 char *s = start + (*start == '$');
3864 char tmpbuf[sizeof PL_tokenbuf];
3867 /* Mustn't actually add anything to a symbol table.
3868 But also don't want to "initialise" any placeholder
3869 constants that might already be there into full
3870 blown PVGVs with attached PVCV. */
3872 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
3874 PERL_ARGS_ASSERT_INTUIT_METHOD;
3876 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3878 if (cv && SvPOK(cv)) {
3879 const char *proto = CvPROTO(cv);
3881 while (*proto && (isSPACE(*proto) || *proto == ';'))
3888 if (*start == '$') {
3889 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3890 isUPPER(*PL_tokenbuf))
3895 return *s == '(' ? FUNCMETH : METHOD;
3898 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3899 /* start is the beginning of the possible filehandle/object,
3900 * and s is the end of it
3901 * tmpbuf is a copy of it (but with single quotes as double colons)
3904 if (!keyword(tmpbuf, len, 0)) {
3905 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3910 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3911 if (indirgv && GvCVu(indirgv))
3913 /* filehandle or package name makes it a method */
3914 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3916 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3917 return 0; /* no assumptions -- "=>" quotes bareword */
3919 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3920 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3921 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3925 return *s == '(' ? FUNCMETH : METHOD;
3931 /* Encoded script support. filter_add() effectively inserts a
3932 * 'pre-processing' function into the current source input stream.
3933 * Note that the filter function only applies to the current source file
3934 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3936 * The datasv parameter (which may be NULL) can be used to pass
3937 * private data to this instance of the filter. The filter function
3938 * can recover the SV using the FILTER_DATA macro and use it to
3939 * store private buffers and state information.
3941 * The supplied datasv parameter is upgraded to a PVIO type
3942 * and the IoDIRP/IoANY field is used to store the function pointer,
3943 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3944 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3945 * private use must be set using malloc'd pointers.
3949 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3957 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3958 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3960 if (!PL_rsfp_filters)
3961 PL_rsfp_filters = newAV();
3964 SvUPGRADE(datasv, SVt_PVIO);
3965 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3966 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3967 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3968 FPTR2DPTR(void *, IoANY(datasv)),
3969 SvPV_nolen(datasv)));
3970 av_unshift(PL_rsfp_filters, 1);
3971 av_store(PL_rsfp_filters, 0, datasv) ;
3973 !PL_parser->filtered
3974 && PL_parser->lex_flags & LEX_EVALBYTES
3975 && PL_bufptr < PL_bufend
3977 const char *s = PL_bufptr;
3978 while (s < PL_bufend) {
3980 SV *linestr = PL_parser->linestr;
3981 char *buf = SvPVX(linestr);
3982 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3983 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3984 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3985 STRLEN const linestart_pos = PL_parser->linestart - buf;
3986 STRLEN const last_uni_pos =
3987 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3988 STRLEN const last_lop_pos =
3989 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3990 av_push(PL_rsfp_filters, linestr);
3991 PL_parser->linestr =
3992 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3993 buf = SvPVX(PL_parser->linestr);
3994 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3995 PL_parser->bufptr = buf + bufptr_pos;
3996 PL_parser->oldbufptr = buf + oldbufptr_pos;
3997 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3998 PL_parser->linestart = buf + linestart_pos;
3999 if (PL_parser->last_uni)
4000 PL_parser->last_uni = buf + last_uni_pos;
4001 if (PL_parser->last_lop)
4002 PL_parser->last_lop = buf + last_lop_pos;
4003 SvLEN(linestr) = SvCUR(linestr);
4004 SvCUR(linestr) = s-SvPVX(linestr);
4005 PL_parser->filtered = 1;
4015 /* Delete most recently added instance of this filter function. */
4017 Perl_filter_del(pTHX_ filter_t funcp)
4021 PERL_ARGS_ASSERT_FILTER_DEL;
4024 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4025 FPTR2DPTR(void*, funcp)));
4027 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4029 /* if filter is on top of stack (usual case) just pop it off */
4030 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4031 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4032 sv_free(av_pop(PL_rsfp_filters));
4036 /* we need to search for the correct entry and clear it */
4037 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4041 /* Invoke the idxth filter function for the current rsfp. */
4042 /* maxlen 0 = read one text line */
4044 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4048 /* This API is bad. It should have been using unsigned int for maxlen.
4049 Not sure if we want to change the API, but if not we should sanity
4050 check the value here. */
4051 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4053 PERL_ARGS_ASSERT_FILTER_READ;
4055 if (!PL_parser || !PL_rsfp_filters)
4057 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4058 /* Provide a default input filter to make life easy. */
4059 /* Note that we append to the line. This is handy. */
4060 DEBUG_P(PerlIO_printf(Perl_debug_log,
4061 "filter_read %d: from rsfp\n", idx));
4062 if (correct_length) {
4065 const int old_len = SvCUR(buf_sv);
4067 /* ensure buf_sv is large enough */
4068 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4069 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4070 correct_length)) <= 0) {
4071 if (PerlIO_error(PL_rsfp))
4072 return -1; /* error */
4074 return 0 ; /* end of file */
4076 SvCUR_set(buf_sv, old_len + len) ;
4077 SvPVX(buf_sv)[old_len + len] = '\0';
4080 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4081 if (PerlIO_error(PL_rsfp))
4082 return -1; /* error */
4084 return 0 ; /* end of file */
4087 return SvCUR(buf_sv);
4089 /* Skip this filter slot if filter has been deleted */
4090 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4091 DEBUG_P(PerlIO_printf(Perl_debug_log,
4092 "filter_read %d: skipped (filter deleted)\n",
4094 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4096 if (SvTYPE(datasv) != SVt_PVIO) {
4097 if (correct_length) {
4099 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4100 if (!remainder) return 0; /* eof */
4101 if (correct_length > remainder) correct_length = remainder;
4102 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4103 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4106 const char *s = SvEND(datasv);
4107 const char *send = SvPVX(datasv) + SvLEN(datasv);
4115 if (s == send) return 0; /* eof */
4116 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4117 SvCUR_set(datasv, s-SvPVX(datasv));
4119 return SvCUR(buf_sv);
4121 /* Get function pointer hidden within datasv */
4122 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4123 DEBUG_P(PerlIO_printf(Perl_debug_log,
4124 "filter_read %d: via function %p (%s)\n",
4125 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4126 /* Call function. The function is expected to */
4127 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4128 /* Return: <0:error, =0:eof, >0:not eof */
4129 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4133 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4135 PERL_ARGS_ASSERT_FILTER_GETS;
4137 #ifdef PERL_CR_FILTER
4138 if (!PL_rsfp_filters) {
4139 filter_add(S_cr_textfilter,NULL);
4142 if (PL_rsfp_filters) {
4144 SvCUR_set(sv, 0); /* start with empty line */
4145 if (FILTER_READ(0, sv, 0) > 0)
4146 return ( SvPVX(sv) ) ;
4151 return (sv_gets(sv, PL_rsfp, append));
4155 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4159 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4161 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4165 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4166 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4168 return GvHV(gv); /* Foo:: */
4171 /* use constant CLASS => 'MyClass' */
4172 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4173 if (gv && GvCV(gv)) {
4174 SV * const sv = cv_const_sv(GvCV(gv));
4176 return gv_stashsv(sv, 0);
4179 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4184 S_tokenize_use(pTHX_ int is_use, char *s) {
4185 PERL_ARGS_ASSERT_TOKENIZE_USE;
4187 if (PL_expect != XSTATE)
4188 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4189 is_use ? "use" : "no"));
4192 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4193 s = force_version(s, TRUE);
4194 if (*s == ';' || *s == '}'
4195 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4196 NEXTVAL_NEXTTOKE.opval = NULL;
4199 else if (*s == 'v') {
4200 s = force_word(s,WORD,FALSE,TRUE);
4201 s = force_version(s, FALSE);
4205 s = force_word(s,WORD,FALSE,TRUE);
4206 s = force_version(s, FALSE);
4208 pl_yylval.ival = is_use;
4212 static const char* const exp_name[] =
4213 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4214 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4219 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4221 S_word_takes_any_delimeter(char *p, STRLEN len)
4223 return (len == 1 && strchr("msyq", p[0])) ||
4225 (p[0] == 't' && p[1] == 'r') ||
4226 (p[0] == 'q' && strchr("qwxr", p[1]))));
4230 S_check_scalar_slice(pTHX_ char *s)
4233 while (*s == ' ' || *s == '\t') s++;
4234 if (*s == 'q' && s[1] == 'w'
4235 && !isWORDCHAR_lazy_if(s+2,UTF))
4237 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4238 s += UTF ? UTF8SKIP(s) : 1;
4239 if (*s == '}' || *s == ']')
4240 pl_yylval.ival = OPpSLICEWARNING;
4246 Works out what to call the token just pulled out of the input
4247 stream. The yacc parser takes care of taking the ops we return and
4248 stitching them into a tree.
4251 The type of the next token
4254 Switch based on the current state:
4255 - if we already built the token before, use it
4256 - if we have a case modifier in a string, deal with that
4257 - handle other cases of interpolation inside a string
4258 - scan the next line if we are inside a format
4259 In the normal state switch on the next character:
4261 if alphabetic, go to key lookup
4262 unrecoginized character - croak
4263 - 0/4/26: handle end-of-line or EOF
4264 - cases for whitespace
4265 - \n and #: handle comments and line numbers
4266 - various operators, brackets and sigils
4269 - 'v': vstrings (or go to key lookup)
4270 - 'x' repetition operator (or go to key lookup)
4271 - other ASCII alphanumerics (key lookup begins here):
4274 scan built-in keyword (but do nothing with it yet)
4275 check for statement label
4276 check for lexical subs
4277 goto just_a_word if there is one
4278 see whether built-in keyword is overridden
4279 switch on keyword number:
4280 - default: just_a_word:
4281 not a built-in keyword; handle bareword lookup
4282 disambiguate between method and sub call
4283 fall back to bareword
4284 - cases for built-in keywords
4292 char *s = PL_bufptr;
4296 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4300 /* orig_keyword, gvp, and gv are initialized here because
4301 * jump to the label just_a_word_zero can bypass their
4302 * initialization later. */
4303 I32 orig_keyword = 0;
4308 SV* tmp = newSVpvs("");
4309 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4310 (IV)CopLINE(PL_curcop),
4311 lex_state_names[PL_lex_state],
4312 exp_name[PL_expect],
4313 pv_display(tmp, s, strlen(s), 0, 60));
4317 /* when we've already built the next token, just pull it out of the queue */
4320 pl_yylval = PL_nextval[PL_nexttoke];
4322 PL_lex_state = PL_lex_defer;
4323 PL_lex_defer = LEX_NORMAL;
4327 next_type = PL_nexttype[PL_nexttoke];
4328 if (next_type & (7<<24)) {
4329 if (next_type & (1<<24)) {
4330 if (PL_lex_brackets > 100)
4331 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4332 PL_lex_brackstack[PL_lex_brackets++] =
4333 (char) ((next_type >> 16) & 0xff);
4335 if (next_type & (2<<24))
4336 PL_lex_allbrackets++;
4337 if (next_type & (4<<24))
4338 PL_lex_allbrackets--;
4339 next_type &= 0xffff;
4341 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4345 switch (PL_lex_state) {
4347 case LEX_INTERPNORMAL:
4350 /* interpolated case modifiers like \L \U, including \Q and \E.
4351 when we get here, PL_bufptr is at the \
4353 case LEX_INTERPCASEMOD:
4355 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4357 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4358 PL_bufptr, PL_bufend, *PL_bufptr);
4360 /* handle \E or end of string */
4361 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4363 if (PL_lex_casemods) {
4364 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4365 PL_lex_casestack[PL_lex_casemods] = '\0';
4367 if (PL_bufptr != PL_bufend
4368 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4369 || oldmod == 'F')) {
4371 PL_lex_state = LEX_INTERPCONCAT;
4373 PL_lex_allbrackets--;
4376 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4377 /* Got an unpaired \E */
4378 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4379 "Useless use of \\E");
4381 if (PL_bufptr != PL_bufend)
4383 PL_lex_state = LEX_INTERPCONCAT;
4387 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4388 "### Saw case modifier\n"); });
4390 if (s[1] == '\\' && s[2] == 'E') {
4392 PL_lex_state = LEX_INTERPCONCAT;
4397 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4398 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4399 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4400 (strchr(PL_lex_casestack, 'L')
4401 || strchr(PL_lex_casestack, 'U')
4402 || strchr(PL_lex_casestack, 'F'))) {
4403 PL_lex_casestack[--PL_lex_casemods] = '\0';
4404 PL_lex_allbrackets--;
4407 if (PL_lex_casemods > 10)
4408 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4409 PL_lex_casestack[PL_lex_casemods++] = *s;
4410 PL_lex_casestack[PL_lex_casemods] = '\0';
4411 PL_lex_state = LEX_INTERPCONCAT;
4412 NEXTVAL_NEXTTOKE.ival = 0;
4413 force_next((2<<24)|'(');
4415 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4417 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4419 NEXTVAL_NEXTTOKE.ival = OP_LC;
4421 NEXTVAL_NEXTTOKE.ival = OP_UC;
4423 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4425 NEXTVAL_NEXTTOKE.ival = OP_FC;
4427 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4431 if (PL_lex_starts) {
4434 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4435 if (PL_lex_casemods == 1 && PL_lex_inpat)
4438 AopNOASSIGN(OP_CONCAT);
4444 case LEX_INTERPPUSH:
4445 return REPORT(sublex_push());
4447 case LEX_INTERPSTART:
4448 if (PL_bufptr == PL_bufend)
4449 return REPORT(sublex_done());
4450 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4451 "### Interpolated variable\n"); });
4453 /* for /@a/, we leave the joining for the regex engine to do
4454 * (unless we're within \Q etc) */
4455 PL_lex_dojoin = (*PL_bufptr == '@'
4456 && (!PL_lex_inpat || PL_lex_casemods));
4457 PL_lex_state = LEX_INTERPNORMAL;
4458 if (PL_lex_dojoin) {
4459 NEXTVAL_NEXTTOKE.ival = 0;
4461 force_ident("\"", '$');
4462 NEXTVAL_NEXTTOKE.ival = 0;
4464 NEXTVAL_NEXTTOKE.ival = 0;
4465 force_next((2<<24)|'(');
4466 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4469 /* Convert (?{...}) and friends to 'do {...}' */
4470 if (PL_lex_inpat && *PL_bufptr == '(') {
4471 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4473 if (*PL_bufptr != '{')
4475 PL_expect = XTERMBLOCK;
4479 if (PL_lex_starts++) {
4481 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4482 if (!PL_lex_casemods && PL_lex_inpat)
4485 AopNOASSIGN(OP_CONCAT);
4489 case LEX_INTERPENDMAYBE:
4490 if (intuit_more(PL_bufptr)) {
4491 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4497 /* Treat state as LEX_NORMAL if we have no inner lexing scope.
4498 XXX This hack can be removed if we stop setting PL_lex_state to
4499 LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */
4500 if (UNLIKELY(!PL_lex_inwhat)) {
4501 PL_lex_state = LEX_NORMAL;
4505 if (PL_lex_dojoin) {
4506 const U8 dojoin_was = PL_lex_dojoin;
4507 PL_lex_dojoin = FALSE;
4508 PL_lex_state = LEX_INTERPCONCAT;
4509 PL_lex_allbrackets--;
4510 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4512 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4513 && SvEVALED(PL_lex_repl))
4515 if (PL_bufptr != PL_bufend)
4516 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4519 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4520 re_eval_str. If the here-doc body’s length equals the previous
4521 value of re_eval_start, re_eval_start will now be null. So
4522 check re_eval_str as well. */
4523 if (PL_parser->lex_shared->re_eval_start
4524 || PL_parser->lex_shared->re_eval_str) {
4526 if (*PL_bufptr != ')')
4527 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4529 /* having compiled a (?{..}) expression, return the original
4530 * text too, as a const */
4531 if (PL_parser->lex_shared->re_eval_str) {
4532 sv = PL_parser->lex_shared->re_eval_str;
4533 PL_parser->lex_shared->re_eval_str = NULL;
4535 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4536 SvPV_shrink_to_cur(sv);
4538 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4539 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4540 NEXTVAL_NEXTTOKE.opval =
4541 (OP*)newSVOP(OP_CONST, 0,
4544 PL_parser->lex_shared->re_eval_start = NULL;
4550 case LEX_INTERPCONCAT:
4552 if (PL_lex_brackets)
4553 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4554 (long) PL_lex_brackets);
4556 /* Treat state as LEX_NORMAL when not in an inner lexing scope.
4557 XXX This hack can be removed if we stop setting PL_lex_state to
4559 if (UNLIKELY(!PL_lex_inwhat)) {
4560 PL_lex_state = LEX_NORMAL;
4564 if (PL_bufptr == PL_bufend)
4565 return REPORT(sublex_done());
4567 /* m'foo' still needs to be parsed for possible (?{...}) */
4568 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4569 SV *sv = newSVsv(PL_linestr);
4571 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4575 s = scan_const(PL_bufptr);
4577 PL_lex_state = LEX_INTERPCASEMOD;
4579 PL_lex_state = LEX_INTERPSTART;
4582 if (s != PL_bufptr) {
4583 NEXTVAL_NEXTTOKE = pl_yylval;
4586 if (PL_lex_starts++) {
4587 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4588 if (!PL_lex_casemods && PL_lex_inpat)
4591 AopNOASSIGN(OP_CONCAT);
4601 s = scan_formline(PL_bufptr);
4602 if (!PL_lex_formbrack)
4611 /* We really do *not* want PL_linestr ever becoming a COW. */
4612 assert (!SvIsCOW(PL_linestr));
4614 PL_oldoldbufptr = PL_oldbufptr;
4616 PL_parser->saw_infix_sigil = 0;
4621 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4624 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4625 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4627 SVs_TEMP | SVf_UTF8),
4628 10, UNI_DISPLAY_ISPRINT)
4629 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4630 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4631 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4632 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4636 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4637 UTF8fARG(UTF, (s - d), d),
4642 goto fake_eof; /* emulate EOF on ^D or ^Z */
4644 if ((!PL_rsfp || PL_lex_inwhat)
4645 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4648 if (PL_lex_brackets &&
4649 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4650 yyerror((const char *)
4652 ? "Format not terminated"
4653 : "Missing right curly or square bracket"));
4655 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4656 "### Tokener got EOF\n");
4660 if (s++ < PL_bufend)
4661 goto retry; /* ignore stray nulls */
4664 if (!PL_in_eval && !PL_preambled) {
4665 PL_preambled = TRUE;
4667 /* Generate a string of Perl code to load the debugger.
4668 * If PERL5DB is set, it will return the contents of that,
4669 * otherwise a compile-time require of perl5db.pl. */
4671 const char * const pdb = PerlEnv_getenv("PERL5DB");
4674 sv_setpv(PL_linestr, pdb);
4675 sv_catpvs(PL_linestr,";");
4677 SETERRNO(0,SS_NORMAL);
4678 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4680 PL_parser->preambling = CopLINE(PL_curcop);
4682 sv_setpvs(PL_linestr,"");
4683 if (PL_preambleav) {
4684 SV **svp = AvARRAY(PL_preambleav);
4685 SV **const end = svp + AvFILLp(PL_preambleav);
4687 sv_catsv(PL_linestr, *svp);
4689 sv_catpvs(PL_linestr, ";");
4691 sv_free(MUTABLE_SV(PL_preambleav));
4692 PL_preambleav = NULL;
4695 sv_catpvs(PL_linestr,
4696 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4697 if (PL_minus_n || PL_minus_p) {
4698 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4700 sv_catpvs(PL_linestr,"chomp;");
4703 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4704 || *PL_splitstr == '"')
4705 && strchr(PL_splitstr + 1, *PL_splitstr))
4706 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4708 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4709 bytes can be used as quoting characters. :-) */
4710 const char *splits = PL_splitstr;
4711 sv_catpvs(PL_linestr, "our @F=split(q\0");
4714 if (*splits == '\\')
4715 sv_catpvn(PL_linestr, splits, 1);
4716 sv_catpvn(PL_linestr, splits, 1);
4717 } while (*splits++);
4718 /* This loop will embed the trailing NUL of
4719 PL_linestr as the last thing it does before
4721 sv_catpvs(PL_linestr, ");");
4725 sv_catpvs(PL_linestr,"our @F=split(' ');");
4728 sv_catpvs(PL_linestr, "\n");
4729 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4731 PL_last_lop = PL_last_uni = NULL;
4732 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4733 update_debugger_info(PL_linestr, NULL, 0);
4738 bof = PL_rsfp ? TRUE : FALSE;
4741 fake_eof = LEX_FAKE_EOF;
4743 PL_bufptr = PL_bufend;
4744 COPLINE_INC_WITH_HERELINES;
4745 if (!lex_next_chunk(fake_eof)) {
4746 CopLINE_dec(PL_curcop);
4748 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4750 CopLINE_dec(PL_curcop);
4752 /* If it looks like the start of a BOM or raw UTF-16,
4753 * check if it in fact is. */
4754 if (bof && PL_rsfp &&
4756 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4759 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4760 bof = (offset == (Off_t)SvCUR(PL_linestr));
4761 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4762 /* offset may include swallowed CR */
4764 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4767 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4768 s = swallow_bom((U8*)s);
4771 if (PL_parser->in_pod) {
4772 /* Incest with pod. */
4773 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4774 sv_setpvs(PL_linestr, "");
4775 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4776 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4777 PL_last_lop = PL_last_uni = NULL;
4778 PL_parser->in_pod = 0;
4781 if (PL_rsfp || PL_parser->filtered)
4783 } while (PL_parser->in_pod);
4784 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4785 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4786 PL_last_lop = PL_last_uni = NULL;
4787 if (CopLINE(PL_curcop) == 1) {
4788 while (s < PL_bufend && isSPACE(*s))
4790 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4794 if (*s == '#' && *(s+1) == '!')
4796 #ifdef ALTERNATE_SHEBANG
4798 static char const as[] = ALTERNATE_SHEBANG;
4799 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4800 d = s + (sizeof(as) - 1);
4802 #endif /* ALTERNATE_SHEBANG */
4811 while (*d && !isSPACE(*d))
4815 #ifdef ARG_ZERO_IS_SCRIPT
4816 if (ipathend > ipath) {
4818 * HP-UX (at least) sets argv[0] to the script name,
4819 * which makes $^X incorrect. And Digital UNIX and Linux,
4820 * at least, set argv[0] to the basename of the Perl
4821 * interpreter. So, having found "#!", we'll set it right.
4823 SV* copfilesv = CopFILESV(PL_curcop);
4826 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4828 assert(SvPOK(x) || SvGMAGICAL(x));
4829 if (sv_eq(x, copfilesv)) {
4830 sv_setpvn(x, ipath, ipathend - ipath);
4836 const char *bstart = SvPV_const(copfilesv, blen);
4837 const char * const lstart = SvPV_const(x, llen);
4839 bstart += blen - llen;
4840 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4841 sv_setpvn(x, ipath, ipathend - ipath);
4848 /* Anything to do if no copfilesv? */
4850 TAINT_NOT; /* $^X is always tainted, but that's OK */
4852 #endif /* ARG_ZERO_IS_SCRIPT */
4857 d = instr(s,"perl -");
4859 d = instr(s,"perl");
4861 /* avoid getting into infinite loops when shebang
4862 * line contains "Perl" rather than "perl" */
4864 for (d = ipathend-4; d >= ipath; --d) {
4865 if (isALPHA_FOLD_EQ(*d, 'p')
4866 && !ibcmp(d, "perl", 4))
4876 #ifdef ALTERNATE_SHEBANG
4878 * If the ALTERNATE_SHEBANG on this system starts with a
4879 * character that can be part of a Perl expression, then if
4880 * we see it but not "perl", we're probably looking at the
4881 * start of Perl code, not a request to hand off to some
4882 * other interpreter. Similarly, if "perl" is there, but
4883 * not in the first 'word' of the line, we assume the line
4884 * contains the start of the Perl program.
4886 if (d && *s != '#') {
4887 const char *c = ipath;
4888 while (*c && !strchr("; \t\r\n\f\v#", *c))
4891 d = NULL; /* "perl" not in first word; ignore */
4893 *s = '#'; /* Don't try to parse shebang line */
4895 #endif /* ALTERNATE_SHEBANG */
4900 !instr(s,"indir") &&
4901 instr(PL_origargv[0],"perl"))
4908 while (s < PL_bufend && isSPACE(*s))
4910 if (s < PL_bufend) {
4911 Newx(newargv,PL_origargc+3,char*);
4913 while (s < PL_bufend && !isSPACE(*s))
4916 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4919 newargv = PL_origargv;
4922 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4924 Perl_croak(aTHX_ "Can't exec %s", ipath);
4927 while (*d && !isSPACE(*d))
4929 while (SPACE_OR_TAB(*d))
4933 const bool switches_done = PL_doswitches;
4934 const U32 oldpdb = PL_perldb;
4935 const bool oldn = PL_minus_n;
4936 const bool oldp = PL_minus_p;
4940 bool baduni = FALSE;
4942 const char *d2 = d1 + 1;
4943 if (parse_unicode_opts((const char **)&d2)
4947 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
4948 const char * const m = d1;
4949 while (*d1 && !isSPACE(*d1))
4951 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4954 d1 = moreswitches(d1);
4956 if (PL_doswitches && !switches_done) {
4957 int argc = PL_origargc;
4958 char **argv = PL_origargv;
4961 } while (argc && argv[0][0] == '-' && argv[0][1]);
4962 init_argv_symbols(argc,argv);
4964 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4965 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4966 /* if we have already added "LINE: while (<>) {",
4967 we must not do it again */
4969 sv_setpvs(PL_linestr, "");
4970 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4971 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4972 PL_last_lop = PL_last_uni = NULL;
4973 PL_preambled = FALSE;
4974 if (PERLDB_LINE || PERLDB_SAVESRC)
4975 (void)gv_fetchfile(PL_origfilename);
4982 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4983 PL_lex_state = LEX_FORMLINE;
4984 force_next(FORMRBRACK);
4989 #ifdef PERL_STRICT_CR
4990 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4992 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4994 case ' ': case '\t': case '\f': case '\v':
4999 if (PL_lex_state != LEX_NORMAL ||
5000 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5001 const bool in_comment = *s == '#';
5002 if (*s == '#' && s == PL_linestart && PL_in_eval
5003 && !PL_rsfp && !PL_parser->filtered) {
5004 /* handle eval qq[#line 1 "foo"\n ...] */
5005 CopLINE_dec(PL_curcop);
5009 while (d < PL_bufend && *d != '\n')
5013 else if (d > PL_bufend)
5014 /* Found by Ilya: feed random input to Perl. */
5015 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5018 if (in_comment && d == PL_bufend
5019 && PL_lex_state == LEX_INTERPNORMAL
5020 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5021 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5024 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5025 PL_lex_state = LEX_FORMLINE;
5026 force_next(FORMRBRACK);
5031 while (s < PL_bufend && *s != '\n')
5039 else if (s > PL_bufend)
5040 /* Found by Ilya: feed random input to Perl. */
5041 Perl_croak(aTHX_ "panic: input overflow");
5045 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5053 while (s < PL_bufend && SPACE_OR_TAB(*s))
5056 if (strnEQ(s,"=>",2)) {
5057 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5058 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5059 OPERATOR('-'); /* unary minus */
5062 case 'r': ftst = OP_FTEREAD; break;
5063 case 'w': ftst = OP_FTEWRITE; break;
5064 case 'x': ftst = OP_FTEEXEC; break;
5065 case 'o': ftst = OP_FTEOWNED; break;
5066 case 'R': ftst = OP_FTRREAD; break;
5067 case 'W': ftst = OP_FTRWRITE; break;
5068 case 'X': ftst = OP_FTREXEC; break;
5069 case 'O': ftst = OP_FTROWNED; break;
5070 case 'e': ftst = OP_FTIS; break;
5071 case 'z': ftst = OP_FTZERO; break;
5072 case 's': ftst = OP_FTSIZE; break;
5073 case 'f': ftst = OP_FTFILE; break;
5074 case 'd': ftst = OP_FTDIR; break;
5075 case 'l': ftst = OP_FTLINK; break;
5076 case 'p': ftst = OP_FTPIPE; break;
5077 case 'S': ftst = OP_FTSOCK; break;
5078 case 'u': ftst = OP_FTSUID; break;
5079 case 'g': ftst = OP_FTSGID; break;
5080 case 'k': ftst = OP_FTSVTX; break;
5081 case 'b': ftst = OP_FTBLK; break;
5082 case 'c': ftst = OP_FTCHR; break;
5083 case 't': ftst = OP_FTTTY; break;
5084 case 'T': ftst = OP_FTTEXT; break;
5085 case 'B': ftst = OP_FTBINARY; break;
5086 case 'M': case 'A': case 'C':
5087 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5089 case 'M': ftst = OP_FTMTIME; break;
5090 case 'A': ftst = OP_FTATIME; break;
5091 case 'C': ftst = OP_FTCTIME; break;
5099 PL_last_uni = PL_oldbufptr;
5100 PL_last_lop_op = (OPCODE)ftst;
5101 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5102 "### Saw file test %c\n", (int)tmp);
5107 /* Assume it was a minus followed by a one-letter named
5108 * subroutine call (or a -bareword), then. */
5109 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5110 "### '-%c' looked like a file test but was not\n",
5117 const char tmp = *s++;
5120 if (PL_expect == XOPERATOR)
5125 else if (*s == '>') {
5128 if (FEATURE_POSTDEREF_IS_ENABLED && (
5129 ((*s == '$' || *s == '&') && s[1] == '*')
5130 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5131 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5132 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5135 Perl_ck_warner_d(aTHX_
5136 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5137 "Postfix dereference is experimental"
5139 PL_expect = XPOSTDEREF;
5142 if (isIDFIRST_lazy_if(s,UTF)) {
5143 s = force_word(s,METHOD,FALSE,TRUE);
5151 if (PL_expect == XOPERATOR) {
5152 if (*s == '=' && !PL_lex_allbrackets &&
5153 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5160 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5162 OPERATOR('-'); /* unary minus */
5168 const char tmp = *s++;
5171 if (PL_expect == XOPERATOR)
5176 if (PL_expect == XOPERATOR) {
5177 if (*s == '=' && !PL_lex_allbrackets &&
5178 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5185 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5192 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5193 if (PL_expect != XOPERATOR) {
5194 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5195 PL_expect = XOPERATOR;
5196 force_ident(PL_tokenbuf, '*');
5204 if (*s == '=' && !PL_lex_allbrackets &&
5205 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5211 if (*s == '=' && !PL_lex_allbrackets &&
5212 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5216 PL_parser->saw_infix_sigil = 1;
5221 if (PL_expect == XOPERATOR) {
5222 if (s[1] == '=' && !PL_lex_allbrackets &&
5223 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5226 PL_parser->saw_infix_sigil = 1;
5229 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5230 PL_tokenbuf[0] = '%';
5231 s = scan_ident(s, PL_tokenbuf + 1,
5232 sizeof PL_tokenbuf - 1, FALSE);
5234 if (!PL_tokenbuf[1]) {
5237 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5239 PL_tokenbuf[0] = '@';
5241 PL_expect = XOPERATOR;
5242 force_ident_maybe_lex('%');
5247 bof = FEATURE_BITWISE_IS_ENABLED;
5248 if (bof && s[1] == '.')
5250 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5251 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5257 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5259 if (PL_lex_brackets > 100)
5260 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5261 PL_lex_brackstack[PL_lex_brackets++] = 0;
5262 PL_lex_allbrackets++;
5264 const char tmp = *s++;
5269 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5271 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5274 Perl_ck_warner_d(aTHX_
5275 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5276 "Smartmatch is experimental");
5280 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5282 BCop(OP_SCOMPLEMENT);
5284 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5286 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5293 goto just_a_word_zero_gv;
5299 switch (PL_expect) {
5301 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5303 PL_bufptr = s; /* update in case we back off */
5306 "Use of := for an empty attribute list is not allowed");
5313 PL_expect = XTERMBLOCK;
5317 while (isIDFIRST_lazy_if(s,UTF)) {
5320 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5321 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5322 if (tmp < 0) tmp = -tmp;
5337 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5339 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5340 COPLINE_SET_FROM_MULTI_END;
5342 /* MUST advance bufptr here to avoid bogus
5343 "at end of line" context messages from yyerror().
5345 PL_bufptr = s + len;
5346 yyerror("Unterminated attribute parameter in attribute list");
5350 return REPORT(0); /* EOF indicator */
5354 sv_catsv(sv, PL_lex_stuff);
5355 attrs = op_append_elem(OP_LIST, attrs,
5356 newSVOP(OP_CONST, 0, sv));
5357 SvREFCNT_dec_NN(PL_lex_stuff);
5358 PL_lex_stuff = NULL;
5361 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5363 if (PL_in_my == KEY_our) {
5364 deprecate(":unique");
5367 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5370 /* NOTE: any CV attrs applied here need to be part of
5371 the CVf_BUILTIN_ATTRS define in cv.h! */
5372 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5374 CvLVALUE_on(PL_compcv);
5376 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5378 deprecate(":locked");
5380 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5382 CvMETHOD_on(PL_compcv);
5384 else if (!PL_in_my && len == 5
5385 && strnEQ(SvPVX(sv), "const", len))
5388 Perl_ck_warner_d(aTHX_
5389 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5390 ":const is experimental"
5392 CvANONCONST_on(PL_compcv);
5393 if (!CvANON(PL_compcv))
5394 yyerror(":const is not permitted on named "
5397 /* After we've set the flags, it could be argued that
5398 we don't need to do the attributes.pm-based setting
5399 process, and shouldn't bother appending recognized
5400 flags. To experiment with that, uncomment the
5401 following "else". (Note that's already been
5402 uncommented. That keeps the above-applied built-in
5403 attributes from being intercepted (and possibly
5404 rejected) by a package's attribute routines, but is
5405 justified by the performance win for the common case
5406 of applying only built-in attributes.) */
5408 attrs = op_append_elem(OP_LIST, attrs,
5409 newSVOP(OP_CONST, 0,
5413 if (*s == ':' && s[1] != ':')
5416 break; /* require real whitespace or :'s */
5417 /* XXX losing whitespace on sequential attributes here */
5420 if (*s != ';' && *s != '}' &&
5421 !(PL_expect == XOPERATOR
5422 ? (*s == '=' || *s == ')')
5423 : (*s == '{' || *s == '('))) {
5424 const char q = ((*s == '\'') ? '"' : '\'');
5425 /* If here for an expression, and parsed no attrs, back
5427 if (PL_expect == XOPERATOR && !attrs) {
5431 /* MUST advance bufptr here to avoid bogus "at end of line"
5432 context messages from yyerror().
5435 yyerror( (const char *)
5437 ? Perl_form(aTHX_ "Invalid separator character "
5438 "%c%c%c in attribute list", q, *s, q)
5439 : "Unterminated attribute list" ) );
5447 NEXTVAL_NEXTTOKE.opval = attrs;
5453 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5457 PL_lex_allbrackets--;
5461 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5462 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5466 PL_lex_allbrackets++;
5469 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5476 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5479 PL_lex_allbrackets--;
5485 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5488 if (PL_lex_brackets <= 0)
5489 /* diag_listed_as: Unmatched right %s bracket */
5490 yyerror("Unmatched right square bracket");
5493 PL_lex_allbrackets--;
5494 if (PL_lex_state == LEX_INTERPNORMAL) {
5495 if (PL_lex_brackets == 0) {
5496 if (*s == '-' && s[1] == '>')
5497 PL_lex_state = LEX_INTERPENDMAYBE;
5498 else if (*s != '[' && *s != '{')
5499 PL_lex_state = LEX_INTERPEND;
5506 if (PL_lex_brackets > 100) {
5507 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5509 switch (PL_expect) {
5512 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5513 PL_lex_allbrackets++;
5514 OPERATOR(HASHBRACK);
5516 while (s < PL_bufend && SPACE_OR_TAB(*s))
5519 PL_tokenbuf[0] = '\0';
5520 if (d < PL_bufend && *d == '-') {
5521 PL_tokenbuf[0] = '-';
5523 while (d < PL_bufend && SPACE_OR_TAB(*d))
5526 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5527 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5529 while (d < PL_bufend && SPACE_OR_TAB(*d))
5532 const char minus = (PL_tokenbuf[0] == '-');
5533 s = force_word(s + minus, WORD, FALSE, TRUE);
5541 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5542 PL_lex_allbrackets++;
5547 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5548 PL_lex_allbrackets++;
5552 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5553 PL_lex_allbrackets++;
5558 if (PL_oldoldbufptr == PL_last_lop)
5559 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5561 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5562 PL_lex_allbrackets++;
5565 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5567 /* This hack is to get the ${} in the message. */
5569 yyerror("syntax error");
5572 OPERATOR(HASHBRACK);
5574 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5575 /* ${...} or @{...} etc., but not print {...}
5576 * Skip the disambiguation and treat this as a block.
5578 goto block_expectation;
5580 /* This hack serves to disambiguate a pair of curlies
5581 * as being a block or an anon hash. Normally, expectation
5582 * determines that, but in cases where we're not in a
5583 * position to expect anything in particular (like inside
5584 * eval"") we have to resolve the ambiguity. This code
5585 * covers the case where the first term in the curlies is a
5586 * quoted string. Most other cases need to be explicitly
5587 * disambiguated by prepending a "+" before the opening
5588 * curly in order to force resolution as an anon hash.
5590 * XXX should probably propagate the outer expectation
5591 * into eval"" to rely less on this hack, but that could
5592 * potentially break current behavior of eval"".
5596 if (*s == '\'' || *s == '"' || *s == '`') {
5597 /* common case: get past first string, handling escapes */
5598 for (t++; t < PL_bufend && *t != *s;)
5603 else if (*s == 'q') {
5606 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5607 && !isWORDCHAR(*t))))
5609 /* skip q//-like construct */
5611 char open, close, term;
5614 while (t < PL_bufend && isSPACE(*t))
5616 /* check for q => */
5617 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5618 OPERATOR(HASHBRACK);
5622 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5626 for (t++; t < PL_bufend; t++) {
5627 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5629 else if (*t == open)
5633 for (t++; t < PL_bufend; t++) {
5634 if (*t == '\\' && t+1 < PL_bufend)
5636 else if (*t == close && --brackets <= 0)
5638 else if (*t == open)
5645 /* skip plain q word */
5646 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5649 else if (isWORDCHAR_lazy_if(t,UTF)) {
5651 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5654 while (t < PL_bufend && isSPACE(*t))
5656 /* if comma follows first term, call it an anon hash */
5657 /* XXX it could be a comma expression with loop modifiers */
5658 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5659 || (*t == '=' && t[1] == '>')))
5660 OPERATOR(HASHBRACK);
5661 if (PL_expect == XREF)
5664 /* If there is an opening brace or 'sub:', treat it
5665 as a term to make ${{...}}{k} and &{sub:attr...}
5666 dwim. Otherwise, treat it as a statement, so
5667 map {no strict; ...} works.
5674 if (strnEQ(s, "sub", 3)) {
5685 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5691 pl_yylval.ival = CopLINE(PL_curcop);
5692 PL_copline = NOLINE; /* invalidate current command line number */
5693 TOKEN(formbrack ? '=' : '{');
5695 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5699 if (PL_lex_brackets <= 0)
5700 /* diag_listed_as: Unmatched right %s bracket */
5701 yyerror("Unmatched right curly bracket");
5703 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5704 PL_lex_allbrackets--;
5705 if (PL_lex_state == LEX_INTERPNORMAL) {
5706 if (PL_lex_brackets == 0) {
5707 if (PL_expect & XFAKEBRACK) {
5708 PL_expect &= XENUMMASK;
5709 PL_lex_state = LEX_INTERPEND;
5711 return yylex(); /* ignore fake brackets */
5713 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5714 && SvEVALED(PL_lex_repl))
5715 PL_lex_state = LEX_INTERPEND;
5716 else if (*s == '-' && s[1] == '>')
5717 PL_lex_state = LEX_INTERPENDMAYBE;
5718 else if (*s != '[' && *s != '{')
5719 PL_lex_state = LEX_INTERPEND;
5722 if (PL_expect & XFAKEBRACK) {
5723 PL_expect &= XENUMMASK;
5725 return yylex(); /* ignore fake brackets */
5727 force_next(formbrack ? '.' : '}');
5728 if (formbrack) LEAVE;
5729 if (formbrack == 2) { /* means . where arguments were expected */
5735 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5738 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5739 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5746 if (PL_expect == XOPERATOR) {
5747 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5748 && isIDFIRST_lazy_if(s,UTF))
5750 CopLINE_dec(PL_curcop);
5751 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5752 CopLINE_inc(PL_curcop);
5755 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5757 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5758 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5764 PL_parser->saw_infix_sigil = 1;
5765 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5771 PL_tokenbuf[0] = '&';
5772 s = scan_ident(s - 1, PL_tokenbuf + 1,
5773 sizeof PL_tokenbuf - 1, TRUE);
5774 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5775 if (PL_tokenbuf[1]) {
5776 force_ident_maybe_lex('&');
5785 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5786 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5794 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5796 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5797 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5801 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
5805 const char tmp = *s++;
5807 if (!PL_lex_allbrackets &&
5808 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5815 if (!PL_lex_allbrackets &&
5816 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5824 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5825 && strchr("+-*/%.^&|<",tmp))
5826 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5827 "Reversed %c= operator",(int)tmp);
5829 if (PL_expect == XSTATE && isALPHA(tmp) &&
5830 (s == PL_linestart+1 || s[-2] == '\n') )
5832 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5833 || PL_lex_state != LEX_NORMAL) {
5838 if (strnEQ(s,"=cut",4)) {
5852 PL_parser->in_pod = 1;
5856 if (PL_expect == XBLOCK) {
5858 #ifdef PERL_STRICT_CR
5859 while (SPACE_OR_TAB(*t))
5861 while (SPACE_OR_TAB(*t) || *t == '\r')
5864 if (*t == '\n' || *t == '#') {
5867 SAVEI8(PL_parser->form_lex_state);
5868 SAVEI32(PL_lex_formbrack);
5869 PL_parser->form_lex_state = PL_lex_state;
5870 PL_lex_formbrack = PL_lex_brackets + 1;
5874 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5883 const char tmp = *s++;
5885 /* was this !=~ where !~ was meant?
5886 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5888 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5889 const char *t = s+1;
5891 while (t < PL_bufend && isSPACE(*t))
5894 if (*t == '/' || *t == '?' ||
5895 ((*t == 'm' || *t == 's' || *t == 'y')
5896 && !isWORDCHAR(t[1])) ||
5897 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5898 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5899 "!=~ should be !~");
5901 if (!PL_lex_allbrackets &&
5902 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5914 if (PL_expect != XOPERATOR) {
5915 if (s[1] != '<' && !strchr(s,'>'))
5917 if (s[1] == '<' && s[2] != '>')
5918 s = scan_heredoc(s);
5920 s = scan_inputsymbol(s);
5921 PL_expect = XOPERATOR;
5922 TOKEN(sublex_start());
5928 if (*s == '=' && !PL_lex_allbrackets &&
5929 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5933 SHop(OP_LEFT_SHIFT);
5938 if (!PL_lex_allbrackets &&
5939 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5946 if (!PL_lex_allbrackets &&
5947 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5955 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5963 const char tmp = *s++;
5965 if (*s == '=' && !PL_lex_allbrackets &&
5966 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5970 SHop(OP_RIGHT_SHIFT);
5972 else if (tmp == '=') {
5973 if (!PL_lex_allbrackets &&
5974 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5982 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5991 if (PL_expect == XOPERATOR) {
5992 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5993 return deprecate_commaless_var_list();
5996 else if (PL_expect == XPOSTDEREF) {
5999 POSTDEREF(DOLSHARP);
6004 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6005 PL_tokenbuf[0] = '@';
6006 s = scan_ident(s + 1, PL_tokenbuf + 1,
6007 sizeof PL_tokenbuf - 1, FALSE);
6008 if (PL_expect == XOPERATOR)
6009 no_op("Array length", s);
6010 if (!PL_tokenbuf[1])
6012 PL_expect = XOPERATOR;
6013 force_ident_maybe_lex('#');
6017 PL_tokenbuf[0] = '$';
6018 s = scan_ident(s, PL_tokenbuf + 1,
6019 sizeof PL_tokenbuf - 1, FALSE);
6020 if (PL_expect == XOPERATOR) {
6022 if (PL_bufptr > s) {
6024 PL_bufptr = PL_oldbufptr;
6028 if (!PL_tokenbuf[1]) {
6030 yyerror("Final $ should be \\$ or $name");
6036 const char tmp = *s;
6037 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6040 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6041 && intuit_more(s)) {
6043 PL_tokenbuf[0] = '@';
6044 if (ckWARN(WARN_SYNTAX)) {
6047 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6050 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6051 while (t < PL_bufend && *t != ']')
6053 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6054 "Multidimensional syntax %.*s not supported",
6055 (int)((t - PL_bufptr) + 1), PL_bufptr);
6059 else if (*s == '{') {
6061 PL_tokenbuf[0] = '%';
6062 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6063 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6065 char tmpbuf[sizeof PL_tokenbuf];
6068 } while (isSPACE(*t));
6069 if (isIDFIRST_lazy_if(t,UTF)) {
6071 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6076 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6077 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6078 "You need to quote \"%"UTF8f"\"",
6079 UTF8fARG(UTF, len, tmpbuf));
6085 PL_expect = XOPERATOR;
6086 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6087 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6088 if (!islop || PL_last_lop_op == OP_GREPSTART)
6089 PL_expect = XOPERATOR;
6090 else if (strchr("$@\"'`q", *s))
6091 PL_expect = XTERM; /* e.g. print $fh "foo" */
6092 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6093 PL_expect = XTERM; /* e.g. print $fh &sub */
6094 else if (isIDFIRST_lazy_if(s,UTF)) {
6095 char tmpbuf[sizeof PL_tokenbuf];
6097 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6098 if ((t2 = keyword(tmpbuf, len, 0))) {
6099 /* binary operators exclude handle interpretations */
6111 PL_expect = XTERM; /* e.g. print $fh length() */
6116 PL_expect = XTERM; /* e.g. print $fh subr() */
6119 else if (isDIGIT(*s))
6120 PL_expect = XTERM; /* e.g. print $fh 3 */
6121 else if (*s == '.' && isDIGIT(s[1]))
6122 PL_expect = XTERM; /* e.g. print $fh .3 */
6123 else if ((*s == '?' || *s == '-' || *s == '+')
6124 && !isSPACE(s[1]) && s[1] != '=')
6125 PL_expect = XTERM; /* e.g. print $fh -1 */
6126 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6128 PL_expect = XTERM; /* e.g. print $fh /.../
6129 XXX except DORDOR operator
6131 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6133 PL_expect = XTERM; /* print $fh <<"EOF" */
6136 force_ident_maybe_lex('$');
6140 if (PL_expect == XOPERATOR)
6142 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6143 PL_tokenbuf[0] = '@';
6144 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6146 if (!PL_tokenbuf[1]) {
6149 if (PL_lex_state == LEX_NORMAL)
6151 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6153 PL_tokenbuf[0] = '%';
6155 /* Warn about @ where they meant $. */
6156 if (*s == '[' || *s == '{') {
6157 if (ckWARN(WARN_SYNTAX)) {
6158 S_check_scalar_slice(aTHX_ s);
6162 PL_expect = XOPERATOR;
6163 force_ident_maybe_lex('@');
6166 case '/': /* may be division, defined-or, or pattern */
6167 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6168 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6169 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6174 else if (PL_expect == XOPERATOR) {
6176 if (*s == '=' && !PL_lex_allbrackets &&
6177 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6184 /* Disable warning on "study /blah/" */
6185 if (PL_oldoldbufptr == PL_last_uni
6186 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6187 || memNE(PL_last_uni, "study", 5)
6188 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6191 s = scan_pat(s,OP_MATCH);
6192 TERM(sublex_start());
6195 case '?': /* conditional */
6197 if (!PL_lex_allbrackets &&
6198 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6202 PL_lex_allbrackets++;
6206 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6207 #ifdef PERL_STRICT_CR
6210 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6212 && (s == PL_linestart || s[-1] == '\n') )
6215 formbrack = 2; /* dot seen where arguments expected */
6218 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6222 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6225 if (!PL_lex_allbrackets &&
6226 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6233 pl_yylval.ival = OPf_SPECIAL;
6239 if (*s == '=' && !PL_lex_allbrackets &&
6240 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6247 case '0': case '1': case '2': case '3': case '4':
6248 case '5': case '6': case '7': case '8': case '9':
6249 s = scan_num(s, &pl_yylval);
6250 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6251 if (PL_expect == XOPERATOR)
6256 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6259 COPLINE_SET_FROM_MULTI_END;
6260 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6261 if (PL_expect == XOPERATOR) {
6262 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6263 return deprecate_commaless_var_list();
6268 pl_yylval.ival = OP_CONST;
6269 TERM(sublex_start());
6272 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6275 printbuf("### Saw string before %s\n", s);
6277 PerlIO_printf(Perl_debug_log,
6278 "### Saw unterminated string\n");
6280 if (PL_expect == XOPERATOR) {
6281 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6282 return deprecate_commaless_var_list();
6289 pl_yylval.ival = OP_CONST;
6290 /* FIXME. I think that this can be const if char *d is replaced by
6291 more localised variables. */
6292 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6293 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6294 pl_yylval.ival = OP_STRINGIFY;
6298 if (pl_yylval.ival == OP_CONST)
6299 COPLINE_SET_FROM_MULTI_END;
6300 TERM(sublex_start());
6303 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6304 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6305 if (PL_expect == XOPERATOR)
6306 no_op("Backticks",s);
6309 pl_yylval.ival = OP_BACKTICK;
6310 TERM(sublex_start());
6314 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6316 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6318 if (PL_expect == XOPERATOR)
6319 no_op("Backslash",s);
6323 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6324 char *start = s + 2;
6325 while (isDIGIT(*start) || *start == '_')
6327 if (*start == '.' && isDIGIT(start[1])) {
6328 s = scan_num(s, &pl_yylval);
6331 else if ((*start == ':' && start[1] == ':')
6332 || (PL_expect == XSTATE && *start == ':'))
6334 else if (PL_expect == XSTATE) {
6336 while (d < PL_bufend && isSPACE(*d)) d++;
6337 if (*d == ':') goto keylookup;
6339 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6340 if (!isALPHA(*start) && (PL_expect == XTERM
6341 || PL_expect == XREF || PL_expect == XSTATE
6342 || PL_expect == XTERMORDORDOR)) {
6343 GV *const gv = gv_fetchpvn_flags(s, start - s,
6344 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6346 s = scan_num(s, &pl_yylval);
6353 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6406 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6408 /* Some keywords can be followed by any delimiter, including ':' */
6409 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6411 /* x::* is just a word, unless x is "CORE" */
6412 if (!anydelim && *s == ':' && s[1] == ':') {
6413 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6418 while (d < PL_bufend && isSPACE(*d))
6419 d++; /* no comments skipped here, or s### is misparsed */
6421 /* Is this a word before a => operator? */
6422 if (*d == '=' && d[1] == '>') {
6426 = (OP*)newSVOP(OP_CONST, 0,
6427 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6428 pl_yylval.opval->op_private = OPpCONST_BARE;
6432 /* Check for plugged-in keyword */
6436 char *saved_bufptr = PL_bufptr;
6438 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6440 if (result == KEYWORD_PLUGIN_DECLINE) {
6441 /* not a plugged-in keyword */
6442 PL_bufptr = saved_bufptr;
6443 } else if (result == KEYWORD_PLUGIN_STMT) {
6444 pl_yylval.opval = o;
6446 if (!PL_nexttoke) PL_expect = XSTATE;
6447 return REPORT(PLUGSTMT);
6448 } else if (result == KEYWORD_PLUGIN_EXPR) {
6449 pl_yylval.opval = o;
6451 if (!PL_nexttoke) PL_expect = XOPERATOR;
6452 return REPORT(PLUGEXPR);
6454 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6459 /* Check for built-in keyword */
6460 tmp = keyword(PL_tokenbuf, len, 0);
6462 /* Is this a label? */
6463 if (!anydelim && PL_expect == XSTATE
6464 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6466 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6467 pl_yylval.pval[len] = '\0';
6468 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6473 /* Check for lexical sub */
6474 if (PL_expect != XOPERATOR) {
6475 char tmpbuf[sizeof PL_tokenbuf + 1];
6477 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6478 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6479 if (off != NOT_IN_PAD) {
6480 assert(off); /* we assume this is boolean-true below */
6481 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6482 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6483 HEK * const stashname = HvNAME_HEK(stash);
6484 sv = newSVhek(stashname);
6485 sv_catpvs(sv, "::");
6486 sv_catpvn_flags(sv, PL_tokenbuf, len,
6487 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6488 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6498 rv2cv_op = newOP(OP_PADANY, 0);
6499 rv2cv_op->op_targ = off;
6500 cv = find_lexical_cv(off);
6508 if (tmp < 0) { /* second-class keyword? */
6509 GV *ogv = NULL; /* override (winner) */
6510 GV *hgv = NULL; /* hidden (loser) */
6511 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6513 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6514 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6518 if (GvIMPORTED_CV(gv))
6520 else if (! CvMETHOD(cv))
6524 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6528 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6529 : SvPCS_IMPORTED(gv)
6530 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6539 tmp = 0; /* overridden by import or by GLOBAL */
6542 && -tmp==KEY_lock /* XXX generalizable kludge */
6545 tmp = 0; /* any sub overrides "weak" keyword */
6547 else { /* no override */
6549 if (tmp == KEY_dump) {
6550 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6551 "dump() better written as CORE::dump()");
6555 if (hgv && tmp != KEY_x) /* never ambiguous */
6556 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6557 "Ambiguous call resolved as CORE::%s(), "
6558 "qualify as such or use &",
6563 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6564 && (!anydelim || *s != '#')) {
6565 /* no override, and not s### either; skipspace is safe here
6566 * check for => on following line */
6568 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6569 STRLEN soff = s - SvPVX(PL_linestr);
6570 s = skipspace_flags(s, LEX_NO_INCLINE);
6571 arrow = *s == '=' && s[1] == '>';
6572 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6573 s = SvPVX(PL_linestr) + soff;
6581 default: /* not a keyword */
6582 /* Trade off - by using this evil construction we can pull the
6583 variable gv into the block labelled keylookup. If not, then
6584 we have to give it function scope so that the goto from the
6585 earlier ':' case doesn't bypass the initialisation. */
6587 just_a_word_zero_gv:
6599 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6603 /* Get the rest if it looks like a package qualifier */
6605 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6607 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6610 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6611 UTF8fARG(UTF, len, PL_tokenbuf),
6612 *s == '\'' ? "'" : "::");
6617 if (PL_expect == XOPERATOR) {
6618 if (PL_bufptr == PL_linestart) {
6619 CopLINE_dec(PL_curcop);
6620 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6621 CopLINE_inc(PL_curcop);
6624 no_op("Bareword",s);
6627 /* See if the name is "Foo::",
6628 in which case Foo is a bareword
6629 (and a package name). */
6632 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6634 if (ckWARN(WARN_BAREWORD)
6635 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6636 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6637 "Bareword \"%"UTF8f"\" refers to nonexistent package",
6638 UTF8fARG(UTF, len, PL_tokenbuf));
6640 PL_tokenbuf[len] = '\0';
6649 /* if we saw a global override before, get the right name */
6652 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6655 SV * const tmp_sv = sv;
6656 sv = newSVpvs("CORE::GLOBAL::");
6657 sv_catsv(sv, tmp_sv);
6658 SvREFCNT_dec(tmp_sv);
6662 /* Presume this is going to be a bareword of some sort. */
6664 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6665 pl_yylval.opval->op_private = OPpCONST_BARE;
6667 /* And if "Foo::", then that's what it certainly is. */
6673 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6674 const_op->op_private = OPpCONST_BARE;
6676 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6680 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6683 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6686 /* Use this var to track whether intuit_method has been
6687 called. intuit_method returns 0 or > 255. */
6690 /* See if it's the indirect object for a list operator. */
6692 if (PL_oldoldbufptr &&
6693 PL_oldoldbufptr < PL_bufptr &&
6694 (PL_oldoldbufptr == PL_last_lop
6695 || PL_oldoldbufptr == PL_last_uni) &&
6696 /* NO SKIPSPACE BEFORE HERE! */
6697 (PL_expect == XREF ||
6698 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6700 bool immediate_paren = *s == '(';
6702 /* (Now we can afford to cross potential line boundary.) */
6705 /* Two barewords in a row may indicate method call. */
6707 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6708 (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6712 /* If not a declared subroutine, it's an indirect object. */
6713 /* (But it's an indir obj regardless for sort.) */
6714 /* Also, if "_" follows a filetest operator, it's a bareword */
6717 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6719 (PL_last_lop_op != OP_MAPSTART &&
6720 PL_last_lop_op != OP_GREPSTART))))
6721 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6722 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6725 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6730 PL_expect = XOPERATOR;
6733 /* Is this a word before a => operator? */
6734 if (*s == '=' && s[1] == '>' && !pkgname) {
6737 if (gvp || (lex && !off)) {
6738 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6739 /* This is our own scalar, created a few lines
6740 above, so this is safe. */
6742 sv_setpv(sv, PL_tokenbuf);
6743 if (UTF && !IN_BYTES
6744 && is_utf8_string((U8*)PL_tokenbuf, len))
6751 /* If followed by a paren, it's certainly a subroutine. */
6756 while (SPACE_OR_TAB(*d))
6758 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6763 NEXTVAL_NEXTTOKE.opval =
6764 off ? rv2cv_op : pl_yylval.opval;
6766 op_free(pl_yylval.opval), force_next(PRIVATEREF);
6767 else op_free(rv2cv_op), force_next(WORD);
6772 /* If followed by var or block, call it a method (unless sub) */
6774 if ((*s == '$' || *s == '{') && !cv) {
6776 PL_last_lop = PL_oldbufptr;
6777 PL_last_lop_op = OP_METHOD;
6778 if (!PL_lex_allbrackets &&
6779 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6780 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6781 PL_expect = XBLOCKTERM;
6783 return REPORT(METHOD);
6786 /* If followed by a bareword, see if it looks like indir obj. */
6788 if (tmp == 1 && !orig_keyword
6789 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6790 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6793 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
6795 sv_setpvn(sv, PL_tokenbuf, len);
6796 if (UTF && !IN_BYTES
6797 && is_utf8_string((U8*)PL_tokenbuf, len))
6799 else SvUTF8_off(sv);
6802 if (tmp == METHOD && !PL_lex_allbrackets &&
6803 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6804 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6808 /* Not a method, so call it a subroutine (if defined) */
6811 /* Check for a constant sub */
6812 if ((sv = cv_const_sv_or_av(cv))) {
6815 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6816 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6817 if (SvTYPE(sv) == SVt_PVAV)
6818 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6821 pl_yylval.opval->op_private = 0;
6822 pl_yylval.opval->op_folded = 1;
6823 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6828 op_free(pl_yylval.opval);
6830 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6831 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6832 PL_last_lop = PL_oldbufptr;
6833 PL_last_lop_op = OP_ENTERSUB;
6834 /* Is there a prototype? */
6838 STRLEN protolen = CvPROTOLEN(cv);
6839 const char *proto = CvPROTO(cv);
6841 proto = S_strip_spaces(aTHX_ proto, &protolen);
6844 if ((optional = *proto == ';'))
6847 while (*proto == ';');
6851 *proto == '$' || *proto == '_'
6852 || *proto == '*' || *proto == '+'
6857 *proto == '\\' && proto[1] && proto[2] == '\0'
6860 UNIPROTO(UNIOPSUB,optional);
6861 if (*proto == '\\' && proto[1] == '[') {
6862 const char *p = proto + 2;
6863 while(*p && *p != ']')
6865 if(*p == ']' && !p[1])
6866 UNIPROTO(UNIOPSUB,optional);
6868 if (*proto == '&' && *s == '{') {
6870 sv_setpvs(PL_subname, "__ANON__");
6872 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6873 if (!PL_lex_allbrackets &&
6874 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6875 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6879 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6881 force_next(off ? PRIVATEREF : WORD);
6882 if (!PL_lex_allbrackets &&
6883 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6884 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6888 /* Call it a bare word */
6890 if (PL_hints & HINT_STRICT_SUBS)
6891 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6894 /* after "print" and similar functions (corresponding to
6895 * "F? L" in opcode.pl), whatever wasn't already parsed as
6896 * a filehandle should be subject to "strict subs".
6897 * Likewise for the optional indirect-object argument to system
6898 * or exec, which can't be a bareword */
6899 if ((PL_last_lop_op == OP_PRINT
6900 || PL_last_lop_op == OP_PRTF
6901 || PL_last_lop_op == OP_SAY
6902 || PL_last_lop_op == OP_SYSTEM
6903 || PL_last_lop_op == OP_EXEC)
6904 && (PL_hints & HINT_STRICT_SUBS))
6905 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6906 if (lastchar != '-') {
6907 if (ckWARN(WARN_RESERVED)) {
6911 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6913 /* PL_warn_reserved is constant */
6914 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6915 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6925 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6926 && saw_infix_sigil) {
6927 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6928 "Operator or semicolon missing before %c%"UTF8f,
6930 UTF8fARG(UTF, strlen(PL_tokenbuf),
6932 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6933 "Ambiguous use of %c resolved as operator %c",
6934 lastchar, lastchar);
6941 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6946 (OP*)newSVOP(OP_CONST, 0,
6947 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6950 case KEY___PACKAGE__:
6952 (OP*)newSVOP(OP_CONST, 0,
6954 ? newSVhek(HvNAME_HEK(PL_curstash))
6961 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6962 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6965 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6967 gv_init(gv,stash,"DATA",4,0);
6970 GvIOp(gv) = newIO();
6971 IoIFP(GvIOp(gv)) = PL_rsfp;
6972 #if defined(HAS_FCNTL) && defined(F_SETFD)
6974 const int fd = PerlIO_fileno(PL_rsfp);
6975 fcntl(fd,F_SETFD,fd >= 3);
6978 /* Mark this internal pseudo-handle as clean */
6979 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6980 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6981 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6983 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6984 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6985 /* if the script was opened in binmode, we need to revert
6986 * it to text mode for compatibility; but only iff it has CRs
6987 * XXX this is a questionable hack at best. */
6988 if (PL_bufend-PL_bufptr > 2
6989 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6992 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6993 loc = PerlIO_tell(PL_rsfp);
6994 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6997 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6999 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7000 #endif /* NETWARE */
7002 PerlIO_seek(PL_rsfp, loc, 0);
7006 #ifdef PERLIO_LAYERS
7009 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7010 else if (IN_ENCODING) {
7016 XPUSHs(_get_encoding());
7018 call_method("name", G_SCALAR);
7022 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7023 Perl_form(aTHX_ ":encoding(%"SVf")",
7036 FUN0OP(CvCLONE(PL_compcv)
7037 ? newOP(OP_RUNCV, 0)
7038 : newPVOP(OP_RUNCV,0,NULL));
7047 if (PL_expect == XSTATE) {
7058 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7059 if ((*s == ':' && s[1] == ':')
7060 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7064 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7068 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7069 UTF8fARG(UTF, len, PL_tokenbuf));
7072 else if (tmp == KEY_require || tmp == KEY_do
7074 /* that's a way to remember we saw "CORE::" */
7086 LOP(OP_ACCEPT,XTERM);
7089 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7094 LOP(OP_ATAN2,XTERM);
7100 LOP(OP_BINMODE,XTERM);
7103 LOP(OP_BLESS,XTERM);
7112 /* We have to disambiguate the two senses of
7113 "continue". If the next token is a '{' then
7114 treat it as the start of a continue block;
7115 otherwise treat it as a control operator.
7125 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7135 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7144 if (!PL_cryptseen) {
7145 PL_cryptseen = TRUE;
7149 LOP(OP_CRYPT,XTERM);
7152 LOP(OP_CHMOD,XTERM);
7155 LOP(OP_CHOWN,XTERM);
7158 LOP(OP_CONNECT,XTERM);
7178 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7180 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7181 && !keyword(PL_tokenbuf + 1, len, 0)) {
7184 force_ident_maybe_lex('&');
7189 if (orig_keyword == KEY_do) {
7198 PL_hints |= HINT_BLOCK_SCOPE;
7208 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7209 STR_WITH_LEN("NDBM_File::"),
7210 STR_WITH_LEN("DB_File::"),
7211 STR_WITH_LEN("GDBM_File::"),
7212 STR_WITH_LEN("SDBM_File::"),
7213 STR_WITH_LEN("ODBM_File::"),
7215 LOP(OP_DBMOPEN,XTERM);
7227 pl_yylval.ival = CopLINE(PL_curcop);
7231 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7243 if (*s == '{') { /* block eval */
7244 PL_expect = XTERMBLOCK;
7245 UNIBRACK(OP_ENTERTRY);
7247 else { /* string eval */
7249 UNIBRACK(OP_ENTEREVAL);
7254 UNIBRACK(-OP_ENTEREVAL);
7268 case KEY_endhostent:
7274 case KEY_endservent:
7277 case KEY_endprotoent:
7288 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7290 pl_yylval.ival = CopLINE(PL_curcop);
7292 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7295 if ((PL_bufend - p) >= 3 &&
7296 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7298 else if ((PL_bufend - p) >= 4 &&
7299 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7302 /* skip optional package name, as in "for my abc $x (..)" */
7303 if (isIDFIRST_lazy_if(p,UTF)) {
7304 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7308 Perl_croak(aTHX_ "Missing $ on loop variable");
7313 LOP(OP_FORMLINE,XTERM);
7322 LOP(OP_FCNTL,XTERM);
7328 LOP(OP_FLOCK,XTERM);
7331 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7336 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7341 LOP(OP_GREPSTART, XREF);
7358 case KEY_getpriority:
7359 LOP(OP_GETPRIORITY,XTERM);
7361 case KEY_getprotobyname:
7364 case KEY_getprotobynumber:
7365 LOP(OP_GPBYNUMBER,XTERM);
7367 case KEY_getprotoent:
7379 case KEY_getpeername:
7380 UNI(OP_GETPEERNAME);
7382 case KEY_gethostbyname:
7385 case KEY_gethostbyaddr:
7386 LOP(OP_GHBYADDR,XTERM);
7388 case KEY_gethostent:
7391 case KEY_getnetbyname:
7394 case KEY_getnetbyaddr:
7395 LOP(OP_GNBYADDR,XTERM);
7400 case KEY_getservbyname:
7401 LOP(OP_GSBYNAME,XTERM);
7403 case KEY_getservbyport:
7404 LOP(OP_GSBYPORT,XTERM);
7406 case KEY_getservent:
7409 case KEY_getsockname:
7410 UNI(OP_GETSOCKNAME);
7412 case KEY_getsockopt:
7413 LOP(OP_GSOCKOPT,XTERM);
7428 pl_yylval.ival = CopLINE(PL_curcop);
7429 Perl_ck_warner_d(aTHX_
7430 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7431 "given is experimental");
7436 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7444 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7446 pl_yylval.ival = CopLINE(PL_curcop);
7450 LOP(OP_INDEX,XTERM);
7456 LOP(OP_IOCTL,XTERM);
7484 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7489 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7503 LOP(OP_LISTEN,XTERM);
7512 s = scan_pat(s,OP_MATCH);
7513 TERM(sublex_start());
7516 LOP(OP_MAPSTART, XREF);
7519 LOP(OP_MKDIR,XTERM);
7522 LOP(OP_MSGCTL,XTERM);
7525 LOP(OP_MSGGET,XTERM);
7528 LOP(OP_MSGRCV,XTERM);
7531 LOP(OP_MSGSND,XTERM);
7536 PL_in_my = (U16)tmp;
7538 if (isIDFIRST_lazy_if(s,UTF)) {
7539 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7540 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7542 if (!FEATURE_LEXSUBS_IS_ENABLED)
7544 "Experimental \"%s\" subs not enabled",
7545 tmp == KEY_my ? "my" :
7546 tmp == KEY_state ? "state" : "our");
7547 Perl_ck_warner_d(aTHX_
7548 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7549 "The lexical_subs feature is experimental");
7552 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7553 if (!PL_in_my_stash) {
7557 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7558 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7559 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7569 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7574 s = tokenize_use(0, s);
7578 if (*s == '(' || (s = skipspace(s), *s == '('))
7581 if (!PL_lex_allbrackets &&
7582 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7583 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7589 if (isIDFIRST_lazy_if(s,UTF)) {
7591 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7593 for (t=d; isSPACE(*t);)
7595 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7597 && !(t[0] == '=' && t[1] == '>')
7598 && !(t[0] == ':' && t[1] == ':')
7599 && !keyword(s, d-s, 0)
7601 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7602 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7603 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7609 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7611 pl_yylval.ival = OP_OR;
7621 LOP(OP_OPEN_DIR,XTERM);
7624 checkcomma(s,PL_tokenbuf,"filehandle");
7628 checkcomma(s,PL_tokenbuf,"filehandle");
7647 s = force_word(s,WORD,FALSE,TRUE);
7649 s = force_strict_version(s);
7653 LOP(OP_PIPE_OP,XTERM);
7656 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7659 COPLINE_SET_FROM_MULTI_END;
7660 pl_yylval.ival = OP_CONST;
7661 TERM(sublex_start());
7668 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7671 COPLINE_SET_FROM_MULTI_END;
7672 PL_expect = XOPERATOR;
7673 if (SvCUR(PL_lex_stuff)) {
7674 int warned_comma = !ckWARN(WARN_QW);
7675 int warned_comment = warned_comma;
7676 d = SvPV_force(PL_lex_stuff, len);
7678 for (; isSPACE(*d) && len; --len, ++d)
7683 if (!warned_comma || !warned_comment) {
7684 for (; !isSPACE(*d) && len; --len, ++d) {
7685 if (!warned_comma && *d == ',') {
7686 Perl_warner(aTHX_ packWARN(WARN_QW),
7687 "Possible attempt to separate words with commas");
7690 else if (!warned_comment && *d == '#') {
7691 Perl_warner(aTHX_ packWARN(WARN_QW),
7692 "Possible attempt to put comments in qw() list");
7698 for (; !isSPACE(*d) && len; --len, ++d)
7701 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7702 words = op_append_elem(OP_LIST, words,
7703 newSVOP(OP_CONST, 0, tokeq(sv)));
7708 words = newNULLLIST();
7709 SvREFCNT_dec_NN(PL_lex_stuff);
7710 PL_lex_stuff = NULL;
7711 PL_expect = XOPERATOR;
7712 pl_yylval.opval = sawparens(words);
7717 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7720 pl_yylval.ival = OP_STRINGIFY;
7721 if (SvIVX(PL_lex_stuff) == '\'')
7722 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7723 TERM(sublex_start());
7726 s = scan_pat(s,OP_QR);
7727 TERM(sublex_start());
7730 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7733 pl_yylval.ival = OP_BACKTICK;
7734 TERM(sublex_start());
7742 s = force_version(s, FALSE);
7744 else if (*s != 'v' || !isDIGIT(s[1])
7745 || (s = force_version(s, TRUE), *s == 'v'))
7747 *PL_tokenbuf = '\0';
7748 s = force_word(s,WORD,TRUE,TRUE);
7749 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7750 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7751 GV_ADD | (UTF ? SVf_UTF8 : 0));
7753 yyerror("<> at require-statement should be quotes");
7755 if (orig_keyword == KEY_require) {
7761 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7763 PL_last_uni = PL_oldbufptr;
7764 PL_last_lop_op = OP_REQUIRE;
7766 return REPORT( (int)REQUIRE );
7775 LOP(OP_RENAME,XTERM);
7784 LOP(OP_RINDEX,XTERM);
7793 UNIDOR(OP_READLINE);
7796 UNIDOR(OP_BACKTICK);
7805 LOP(OP_REVERSE,XTERM);
7808 UNIDOR(OP_READLINK);
7815 if (pl_yylval.opval)
7816 TERM(sublex_start());
7818 TOKEN(1); /* force error */
7821 checkcomma(s,PL_tokenbuf,"filehandle");
7831 LOP(OP_SELECT,XTERM);
7837 LOP(OP_SEMCTL,XTERM);
7840 LOP(OP_SEMGET,XTERM);
7843 LOP(OP_SEMOP,XTERM);
7849 LOP(OP_SETPGRP,XTERM);
7851 case KEY_setpriority:
7852 LOP(OP_SETPRIORITY,XTERM);
7854 case KEY_sethostent:
7860 case KEY_setservent:
7863 case KEY_setprotoent:
7873 LOP(OP_SEEKDIR,XTERM);
7875 case KEY_setsockopt:
7876 LOP(OP_SSOCKOPT,XTERM);
7882 LOP(OP_SHMCTL,XTERM);
7885 LOP(OP_SHMGET,XTERM);
7888 LOP(OP_SHMREAD,XTERM);
7891 LOP(OP_SHMWRITE,XTERM);
7894 LOP(OP_SHUTDOWN,XTERM);
7903 LOP(OP_SOCKET,XTERM);
7905 case KEY_socketpair:
7906 LOP(OP_SOCKPAIR,XTERM);
7909 checkcomma(s,PL_tokenbuf,"subroutine name");
7912 s = force_word(s,WORD,TRUE,TRUE);
7916 LOP(OP_SPLIT,XTERM);
7919 LOP(OP_SPRINTF,XTERM);
7922 LOP(OP_SPLICE,XTERM);
7937 LOP(OP_SUBSTR,XTERM);
7943 char * const tmpbuf = PL_tokenbuf + 1;
7944 expectation attrful;
7945 bool have_name, have_proto;
7946 const int key = tmp;
7947 SV *format_name = NULL;
7952 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7953 (*s == ':' && s[1] == ':'))
7957 attrful = XATTRBLOCK;
7958 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7960 if (key == KEY_format)
7961 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7963 if (memchr(tmpbuf, ':', len) || key != KEY_sub
7965 PL_tokenbuf, len + 1, 0
7967 sv_setpvn(PL_subname, tmpbuf, len);
7969 sv_setsv(PL_subname,PL_curstname);
7970 sv_catpvs(PL_subname,"::");
7971 sv_catpvn(PL_subname,tmpbuf,len);
7973 if (SvUTF8(PL_linestr))
7974 SvUTF8_on(PL_subname);
7981 if (key == KEY_my || key == KEY_our || key==KEY_state)
7984 /* diag_listed_as: Missing name in "%s sub" */
7986 "Missing name in \"%s\"", PL_bufptr);
7988 PL_expect = XTERMBLOCK;
7989 attrful = XATTRTERM;
7990 sv_setpvs(PL_subname,"?");
7994 if (key == KEY_format) {
7996 NEXTVAL_NEXTTOKE.opval
7997 = (OP*)newSVOP(OP_CONST,0, format_name);
7998 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8004 /* Look for a prototype */
8005 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8006 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8007 COPLINE_SET_FROM_MULTI_END;
8009 Perl_croak(aTHX_ "Prototype not terminated");
8010 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8018 if (*s == ':' && s[1] != ':')
8019 PL_expect = attrful;
8020 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8022 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8023 else if (*s != ';' && *s != '}')
8024 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8028 NEXTVAL_NEXTTOKE.opval =
8029 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8030 PL_lex_stuff = NULL;
8035 sv_setpvs(PL_subname, "__ANON__");
8037 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8040 force_ident_maybe_lex('&');
8045 LOP(OP_SYSTEM,XREF);
8048 LOP(OP_SYMLINK,XTERM);
8051 LOP(OP_SYSCALL,XTERM);
8054 LOP(OP_SYSOPEN,XTERM);
8057 LOP(OP_SYSSEEK,XTERM);
8060 LOP(OP_SYSREAD,XTERM);
8063 LOP(OP_SYSWRITE,XTERM);
8068 TERM(sublex_start());
8089 LOP(OP_TRUNCATE,XTERM);
8101 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8103 pl_yylval.ival = CopLINE(PL_curcop);
8107 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8109 pl_yylval.ival = CopLINE(PL_curcop);
8113 LOP(OP_UNLINK,XTERM);
8119 LOP(OP_UNPACK,XTERM);
8122 LOP(OP_UTIME,XTERM);
8128 LOP(OP_UNSHIFT,XTERM);
8131 s = tokenize_use(1, s);
8141 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8143 pl_yylval.ival = CopLINE(PL_curcop);
8144 Perl_ck_warner_d(aTHX_
8145 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8146 "when is experimental");
8150 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8152 pl_yylval.ival = CopLINE(PL_curcop);
8156 PL_hints |= HINT_BLOCK_SCOPE;
8163 LOP(OP_WAITPID,XTERM);
8169 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8170 * we use the same number on EBCDIC */
8171 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8175 if (PL_expect == XOPERATOR) {
8176 if (*s == '=' && !PL_lex_allbrackets &&
8177 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8185 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8187 pl_yylval.ival = OP_XOR;
8196 Looks up an identifier in the pad or in a package
8199 PRIVATEREF if this is a lexical name.
8200 WORD if this belongs to a package.
8203 if we're in a my declaration
8204 croak if they tried to say my($foo::bar)
8205 build the ops for a my() declaration
8206 if it's an access to a my() variable
8207 build ops for access to a my() variable
8208 if in a dq string, and they've said @foo and we can't find @foo
8210 build ops for a bareword
8214 S_pending_ident(pTHX)
8217 const char pit = (char)pl_yylval.ival;
8218 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8219 /* All routes through this function want to know if there is a colon. */
8220 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8222 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8223 "### Pending identifier '%s'\n", PL_tokenbuf); });
8225 /* if we're in a my(), we can't allow dynamics here.
8226 $foo'bar has already been turned into $foo::bar, so
8227 just check for colons.
8229 if it's a legal name, the OP is a PADANY.
8232 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8234 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8235 "variable %s in \"our\"",
8236 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8237 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8241 /* "my" variable %s can't be in a package */
8242 /* PL_no_myglob is constant */
8243 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8244 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8245 PL_in_my == KEY_my ? "my" : "state",
8246 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8248 UTF ? SVf_UTF8 : 0);
8252 pl_yylval.opval = newOP(OP_PADANY, 0);
8253 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8254 UTF ? SVf_UTF8 : 0);
8260 build the ops for accesses to a my() variable.
8265 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8267 if (tmp != NOT_IN_PAD) {
8268 /* might be an "our" variable" */
8269 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8270 /* build ops for a bareword */
8271 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8272 HEK * const stashname = HvNAME_HEK(stash);
8273 SV * const sym = newSVhek(stashname);
8274 sv_catpvs(sym, "::");
8275 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8276 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8277 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8281 ((PL_tokenbuf[0] == '$') ? SVt_PV
8282 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8287 pl_yylval.opval = newOP(OP_PADANY, 0);
8288 pl_yylval.opval->op_targ = tmp;
8294 Whine if they've said @foo in a doublequoted string,
8295 and @foo isn't a variable we can find in the symbol
8298 if (ckWARN(WARN_AMBIGUOUS) &&
8299 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8300 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8301 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8302 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8303 /* DO NOT warn for @- and @+ */
8304 && !( PL_tokenbuf[2] == '\0' &&
8305 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8308 /* Downgraded from fatal to warning 20000522 mjd */
8309 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8310 "Possible unintended interpolation of %"UTF8f
8312 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8316 /* build ops for a bareword */
8317 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8318 newSVpvn_flags(PL_tokenbuf + 1,
8320 UTF ? SVf_UTF8 : 0 ));
8321 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8323 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8324 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8325 | ( UTF ? SVf_UTF8 : 0 ),
8326 ((PL_tokenbuf[0] == '$') ? SVt_PV
8327 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8333 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8335 PERL_ARGS_ASSERT_CHECKCOMMA;
8337 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8338 if (ckWARN(WARN_SYNTAX)) {
8341 for (w = s+2; *w && level; w++) {
8349 /* the list of chars below is for end of statements or
8350 * block / parens, boolean operators (&&, ||, //) and branch
8351 * constructs (or, and, if, until, unless, while, err, for).
8352 * Not a very solid hack... */
8353 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8354 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8355 "%s (...) interpreted as function",name);
8358 while (s < PL_bufend && isSPACE(*s))
8362 while (s < PL_bufend && isSPACE(*s))
8364 if (isIDFIRST_lazy_if(s,UTF)) {
8365 const char * const w = s;
8366 s += UTF ? UTF8SKIP(s) : 1;
8367 while (isWORDCHAR_lazy_if(s,UTF))
8368 s += UTF ? UTF8SKIP(s) : 1;
8369 while (s < PL_bufend && isSPACE(*s))
8374 if (keyword(w, s - w, 0))
8377 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8378 if (gv && GvCVu(gv))
8382 Copy(w, tmpbuf+1, s - w, char);
8384 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8385 if (off != NOT_IN_PAD) return;
8387 Perl_croak(aTHX_ "No comma allowed after %s", what);
8392 /* S_new_constant(): do any overload::constant lookup.
8394 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8395 Best used as sv=new_constant(..., sv, ...).
8396 If s, pv are NULL, calls subroutine with one argument,
8397 and <type> is used with error messages only.
8398 <type> is assumed to be well formed UTF-8 */
8401 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8402 SV *sv, SV *pv, const char *type, STRLEN typelen)
8405 HV * table = GvHV(PL_hintgv); /* ^H */
8410 const char *why1 = "", *why2 = "", *why3 = "";
8412 PERL_ARGS_ASSERT_NEW_CONSTANT;
8413 /* We assume that this is true: */
8414 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8417 /* charnames doesn't work well if there have been errors found */
8418 if (PL_error_count > 0 && *key == 'c')
8420 SvREFCNT_dec_NN(sv);
8421 return &PL_sv_undef;
8424 sv_2mortal(sv); /* Parent created it permanently */
8426 || ! (PL_hints & HINT_LOCALIZE_HH)
8427 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8432 /* Here haven't found what we're looking for. If it is charnames,
8433 * perhaps it needs to be loaded. Try doing that before giving up */
8435 Perl_load_module(aTHX_
8437 newSVpvs("_charnames"),
8438 /* version parameter; no need to specify it, as if
8439 * we get too early a version, will fail anyway,
8440 * not being able to find '_charnames' */
8445 assert(sp == PL_stack_sp);
8446 table = GvHV(PL_hintgv);
8448 && (PL_hints & HINT_LOCALIZE_HH)
8449 && (cvp = hv_fetch(table, key, keylen, FALSE))
8455 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8456 msg = Perl_form(aTHX_
8457 "Constant(%.*s) unknown",
8458 (int)(type ? typelen : len),
8464 why3 = "} is not defined";
8467 msg = Perl_form(aTHX_
8468 /* The +3 is for '\N{'; -4 for that, plus '}' */
8469 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8473 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8474 (int)(type ? typelen : len),
8475 (type ? type: s), why1, why2, why3);
8478 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8479 return SvREFCNT_inc_simple_NN(sv);
8484 pv = newSVpvn_flags(s, len, SVs_TEMP);
8486 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8488 typesv = &PL_sv_undef;
8490 PUSHSTACKi(PERLSI_OVERLOAD);
8502 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8506 /* Check the eval first */
8507 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8509 const char * errstr;
8510 sv_catpvs(errsv, "Propagated");
8511 errstr = SvPV_const(errsv, errlen);
8512 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8514 res = SvREFCNT_inc_simple_NN(sv);
8518 SvREFCNT_inc_simple_void_NN(res);
8527 why1 = "Call to &{$^H{";
8529 why3 = "}} did not return a defined value";
8531 (void)sv_2mortal(sv);
8538 PERL_STATIC_INLINE void
8539 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8540 PERL_ARGS_ASSERT_PARSE_IDENT;
8544 Perl_croak(aTHX_ "%s", ident_too_long);
8545 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8546 /* The UTF-8 case must come first, otherwise things
8547 * like c\N{COMBINING TILDE} would start failing, as the
8548 * isWORDCHAR_A case below would gobble the 'c' up.
8551 char *t = *s + UTF8SKIP(*s);
8552 while (isIDCONT_utf8((U8*)t))
8554 if (*d + (t - *s) > e)
8555 Perl_croak(aTHX_ "%s", ident_too_long);
8556 Copy(*s, *d, t - *s, char);
8560 else if ( isWORDCHAR_A(**s) ) {
8563 } while (isWORDCHAR_A(**s) && *d < e);
8565 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8570 else if (allow_package && **s == ':' && (*s)[1] == ':'
8571 /* Disallow things like Foo::$bar. For the curious, this is
8572 * the code path that triggers the "Bad name after" warning
8573 * when looking for barewords.
8575 && (*s)[2] != '$') {
8585 /* Returns a NUL terminated string, with the length of the string written to
8589 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8592 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8593 bool is_utf8 = cBOOL(UTF);
8595 PERL_ARGS_ASSERT_SCAN_WORD;
8597 parse_ident(&s, &d, e, allow_package, is_utf8);
8604 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8606 I32 herelines = PL_parser->herelines;
8607 SSize_t bracket = -1;
8610 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8611 bool is_utf8 = cBOOL(UTF);
8612 I32 orig_copline = 0, tmp_copline = 0;
8614 PERL_ARGS_ASSERT_SCAN_IDENT;
8616 if (isSPACE(*s) || !*s)
8619 while (isDIGIT(*s)) {
8621 Perl_croak(aTHX_ "%s", ident_too_long);
8626 parse_ident(&s, &d, e, 1, is_utf8);
8631 /* Either a digit variable, or parse_ident() found an identifier
8632 (anything valid as a bareword), so job done and return. */
8633 if (PL_lex_state != LEX_NORMAL)
8634 PL_lex_state = LEX_INTERPENDMAYBE;
8637 if (*s == '$' && s[1] &&
8638 (isIDFIRST_lazy_if(s+1,is_utf8)
8639 || isDIGIT_A((U8)s[1])
8642 || strnEQ(s+1,"::",2)) )
8644 /* Dereferencing a value in a scalar variable.
8645 The alternatives are different syntaxes for a scalar variable.
8646 Using ' as a leading package separator isn't allowed. :: is. */
8649 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
8651 bracket = s - SvPVX(PL_linestr);
8653 orig_copline = CopLINE(PL_curcop);
8654 if (s < PL_bufend && isSPACE(*s)) {
8659 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8660 * iff Unicode semantics are to be used. The legal ones are any of:
8661 * a) all ASCII characters except:
8662 * 1) space-type ones, like \t and SPACE;
8665 * The final case currently doesn't get this far in the program, so we
8666 * don't test for it. If that were to change, it would be ok to allow it.
8667 * c) When not under Unicode rules, any upper Latin1 character
8668 * d) Otherwise, when unicode rules are used, all XIDS characters.
8670 * Because all ASCII characters have the same representation whether
8671 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8672 * '{' without knowing if is UTF-8 or not.
8673 * EBCDIC already uses the rules that ASCII platforms will use after the
8674 * deprecation cycle; see comment below about the deprecation. */
8676 # define VALID_LEN_ONE_IDENT(s, is_utf8) \
8677 (isGRAPH_A(*(s)) || ((is_utf8) \
8678 ? isIDFIRST_utf8((U8*) (s)) \
8680 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8682 # define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
8683 && LIKELY(*(s) != '\0') \
8685 || isASCII_utf8((U8*) (s)) \
8686 || isIDFIRST_utf8((U8*) (s))))
8688 if ((s <= PL_bufend - (is_utf8)
8691 && VALID_LEN_ONE_IDENT(s, is_utf8))
8693 /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
8694 * because often it has no graphic representation. (We can't get to
8695 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8698 ? ! isGRAPH_utf8( (U8*) s)
8699 : (! isGRAPH_L1( (U8) *s)
8700 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8702 /* Split messages for back compat */
8703 if (isCNTRL_A( (U8) *s)) {
8704 deprecate("literal control characters in variable names");
8707 deprecate("literal non-graphic characters in variable names");
8712 const STRLEN skip = UTF8SKIP(s);
8715 for ( i = 0; i < skip; i++ )
8723 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8724 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8728 /* Warn about ambiguous code after unary operators if {...} notation isn't
8729 used. There's no difference in ambiguity; it's merely a heuristic
8730 about when not to warn. */
8731 else if (ck_uni && bracket == -1)
8733 if (bracket != -1) {
8734 /* If we were processing {...} notation then... */
8735 if (isIDFIRST_lazy_if(d,is_utf8)) {
8736 /* if it starts as a valid identifier, assume that it is one.
8737 (the later check for } being at the expected point will trap
8738 cases where this doesn't pan out.) */
8739 d += is_utf8 ? UTF8SKIP(d) : 1;
8740 parse_ident(&s, &d, e, 1, is_utf8);
8742 tmp_copline = CopLINE(PL_curcop);
8743 if (s < PL_bufend && isSPACE(*s)) {
8746 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8747 /* ${foo[0]} and ${foo{bar}} notation. */
8748 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8749 const char * const brack =
8751 ((*s == '[') ? "[...]" : "{...}");
8752 orig_copline = CopLINE(PL_curcop);
8753 CopLINE_set(PL_curcop, tmp_copline);
8754 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8755 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8756 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8757 funny, dest, brack, funny, dest, brack);
8758 CopLINE_set(PL_curcop, orig_copline);
8761 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8762 PL_lex_allbrackets++;
8766 /* Handle extended ${^Foo} variables
8767 * 1999-02-27 mjd-perl-patch@plover.com */
8768 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8772 while (isWORDCHAR(*s) && d < e) {
8776 Perl_croak(aTHX_ "%s", ident_too_long);
8781 tmp_copline = CopLINE(PL_curcop);
8782 if (s < PL_bufend && isSPACE(*s)) {
8786 /* Expect to find a closing } after consuming any trailing whitespace.
8790 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8791 PL_lex_state = LEX_INTERPEND;
8794 if (PL_lex_state == LEX_NORMAL) {
8795 if (ckWARN(WARN_AMBIGUOUS) &&
8796 (keyword(dest, d - dest, 0)
8797 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8799 SV *tmp = newSVpvn_flags( dest, d - dest,
8800 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8803 orig_copline = CopLINE(PL_curcop);
8804 CopLINE_set(PL_curcop, tmp_copline);
8805 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8806 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8807 funny, SVfARG(tmp), funny, SVfARG(tmp));
8808 CopLINE_set(PL_curcop, orig_copline);
8813 /* Didn't find the closing } at the point we expected, so restore
8814 state such that the next thing to process is the opening { and */
8815 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8816 CopLINE_set(PL_curcop, orig_copline);
8817 PL_parser->herelines = herelines;
8821 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8822 PL_lex_state = LEX_INTERPEND;
8827 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
8829 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
8830 * found in the parse starting at 's', based on the subset that are valid
8831 * in this context input to this routine in 'valid_flags'. Advances s.
8832 * Returns TRUE if the input should be treated as a valid flag, so the next
8833 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
8834 * upon first call on the current regex. This routine will set it to any
8835 * charset modifier found. The caller shouldn't change it. This way,
8836 * another charset modifier encountered in the parse can be detected as an
8837 * error, as we have decided to allow only one */
8840 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8842 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8843 if (isWORDCHAR_lazy_if(*s, UTF)) {
8844 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8845 UTF ? SVf_UTF8 : 0);
8847 /* Pretend that it worked, so will continue processing before
8856 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
8857 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8858 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8859 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8860 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8861 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8862 case LOCALE_PAT_MOD:
8864 goto multiple_charsets;
8866 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8869 case UNICODE_PAT_MOD:
8871 goto multiple_charsets;
8873 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8876 case ASCII_RESTRICT_PAT_MOD:
8878 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8882 /* Error if previous modifier wasn't an 'a', but if it was, see
8883 * if, and accept, a second occurrence (only) */
8885 || get_regex_charset(*pmfl)
8886 != REGEX_ASCII_RESTRICTED_CHARSET)
8888 goto multiple_charsets;
8890 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8894 case DEPENDS_PAT_MOD:
8896 goto multiple_charsets;
8898 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8907 if (*charset != c) {
8908 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8910 else if (c == 'a') {
8911 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8912 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8915 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8918 /* Pretend that it worked, so will continue processing before dieing */
8924 S_scan_pat(pTHX_ char *start, I32 type)
8928 const char * const valid_flags =
8929 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8930 char charset = '\0'; /* character set modifier */
8931 unsigned int x_mod_count = 0;
8933 PERL_ARGS_ASSERT_SCAN_PAT;
8935 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8937 Perl_croak(aTHX_ "Search pattern not terminated");
8939 pm = (PMOP*)newPMOP(type, 0);
8940 if (PL_multi_open == '?') {
8941 /* This is the only point in the code that sets PMf_ONCE: */
8942 pm->op_pmflags |= PMf_ONCE;
8944 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8945 allows us to restrict the list needed by reset to just the ??
8947 assert(type != OP_TRANS);
8949 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8952 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8955 elements = mg->mg_len / sizeof(PMOP**);
8956 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8957 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8958 mg->mg_len = elements * sizeof(PMOP**);
8959 PmopSTASH_set(pm,PL_curstash);
8963 /* if qr/...(?{..}).../, then need to parse the pattern within a new
8964 * anon CV. False positives like qr/[(?{]/ are harmless */
8966 if (type == OP_QR) {
8968 char *e, *p = SvPV(PL_lex_stuff, len);
8970 for (; p < e; p++) {
8971 if (p[0] == '(' && p[1] == '?'
8972 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8974 pm->op_pmflags |= PMf_HAS_CV;
8978 pm->op_pmflags |= PMf_IS_QR;
8981 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
8982 &s, &charset, &x_mod_count))
8984 /* issue a warning if /c is specified,but /g is not */
8985 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8987 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8988 "Use of /c modifier is meaningless without /g" );
8991 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
8993 PL_lex_op = (OP*)pm;
8994 pl_yylval.ival = OP_MATCH;
8999 S_scan_subst(pTHX_ char *start)
9006 char charset = '\0'; /* character set modifier */
9007 unsigned int x_mod_count = 0;
9010 PERL_ARGS_ASSERT_SCAN_SUBST;
9012 pl_yylval.ival = OP_NULL;
9014 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9017 Perl_croak(aTHX_ "Substitution pattern not terminated");
9021 first_start = PL_multi_start;
9022 first_line = CopLINE(PL_curcop);
9023 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9025 SvREFCNT_dec_NN(PL_lex_stuff);
9026 PL_lex_stuff = NULL;
9027 Perl_croak(aTHX_ "Substitution replacement not terminated");
9029 PL_multi_start = first_start; /* so whole substitution is taken together */
9031 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9035 if (*s == EXEC_PAT_MOD) {
9039 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9040 &s, &charset, &x_mod_count))
9046 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9048 if ((pm->op_pmflags & PMf_CONTINUE)) {
9049 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9053 SV * const repl = newSVpvs("");
9056 pm->op_pmflags |= PMf_EVAL;
9059 sv_catpvs(repl, "eval ");
9061 sv_catpvs(repl, "do ");
9063 sv_catpvs(repl, "{");
9064 sv_catsv(repl, PL_sublex_info.repl);
9065 sv_catpvs(repl, "}");
9067 SvREFCNT_dec(PL_sublex_info.repl);
9068 PL_sublex_info.repl = repl;
9070 if (CopLINE(PL_curcop) != first_line) {
9071 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9072 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9073 CopLINE(PL_curcop) - first_line;
9074 CopLINE_set(PL_curcop, first_line);
9077 PL_lex_op = (OP*)pm;
9078 pl_yylval.ival = OP_SUBST;
9083 S_scan_trans(pTHX_ char *start)
9090 bool nondestruct = 0;
9093 PERL_ARGS_ASSERT_SCAN_TRANS;
9095 pl_yylval.ival = OP_NULL;
9097 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9099 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9103 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9105 SvREFCNT_dec_NN(PL_lex_stuff);
9106 PL_lex_stuff = NULL;
9107 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9110 complement = del = squash = 0;
9114 complement = OPpTRANS_COMPLEMENT;
9117 del = OPpTRANS_DELETE;
9120 squash = OPpTRANS_SQUASH;
9132 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9133 o->op_private &= ~OPpTRANS_ALL;
9134 o->op_private |= del|squash|complement|
9135 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9136 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9139 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9146 Takes a pointer to the first < in <<FOO.
9147 Returns a pointer to the byte following <<FOO.
9149 This function scans a heredoc, which involves different methods
9150 depending on whether we are in a string eval, quoted construct, etc.
9151 This is because PL_linestr could containing a single line of input, or
9152 a whole string being evalled, or the contents of the current quote-
9155 The two basic methods are:
9156 - Steal lines from the input stream
9157 - Scan the heredoc in PL_linestr and remove it therefrom
9159 In a file scope or filtered eval, the first method is used; in a
9160 string eval, the second.
9162 In a quote-like operator, we have to choose between the two,
9163 depending on where we can find a newline. We peek into outer lex-
9164 ing scopes until we find one with a newline in it. If we reach the
9165 outermost lexing scope and it is a file, we use the stream method.
9166 Otherwise it is treated as an eval.
9170 S_scan_heredoc(pTHX_ char *s)
9172 I32 op_type = OP_SCALAR;
9179 const bool infile = PL_rsfp || PL_parser->filtered;
9180 const line_t origline = CopLINE(PL_curcop);
9181 LEXSHARED *shared = PL_parser->lex_shared;
9183 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9186 d = PL_tokenbuf + 1;
9187 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9188 *PL_tokenbuf = '\n';
9190 while (SPACE_OR_TAB(*peek))
9192 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9195 s = delimcpy(d, e, s, PL_bufend, term, &len);
9197 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9203 /* <<\FOO is equivalent to <<'FOO' */
9207 if (!isWORDCHAR_lazy_if(s,UTF))
9208 deprecate("bare << to mean <<\"\"");
9209 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9214 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9215 Perl_croak(aTHX_ "Delimiter for here document is too long");
9218 len = d - PL_tokenbuf;
9220 #ifndef PERL_STRICT_CR
9221 d = strchr(s, '\r');
9223 char * const olds = s;
9225 while (s < PL_bufend) {
9231 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9240 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9245 tmpstr = newSV_type(SVt_PVIV);
9249 SvIV_set(tmpstr, -1);
9251 else if (term == '`') {
9252 op_type = OP_BACKTICK;
9253 SvIV_set(tmpstr, '\\');
9256 PL_multi_start = origline + 1 + PL_parser->herelines;
9257 PL_multi_open = PL_multi_close = '<';
9258 /* inside a string eval or quote-like operator */
9259 if (!infile || PL_lex_inwhat) {
9262 char * const olds = s;
9263 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9264 /* These two fields are not set until an inner lexing scope is
9265 entered. But we need them set here. */
9266 shared->ls_bufptr = s;
9267 shared->ls_linestr = PL_linestr;
9269 /* Look for a newline. If the current buffer does not have one,
9270 peek into the line buffer of the parent lexing scope, going
9271 up as many levels as necessary to find one with a newline
9274 while (!(s = (char *)memchr(
9275 (void *)shared->ls_bufptr, '\n',
9276 SvEND(shared->ls_linestr)-shared->ls_bufptr
9278 shared = shared->ls_prev;
9279 /* shared is only null if we have gone beyond the outermost
9280 lexing scope. In a file, we will have broken out of the
9281 loop in the previous iteration. In an eval, the string buf-
9282 fer ends with "\n;", so the while condition above will have
9283 evaluated to false. So shared can never be null. */
9285 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9286 most lexing scope. In a file, shared->ls_linestr at that
9287 level is just one line, so there is no body to steal. */
9288 if (infile && !shared->ls_prev) {
9294 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9297 linestr = shared->ls_linestr;
9298 bufend = SvEND(linestr);
9300 while (s < bufend - len + 1 &&
9301 memNE(s,PL_tokenbuf,len) ) {
9303 ++PL_parser->herelines;
9305 if (s >= bufend - len + 1) {
9308 sv_setpvn(tmpstr,d+1,s-d);
9310 /* the preceding stmt passes a newline */
9311 PL_parser->herelines++;
9313 /* s now points to the newline after the heredoc terminator.
9314 d points to the newline before the body of the heredoc.
9317 /* We are going to modify linestr in place here, so set
9318 aside copies of the string if necessary for re-evals or
9320 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9321 check shared->re_eval_str. */
9322 if (shared->re_eval_start || shared->re_eval_str) {
9323 /* Set aside the rest of the regexp */
9324 if (!shared->re_eval_str)
9325 shared->re_eval_str =
9326 newSVpvn(shared->re_eval_start,
9327 bufend - shared->re_eval_start);
9328 shared->re_eval_start -= s-d;
9330 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9331 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9332 cx->blk_eval.cur_text == linestr)
9334 cx->blk_eval.cur_text = newSVsv(linestr);
9335 SvSCREAM_on(cx->blk_eval.cur_text);
9337 /* Copy everything from s onwards back to d. */
9338 Move(s,d,bufend-s + 1,char);
9339 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9340 /* Setting PL_bufend only applies when we have not dug deeper
9341 into other scopes, because sublex_done sets PL_bufend to
9342 SvEND(PL_linestr). */
9343 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9350 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9351 term = PL_tokenbuf[1];
9353 linestr_save = PL_linestr; /* must restore this afterwards */
9354 d = s; /* and this */
9355 PL_linestr = newSVpvs("");
9356 PL_bufend = SvPVX(PL_linestr);
9358 PL_bufptr = PL_bufend;
9359 CopLINE_set(PL_curcop,
9360 origline + 1 + PL_parser->herelines);
9361 if (!lex_next_chunk(LEX_NO_TERM)
9362 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9363 /* Simply freeing linestr_save might seem simpler here, as it
9364 does not matter what PL_linestr points to, since we are
9365 about to croak; but in a quote-like op, linestr_save
9366 will have been prospectively freed already, via
9367 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9368 restore PL_linestr. */
9369 SvREFCNT_dec_NN(PL_linestr);
9370 PL_linestr = linestr_save;
9373 CopLINE_set(PL_curcop, origline);
9374 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9375 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9376 /* ^That should be enough to avoid this needing to grow: */
9377 sv_catpvs(PL_linestr, "\n\0");
9378 assert(s == SvPVX(PL_linestr));
9379 PL_bufend = SvEND(PL_linestr);
9382 PL_parser->herelines++;
9383 PL_last_lop = PL_last_uni = NULL;
9384 #ifndef PERL_STRICT_CR
9385 if (PL_bufend - PL_linestart >= 2) {
9386 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9387 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9389 PL_bufend[-2] = '\n';
9391 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9393 else if (PL_bufend[-1] == '\r')
9394 PL_bufend[-1] = '\n';
9396 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9397 PL_bufend[-1] = '\n';
9399 if (*s == term && PL_bufend-s >= len
9400 && memEQ(s,PL_tokenbuf + 1,len)) {
9401 SvREFCNT_dec(PL_linestr);
9402 PL_linestr = linestr_save;
9403 PL_linestart = SvPVX(linestr_save);
9404 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9409 sv_catsv(tmpstr,PL_linestr);
9413 PL_multi_end = origline + PL_parser->herelines;
9414 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9415 SvPV_shrink_to_cur(tmpstr);
9418 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9420 else if (IN_ENCODING)
9421 sv_recode_to_utf8(tmpstr, _get_encoding());
9423 PL_lex_stuff = tmpstr;
9424 pl_yylval.ival = op_type;
9428 SvREFCNT_dec(tmpstr);
9429 CopLINE_set(PL_curcop, origline);
9430 missingterm(PL_tokenbuf + 1);
9434 takes: current position in input buffer
9435 returns: new position in input buffer
9436 side-effects: pl_yylval and lex_op are set.
9441 <<>> read from ARGV without magic open
9442 <FH> read from filehandle
9443 <pkg::FH> read from package qualified filehandle
9444 <pkg'FH> read from package qualified filehandle
9445 <$fh> read from filehandle in $fh
9451 S_scan_inputsymbol(pTHX_ char *start)
9453 char *s = start; /* current position in buffer */
9456 bool nomagicopen = FALSE;
9457 char *d = PL_tokenbuf; /* start of temp holding space */
9458 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9460 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9462 end = strchr(s, '\n');
9465 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9472 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9474 /* die if we didn't have space for the contents of the <>,
9475 or if it didn't end, or if we see a newline
9478 if (len >= (I32)sizeof PL_tokenbuf)
9479 Perl_croak(aTHX_ "Excessively long <> operator");
9481 Perl_croak(aTHX_ "Unterminated <> operator");
9486 Remember, only scalar variables are interpreted as filehandles by
9487 this code. Anything more complex (e.g., <$fh{$num}>) will be
9488 treated as a glob() call.
9489 This code makes use of the fact that except for the $ at the front,
9490 a scalar variable and a filehandle look the same.
9492 if (*d == '$' && d[1]) d++;
9494 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9495 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9496 d += UTF ? UTF8SKIP(d) : 1;
9498 /* If we've tried to read what we allow filehandles to look like, and
9499 there's still text left, then it must be a glob() and not a getline.
9500 Use scan_str to pull out the stuff between the <> and treat it
9501 as nothing more than a string.
9504 if (d - PL_tokenbuf != len) {
9505 pl_yylval.ival = OP_GLOB;
9506 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9508 Perl_croak(aTHX_ "Glob not terminated");
9512 bool readline_overriden = FALSE;
9514 /* we're in a filehandle read situation */
9517 /* turn <> into <ARGV> */
9519 Copy("ARGV",d,5,char);
9521 /* Check whether readline() is overriden */
9522 if ((gv_readline = gv_override("readline",8)))
9523 readline_overriden = TRUE;
9525 /* if <$fh>, create the ops to turn the variable into a
9529 /* try to find it in the pad for this block, otherwise find
9530 add symbol table ops
9532 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9533 if (tmp != NOT_IN_PAD) {
9534 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9535 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9536 HEK * const stashname = HvNAME_HEK(stash);
9537 SV * const sym = sv_2mortal(newSVhek(stashname));
9538 sv_catpvs(sym, "::");
9544 OP * const o = newOP(OP_PADSV, 0);
9546 PL_lex_op = readline_overriden
9547 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9548 op_append_elem(OP_LIST, o,
9549 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9550 : (OP*)newUNOP(OP_READLINE, 0, o);
9558 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9560 PL_lex_op = readline_overriden
9561 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9562 op_append_elem(OP_LIST,
9563 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9564 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9565 : (OP*)newUNOP(OP_READLINE, 0,
9566 newUNOP(OP_RV2SV, 0,
9567 newGVOP(OP_GV, 0, gv)));
9569 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9570 pl_yylval.ival = OP_NULL;
9573 /* If it's none of the above, it must be a literal filehandle
9574 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9576 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9577 PL_lex_op = readline_overriden
9578 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9579 op_append_elem(OP_LIST,
9580 newGVOP(OP_GV, 0, gv),
9581 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9582 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9583 pl_yylval.ival = OP_NULL;
9593 start position in buffer
9594 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
9595 only if they are of the open/close form
9596 keep_delims preserve the delimiters around the string
9597 re_reparse compiling a run-time /(?{})/:
9598 collapse // to /, and skip encoding src
9599 delimp if non-null, this is set to the position of
9600 the closing delimiter, or just after it if
9601 the closing and opening delimiters differ
9602 (i.e., the opening delimiter of a substitu-
9604 returns: position to continue reading from buffer
9605 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9606 updates the read buffer.
9608 This subroutine pulls a string out of the input. It is called for:
9609 q single quotes q(literal text)
9610 ' single quotes 'literal text'
9611 qq double quotes qq(interpolate $here please)
9612 " double quotes "interpolate $here please"
9613 qx backticks qx(/bin/ls -l)
9614 ` backticks `/bin/ls -l`
9615 qw quote words @EXPORT_OK = qw( func() $spam )
9616 m// regexp match m/this/
9617 s/// regexp substitute s/this/that/
9618 tr/// string transliterate tr/this/that/
9619 y/// string transliterate y/this/that/
9620 ($*@) sub prototypes sub foo ($)
9621 (stuff) sub attr parameters sub foo : attr(stuff)
9622 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9624 In most of these cases (all but <>, patterns and transliterate)
9625 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9626 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9627 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9630 It skips whitespace before the string starts, and treats the first
9631 character as the delimiter. If the delimiter is one of ([{< then
9632 the corresponding "close" character )]}> is used as the closing
9633 delimiter. It allows quoting of delimiters, and if the string has
9634 balanced delimiters ([{<>}]) it allows nesting.
9636 On success, the SV with the resulting string is put into lex_stuff or,
9637 if that is already non-NULL, into lex_repl. The second case occurs only
9638 when parsing the RHS of the special constructs s/// and tr/// (y///).
9639 For convenience, the terminating delimiter character is stuffed into
9644 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9648 SV *sv; /* scalar value: string */
9649 const char *tmps; /* temp string, used for delimiter matching */
9650 char *s = start; /* current position in the buffer */
9651 char term; /* terminating character */
9652 char *to; /* current position in the sv's data */
9653 I32 brackets = 1; /* bracket nesting level */
9654 bool has_utf8 = FALSE; /* is there any utf8 content? */
9655 I32 termcode; /* terminating char. code */
9656 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9657 STRLEN termlen; /* length of terminating string */
9658 int last_off = 0; /* last position for nesting bracket */
9661 PERL_ARGS_ASSERT_SCAN_STR;
9663 /* skip space before the delimiter */
9668 /* mark where we are, in case we need to report errors */
9671 /* after skipping whitespace, the next character is the terminator */
9674 termcode = termstr[0] = term;
9678 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9679 Copy(s, termstr, termlen, U8);
9680 if (!UTF8_IS_INVARIANT(term))
9684 /* mark where we are */
9685 PL_multi_start = CopLINE(PL_curcop);
9686 PL_multi_open = term;
9687 herelines = PL_parser->herelines;
9689 /* find corresponding closing delimiter */
9690 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9691 termcode = termstr[0] = term = tmps[5];
9693 PL_multi_close = term;
9695 if (PL_multi_open == PL_multi_close) {
9696 keep_bracketed_quoted = FALSE;
9699 /* create a new SV to hold the contents. 79 is the SV's initial length.
9700 What a random number. */
9701 sv = newSV_type(SVt_PVIV);
9703 SvIV_set(sv, termcode);
9704 (void)SvPOK_only(sv); /* validate pointer */
9706 /* move past delimiter and try to read a complete string */
9708 sv_catpvn(sv, s, termlen);
9711 if (IN_ENCODING && !UTF && !re_reparse) {
9715 int offset = s - SvPVX_const(PL_linestr);
9716 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9717 &offset, (char*)termstr, termlen);
9721 if (SvIsCOW(PL_linestr)) {
9722 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9723 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9724 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9725 char *buf = SvPVX(PL_linestr);
9726 bufend_pos = PL_parser->bufend - buf;
9727 bufptr_pos = PL_parser->bufptr - buf;
9728 oldbufptr_pos = PL_parser->oldbufptr - buf;
9729 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9730 linestart_pos = PL_parser->linestart - buf;
9731 last_uni_pos = PL_parser->last_uni
9732 ? PL_parser->last_uni - buf
9734 last_lop_pos = PL_parser->last_lop
9735 ? PL_parser->last_lop - buf
9738 PL_parser->lex_shared->re_eval_start ?
9739 PL_parser->lex_shared->re_eval_start - buf : 0;
9742 sv_force_normal(PL_linestr);
9744 buf = SvPVX(PL_linestr);
9745 PL_parser->bufend = buf + bufend_pos;
9746 PL_parser->bufptr = buf + bufptr_pos;
9747 PL_parser->oldbufptr = buf + oldbufptr_pos;
9748 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9749 PL_parser->linestart = buf + linestart_pos;
9750 if (PL_parser->last_uni)
9751 PL_parser->last_uni = buf + last_uni_pos;
9752 if (PL_parser->last_lop)
9753 PL_parser->last_lop = buf + last_lop_pos;
9754 if (PL_parser->lex_shared->re_eval_start)
9755 PL_parser->lex_shared->re_eval_start =
9756 buf + re_eval_start_pos;
9759 ns = SvPVX_const(PL_linestr) + offset;
9760 svlast = SvEND(sv) - 1;
9762 for (; s < ns; s++) {
9763 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9764 COPLINE_INC_WITH_HERELINES;
9767 goto read_more_line;
9769 /* handle quoted delimiters */
9770 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9772 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9774 if ((svlast-1 - t) % 2) {
9775 if (!keep_bracketed_quoted) {
9778 SvCUR_set(sv, SvCUR(sv) - 1);
9783 if (PL_multi_open == PL_multi_close) {
9789 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9790 /* At here, all closes are "was quoted" one,
9791 so we don't check PL_multi_close. */
9793 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9798 else if (*t == PL_multi_open)
9806 SvCUR_set(sv, w - SvPVX_const(sv));
9808 last_off = w - SvPVX(sv);
9809 if (--brackets <= 0)
9815 SvCUR_set(sv, SvCUR(sv) - 1);
9821 /* extend sv if need be */
9822 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9823 /* set 'to' to the next character in the sv's string */
9824 to = SvPVX(sv)+SvCUR(sv);
9826 /* if open delimiter is the close delimiter read unbridle */
9827 if (PL_multi_open == PL_multi_close) {
9828 for (; s < PL_bufend; s++,to++) {
9829 /* embedded newlines increment the current line number */
9830 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9831 COPLINE_INC_WITH_HERELINES;
9832 /* handle quoted delimiters */
9833 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9834 if (!keep_bracketed_quoted
9836 || (re_reparse && s[1] == '\\'))
9839 else /* any other quotes are simply copied straight through */
9842 /* terminate when run out of buffer (the for() condition), or
9843 have found the terminator */
9844 else if (*s == term) {
9847 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9850 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9856 /* if the terminator isn't the same as the start character (e.g.,
9857 matched brackets), we have to allow more in the quoting, and
9858 be prepared for nested brackets.
9861 /* read until we run out of string, or we find the terminator */
9862 for (; s < PL_bufend; s++,to++) {
9863 /* embedded newlines increment the line count */
9864 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9865 COPLINE_INC_WITH_HERELINES;
9866 /* backslashes can escape the open or closing characters */
9867 if (*s == '\\' && s+1 < PL_bufend) {
9868 if (!keep_bracketed_quoted &&
9869 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9876 /* allow nested opens and closes */
9877 else if (*s == PL_multi_close && --brackets <= 0)
9879 else if (*s == PL_multi_open)
9881 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9886 /* terminate the copied string and update the sv's end-of-string */
9888 SvCUR_set(sv, to - SvPVX_const(sv));
9891 * this next chunk reads more into the buffer if we're not done yet
9895 break; /* handle case where we are done yet :-) */
9897 #ifndef PERL_STRICT_CR
9898 if (to - SvPVX_const(sv) >= 2) {
9899 if ((to[-2] == '\r' && to[-1] == '\n') ||
9900 (to[-2] == '\n' && to[-1] == '\r'))
9904 SvCUR_set(sv, to - SvPVX_const(sv));
9906 else if (to[-1] == '\r')
9909 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9914 /* if we're out of file, or a read fails, bail and reset the current
9915 line marker so we can report where the unterminated string began
9917 COPLINE_INC_WITH_HERELINES;
9918 PL_bufptr = PL_bufend;
9919 if (!lex_next_chunk(0)) {
9921 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9927 /* at this point, we have successfully read the delimited string */
9929 if (!IN_ENCODING || UTF || re_reparse) {
9932 sv_catpvn(sv, s, termlen);
9935 if (has_utf8 || (IN_ENCODING && !re_reparse))
9938 PL_multi_end = CopLINE(PL_curcop);
9939 CopLINE_set(PL_curcop, PL_multi_start);
9940 PL_parser->herelines = herelines;
9942 /* if we allocated too much space, give some back */
9943 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9944 SvLEN_set(sv, SvCUR(sv) + 1);
9945 SvPV_renew(sv, SvLEN(sv));
9948 /* decide whether this is the first or second quoted string we've read
9953 PL_sublex_info.repl = sv;
9956 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9962 takes: pointer to position in buffer
9963 returns: pointer to new position in buffer
9964 side-effects: builds ops for the constant in pl_yylval.op
9966 Read a number in any of the formats that Perl accepts:
9968 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9969 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
9970 0b[01](_?[01])* binary integers
9971 0[0-7](_?[0-7])* octal integers
9972 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
9973 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
9975 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9978 If it reads a number without a decimal point or an exponent, it will
9979 try converting the number to an integer and see if it can do so
9980 without loss of precision.
9984 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9986 const char *s = start; /* current position in buffer */
9987 char *d; /* destination in temp buffer */
9988 char *e; /* end of temp buffer */
9989 NV nv; /* number read, as a double */
9990 SV *sv = NULL; /* place to put the converted number */
9991 bool floatit; /* boolean: int or float? */
9992 const char *lastub = NULL; /* position of last underbar */
9993 static const char* const number_too_long = "Number too long";
9994 /* Hexadecimal floating point.
9996 * In many places (where we have quads and NV is IEEE 754 double)
9997 * we can fit the mantissa bits of a NV into an unsigned quad.
9998 * (Note that UVs might not be quads even when we have quads.)
9999 * This will not work everywhere, though (either no quads, or
10000 * using long doubles), in which case we have to resort to NV,
10001 * which will probably mean horrible loss of precision due to
10002 * multiple fp operations. */
10003 bool hexfp = FALSE;
10004 int total_bits = 0;
10005 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10006 # define HEXFP_UQUAD
10007 Uquad_t hexfp_uquad = 0;
10008 int hexfp_frac_bits = 0;
10013 NV hexfp_mult = 1.0;
10014 UV high_non_zero = 0; /* highest digit */
10016 PERL_ARGS_ASSERT_SCAN_NUM;
10018 /* We use the first character to decide what type of number this is */
10022 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10024 /* if it starts with a 0, it could be an octal number, a decimal in
10025 0.13 disguise, or a hexadecimal number, or a binary number. */
10029 u holds the "number so far"
10030 shift the power of 2 of the base
10031 (hex == 4, octal == 3, binary == 1)
10032 overflowed was the number more than we can hold?
10034 Shift is used when we add a digit. It also serves as an "are
10035 we in octal/hex/binary?" indicator to disallow hex characters
10036 when in octal mode.
10041 bool overflowed = FALSE;
10042 bool just_zero = TRUE; /* just plain 0 or binary number? */
10043 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10044 static const char* const bases[5] =
10045 { "", "binary", "", "octal", "hexadecimal" };
10046 static const char* const Bases[5] =
10047 { "", "Binary", "", "Octal", "Hexadecimal" };
10048 static const char* const maxima[5] =
10050 "0b11111111111111111111111111111111",
10054 const char *base, *Base, *max;
10056 /* check for hex */
10057 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10061 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10066 /* check for a decimal in disguise */
10067 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10069 /* so it must be octal */
10076 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10077 "Misplaced _ in number");
10081 base = bases[shift];
10082 Base = Bases[shift];
10083 max = maxima[shift];
10085 /* read the rest of the number */
10087 /* x is used in the overflow test,
10088 b is the digit we're adding on. */
10093 /* if we don't mention it, we're done */
10097 /* _ are ignored -- but warned about if consecutive */
10099 if (lastub && s == lastub + 1)
10100 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10101 "Misplaced _ in number");
10105 /* 8 and 9 are not octal */
10106 case '8': case '9':
10108 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10112 case '2': case '3': case '4':
10113 case '5': case '6': case '7':
10115 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10118 case '0': case '1':
10119 b = *s++ & 15; /* ASCII digit -> value of digit */
10123 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10124 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10125 /* make sure they said 0x */
10128 b = (*s++ & 7) + 9;
10130 /* Prepare to put the digit we have onto the end
10131 of the number so far. We check for overflows.
10137 x = u << shift; /* make room for the digit */
10139 total_bits += shift;
10141 if ((x >> shift) != u
10142 && !(PL_hints & HINT_NEW_BINARY)) {
10145 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10146 "Integer overflow in %s number",
10149 u = x | b; /* add the digit to the end */
10152 n *= nvshift[shift];
10153 /* If an NV has not enough bits in its
10154 * mantissa to represent an UV this summing of
10155 * small low-order numbers is a waste of time
10156 * (because the NV cannot preserve the
10157 * low-order bits anyway): we could just
10158 * remember when did we overflow and in the
10159 * end just multiply n by the right
10164 if (high_non_zero == 0 && b > 0)
10167 /* this could be hexfp, but peek ahead
10168 * to avoid matching ".." */
10169 if (UNLIKELY(HEXFP_PEEK(s))) {
10177 /* if we get here, we had success: make a scalar value from
10182 /* final misplaced underbar check */
10183 if (s[-1] == '_') {
10184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10187 if (UNLIKELY(HEXFP_PEEK(s))) {
10188 /* Do sloppy (on the underbars) but quick detection
10189 * (and value construction) for hexfp, the decimal
10190 * detection will shortly be more thorough with the
10191 * underbar checks. */
10195 #else /* HEXFP_NV */
10200 NV mult = 1 / 16.0;
10203 while (isXDIGIT(*h) || *h == '_') {
10204 if (isXDIGIT(*h)) {
10205 U8 b = XDIGIT_VALUE(*h);
10206 total_bits += shift;
10208 hexfp_uquad <<= shift;
10210 hexfp_frac_bits += shift;
10211 #else /* HEXFP_NV */
10212 hexfp_nv += b * mult;
10220 if (total_bits >= 4) {
10221 if (high_non_zero < 0x8)
10223 if (high_non_zero < 0x4)
10225 if (high_non_zero < 0x2)
10229 if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
10230 bool negexp = FALSE;
10234 else if (*h == '-') {
10240 while (isDIGIT(*h) || *h == '_') {
10243 hexfp_exp += *h - '0';
10246 -hexfp_exp < NV_MIN_EXP - 1) {
10247 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10248 "Hexadecimal float: exponent underflow");
10255 hexfp_exp > NV_MAX_EXP - 1) {
10256 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10257 "Hexadecimal float: exponent overflow");
10266 hexfp_exp = -hexfp_exp;
10268 hexfp_exp -= hexfp_frac_bits;
10270 hexfp_mult = pow(2.0, hexfp_exp);
10278 if (n > 4294967295.0)
10279 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10280 "%s number > %s non-portable",
10286 if (u > 0xffffffff)
10287 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10288 "%s number > %s non-portable",
10293 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10294 sv = new_constant(start, s - start, "integer",
10295 sv, NULL, NULL, 0);
10296 else if (PL_hints & HINT_NEW_BINARY)
10297 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10302 handle decimal numbers.
10303 we're also sent here when we read a 0 as the first digit
10305 case '1': case '2': case '3': case '4': case '5':
10306 case '6': case '7': case '8': case '9': case '.':
10309 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10318 /* read next group of digits and _ and copy into d */
10319 while (isDIGIT(*s) || *s == '_' ||
10320 UNLIKELY(hexfp && isXDIGIT(*s))) {
10321 /* skip underscores, checking for misplaced ones
10325 if (lastub && s == lastub + 1)
10326 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10327 "Misplaced _ in number");
10331 /* check for end of fixed-length buffer */
10333 Perl_croak(aTHX_ "%s", number_too_long);
10334 /* if we're ok, copy the character */
10339 /* final misplaced underbar check */
10340 if (lastub && s == lastub + 1) {
10341 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10344 /* read a decimal portion if there is one. avoid
10345 3..5 being interpreted as the number 3. followed
10348 if (*s == '.' && s[1] != '.') {
10353 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10354 "Misplaced _ in number");
10358 /* copy, ignoring underbars, until we run out of digits.
10360 for (; isDIGIT(*s) || *s == '_' ||
10361 UNLIKELY(hexfp && isXDIGIT(*s));
10363 /* fixed length buffer check */
10365 Perl_croak(aTHX_ "%s", number_too_long);
10367 if (lastub && s == lastub + 1)
10368 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10369 "Misplaced _ in number");
10375 /* fractional part ending in underbar? */
10376 if (s[-1] == '_') {
10377 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10378 "Misplaced _ in number");
10380 if (*s == '.' && isDIGIT(s[1])) {
10381 /* oops, it's really a v-string, but without the "v" */
10387 /* read exponent part, if present */
10388 if ((isALPHA_FOLD_EQ(*s, 'e')
10389 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10390 && strchr("+-0123456789_", s[1]))
10394 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10395 ditto for p (hexfloats) */
10396 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10397 /* At least some Mach atof()s don't grok 'E' */
10400 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10407 /* stray preinitial _ */
10409 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10410 "Misplaced _ in number");
10414 /* allow positive or negative exponent */
10415 if (*s == '+' || *s == '-')
10418 /* stray initial _ */
10420 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10421 "Misplaced _ in number");
10425 /* read digits of exponent */
10426 while (isDIGIT(*s) || *s == '_') {
10429 Perl_croak(aTHX_ "%s", number_too_long);
10433 if (((lastub && s == lastub + 1) ||
10434 (!isDIGIT(s[1]) && s[1] != '_')))
10435 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10436 "Misplaced _ in number");
10444 We try to do an integer conversion first if no characters
10445 indicating "float" have been found.
10450 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10452 if (flags == IS_NUMBER_IN_UV) {
10454 sv = newSViv(uv); /* Prefer IVs over UVs. */
10457 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10458 if (uv <= (UV) IV_MIN)
10459 sv = newSViv(-(IV)uv);
10466 STORE_NUMERIC_LOCAL_SET_STANDARD();
10467 /* terminate the string */
10469 if (UNLIKELY(hexfp)) {
10470 # ifdef NV_MANT_DIG
10471 if (total_bits > NV_MANT_DIG)
10472 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10473 "Hexadecimal float: mantissa overflow");
10476 nv = hexfp_uquad * hexfp_mult;
10477 #else /* HEXFP_NV */
10478 nv = hexfp_nv * hexfp_mult;
10481 nv = Atof(PL_tokenbuf);
10483 RESTORE_NUMERIC_LOCAL();
10488 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10489 const char *const key = floatit ? "float" : "integer";
10490 const STRLEN keylen = floatit ? 5 : 7;
10491 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10492 key, keylen, sv, NULL, NULL, 0);
10496 /* if it starts with a v, it could be a v-string */
10499 sv = newSV(5); /* preallocate storage space */
10500 ENTER_with_name("scan_vstring");
10502 s = scan_vstring(s, PL_bufend, sv);
10503 SvREFCNT_inc_simple_void_NN(sv);
10504 LEAVE_with_name("scan_vstring");
10508 /* make the op for the constant and return */
10511 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10513 lvalp->opval = NULL;
10519 S_scan_formline(pTHX_ char *s)
10523 SV * const stuff = newSVpvs("");
10524 bool needargs = FALSE;
10525 bool eofmt = FALSE;
10527 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10529 while (!needargs) {
10532 #ifdef PERL_STRICT_CR
10533 while (SPACE_OR_TAB(*t))
10536 while (SPACE_OR_TAB(*t) || *t == '\r')
10539 if (*t == '\n' || t == PL_bufend) {
10544 eol = (char *) memchr(s,'\n',PL_bufend-s);
10548 for (t = s; t < eol; t++) {
10549 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10551 goto enough; /* ~~ must be first line in formline */
10553 if (*t == '@' || *t == '^')
10557 sv_catpvn(stuff, s, eol-s);
10558 #ifndef PERL_STRICT_CR
10559 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10560 char *end = SvPVX(stuff) + SvCUR(stuff);
10563 SvCUR_set(stuff, SvCUR(stuff) - 1);
10571 if ((PL_rsfp || PL_parser->filtered)
10572 && PL_parser->form_lex_state == LEX_NORMAL) {
10574 PL_bufptr = PL_bufend;
10575 COPLINE_INC_WITH_HERELINES;
10576 got_some = lex_next_chunk(0);
10577 CopLINE_dec(PL_curcop);
10585 if (!SvCUR(stuff) || needargs)
10586 PL_lex_state = PL_parser->form_lex_state;
10587 if (SvCUR(stuff)) {
10588 PL_expect = XSTATE;
10590 const char *s2 = s;
10591 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10595 PL_expect = XTERMBLOCK;
10596 NEXTVAL_NEXTTOKE.ival = 0;
10599 NEXTVAL_NEXTTOKE.ival = 0;
10600 force_next(FORMLBRACK);
10603 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10605 else if (IN_ENCODING)
10606 sv_recode_to_utf8(stuff, _get_encoding());
10608 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10612 SvREFCNT_dec(stuff);
10614 PL_lex_formbrack = 0;
10620 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10622 const I32 oldsavestack_ix = PL_savestack_ix;
10623 CV* const outsidecv = PL_compcv;
10625 SAVEI32(PL_subline);
10626 save_item(PL_subname);
10627 SAVESPTR(PL_compcv);
10629 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10630 CvFLAGS(PL_compcv) |= flags;
10632 PL_subline = CopLINE(PL_curcop);
10633 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10634 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10635 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10636 if (outsidecv && CvPADLIST(outsidecv))
10637 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10639 return oldsavestack_ix;
10643 S_yywarn(pTHX_ const char *const s, U32 flags)
10645 PERL_ARGS_ASSERT_YYWARN;
10647 PL_in_eval |= EVAL_WARNONLY;
10648 yyerror_pv(s, flags);
10653 Perl_yyerror(pTHX_ const char *const s)
10655 PERL_ARGS_ASSERT_YYERROR;
10656 return yyerror_pvn(s, strlen(s), 0);
10660 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10662 PERL_ARGS_ASSERT_YYERROR_PV;
10663 return yyerror_pvn(s, strlen(s), flags);
10667 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10669 const char *context = NULL;
10672 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10673 int yychar = PL_parser->yychar;
10675 PERL_ARGS_ASSERT_YYERROR_PVN;
10677 if (!yychar || (yychar == ';' && !PL_rsfp))
10678 sv_catpvs(where_sv, "at EOF");
10679 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10680 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10681 PL_oldbufptr != PL_bufptr) {
10684 The code below is removed for NetWare because it abends/crashes on NetWare
10685 when the script has error such as not having the closing quotes like:
10686 if ($var eq "value)
10687 Checking of white spaces is anyway done in NetWare code.
10690 while (isSPACE(*PL_oldoldbufptr))
10693 context = PL_oldoldbufptr;
10694 contlen = PL_bufptr - PL_oldoldbufptr;
10696 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10697 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10700 The code below is removed for NetWare because it abends/crashes on NetWare
10701 when the script has error such as not having the closing quotes like:
10702 if ($var eq "value)
10703 Checking of white spaces is anyway done in NetWare code.
10706 while (isSPACE(*PL_oldbufptr))
10709 context = PL_oldbufptr;
10710 contlen = PL_bufptr - PL_oldbufptr;
10712 else if (yychar > 255)
10713 sv_catpvs(where_sv, "next token ???");
10714 else if (yychar == YYEMPTY) {
10715 if (PL_lex_state == LEX_NORMAL ||
10716 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10717 sv_catpvs(where_sv, "at end of line");
10718 else if (PL_lex_inpat)
10719 sv_catpvs(where_sv, "within pattern");
10721 sv_catpvs(where_sv, "within string");
10724 sv_catpvs(where_sv, "next char ");
10726 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10727 else if (isPRINT_LC(yychar)) {
10728 const char string = yychar;
10729 sv_catpvn(where_sv, &string, 1);
10732 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10734 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10735 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10736 OutCopFILE(PL_curcop),
10737 (IV)(PL_parser->preambling == NOLINE
10738 ? CopLINE(PL_curcop)
10739 : PL_parser->preambling));
10741 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10742 UTF8fARG(UTF, contlen, context));
10744 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10745 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10746 Perl_sv_catpvf(aTHX_ msg,
10747 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10748 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10751 if (PL_in_eval & EVAL_WARNONLY) {
10752 PL_in_eval &= ~EVAL_WARNONLY;
10753 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10757 if (PL_error_count >= 10) {
10759 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10760 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10761 SVfARG(errsv), OutCopFILE(PL_curcop));
10763 Perl_croak(aTHX_ "%s has too many errors.\n",
10764 OutCopFILE(PL_curcop));
10767 PL_in_my_stash = NULL;
10772 S_swallow_bom(pTHX_ U8 *s)
10774 const STRLEN slen = SvCUR(PL_linestr);
10776 PERL_ARGS_ASSERT_SWALLOW_BOM;
10780 if (s[1] == 0xFE) {
10781 /* UTF-16 little-endian? (or UTF-32LE?) */
10782 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10783 /* diag_listed_as: Unsupported script encoding %s */
10784 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10785 #ifndef PERL_NO_UTF16_FILTER
10786 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10788 if (PL_bufend > (char*)s) {
10789 s = add_utf16_textfilter(s, TRUE);
10792 /* diag_listed_as: Unsupported script encoding %s */
10793 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10798 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10799 #ifndef PERL_NO_UTF16_FILTER
10800 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10802 if (PL_bufend > (char *)s) {
10803 s = add_utf16_textfilter(s, FALSE);
10806 /* diag_listed_as: Unsupported script encoding %s */
10807 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10811 case BOM_UTF8_FIRST_BYTE: {
10812 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10813 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10814 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10815 s += len + 1; /* UTF-8 */
10822 if (s[2] == 0xFE && s[3] == 0xFF) {
10823 /* UTF-32 big-endian */
10824 /* diag_listed_as: Unsupported script encoding %s */
10825 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10828 else if (s[2] == 0 && s[3] != 0) {
10831 * are a good indicator of UTF-16BE. */
10832 #ifndef PERL_NO_UTF16_FILTER
10833 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10834 s = add_utf16_textfilter(s, FALSE);
10836 /* diag_listed_as: Unsupported script encoding %s */
10837 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10844 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10847 * are a good indicator of UTF-16LE. */
10848 #ifndef PERL_NO_UTF16_FILTER
10849 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10850 s = add_utf16_textfilter(s, TRUE);
10852 /* diag_listed_as: Unsupported script encoding %s */
10853 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10861 #ifndef PERL_NO_UTF16_FILTER
10863 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10865 SV *const filter = FILTER_DATA(idx);
10866 /* We re-use this each time round, throwing the contents away before we
10868 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10869 SV *const utf8_buffer = filter;
10870 IV status = IoPAGE(filter);
10871 const bool reverse = cBOOL(IoLINES(filter));
10874 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10876 /* As we're automatically added, at the lowest level, and hence only called
10877 from this file, we can be sure that we're not called in block mode. Hence
10878 don't bother writing code to deal with block mode. */
10880 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10883 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10885 DEBUG_P(PerlIO_printf(Perl_debug_log,
10886 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10887 FPTR2DPTR(void *, S_utf16_textfilter),
10888 reverse ? 'l' : 'b', idx, maxlen, status,
10889 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10896 /* First, look in our buffer of existing UTF-8 data: */
10897 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10901 } else if (status == 0) {
10903 IoPAGE(filter) = 0;
10904 nl = SvEND(utf8_buffer);
10907 STRLEN got = nl - SvPVX(utf8_buffer);
10908 /* Did we have anything to append? */
10910 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10911 /* Everything else in this code works just fine if SVp_POK isn't
10912 set. This, however, needs it, and we need it to work, else
10913 we loop infinitely because the buffer is never consumed. */
10914 sv_chop(utf8_buffer, nl);
10918 /* OK, not a complete line there, so need to read some more UTF-16.
10919 Read an extra octect if the buffer currently has an odd number. */
10923 if (SvCUR(utf16_buffer) >= 2) {
10924 /* Location of the high octet of the last complete code point.
10925 Gosh, UTF-16 is a pain. All the benefits of variable length,
10926 *coupled* with all the benefits of partial reads and
10928 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10929 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10931 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10935 /* We have the first half of a surrogate. Read more. */
10936 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10939 status = FILTER_READ(idx + 1, utf16_buffer,
10940 160 + (SvCUR(utf16_buffer) & 1));
10941 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10942 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10945 IoPAGE(filter) = status;
10950 chars = SvCUR(utf16_buffer) >> 1;
10951 have = SvCUR(utf8_buffer);
10952 SvGROW(utf8_buffer, have + chars * 3 + 1);
10955 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10956 (U8*)SvPVX_const(utf8_buffer) + have,
10957 chars * 2, &newlen);
10959 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10960 (U8*)SvPVX_const(utf8_buffer) + have,
10961 chars * 2, &newlen);
10963 SvCUR_set(utf8_buffer, have + newlen);
10966 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10967 it's private to us, and utf16_to_utf8{,reversed} take a
10968 (pointer,length) pair, rather than a NUL-terminated string. */
10969 if(SvCUR(utf16_buffer) & 1) {
10970 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10971 SvCUR_set(utf16_buffer, 1);
10973 SvCUR_set(utf16_buffer, 0);
10976 DEBUG_P(PerlIO_printf(Perl_debug_log,
10977 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10979 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10980 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10985 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10987 SV *filter = filter_add(S_utf16_textfilter, NULL);
10989 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10991 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10992 sv_setpvs(filter, "");
10993 IoLINES(filter) = reversed;
10994 IoPAGE(filter) = 1; /* Not EOF */
10996 /* Sadly, we have to return a valid pointer, come what may, so we have to
10997 ignore any error return from this. */
10998 SvCUR_set(PL_linestr, 0);
10999 if (FILTER_READ(0, PL_linestr, 0)) {
11000 SvUTF8_on(PL_linestr);
11002 SvUTF8_on(PL_linestr);
11004 PL_bufend = SvEND(PL_linestr);
11005 return (U8*)SvPVX(PL_linestr);
11010 Returns a pointer to the next character after the parsed
11011 vstring, as well as updating the passed in sv.
11013 Function must be called like
11015 sv = sv_2mortal(newSV(5));
11016 s = scan_vstring(s,e,sv);
11018 where s and e are the start and end of the string.
11019 The sv should already be large enough to store the vstring
11020 passed in, for performance reasons.
11022 This function may croak if fatal warnings are enabled in the
11023 calling scope, hence the sv_2mortal in the example (to prevent
11024 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11030 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11032 const char *pos = s;
11033 const char *start = s;
11035 PERL_ARGS_ASSERT_SCAN_VSTRING;
11037 if (*pos == 'v') pos++; /* get past 'v' */
11038 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11040 if ( *pos != '.') {
11041 /* this may not be a v-string if followed by => */
11042 const char *next = pos;
11043 while (next < e && isSPACE(*next))
11045 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11046 /* return string not v-string */
11047 sv_setpvn(sv,(char *)s,pos-s);
11048 return (char *)pos;
11052 if (!isALPHA(*pos)) {
11053 U8 tmpbuf[UTF8_MAXBYTES+1];
11056 s++; /* get past 'v' */
11061 /* this is atoi() that tolerates underscores */
11064 const char *end = pos;
11066 while (--end >= s) {
11068 const UV orev = rev;
11069 rev += (*end - '0') * mult;
11072 /* diag_listed_as: Integer overflow in %s number */
11073 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11074 "Integer overflow in decimal number");
11078 if (rev > 0x7FFFFFFF)
11079 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11081 /* Append native character for the rev point */
11082 tmpend = uvchr_to_utf8(tmpbuf, rev);
11083 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11084 if (!UVCHR_IS_INVARIANT(rev))
11086 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11092 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11096 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11103 Perl_keyword_plugin_standard(pTHX_
11104 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11106 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11107 PERL_UNUSED_CONTEXT;
11108 PERL_UNUSED_ARG(keyword_ptr);
11109 PERL_UNUSED_ARG(keyword_len);
11110 PERL_UNUSED_ARG(op_ptr);
11111 return KEYWORD_PLUGIN_DECLINE;
11114 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11116 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11118 SAVEI32(PL_lex_brackets);
11119 if (PL_lex_brackets > 100)
11120 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11121 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11122 SAVEI32(PL_lex_allbrackets);
11123 PL_lex_allbrackets = 0;
11124 SAVEI8(PL_lex_fakeeof);
11125 PL_lex_fakeeof = (U8)fakeeof;
11126 if(yyparse(gramtype) && !PL_parser->error_count)
11127 qerror(Perl_mess(aTHX_ "Parse error"));
11130 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11132 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11136 SAVEVPTR(PL_eval_root);
11137 PL_eval_root = NULL;
11138 parse_recdescent(gramtype, fakeeof);
11144 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11146 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11149 if (flags & ~PARSE_OPTIONAL)
11150 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11151 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11152 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11153 if (!PL_parser->error_count)
11154 qerror(Perl_mess(aTHX_ "Parse error"));
11155 exprop = newOP(OP_NULL, 0);
11161 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11163 Parse a Perl arithmetic expression. This may contain operators of precedence
11164 down to the bit shift operators. The expression must be followed (and thus
11165 terminated) either by a comparison or lower-precedence operator or by
11166 something that would normally terminate an expression such as semicolon.
11167 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11168 otherwise it is mandatory. It is up to the caller to ensure that the
11169 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11170 the source of the code to be parsed and the lexical context for the
11173 The op tree representing the expression is returned. If an optional
11174 expression is absent, a null pointer is returned, otherwise the pointer
11177 If an error occurs in parsing or compilation, in most cases a valid op
11178 tree is returned anyway. The error is reflected in the parser state,
11179 normally resulting in a single exception at the top level of parsing
11180 which covers all the compilation errors that occurred. Some compilation
11181 errors, however, will throw an exception immediately.
11187 Perl_parse_arithexpr(pTHX_ U32 flags)
11189 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11193 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11195 Parse a Perl term expression. This may contain operators of precedence
11196 down to the assignment operators. The expression must be followed (and thus
11197 terminated) either by a comma or lower-precedence operator or by
11198 something that would normally terminate an expression such as semicolon.
11199 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11200 otherwise it is mandatory. It is up to the caller to ensure that the
11201 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11202 the source of the code to be parsed and the lexical context for the
11205 The op tree representing the expression is returned. If an optional
11206 expression is absent, a null pointer is returned, otherwise the pointer
11209 If an error occurs in parsing or compilation, in most cases a valid op
11210 tree is returned anyway. The error is reflected in the parser state,
11211 normally resulting in a single exception at the top level of parsing
11212 which covers all the compilation errors that occurred. Some compilation
11213 errors, however, will throw an exception immediately.
11219 Perl_parse_termexpr(pTHX_ U32 flags)
11221 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11225 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11227 Parse a Perl list expression. This may contain operators of precedence
11228 down to the comma operator. The expression must be followed (and thus
11229 terminated) either by a low-precedence logic operator such as C<or> or by
11230 something that would normally terminate an expression such as semicolon.
11231 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11232 otherwise it is mandatory. It is up to the caller to ensure that the
11233 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11234 the source of the code to be parsed and the lexical context for the
11237 The op tree representing the expression is returned. If an optional
11238 expression is absent, a null pointer is returned, otherwise the pointer
11241 If an error occurs in parsing or compilation, in most cases a valid op
11242 tree is returned anyway. The error is reflected in the parser state,
11243 normally resulting in a single exception at the top level of parsing
11244 which covers all the compilation errors that occurred. Some compilation
11245 errors, however, will throw an exception immediately.
11251 Perl_parse_listexpr(pTHX_ U32 flags)
11253 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11257 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11259 Parse a single complete Perl expression. This allows the full
11260 expression grammar, including the lowest-precedence operators such
11261 as C<or>. The expression must be followed (and thus terminated) by a
11262 token that an expression would normally be terminated by: end-of-file,
11263 closing bracketing punctuation, semicolon, or one of the keywords that
11264 signals a postfix expression-statement modifier. If I<flags> includes
11265 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11266 mandatory. It is up to the caller to ensure that the dynamic parser
11267 state (L</PL_parser> et al) is correctly set to reflect the source of
11268 the code to be parsed and the lexical context for the expression.
11270 The op tree representing the expression is returned. If an optional
11271 expression is absent, a null pointer is returned, otherwise the pointer
11274 If an error occurs in parsing or compilation, in most cases a valid op
11275 tree is returned anyway. The error is reflected in the parser state,
11276 normally resulting in a single exception at the top level of parsing
11277 which covers all the compilation errors that occurred. Some compilation
11278 errors, however, will throw an exception immediately.
11284 Perl_parse_fullexpr(pTHX_ U32 flags)
11286 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11290 =for apidoc Amx|OP *|parse_block|U32 flags
11292 Parse a single complete Perl code block. This consists of an opening
11293 brace, a sequence of statements, and a closing brace. The block
11294 constitutes a lexical scope, so C<my> variables and various compile-time
11295 effects can be contained within it. It is up to the caller to ensure
11296 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11297 reflect the source of the code to be parsed and the lexical context for
11300 The op tree representing the code block is returned. This is always a
11301 real op, never a null pointer. It will normally be a C<lineseq> list,
11302 including C<nextstate> or equivalent ops. No ops to construct any kind
11303 of runtime scope are included by virtue of it being a block.
11305 If an error occurs in parsing or compilation, in most cases a valid op
11306 tree (most likely null) is returned anyway. The error is reflected in
11307 the parser state, normally resulting in a single exception at the top
11308 level of parsing which covers all the compilation errors that occurred.
11309 Some compilation errors, however, will throw an exception immediately.
11311 The I<flags> parameter is reserved for future use, and must always
11318 Perl_parse_block(pTHX_ U32 flags)
11321 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11322 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11326 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11328 Parse a single unadorned Perl statement. This may be a normal imperative
11329 statement or a declaration that has compile-time effect. It does not
11330 include any label or other affixture. It is up to the caller to ensure
11331 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11332 reflect the source of the code to be parsed and the lexical context for
11335 The op tree representing the statement is returned. This may be a
11336 null pointer if the statement is null, for example if it was actually
11337 a subroutine definition (which has compile-time side effects). If not
11338 null, it will be ops directly implementing the statement, suitable to
11339 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11340 equivalent op (except for those embedded in a scope contained entirely
11341 within the statement).
11343 If an error occurs in parsing or compilation, in most cases a valid op
11344 tree (most likely null) is returned anyway. The error is reflected in
11345 the parser state, normally resulting in a single exception at the top
11346 level of parsing which covers all the compilation errors that occurred.
11347 Some compilation errors, however, will throw an exception immediately.
11349 The I<flags> parameter is reserved for future use, and must always
11356 Perl_parse_barestmt(pTHX_ U32 flags)
11359 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11360 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11364 =for apidoc Amx|SV *|parse_label|U32 flags
11366 Parse a single label, possibly optional, of the type that may prefix a
11367 Perl statement. It is up to the caller to ensure that the dynamic parser
11368 state (L</PL_parser> et al) is correctly set to reflect the source of
11369 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11370 label is optional, otherwise it is mandatory.
11372 The name of the label is returned in the form of a fresh scalar. If an
11373 optional label is absent, a null pointer is returned.
11375 If an error occurs in parsing, which can only occur if the label is
11376 mandatory, a valid label is returned anyway. The error is reflected in
11377 the parser state, normally resulting in a single exception at the top
11378 level of parsing which covers all the compilation errors that occurred.
11384 Perl_parse_label(pTHX_ U32 flags)
11386 if (flags & ~PARSE_OPTIONAL)
11387 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11388 if (PL_lex_state == LEX_KNOWNEXT) {
11389 PL_parser->yychar = yylex();
11390 if (PL_parser->yychar == LABEL) {
11391 char * const lpv = pl_yylval.pval;
11392 STRLEN llen = strlen(lpv);
11393 PL_parser->yychar = YYEMPTY;
11394 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11401 STRLEN wlen, bufptr_pos;
11404 if (!isIDFIRST_lazy_if(s, UTF))
11406 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11407 if (word_takes_any_delimeter(s, wlen))
11409 bufptr_pos = s - SvPVX(PL_linestr);
11411 lex_read_space(LEX_KEEP_PREVIOUS);
11413 s = SvPVX(PL_linestr) + bufptr_pos;
11414 if (t[0] == ':' && t[1] != ':') {
11415 PL_oldoldbufptr = PL_oldbufptr;
11418 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11422 if (flags & PARSE_OPTIONAL) {
11425 qerror(Perl_mess(aTHX_ "Parse error"));
11426 return newSVpvs("x");
11433 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11435 Parse a single complete Perl statement. This may be a normal imperative
11436 statement or a declaration that has compile-time effect, and may include
11437 optional labels. It is up to the caller to ensure that the dynamic
11438 parser state (L</PL_parser> et al) is correctly set to reflect the source
11439 of the code to be parsed and the lexical context for the statement.
11441 The op tree representing the statement is returned. This may be a
11442 null pointer if the statement is null, for example if it was actually
11443 a subroutine definition (which has compile-time side effects). If not
11444 null, it will be the result of a L</newSTATEOP> call, normally including
11445 a C<nextstate> or equivalent op.
11447 If an error occurs in parsing or compilation, in most cases a valid op
11448 tree (most likely null) is returned anyway. The error is reflected in
11449 the parser state, normally resulting in a single exception at the top
11450 level of parsing which covers all the compilation errors that occurred.
11451 Some compilation errors, however, will throw an exception immediately.
11453 The I<flags> parameter is reserved for future use, and must always
11460 Perl_parse_fullstmt(pTHX_ U32 flags)
11463 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11464 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11468 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11470 Parse a sequence of zero or more Perl statements. These may be normal
11471 imperative statements, including optional labels, or declarations
11472 that have compile-time effect, or any mixture thereof. The statement
11473 sequence ends when a closing brace or end-of-file is encountered in a
11474 place where a new statement could have validly started. It is up to
11475 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11476 is correctly set to reflect the source of the code to be parsed and the
11477 lexical context for the statements.
11479 The op tree representing the statement sequence is returned. This may
11480 be a null pointer if the statements were all null, for example if there
11481 were no statements or if there were only subroutine definitions (which
11482 have compile-time side effects). If not null, it will be a C<lineseq>
11483 list, normally including C<nextstate> or equivalent ops.
11485 If an error occurs in parsing or compilation, in most cases a valid op
11486 tree is returned anyway. The error is reflected in the parser state,
11487 normally resulting in a single exception at the top level of parsing
11488 which covers all the compilation errors that occurred. Some compilation
11489 errors, however, will throw an exception immediately.
11491 The I<flags> parameter is reserved for future use, and must always
11498 Perl_parse_stmtseq(pTHX_ U32 flags)
11503 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11504 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11505 c = lex_peek_unichar(0);
11506 if (c != -1 && c != /*{*/'}')
11507 qerror(Perl_mess(aTHX_ "Parse error"));
11511 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11513 S_lex_token_boundary(pTHX)
11515 PL_oldoldbufptr = PL_oldbufptr;
11516 PL_oldbufptr = PL_bufptr;
11519 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11521 S_parse_opt_lexvar(pTHX)
11526 lex_token_boundary();
11527 sigil = lex_read_unichar(0);
11528 if (lex_peek_unichar(0) == '#') {
11529 qerror(Perl_mess(aTHX_ "Parse error"));
11533 c = lex_peek_unichar(0);
11534 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11537 d = PL_tokenbuf + 1;
11538 PL_tokenbuf[0] = (char)sigil;
11539 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11541 if (d == PL_tokenbuf+1)
11543 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11544 OPf_MOD | (OPpLVAL_INTRO<<8));
11545 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11550 Perl_parse_subsignature(pTHX)
11553 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11554 OP *initops = NULL;
11556 c = lex_peek_unichar(0);
11557 while (c != /*(*/')') {
11561 if (prev_type == 2)
11562 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11563 var = parse_opt_lexvar();
11565 newBINOP(OP_AELEM, 0,
11566 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11568 newSVOP(OP_CONST, 0, newSViv(pos))) :
11571 c = lex_peek_unichar(0);
11573 lex_token_boundary();
11574 lex_read_unichar(0);
11576 c = lex_peek_unichar(0);
11577 if (c == ',' || c == /*(*/')') {
11579 qerror(Perl_mess(aTHX_ "Optional parameter "
11580 "lacks default expression"));
11582 OP *defexpr = parse_termexpr(0);
11583 if (defexpr->op_type == OP_UNDEF &&
11584 !(defexpr->op_flags & OPf_KIDS)) {
11589 scalar(newUNOP(OP_RV2AV, 0,
11590 newGVOP(OP_GV, 0, PL_defgv))),
11591 newSVOP(OP_CONST, 0, newSViv(pos+1)));
11593 newCONDOP(0, ifop, expr, defexpr) :
11594 newLOGOP(OP_OR, 0, ifop, defexpr);
11599 if (prev_type == 1)
11600 qerror(Perl_mess(aTHX_ "Mandatory parameter "
11601 "follows optional parameter"));
11603 min_arity = pos + 1;
11605 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11607 initops = op_append_list(OP_LINESEQ, initops,
11608 newSTATEOP(0, NULL, expr));
11614 if (prev_type == 2)
11615 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11616 var = parse_opt_lexvar();
11618 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11619 newBINOP(OP_BIT_AND, 0,
11620 scalar(newUNOP(OP_RV2AV, 0,
11621 newGVOP(OP_GV, 0, PL_defgv))),
11622 newSVOP(OP_CONST, 0, newSViv(1))),
11623 op_convert_list(OP_DIE, 0,
11624 op_convert_list(OP_SPRINTF, 0,
11625 op_append_list(OP_LIST,
11626 newSVOP(OP_CONST, 0,
11627 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11629 op_append_list(OP_LIST,
11630 newSVOP(OP_CONST, 0, newSViv(1)),
11631 newSVOP(OP_CONST, 0, newSViv(2))),
11632 newOP(OP_CALLER, 0))))));
11633 if (pos != min_arity)
11634 chkop = newLOGOP(OP_AND, 0,
11636 scalar(newUNOP(OP_RV2AV, 0,
11637 newGVOP(OP_GV, 0, PL_defgv))),
11638 newSVOP(OP_CONST, 0, newSViv(pos))),
11640 initops = op_append_list(OP_LINESEQ,
11641 newSTATEOP(0, NULL, chkop),
11646 op_prepend_elem(OP_ASLICE,
11647 newOP(OP_PUSHMARK, 0),
11648 newLISTOP(OP_ASLICE, 0,
11650 newSVOP(OP_CONST, 0, newSViv(pos)),
11651 newUNOP(OP_AV2ARYLEN, 0,
11652 ref(newUNOP(OP_RV2AV, 0,
11653 newGVOP(OP_GV, 0, PL_defgv)),
11655 ref(newUNOP(OP_RV2AV, 0,
11656 newGVOP(OP_GV, 0, PL_defgv)),
11658 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11659 initops = op_append_list(OP_LINESEQ, initops,
11660 newSTATEOP(0, NULL,
11661 newASSIGNOP(OPf_STACKED, var, 0, slice)));
11668 qerror(Perl_mess(aTHX_ "Parse error"));
11672 c = lex_peek_unichar(0);
11674 case /*(*/')': break;
11677 lex_token_boundary();
11678 lex_read_unichar(0);
11680 c = lex_peek_unichar(0);
11681 } while (c == ',');
11687 if (min_arity != 0) {
11688 initops = op_append_list(OP_LINESEQ,
11689 newSTATEOP(0, NULL,
11692 scalar(newUNOP(OP_RV2AV, 0,
11693 newGVOP(OP_GV, 0, PL_defgv))),
11694 newSVOP(OP_CONST, 0, newSViv(min_arity))),
11695 op_convert_list(OP_DIE, 0,
11696 op_convert_list(OP_SPRINTF, 0,
11697 op_append_list(OP_LIST,
11698 newSVOP(OP_CONST, 0,
11699 newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11701 op_append_list(OP_LIST,
11702 newSVOP(OP_CONST, 0, newSViv(1)),
11703 newSVOP(OP_CONST, 0, newSViv(2))),
11704 newOP(OP_CALLER, 0))))))),
11707 if (max_arity != -1) {
11708 initops = op_append_list(OP_LINESEQ,
11709 newSTATEOP(0, NULL,
11712 scalar(newUNOP(OP_RV2AV, 0,
11713 newGVOP(OP_GV, 0, PL_defgv))),
11714 newSVOP(OP_CONST, 0, newSViv(max_arity))),
11715 op_convert_list(OP_DIE, 0,
11716 op_convert_list(OP_SPRINTF, 0,
11717 op_append_list(OP_LIST,
11718 newSVOP(OP_CONST, 0,
11719 newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11721 op_append_list(OP_LIST,
11722 newSVOP(OP_CONST, 0, newSViv(1)),
11723 newSVOP(OP_CONST, 0, newSViv(2))),
11724 newOP(OP_CALLER, 0))))))),
11732 * c-indentation-style: bsd
11733 * c-basic-offset: 4
11734 * indent-tabs-mode: nil
11737 * ex: set ts=8 sts=4 sw=4 et: