3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_static.c"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_defer (PL_parser->lex_defer)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_sublex_info (PL_parser->sublex_info)
69 #define PL_linestr (PL_parser->linestr)
70 #define PL_expect (PL_parser->expect)
71 #define PL_copline (PL_parser->copline)
72 #define PL_bufptr (PL_parser->bufptr)
73 #define PL_oldbufptr (PL_parser->oldbufptr)
74 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75 #define PL_linestart (PL_parser->linestart)
76 #define PL_bufend (PL_parser->bufend)
77 #define PL_last_uni (PL_parser->last_uni)
78 #define PL_last_lop (PL_parser->last_lop)
79 #define PL_last_lop_op (PL_parser->last_lop_op)
80 #define PL_lex_state (PL_parser->lex_state)
81 #define PL_rsfp (PL_parser->rsfp)
82 #define PL_rsfp_filters (PL_parser->rsfp_filters)
83 #define PL_in_my (PL_parser->in_my)
84 #define PL_in_my_stash (PL_parser->in_my_stash)
85 #define PL_tokenbuf (PL_parser->tokenbuf)
86 #define PL_multi_end (PL_parser->multi_end)
87 #define PL_error_count (PL_parser->error_count)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_nexttype (PL_parser->nexttype)
91 # define PL_nextval (PL_parser->nextval)
93 static const char* const ident_too_long = "Identifier too long";
95 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
97 #define XENUMMASK 0x3f
99 #define XFAKEBRACK 0x80
101 #ifdef USE_UTF8_SCRIPTS
102 # define UTF cBOOL(!IN_BYTES)
104 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
107 /* The maximum number of characters preceding the unrecognized one to display */
108 #define UNRECOGNIZED_PRECEDE_COUNT 10
110 /* In variables named $^X, these are the legal values for X.
111 * 1999-02-27 mjd-perl-patch@plover.com */
112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
114 #define SPACE_OR_TAB(c) isBLANK_A(c)
116 #define HEXFP_PEEK(s) \
118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119 isALPHA_FOLD_EQ(s[0], 'p'))
121 /* LEX_* are values for PL_lex_state, the state of the lexer.
122 * They are arranged oddly so that the guard on the switch statement
123 * can get by with a single comparison (if the compiler is smart enough).
125 * These values refer to the various states within a sublex parse,
126 * i.e. within a double quotish string
129 /* #define LEX_NOTPARSING 11 is done in perl.h. */
131 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
132 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
133 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
134 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
135 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
137 /* at end of code, eg "$x" followed by: */
138 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
139 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
141 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
142 string or after \E, $foo, etc */
143 #define LEX_INTERPCONST 2 /* NOT USED */
144 #define LEX_FORMLINE 1 /* expecting a format line */
145 #define LEX_KNOWNEXT 0 /* next token known; just return it */
149 static const char* const lex_state_names[] = {
164 #include "keywords.h"
166 /* CLINE is a macro that ensures PL_copline has a sane value */
168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
171 * Convenience functions to return different tokens and prime the
172 * lexer for the next token. They all take an argument.
174 * TOKEN : generic token (used for '(', DOLSHARP, etc)
175 * OPERATOR : generic operator
176 * AOPERATOR : assignment operator
177 * PREBLOCK : beginning the block after an if, while, foreach, ...
178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179 * PREREF : *EXPR where EXPR is not a simple identifier
180 * TERM : expression term
181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
182 * LOOPX : loop exiting command (goto, last, dump, etc)
183 * FTST : file test operator
184 * FUN0 : zero-argument function
185 * FUN0OP : zero-argument function, with its op created in this file
186 * FUN1 : not used, except for not, which isn't a UNIOP
187 * BOop : bitwise or or xor
189 * BCop : bitwise complement
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * AopNOASSIGN : addition-level operator that is never part of .=
195 * Mop : multiplication-level operator
196 * Eop : equality-testing operator
197 * Rop : relational operator <= != gt
199 * Also see LOP and lop() below.
202 #ifdef DEBUGGING /* Serve -DT. */
203 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
205 # define REPORT(retval) (retval)
208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
218 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
220 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
221 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
222 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
223 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
224 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
225 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
228 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
229 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
230 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
231 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
233 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
234 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
235 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
237 /* This bit of chicanery makes a unary function followed by
238 * a parenthesis into a function with one argument, highest precedence.
239 * The UNIDOR macro is for unary functions that can be followed by the //
240 * operator (such as C<shift // 0>).
242 #define UNI3(f,x,have_x) { \
243 pl_yylval.ival = f; \
244 if (have_x) PL_expect = x; \
246 PL_last_uni = PL_oldbufptr; \
247 PL_last_lop_op = f; \
249 return REPORT( (int)FUNC1 ); \
251 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
253 #define UNI(f) UNI3(f,XTERM,1)
254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
255 #define UNIPROTO(f,optional) { \
256 if (optional) PL_last_uni = PL_oldbufptr; \
260 #define UNIBRACK(f) UNI3(f,0,0)
262 /* grandfather return to old style */
265 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
266 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
267 pl_yylval.ival = (f); \
273 #define COPLINE_INC_WITH_HERELINES \
275 CopLINE_inc(PL_curcop); \
276 if (PL_parser->herelines) \
277 CopLINE(PL_curcop) += PL_parser->herelines, \
278 PL_parser->herelines = 0; \
280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
281 * is no sublex_push to follow. */
282 #define COPLINE_SET_FROM_MULTI_END \
284 CopLINE_set(PL_curcop, PL_multi_end); \
285 if (PL_multi_end != PL_multi_start) \
286 PL_parser->herelines = 0; \
292 /* how to interpret the pl_yylval associated with the token */
296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
301 static struct debug_tokens {
303 enum token_type type;
305 } const debug_tokens[] =
307 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
308 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
309 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
310 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
311 { ARROW, TOKENTYPE_NONE, "ARROW" },
312 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
313 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
314 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
315 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
316 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
317 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
318 { DO, TOKENTYPE_NONE, "DO" },
319 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
320 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
321 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
322 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
323 { ELSE, TOKENTYPE_NONE, "ELSE" },
324 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
325 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
326 { FOR, TOKENTYPE_IVAL, "FOR" },
327 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
328 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
329 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
330 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
331 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
332 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
333 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
334 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
335 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
336 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
337 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
338 { IF, TOKENTYPE_IVAL, "IF" },
339 { LABEL, TOKENTYPE_PVAL, "LABEL" },
340 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
341 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
342 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
343 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
344 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
345 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
346 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
347 { MY, TOKENTYPE_IVAL, "MY" },
348 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
349 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
350 { OROP, TOKENTYPE_IVAL, "OROP" },
351 { OROR, TOKENTYPE_NONE, "OROR" },
352 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
353 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
354 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
355 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
356 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
357 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
358 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
359 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
360 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
361 { PREINC, TOKENTYPE_NONE, "PREINC" },
362 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
363 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
364 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
365 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
366 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
367 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
368 { SUB, TOKENTYPE_NONE, "SUB" },
369 { THING, TOKENTYPE_OPVAL, "THING" },
370 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
371 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
372 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
373 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
374 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
375 { USE, TOKENTYPE_IVAL, "USE" },
376 { WHEN, TOKENTYPE_IVAL, "WHEN" },
377 { WHILE, TOKENTYPE_IVAL, "WHILE" },
378 { WORD, TOKENTYPE_OPVAL, "WORD" },
379 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
380 { 0, TOKENTYPE_NONE, NULL }
383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
388 PERL_ARGS_ASSERT_TOKEREPORT;
391 const char *name = NULL;
392 enum token_type type = TOKENTYPE_NONE;
393 const struct debug_tokens *p;
394 SV* const report = newSVpvs("<== ");
396 for (p = debug_tokens; p->token; p++) {
397 if (p->token == (int)rv) {
404 Perl_sv_catpv(aTHX_ report, name);
405 else if (isGRAPH(rv))
407 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
409 sv_catpvs(report, " (pending identifier)");
412 sv_catpvs(report, "EOF");
414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
419 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
421 case TOKENTYPE_OPNUM:
422 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
423 PL_op_name[lvalp->ival]);
426 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
428 case TOKENTYPE_OPVAL:
430 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
431 PL_op_name[lvalp->opval->op_type]);
432 if (lvalp->opval->op_type == OP_CONST) {
433 Perl_sv_catpvf(aTHX_ report, " %s",
434 SvPEEK(cSVOPx_sv(lvalp->opval)));
439 sv_catpvs(report, "(opval=null)");
442 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
448 /* print the buffer with suitable escapes */
451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
453 SV* const tmp = newSVpvs("");
455 PERL_ARGS_ASSERT_PRINTBUF;
457 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine looks for an '=' next to the operator that has just been
476 * parsed and turns it into an ASSIGNOP if it finds one.
480 S_ao(pTHX_ int toketype)
482 if (*PL_bufptr == '=') {
484 if (toketype == ANDAND)
485 pl_yylval.ival = OP_ANDASSIGN;
486 else if (toketype == OROR)
487 pl_yylval.ival = OP_ORASSIGN;
488 else if (toketype == DORDOR)
489 pl_yylval.ival = OP_DORASSIGN;
492 return REPORT(toketype);
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
509 S_no_op(pTHX_ const char *const what, char *s)
511 char * const oldbp = PL_bufptr;
512 const bool is_first = (PL_oldbufptr == PL_linestart);
514 PERL_ARGS_ASSERT_NO_OP;
520 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
521 if (ckWARN_d(WARN_SYNTAX)) {
523 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
524 "\t(Missing semicolon on previous line?)\n");
525 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
527 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
528 t += UTF ? UTF8SKIP(t) : 1)
530 if (t < PL_bufptr && isSPACE(*t))
531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
532 "\t(Do you need to predeclare %"UTF8f"?)\n",
533 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
538 "\t(Missing operator before %"UTF8f"?)\n",
539 UTF8fARG(UTF, s - oldbp, oldbp));
547 * Complain about missing quote/regexp/heredoc terminator.
548 * If it's called with NULL then it cauterizes the line buffer.
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
555 S_missingterm(pTHX_ char *s)
560 char * const nl = strrchr(s,'\n');
564 else if ((U8) PL_multi_close < 32) {
566 tmpbuf[1] = (char)toCTRL(PL_multi_close);
571 *tmpbuf = (char)PL_multi_close;
575 q = strchr(s,'"') ? '\'' : '"';
576 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
582 * Check whether the named feature is enabled.
585 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
587 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
589 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
591 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
593 if (namelen > MAX_FEATURE_LEN)
595 memcpy(&he_name[8], name, namelen);
597 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
598 REFCOUNTED_HE_EXISTS));
602 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
603 * utf16-to-utf8-reversed.
606 #ifdef PERL_CR_FILTER
610 const char *s = SvPVX_const(sv);
611 const char * const e = s + SvCUR(sv);
613 PERL_ARGS_ASSERT_STRIP_RETURN;
615 /* outer loop optimized to do nothing if there are no CR-LFs */
617 if (*s++ == '\r' && *s == '\n') {
618 /* hit a CR-LF, need to copy the rest */
622 if (*s == '\r' && s[1] == '\n')
633 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
635 const I32 count = FILTER_READ(idx+1, sv, maxlen);
636 if (count > 0 && !maxlen)
643 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
645 Creates and initialises a new lexer/parser state object, supplying
646 a context in which to lex and parse from a new source of Perl code.
647 A pointer to the new state object is placed in L</PL_parser>. An entry
648 is made on the save stack so that upon unwinding the new state object
649 will be destroyed and the former value of L</PL_parser> will be restored.
650 Nothing else need be done to clean up the parsing context.
652 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
653 non-null, provides a string (in SV form) containing code to be parsed.
654 A copy of the string is made, so subsequent modification of I<line>
655 does not affect parsing. I<rsfp>, if non-null, provides an input stream
656 from which code will be read to be parsed. If both are non-null, the
657 code in I<line> comes first and must consist of complete lines of input,
658 and I<rsfp> supplies the remainder of the source.
660 The I<flags> parameter is reserved for future use. Currently it is only
661 used by perl internally, so extensions should always pass zero.
666 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
667 can share filters with the current parser.
668 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
669 caller, hence isn't owned by the parser, so shouldn't be closed on parser
670 destruction. This is used to handle the case of defaulting to reading the
671 script from the standard input because no filename was given on the command
672 line (without getting confused by situation where STDIN has been closed, so
673 the script handle is opened on fd 0) */
676 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
678 const char *s = NULL;
679 yy_parser *parser, *oparser;
680 if (flags && flags & ~LEX_START_FLAGS)
681 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
683 /* create and initialise a parser */
685 Newxz(parser, 1, yy_parser);
686 parser->old_parser = oparser = PL_parser;
689 parser->stack = NULL;
691 parser->stack_size = 0;
693 /* on scope exit, free this parser and restore any outer one */
695 parser->saved_curcop = PL_curcop;
697 /* initialise lexer state */
699 parser->nexttoke = 0;
700 parser->error_count = oparser ? oparser->error_count : 0;
701 parser->copline = parser->preambling = NOLINE;
702 parser->lex_state = LEX_NORMAL;
703 parser->expect = XSTATE;
705 parser->rsfp_filters =
706 !(flags & LEX_START_SAME_FILTER) || !oparser
708 : MUTABLE_AV(SvREFCNT_inc(
709 oparser->rsfp_filters
710 ? oparser->rsfp_filters
711 : (oparser->rsfp_filters = newAV())
714 Newx(parser->lex_brackstack, 120, char);
715 Newx(parser->lex_casestack, 12, char);
716 *parser->lex_casestack = '\0';
717 Newxz(parser->lex_shared, 1, LEXSHARED);
721 s = SvPV_const(line, len);
722 parser->linestr = flags & LEX_START_COPIED
723 ? SvREFCNT_inc_simple_NN(line)
724 : newSVpvn_flags(s, len, SvUTF8(line));
725 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
727 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
729 parser->oldoldbufptr =
732 parser->linestart = SvPVX(parser->linestr);
733 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
734 parser->last_lop = parser->last_uni = NULL;
736 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
737 |LEX_DONT_CLOSE_RSFP));
738 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
739 |LEX_DONT_CLOSE_RSFP));
741 parser->in_pod = parser->filtered = 0;
745 /* delete a parser object */
748 Perl_parser_free(pTHX_ const yy_parser *parser)
750 PERL_ARGS_ASSERT_PARSER_FREE;
752 PL_curcop = parser->saved_curcop;
753 SvREFCNT_dec(parser->linestr);
755 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
756 PerlIO_clearerr(parser->rsfp);
757 else if (parser->rsfp && (!parser->old_parser ||
758 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
759 PerlIO_close(parser->rsfp);
760 SvREFCNT_dec(parser->rsfp_filters);
761 SvREFCNT_dec(parser->lex_stuff);
762 SvREFCNT_dec(parser->sublex_info.repl);
764 Safefree(parser->lex_brackstack);
765 Safefree(parser->lex_casestack);
766 Safefree(parser->lex_shared);
767 PL_parser = parser->old_parser;
772 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
774 I32 nexttoke = parser->nexttoke;
775 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
777 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
778 && parser->nextval[nexttoke].opval
779 && parser->nextval[nexttoke].opval->op_slabbed
780 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
781 op_free(parser->nextval[nexttoke].opval);
782 parser->nextval[nexttoke].opval = NULL;
789 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
791 Buffer scalar containing the chunk currently under consideration of the
792 text currently being lexed. This is always a plain string scalar (for
793 which C<SvPOK> is true). It is not intended to be used as a scalar by
794 normal scalar means; instead refer to the buffer directly by the pointer
795 variables described below.
797 The lexer maintains various C<char*> pointers to things in the
798 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
799 reallocated, all of these pointers must be updated. Don't attempt to
800 do this manually, but rather use L</lex_grow_linestr> if you need to
801 reallocate the buffer.
803 The content of the text chunk in the buffer is commonly exactly one
804 complete line of input, up to and including a newline terminator,
805 but there are situations where it is otherwise. The octets of the
806 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
807 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
808 flag on this scalar, which may disagree with it.
810 For direct examination of the buffer, the variable
811 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
812 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
813 of these pointers is usually preferable to examination of the scalar
814 through normal scalar means.
816 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
818 Direct pointer to the end of the chunk of text currently being lexed, the
819 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
820 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
821 always located at the end of the buffer, and does not count as part of
822 the buffer's contents.
824 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
826 Points to the current position of lexing inside the lexer buffer.
827 Characters around this point may be freely examined, within
828 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
829 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
830 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
832 Lexing code (whether in the Perl core or not) moves this pointer past
833 the characters that it consumes. It is also expected to perform some
834 bookkeeping whenever a newline character is consumed. This movement
835 can be more conveniently performed by the function L</lex_read_to>,
836 which handles newlines appropriately.
838 Interpretation of the buffer's octets can be abstracted out by
839 using the slightly higher-level functions L</lex_peek_unichar> and
840 L</lex_read_unichar>.
842 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
844 Points to the start of the current line inside the lexer buffer.
845 This is useful for indicating at which column an error occurred, and
846 not much else. This must be updated by any lexing code that consumes
847 a newline; the function L</lex_read_to> handles this detail.
853 =for apidoc Amx|bool|lex_bufutf8
855 Indicates whether the octets in the lexer buffer
856 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
857 of Unicode characters. If not, they should be interpreted as Latin-1
858 characters. This is analogous to the C<SvUTF8> flag for scalars.
860 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
861 contains valid UTF-8. Lexing code must be robust in the face of invalid
864 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
865 is significant, but not the whole story regarding the input character
866 encoding. Normally, when a file is being read, the scalar contains octets
867 and its C<SvUTF8> flag is off, but the octets should be interpreted as
868 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
869 however, the scalar may have the C<SvUTF8> flag on, and in this case its
870 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
871 is in effect. This logic may change in the future; use this function
872 instead of implementing the logic yourself.
878 Perl_lex_bufutf8(pTHX)
884 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
886 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
887 at least I<len> octets (including terminating C<NUL>). Returns a
888 pointer to the reallocated buffer. This is necessary before making
889 any direct modification of the buffer that would increase its length.
890 L</lex_stuff_pvn> provides a more convenient way to insert text into
893 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
894 this function updates all of the lexer's variables that point directly
901 Perl_lex_grow_linestr(pTHX_ STRLEN len)
905 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
906 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
907 linestr = PL_parser->linestr;
908 buf = SvPVX(linestr);
909 if (len <= SvLEN(linestr))
911 bufend_pos = PL_parser->bufend - buf;
912 bufptr_pos = PL_parser->bufptr - buf;
913 oldbufptr_pos = PL_parser->oldbufptr - buf;
914 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
915 linestart_pos = PL_parser->linestart - buf;
916 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
917 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
918 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
919 PL_parser->lex_shared->re_eval_start - buf : 0;
921 buf = sv_grow(linestr, len);
923 PL_parser->bufend = buf + bufend_pos;
924 PL_parser->bufptr = buf + bufptr_pos;
925 PL_parser->oldbufptr = buf + oldbufptr_pos;
926 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
927 PL_parser->linestart = buf + linestart_pos;
928 if (PL_parser->last_uni)
929 PL_parser->last_uni = buf + last_uni_pos;
930 if (PL_parser->last_lop)
931 PL_parser->last_lop = buf + last_lop_pos;
932 if (PL_parser->lex_shared->re_eval_start)
933 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
938 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
940 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
941 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
942 reallocating the buffer if necessary. This means that lexing code that
943 runs later will see the characters as if they had appeared in the input.
944 It is not recommended to do this as part of normal parsing, and most
945 uses of this facility run the risk of the inserted characters being
946 interpreted in an unintended manner.
948 The string to be inserted is represented by I<len> octets starting
949 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
950 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
951 The characters are recoded for the lexer buffer, according to how the
952 buffer is currently being interpreted (L</lex_bufutf8>). If a string
953 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
954 function is more convenient.
960 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
964 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
965 if (flags & ~(LEX_STUFF_UTF8))
966 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
968 if (flags & LEX_STUFF_UTF8) {
971 STRLEN highhalf = 0; /* Count of variants */
972 const char *p, *e = pv+len;
973 for (p = pv; p != e; p++) {
974 if (! UTF8_IS_INVARIANT(*p)) {
980 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
981 bufptr = PL_parser->bufptr;
982 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
983 SvCUR_set(PL_parser->linestr,
984 SvCUR(PL_parser->linestr) + len+highhalf);
985 PL_parser->bufend += len+highhalf;
986 for (p = pv; p != e; p++) {
988 if (! UTF8_IS_INVARIANT(c)) {
989 *bufptr++ = UTF8_TWO_BYTE_HI(c);
990 *bufptr++ = UTF8_TWO_BYTE_LO(c);
997 if (flags & LEX_STUFF_UTF8) {
999 const char *p, *e = pv+len;
1000 for (p = pv; p != e; p++) {
1002 if (UTF8_IS_ABOVE_LATIN1(c)) {
1003 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1004 "non-Latin-1 character into Latin-1 input");
1005 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1008 } else if (! UTF8_IS_INVARIANT(c)) {
1009 /* malformed UTF-8 */
1011 SAVESPTR(PL_warnhook);
1012 PL_warnhook = PERL_WARNHOOK_FATAL;
1013 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1019 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1020 bufptr = PL_parser->bufptr;
1021 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1022 SvCUR_set(PL_parser->linestr,
1023 SvCUR(PL_parser->linestr) + len-highhalf);
1024 PL_parser->bufend += len-highhalf;
1027 if (UTF8_IS_INVARIANT(*p)) {
1033 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1039 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040 bufptr = PL_parser->bufptr;
1041 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1042 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1043 PL_parser->bufend += len;
1044 Copy(pv, bufptr, len, char);
1050 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1052 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054 reallocating the buffer if necessary. This means that lexing code that
1055 runs later will see the characters as if they had appeared in the input.
1056 It is not recommended to do this as part of normal parsing, and most
1057 uses of this facility run the risk of the inserted characters being
1058 interpreted in an unintended manner.
1060 The string to be inserted is represented by octets starting at I<pv>
1061 and continuing to the first nul. These octets are interpreted as either
1062 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063 in I<flags>. The characters are recoded for the lexer buffer, according
1064 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065 If it is not convenient to nul-terminate a string to be inserted, the
1066 L</lex_stuff_pvn> function is more appropriate.
1072 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1074 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075 lex_stuff_pvn(pv, strlen(pv), flags);
1079 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1081 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083 reallocating the buffer if necessary. This means that lexing code that
1084 runs later will see the characters as if they had appeared in the input.
1085 It is not recommended to do this as part of normal parsing, and most
1086 uses of this facility run the risk of the inserted characters being
1087 interpreted in an unintended manner.
1089 The string to be inserted is the string value of I<sv>. The characters
1090 are recoded for the lexer buffer, according to how the buffer is currently
1091 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1092 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093 need to construct a scalar.
1099 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1107 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111 =for apidoc Amx|void|lex_unstuff|char *ptr
1113 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1115 This hides the discarded text from any lexing code that runs later,
1116 as if the text had never appeared.
1118 This is not the normal way to consume lexed text. For that, use
1125 Perl_lex_unstuff(pTHX_ char *ptr)
1129 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130 buf = PL_parser->bufptr;
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1135 bufend = PL_parser->bufend;
1137 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 unstuff_len = ptr - buf;
1139 Move(ptr, buf, bufend+1-ptr, char);
1140 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141 PL_parser->bufend = bufend - unstuff_len;
1145 =for apidoc Amx|void|lex_read_to|char *ptr
1147 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149 performing the correct bookkeeping whenever a newline character is passed.
1150 This is the normal way to consume lexed text.
1152 Interpretation of the buffer's octets can be abstracted out by
1153 using the slightly higher-level functions L</lex_peek_unichar> and
1154 L</lex_read_unichar>.
1160 Perl_lex_read_to(pTHX_ char *ptr)
1163 PERL_ARGS_ASSERT_LEX_READ_TO;
1164 s = PL_parser->bufptr;
1165 if (ptr < s || ptr > PL_parser->bufend)
1166 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167 for (; s != ptr; s++)
1169 COPLINE_INC_WITH_HERELINES;
1170 PL_parser->linestart = s+1;
1172 PL_parser->bufptr = ptr;
1176 =for apidoc Amx|void|lex_discard_to|char *ptr
1178 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179 up to I<ptr>. The remaining content of the buffer will be moved, and
1180 all pointers into the buffer updated appropriately. I<ptr> must not
1181 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182 it is not permitted to discard text that has yet to be lexed.
1184 Normally it is not necessarily to do this directly, because it suffices to
1185 use the implicit discarding behaviour of L</lex_next_chunk> and things
1186 based on it. However, if a token stretches across multiple lines,
1187 and the lexing code has kept multiple lines of text in the buffer for
1188 that purpose, then after completion of the token it would be wise to
1189 explicitly discard the now-unneeded earlier lines, to avoid future
1190 multi-line tokens growing the buffer without bound.
1196 Perl_lex_discard_to(pTHX_ char *ptr)
1200 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201 buf = SvPVX(PL_parser->linestr);
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1206 if (ptr > PL_parser->bufptr)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208 discard_len = ptr - buf;
1209 if (PL_parser->oldbufptr < ptr)
1210 PL_parser->oldbufptr = ptr;
1211 if (PL_parser->oldoldbufptr < ptr)
1212 PL_parser->oldoldbufptr = ptr;
1213 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214 PL_parser->last_uni = NULL;
1215 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216 PL_parser->last_lop = NULL;
1217 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219 PL_parser->bufend -= discard_len;
1220 PL_parser->bufptr -= discard_len;
1221 PL_parser->oldbufptr -= discard_len;
1222 PL_parser->oldoldbufptr -= discard_len;
1223 if (PL_parser->last_uni)
1224 PL_parser->last_uni -= discard_len;
1225 if (PL_parser->last_lop)
1226 PL_parser->last_lop -= discard_len;
1230 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1232 Reads in the next chunk of text to be lexed, appending it to
1233 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1234 looked to the end of the current chunk and wants to know more. It is
1235 usual, but not necessary, for lexing to have consumed the entirety of
1236 the current chunk at this time.
1238 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239 chunk (i.e., the current chunk has been entirely consumed), normally the
1240 current chunk will be discarded at the same time that the new chunk is
1241 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242 will not be discarded. If the current chunk has not been entirely
1243 consumed, then it will not be discarded regardless of the flag.
1245 Returns true if some new text was added to the buffer, or false if the
1246 buffer has reached the end of the input text.
1251 #define LEX_FAKE_EOF 0x80000000
1252 #define LEX_NO_TERM 0x40000000
1255 Perl_lex_next_chunk(pTHX_ U32 flags)
1259 STRLEN old_bufend_pos, new_bufend_pos;
1260 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1262 bool got_some_for_debugger = 0;
1264 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1265 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1266 linestr = PL_parser->linestr;
1267 buf = SvPVX(linestr);
1268 if (!(flags & LEX_KEEP_PREVIOUS) &&
1269 PL_parser->bufptr == PL_parser->bufend) {
1270 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1272 if (PL_parser->last_uni != PL_parser->bufend)
1273 PL_parser->last_uni = NULL;
1274 if (PL_parser->last_lop != PL_parser->bufend)
1275 PL_parser->last_lop = NULL;
1276 last_uni_pos = last_lop_pos = 0;
1280 old_bufend_pos = PL_parser->bufend - buf;
1281 bufptr_pos = PL_parser->bufptr - buf;
1282 oldbufptr_pos = PL_parser->oldbufptr - buf;
1283 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284 linestart_pos = PL_parser->linestart - buf;
1285 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1288 if (flags & LEX_FAKE_EOF) {
1290 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1292 } else if (filter_gets(linestr, old_bufend_pos)) {
1294 got_some_for_debugger = 1;
1295 } else if (flags & LEX_NO_TERM) {
1298 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1299 sv_setpvs(linestr, "");
1301 /* End of real input. Close filehandle (unless it was STDIN),
1302 * then add implicit termination.
1304 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1305 PerlIO_clearerr(PL_parser->rsfp);
1306 else if (PL_parser->rsfp)
1307 (void)PerlIO_close(PL_parser->rsfp);
1308 PL_parser->rsfp = NULL;
1309 PL_parser->in_pod = PL_parser->filtered = 0;
1310 if (!PL_in_eval && PL_minus_p) {
1312 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1313 PL_minus_n = PL_minus_p = 0;
1314 } else if (!PL_in_eval && PL_minus_n) {
1315 sv_catpvs(linestr, /*{*/";}");
1318 sv_catpvs(linestr, ";");
1321 buf = SvPVX(linestr);
1322 new_bufend_pos = SvCUR(linestr);
1323 PL_parser->bufend = buf + new_bufend_pos;
1324 PL_parser->bufptr = buf + bufptr_pos;
1325 PL_parser->oldbufptr = buf + oldbufptr_pos;
1326 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1327 PL_parser->linestart = buf + linestart_pos;
1328 if (PL_parser->last_uni)
1329 PL_parser->last_uni = buf + last_uni_pos;
1330 if (PL_parser->last_lop)
1331 PL_parser->last_lop = buf + last_lop_pos;
1332 if (PL_parser->preambling != NOLINE) {
1333 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1334 PL_parser->preambling = NOLINE;
1336 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1337 PL_curstash != PL_debstash) {
1338 /* debugger active and we're not compiling the debugger code,
1339 * so store the line into the debugger's array of lines
1341 update_debugger_info(NULL, buf+old_bufend_pos,
1342 new_bufend_pos-old_bufend_pos);
1348 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1350 Looks ahead one (Unicode) character in the text currently being lexed.
1351 Returns the codepoint (unsigned integer value) of the next character,
1352 or -1 if lexing has reached the end of the input text. To consume the
1353 peeked character, use L</lex_read_unichar>.
1355 If the next character is in (or extends into) the next chunk of input
1356 text, the next chunk will be read in. Normally the current chunk will be
1357 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358 then the current chunk will not be discarded.
1360 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361 is encountered, an exception is generated.
1367 Perl_lex_peek_unichar(pTHX_ U32 flags)
1371 if (flags & ~(LEX_KEEP_PREVIOUS))
1372 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373 s = PL_parser->bufptr;
1374 bufend = PL_parser->bufend;
1380 if (!lex_next_chunk(flags))
1382 s = PL_parser->bufptr;
1383 bufend = PL_parser->bufend;
1386 if (UTF8_IS_INVARIANT(head))
1388 if (UTF8_IS_START(head)) {
1389 len = UTF8SKIP(&head);
1390 while ((STRLEN)(bufend-s) < len) {
1391 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1393 s = PL_parser->bufptr;
1394 bufend = PL_parser->bufend;
1397 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398 if (retlen == (STRLEN)-1) {
1399 /* malformed UTF-8 */
1401 SAVESPTR(PL_warnhook);
1402 PL_warnhook = PERL_WARNHOOK_FATAL;
1403 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1409 if (!lex_next_chunk(flags))
1411 s = PL_parser->bufptr;
1418 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1420 Reads the next (Unicode) character in the text currently being lexed.
1421 Returns the codepoint (unsigned integer value) of the character read,
1422 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423 if lexing has reached the end of the input text. To non-destructively
1424 examine the next character, use L</lex_peek_unichar> instead.
1426 If the next character is in (or extends into) the next chunk of input
1427 text, the next chunk will be read in. Normally the current chunk will be
1428 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429 then the current chunk will not be discarded.
1431 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432 is encountered, an exception is generated.
1438 Perl_lex_read_unichar(pTHX_ U32 flags)
1441 if (flags & ~(LEX_KEEP_PREVIOUS))
1442 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443 c = lex_peek_unichar(flags);
1446 COPLINE_INC_WITH_HERELINES;
1448 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1450 ++(PL_parser->bufptr);
1456 =for apidoc Amx|void|lex_read_space|U32 flags
1458 Reads optional spaces, in Perl style, in the text currently being
1459 lexed. The spaces may include ordinary whitespace characters and
1460 Perl-style comments. C<#line> directives are processed if encountered.
1461 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462 at a non-space character (or the end of the input text).
1464 If spaces extend into the next chunk of input text, the next chunk will
1465 be read in. Normally the current chunk will be discarded at the same
1466 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467 chunk will not be discarded.
1472 #define LEX_NO_INCLINE 0x40000000
1473 #define LEX_NO_NEXT_CHUNK 0x80000000
1476 Perl_lex_read_space(pTHX_ U32 flags)
1479 const bool can_incline = !(flags & LEX_NO_INCLINE);
1480 bool need_incline = 0;
1481 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1482 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1483 s = PL_parser->bufptr;
1484 bufend = PL_parser->bufend;
1490 } while (!(c == '\n' || (c == 0 && s == bufend)));
1491 } else if (c == '\n') {
1494 PL_parser->linestart = s;
1500 } else if (isSPACE(c)) {
1502 } else if (c == 0 && s == bufend) {
1505 if (flags & LEX_NO_NEXT_CHUNK)
1507 PL_parser->bufptr = s;
1508 l = CopLINE(PL_curcop);
1509 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1510 got_more = lex_next_chunk(flags);
1511 CopLINE_set(PL_curcop, l);
1512 s = PL_parser->bufptr;
1513 bufend = PL_parser->bufend;
1516 if (can_incline && need_incline && PL_parser->rsfp) {
1524 PL_parser->bufptr = s;
1529 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1531 This function performs syntax checking on a prototype, C<proto>.
1532 If C<warn> is true, any illegal characters or mismatched brackets
1533 will trigger illegalproto warnings, declaring that they were
1534 detected in the prototype for C<name>.
1536 The return value is C<true> if this is a valid prototype, and
1537 C<false> if it is not, regardless of whether C<warn> was C<true> or
1540 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1547 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1549 STRLEN len, origlen;
1550 char *p = proto ? SvPV(proto, len) : NULL;
1551 bool bad_proto = FALSE;
1552 bool in_brackets = FALSE;
1553 bool after_slash = FALSE;
1554 char greedy_proto = ' ';
1555 bool proto_after_greedy_proto = FALSE;
1556 bool must_be_last = FALSE;
1557 bool underscore = FALSE;
1558 bool bad_proto_after_underscore = FALSE;
1560 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1566 for (; len--; p++) {
1569 proto_after_greedy_proto = TRUE;
1571 if (!strchr(";@%", *p))
1572 bad_proto_after_underscore = TRUE;
1575 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1582 in_brackets = FALSE;
1583 else if ((*p == '@' || *p == '%') &&
1586 must_be_last = TRUE;
1595 after_slash = FALSE;
1600 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1603 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1604 origlen, UNI_DISPLAY_ISPRINT)
1605 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1607 if (proto_after_greedy_proto)
1608 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1609 "Prototype after '%c' for %"SVf" : %s",
1610 greedy_proto, SVfARG(name), p);
1612 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1613 "Missing ']' in prototype for %"SVf" : %s",
1616 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1617 "Illegal character in prototype for %"SVf" : %s",
1619 if (bad_proto_after_underscore)
1620 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1621 "Illegal character after '_' in prototype for %"SVf" : %s",
1625 return (! (proto_after_greedy_proto || bad_proto) );
1630 * This subroutine has nothing to do with tilting, whether at windmills
1631 * or pinball tables. Its name is short for "increment line". It
1632 * increments the current line number in CopLINE(PL_curcop) and checks
1633 * to see whether the line starts with a comment of the form
1634 * # line 500 "foo.pm"
1635 * If so, it sets the current line number and file to the values in the comment.
1639 S_incline(pTHX_ const char *s)
1646 PERL_ARGS_ASSERT_INCLINE;
1648 COPLINE_INC_WITH_HERELINES;
1649 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1650 && s+1 == PL_bufend && *s == ';') {
1651 /* fake newline in string eval */
1652 CopLINE_dec(PL_curcop);
1657 while (SPACE_OR_TAB(*s))
1659 if (strnEQ(s, "line", 4))
1663 if (SPACE_OR_TAB(*s))
1667 while (SPACE_OR_TAB(*s))
1675 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1677 while (SPACE_OR_TAB(*s))
1679 if (*s == '"' && (t = strchr(s+1, '"'))) {
1685 while (!isSPACE(*t))
1689 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1691 if (*e != '\n' && *e != '\0')
1692 return; /* false alarm */
1694 line_num = grok_atou(n, &e) - 1;
1697 const STRLEN len = t - s;
1699 if (!PL_rsfp && !PL_parser->filtered) {
1700 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1701 * to *{"::_<newfilename"} */
1702 /* However, the long form of evals is only turned on by the
1703 debugger - usually they're "(eval %lu)" */
1704 GV * const cfgv = CopFILEGV(PL_curcop);
1707 STRLEN tmplen2 = len;
1711 if (tmplen2 + 2 <= sizeof smallbuf)
1714 Newx(tmpbuf2, tmplen2 + 2, char);
1719 memcpy(tmpbuf2 + 2, s, tmplen2);
1722 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1724 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1725 /* adjust ${"::_<newfilename"} to store the new file name */
1726 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1727 /* The line number may differ. If that is the case,
1728 alias the saved lines that are in the array.
1729 Otherwise alias the whole array. */
1730 if (CopLINE(PL_curcop) == line_num) {
1731 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1732 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1734 else if (GvAV(cfgv)) {
1735 AV * const av = GvAV(cfgv);
1736 const I32 start = CopLINE(PL_curcop)+1;
1737 I32 items = AvFILLp(av) - start;
1739 AV * const av2 = GvAVn(gv2);
1740 SV **svp = AvARRAY(av) + start;
1741 I32 l = (I32)line_num+1;
1743 av_store(av2, l++, SvREFCNT_inc(*svp++));
1748 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1751 CopFILE_free(PL_curcop);
1752 CopFILE_setn(PL_curcop, s, len);
1754 CopLINE_set(PL_curcop, line_num);
1757 #define skipspace(s) skipspace_flags(s, 0)
1761 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1763 AV *av = CopFILEAVx(PL_curcop);
1766 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1768 sv = *av_fetch(av, 0, 1);
1769 SvUPGRADE(sv, SVt_PVMG);
1771 if (!SvPOK(sv)) sv_setpvs(sv,"");
1773 sv_catsv(sv, orig_sv);
1775 sv_catpvn(sv, buf, len);
1780 if (PL_parser->preambling == NOLINE)
1781 av_store(av, CopLINE(PL_curcop), sv);
1787 * Called to gobble the appropriate amount and type of whitespace.
1788 * Skips comments as well.
1792 S_skipspace_flags(pTHX_ char *s, U32 flags)
1794 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1795 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1796 while (s < PL_bufend && SPACE_OR_TAB(*s))
1799 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1801 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1802 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1803 LEX_NO_NEXT_CHUNK : 0));
1805 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1806 if (PL_linestart > PL_bufptr)
1807 PL_bufptr = PL_linestart;
1815 * Check the unary operators to ensure there's no ambiguity in how they're
1816 * used. An ambiguous piece of code would be:
1818 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1819 * the +5 is its argument.
1828 if (PL_oldoldbufptr != PL_last_uni)
1830 while (isSPACE(*PL_last_uni))
1833 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1835 if ((t = strchr(s, '(')) && t < PL_bufptr)
1838 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1839 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1840 (int)(s - PL_last_uni), PL_last_uni);
1844 * LOP : macro to build a list operator. Its behaviour has been replaced
1845 * with a subroutine, S_lop() for which LOP is just another name.
1848 #define LOP(f,x) return lop(f,x,s)
1852 * Build a list operator (or something that might be one). The rules:
1853 * - if we have a next token, then it's a list operator (no parens) for
1854 * which the next token has already been parsed; e.g.,
1857 * - if the next thing is an opening paren, then it's a function
1858 * - else it's a list operator
1862 S_lop(pTHX_ I32 f, int x, char *s)
1864 PERL_ARGS_ASSERT_LOP;
1869 PL_last_lop = PL_oldbufptr;
1870 PL_last_lop_op = (OPCODE)f;
1875 return REPORT(FUNC);
1878 return REPORT(FUNC);
1881 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1882 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1883 return REPORT(LSTOP);
1889 * When the lexer realizes it knows the next token (for instance,
1890 * it is reordering tokens for the parser) then it can call S_force_next
1891 * to know what token to return the next time the lexer is called. Caller
1892 * will need to set PL_nextval[] and possibly PL_expect to ensure
1893 * the lexer handles the token correctly.
1897 S_force_next(pTHX_ I32 type)
1901 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1902 tokereport(type, &NEXTVAL_NEXTTOKE);
1905 PL_nexttype[PL_nexttoke] = type;
1907 if (PL_lex_state != LEX_KNOWNEXT) {
1908 PL_lex_defer = PL_lex_state;
1909 PL_lex_state = LEX_KNOWNEXT;
1916 * This subroutine handles postfix deref syntax after the arrow has already
1917 * been emitted. @* $* etc. are emitted as two separate token right here.
1918 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1919 * only the first, leaving yylex to find the next.
1923 S_postderef(pTHX_ int const funny, char const next)
1925 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1926 assert(strchr("*[{", next));
1928 PL_expect = XOPERATOR;
1929 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1930 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1931 PL_lex_state = LEX_INTERPEND;
1932 force_next(POSTJOIN);
1938 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1939 && !PL_lex_brackets)
1941 PL_expect = XOPERATOR;
1950 int yyc = PL_parser->yychar;
1951 if (yyc != YYEMPTY) {
1953 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1954 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1955 PL_lex_allbrackets--;
1957 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1958 } else if (yyc == '('/*)*/) {
1959 PL_lex_allbrackets--;
1964 PL_parser->yychar = YYEMPTY;
1969 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1971 SV * const sv = newSVpvn_utf8(start, len,
1974 && !is_invariant_string((const U8*)start, len)
1975 && is_utf8_string((const U8*)start, len));
1981 * When the lexer knows the next thing is a word (for instance, it has
1982 * just seen -> and it knows that the next char is a word char, then
1983 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1987 * char *start : buffer position (must be within PL_linestr)
1988 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1989 * int check_keyword : if true, Perl checks to make sure the word isn't
1990 * a keyword (do this if the word is a label, e.g. goto FOO)
1991 * int allow_pack : if true, : characters will also be allowed (require,
1992 * use, etc. do this)
1996 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2001 PERL_ARGS_ASSERT_FORCE_WORD;
2003 start = skipspace(start);
2005 if (isIDFIRST_lazy_if(s,UTF) ||
2006 (allow_pack && *s == ':') )
2008 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2009 if (check_keyword) {
2010 char *s2 = PL_tokenbuf;
2012 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2014 if (keyword(s2, len2, 0))
2017 if (token == METHOD) {
2022 PL_expect = XOPERATOR;
2025 NEXTVAL_NEXTTOKE.opval
2026 = (OP*)newSVOP(OP_CONST,0,
2027 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2036 * Called when the lexer wants $foo *foo &foo etc, but the program
2037 * text only contains the "foo" portion. The first argument is a pointer
2038 * to the "foo", and the second argument is the type symbol to prefix.
2039 * Forces the next token to be a "WORD".
2040 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2044 S_force_ident(pTHX_ const char *s, int kind)
2046 PERL_ARGS_ASSERT_FORCE_IDENT;
2049 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2050 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2051 UTF ? SVf_UTF8 : 0));
2052 NEXTVAL_NEXTTOKE.opval = o;
2055 o->op_private = OPpCONST_ENTERED;
2056 /* XXX see note in pp_entereval() for why we forgo typo
2057 warnings if the symbol must be introduced in an eval.
2059 gv_fetchpvn_flags(s, len,
2060 (PL_in_eval ? GV_ADDMULTI
2061 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2062 kind == '$' ? SVt_PV :
2063 kind == '@' ? SVt_PVAV :
2064 kind == '%' ? SVt_PVHV :
2072 S_force_ident_maybe_lex(pTHX_ char pit)
2074 NEXTVAL_NEXTTOKE.ival = pit;
2079 Perl_str_to_version(pTHX_ SV *sv)
2084 const char *start = SvPV_const(sv,len);
2085 const char * const end = start + len;
2086 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2088 PERL_ARGS_ASSERT_STR_TO_VERSION;
2090 while (start < end) {
2094 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2099 retval += ((NV)n)/nshift;
2108 * Forces the next token to be a version number.
2109 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2110 * and if "guessing" is TRUE, then no new token is created (and the caller
2111 * must use an alternative parsing method).
2115 S_force_version(pTHX_ char *s, int guessing)
2120 PERL_ARGS_ASSERT_FORCE_VERSION;
2128 while (isDIGIT(*d) || *d == '_' || *d == '.')
2130 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2132 s = scan_num(s, &pl_yylval);
2133 version = pl_yylval.opval;
2134 ver = cSVOPx(version)->op_sv;
2135 if (SvPOK(ver) && !SvNIOK(ver)) {
2136 SvUPGRADE(ver, SVt_PVNV);
2137 SvNV_set(ver, str_to_version(ver));
2138 SvNOK_on(ver); /* hint that it is a version */
2141 else if (guessing) {
2146 /* NOTE: The parser sees the package name and the VERSION swapped */
2147 NEXTVAL_NEXTTOKE.opval = version;
2154 * S_force_strict_version
2155 * Forces the next token to be a version number using strict syntax rules.
2159 S_force_strict_version(pTHX_ char *s)
2162 const char *errstr = NULL;
2164 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2166 while (isSPACE(*s)) /* leading whitespace */
2169 if (is_STRICT_VERSION(s,&errstr)) {
2171 s = (char *)scan_version(s, ver, 0);
2172 version = newSVOP(OP_CONST, 0, ver);
2174 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2175 (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2179 yyerror(errstr); /* version required */
2183 /* NOTE: The parser sees the package name and the VERSION swapped */
2184 NEXTVAL_NEXTTOKE.opval = version;
2192 * Tokenize a quoted string passed in as an SV. It finds the next
2193 * chunk, up to end of string or a backslash. It may make a new
2194 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2199 S_tokeq(pTHX_ SV *sv)
2206 PERL_ARGS_ASSERT_TOKEQ;
2210 assert (!SvIsCOW(sv));
2211 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2215 /* This is relying on the SV being "well formed" with a trailing '\0' */
2216 while (s < send && !(*s == '\\' && s[1] == '\\'))
2221 if ( PL_hints & HINT_NEW_STRING ) {
2222 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2223 SVs_TEMP | SvUTF8(sv));
2227 if (s + 1 < send && (s[1] == '\\'))
2228 s++; /* all that, just for this */
2233 SvCUR_set(sv, d - SvPVX_const(sv));
2235 if ( PL_hints & HINT_NEW_STRING )
2236 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2241 * Now come three functions related to double-quote context,
2242 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2243 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2244 * interact with PL_lex_state, and create fake ( ... ) argument lists
2245 * to handle functions and concatenation.
2249 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2254 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2256 * Pattern matching will set PL_lex_op to the pattern-matching op to
2257 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2259 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2261 * Everything else becomes a FUNC.
2263 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2264 * had an OP_CONST or OP_READLINE). This just sets us up for a
2265 * call to S_sublex_push().
2269 S_sublex_start(pTHX)
2271 const I32 op_type = pl_yylval.ival;
2273 if (op_type == OP_NULL) {
2274 pl_yylval.opval = PL_lex_op;
2278 if (op_type == OP_CONST) {
2279 SV *sv = tokeq(PL_lex_stuff);
2281 if (SvTYPE(sv) == SVt_PVIV) {
2282 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2284 const char * const p = SvPV_const(sv, len);
2285 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2289 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2290 PL_lex_stuff = NULL;
2294 PL_sublex_info.super_state = PL_lex_state;
2295 PL_sublex_info.sub_inwhat = (U16)op_type;
2296 PL_sublex_info.sub_op = PL_lex_op;
2297 PL_lex_state = LEX_INTERPPUSH;
2301 pl_yylval.opval = PL_lex_op;
2311 * Create a new scope to save the lexing state. The scope will be
2312 * ended in S_sublex_done. Returns a '(', starting the function arguments
2313 * to the uc, lc, etc. found before.
2314 * Sets PL_lex_state to LEX_INTERPCONCAT.
2321 const bool is_heredoc = PL_multi_close == '<';
2324 PL_lex_state = PL_sublex_info.super_state;
2325 SAVEI8(PL_lex_dojoin);
2326 SAVEI32(PL_lex_brackets);
2327 SAVEI32(PL_lex_allbrackets);
2328 SAVEI32(PL_lex_formbrack);
2329 SAVEI8(PL_lex_fakeeof);
2330 SAVEI32(PL_lex_casemods);
2331 SAVEI32(PL_lex_starts);
2332 SAVEI8(PL_lex_state);
2333 SAVESPTR(PL_lex_repl);
2334 SAVEVPTR(PL_lex_inpat);
2335 SAVEI16(PL_lex_inwhat);
2338 SAVECOPLINE(PL_curcop);
2339 SAVEI32(PL_multi_end);
2340 SAVEI32(PL_parser->herelines);
2341 PL_parser->herelines = 0;
2343 SAVEI8(PL_multi_close);
2344 SAVEPPTR(PL_bufptr);
2345 SAVEPPTR(PL_bufend);
2346 SAVEPPTR(PL_oldbufptr);
2347 SAVEPPTR(PL_oldoldbufptr);
2348 SAVEPPTR(PL_last_lop);
2349 SAVEPPTR(PL_last_uni);
2350 SAVEPPTR(PL_linestart);
2351 SAVESPTR(PL_linestr);
2352 SAVEGENERICPV(PL_lex_brackstack);
2353 SAVEGENERICPV(PL_lex_casestack);
2354 SAVEGENERICPV(PL_parser->lex_shared);
2355 SAVEBOOL(PL_parser->lex_re_reparsing);
2356 SAVEI32(PL_copline);
2358 /* The here-doc parser needs to be able to peek into outer lexing
2359 scopes to find the body of the here-doc. So we put PL_linestr and
2360 PL_bufptr into lex_shared, to ‘share’ those values.
2362 PL_parser->lex_shared->ls_linestr = PL_linestr;
2363 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2365 PL_linestr = PL_lex_stuff;
2366 PL_lex_repl = PL_sublex_info.repl;
2367 PL_lex_stuff = NULL;
2368 PL_sublex_info.repl = NULL;
2370 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2371 = SvPVX(PL_linestr);
2372 PL_bufend += SvCUR(PL_linestr);
2373 PL_last_lop = PL_last_uni = NULL;
2374 SAVEFREESV(PL_linestr);
2375 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2377 PL_lex_dojoin = FALSE;
2378 PL_lex_brackets = PL_lex_formbrack = 0;
2379 PL_lex_allbrackets = 0;
2380 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2381 Newx(PL_lex_brackstack, 120, char);
2382 Newx(PL_lex_casestack, 12, char);
2383 PL_lex_casemods = 0;
2384 *PL_lex_casestack = '\0';
2386 PL_lex_state = LEX_INTERPCONCAT;
2388 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2389 PL_copline = NOLINE;
2391 Newxz(shared, 1, LEXSHARED);
2392 shared->ls_prev = PL_parser->lex_shared;
2393 PL_parser->lex_shared = shared;
2395 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2396 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2397 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2398 PL_lex_inpat = PL_sublex_info.sub_op;
2400 PL_lex_inpat = NULL;
2402 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2403 PL_in_eval &= ~EVAL_RE_REPARSING;
2410 * Restores lexer state after a S_sublex_push.
2416 if (!PL_lex_starts++) {
2417 SV * const sv = newSVpvs("");
2418 if (SvUTF8(PL_linestr))
2420 PL_expect = XOPERATOR;
2421 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2425 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2426 PL_lex_state = LEX_INTERPCASEMOD;
2430 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2431 assert(PL_lex_inwhat != OP_TRANSR);
2433 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2434 PL_linestr = PL_lex_repl;
2436 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2437 PL_bufend += SvCUR(PL_linestr);
2438 PL_last_lop = PL_last_uni = NULL;
2439 PL_lex_dojoin = FALSE;
2440 PL_lex_brackets = 0;
2441 PL_lex_allbrackets = 0;
2442 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2443 PL_lex_casemods = 0;
2444 *PL_lex_casestack = '\0';
2446 if (SvEVALED(PL_lex_repl)) {
2447 PL_lex_state = LEX_INTERPNORMAL;
2449 /* we don't clear PL_lex_repl here, so that we can check later
2450 whether this is an evalled subst; that means we rely on the
2451 logic to ensure sublex_done() is called again only via the
2452 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2455 PL_lex_state = LEX_INTERPCONCAT;
2458 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2459 CopLINE(PL_curcop) +=
2460 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2461 + PL_parser->herelines;
2462 PL_parser->herelines = 0;
2467 const line_t l = CopLINE(PL_curcop);
2469 if (PL_multi_close == '<')
2470 PL_parser->herelines += l - PL_multi_end;
2471 PL_bufend = SvPVX(PL_linestr);
2472 PL_bufend += SvCUR(PL_linestr);
2473 PL_expect = XOPERATOR;
2474 PL_sublex_info.sub_inwhat = 0;
2479 PERL_STATIC_INLINE SV*
2480 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2482 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2483 * interior, hence to the "}". Finds what the name resolves to, returning
2484 * an SV* containing it; NULL if no valid one found */
2486 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2493 const U8* first_bad_char_loc;
2494 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2496 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2501 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2503 &first_bad_char_loc))
2505 /* If warnings are on, this will print a more detailed analysis of what
2506 * is wrong than the error message below */
2507 utf8n_to_uvchr(first_bad_char_loc,
2508 e - ((char *) first_bad_char_loc),
2511 /* We deliberately don't try to print the malformed character, which
2512 * might not print very well; it also may be just the first of many
2513 * malformations, so don't print what comes after it */
2514 yyerror(Perl_form(aTHX_
2515 "Malformed UTF-8 character immediately after '%.*s'",
2516 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2520 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2521 /* include the <}> */
2522 e - backslash_ptr + 1);
2524 SvREFCNT_dec_NN(res);
2528 /* See if the charnames handler is the Perl core's, and if so, we can skip
2529 * the validation needed for a user-supplied one, as Perl's does its own
2531 table = GvHV(PL_hintgv); /* ^H */
2532 cvp = hv_fetchs(table, "charnames", FALSE);
2533 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2534 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2536 const char * const name = HvNAME(stash);
2537 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2538 && strEQ(name, "_charnames")) {
2543 /* Here, it isn't Perl's charname handler. We can't rely on a
2544 * user-supplied handler to validate the input name. For non-ut8 input,
2545 * look to see that the first character is legal. Then loop through the
2546 * rest checking that each is a continuation */
2548 /* This code makes the reasonable assumption that the only Latin1-range
2549 * characters that begin a character name alias are alphabetic, otherwise
2550 * would have to create a isCHARNAME_BEGIN macro */
2553 if (! isALPHAU(*s)) {
2558 if (! isCHARNAME_CONT(*s)) {
2561 if (*s == ' ' && *(s-1) == ' ') {
2564 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2565 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2566 "NO-BREAK SPACE in a charnames "
2567 "alias definition is deprecated");
2573 /* Similarly for utf8. For invariants can check directly; for other
2574 * Latin1, can calculate their code point and check; otherwise use a
2576 if (UTF8_IS_INVARIANT(*s)) {
2577 if (! isALPHAU(*s)) {
2581 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2582 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2588 if (! PL_utf8_charname_begin) {
2589 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2590 PL_utf8_charname_begin = _core_swash_init("utf8",
2591 "_Perl_Charname_Begin",
2593 1, 0, NULL, &flags);
2595 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2602 if (UTF8_IS_INVARIANT(*s)) {
2603 if (! isCHARNAME_CONT(*s)) {
2606 if (*s == ' ' && *(s-1) == ' ') {
2611 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2612 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2616 if (*s == *NBSP_UTF8
2617 && *(s+1) == *(NBSP_UTF8+1)
2618 && ckWARN_d(WARN_DEPRECATED))
2620 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2621 "NO-BREAK SPACE in a charnames "
2622 "alias definition is deprecated");
2627 if (! PL_utf8_charname_continue) {
2628 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2629 PL_utf8_charname_continue = _core_swash_init("utf8",
2630 "_Perl_Charname_Continue",
2632 1, 0, NULL, &flags);
2634 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2641 if (*(s-1) == ' ') {
2644 "charnames alias definitions may not contain trailing "
2645 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2646 (int)(s - backslash_ptr + 1), backslash_ptr,
2647 (int)(e - s + 1), s + 1
2649 UTF ? SVf_UTF8 : 0);
2653 if (SvUTF8(res)) { /* Don't accept malformed input */
2654 const U8* first_bad_char_loc;
2656 const char* const str = SvPV_const(res, len);
2657 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2658 /* If warnings are on, this will print a more detailed analysis of
2659 * what is wrong than the error message below */
2660 utf8n_to_uvchr(first_bad_char_loc,
2661 (char *) first_bad_char_loc - str,
2664 /* We deliberately don't try to print the malformed character,
2665 * which might not print very well; it also may be just the first
2666 * of many malformations, so don't print what comes after it */
2669 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2670 (int) (e - backslash_ptr + 1), backslash_ptr,
2671 (int) ((char *) first_bad_char_loc - str), str
2682 /* The final %.*s makes sure that should the trailing NUL be missing
2683 * that this print won't run off the end of the string */
2686 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2687 (int)(s - backslash_ptr + 1), backslash_ptr,
2688 (int)(e - s + 1), s + 1
2690 UTF ? SVf_UTF8 : 0);
2697 "charnames alias definitions may not contain a sequence of "
2698 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2699 (int)(s - backslash_ptr + 1), backslash_ptr,
2700 (int)(e - s + 1), s + 1
2702 UTF ? SVf_UTF8 : 0);
2709 Extracts the next constant part of a pattern, double-quoted string,
2710 or transliteration. This is terrifying code.
2712 For example, in parsing the double-quoted string "ab\x63$d", it would
2713 stop at the '$' and return an OP_CONST containing 'abc'.
2715 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2716 processing a pattern (PL_lex_inpat is true), a transliteration
2717 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2719 Returns a pointer to the character scanned up to. If this is
2720 advanced from the start pointer supplied (i.e. if anything was
2721 successfully parsed), will leave an OP_CONST for the substring scanned
2722 in pl_yylval. Caller must intuit reason for not parsing further
2723 by looking at the next characters herself.
2727 \N{FOO} => \N{U+hex_for_character_FOO}
2728 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2731 all other \-char, including \N and \N{ apart from \N{ABC}
2734 @ and $ where it appears to be a var, but not for $ as tail anchor
2739 In transliterations:
2740 characters are VERY literal, except for - not at the start or end
2741 of the string, which indicates a range. If the range is in bytes,
2742 scan_const expands the range to the full set of intermediate
2743 characters. If the range is in utf8, the hyphen is replaced with
2744 a certain range mark which will be handled by pmtrans() in op.c.
2746 In double-quoted strings:
2748 double-quoted style: \r and \n
2749 constants: \x31, etc.
2750 deprecated backrefs: \1 (in substitution replacements)
2751 case and quoting: \U \Q \E
2754 scan_const does *not* construct ops to handle interpolated strings.
2755 It stops processing as soon as it finds an embedded $ or @ variable
2756 and leaves it to the caller to work out what's going on.
2758 embedded arrays (whether in pattern or not) could be:
2759 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2761 $ in double-quoted strings must be the symbol of an embedded scalar.
2763 $ in pattern could be $foo or could be tail anchor. Assumption:
2764 it's a tail anchor if $ is the last thing in the string, or if it's
2765 followed by one of "()| \r\n\t"
2767 \1 (backreferences) are turned into $1 in substitutions
2769 The structure of the code is
2770 while (there's a character to process) {
2771 handle transliteration ranges
2772 skip regexp comments /(?#comment)/ and codes /(?{code})/
2773 skip #-initiated comments in //x patterns
2774 check for embedded arrays
2775 check for embedded scalars
2777 deprecate \1 in substitution replacements
2778 handle string-changing backslashes \l \U \Q \E, etc.
2779 switch (what was escaped) {
2780 handle \- in a transliteration (becomes a literal -)
2781 if a pattern and not \N{, go treat as regular character
2782 handle \132 (octal characters)
2783 handle \x15 and \x{1234} (hex characters)
2784 handle \N{name} (named characters, also \N{3,5} in a pattern)
2785 handle \cV (control characters)
2786 handle printf-style backslashes (\f, \r, \n, etc)
2789 } (end if backslash)
2790 handle regular character
2791 } (end while character to read)
2796 S_scan_const(pTHX_ char *start)
2798 char *send = PL_bufend; /* end of the constant */
2799 SV *sv = newSV(send - start); /* sv for the constant. See note below
2801 char *s = start; /* start of the constant */
2802 char *d = SvPVX(sv); /* destination for copies */
2803 bool dorange = FALSE; /* are we in a translit range? */
2804 bool didrange = FALSE; /* did we just finish a range? */
2805 bool in_charclass = FALSE; /* within /[...]/ */
2806 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2807 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2808 UTF8? But, this can show as true
2809 when the source isn't utf8, as for
2810 example when it is entirely composed
2812 SV *res; /* result from charnames */
2814 /* Note on sizing: The scanned constant is placed into sv, which is
2815 * initialized by newSV() assuming one byte of output for every byte of
2816 * input. This routine expects newSV() to allocate an extra byte for a
2817 * trailing NUL, which this routine will append if it gets to the end of
2818 * the input. There may be more bytes of input than output (eg., \N{LATIN
2819 * CAPITAL LETTER A}), or more output than input if the constant ends up
2820 * recoded to utf8, but each time a construct is found that might increase
2821 * the needed size, SvGROW() is called. Its size parameter each time is
2822 * based on the best guess estimate at the time, namely the length used so
2823 * far, plus the length the current construct will occupy, plus room for
2824 * the trailing NUL, plus one byte for every input byte still unscanned */
2826 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2829 UV literal_endpoint = 0;
2830 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2833 PERL_ARGS_ASSERT_SCAN_CONST;
2835 assert(PL_lex_inwhat != OP_TRANSR);
2836 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2837 /* If we are doing a trans and we know we want UTF8 set expectation */
2838 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2839 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2842 /* Protect sv from errors and fatal warnings. */
2843 ENTER_with_name("scan_const");
2846 while (s < send || dorange) {
2848 /* get transliterations out of the way (they're most literal) */
2849 if (PL_lex_inwhat == OP_TRANS) {
2850 /* expand a range A-Z to the full set of characters. AIE! */
2852 I32 i; /* current expanded character */
2853 I32 min; /* first character in range */
2854 I32 max; /* last character in range */
2865 char * const c = (char*)utf8_hop((U8*)d, -1);
2869 *c = (char) ILLEGAL_UTF8_BYTE;
2870 /* mark the range as done, and continue */
2876 i = d - SvPVX_const(sv); /* remember current offset */
2879 SvLEN(sv) + ((has_utf8)
2880 ? (512 - UTF_CONTINUATION_MARK
2883 /* How many two-byte within 0..255: 128 in UTF-8,
2884 * 96 in UTF-8-mod. */
2886 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2888 d = SvPVX(sv) + i; /* refresh d after realloc */
2892 for (j = 0; j <= 1; j++) {
2893 char * const c = (char*)utf8_hop((U8*)d, -1);
2894 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2900 max = (U8)0xff; /* only to \xff */
2901 uvmax = uv; /* \x{100} to uvmax */
2903 d = c; /* eat endpoint chars */
2908 d -= 2; /* eat the first char and the - */
2909 min = (U8)*d; /* first char in range */
2910 max = (U8)d[1]; /* last char in range */
2917 "Invalid range \"%c-%c\" in transliteration operator",
2918 (char)min, (char)max);
2922 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2923 * any subsets of these ranges into individual characters */
2924 if (literal_endpoint == 2 &&
2925 ((isLOWER_A(min) && isLOWER_A(max)) ||
2926 (isUPPER_A(min) && isUPPER_A(max))))
2928 for (i = min; i <= max; i++) {
2935 for (i = min; i <= max; i++)
2938 append_utf8_from_native_byte(i, &d);
2946 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2948 *d++ = (char) ILLEGAL_UTF8_BYTE;
2950 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2954 /* mark the range as done, and continue */
2958 literal_endpoint = 0;
2963 /* range begins (ignore - as first or last char) */
2964 else if (*s == '-' && s+1 < send && s != start) {
2966 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2973 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
2983 literal_endpoint = 0;
2984 native_range = TRUE;
2989 /* if we get here, we're not doing a transliteration */
2991 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2994 while (s1 >= start && *s1-- == '\\')
2997 in_charclass = TRUE;
3000 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3003 while (s1 >= start && *s1-- == '\\')
3006 in_charclass = FALSE;
3009 /* skip for regexp comments /(?#comment)/, except for the last
3010 * char, which will be done separately.
3011 * Stop on (?{..}) and friends */
3013 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3015 while (s+1 < send && *s != ')')
3018 else if (!PL_lex_casemods &&
3019 ( s[2] == '{' /* This should match regcomp.c */
3020 || (s[2] == '?' && s[3] == '{')))
3026 /* likewise skip #-initiated comments in //x patterns */
3027 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3028 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3029 while (s+1 < send && *s != '\n')
3033 /* no further processing of single-quoted regex */
3034 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3035 goto default_action;
3037 /* check for embedded arrays
3038 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3040 else if (*s == '@' && s[1]) {
3041 if (isWORDCHAR_lazy_if(s+1,UTF))
3043 if (strchr(":'{$", s[1]))
3045 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3046 break; /* in regexp, neither @+ nor @- are interpolated */
3049 /* check for embedded scalars. only stop if we're sure it's a
3052 else if (*s == '$') {
3053 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3055 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3057 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3058 "Possible unintended interpolation of $\\ in regex");
3060 break; /* in regexp, $ might be tail anchor */
3064 /* End of else if chain - OP_TRANS rejoin rest */
3067 if (*s == '\\' && s+1 < send) {
3068 char* e; /* Can be used for ending '}', etc. */
3072 /* warn on \1 - \9 in substitution replacements, but note that \11
3073 * is an octal; and \19 is \1 followed by '9' */
3074 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3075 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3077 /* diag_listed_as: \%d better written as $%d */
3078 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3083 /* string-change backslash escapes */
3084 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3088 /* In a pattern, process \N, but skip any other backslash escapes.
3089 * This is because we don't want to translate an escape sequence
3090 * into a meta symbol and have the regex compiler use the meta
3091 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3092 * in spite of this, we do have to process \N here while the proper
3093 * charnames handler is in scope. See bugs #56444 and #62056.
3095 * There is a complication because \N in a pattern may also stand
3096 * for 'match a non-nl', and not mean a charname, in which case its
3097 * processing should be deferred to the regex compiler. To be a
3098 * charname it must be followed immediately by a '{', and not look
3099 * like \N followed by a curly quantifier, i.e., not something like
3100 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3102 else if (PL_lex_inpat
3105 || regcurly(s + 1)))
3108 goto default_action;
3113 /* quoted - in transliterations */
3115 if (PL_lex_inwhat == OP_TRANS) {
3122 if ((isALPHANUMERIC(*s)))
3123 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3124 "Unrecognized escape \\%c passed through",
3126 /* default action is to copy the quoted character */
3127 goto default_action;
3130 /* eg. \132 indicates the octal constant 0132 */
3131 case '0': case '1': case '2': case '3':
3132 case '4': case '5': case '6': case '7':
3134 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3136 uv = grok_oct(s, &len, &flags, NULL);
3138 if (len < 3 && s < send && isDIGIT(*s)
3139 && ckWARN(WARN_MISC))
3141 Perl_warner(aTHX_ packWARN(WARN_MISC),
3142 "%s", form_short_octal_warning(s, len));
3145 goto NUM_ESCAPE_INSERT;
3147 /* eg. \o{24} indicates the octal constant \024 */
3152 bool valid = grok_bslash_o(&s, &uv, &error,
3153 TRUE, /* Output warning */
3154 FALSE, /* Not strict */
3155 TRUE, /* Output warnings for
3162 goto NUM_ESCAPE_INSERT;
3165 /* eg. \x24 indicates the hex constant 0x24 */
3170 bool valid = grok_bslash_x(&s, &uv, &error,
3171 TRUE, /* Output warning */
3172 FALSE, /* Not strict */
3173 TRUE, /* Output warnings for
3183 /* Insert oct or hex escaped character. There will always be
3184 * enough room in sv since such escapes will be longer than any
3185 * UTF-8 sequence they can end up as, except if they force us
3186 * to recode the rest of the string into utf8 */
3188 /* Here uv is the ordinal of the next character being added */
3189 if (!UVCHR_IS_INVARIANT(uv)) {
3190 if (!has_utf8 && uv > 255) {
3191 /* Might need to recode whatever we have accumulated so
3192 * far if it contains any chars variant in utf8 or
3195 SvCUR_set(sv, d - SvPVX_const(sv));
3198 /* See Note on sizing above. */
3199 sv_utf8_upgrade_flags_grow(
3201 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3202 /* Above-latin1 in string
3203 * implies no encoding */
3204 |SV_UTF8_NO_ENCODING,
3205 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3206 d = SvPVX(sv) + SvCUR(sv);
3211 d = (char*)uvchr_to_utf8((U8*)d, uv);
3212 if (PL_lex_inwhat == OP_TRANS &&
3213 PL_sublex_info.sub_op) {
3214 PL_sublex_info.sub_op->op_private |=
3215 (PL_lex_repl ? OPpTRANS_FROM_UTF
3219 if (uv > 255 && !dorange)
3220 native_range = FALSE;
3233 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3234 * named character, like \N{LATIN SMALL LETTER A}, or a named
3235 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3236 * GRAVE}. For convenience all three forms are referred to as
3237 * "named characters" below.
3239 * For patterns, \N also can mean to match a non-newline. Code
3240 * before this 'switch' statement should already have handled
3241 * this situation, and hence this code only has to deal with
3242 * the named character cases.
3244 * For non-patterns, the named characters are converted to
3245 * their string equivalents. In patterns, named characters are
3246 * not converted to their ultimate forms for the same reasons
3247 * that other escapes aren't. Instead, they are converted to
3248 * the \N{U+...} form to get the value from the charnames that
3249 * is in effect right now, while preserving the fact that it
3250 * was a named character, so that the regex compiler knows
3253 * The structure of this section of code (besides checking for
3254 * errors and upgrading to utf8) is:
3255 * If the named character is of the form \N{U+...}, pass it
3256 * through if a pattern; otherwise convert the code point
3258 * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
3259 * if a pattern; otherwise convert to utf8
3261 * If the regex compiler should ever need to differentiate
3262 * between the \N{U+...} and \N{name} forms, that could easily
3263 * be done here by stripping any leading zeros from the
3264 * \N{U+...} case, and adding them to the other one. */
3266 /* Here, 's' points to the 'N'; the test below is guaranteed to
3267 * succeed if we are being called on a pattern, as we already
3268 * know from a test above that the next character is a '{'. A
3269 * non-pattern \N must mean 'named character', which requires
3273 yyerror("Missing braces on \\N{}");
3278 /* If there is no matching '}', it is an error. */
3279 if (! (e = strchr(s, '}'))) {
3280 if (! PL_lex_inpat) {
3281 yyerror("Missing right brace on \\N{}");
3283 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3288 /* Here it looks like a named character */
3290 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3291 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3292 | PERL_SCAN_SILENT_ILLDIGIT
3293 | PERL_SCAN_DISALLOW_PREFIX;
3296 s += 2; /* Skip to next char after the 'U+' */
3298 uv = grok_hex(s, &len, &flags, NULL);
3300 || ( len != (STRLEN)(e - s) && s[len] != '.'
3304 yyerror("Invalid hexadecimal number in \\N{U+...}");
3311 s -= 5; /* Include the '\N{U+' */
3312 /* On EBCDIC platforms, in \N{U+...}, the '...' is a
3313 * Unicode value, so convert to native so downstream
3314 * code can continue to assume it's native */
3315 /* XXX This should be in the regexp parser,
3316 because doing it here makes /\N{U+41}/ and
3317 =~ '\N{U+41}' do different things. */
3318 d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
3321 (unsigned int) UNI_TO_NATIVE(uv));
3326 uv = grok_hex(s, &len, &flags, NULL);
3328 || (len != (STRLEN)(e - s) && s[len] != '.'))
3332 d, e - s + 1 + 1, ".%X",
3333 (unsigned int)UNI_TO_NATIVE(uv)
3339 /* On non-EBCDIC platforms, pass it through unchanged.
3340 * The reason we evaluate the numbers is to make
3341 * sure there wasn't a syntax error. */
3342 const char * const orig_s = s - 5;
3346 uv = grok_hex(s, &len, &flags, NULL);
3348 || (len != (STRLEN)(e - s) && s[len] != '.'))
3351 /* +1 is for the '}' */
3352 Copy(orig_s, d, e - orig_s + 1, char);
3353 d += e - orig_s + 1;
3356 else { /* Not a pattern: convert the hex to string */
3358 /* If the destination is not in utf8, unconditionally
3359 * recode it to be so. This is because \N{} implies
3360 * Unicode semantics, and scalars have to be in utf8
3361 * to guarantee those semantics */
3363 SvCUR_set(sv, d - SvPVX_const(sv));
3366 /* See Note on sizing above. */
3367 sv_utf8_upgrade_flags_grow(
3369 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3370 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3371 d = SvPVX(sv) + SvCUR(sv);
3375 /* Add the (Unicode) code point to the output. */
3376 if (UNI_IS_INVARIANT(uv)) {
3377 *d++ = (char) LATIN1_TO_NATIVE(uv);
3380 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3384 else /* Here is \N{NAME} but not \N{U+...}. */
3385 if ((res = get_and_check_backslash_N_name(s, e)))
3388 const char *str = SvPV_const(res, len);
3391 if (! len) { /* The name resolved to an empty string */
3392 Copy("\\N{}", d, 4, char);
3396 /* In order to not lose information for the regex
3397 * compiler, pass the result in the specially made
3398 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3399 * the code points in hex of each character
3400 * returned by charnames */
3402 const char *str_end = str + len;
3403 const STRLEN off = d - SvPVX_const(sv);
3405 if (! SvUTF8(res)) {
3406 /* For the non-UTF-8 case, we can determine the
3407 * exact length needed without having to parse
3408 * through the string. Each character takes up
3409 * 2 hex digits plus either a trailing dot or
3411 const char initial_text[] = "\\N{U+";
3412 const STRLEN initial_len = sizeof(initial_text)
3414 d = off + SvGROW(sv, off
3417 /* +1 for trailing NUL */
3420 + (STRLEN)(send - e));
3421 Copy(initial_text, d, initial_len, char);
3423 while (str < str_end) {
3426 my_snprintf(hex_string,
3428 "%02X.", (U8) *str);
3429 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3430 Copy(hex_string, d, 3, char);
3434 d--; /* Below, we will overwrite the final
3435 dot with a right brace */
3438 STRLEN char_length; /* cur char's byte length */
3440 /* and the number of bytes after this is
3441 * translated into hex digits */
3442 STRLEN output_length;
3444 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3445 * for max('U+', '.'); and 1 for NUL */
3446 char hex_string[2 * UTF8_MAXBYTES + 5];
3448 /* Get the first character of the result. */
3449 U32 uv = utf8n_to_uvchr((U8 *) str,
3453 /* Convert first code point to hex, including
3454 * the boiler plate before it. */
3456 my_snprintf(hex_string, sizeof(hex_string),
3460 /* Make sure there is enough space to hold it */
3461 d = off + SvGROW(sv, off
3463 + (STRLEN)(send - e)
3464 + 2); /* '}' + NUL */
3466 Copy(hex_string, d, output_length, char);
3469 /* For each subsequent character, append dot and
3470 * its ordinal in hex */
3471 while ((str += char_length) < str_end) {
3472 const STRLEN off = d - SvPVX_const(sv);
3473 U32 uv = utf8n_to_uvchr((U8 *) str,
3478 my_snprintf(hex_string,
3483 d = off + SvGROW(sv, off
3485 + (STRLEN)(send - e)
3486 + 2); /* '}' + NUL */
3487 Copy(hex_string, d, output_length, char);
3492 *d++ = '}'; /* Done. Add the trailing brace */
3495 else { /* Here, not in a pattern. Convert the name to a
3498 /* If destination is not in utf8, unconditionally
3499 * recode it to be so. This is because \N{} implies
3500 * Unicode semantics, and scalars have to be in utf8
3501 * to guarantee those semantics */
3503 SvCUR_set(sv, d - SvPVX_const(sv));
3506 /* See Note on sizing above. */
3507 sv_utf8_upgrade_flags_grow(sv,
3508 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3509 len + (STRLEN)(send - s) + 1);
3510 d = SvPVX(sv) + SvCUR(sv);
3512 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3514 /* See Note on sizing above. (NOTE: SvCUR() is not
3515 * set correctly here). */
3516 const STRLEN off = d - SvPVX_const(sv);
3517 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3519 if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
3520 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3521 str = SvPV_const(res, len);
3523 Copy(str, d, len, char);
3529 } /* End \N{NAME} */
3532 native_range = FALSE; /* \N{} is defined to be Unicode */
3534 s = e + 1; /* Point to just after the '}' */
3537 /* \c is a control character */
3541 *d++ = grok_bslash_c(*s++, 1);
3544 yyerror("Missing control char name in \\c");
3548 /* printf-style backslashes, formfeeds, newlines, etc */
3574 } /* end if (backslash) */
3581 /* If we started with encoded form, or already know we want it,
3582 then encode the next character */
3583 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3587 /* One might think that it is wasted effort in the case of the
3588 * source being utf8 (this_utf8 == TRUE) to take the next character
3589 * in the source, convert it to an unsigned value, and then convert
3590 * it back again. But the source has not been validated here. The
3591 * routine that does the conversion checks for errors like
3594 const UV nextuv = (this_utf8)
3595 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3597 const STRLEN need = UNISKIP(nextuv);
3599 SvCUR_set(sv, d - SvPVX_const(sv));
3602 /* See Note on sizing above. */
3603 sv_utf8_upgrade_flags_grow(sv,
3604 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3605 need + (STRLEN)(send - s) + 1);
3606 d = SvPVX(sv) + SvCUR(sv);
3608 } else if (need > len) {
3609 /* encoded value larger than old, may need extra space (NOTE:
3610 * SvCUR() is not set correctly here). See Note on sizing
3612 const STRLEN off = d - SvPVX_const(sv);
3613 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3617 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3619 if (uv > 255 && !dorange)
3620 native_range = FALSE;
3626 } /* while loop to process each character */
3628 /* terminate the string and set up the sv */
3630 SvCUR_set(sv, d - SvPVX_const(sv));
3631 if (SvCUR(sv) >= SvLEN(sv))
3632 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3633 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3636 if (IN_ENCODING && !has_utf8) {
3637 sv_recode_to_utf8(sv, _get_encoding());
3643 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3644 PL_sublex_info.sub_op->op_private |=
3645 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3649 /* shrink the sv if we allocated more than we used */
3650 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3651 SvPV_shrink_to_cur(sv);
3654 /* return the substring (via pl_yylval) only if we parsed anything */
3657 for (; s2 < s; s2++) {
3659 COPLINE_INC_WITH_HERELINES;
3661 SvREFCNT_inc_simple_void_NN(sv);
3662 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3663 && ! PL_parser->lex_re_reparsing)
3665 const char *const key = PL_lex_inpat ? "qr" : "q";
3666 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3670 if (PL_lex_inwhat == OP_TRANS) {
3673 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3676 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3684 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3687 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3689 LEAVE_with_name("scan_const");
3694 * Returns TRUE if there's more to the expression (e.g., a subscript),
3697 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3699 * ->[ and ->{ return TRUE
3700 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3701 * { and [ outside a pattern are always subscripts, so return TRUE
3702 * if we're outside a pattern and it's not { or [, then return FALSE
3703 * if we're in a pattern and the first char is a {
3704 * {4,5} (any digits around the comma) returns FALSE
3705 * if we're in a pattern and the first char is a [
3707 * [SOMETHING] has a funky algorithm to decide whether it's a
3708 * character class or not. It has to deal with things like
3709 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3710 * anything else returns TRUE
3713 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3716 S_intuit_more(pTHX_ char *s)
3718 PERL_ARGS_ASSERT_INTUIT_MORE;
3720 if (PL_lex_brackets)
3722 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3724 if (*s == '-' && s[1] == '>'
3725 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3726 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3727 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3729 if (*s != '{' && *s != '[')
3734 /* In a pattern, so maybe we have {n,m}. */
3742 /* On the other hand, maybe we have a character class */
3745 if (*s == ']' || *s == '^')
3748 /* this is terrifying, and it works */
3751 const char * const send = strchr(s,']');
3752 unsigned char un_char, last_un_char;
3753 char tmpbuf[sizeof PL_tokenbuf * 4];
3755 if (!send) /* has to be an expression */
3757 weight = 2; /* let's weigh the evidence */
3761 else if (isDIGIT(*s)) {
3763 if (isDIGIT(s[1]) && s[2] == ']')
3769 Zero(seen,256,char);
3771 for (; s < send; s++) {
3772 last_un_char = un_char;
3773 un_char = (unsigned char)*s;
3778 weight -= seen[un_char] * 10;
3779 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3781 char *tmp = PL_bufend;
3782 PL_bufend = (char*)send;
3783 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3785 len = (int)strlen(tmpbuf);
3786 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3787 UTF ? SVf_UTF8 : 0, SVt_PV))
3792 else if (*s == '$' && s[1] &&
3793 strchr("[#!%*<>()-=",s[1])) {
3794 if (/*{*/ strchr("])} =",s[2]))
3803 if (strchr("wds]",s[1]))
3805 else if (seen[(U8)'\''] || seen[(U8)'"'])
3807 else if (strchr("rnftbxcav",s[1]))
3809 else if (isDIGIT(s[1])) {
3811 while (s[1] && isDIGIT(s[1]))
3821 if (strchr("aA01! ",last_un_char))
3823 if (strchr("zZ79~",s[1]))
3825 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3826 weight -= 5; /* cope with negative subscript */
3829 if (!isWORDCHAR(last_un_char)
3830 && !(last_un_char == '$' || last_un_char == '@'
3831 || last_un_char == '&')
3832 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3836 if (keyword(d, s - d, 0))
3839 if (un_char == last_un_char + 1)
3841 weight -= seen[un_char];
3846 if (weight >= 0) /* probably a character class */
3856 * Does all the checking to disambiguate
3858 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3859 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3861 * First argument is the stuff after the first token, e.g. "bar".
3863 * Not a method if foo is a filehandle.
3864 * Not a method if foo is a subroutine prototyped to take a filehandle.
3865 * Not a method if it's really "Foo $bar"
3866 * Method if it's "foo $bar"
3867 * Not a method if it's really "print foo $bar"
3868 * Method if it's really "foo package::" (interpreted as package->foo)
3869 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3870 * Not a method if bar is a filehandle or package, but is quoted with
3875 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
3877 char *s = start + (*start == '$');
3878 char tmpbuf[sizeof PL_tokenbuf];
3881 /* Mustn't actually add anything to a symbol table.
3882 But also don't want to "initialise" any placeholder
3883 constants that might already be there into full
3884 blown PVGVs with attached PVCV. */
3886 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
3888 PERL_ARGS_ASSERT_INTUIT_METHOD;
3890 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3892 if (cv && SvPOK(cv)) {
3893 const char *proto = CvPROTO(cv);
3895 while (*proto && (isSPACE(*proto) || *proto == ';'))
3902 if (*start == '$') {
3903 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3904 isUPPER(*PL_tokenbuf))
3909 return *s == '(' ? FUNCMETH : METHOD;
3912 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3913 /* start is the beginning of the possible filehandle/object,
3914 * and s is the end of it
3915 * tmpbuf is a copy of it (but with single quotes as double colons)
3918 if (!keyword(tmpbuf, len, 0)) {
3919 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3924 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3925 if (indirgv && GvCVu(indirgv))
3927 /* filehandle or package name makes it a method */
3928 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3930 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3931 return 0; /* no assumptions -- "=>" quotes bareword */
3933 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3934 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3935 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3939 return *s == '(' ? FUNCMETH : METHOD;
3945 /* Encoded script support. filter_add() effectively inserts a
3946 * 'pre-processing' function into the current source input stream.
3947 * Note that the filter function only applies to the current source file
3948 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3950 * The datasv parameter (which may be NULL) can be used to pass
3951 * private data to this instance of the filter. The filter function
3952 * can recover the SV using the FILTER_DATA macro and use it to
3953 * store private buffers and state information.
3955 * The supplied datasv parameter is upgraded to a PVIO type
3956 * and the IoDIRP/IoANY field is used to store the function pointer,
3957 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3958 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3959 * private use must be set using malloc'd pointers.
3963 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3971 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3972 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3974 if (!PL_rsfp_filters)
3975 PL_rsfp_filters = newAV();
3978 SvUPGRADE(datasv, SVt_PVIO);
3979 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3980 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3981 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3982 FPTR2DPTR(void *, IoANY(datasv)),
3983 SvPV_nolen(datasv)));
3984 av_unshift(PL_rsfp_filters, 1);
3985 av_store(PL_rsfp_filters, 0, datasv) ;
3987 !PL_parser->filtered
3988 && PL_parser->lex_flags & LEX_EVALBYTES
3989 && PL_bufptr < PL_bufend
3991 const char *s = PL_bufptr;
3992 while (s < PL_bufend) {
3994 SV *linestr = PL_parser->linestr;
3995 char *buf = SvPVX(linestr);
3996 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3997 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3998 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3999 STRLEN const linestart_pos = PL_parser->linestart - buf;
4000 STRLEN const last_uni_pos =
4001 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4002 STRLEN const last_lop_pos =
4003 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4004 av_push(PL_rsfp_filters, linestr);
4005 PL_parser->linestr =
4006 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4007 buf = SvPVX(PL_parser->linestr);
4008 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4009 PL_parser->bufptr = buf + bufptr_pos;
4010 PL_parser->oldbufptr = buf + oldbufptr_pos;
4011 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4012 PL_parser->linestart = buf + linestart_pos;
4013 if (PL_parser->last_uni)
4014 PL_parser->last_uni = buf + last_uni_pos;
4015 if (PL_parser->last_lop)
4016 PL_parser->last_lop = buf + last_lop_pos;
4017 SvLEN(linestr) = SvCUR(linestr);
4018 SvCUR(linestr) = s-SvPVX(linestr);
4019 PL_parser->filtered = 1;
4029 /* Delete most recently added instance of this filter function. */
4031 Perl_filter_del(pTHX_ filter_t funcp)
4035 PERL_ARGS_ASSERT_FILTER_DEL;
4038 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4039 FPTR2DPTR(void*, funcp)));
4041 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4043 /* if filter is on top of stack (usual case) just pop it off */
4044 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4045 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4046 sv_free(av_pop(PL_rsfp_filters));
4050 /* we need to search for the correct entry and clear it */
4051 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4055 /* Invoke the idxth filter function for the current rsfp. */
4056 /* maxlen 0 = read one text line */
4058 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4062 /* This API is bad. It should have been using unsigned int for maxlen.
4063 Not sure if we want to change the API, but if not we should sanity
4064 check the value here. */
4065 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4067 PERL_ARGS_ASSERT_FILTER_READ;
4069 if (!PL_parser || !PL_rsfp_filters)
4071 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4072 /* Provide a default input filter to make life easy. */
4073 /* Note that we append to the line. This is handy. */
4074 DEBUG_P(PerlIO_printf(Perl_debug_log,
4075 "filter_read %d: from rsfp\n", idx));
4076 if (correct_length) {
4079 const int old_len = SvCUR(buf_sv);
4081 /* ensure buf_sv is large enough */
4082 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4083 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4084 correct_length)) <= 0) {
4085 if (PerlIO_error(PL_rsfp))
4086 return -1; /* error */
4088 return 0 ; /* end of file */
4090 SvCUR_set(buf_sv, old_len + len) ;
4091 SvPVX(buf_sv)[old_len + len] = '\0';
4094 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4095 if (PerlIO_error(PL_rsfp))
4096 return -1; /* error */
4098 return 0 ; /* end of file */
4101 return SvCUR(buf_sv);
4103 /* Skip this filter slot if filter has been deleted */
4104 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4105 DEBUG_P(PerlIO_printf(Perl_debug_log,
4106 "filter_read %d: skipped (filter deleted)\n",
4108 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4110 if (SvTYPE(datasv) != SVt_PVIO) {
4111 if (correct_length) {
4113 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4114 if (!remainder) return 0; /* eof */
4115 if (correct_length > remainder) correct_length = remainder;
4116 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4117 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4120 const char *s = SvEND(datasv);
4121 const char *send = SvPVX(datasv) + SvLEN(datasv);
4129 if (s == send) return 0; /* eof */
4130 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4131 SvCUR_set(datasv, s-SvPVX(datasv));
4133 return SvCUR(buf_sv);
4135 /* Get function pointer hidden within datasv */
4136 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4137 DEBUG_P(PerlIO_printf(Perl_debug_log,
4138 "filter_read %d: via function %p (%s)\n",
4139 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4140 /* Call function. The function is expected to */
4141 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4142 /* Return: <0:error, =0:eof, >0:not eof */
4143 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4147 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4149 PERL_ARGS_ASSERT_FILTER_GETS;
4151 #ifdef PERL_CR_FILTER
4152 if (!PL_rsfp_filters) {
4153 filter_add(S_cr_textfilter,NULL);
4156 if (PL_rsfp_filters) {
4158 SvCUR_set(sv, 0); /* start with empty line */
4159 if (FILTER_READ(0, sv, 0) > 0)
4160 return ( SvPVX(sv) ) ;
4165 return (sv_gets(sv, PL_rsfp, append));
4169 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4173 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4175 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4179 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4180 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4182 return GvHV(gv); /* Foo:: */
4185 /* use constant CLASS => 'MyClass' */
4186 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4187 if (gv && GvCV(gv)) {
4188 SV * const sv = cv_const_sv(GvCV(gv));
4190 return gv_stashsv(sv, 0);
4193 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4198 S_tokenize_use(pTHX_ int is_use, char *s) {
4199 PERL_ARGS_ASSERT_TOKENIZE_USE;
4201 if (PL_expect != XSTATE)
4202 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4203 is_use ? "use" : "no"));
4206 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4207 s = force_version(s, TRUE);
4208 if (*s == ';' || *s == '}'
4209 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4210 NEXTVAL_NEXTTOKE.opval = NULL;
4213 else if (*s == 'v') {
4214 s = force_word(s,WORD,FALSE,TRUE);
4215 s = force_version(s, FALSE);
4219 s = force_word(s,WORD,FALSE,TRUE);
4220 s = force_version(s, FALSE);
4222 pl_yylval.ival = is_use;
4226 static const char* const exp_name[] =
4227 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4228 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4233 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4235 S_word_takes_any_delimeter(char *p, STRLEN len)
4237 return (len == 1 && strchr("msyq", p[0])) ||
4239 (p[0] == 't' && p[1] == 'r') ||
4240 (p[0] == 'q' && strchr("qwxr", p[1]))));
4244 S_check_scalar_slice(pTHX_ char *s)
4247 while (*s == ' ' || *s == '\t') s++;
4248 if (*s == 'q' && s[1] == 'w'
4249 && !isWORDCHAR_lazy_if(s+2,UTF))
4251 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4252 s += UTF ? UTF8SKIP(s) : 1;
4253 if (*s == '}' || *s == ']')
4254 pl_yylval.ival = OPpSLICEWARNING;
4260 Works out what to call the token just pulled out of the input
4261 stream. The yacc parser takes care of taking the ops we return and
4262 stitching them into a tree.
4265 The type of the next token
4268 Switch based on the current state:
4269 - if we already built the token before, use it
4270 - if we have a case modifier in a string, deal with that
4271 - handle other cases of interpolation inside a string
4272 - scan the next line if we are inside a format
4273 In the normal state switch on the next character:
4275 if alphabetic, go to key lookup
4276 unrecoginized character - croak
4277 - 0/4/26: handle end-of-line or EOF
4278 - cases for whitespace
4279 - \n and #: handle comments and line numbers
4280 - various operators, brackets and sigils
4283 - 'v': vstrings (or go to key lookup)
4284 - 'x' repetition operator (or go to key lookup)
4285 - other ASCII alphanumerics (key lookup begins here):
4288 scan built-in keyword (but do nothing with it yet)
4289 check for statement label
4290 check for lexical subs
4291 goto just_a_word if there is one
4292 see whether built-in keyword is overridden
4293 switch on keyword number:
4294 - default: just_a_word:
4295 not a built-in keyword; handle bareword lookup
4296 disambiguate between method and sub call
4297 fall back to bareword
4298 - cases for built-in keywords
4306 char *s = PL_bufptr;
4310 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4314 /* orig_keyword, gvp, and gv are initialized here because
4315 * jump to the label just_a_word_zero can bypass their
4316 * initialization later. */
4317 I32 orig_keyword = 0;
4322 SV* tmp = newSVpvs("");
4323 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4324 (IV)CopLINE(PL_curcop),
4325 lex_state_names[PL_lex_state],
4326 exp_name[PL_expect],
4327 pv_display(tmp, s, strlen(s), 0, 60));
4331 switch (PL_lex_state) {
4333 case LEX_INTERPNORMAL:
4336 /* when we've already built the next token, just pull it out of the queue */
4339 pl_yylval = PL_nextval[PL_nexttoke];
4341 PL_lex_state = PL_lex_defer;
4342 PL_lex_defer = LEX_NORMAL;
4346 next_type = PL_nexttype[PL_nexttoke];
4347 if (next_type & (7<<24)) {
4348 if (next_type & (1<<24)) {
4349 if (PL_lex_brackets > 100)
4350 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4351 PL_lex_brackstack[PL_lex_brackets++] =
4352 (char) ((next_type >> 16) & 0xff);
4354 if (next_type & (2<<24))
4355 PL_lex_allbrackets++;
4356 if (next_type & (4<<24))
4357 PL_lex_allbrackets--;
4358 next_type &= 0xffff;
4360 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4363 /* interpolated case modifiers like \L \U, including \Q and \E.
4364 when we get here, PL_bufptr is at the \
4366 case LEX_INTERPCASEMOD:
4368 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4370 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4371 PL_bufptr, PL_bufend, *PL_bufptr);
4373 /* handle \E or end of string */
4374 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4376 if (PL_lex_casemods) {
4377 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4378 PL_lex_casestack[PL_lex_casemods] = '\0';
4380 if (PL_bufptr != PL_bufend
4381 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4382 || oldmod == 'F')) {
4384 PL_lex_state = LEX_INTERPCONCAT;
4386 PL_lex_allbrackets--;
4389 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4390 /* Got an unpaired \E */
4391 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4392 "Useless use of \\E");
4394 if (PL_bufptr != PL_bufend)
4396 PL_lex_state = LEX_INTERPCONCAT;
4400 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4401 "### Saw case modifier\n"); });
4403 if (s[1] == '\\' && s[2] == 'E') {
4405 PL_lex_state = LEX_INTERPCONCAT;
4410 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4411 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4412 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4413 (strchr(PL_lex_casestack, 'L')
4414 || strchr(PL_lex_casestack, 'U')
4415 || strchr(PL_lex_casestack, 'F'))) {
4416 PL_lex_casestack[--PL_lex_casemods] = '\0';
4417 PL_lex_allbrackets--;
4420 if (PL_lex_casemods > 10)
4421 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4422 PL_lex_casestack[PL_lex_casemods++] = *s;
4423 PL_lex_casestack[PL_lex_casemods] = '\0';
4424 PL_lex_state = LEX_INTERPCONCAT;
4425 NEXTVAL_NEXTTOKE.ival = 0;
4426 force_next((2<<24)|'(');
4428 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4430 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4432 NEXTVAL_NEXTTOKE.ival = OP_LC;
4434 NEXTVAL_NEXTTOKE.ival = OP_UC;
4436 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4438 NEXTVAL_NEXTTOKE.ival = OP_FC;
4440 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4444 if (PL_lex_starts) {
4447 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4448 if (PL_lex_casemods == 1 && PL_lex_inpat)
4451 AopNOASSIGN(OP_CONCAT);
4457 case LEX_INTERPPUSH:
4458 return REPORT(sublex_push());
4460 case LEX_INTERPSTART:
4461 if (PL_bufptr == PL_bufend)
4462 return REPORT(sublex_done());
4463 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4464 "### Interpolated variable\n"); });
4466 /* for /@a/, we leave the joining for the regex engine to do
4467 * (unless we're within \Q etc) */
4468 PL_lex_dojoin = (*PL_bufptr == '@'
4469 && (!PL_lex_inpat || PL_lex_casemods));
4470 PL_lex_state = LEX_INTERPNORMAL;
4471 if (PL_lex_dojoin) {
4472 NEXTVAL_NEXTTOKE.ival = 0;
4474 force_ident("\"", '$');
4475 NEXTVAL_NEXTTOKE.ival = 0;
4477 NEXTVAL_NEXTTOKE.ival = 0;
4478 force_next((2<<24)|'(');
4479 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4482 /* Convert (?{...}) and friends to 'do {...}' */
4483 if (PL_lex_inpat && *PL_bufptr == '(') {
4484 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4486 if (*PL_bufptr != '{')
4488 PL_expect = XTERMBLOCK;
4492 if (PL_lex_starts++) {
4494 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495 if (!PL_lex_casemods && PL_lex_inpat)
4498 AopNOASSIGN(OP_CONCAT);
4502 case LEX_INTERPENDMAYBE:
4503 if (intuit_more(PL_bufptr)) {
4504 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4510 if (PL_lex_dojoin) {
4511 const U8 dojoin_was = PL_lex_dojoin;
4512 PL_lex_dojoin = FALSE;
4513 PL_lex_state = LEX_INTERPCONCAT;
4514 PL_lex_allbrackets--;
4515 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4517 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4518 && SvEVALED(PL_lex_repl))
4520 if (PL_bufptr != PL_bufend)
4521 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4524 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4525 re_eval_str. If the here-doc body’s length equals the previous
4526 value of re_eval_start, re_eval_start will now be null. So
4527 check re_eval_str as well. */
4528 if (PL_parser->lex_shared->re_eval_start
4529 || PL_parser->lex_shared->re_eval_str) {
4531 if (*PL_bufptr != ')')
4532 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4534 /* having compiled a (?{..}) expression, return the original
4535 * text too, as a const */
4536 if (PL_parser->lex_shared->re_eval_str) {
4537 sv = PL_parser->lex_shared->re_eval_str;
4538 PL_parser->lex_shared->re_eval_str = NULL;
4540 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4541 SvPV_shrink_to_cur(sv);
4543 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4544 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4545 NEXTVAL_NEXTTOKE.opval =
4546 (OP*)newSVOP(OP_CONST, 0,
4549 PL_parser->lex_shared->re_eval_start = NULL;
4555 case LEX_INTERPCONCAT:
4557 if (PL_lex_brackets)
4558 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4559 (long) PL_lex_brackets);
4561 if (PL_bufptr == PL_bufend)
4562 return REPORT(sublex_done());
4564 /* m'foo' still needs to be parsed for possible (?{...}) */
4565 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4566 SV *sv = newSVsv(PL_linestr);
4568 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4572 s = scan_const(PL_bufptr);
4574 PL_lex_state = LEX_INTERPCASEMOD;
4576 PL_lex_state = LEX_INTERPSTART;
4579 if (s != PL_bufptr) {
4580 NEXTVAL_NEXTTOKE = pl_yylval;
4583 if (PL_lex_starts++) {
4584 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4585 if (!PL_lex_casemods && PL_lex_inpat)
4588 AopNOASSIGN(OP_CONCAT);
4598 s = scan_formline(PL_bufptr);
4599 if (!PL_lex_formbrack)
4608 /* We really do *not* want PL_linestr ever becoming a COW. */
4609 assert (!SvIsCOW(PL_linestr));
4611 PL_oldoldbufptr = PL_oldbufptr;
4613 PL_parser->saw_infix_sigil = 0;
4618 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4621 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4622 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4624 SVs_TEMP | SVf_UTF8),
4625 10, UNI_DISPLAY_ISPRINT)
4626 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4627 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4628 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4629 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4633 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4634 UTF8fARG(UTF, (s - d), d),
4639 goto fake_eof; /* emulate EOF on ^D or ^Z */
4641 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4644 if (PL_lex_brackets &&
4645 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4646 yyerror((const char *)
4648 ? "Format not terminated"
4649 : "Missing right curly or square bracket"));
4651 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4652 "### Tokener got EOF\n");
4656 if (s++ < PL_bufend)
4657 goto retry; /* ignore stray nulls */
4660 if (!PL_in_eval && !PL_preambled) {
4661 PL_preambled = TRUE;
4663 /* Generate a string of Perl code to load the debugger.
4664 * If PERL5DB is set, it will return the contents of that,
4665 * otherwise a compile-time require of perl5db.pl. */
4667 const char * const pdb = PerlEnv_getenv("PERL5DB");
4670 sv_setpv(PL_linestr, pdb);
4671 sv_catpvs(PL_linestr,";");
4673 SETERRNO(0,SS_NORMAL);
4674 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4676 PL_parser->preambling = CopLINE(PL_curcop);
4678 sv_setpvs(PL_linestr,"");
4679 if (PL_preambleav) {
4680 SV **svp = AvARRAY(PL_preambleav);
4681 SV **const end = svp + AvFILLp(PL_preambleav);
4683 sv_catsv(PL_linestr, *svp);
4685 sv_catpvs(PL_linestr, ";");
4687 sv_free(MUTABLE_SV(PL_preambleav));
4688 PL_preambleav = NULL;
4691 sv_catpvs(PL_linestr,
4692 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4693 if (PL_minus_n || PL_minus_p) {
4694 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4696 sv_catpvs(PL_linestr,"chomp;");
4699 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4700 || *PL_splitstr == '"')
4701 && strchr(PL_splitstr + 1, *PL_splitstr))
4702 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4704 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4705 bytes can be used as quoting characters. :-) */
4706 const char *splits = PL_splitstr;
4707 sv_catpvs(PL_linestr, "our @F=split(q\0");
4710 if (*splits == '\\')
4711 sv_catpvn(PL_linestr, splits, 1);
4712 sv_catpvn(PL_linestr, splits, 1);
4713 } while (*splits++);
4714 /* This loop will embed the trailing NUL of
4715 PL_linestr as the last thing it does before
4717 sv_catpvs(PL_linestr, ");");
4721 sv_catpvs(PL_linestr,"our @F=split(' ');");
4724 sv_catpvs(PL_linestr, "\n");
4725 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4726 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4727 PL_last_lop = PL_last_uni = NULL;
4728 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4729 update_debugger_info(PL_linestr, NULL, 0);
4734 bof = PL_rsfp ? TRUE : FALSE;
4737 fake_eof = LEX_FAKE_EOF;
4739 PL_bufptr = PL_bufend;
4740 COPLINE_INC_WITH_HERELINES;
4741 if (!lex_next_chunk(fake_eof)) {
4742 CopLINE_dec(PL_curcop);
4744 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4746 CopLINE_dec(PL_curcop);
4748 /* If it looks like the start of a BOM or raw UTF-16,
4749 * check if it in fact is. */
4750 if (bof && PL_rsfp &&
4752 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4755 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4756 bof = (offset == (Off_t)SvCUR(PL_linestr));
4757 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4758 /* offset may include swallowed CR */
4760 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4763 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4764 s = swallow_bom((U8*)s);
4767 if (PL_parser->in_pod) {
4768 /* Incest with pod. */
4769 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4770 sv_setpvs(PL_linestr, "");
4771 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4772 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4773 PL_last_lop = PL_last_uni = NULL;
4774 PL_parser->in_pod = 0;
4777 if (PL_rsfp || PL_parser->filtered)
4779 } while (PL_parser->in_pod);
4780 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4781 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4782 PL_last_lop = PL_last_uni = NULL;
4783 if (CopLINE(PL_curcop) == 1) {
4784 while (s < PL_bufend && isSPACE(*s))
4786 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4790 if (*s == '#' && *(s+1) == '!')
4792 #ifdef ALTERNATE_SHEBANG
4794 static char const as[] = ALTERNATE_SHEBANG;
4795 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4796 d = s + (sizeof(as) - 1);
4798 #endif /* ALTERNATE_SHEBANG */
4807 while (*d && !isSPACE(*d))
4811 #ifdef ARG_ZERO_IS_SCRIPT
4812 if (ipathend > ipath) {
4814 * HP-UX (at least) sets argv[0] to the script name,
4815 * which makes $^X incorrect. And Digital UNIX and Linux,
4816 * at least, set argv[0] to the basename of the Perl
4817 * interpreter. So, having found "#!", we'll set it right.
4819 SV* copfilesv = CopFILESV(PL_curcop);
4822 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4824 assert(SvPOK(x) || SvGMAGICAL(x));
4825 if (sv_eq(x, copfilesv)) {
4826 sv_setpvn(x, ipath, ipathend - ipath);
4832 const char *bstart = SvPV_const(copfilesv, blen);
4833 const char * const lstart = SvPV_const(x, llen);
4835 bstart += blen - llen;
4836 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4837 sv_setpvn(x, ipath, ipathend - ipath);
4844 /* Anything to do if no copfilesv? */
4846 TAINT_NOT; /* $^X is always tainted, but that's OK */
4848 #endif /* ARG_ZERO_IS_SCRIPT */
4853 d = instr(s,"perl -");
4855 d = instr(s,"perl");
4857 /* avoid getting into infinite loops when shebang
4858 * line contains "Perl" rather than "perl" */
4860 for (d = ipathend-4; d >= ipath; --d) {
4861 if (isALPHA_FOLD_EQ(*d, 'p')
4862 && !ibcmp(d, "perl", 4))
4872 #ifdef ALTERNATE_SHEBANG
4874 * If the ALTERNATE_SHEBANG on this system starts with a
4875 * character that can be part of a Perl expression, then if
4876 * we see it but not "perl", we're probably looking at the
4877 * start of Perl code, not a request to hand off to some
4878 * other interpreter. Similarly, if "perl" is there, but
4879 * not in the first 'word' of the line, we assume the line
4880 * contains the start of the Perl program.
4882 if (d && *s != '#') {
4883 const char *c = ipath;
4884 while (*c && !strchr("; \t\r\n\f\v#", *c))
4887 d = NULL; /* "perl" not in first word; ignore */
4889 *s = '#'; /* Don't try to parse shebang line */
4891 #endif /* ALTERNATE_SHEBANG */
4896 !instr(s,"indir") &&
4897 instr(PL_origargv[0],"perl"))
4904 while (s < PL_bufend && isSPACE(*s))
4906 if (s < PL_bufend) {
4907 Newx(newargv,PL_origargc+3,char*);
4909 while (s < PL_bufend && !isSPACE(*s))
4912 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4915 newargv = PL_origargv;
4918 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4920 Perl_croak(aTHX_ "Can't exec %s", ipath);
4923 while (*d && !isSPACE(*d))
4925 while (SPACE_OR_TAB(*d))
4929 const bool switches_done = PL_doswitches;
4930 const U32 oldpdb = PL_perldb;
4931 const bool oldn = PL_minus_n;
4932 const bool oldp = PL_minus_p;
4936 bool baduni = FALSE;
4938 const char *d2 = d1 + 1;
4939 if (parse_unicode_opts((const char **)&d2)
4943 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
4944 const char * const m = d1;
4945 while (*d1 && !isSPACE(*d1))
4947 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4950 d1 = moreswitches(d1);
4952 if (PL_doswitches && !switches_done) {
4953 int argc = PL_origargc;
4954 char **argv = PL_origargv;
4957 } while (argc && argv[0][0] == '-' && argv[0][1]);
4958 init_argv_symbols(argc,argv);
4960 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4961 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4962 /* if we have already added "LINE: while (<>) {",
4963 we must not do it again */
4965 sv_setpvs(PL_linestr, "");
4966 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4968 PL_last_lop = PL_last_uni = NULL;
4969 PL_preambled = FALSE;
4970 if (PERLDB_LINE || PERLDB_SAVESRC)
4971 (void)gv_fetchfile(PL_origfilename);
4978 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4979 PL_lex_state = LEX_FORMLINE;
4980 NEXTVAL_NEXTTOKE.ival = 0;
4981 force_next(FORMRBRACK);
4986 #ifdef PERL_STRICT_CR
4987 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4989 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4991 case ' ': case '\t': case '\f': case '\v':
4996 if (PL_lex_state != LEX_NORMAL ||
4997 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4998 const bool in_comment = *s == '#';
4999 if (*s == '#' && s == PL_linestart && PL_in_eval
5000 && !PL_rsfp && !PL_parser->filtered) {
5001 /* handle eval qq[#line 1 "foo"\n ...] */
5002 CopLINE_dec(PL_curcop);
5006 while (d < PL_bufend && *d != '\n')
5010 else if (d > PL_bufend)
5011 /* Found by Ilya: feed random input to Perl. */
5012 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5015 if (in_comment && d == PL_bufend
5016 && PL_lex_state == LEX_INTERPNORMAL
5017 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5018 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5021 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5022 PL_lex_state = LEX_FORMLINE;
5023 NEXTVAL_NEXTTOKE.ival = 0;
5024 force_next(FORMRBRACK);
5029 while (s < PL_bufend && *s != '\n')
5037 else if (s > PL_bufend)
5038 /* Found by Ilya: feed random input to Perl. */
5039 Perl_croak(aTHX_ "panic: input overflow");
5043 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5051 while (s < PL_bufend && SPACE_OR_TAB(*s))
5054 if (strnEQ(s,"=>",2)) {
5055 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5056 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5057 OPERATOR('-'); /* unary minus */
5060 case 'r': ftst = OP_FTEREAD; break;
5061 case 'w': ftst = OP_FTEWRITE; break;
5062 case 'x': ftst = OP_FTEEXEC; break;
5063 case 'o': ftst = OP_FTEOWNED; break;
5064 case 'R': ftst = OP_FTRREAD; break;
5065 case 'W': ftst = OP_FTRWRITE; break;
5066 case 'X': ftst = OP_FTREXEC; break;
5067 case 'O': ftst = OP_FTROWNED; break;
5068 case 'e': ftst = OP_FTIS; break;
5069 case 'z': ftst = OP_FTZERO; break;
5070 case 's': ftst = OP_FTSIZE; break;
5071 case 'f': ftst = OP_FTFILE; break;
5072 case 'd': ftst = OP_FTDIR; break;
5073 case 'l': ftst = OP_FTLINK; break;
5074 case 'p': ftst = OP_FTPIPE; break;
5075 case 'S': ftst = OP_FTSOCK; break;
5076 case 'u': ftst = OP_FTSUID; break;
5077 case 'g': ftst = OP_FTSGID; break;
5078 case 'k': ftst = OP_FTSVTX; break;
5079 case 'b': ftst = OP_FTBLK; break;
5080 case 'c': ftst = OP_FTCHR; break;
5081 case 't': ftst = OP_FTTTY; break;
5082 case 'T': ftst = OP_FTTEXT; break;
5083 case 'B': ftst = OP_FTBINARY; break;
5084 case 'M': case 'A': case 'C':
5085 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5087 case 'M': ftst = OP_FTMTIME; break;
5088 case 'A': ftst = OP_FTATIME; break;
5089 case 'C': ftst = OP_FTCTIME; break;
5097 PL_last_uni = PL_oldbufptr;
5098 PL_last_lop_op = (OPCODE)ftst;
5099 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5100 "### Saw file test %c\n", (int)tmp);
5105 /* Assume it was a minus followed by a one-letter named
5106 * subroutine call (or a -bareword), then. */
5107 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5108 "### '-%c' looked like a file test but was not\n",
5115 const char tmp = *s++;
5118 if (PL_expect == XOPERATOR)
5123 else if (*s == '>') {
5126 if (FEATURE_POSTDEREF_IS_ENABLED && (
5127 ((*s == '$' || *s == '&') && s[1] == '*')
5128 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5129 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5130 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5133 Perl_ck_warner_d(aTHX_
5134 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5135 "Postfix dereference is experimental"
5137 PL_expect = XPOSTDEREF;
5140 if (isIDFIRST_lazy_if(s,UTF)) {
5141 s = force_word(s,METHOD,FALSE,TRUE);
5149 if (PL_expect == XOPERATOR) {
5150 if (*s == '=' && !PL_lex_allbrackets &&
5151 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5158 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5160 OPERATOR('-'); /* unary minus */
5166 const char tmp = *s++;
5169 if (PL_expect == XOPERATOR)
5174 if (PL_expect == XOPERATOR) {
5175 if (*s == '=' && !PL_lex_allbrackets &&
5176 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5183 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5190 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5191 if (PL_expect != XOPERATOR) {
5192 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5193 PL_expect = XOPERATOR;
5194 force_ident(PL_tokenbuf, '*');
5202 if (*s == '=' && !PL_lex_allbrackets &&
5203 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5209 if (*s == '=' && !PL_lex_allbrackets &&
5210 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5214 PL_parser->saw_infix_sigil = 1;
5219 if (PL_expect == XOPERATOR) {
5220 if (s[1] == '=' && !PL_lex_allbrackets &&
5221 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5224 PL_parser->saw_infix_sigil = 1;
5227 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5228 PL_tokenbuf[0] = '%';
5229 s = scan_ident(s, PL_tokenbuf + 1,
5230 sizeof PL_tokenbuf - 1, FALSE);
5232 if (!PL_tokenbuf[1]) {
5235 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5237 PL_tokenbuf[0] = '@';
5239 PL_expect = XOPERATOR;
5240 force_ident_maybe_lex('%');
5245 bof = FEATURE_BITWISE_IS_ENABLED;
5246 if (bof && s[1] == '.')
5248 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5249 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5255 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5257 if (PL_lex_brackets > 100)
5258 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5259 PL_lex_brackstack[PL_lex_brackets++] = 0;
5260 PL_lex_allbrackets++;
5262 const char tmp = *s++;
5267 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5269 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5272 Perl_ck_warner_d(aTHX_
5273 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5274 "Smartmatch is experimental");
5278 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5280 BCop(OP_SCOMPLEMENT);
5282 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5284 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5291 goto just_a_word_zero_gv;
5297 switch (PL_expect) {
5299 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5301 PL_bufptr = s; /* update in case we back off */
5304 "Use of := for an empty attribute list is not allowed");
5311 PL_expect = XTERMBLOCK;
5315 while (isIDFIRST_lazy_if(s,UTF)) {
5318 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5319 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5320 if (tmp < 0) tmp = -tmp;
5335 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5337 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5338 COPLINE_SET_FROM_MULTI_END;
5340 /* MUST advance bufptr here to avoid bogus
5341 "at end of line" context messages from yyerror().
5343 PL_bufptr = s + len;
5344 yyerror("Unterminated attribute parameter in attribute list");
5348 return REPORT(0); /* EOF indicator */
5352 sv_catsv(sv, PL_lex_stuff);
5353 attrs = op_append_elem(OP_LIST, attrs,
5354 newSVOP(OP_CONST, 0, sv));
5355 SvREFCNT_dec(PL_lex_stuff);
5356 PL_lex_stuff = NULL;
5359 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5361 if (PL_in_my == KEY_our) {
5362 deprecate(":unique");
5365 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5368 /* NOTE: any CV attrs applied here need to be part of
5369 the CVf_BUILTIN_ATTRS define in cv.h! */
5370 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5372 CvLVALUE_on(PL_compcv);
5374 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5376 deprecate(":locked");
5378 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5380 CvMETHOD_on(PL_compcv);
5382 else if (!PL_in_my && len == 5
5383 && strnEQ(SvPVX(sv), "const", len))
5386 Perl_ck_warner_d(aTHX_
5387 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5388 ":const is experimental"
5390 CvANONCONST_on(PL_compcv);
5391 if (!CvANON(PL_compcv))
5392 yyerror(":const is not permitted on named "
5395 /* After we've set the flags, it could be argued that
5396 we don't need to do the attributes.pm-based setting
5397 process, and shouldn't bother appending recognized
5398 flags. To experiment with that, uncomment the
5399 following "else". (Note that's already been
5400 uncommented. That keeps the above-applied built-in
5401 attributes from being intercepted (and possibly
5402 rejected) by a package's attribute routines, but is
5403 justified by the performance win for the common case
5404 of applying only built-in attributes.) */
5406 attrs = op_append_elem(OP_LIST, attrs,
5407 newSVOP(OP_CONST, 0,
5411 if (*s == ':' && s[1] != ':')
5414 break; /* require real whitespace or :'s */
5415 /* XXX losing whitespace on sequential attributes here */
5418 if (*s != ';' && *s != '}' &&
5419 !(PL_expect == XOPERATOR
5420 ? (*s == '=' || *s == ')')
5421 : (*s == '{' || *s == '('))) {
5422 const char q = ((*s == '\'') ? '"' : '\'');
5423 /* If here for an expression, and parsed no attrs, back
5425 if (PL_expect == XOPERATOR && !attrs) {
5429 /* MUST advance bufptr here to avoid bogus "at end of line"
5430 context messages from yyerror().
5433 yyerror( (const char *)
5435 ? Perl_form(aTHX_ "Invalid separator character "
5436 "%c%c%c in attribute list", q, *s, q)
5437 : "Unterminated attribute list" ) );
5445 NEXTVAL_NEXTTOKE.opval = attrs;
5451 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5455 PL_lex_allbrackets--;
5459 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5460 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5464 PL_lex_allbrackets++;
5467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5474 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5477 PL_lex_allbrackets--;
5483 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5486 if (PL_lex_brackets <= 0)
5487 /* diag_listed_as: Unmatched right %s bracket */
5488 yyerror("Unmatched right square bracket");
5491 PL_lex_allbrackets--;
5492 if (PL_lex_state == LEX_INTERPNORMAL) {
5493 if (PL_lex_brackets == 0) {
5494 if (*s == '-' && s[1] == '>')
5495 PL_lex_state = LEX_INTERPENDMAYBE;
5496 else if (*s != '[' && *s != '{')
5497 PL_lex_state = LEX_INTERPEND;
5504 if (PL_lex_brackets > 100) {
5505 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5507 switch (PL_expect) {
5509 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5510 PL_lex_allbrackets++;
5511 OPERATOR(HASHBRACK);
5513 while (s < PL_bufend && SPACE_OR_TAB(*s))
5516 PL_tokenbuf[0] = '\0';
5517 if (d < PL_bufend && *d == '-') {
5518 PL_tokenbuf[0] = '-';
5520 while (d < PL_bufend && SPACE_OR_TAB(*d))
5523 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5524 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5526 while (d < PL_bufend && SPACE_OR_TAB(*d))
5529 const char minus = (PL_tokenbuf[0] == '-');
5530 s = force_word(s + minus, WORD, FALSE, TRUE);
5538 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5539 PL_lex_allbrackets++;
5544 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5545 PL_lex_allbrackets++;
5549 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5550 PL_lex_allbrackets++;
5555 if (PL_oldoldbufptr == PL_last_lop)
5556 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5558 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5559 PL_lex_allbrackets++;
5562 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5564 /* This hack is to get the ${} in the message. */
5566 yyerror("syntax error");
5569 OPERATOR(HASHBRACK);
5571 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5572 /* ${...} or @{...} etc., but not print {...}
5573 * Skip the disambiguation and treat this as a block.
5575 goto block_expectation;
5577 /* This hack serves to disambiguate a pair of curlies
5578 * as being a block or an anon hash. Normally, expectation
5579 * determines that, but in cases where we're not in a
5580 * position to expect anything in particular (like inside
5581 * eval"") we have to resolve the ambiguity. This code
5582 * covers the case where the first term in the curlies is a
5583 * quoted string. Most other cases need to be explicitly
5584 * disambiguated by prepending a "+" before the opening
5585 * curly in order to force resolution as an anon hash.
5587 * XXX should probably propagate the outer expectation
5588 * into eval"" to rely less on this hack, but that could
5589 * potentially break current behavior of eval"".
5593 if (*s == '\'' || *s == '"' || *s == '`') {
5594 /* common case: get past first string, handling escapes */
5595 for (t++; t < PL_bufend && *t != *s;)
5600 else if (*s == 'q') {
5603 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5604 && !isWORDCHAR(*t))))
5606 /* skip q//-like construct */
5608 char open, close, term;
5611 while (t < PL_bufend && isSPACE(*t))
5613 /* check for q => */
5614 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5615 OPERATOR(HASHBRACK);
5619 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5623 for (t++; t < PL_bufend; t++) {
5624 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5626 else if (*t == open)
5630 for (t++; t < PL_bufend; t++) {
5631 if (*t == '\\' && t+1 < PL_bufend)
5633 else if (*t == close && --brackets <= 0)
5635 else if (*t == open)
5642 /* skip plain q word */
5643 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5646 else if (isWORDCHAR_lazy_if(t,UTF)) {
5648 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5651 while (t < PL_bufend && isSPACE(*t))
5653 /* if comma follows first term, call it an anon hash */
5654 /* XXX it could be a comma expression with loop modifiers */
5655 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5656 || (*t == '=' && t[1] == '>')))
5657 OPERATOR(HASHBRACK);
5658 if (PL_expect == XREF)
5661 /* If there is an opening brace or 'sub:', treat it
5662 as a term to make ${{...}}{k} and &{sub:attr...}
5663 dwim. Otherwise, treat it as a statement, so
5664 map {no strict; ...} works.
5671 if (strnEQ(s, "sub", 3)) {
5682 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5688 pl_yylval.ival = CopLINE(PL_curcop);
5689 PL_copline = NOLINE; /* invalidate current command line number */
5690 TOKEN(formbrack ? '=' : '{');
5692 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5696 if (PL_lex_brackets <= 0)
5697 /* diag_listed_as: Unmatched right %s bracket */
5698 yyerror("Unmatched right curly bracket");
5700 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5701 PL_lex_allbrackets--;
5702 if (PL_lex_state == LEX_INTERPNORMAL) {
5703 if (PL_lex_brackets == 0) {
5704 if (PL_expect & XFAKEBRACK) {
5705 PL_expect &= XENUMMASK;
5706 PL_lex_state = LEX_INTERPEND;
5708 return yylex(); /* ignore fake brackets */
5710 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5711 && SvEVALED(PL_lex_repl))
5712 PL_lex_state = LEX_INTERPEND;
5713 else if (*s == '-' && s[1] == '>')
5714 PL_lex_state = LEX_INTERPENDMAYBE;
5715 else if (*s != '[' && *s != '{')
5716 PL_lex_state = LEX_INTERPEND;
5719 if (PL_expect & XFAKEBRACK) {
5720 PL_expect &= XENUMMASK;
5722 return yylex(); /* ignore fake brackets */
5724 force_next(formbrack ? '.' : '}');
5725 if (formbrack) LEAVE;
5726 if (formbrack == 2) { /* means . where arguments were expected */
5732 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5735 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5736 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5743 if (PL_expect == XOPERATOR) {
5744 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5745 && isIDFIRST_lazy_if(s,UTF))
5747 CopLINE_dec(PL_curcop);
5748 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5749 CopLINE_inc(PL_curcop);
5752 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5754 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5761 PL_parser->saw_infix_sigil = 1;
5762 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5768 PL_tokenbuf[0] = '&';
5769 s = scan_ident(s - 1, PL_tokenbuf + 1,
5770 sizeof PL_tokenbuf - 1, TRUE);
5771 if (PL_tokenbuf[1]) {
5772 PL_expect = XOPERATOR;
5773 force_ident_maybe_lex('&');
5777 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5783 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5784 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5792 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5794 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5795 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5799 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
5803 const char tmp = *s++;
5805 if (!PL_lex_allbrackets &&
5806 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5813 if (!PL_lex_allbrackets &&
5814 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5822 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5823 && strchr("+-*/%.^&|<",tmp))
5824 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5825 "Reversed %c= operator",(int)tmp);
5827 if (PL_expect == XSTATE && isALPHA(tmp) &&
5828 (s == PL_linestart+1 || s[-2] == '\n') )
5830 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5831 || PL_lex_state != LEX_NORMAL) {
5836 if (strnEQ(s,"=cut",4)) {
5850 PL_parser->in_pod = 1;
5854 if (PL_expect == XBLOCK) {
5856 #ifdef PERL_STRICT_CR
5857 while (SPACE_OR_TAB(*t))
5859 while (SPACE_OR_TAB(*t) || *t == '\r')
5862 if (*t == '\n' || *t == '#') {
5865 SAVEI8(PL_parser->form_lex_state);
5866 SAVEI32(PL_lex_formbrack);
5867 PL_parser->form_lex_state = PL_lex_state;
5868 PL_lex_formbrack = PL_lex_brackets + 1;
5872 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5881 const char tmp = *s++;
5883 /* was this !=~ where !~ was meant?
5884 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5886 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5887 const char *t = s+1;
5889 while (t < PL_bufend && isSPACE(*t))
5892 if (*t == '/' || *t == '?' ||
5893 ((*t == 'm' || *t == 's' || *t == 'y')
5894 && !isWORDCHAR(t[1])) ||
5895 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5896 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5897 "!=~ should be !~");
5899 if (!PL_lex_allbrackets &&
5900 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5912 if (PL_expect != XOPERATOR) {
5913 if (s[1] != '<' && !strchr(s,'>'))
5915 if (s[1] == '<' && s[2] != '>')
5916 s = scan_heredoc(s);
5918 s = scan_inputsymbol(s);
5919 PL_expect = XOPERATOR;
5920 TOKEN(sublex_start());
5926 if (*s == '=' && !PL_lex_allbrackets &&
5927 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5931 SHop(OP_LEFT_SHIFT);
5936 if (!PL_lex_allbrackets &&
5937 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5944 if (!PL_lex_allbrackets &&
5945 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5953 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5961 const char tmp = *s++;
5963 if (*s == '=' && !PL_lex_allbrackets &&
5964 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5968 SHop(OP_RIGHT_SHIFT);
5970 else if (tmp == '=') {
5971 if (!PL_lex_allbrackets &&
5972 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5980 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5989 if (PL_expect == XOPERATOR) {
5990 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5991 return deprecate_commaless_var_list();
5994 else if (PL_expect == XPOSTDEREF) {
5997 POSTDEREF(DOLSHARP);
6002 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6003 PL_tokenbuf[0] = '@';
6004 s = scan_ident(s + 1, PL_tokenbuf + 1,
6005 sizeof PL_tokenbuf - 1, FALSE);
6006 if (PL_expect == XOPERATOR)
6007 no_op("Array length", s);
6008 if (!PL_tokenbuf[1])
6010 PL_expect = XOPERATOR;
6011 force_ident_maybe_lex('#');
6015 PL_tokenbuf[0] = '$';
6016 s = scan_ident(s, PL_tokenbuf + 1,
6017 sizeof PL_tokenbuf - 1, FALSE);
6018 if (PL_expect == XOPERATOR)
6020 if (!PL_tokenbuf[1]) {
6022 yyerror("Final $ should be \\$ or $name");
6028 const char tmp = *s;
6029 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6032 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6033 && intuit_more(s)) {
6035 PL_tokenbuf[0] = '@';
6036 if (ckWARN(WARN_SYNTAX)) {
6039 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6042 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6043 while (t < PL_bufend && *t != ']')
6045 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6046 "Multidimensional syntax %.*s not supported",
6047 (int)((t - PL_bufptr) + 1), PL_bufptr);
6051 else if (*s == '{') {
6053 PL_tokenbuf[0] = '%';
6054 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6055 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6057 char tmpbuf[sizeof PL_tokenbuf];
6060 } while (isSPACE(*t));
6061 if (isIDFIRST_lazy_if(t,UTF)) {
6063 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6068 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6069 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6070 "You need to quote \"%"UTF8f"\"",
6071 UTF8fARG(UTF, len, tmpbuf));
6077 PL_expect = XOPERATOR;
6078 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6079 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6080 if (!islop || PL_last_lop_op == OP_GREPSTART)
6081 PL_expect = XOPERATOR;
6082 else if (strchr("$@\"'`q", *s))
6083 PL_expect = XTERM; /* e.g. print $fh "foo" */
6084 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6085 PL_expect = XTERM; /* e.g. print $fh &sub */
6086 else if (isIDFIRST_lazy_if(s,UTF)) {
6087 char tmpbuf[sizeof PL_tokenbuf];
6089 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6090 if ((t2 = keyword(tmpbuf, len, 0))) {
6091 /* binary operators exclude handle interpretations */
6103 PL_expect = XTERM; /* e.g. print $fh length() */
6108 PL_expect = XTERM; /* e.g. print $fh subr() */
6111 else if (isDIGIT(*s))
6112 PL_expect = XTERM; /* e.g. print $fh 3 */
6113 else if (*s == '.' && isDIGIT(s[1]))
6114 PL_expect = XTERM; /* e.g. print $fh .3 */
6115 else if ((*s == '?' || *s == '-' || *s == '+')
6116 && !isSPACE(s[1]) && s[1] != '=')
6117 PL_expect = XTERM; /* e.g. print $fh -1 */
6118 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6120 PL_expect = XTERM; /* e.g. print $fh /.../
6121 XXX except DORDOR operator
6123 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6125 PL_expect = XTERM; /* print $fh <<"EOF" */
6128 force_ident_maybe_lex('$');
6132 if (PL_expect == XOPERATOR)
6134 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6135 PL_tokenbuf[0] = '@';
6136 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6138 if (!PL_tokenbuf[1]) {
6141 if (PL_lex_state == LEX_NORMAL)
6143 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6145 PL_tokenbuf[0] = '%';
6147 /* Warn about @ where they meant $. */
6148 if (*s == '[' || *s == '{') {
6149 if (ckWARN(WARN_SYNTAX)) {
6150 S_check_scalar_slice(aTHX_ s);
6154 PL_expect = XOPERATOR;
6155 force_ident_maybe_lex('@');
6158 case '/': /* may be division, defined-or, or pattern */
6159 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6160 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6161 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6166 else if (PL_expect == XOPERATOR) {
6168 if (*s == '=' && !PL_lex_allbrackets &&
6169 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6176 /* Disable warning on "study /blah/" */
6177 if (PL_oldoldbufptr == PL_last_uni
6178 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6179 || memNE(PL_last_uni, "study", 5)
6180 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6183 s = scan_pat(s,OP_MATCH);
6184 TERM(sublex_start());
6187 case '?': /* conditional */
6189 if (!PL_lex_allbrackets &&
6190 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6194 PL_lex_allbrackets++;
6198 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6199 #ifdef PERL_STRICT_CR
6202 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6204 && (s == PL_linestart || s[-1] == '\n') )
6207 formbrack = 2; /* dot seen where arguments expected */
6210 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6214 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6217 if (!PL_lex_allbrackets &&
6218 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6225 pl_yylval.ival = OPf_SPECIAL;
6231 if (*s == '=' && !PL_lex_allbrackets &&
6232 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6239 case '0': case '1': case '2': case '3': case '4':
6240 case '5': case '6': case '7': case '8': case '9':
6241 s = scan_num(s, &pl_yylval);
6242 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6243 if (PL_expect == XOPERATOR)
6248 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6251 COPLINE_SET_FROM_MULTI_END;
6252 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6253 if (PL_expect == XOPERATOR) {
6254 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6255 return deprecate_commaless_var_list();
6260 pl_yylval.ival = OP_CONST;
6261 TERM(sublex_start());
6264 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6267 printbuf("### Saw string before %s\n", s);
6269 PerlIO_printf(Perl_debug_log,
6270 "### Saw unterminated string\n");
6272 if (PL_expect == XOPERATOR) {
6273 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6274 return deprecate_commaless_var_list();
6281 pl_yylval.ival = OP_CONST;
6282 /* FIXME. I think that this can be const if char *d is replaced by
6283 more localised variables. */
6284 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6285 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6286 pl_yylval.ival = OP_STRINGIFY;
6290 if (pl_yylval.ival == OP_CONST)
6291 COPLINE_SET_FROM_MULTI_END;
6292 TERM(sublex_start());
6295 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6296 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6297 if (PL_expect == XOPERATOR)
6298 no_op("Backticks",s);
6301 pl_yylval.ival = OP_BACKTICK;
6302 TERM(sublex_start());
6306 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6308 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6310 if (PL_expect == XOPERATOR)
6311 no_op("Backslash",s);
6315 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6316 char *start = s + 2;
6317 while (isDIGIT(*start) || *start == '_')
6319 if (*start == '.' && isDIGIT(start[1])) {
6320 s = scan_num(s, &pl_yylval);
6323 else if ((*start == ':' && start[1] == ':')
6324 || (PL_expect == XSTATE && *start == ':'))
6326 else if (PL_expect == XSTATE) {
6328 while (d < PL_bufend && isSPACE(*d)) d++;
6329 if (*d == ':') goto keylookup;
6331 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6332 if (!isALPHA(*start) && (PL_expect == XTERM
6333 || PL_expect == XREF || PL_expect == XSTATE
6334 || PL_expect == XTERMORDORDOR)) {
6335 GV *const gv = gv_fetchpvn_flags(s, start - s,
6336 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6338 s = scan_num(s, &pl_yylval);
6345 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6398 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6400 /* Some keywords can be followed by any delimiter, including ':' */
6401 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6403 /* x::* is just a word, unless x is "CORE" */
6404 if (!anydelim && *s == ':' && s[1] == ':') {
6405 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6410 while (d < PL_bufend && isSPACE(*d))
6411 d++; /* no comments skipped here, or s### is misparsed */
6413 /* Is this a word before a => operator? */
6414 if (*d == '=' && d[1] == '>') {
6418 = (OP*)newSVOP(OP_CONST, 0,
6419 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6420 pl_yylval.opval->op_private = OPpCONST_BARE;
6424 /* Check for plugged-in keyword */
6428 char *saved_bufptr = PL_bufptr;
6430 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6432 if (result == KEYWORD_PLUGIN_DECLINE) {
6433 /* not a plugged-in keyword */
6434 PL_bufptr = saved_bufptr;
6435 } else if (result == KEYWORD_PLUGIN_STMT) {
6436 pl_yylval.opval = o;
6438 if (!PL_nexttoke) PL_expect = XSTATE;
6439 return REPORT(PLUGSTMT);
6440 } else if (result == KEYWORD_PLUGIN_EXPR) {
6441 pl_yylval.opval = o;
6443 if (!PL_nexttoke) PL_expect = XOPERATOR;
6444 return REPORT(PLUGEXPR);
6446 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6451 /* Check for built-in keyword */
6452 tmp = keyword(PL_tokenbuf, len, 0);
6454 /* Is this a label? */
6455 if (!anydelim && PL_expect == XSTATE
6456 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6458 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6459 pl_yylval.pval[len] = '\0';
6460 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6465 /* Check for lexical sub */
6466 if (PL_expect != XOPERATOR) {
6467 char tmpbuf[sizeof PL_tokenbuf + 1];
6469 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6470 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6471 if (off != NOT_IN_PAD) {
6472 assert(off); /* we assume this is boolean-true below */
6473 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6474 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6475 HEK * const stashname = HvNAME_HEK(stash);
6476 sv = newSVhek(stashname);
6477 sv_catpvs(sv, "::");
6478 sv_catpvn_flags(sv, PL_tokenbuf, len,
6479 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6480 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6490 rv2cv_op = newOP(OP_PADANY, 0);
6491 rv2cv_op->op_targ = off;
6492 cv = find_lexical_cv(off);
6500 if (tmp < 0) { /* second-class keyword? */
6501 GV *ogv = NULL; /* override (winner) */
6502 GV *hgv = NULL; /* hidden (loser) */
6503 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6505 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6506 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6510 if (GvIMPORTED_CV(gv))
6512 else if (! CvMETHOD(cv))
6516 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6520 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6521 : SvPCS_IMPORTED(gv)
6522 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6531 tmp = 0; /* overridden by import or by GLOBAL */
6534 && -tmp==KEY_lock /* XXX generalizable kludge */
6537 tmp = 0; /* any sub overrides "weak" keyword */
6539 else { /* no override */
6541 if (tmp == KEY_dump) {
6542 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6543 "dump() better written as CORE::dump()");
6547 if (hgv && tmp != KEY_x) /* never ambiguous */
6548 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6549 "Ambiguous call resolved as CORE::%s(), "
6550 "qualify as such or use &",
6555 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6556 && (!anydelim || *s != '#')) {
6557 /* no override, and not s### either; skipspace is safe here
6558 * check for => on following line */
6560 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6561 STRLEN soff = s - SvPVX(PL_linestr);
6562 s = skipspace_flags(s, LEX_NO_INCLINE);
6563 arrow = *s == '=' && s[1] == '>';
6564 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6565 s = SvPVX(PL_linestr) + soff;
6573 default: /* not a keyword */
6574 /* Trade off - by using this evil construction we can pull the
6575 variable gv into the block labelled keylookup. If not, then
6576 we have to give it function scope so that the goto from the
6577 earlier ':' case doesn't bypass the initialisation. */
6579 just_a_word_zero_gv:
6591 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6595 /* Get the rest if it looks like a package qualifier */
6597 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6599 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6602 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6603 UTF8fARG(UTF, len, PL_tokenbuf),
6604 *s == '\'' ? "'" : "::");
6609 if (PL_expect == XOPERATOR) {
6610 if (PL_bufptr == PL_linestart) {
6611 CopLINE_dec(PL_curcop);
6612 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6613 CopLINE_inc(PL_curcop);
6616 no_op("Bareword",s);
6619 /* See if the name is "Foo::",
6620 in which case Foo is a bareword
6621 (and a package name). */
6624 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6626 if (ckWARN(WARN_BAREWORD)
6627 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6628 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6629 "Bareword \"%"UTF8f"\" refers to nonexistent package",
6630 UTF8fARG(UTF, len, PL_tokenbuf));
6632 PL_tokenbuf[len] = '\0';
6641 /* if we saw a global override before, get the right name */
6644 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6647 SV * const tmp_sv = sv;
6648 sv = newSVpvs("CORE::GLOBAL::");
6649 sv_catsv(sv, tmp_sv);
6650 SvREFCNT_dec(tmp_sv);
6654 /* Presume this is going to be a bareword of some sort. */
6656 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6657 pl_yylval.opval->op_private = OPpCONST_BARE;
6659 /* And if "Foo::", then that's what it certainly is. */
6665 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6666 const_op->op_private = OPpCONST_BARE;
6668 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6672 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6675 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6678 /* Use this var to track whether intuit_method has been
6679 called. intuit_method returns 0 or > 255. */
6682 /* See if it's the indirect object for a list operator. */
6684 if (PL_oldoldbufptr &&
6685 PL_oldoldbufptr < PL_bufptr &&
6686 (PL_oldoldbufptr == PL_last_lop
6687 || PL_oldoldbufptr == PL_last_uni) &&
6688 /* NO SKIPSPACE BEFORE HERE! */
6689 (PL_expect == XREF ||
6690 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6692 bool immediate_paren = *s == '(';
6694 /* (Now we can afford to cross potential line boundary.) */
6697 /* Two barewords in a row may indicate method call. */
6699 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6700 (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6704 /* If not a declared subroutine, it's an indirect object. */
6705 /* (But it's an indir obj regardless for sort.) */
6706 /* Also, if "_" follows a filetest operator, it's a bareword */
6709 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6711 (PL_last_lop_op != OP_MAPSTART &&
6712 PL_last_lop_op != OP_GREPSTART))))
6713 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6714 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6717 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6722 PL_expect = XOPERATOR;
6725 /* Is this a word before a => operator? */
6726 if (*s == '=' && s[1] == '>' && !pkgname) {
6729 if (gvp || (lex && !off)) {
6730 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6731 /* This is our own scalar, created a few lines
6732 above, so this is safe. */
6734 sv_setpv(sv, PL_tokenbuf);
6735 if (UTF && !IN_BYTES
6736 && is_utf8_string((U8*)PL_tokenbuf, len))
6743 /* If followed by a paren, it's certainly a subroutine. */
6748 while (SPACE_OR_TAB(*d))
6750 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6755 NEXTVAL_NEXTTOKE.opval =
6756 off ? rv2cv_op : pl_yylval.opval;
6758 op_free(pl_yylval.opval), force_next(PRIVATEREF);
6759 else op_free(rv2cv_op), force_next(WORD);
6764 /* If followed by var or block, call it a method (unless sub) */
6766 if ((*s == '$' || *s == '{') && !cv) {
6768 PL_last_lop = PL_oldbufptr;
6769 PL_last_lop_op = OP_METHOD;
6770 if (!PL_lex_allbrackets &&
6771 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6772 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6773 PL_expect = XBLOCKTERM;
6775 return REPORT(METHOD);
6778 /* If followed by a bareword, see if it looks like indir obj. */
6780 if (tmp == 1 && !orig_keyword
6781 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6782 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6785 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
6787 sv_setpvn(sv, PL_tokenbuf, len);
6788 if (UTF && !IN_BYTES
6789 && is_utf8_string((U8*)PL_tokenbuf, len))
6791 else SvUTF8_off(sv);
6794 if (tmp == METHOD && !PL_lex_allbrackets &&
6795 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6796 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6800 /* Not a method, so call it a subroutine (if defined) */
6803 /* Check for a constant sub */
6804 if ((sv = cv_const_sv_or_av(cv))) {
6807 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6808 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6809 if (SvTYPE(sv) == SVt_PVAV)
6810 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6813 pl_yylval.opval->op_private = 0;
6814 pl_yylval.opval->op_folded = 1;
6815 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6820 op_free(pl_yylval.opval);
6822 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6823 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6824 PL_last_lop = PL_oldbufptr;
6825 PL_last_lop_op = OP_ENTERSUB;
6826 /* Is there a prototype? */
6830 STRLEN protolen = CvPROTOLEN(cv);
6831 const char *proto = CvPROTO(cv);
6833 proto = S_strip_spaces(aTHX_ proto, &protolen);
6836 if ((optional = *proto == ';'))
6839 while (*proto == ';');
6843 *proto == '$' || *proto == '_'
6844 || *proto == '*' || *proto == '+'
6849 *proto == '\\' && proto[1] && proto[2] == '\0'
6852 UNIPROTO(UNIOPSUB,optional);
6853 if (*proto == '\\' && proto[1] == '[') {
6854 const char *p = proto + 2;
6855 while(*p && *p != ']')
6857 if(*p == ']' && !p[1])
6858 UNIPROTO(UNIOPSUB,optional);
6860 if (*proto == '&' && *s == '{') {
6862 sv_setpvs(PL_subname, "__ANON__");
6864 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6865 if (!PL_lex_allbrackets &&
6866 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6867 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6871 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6873 force_next(off ? PRIVATEREF : WORD);
6874 if (!PL_lex_allbrackets &&
6875 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6876 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6880 /* Call it a bare word */
6882 if (PL_hints & HINT_STRICT_SUBS)
6883 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6886 /* after "print" and similar functions (corresponding to
6887 * "F? L" in opcode.pl), whatever wasn't already parsed as
6888 * a filehandle should be subject to "strict subs".
6889 * Likewise for the optional indirect-object argument to system
6890 * or exec, which can't be a bareword */
6891 if ((PL_last_lop_op == OP_PRINT
6892 || PL_last_lop_op == OP_PRTF
6893 || PL_last_lop_op == OP_SAY
6894 || PL_last_lop_op == OP_SYSTEM
6895 || PL_last_lop_op == OP_EXEC)
6896 && (PL_hints & HINT_STRICT_SUBS))
6897 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6898 if (lastchar != '-') {
6899 if (ckWARN(WARN_RESERVED)) {
6903 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6905 /* PL_warn_reserved is constant */
6906 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6907 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6917 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6918 && saw_infix_sigil) {
6919 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6920 "Operator or semicolon missing before %c%"UTF8f,
6922 UTF8fARG(UTF, strlen(PL_tokenbuf),
6924 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6925 "Ambiguous use of %c resolved as operator %c",
6926 lastchar, lastchar);
6933 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6938 (OP*)newSVOP(OP_CONST, 0,
6939 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6942 case KEY___PACKAGE__:
6944 (OP*)newSVOP(OP_CONST, 0,
6946 ? newSVhek(HvNAME_HEK(PL_curstash))
6953 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6954 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6957 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6959 gv_init(gv,stash,"DATA",4,0);
6962 GvIOp(gv) = newIO();
6963 IoIFP(GvIOp(gv)) = PL_rsfp;
6964 #if defined(HAS_FCNTL) && defined(F_SETFD)
6966 const int fd = PerlIO_fileno(PL_rsfp);
6967 fcntl(fd,F_SETFD,fd >= 3);
6970 /* Mark this internal pseudo-handle as clean */
6971 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6972 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6973 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6975 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6976 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6977 /* if the script was opened in binmode, we need to revert
6978 * it to text mode for compatibility; but only iff it has CRs
6979 * XXX this is a questionable hack at best. */
6980 if (PL_bufend-PL_bufptr > 2
6981 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6984 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6985 loc = PerlIO_tell(PL_rsfp);
6986 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6989 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6991 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6992 #endif /* NETWARE */
6994 PerlIO_seek(PL_rsfp, loc, 0);
6998 #ifdef PERLIO_LAYERS
7001 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7002 else if (IN_ENCODING) {
7008 XPUSHs(_get_encoding());
7010 call_method("name", G_SCALAR);
7014 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7015 Perl_form(aTHX_ ":encoding(%"SVf")",
7028 FUN0OP(CvCLONE(PL_compcv)
7029 ? newOP(OP_RUNCV, 0)
7030 : newPVOP(OP_RUNCV,0,NULL));
7039 if (PL_expect == XSTATE) {
7050 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7051 if ((*s == ':' && s[1] == ':')
7052 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7056 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7060 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7061 UTF8fARG(UTF, len, PL_tokenbuf));
7064 else if (tmp == KEY_require || tmp == KEY_do
7066 /* that's a way to remember we saw "CORE::" */
7078 LOP(OP_ACCEPT,XTERM);
7081 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7086 LOP(OP_ATAN2,XTERM);
7092 LOP(OP_BINMODE,XTERM);
7095 LOP(OP_BLESS,XTERM);
7104 /* We have to disambiguate the two senses of
7105 "continue". If the next token is a '{' then
7106 treat it as the start of a continue block;
7107 otherwise treat it as a control operator.
7117 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7127 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7136 if (!PL_cryptseen) {
7137 PL_cryptseen = TRUE;
7141 LOP(OP_CRYPT,XTERM);
7144 LOP(OP_CHMOD,XTERM);
7147 LOP(OP_CHOWN,XTERM);
7150 LOP(OP_CONNECT,XTERM);
7170 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7172 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7173 && !keyword(PL_tokenbuf + 1, len, 0)) {
7176 force_ident_maybe_lex('&');
7181 if (orig_keyword == KEY_do) {
7190 PL_hints |= HINT_BLOCK_SCOPE;
7200 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7201 STR_WITH_LEN("NDBM_File::"),
7202 STR_WITH_LEN("DB_File::"),
7203 STR_WITH_LEN("GDBM_File::"),
7204 STR_WITH_LEN("SDBM_File::"),
7205 STR_WITH_LEN("ODBM_File::"),
7207 LOP(OP_DBMOPEN,XTERM);
7219 pl_yylval.ival = CopLINE(PL_curcop);
7223 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7235 if (*s == '{') { /* block eval */
7236 PL_expect = XTERMBLOCK;
7237 UNIBRACK(OP_ENTERTRY);
7239 else { /* string eval */
7241 UNIBRACK(OP_ENTEREVAL);
7246 UNIBRACK(-OP_ENTEREVAL);
7260 case KEY_endhostent:
7266 case KEY_endservent:
7269 case KEY_endprotoent:
7280 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7282 pl_yylval.ival = CopLINE(PL_curcop);
7284 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7287 if ((PL_bufend - p) >= 3 &&
7288 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7290 else if ((PL_bufend - p) >= 4 &&
7291 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7294 /* skip optional package name, as in "for my abc $x (..)" */
7295 if (isIDFIRST_lazy_if(p,UTF)) {
7296 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7300 Perl_croak(aTHX_ "Missing $ on loop variable");
7305 LOP(OP_FORMLINE,XTERM);
7314 LOP(OP_FCNTL,XTERM);
7320 LOP(OP_FLOCK,XTERM);
7323 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7328 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7333 LOP(OP_GREPSTART, XREF);
7350 case KEY_getpriority:
7351 LOP(OP_GETPRIORITY,XTERM);
7353 case KEY_getprotobyname:
7356 case KEY_getprotobynumber:
7357 LOP(OP_GPBYNUMBER,XTERM);
7359 case KEY_getprotoent:
7371 case KEY_getpeername:
7372 UNI(OP_GETPEERNAME);
7374 case KEY_gethostbyname:
7377 case KEY_gethostbyaddr:
7378 LOP(OP_GHBYADDR,XTERM);
7380 case KEY_gethostent:
7383 case KEY_getnetbyname:
7386 case KEY_getnetbyaddr:
7387 LOP(OP_GNBYADDR,XTERM);
7392 case KEY_getservbyname:
7393 LOP(OP_GSBYNAME,XTERM);
7395 case KEY_getservbyport:
7396 LOP(OP_GSBYPORT,XTERM);
7398 case KEY_getservent:
7401 case KEY_getsockname:
7402 UNI(OP_GETSOCKNAME);
7404 case KEY_getsockopt:
7405 LOP(OP_GSOCKOPT,XTERM);
7420 pl_yylval.ival = CopLINE(PL_curcop);
7421 Perl_ck_warner_d(aTHX_
7422 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7423 "given is experimental");
7428 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7436 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7438 pl_yylval.ival = CopLINE(PL_curcop);
7442 LOP(OP_INDEX,XTERM);
7448 LOP(OP_IOCTL,XTERM);
7476 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7481 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7495 LOP(OP_LISTEN,XTERM);
7504 s = scan_pat(s,OP_MATCH);
7505 TERM(sublex_start());
7508 LOP(OP_MAPSTART, XREF);
7511 LOP(OP_MKDIR,XTERM);
7514 LOP(OP_MSGCTL,XTERM);
7517 LOP(OP_MSGGET,XTERM);
7520 LOP(OP_MSGRCV,XTERM);
7523 LOP(OP_MSGSND,XTERM);
7528 PL_in_my = (U16)tmp;
7530 if (isIDFIRST_lazy_if(s,UTF)) {
7531 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7532 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7534 if (!FEATURE_LEXSUBS_IS_ENABLED)
7536 "Experimental \"%s\" subs not enabled",
7537 tmp == KEY_my ? "my" :
7538 tmp == KEY_state ? "state" : "our");
7539 Perl_ck_warner_d(aTHX_
7540 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7541 "The lexical_subs feature is experimental");
7544 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7545 if (!PL_in_my_stash) {
7549 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7550 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7551 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7561 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7566 s = tokenize_use(0, s);
7570 if (*s == '(' || (s = skipspace(s), *s == '('))
7573 if (!PL_lex_allbrackets &&
7574 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7575 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7581 if (isIDFIRST_lazy_if(s,UTF)) {
7583 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7585 for (t=d; isSPACE(*t);)
7587 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7589 && !(t[0] == '=' && t[1] == '>')
7590 && !(t[0] == ':' && t[1] == ':')
7591 && !keyword(s, d-s, 0)
7593 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7594 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7595 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7601 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7603 pl_yylval.ival = OP_OR;
7613 LOP(OP_OPEN_DIR,XTERM);
7616 checkcomma(s,PL_tokenbuf,"filehandle");
7620 checkcomma(s,PL_tokenbuf,"filehandle");
7639 s = force_word(s,WORD,FALSE,TRUE);
7641 s = force_strict_version(s);
7645 LOP(OP_PIPE_OP,XTERM);
7648 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7651 COPLINE_SET_FROM_MULTI_END;
7652 pl_yylval.ival = OP_CONST;
7653 TERM(sublex_start());
7660 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7663 COPLINE_SET_FROM_MULTI_END;
7664 PL_expect = XOPERATOR;
7665 if (SvCUR(PL_lex_stuff)) {
7666 int warned_comma = !ckWARN(WARN_QW);
7667 int warned_comment = warned_comma;
7668 d = SvPV_force(PL_lex_stuff, len);
7670 for (; isSPACE(*d) && len; --len, ++d)
7675 if (!warned_comma || !warned_comment) {
7676 for (; !isSPACE(*d) && len; --len, ++d) {
7677 if (!warned_comma && *d == ',') {
7678 Perl_warner(aTHX_ packWARN(WARN_QW),
7679 "Possible attempt to separate words with commas");
7682 else if (!warned_comment && *d == '#') {
7683 Perl_warner(aTHX_ packWARN(WARN_QW),
7684 "Possible attempt to put comments in qw() list");
7690 for (; !isSPACE(*d) && len; --len, ++d)
7693 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7694 words = op_append_elem(OP_LIST, words,
7695 newSVOP(OP_CONST, 0, tokeq(sv)));
7700 words = newNULLLIST();
7702 SvREFCNT_dec(PL_lex_stuff);
7703 PL_lex_stuff = NULL;
7705 PL_expect = XOPERATOR;
7706 pl_yylval.opval = sawparens(words);
7711 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7714 pl_yylval.ival = OP_STRINGIFY;
7715 if (SvIVX(PL_lex_stuff) == '\'')
7716 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7717 TERM(sublex_start());
7720 s = scan_pat(s,OP_QR);
7721 TERM(sublex_start());
7724 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7727 pl_yylval.ival = OP_BACKTICK;
7728 TERM(sublex_start());
7736 s = force_version(s, FALSE);
7738 else if (*s != 'v' || !isDIGIT(s[1])
7739 || (s = force_version(s, TRUE), *s == 'v'))
7741 *PL_tokenbuf = '\0';
7742 s = force_word(s,WORD,TRUE,TRUE);
7743 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7744 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7745 GV_ADD | (UTF ? SVf_UTF8 : 0));
7747 yyerror("<> at require-statement should be quotes");
7749 if (orig_keyword == KEY_require) {
7755 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7757 PL_last_uni = PL_oldbufptr;
7758 PL_last_lop_op = OP_REQUIRE;
7760 return REPORT( (int)REQUIRE );
7769 LOP(OP_RENAME,XTERM);
7778 LOP(OP_RINDEX,XTERM);
7787 UNIDOR(OP_READLINE);
7790 UNIDOR(OP_BACKTICK);
7799 LOP(OP_REVERSE,XTERM);
7802 UNIDOR(OP_READLINK);
7809 if (pl_yylval.opval)
7810 TERM(sublex_start());
7812 TOKEN(1); /* force error */
7815 checkcomma(s,PL_tokenbuf,"filehandle");
7825 LOP(OP_SELECT,XTERM);
7831 LOP(OP_SEMCTL,XTERM);
7834 LOP(OP_SEMGET,XTERM);
7837 LOP(OP_SEMOP,XTERM);
7843 LOP(OP_SETPGRP,XTERM);
7845 case KEY_setpriority:
7846 LOP(OP_SETPRIORITY,XTERM);
7848 case KEY_sethostent:
7854 case KEY_setservent:
7857 case KEY_setprotoent:
7867 LOP(OP_SEEKDIR,XTERM);
7869 case KEY_setsockopt:
7870 LOP(OP_SSOCKOPT,XTERM);
7876 LOP(OP_SHMCTL,XTERM);
7879 LOP(OP_SHMGET,XTERM);
7882 LOP(OP_SHMREAD,XTERM);
7885 LOP(OP_SHMWRITE,XTERM);
7888 LOP(OP_SHUTDOWN,XTERM);
7897 LOP(OP_SOCKET,XTERM);
7899 case KEY_socketpair:
7900 LOP(OP_SOCKPAIR,XTERM);
7903 checkcomma(s,PL_tokenbuf,"subroutine name");
7906 s = force_word(s,WORD,TRUE,TRUE);
7910 LOP(OP_SPLIT,XTERM);
7913 LOP(OP_SPRINTF,XTERM);
7916 LOP(OP_SPLICE,XTERM);
7931 LOP(OP_SUBSTR,XTERM);
7937 char * const tmpbuf = PL_tokenbuf + 1;
7938 expectation attrful;
7939 bool have_name, have_proto;
7940 const int key = tmp;
7941 SV *format_name = NULL;
7946 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7947 (*s == ':' && s[1] == ':'))
7951 attrful = XATTRBLOCK;
7952 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7954 if (key == KEY_format)
7955 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7957 if (memchr(tmpbuf, ':', len) || key != KEY_sub
7959 PL_tokenbuf, len + 1, 0
7961 sv_setpvn(PL_subname, tmpbuf, len);
7963 sv_setsv(PL_subname,PL_curstname);
7964 sv_catpvs(PL_subname,"::");
7965 sv_catpvn(PL_subname,tmpbuf,len);
7967 if (SvUTF8(PL_linestr))
7968 SvUTF8_on(PL_subname);
7975 if (key == KEY_my || key == KEY_our || key==KEY_state)
7978 /* diag_listed_as: Missing name in "%s sub" */
7980 "Missing name in \"%s\"", PL_bufptr);
7982 PL_expect = XTERMBLOCK;
7983 attrful = XATTRTERM;
7984 sv_setpvs(PL_subname,"?");
7988 if (key == KEY_format) {
7990 NEXTVAL_NEXTTOKE.opval
7991 = (OP*)newSVOP(OP_CONST,0, format_name);
7992 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7998 /* Look for a prototype */
7999 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8000 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8001 COPLINE_SET_FROM_MULTI_END;
8003 Perl_croak(aTHX_ "Prototype not terminated");
8004 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8012 if (*s == ':' && s[1] != ':')
8013 PL_expect = attrful;
8014 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8016 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8017 else if (*s != ';' && *s != '}')
8018 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8022 NEXTVAL_NEXTTOKE.opval =
8023 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8024 PL_lex_stuff = NULL;
8029 sv_setpvs(PL_subname, "__ANON__");
8031 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8034 force_ident_maybe_lex('&');
8039 LOP(OP_SYSTEM,XREF);
8042 LOP(OP_SYMLINK,XTERM);
8045 LOP(OP_SYSCALL,XTERM);
8048 LOP(OP_SYSOPEN,XTERM);
8051 LOP(OP_SYSSEEK,XTERM);
8054 LOP(OP_SYSREAD,XTERM);
8057 LOP(OP_SYSWRITE,XTERM);
8062 TERM(sublex_start());
8083 LOP(OP_TRUNCATE,XTERM);
8095 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8097 pl_yylval.ival = CopLINE(PL_curcop);
8101 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8103 pl_yylval.ival = CopLINE(PL_curcop);
8107 LOP(OP_UNLINK,XTERM);
8113 LOP(OP_UNPACK,XTERM);
8116 LOP(OP_UTIME,XTERM);
8122 LOP(OP_UNSHIFT,XTERM);
8125 s = tokenize_use(1, s);
8135 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8137 pl_yylval.ival = CopLINE(PL_curcop);
8138 Perl_ck_warner_d(aTHX_
8139 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8140 "when is experimental");
8144 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8146 pl_yylval.ival = CopLINE(PL_curcop);
8150 PL_hints |= HINT_BLOCK_SCOPE;
8157 LOP(OP_WAITPID,XTERM);
8163 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8164 * we use the same number on EBCDIC */
8165 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8169 if (PL_expect == XOPERATOR) {
8170 if (*s == '=' && !PL_lex_allbrackets &&
8171 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8181 pl_yylval.ival = OP_XOR;
8190 Looks up an identifier in the pad or in a package
8193 PRIVATEREF if this is a lexical name.
8194 WORD if this belongs to a package.
8197 if we're in a my declaration
8198 croak if they tried to say my($foo::bar)
8199 build the ops for a my() declaration
8200 if it's an access to a my() variable
8201 build ops for access to a my() variable
8202 if in a dq string, and they've said @foo and we can't find @foo
8204 build ops for a bareword
8208 S_pending_ident(pTHX)
8211 const char pit = (char)pl_yylval.ival;
8212 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8213 /* All routes through this function want to know if there is a colon. */
8214 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8216 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8217 "### Pending identifier '%s'\n", PL_tokenbuf); });
8219 /* if we're in a my(), we can't allow dynamics here.
8220 $foo'bar has already been turned into $foo::bar, so
8221 just check for colons.
8223 if it's a legal name, the OP is a PADANY.
8226 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8228 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8229 "variable %s in \"our\"",
8230 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8231 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8235 /* "my" variable %s can't be in a package */
8236 /* PL_no_myglob is constant */
8237 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8238 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8239 PL_in_my == KEY_my ? "my" : "state",
8240 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8242 UTF ? SVf_UTF8 : 0);
8246 pl_yylval.opval = newOP(OP_PADANY, 0);
8247 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8248 UTF ? SVf_UTF8 : 0);
8254 build the ops for accesses to a my() variable.
8259 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8261 if (tmp != NOT_IN_PAD) {
8262 /* might be an "our" variable" */
8263 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8264 /* build ops for a bareword */
8265 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8266 HEK * const stashname = HvNAME_HEK(stash);
8267 SV * const sym = newSVhek(stashname);
8268 sv_catpvs(sym, "::");
8269 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8270 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8271 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8275 ((PL_tokenbuf[0] == '$') ? SVt_PV
8276 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8281 pl_yylval.opval = newOP(OP_PADANY, 0);
8282 pl_yylval.opval->op_targ = tmp;
8288 Whine if they've said @foo in a doublequoted string,
8289 and @foo isn't a variable we can find in the symbol
8292 if (ckWARN(WARN_AMBIGUOUS) &&
8293 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8294 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8295 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8296 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8297 /* DO NOT warn for @- and @+ */
8298 && !( PL_tokenbuf[2] == '\0' &&
8299 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8302 /* Downgraded from fatal to warning 20000522 mjd */
8303 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8304 "Possible unintended interpolation of %"UTF8f
8306 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8310 /* build ops for a bareword */
8311 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8312 newSVpvn_flags(PL_tokenbuf + 1,
8314 UTF ? SVf_UTF8 : 0 ));
8315 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8317 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8318 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8319 | ( UTF ? SVf_UTF8 : 0 ),
8320 ((PL_tokenbuf[0] == '$') ? SVt_PV
8321 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8327 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8329 PERL_ARGS_ASSERT_CHECKCOMMA;
8331 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8332 if (ckWARN(WARN_SYNTAX)) {
8335 for (w = s+2; *w && level; w++) {
8343 /* the list of chars below is for end of statements or
8344 * block / parens, boolean operators (&&, ||, //) and branch
8345 * constructs (or, and, if, until, unless, while, err, for).
8346 * Not a very solid hack... */
8347 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8348 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8349 "%s (...) interpreted as function",name);
8352 while (s < PL_bufend && isSPACE(*s))
8356 while (s < PL_bufend && isSPACE(*s))
8358 if (isIDFIRST_lazy_if(s,UTF)) {
8359 const char * const w = s;
8360 s += UTF ? UTF8SKIP(s) : 1;
8361 while (isWORDCHAR_lazy_if(s,UTF))
8362 s += UTF ? UTF8SKIP(s) : 1;
8363 while (s < PL_bufend && isSPACE(*s))
8368 if (keyword(w, s - w, 0))
8371 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8372 if (gv && GvCVu(gv))
8376 Copy(w, tmpbuf+1, s - w, char);
8378 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8379 if (off != NOT_IN_PAD) return;
8381 Perl_croak(aTHX_ "No comma allowed after %s", what);
8386 /* S_new_constant(): do any overload::constant lookup.
8388 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8389 Best used as sv=new_constant(..., sv, ...).
8390 If s, pv are NULL, calls subroutine with one argument,
8391 and <type> is used with error messages only.
8392 <type> is assumed to be well formed UTF-8 */
8395 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8396 SV *sv, SV *pv, const char *type, STRLEN typelen)
8399 HV * table = GvHV(PL_hintgv); /* ^H */
8404 const char *why1 = "", *why2 = "", *why3 = "";
8406 PERL_ARGS_ASSERT_NEW_CONSTANT;
8407 /* We assume that this is true: */
8408 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8411 /* charnames doesn't work well if there have been errors found */
8412 if (PL_error_count > 0 && *key == 'c')
8414 SvREFCNT_dec_NN(sv);
8415 return &PL_sv_undef;
8418 sv_2mortal(sv); /* Parent created it permanently */
8420 || ! (PL_hints & HINT_LOCALIZE_HH)
8421 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8426 /* Here haven't found what we're looking for. If it is charnames,
8427 * perhaps it needs to be loaded. Try doing that before giving up */
8429 Perl_load_module(aTHX_
8431 newSVpvs("_charnames"),
8432 /* version parameter; no need to specify it, as if
8433 * we get too early a version, will fail anyway,
8434 * not being able to find '_charnames' */
8439 assert(sp == PL_stack_sp);
8440 table = GvHV(PL_hintgv);
8442 && (PL_hints & HINT_LOCALIZE_HH)
8443 && (cvp = hv_fetch(table, key, keylen, FALSE))
8449 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8450 msg = Perl_form(aTHX_
8451 "Constant(%.*s) unknown",
8452 (int)(type ? typelen : len),
8458 why3 = "} is not defined";
8461 msg = Perl_form(aTHX_
8462 /* The +3 is for '\N{'; -4 for that, plus '}' */
8463 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8467 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8468 (int)(type ? typelen : len),
8469 (type ? type: s), why1, why2, why3);
8472 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8473 return SvREFCNT_inc_simple_NN(sv);
8478 pv = newSVpvn_flags(s, len, SVs_TEMP);
8480 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8482 typesv = &PL_sv_undef;
8484 PUSHSTACKi(PERLSI_OVERLOAD);
8496 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8500 /* Check the eval first */
8501 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8503 const char * errstr;
8504 sv_catpvs(errsv, "Propagated");
8505 errstr = SvPV_const(errsv, errlen);
8506 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8508 res = SvREFCNT_inc_simple_NN(sv);
8512 SvREFCNT_inc_simple_void_NN(res);
8521 why1 = "Call to &{$^H{";
8523 why3 = "}} did not return a defined value";
8525 (void)sv_2mortal(sv);
8532 PERL_STATIC_INLINE void
8533 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8534 PERL_ARGS_ASSERT_PARSE_IDENT;
8538 Perl_croak(aTHX_ "%s", ident_too_long);
8539 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8540 /* The UTF-8 case must come first, otherwise things
8541 * like c\N{COMBINING TILDE} would start failing, as the
8542 * isWORDCHAR_A case below would gobble the 'c' up.
8545 char *t = *s + UTF8SKIP(*s);
8546 while (isIDCONT_utf8((U8*)t))
8548 if (*d + (t - *s) > e)
8549 Perl_croak(aTHX_ "%s", ident_too_long);
8550 Copy(*s, *d, t - *s, char);
8554 else if ( isWORDCHAR_A(**s) ) {
8557 } while (isWORDCHAR_A(**s) && *d < e);
8559 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8564 else if (allow_package && **s == ':' && (*s)[1] == ':'
8565 /* Disallow things like Foo::$bar. For the curious, this is
8566 * the code path that triggers the "Bad name after" warning
8567 * when looking for barewords.
8569 && (*s)[2] != '$') {
8579 /* Returns a NUL terminated string, with the length of the string written to
8583 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8586 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8587 bool is_utf8 = cBOOL(UTF);
8589 PERL_ARGS_ASSERT_SCAN_WORD;
8591 parse_ident(&s, &d, e, allow_package, is_utf8);
8598 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8600 I32 herelines = PL_parser->herelines;
8601 SSize_t bracket = -1;
8604 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8605 bool is_utf8 = cBOOL(UTF);
8606 I32 orig_copline = 0, tmp_copline = 0;
8608 PERL_ARGS_ASSERT_SCAN_IDENT;
8613 while (isDIGIT(*s)) {
8615 Perl_croak(aTHX_ "%s", ident_too_long);
8620 parse_ident(&s, &d, e, 1, is_utf8);
8625 /* Either a digit variable, or parse_ident() found an identifier
8626 (anything valid as a bareword), so job done and return. */
8627 if (PL_lex_state != LEX_NORMAL)
8628 PL_lex_state = LEX_INTERPENDMAYBE;
8631 if (*s == '$' && s[1] &&
8632 (isIDFIRST_lazy_if(s+1,is_utf8)
8633 || isDIGIT_A((U8)s[1])
8636 || strnEQ(s+1,"::",2)) )
8638 /* Dereferencing a value in a scalar variable.
8639 The alternatives are different syntaxes for a scalar variable.
8640 Using ' as a leading package separator isn't allowed. :: is. */
8643 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
8645 bracket = s - SvPVX(PL_linestr);
8647 orig_copline = CopLINE(PL_curcop);
8648 if (s < PL_bufend && isSPACE(*s)) {
8653 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8654 * iff Unicode semantics are to be used. The legal ones are any of:
8655 * a) all ASCII characters except:
8656 * 1) space-type ones, like \t and SPACE;
8659 * The final case currently doesn't get this far in the program, so we
8660 * don't test for it. If that were to change, it would be ok to allow it.
8661 * c) When not under Unicode rules, any upper Latin1 character
8662 * d) Otherwise, when unicode rules are used, all XIDS characters.
8664 * Because all ASCII characters have the same representation whether
8665 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8666 * '{' without knowing if is UTF-8 or not.
8667 * EBCDIC already uses the rules that ASCII platforms will use after the
8668 * deprecation cycle; see comment below about the deprecation. */
8670 # define VALID_LEN_ONE_IDENT(s, is_utf8) \
8671 (isGRAPH_A(*(s)) || ((is_utf8) \
8672 ? isIDFIRST_utf8((U8*) (s)) \
8674 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8676 # define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
8677 && LIKELY(*(s) != '\0') \
8679 || isASCII_utf8((U8*) (s)) \
8680 || isIDFIRST_utf8((U8*) (s))))
8682 if ((s <= PL_bufend - (is_utf8)
8685 && VALID_LEN_ONE_IDENT(s, is_utf8))
8687 /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
8688 * because often it has no graphic representation. (We can't get to
8689 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8692 ? ! isGRAPH_utf8( (U8*) s)
8693 : (! isGRAPH_L1( (U8) *s)
8694 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8696 /* Split messages for back compat */
8697 if (isCNTRL_A( (U8) *s)) {
8698 deprecate("literal control characters in variable names");
8701 deprecate("literal non-graphic characters in variable names");
8706 const STRLEN skip = UTF8SKIP(s);
8709 for ( i = 0; i < skip; i++ )
8717 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8718 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8722 /* Warn about ambiguous code after unary operators if {...} notation isn't
8723 used. There's no difference in ambiguity; it's merely a heuristic
8724 about when not to warn. */
8725 else if (ck_uni && bracket == -1)
8727 if (bracket != -1) {
8728 /* If we were processing {...} notation then... */
8729 if (isIDFIRST_lazy_if(d,is_utf8)) {
8730 /* if it starts as a valid identifier, assume that it is one.
8731 (the later check for } being at the expected point will trap
8732 cases where this doesn't pan out.) */
8733 d += is_utf8 ? UTF8SKIP(d) : 1;
8734 parse_ident(&s, &d, e, 1, is_utf8);
8736 tmp_copline = CopLINE(PL_curcop);
8737 if (s < PL_bufend && isSPACE(*s)) {
8740 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8741 /* ${foo[0]} and ${foo{bar}} notation. */
8742 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8743 const char * const brack =
8745 ((*s == '[') ? "[...]" : "{...}");
8746 orig_copline = CopLINE(PL_curcop);
8747 CopLINE_set(PL_curcop, tmp_copline);
8748 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8749 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8750 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8751 funny, dest, brack, funny, dest, brack);
8752 CopLINE_set(PL_curcop, orig_copline);
8755 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8756 PL_lex_allbrackets++;
8760 /* Handle extended ${^Foo} variables
8761 * 1999-02-27 mjd-perl-patch@plover.com */
8762 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8766 while (isWORDCHAR(*s) && d < e) {
8770 Perl_croak(aTHX_ "%s", ident_too_long);
8775 tmp_copline = CopLINE(PL_curcop);
8776 if (s < PL_bufend && isSPACE(*s)) {
8780 /* Expect to find a closing } after consuming any trailing whitespace.
8784 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8785 PL_lex_state = LEX_INTERPEND;
8788 if (PL_lex_state == LEX_NORMAL) {
8789 if (ckWARN(WARN_AMBIGUOUS) &&
8790 (keyword(dest, d - dest, 0)
8791 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8793 SV *tmp = newSVpvn_flags( dest, d - dest,
8794 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8797 orig_copline = CopLINE(PL_curcop);
8798 CopLINE_set(PL_curcop, tmp_copline);
8799 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8800 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8801 funny, SVfARG(tmp), funny, SVfARG(tmp));
8802 CopLINE_set(PL_curcop, orig_copline);
8807 /* Didn't find the closing } at the point we expected, so restore
8808 state such that the next thing to process is the opening { and */
8809 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8810 CopLINE_set(PL_curcop, orig_copline);
8811 PL_parser->herelines = herelines;
8815 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8816 PL_lex_state = LEX_INTERPEND;
8821 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
8823 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
8824 * found in the parse starting at 's', based on the subset that are valid
8825 * in this context input to this routine in 'valid_flags'. Advances s.
8826 * Returns TRUE if the input should be treated as a valid flag, so the next
8827 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
8828 * upon first call on the current regex. This routine will set it to any
8829 * charset modifier found. The caller shouldn't change it. This way,
8830 * another charset modifier encountered in the parse can be detected as an
8831 * error, as we have decided to allow only one */
8834 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8836 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8837 if (isWORDCHAR_lazy_if(*s, UTF)) {
8838 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8839 UTF ? SVf_UTF8 : 0);
8841 /* Pretend that it worked, so will continue processing before
8850 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
8851 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8852 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8853 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8854 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8855 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8856 case LOCALE_PAT_MOD:
8858 goto multiple_charsets;
8860 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8863 case UNICODE_PAT_MOD:
8865 goto multiple_charsets;
8867 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8870 case ASCII_RESTRICT_PAT_MOD:
8872 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8876 /* Error if previous modifier wasn't an 'a', but if it was, see
8877 * if, and accept, a second occurrence (only) */
8879 || get_regex_charset(*pmfl)
8880 != REGEX_ASCII_RESTRICTED_CHARSET)
8882 goto multiple_charsets;
8884 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8888 case DEPENDS_PAT_MOD:
8890 goto multiple_charsets;
8892 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8901 if (*charset != c) {
8902 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8904 else if (c == 'a') {
8905 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8906 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8909 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8912 /* Pretend that it worked, so will continue processing before dieing */
8918 S_scan_pat(pTHX_ char *start, I32 type)
8922 const char * const valid_flags =
8923 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8924 char charset = '\0'; /* character set modifier */
8925 unsigned int x_mod_count = 0;
8927 PERL_ARGS_ASSERT_SCAN_PAT;
8929 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8931 Perl_croak(aTHX_ "Search pattern not terminated");
8933 pm = (PMOP*)newPMOP(type, 0);
8934 if (PL_multi_open == '?') {
8935 /* This is the only point in the code that sets PMf_ONCE: */
8936 pm->op_pmflags |= PMf_ONCE;
8938 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8939 allows us to restrict the list needed by reset to just the ??
8941 assert(type != OP_TRANS);
8943 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8946 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8949 elements = mg->mg_len / sizeof(PMOP**);
8950 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8951 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8952 mg->mg_len = elements * sizeof(PMOP**);
8953 PmopSTASH_set(pm,PL_curstash);
8957 /* if qr/...(?{..}).../, then need to parse the pattern within a new
8958 * anon CV. False positives like qr/[(?{]/ are harmless */
8960 if (type == OP_QR) {
8962 char *e, *p = SvPV(PL_lex_stuff, len);
8964 for (; p < e; p++) {
8965 if (p[0] == '(' && p[1] == '?'
8966 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8968 pm->op_pmflags |= PMf_HAS_CV;
8972 pm->op_pmflags |= PMf_IS_QR;
8975 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
8976 &s, &charset, &x_mod_count))
8978 /* issue a warning if /c is specified,but /g is not */
8979 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8981 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8982 "Use of /c modifier is meaningless without /g" );
8985 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
8987 PL_lex_op = (OP*)pm;
8988 pl_yylval.ival = OP_MATCH;
8993 S_scan_subst(pTHX_ char *start)
9000 char charset = '\0'; /* character set modifier */
9001 unsigned int x_mod_count = 0;
9004 PERL_ARGS_ASSERT_SCAN_SUBST;
9006 pl_yylval.ival = OP_NULL;
9008 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9011 Perl_croak(aTHX_ "Substitution pattern not terminated");
9015 first_start = PL_multi_start;
9016 first_line = CopLINE(PL_curcop);
9017 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9020 SvREFCNT_dec(PL_lex_stuff);
9021 PL_lex_stuff = NULL;
9023 Perl_croak(aTHX_ "Substitution replacement not terminated");
9025 PL_multi_start = first_start; /* so whole substitution is taken together */
9027 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9031 if (*s == EXEC_PAT_MOD) {
9035 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9036 &s, &charset, &x_mod_count))
9042 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9044 if ((pm->op_pmflags & PMf_CONTINUE)) {
9045 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9049 SV * const repl = newSVpvs("");
9052 pm->op_pmflags |= PMf_EVAL;
9055 sv_catpvs(repl, "eval ");
9057 sv_catpvs(repl, "do ");
9059 sv_catpvs(repl, "{");
9060 sv_catsv(repl, PL_sublex_info.repl);
9061 sv_catpvs(repl, "}");
9063 SvREFCNT_dec(PL_sublex_info.repl);
9064 PL_sublex_info.repl = repl;
9066 if (CopLINE(PL_curcop) != first_line) {
9067 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9068 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9069 CopLINE(PL_curcop) - first_line;
9070 CopLINE_set(PL_curcop, first_line);
9073 PL_lex_op = (OP*)pm;
9074 pl_yylval.ival = OP_SUBST;
9079 S_scan_trans(pTHX_ char *start)
9086 bool nondestruct = 0;
9089 PERL_ARGS_ASSERT_SCAN_TRANS;
9091 pl_yylval.ival = OP_NULL;
9093 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9095 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9099 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9102 SvREFCNT_dec(PL_lex_stuff);
9103 PL_lex_stuff = NULL;
9105 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9108 complement = del = squash = 0;
9112 complement = OPpTRANS_COMPLEMENT;
9115 del = OPpTRANS_DELETE;
9118 squash = OPpTRANS_SQUASH;
9130 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9131 o->op_private &= ~OPpTRANS_ALL;
9132 o->op_private |= del|squash|complement|
9133 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9134 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9137 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9144 Takes a pointer to the first < in <<FOO.
9145 Returns a pointer to the byte following <<FOO.
9147 This function scans a heredoc, which involves different methods
9148 depending on whether we are in a string eval, quoted construct, etc.
9149 This is because PL_linestr could containing a single line of input, or
9150 a whole string being evalled, or the contents of the current quote-
9153 The two basic methods are:
9154 - Steal lines from the input stream
9155 - Scan the heredoc in PL_linestr and remove it therefrom
9157 In a file scope or filtered eval, the first method is used; in a
9158 string eval, the second.
9160 In a quote-like operator, we have to choose between the two,
9161 depending on where we can find a newline. We peek into outer lex-
9162 ing scopes until we find one with a newline in it. If we reach the
9163 outermost lexing scope and it is a file, we use the stream method.
9164 Otherwise it is treated as an eval.
9168 S_scan_heredoc(pTHX_ char *s)
9170 I32 op_type = OP_SCALAR;
9177 const bool infile = PL_rsfp || PL_parser->filtered;
9178 const line_t origline = CopLINE(PL_curcop);
9179 LEXSHARED *shared = PL_parser->lex_shared;
9181 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9184 d = PL_tokenbuf + 1;
9185 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9186 *PL_tokenbuf = '\n';
9188 while (SPACE_OR_TAB(*peek))
9190 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9193 s = delimcpy(d, e, s, PL_bufend, term, &len);
9195 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9201 /* <<\FOO is equivalent to <<'FOO' */
9205 if (!isWORDCHAR_lazy_if(s,UTF))
9206 deprecate("bare << to mean <<\"\"");
9207 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9212 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9213 Perl_croak(aTHX_ "Delimiter for here document is too long");
9216 len = d - PL_tokenbuf;
9218 #ifndef PERL_STRICT_CR
9219 d = strchr(s, '\r');
9221 char * const olds = s;
9223 while (s < PL_bufend) {
9229 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9238 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9243 tmpstr = newSV_type(SVt_PVIV);
9247 SvIV_set(tmpstr, -1);
9249 else if (term == '`') {
9250 op_type = OP_BACKTICK;
9251 SvIV_set(tmpstr, '\\');
9254 PL_multi_start = origline + 1 + PL_parser->herelines;
9255 PL_multi_open = PL_multi_close = '<';
9256 /* inside a string eval or quote-like operator */
9257 if (!infile || PL_lex_inwhat) {
9260 char * const olds = s;
9261 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9262 /* These two fields are not set until an inner lexing scope is
9263 entered. But we need them set here. */
9264 shared->ls_bufptr = s;
9265 shared->ls_linestr = PL_linestr;
9267 /* Look for a newline. If the current buffer does not have one,
9268 peek into the line buffer of the parent lexing scope, going
9269 up as many levels as necessary to find one with a newline
9272 while (!(s = (char *)memchr(
9273 (void *)shared->ls_bufptr, '\n',
9274 SvEND(shared->ls_linestr)-shared->ls_bufptr
9276 shared = shared->ls_prev;
9277 /* shared is only null if we have gone beyond the outermost
9278 lexing scope. In a file, we will have broken out of the
9279 loop in the previous iteration. In an eval, the string buf-
9280 fer ends with "\n;", so the while condition above will have
9281 evaluated to false. So shared can never be null. */
9283 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9284 most lexing scope. In a file, shared->ls_linestr at that
9285 level is just one line, so there is no body to steal. */
9286 if (infile && !shared->ls_prev) {
9292 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9295 linestr = shared->ls_linestr;
9296 bufend = SvEND(linestr);
9298 while (s < bufend - len + 1 &&
9299 memNE(s,PL_tokenbuf,len) ) {
9301 ++PL_parser->herelines;
9303 if (s >= bufend - len + 1) {
9306 sv_setpvn(tmpstr,d+1,s-d);
9308 /* the preceding stmt passes a newline */
9309 PL_parser->herelines++;
9311 /* s now points to the newline after the heredoc terminator.
9312 d points to the newline before the body of the heredoc.
9315 /* We are going to modify linestr in place here, so set
9316 aside copies of the string if necessary for re-evals or
9318 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9319 check shared->re_eval_str. */
9320 if (shared->re_eval_start || shared->re_eval_str) {
9321 /* Set aside the rest of the regexp */
9322 if (!shared->re_eval_str)
9323 shared->re_eval_str =
9324 newSVpvn(shared->re_eval_start,
9325 bufend - shared->re_eval_start);
9326 shared->re_eval_start -= s-d;
9328 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9329 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9330 cx->blk_eval.cur_text == linestr)
9332 cx->blk_eval.cur_text = newSVsv(linestr);
9333 SvSCREAM_on(cx->blk_eval.cur_text);
9335 /* Copy everything from s onwards back to d. */
9336 Move(s,d,bufend-s + 1,char);
9337 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9338 /* Setting PL_bufend only applies when we have not dug deeper
9339 into other scopes, because sublex_done sets PL_bufend to
9340 SvEND(PL_linestr). */
9341 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9348 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9349 term = PL_tokenbuf[1];
9351 linestr_save = PL_linestr; /* must restore this afterwards */
9352 d = s; /* and this */
9353 PL_linestr = newSVpvs("");
9354 PL_bufend = SvPVX(PL_linestr);
9356 PL_bufptr = PL_bufend;
9357 CopLINE_set(PL_curcop,
9358 origline + 1 + PL_parser->herelines);
9359 if (!lex_next_chunk(LEX_NO_TERM)
9360 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9361 /* Simply freeing linestr_save might seem simpler here, as it
9362 does not matter what PL_linestr points to, since we are
9363 about to croak; but in a quote-like op, linestr_save
9364 will have been prospectively freed already, via
9365 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9366 restore PL_linestr. */
9367 SvREFCNT_dec_NN(PL_linestr);
9368 PL_linestr = linestr_save;
9371 CopLINE_set(PL_curcop, origline);
9372 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9373 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9374 /* ^That should be enough to avoid this needing to grow: */
9375 sv_catpvs(PL_linestr, "\n\0");
9376 assert(s == SvPVX(PL_linestr));
9377 PL_bufend = SvEND(PL_linestr);
9380 PL_parser->herelines++;
9381 PL_last_lop = PL_last_uni = NULL;
9382 #ifndef PERL_STRICT_CR
9383 if (PL_bufend - PL_linestart >= 2) {
9384 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9385 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9387 PL_bufend[-2] = '\n';
9389 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9391 else if (PL_bufend[-1] == '\r')
9392 PL_bufend[-1] = '\n';
9394 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9395 PL_bufend[-1] = '\n';
9397 if (*s == term && PL_bufend-s >= len
9398 && memEQ(s,PL_tokenbuf + 1,len)) {
9399 SvREFCNT_dec(PL_linestr);
9400 PL_linestr = linestr_save;
9401 PL_linestart = SvPVX(linestr_save);
9402 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9407 sv_catsv(tmpstr,PL_linestr);
9411 PL_multi_end = origline + PL_parser->herelines;
9412 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9413 SvPV_shrink_to_cur(tmpstr);
9416 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9418 else if (IN_ENCODING)
9419 sv_recode_to_utf8(tmpstr, _get_encoding());
9421 PL_lex_stuff = tmpstr;
9422 pl_yylval.ival = op_type;
9426 SvREFCNT_dec(tmpstr);
9427 CopLINE_set(PL_curcop, origline);
9428 missingterm(PL_tokenbuf + 1);
9432 takes: current position in input buffer
9433 returns: new position in input buffer
9434 side-effects: pl_yylval and lex_op are set.
9439 <<>> read from ARGV without magic open
9440 <FH> read from filehandle
9441 <pkg::FH> read from package qualified filehandle
9442 <pkg'FH> read from package qualified filehandle
9443 <$fh> read from filehandle in $fh
9449 S_scan_inputsymbol(pTHX_ char *start)
9451 char *s = start; /* current position in buffer */
9454 bool nomagicopen = FALSE;
9455 char *d = PL_tokenbuf; /* start of temp holding space */
9456 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9458 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9460 end = strchr(s, '\n');
9463 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9470 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9472 /* die if we didn't have space for the contents of the <>,
9473 or if it didn't end, or if we see a newline
9476 if (len >= (I32)sizeof PL_tokenbuf)
9477 Perl_croak(aTHX_ "Excessively long <> operator");
9479 Perl_croak(aTHX_ "Unterminated <> operator");
9484 Remember, only scalar variables are interpreted as filehandles by
9485 this code. Anything more complex (e.g., <$fh{$num}>) will be
9486 treated as a glob() call.
9487 This code makes use of the fact that except for the $ at the front,
9488 a scalar variable and a filehandle look the same.
9490 if (*d == '$' && d[1]) d++;
9492 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9493 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9494 d += UTF ? UTF8SKIP(d) : 1;
9496 /* If we've tried to read what we allow filehandles to look like, and
9497 there's still text left, then it must be a glob() and not a getline.
9498 Use scan_str to pull out the stuff between the <> and treat it
9499 as nothing more than a string.
9502 if (d - PL_tokenbuf != len) {
9503 pl_yylval.ival = OP_GLOB;
9504 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9506 Perl_croak(aTHX_ "Glob not terminated");
9510 bool readline_overriden = FALSE;
9512 /* we're in a filehandle read situation */
9515 /* turn <> into <ARGV> */
9517 Copy("ARGV",d,5,char);
9519 /* Check whether readline() is overriden */
9520 if ((gv_readline = gv_override("readline",8)))
9521 readline_overriden = TRUE;
9523 /* if <$fh>, create the ops to turn the variable into a
9527 /* try to find it in the pad for this block, otherwise find
9528 add symbol table ops
9530 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9531 if (tmp != NOT_IN_PAD) {
9532 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9533 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9534 HEK * const stashname = HvNAME_HEK(stash);
9535 SV * const sym = sv_2mortal(newSVhek(stashname));
9536 sv_catpvs(sym, "::");
9542 OP * const o = newOP(OP_PADSV, 0);
9544 PL_lex_op = readline_overriden
9545 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9546 op_append_elem(OP_LIST, o,
9547 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9548 : (OP*)newUNOP(OP_READLINE, 0, o);
9556 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9558 PL_lex_op = readline_overriden
9559 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9560 op_append_elem(OP_LIST,
9561 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9562 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9563 : (OP*)newUNOP(OP_READLINE, 0,
9564 newUNOP(OP_RV2SV, 0,
9565 newGVOP(OP_GV, 0, gv)));
9567 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9568 pl_yylval.ival = OP_NULL;
9571 /* If it's none of the above, it must be a literal filehandle
9572 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9574 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9575 PL_lex_op = readline_overriden
9576 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9577 op_append_elem(OP_LIST,
9578 newGVOP(OP_GV, 0, gv),
9579 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9580 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9581 pl_yylval.ival = OP_NULL;
9591 start position in buffer
9592 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
9593 only if they are of the open/close form
9594 keep_delims preserve the delimiters around the string
9595 re_reparse compiling a run-time /(?{})/:
9596 collapse // to /, and skip encoding src
9597 delimp if non-null, this is set to the position of
9598 the closing delimiter, or just after it if
9599 the closing and opening delimiters differ
9600 (i.e., the opening delimiter of a substitu-
9602 returns: position to continue reading from buffer
9603 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9604 updates the read buffer.
9606 This subroutine pulls a string out of the input. It is called for:
9607 q single quotes q(literal text)
9608 ' single quotes 'literal text'
9609 qq double quotes qq(interpolate $here please)
9610 " double quotes "interpolate $here please"
9611 qx backticks qx(/bin/ls -l)
9612 ` backticks `/bin/ls -l`
9613 qw quote words @EXPORT_OK = qw( func() $spam )
9614 m// regexp match m/this/
9615 s/// regexp substitute s/this/that/
9616 tr/// string transliterate tr/this/that/
9617 y/// string transliterate y/this/that/
9618 ($*@) sub prototypes sub foo ($)
9619 (stuff) sub attr parameters sub foo : attr(stuff)
9620 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9622 In most of these cases (all but <>, patterns and transliterate)
9623 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9624 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9625 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9628 It skips whitespace before the string starts, and treats the first
9629 character as the delimiter. If the delimiter is one of ([{< then
9630 the corresponding "close" character )]}> is used as the closing
9631 delimiter. It allows quoting of delimiters, and if the string has
9632 balanced delimiters ([{<>}]) it allows nesting.
9634 On success, the SV with the resulting string is put into lex_stuff or,
9635 if that is already non-NULL, into lex_repl. The second case occurs only
9636 when parsing the RHS of the special constructs s/// and tr/// (y///).
9637 For convenience, the terminating delimiter character is stuffed into
9642 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9646 SV *sv; /* scalar value: string */
9647 const char *tmps; /* temp string, used for delimiter matching */
9648 char *s = start; /* current position in the buffer */
9649 char term; /* terminating character */
9650 char *to; /* current position in the sv's data */
9651 I32 brackets = 1; /* bracket nesting level */
9652 bool has_utf8 = FALSE; /* is there any utf8 content? */
9653 I32 termcode; /* terminating char. code */
9654 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9655 STRLEN termlen; /* length of terminating string */
9656 int last_off = 0; /* last position for nesting bracket */
9659 PERL_ARGS_ASSERT_SCAN_STR;
9661 /* skip space before the delimiter */
9666 /* mark where we are, in case we need to report errors */
9669 /* after skipping whitespace, the next character is the terminator */
9672 termcode = termstr[0] = term;
9676 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9677 Copy(s, termstr, termlen, U8);
9678 if (!UTF8_IS_INVARIANT(term))
9682 /* mark where we are */
9683 PL_multi_start = CopLINE(PL_curcop);
9684 PL_multi_open = term;
9685 herelines = PL_parser->herelines;
9687 /* find corresponding closing delimiter */
9688 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9689 termcode = termstr[0] = term = tmps[5];
9691 PL_multi_close = term;
9693 if (PL_multi_open == PL_multi_close) {
9694 keep_bracketed_quoted = FALSE;
9697 /* create a new SV to hold the contents. 79 is the SV's initial length.
9698 What a random number. */
9699 sv = newSV_type(SVt_PVIV);
9701 SvIV_set(sv, termcode);
9702 (void)SvPOK_only(sv); /* validate pointer */
9704 /* move past delimiter and try to read a complete string */
9706 sv_catpvn(sv, s, termlen);
9709 if (IN_ENCODING && !UTF && !re_reparse) {
9713 int offset = s - SvPVX_const(PL_linestr);
9714 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9715 &offset, (char*)termstr, termlen);
9719 if (SvIsCOW(PL_linestr)) {
9720 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9721 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9722 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9723 char *buf = SvPVX(PL_linestr);
9724 bufend_pos = PL_parser->bufend - buf;
9725 bufptr_pos = PL_parser->bufptr - buf;
9726 oldbufptr_pos = PL_parser->oldbufptr - buf;
9727 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9728 linestart_pos = PL_parser->linestart - buf;
9729 last_uni_pos = PL_parser->last_uni
9730 ? PL_parser->last_uni - buf
9732 last_lop_pos = PL_parser->last_lop
9733 ? PL_parser->last_lop - buf
9736 PL_parser->lex_shared->re_eval_start ?
9737 PL_parser->lex_shared->re_eval_start - buf : 0;
9740 sv_force_normal(PL_linestr);
9742 buf = SvPVX(PL_linestr);
9743 PL_parser->bufend = buf + bufend_pos;
9744 PL_parser->bufptr = buf + bufptr_pos;
9745 PL_parser->oldbufptr = buf + oldbufptr_pos;
9746 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9747 PL_parser->linestart = buf + linestart_pos;
9748 if (PL_parser->last_uni)
9749 PL_parser->last_uni = buf + last_uni_pos;
9750 if (PL_parser->last_lop)
9751 PL_parser->last_lop = buf + last_lop_pos;
9752 if (PL_parser->lex_shared->re_eval_start)
9753 PL_parser->lex_shared->re_eval_start =
9754 buf + re_eval_start_pos;
9757 ns = SvPVX_const(PL_linestr) + offset;
9758 svlast = SvEND(sv) - 1;
9760 for (; s < ns; s++) {
9761 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9762 COPLINE_INC_WITH_HERELINES;
9765 goto read_more_line;
9767 /* handle quoted delimiters */
9768 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9770 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9772 if ((svlast-1 - t) % 2) {
9773 if (!keep_bracketed_quoted) {
9776 SvCUR_set(sv, SvCUR(sv) - 1);
9781 if (PL_multi_open == PL_multi_close) {
9787 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9788 /* At here, all closes are "was quoted" one,
9789 so we don't check PL_multi_close. */
9791 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9796 else if (*t == PL_multi_open)
9804 SvCUR_set(sv, w - SvPVX_const(sv));
9806 last_off = w - SvPVX(sv);
9807 if (--brackets <= 0)
9813 SvCUR_set(sv, SvCUR(sv) - 1);
9819 /* extend sv if need be */
9820 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9821 /* set 'to' to the next character in the sv's string */
9822 to = SvPVX(sv)+SvCUR(sv);
9824 /* if open delimiter is the close delimiter read unbridle */
9825 if (PL_multi_open == PL_multi_close) {
9826 for (; s < PL_bufend; s++,to++) {
9827 /* embedded newlines increment the current line number */
9828 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9829 COPLINE_INC_WITH_HERELINES;
9830 /* handle quoted delimiters */
9831 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9832 if (!keep_bracketed_quoted
9834 || (re_reparse && s[1] == '\\'))
9837 else /* any other quotes are simply copied straight through */
9840 /* terminate when run out of buffer (the for() condition), or
9841 have found the terminator */
9842 else if (*s == term) {
9845 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9848 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9854 /* if the terminator isn't the same as the start character (e.g.,
9855 matched brackets), we have to allow more in the quoting, and
9856 be prepared for nested brackets.
9859 /* read until we run out of string, or we find the terminator */
9860 for (; s < PL_bufend; s++,to++) {
9861 /* embedded newlines increment the line count */
9862 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9863 COPLINE_INC_WITH_HERELINES;
9864 /* backslashes can escape the open or closing characters */
9865 if (*s == '\\' && s+1 < PL_bufend) {
9866 if (!keep_bracketed_quoted &&
9867 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9874 /* allow nested opens and closes */
9875 else if (*s == PL_multi_close && --brackets <= 0)
9877 else if (*s == PL_multi_open)
9879 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9884 /* terminate the copied string and update the sv's end-of-string */
9886 SvCUR_set(sv, to - SvPVX_const(sv));
9889 * this next chunk reads more into the buffer if we're not done yet
9893 break; /* handle case where we are done yet :-) */
9895 #ifndef PERL_STRICT_CR
9896 if (to - SvPVX_const(sv) >= 2) {
9897 if ((to[-2] == '\r' && to[-1] == '\n') ||
9898 (to[-2] == '\n' && to[-1] == '\r'))
9902 SvCUR_set(sv, to - SvPVX_const(sv));
9904 else if (to[-1] == '\r')
9907 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9912 /* if we're out of file, or a read fails, bail and reset the current
9913 line marker so we can report where the unterminated string began
9915 COPLINE_INC_WITH_HERELINES;
9916 PL_bufptr = PL_bufend;
9917 if (!lex_next_chunk(0)) {
9919 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9925 /* at this point, we have successfully read the delimited string */
9927 if (!IN_ENCODING || UTF || re_reparse) {
9930 sv_catpvn(sv, s, termlen);
9933 if (has_utf8 || (IN_ENCODING && !re_reparse))
9936 PL_multi_end = CopLINE(PL_curcop);
9937 CopLINE_set(PL_curcop, PL_multi_start);
9938 PL_parser->herelines = herelines;
9940 /* if we allocated too much space, give some back */
9941 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9942 SvLEN_set(sv, SvCUR(sv) + 1);
9943 SvPV_renew(sv, SvLEN(sv));
9946 /* decide whether this is the first or second quoted string we've read
9951 PL_sublex_info.repl = sv;
9954 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9960 takes: pointer to position in buffer
9961 returns: pointer to new position in buffer
9962 side-effects: builds ops for the constant in pl_yylval.op
9964 Read a number in any of the formats that Perl accepts:
9966 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9967 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
9968 0b[01](_?[01])* binary integers
9969 0[0-7](_?[0-7])* octal integers
9970 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
9971 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
9973 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9976 If it reads a number without a decimal point or an exponent, it will
9977 try converting the number to an integer and see if it can do so
9978 without loss of precision.
9982 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9984 const char *s = start; /* current position in buffer */
9985 char *d; /* destination in temp buffer */
9986 char *e; /* end of temp buffer */
9987 NV nv; /* number read, as a double */
9988 SV *sv = NULL; /* place to put the converted number */
9989 bool floatit; /* boolean: int or float? */
9990 const char *lastub = NULL; /* position of last underbar */
9991 static const char* const number_too_long = "Number too long";
9992 /* Hexadecimal floating point.
9994 * In many places (where we have quads and NV is IEEE 754 double)
9995 * we can fit the mantissa bits of a NV into an unsigned quad.
9996 * (Note that UVs might not be quads even when we have quads.)
9997 * This will not work everywhere, though (either no quads, or
9998 * using long doubles), in which case we have to resort to NV,
9999 * which will probably mean horrible loss of precision due to
10000 * multiple fp operations. */
10001 bool hexfp = FALSE;
10002 int total_bits = 0;
10003 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10004 # define HEXFP_UQUAD
10005 Uquad_t hexfp_uquad = 0;
10006 int hexfp_frac_bits = 0;
10011 NV hexfp_mult = 1.0;
10012 UV high_non_zero = 0; /* highest digit */
10014 PERL_ARGS_ASSERT_SCAN_NUM;
10016 /* We use the first character to decide what type of number this is */
10020 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10022 /* if it starts with a 0, it could be an octal number, a decimal in
10023 0.13 disguise, or a hexadecimal number, or a binary number. */
10027 u holds the "number so far"
10028 shift the power of 2 of the base
10029 (hex == 4, octal == 3, binary == 1)
10030 overflowed was the number more than we can hold?
10032 Shift is used when we add a digit. It also serves as an "are
10033 we in octal/hex/binary?" indicator to disallow hex characters
10034 when in octal mode.
10039 bool overflowed = FALSE;
10040 bool just_zero = TRUE; /* just plain 0 or binary number? */
10041 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10042 static const char* const bases[5] =
10043 { "", "binary", "", "octal", "hexadecimal" };
10044 static const char* const Bases[5] =
10045 { "", "Binary", "", "Octal", "Hexadecimal" };
10046 static const char* const maxima[5] =
10048 "0b11111111111111111111111111111111",
10052 const char *base, *Base, *max;
10054 /* check for hex */
10055 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10059 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10064 /* check for a decimal in disguise */
10065 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10067 /* so it must be octal */
10074 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10075 "Misplaced _ in number");
10079 base = bases[shift];
10080 Base = Bases[shift];
10081 max = maxima[shift];
10083 /* read the rest of the number */
10085 /* x is used in the overflow test,
10086 b is the digit we're adding on. */
10091 /* if we don't mention it, we're done */
10095 /* _ are ignored -- but warned about if consecutive */
10097 if (lastub && s == lastub + 1)
10098 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10099 "Misplaced _ in number");
10103 /* 8 and 9 are not octal */
10104 case '8': case '9':
10106 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10110 case '2': case '3': case '4':
10111 case '5': case '6': case '7':
10113 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10116 case '0': case '1':
10117 b = *s++ & 15; /* ASCII digit -> value of digit */
10121 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10122 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10123 /* make sure they said 0x */
10126 b = (*s++ & 7) + 9;
10128 /* Prepare to put the digit we have onto the end
10129 of the number so far. We check for overflows.
10135 x = u << shift; /* make room for the digit */
10137 total_bits += shift;
10139 if ((x >> shift) != u
10140 && !(PL_hints & HINT_NEW_BINARY)) {
10143 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10144 "Integer overflow in %s number",
10147 u = x | b; /* add the digit to the end */
10150 n *= nvshift[shift];
10151 /* If an NV has not enough bits in its
10152 * mantissa to represent an UV this summing of
10153 * small low-order numbers is a waste of time
10154 * (because the NV cannot preserve the
10155 * low-order bits anyway): we could just
10156 * remember when did we overflow and in the
10157 * end just multiply n by the right
10162 if (high_non_zero == 0 && b > 0)
10165 /* this could be hexfp, but peek ahead
10166 * to avoid matching ".." */
10167 if (UNLIKELY(HEXFP_PEEK(s))) {
10175 /* if we get here, we had success: make a scalar value from
10180 /* final misplaced underbar check */
10181 if (s[-1] == '_') {
10182 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10185 if (UNLIKELY(HEXFP_PEEK(s))) {
10186 /* Do sloppy (on the underbars) but quick detection
10187 * (and value construction) for hexfp, the decimal
10188 * detection will shortly be more thorough with the
10189 * underbar checks. */
10193 #else /* HEXFP_NV */
10198 NV mult = 1 / 16.0;
10201 while (isXDIGIT(*h) || *h == '_') {
10202 if (isXDIGIT(*h)) {
10203 U8 b = XDIGIT_VALUE(*h);
10204 total_bits += shift;
10206 hexfp_uquad <<= shift;
10208 hexfp_frac_bits += shift;
10209 #else /* HEXFP_NV */
10210 hexfp_nv += b * mult;
10218 if (total_bits >= 4) {
10219 if (high_non_zero < 0x8)
10221 if (high_non_zero < 0x4)
10223 if (high_non_zero < 0x2)
10227 if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
10228 bool negexp = FALSE;
10232 else if (*h == '-') {
10238 while (isDIGIT(*h) || *h == '_') {
10241 hexfp_exp += *h - '0';
10244 -hexfp_exp < NV_MIN_EXP - 1) {
10245 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10246 "Hexadecimal float: exponent underflow");
10253 hexfp_exp > NV_MAX_EXP - 1) {
10254 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10255 "Hexadecimal float: exponent overflow");
10264 hexfp_exp = -hexfp_exp;
10266 hexfp_exp -= hexfp_frac_bits;
10268 hexfp_mult = pow(2.0, hexfp_exp);
10276 if (n > 4294967295.0)
10277 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10278 "%s number > %s non-portable",
10284 if (u > 0xffffffff)
10285 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10286 "%s number > %s non-portable",
10291 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10292 sv = new_constant(start, s - start, "integer",
10293 sv, NULL, NULL, 0);
10294 else if (PL_hints & HINT_NEW_BINARY)
10295 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10300 handle decimal numbers.
10301 we're also sent here when we read a 0 as the first digit
10303 case '1': case '2': case '3': case '4': case '5':
10304 case '6': case '7': case '8': case '9': case '.':
10307 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10316 /* read next group of digits and _ and copy into d */
10317 while (isDIGIT(*s) || *s == '_' ||
10318 UNLIKELY(hexfp && isXDIGIT(*s))) {
10319 /* skip underscores, checking for misplaced ones
10323 if (lastub && s == lastub + 1)
10324 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10325 "Misplaced _ in number");
10329 /* check for end of fixed-length buffer */
10331 Perl_croak(aTHX_ "%s", number_too_long);
10332 /* if we're ok, copy the character */
10337 /* final misplaced underbar check */
10338 if (lastub && s == lastub + 1) {
10339 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10342 /* read a decimal portion if there is one. avoid
10343 3..5 being interpreted as the number 3. followed
10346 if (*s == '.' && s[1] != '.') {
10351 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10352 "Misplaced _ in number");
10356 /* copy, ignoring underbars, until we run out of digits.
10358 for (; isDIGIT(*s) || *s == '_' ||
10359 UNLIKELY(hexfp && isXDIGIT(*s));
10361 /* fixed length buffer check */
10363 Perl_croak(aTHX_ "%s", number_too_long);
10365 if (lastub && s == lastub + 1)
10366 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10367 "Misplaced _ in number");
10373 /* fractional part ending in underbar? */
10374 if (s[-1] == '_') {
10375 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10376 "Misplaced _ in number");
10378 if (*s == '.' && isDIGIT(s[1])) {
10379 /* oops, it's really a v-string, but without the "v" */
10385 /* read exponent part, if present */
10386 if ((isALPHA_FOLD_EQ(*s, 'e')
10387 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10388 && strchr("+-0123456789_", s[1]))
10392 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10393 ditto for p (hexfloats) */
10394 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10395 /* At least some Mach atof()s don't grok 'E' */
10398 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10405 /* stray preinitial _ */
10407 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10408 "Misplaced _ in number");
10412 /* allow positive or negative exponent */
10413 if (*s == '+' || *s == '-')
10416 /* stray initial _ */
10418 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10419 "Misplaced _ in number");
10423 /* read digits of exponent */
10424 while (isDIGIT(*s) || *s == '_') {
10427 Perl_croak(aTHX_ "%s", number_too_long);
10431 if (((lastub && s == lastub + 1) ||
10432 (!isDIGIT(s[1]) && s[1] != '_')))
10433 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10434 "Misplaced _ in number");
10442 We try to do an integer conversion first if no characters
10443 indicating "float" have been found.
10448 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10450 if (flags == IS_NUMBER_IN_UV) {
10452 sv = newSViv(uv); /* Prefer IVs over UVs. */
10455 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10456 if (uv <= (UV) IV_MIN)
10457 sv = newSViv(-(IV)uv);
10464 STORE_NUMERIC_LOCAL_SET_STANDARD();
10465 /* terminate the string */
10467 if (UNLIKELY(hexfp)) {
10468 # ifdef NV_MANT_DIG
10469 if (total_bits > NV_MANT_DIG)
10470 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10471 "Hexadecimal float: mantissa overflow");
10474 nv = hexfp_uquad * hexfp_mult;
10475 #else /* HEXFP_NV */
10476 nv = hexfp_nv * hexfp_mult;
10479 nv = Atof(PL_tokenbuf);
10481 RESTORE_NUMERIC_LOCAL();
10486 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10487 const char *const key = floatit ? "float" : "integer";
10488 const STRLEN keylen = floatit ? 5 : 7;
10489 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10490 key, keylen, sv, NULL, NULL, 0);
10494 /* if it starts with a v, it could be a v-string */
10497 sv = newSV(5); /* preallocate storage space */
10498 ENTER_with_name("scan_vstring");
10500 s = scan_vstring(s, PL_bufend, sv);
10501 SvREFCNT_inc_simple_void_NN(sv);
10502 LEAVE_with_name("scan_vstring");
10506 /* make the op for the constant and return */
10509 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10511 lvalp->opval = NULL;
10517 S_scan_formline(pTHX_ char *s)
10521 SV * const stuff = newSVpvs("");
10522 bool needargs = FALSE;
10523 bool eofmt = FALSE;
10525 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10527 while (!needargs) {
10530 #ifdef PERL_STRICT_CR
10531 while (SPACE_OR_TAB(*t))
10534 while (SPACE_OR_TAB(*t) || *t == '\r')
10537 if (*t == '\n' || t == PL_bufend) {
10542 eol = (char *) memchr(s,'\n',PL_bufend-s);
10546 for (t = s; t < eol; t++) {
10547 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10549 goto enough; /* ~~ must be first line in formline */
10551 if (*t == '@' || *t == '^')
10555 sv_catpvn(stuff, s, eol-s);
10556 #ifndef PERL_STRICT_CR
10557 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10558 char *end = SvPVX(stuff) + SvCUR(stuff);
10561 SvCUR_set(stuff, SvCUR(stuff) - 1);
10569 if ((PL_rsfp || PL_parser->filtered)
10570 && PL_parser->form_lex_state == LEX_NORMAL) {
10572 PL_bufptr = PL_bufend;
10573 COPLINE_INC_WITH_HERELINES;
10574 got_some = lex_next_chunk(0);
10575 CopLINE_dec(PL_curcop);
10583 if (!SvCUR(stuff) || needargs)
10584 PL_lex_state = PL_parser->form_lex_state;
10585 if (SvCUR(stuff)) {
10586 PL_expect = XSTATE;
10588 const char *s2 = s;
10589 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10593 PL_expect = XTERMBLOCK;
10594 NEXTVAL_NEXTTOKE.ival = 0;
10597 NEXTVAL_NEXTTOKE.ival = 0;
10598 force_next(FORMLBRACK);
10601 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10603 else if (IN_ENCODING)
10604 sv_recode_to_utf8(stuff, _get_encoding());
10606 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10610 SvREFCNT_dec(stuff);
10612 PL_lex_formbrack = 0;
10618 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10620 const I32 oldsavestack_ix = PL_savestack_ix;
10621 CV* const outsidecv = PL_compcv;
10623 SAVEI32(PL_subline);
10624 save_item(PL_subname);
10625 SAVESPTR(PL_compcv);
10627 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10628 CvFLAGS(PL_compcv) |= flags;
10630 PL_subline = CopLINE(PL_curcop);
10631 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10632 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10633 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10634 if (outsidecv && CvPADLIST(outsidecv))
10635 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10637 return oldsavestack_ix;
10641 S_yywarn(pTHX_ const char *const s, U32 flags)
10643 PERL_ARGS_ASSERT_YYWARN;
10645 PL_in_eval |= EVAL_WARNONLY;
10646 yyerror_pv(s, flags);
10651 Perl_yyerror(pTHX_ const char *const s)
10653 PERL_ARGS_ASSERT_YYERROR;
10654 return yyerror_pvn(s, strlen(s), 0);
10658 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10660 PERL_ARGS_ASSERT_YYERROR_PV;
10661 return yyerror_pvn(s, strlen(s), flags);
10665 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10667 const char *context = NULL;
10670 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10671 int yychar = PL_parser->yychar;
10673 PERL_ARGS_ASSERT_YYERROR_PVN;
10675 if (!yychar || (yychar == ';' && !PL_rsfp))
10676 sv_catpvs(where_sv, "at EOF");
10677 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10678 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10679 PL_oldbufptr != PL_bufptr) {
10682 The code below is removed for NetWare because it abends/crashes on NetWare
10683 when the script has error such as not having the closing quotes like:
10684 if ($var eq "value)
10685 Checking of white spaces is anyway done in NetWare code.
10688 while (isSPACE(*PL_oldoldbufptr))
10691 context = PL_oldoldbufptr;
10692 contlen = PL_bufptr - PL_oldoldbufptr;
10694 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10695 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10698 The code below is removed for NetWare because it abends/crashes on NetWare
10699 when the script has error such as not having the closing quotes like:
10700 if ($var eq "value)
10701 Checking of white spaces is anyway done in NetWare code.
10704 while (isSPACE(*PL_oldbufptr))
10707 context = PL_oldbufptr;
10708 contlen = PL_bufptr - PL_oldbufptr;
10710 else if (yychar > 255)
10711 sv_catpvs(where_sv, "next token ???");
10712 else if (yychar == YYEMPTY) {
10713 if (PL_lex_state == LEX_NORMAL ||
10714 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10715 sv_catpvs(where_sv, "at end of line");
10716 else if (PL_lex_inpat)
10717 sv_catpvs(where_sv, "within pattern");
10719 sv_catpvs(where_sv, "within string");
10722 sv_catpvs(where_sv, "next char ");
10724 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10725 else if (isPRINT_LC(yychar)) {
10726 const char string = yychar;
10727 sv_catpvn(where_sv, &string, 1);
10730 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10732 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10733 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10734 OutCopFILE(PL_curcop),
10735 (IV)(PL_parser->preambling == NOLINE
10736 ? CopLINE(PL_curcop)
10737 : PL_parser->preambling));
10739 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10740 UTF8fARG(UTF, contlen, context));
10742 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10743 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10744 Perl_sv_catpvf(aTHX_ msg,
10745 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10746 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10749 if (PL_in_eval & EVAL_WARNONLY) {
10750 PL_in_eval &= ~EVAL_WARNONLY;
10751 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10755 if (PL_error_count >= 10) {
10757 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10758 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10759 SVfARG(errsv), OutCopFILE(PL_curcop));
10761 Perl_croak(aTHX_ "%s has too many errors.\n",
10762 OutCopFILE(PL_curcop));
10765 PL_in_my_stash = NULL;
10770 S_swallow_bom(pTHX_ U8 *s)
10772 const STRLEN slen = SvCUR(PL_linestr);
10774 PERL_ARGS_ASSERT_SWALLOW_BOM;
10778 if (s[1] == 0xFE) {
10779 /* UTF-16 little-endian? (or UTF-32LE?) */
10780 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10781 /* diag_listed_as: Unsupported script encoding %s */
10782 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10783 #ifndef PERL_NO_UTF16_FILTER
10784 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10786 if (PL_bufend > (char*)s) {
10787 s = add_utf16_textfilter(s, TRUE);
10790 /* diag_listed_as: Unsupported script encoding %s */
10791 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10796 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10797 #ifndef PERL_NO_UTF16_FILTER
10798 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10800 if (PL_bufend > (char *)s) {
10801 s = add_utf16_textfilter(s, FALSE);
10804 /* diag_listed_as: Unsupported script encoding %s */
10805 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10809 case BOM_UTF8_FIRST_BYTE: {
10810 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10811 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10812 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10813 s += len + 1; /* UTF-8 */
10820 if (s[2] == 0xFE && s[3] == 0xFF) {
10821 /* UTF-32 big-endian */
10822 /* diag_listed_as: Unsupported script encoding %s */
10823 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10826 else if (s[2] == 0 && s[3] != 0) {
10829 * are a good indicator of UTF-16BE. */
10830 #ifndef PERL_NO_UTF16_FILTER
10831 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10832 s = add_utf16_textfilter(s, FALSE);
10834 /* diag_listed_as: Unsupported script encoding %s */
10835 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10842 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10845 * are a good indicator of UTF-16LE. */
10846 #ifndef PERL_NO_UTF16_FILTER
10847 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10848 s = add_utf16_textfilter(s, TRUE);
10850 /* diag_listed_as: Unsupported script encoding %s */
10851 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10859 #ifndef PERL_NO_UTF16_FILTER
10861 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10863 SV *const filter = FILTER_DATA(idx);
10864 /* We re-use this each time round, throwing the contents away before we
10866 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10867 SV *const utf8_buffer = filter;
10868 IV status = IoPAGE(filter);
10869 const bool reverse = cBOOL(IoLINES(filter));
10872 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10874 /* As we're automatically added, at the lowest level, and hence only called
10875 from this file, we can be sure that we're not called in block mode. Hence
10876 don't bother writing code to deal with block mode. */
10878 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10881 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10883 DEBUG_P(PerlIO_printf(Perl_debug_log,
10884 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10885 FPTR2DPTR(void *, S_utf16_textfilter),
10886 reverse ? 'l' : 'b', idx, maxlen, status,
10887 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10894 /* First, look in our buffer of existing UTF-8 data: */
10895 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10899 } else if (status == 0) {
10901 IoPAGE(filter) = 0;
10902 nl = SvEND(utf8_buffer);
10905 STRLEN got = nl - SvPVX(utf8_buffer);
10906 /* Did we have anything to append? */
10908 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10909 /* Everything else in this code works just fine if SVp_POK isn't
10910 set. This, however, needs it, and we need it to work, else
10911 we loop infinitely because the buffer is never consumed. */
10912 sv_chop(utf8_buffer, nl);
10916 /* OK, not a complete line there, so need to read some more UTF-16.
10917 Read an extra octect if the buffer currently has an odd number. */
10921 if (SvCUR(utf16_buffer) >= 2) {
10922 /* Location of the high octet of the last complete code point.
10923 Gosh, UTF-16 is a pain. All the benefits of variable length,
10924 *coupled* with all the benefits of partial reads and
10926 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10927 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10929 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10933 /* We have the first half of a surrogate. Read more. */
10934 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10937 status = FILTER_READ(idx + 1, utf16_buffer,
10938 160 + (SvCUR(utf16_buffer) & 1));
10939 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10940 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10943 IoPAGE(filter) = status;
10948 chars = SvCUR(utf16_buffer) >> 1;
10949 have = SvCUR(utf8_buffer);
10950 SvGROW(utf8_buffer, have + chars * 3 + 1);
10953 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10954 (U8*)SvPVX_const(utf8_buffer) + have,
10955 chars * 2, &newlen);
10957 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10958 (U8*)SvPVX_const(utf8_buffer) + have,
10959 chars * 2, &newlen);
10961 SvCUR_set(utf8_buffer, have + newlen);
10964 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10965 it's private to us, and utf16_to_utf8{,reversed} take a
10966 (pointer,length) pair, rather than a NUL-terminated string. */
10967 if(SvCUR(utf16_buffer) & 1) {
10968 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10969 SvCUR_set(utf16_buffer, 1);
10971 SvCUR_set(utf16_buffer, 0);
10974 DEBUG_P(PerlIO_printf(Perl_debug_log,
10975 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10977 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10978 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10983 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10985 SV *filter = filter_add(S_utf16_textfilter, NULL);
10987 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10989 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10990 sv_setpvs(filter, "");
10991 IoLINES(filter) = reversed;
10992 IoPAGE(filter) = 1; /* Not EOF */
10994 /* Sadly, we have to return a valid pointer, come what may, so we have to
10995 ignore any error return from this. */
10996 SvCUR_set(PL_linestr, 0);
10997 if (FILTER_READ(0, PL_linestr, 0)) {
10998 SvUTF8_on(PL_linestr);
11000 SvUTF8_on(PL_linestr);
11002 PL_bufend = SvEND(PL_linestr);
11003 return (U8*)SvPVX(PL_linestr);
11008 Returns a pointer to the next character after the parsed
11009 vstring, as well as updating the passed in sv.
11011 Function must be called like
11013 sv = sv_2mortal(newSV(5));
11014 s = scan_vstring(s,e,sv);
11016 where s and e are the start and end of the string.
11017 The sv should already be large enough to store the vstring
11018 passed in, for performance reasons.
11020 This function may croak if fatal warnings are enabled in the
11021 calling scope, hence the sv_2mortal in the example (to prevent
11022 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11028 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11030 const char *pos = s;
11031 const char *start = s;
11033 PERL_ARGS_ASSERT_SCAN_VSTRING;
11035 if (*pos == 'v') pos++; /* get past 'v' */
11036 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11038 if ( *pos != '.') {
11039 /* this may not be a v-string if followed by => */
11040 const char *next = pos;
11041 while (next < e && isSPACE(*next))
11043 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11044 /* return string not v-string */
11045 sv_setpvn(sv,(char *)s,pos-s);
11046 return (char *)pos;
11050 if (!isALPHA(*pos)) {
11051 U8 tmpbuf[UTF8_MAXBYTES+1];
11054 s++; /* get past 'v' */
11059 /* this is atoi() that tolerates underscores */
11062 const char *end = pos;
11064 while (--end >= s) {
11066 const UV orev = rev;
11067 rev += (*end - '0') * mult;
11070 /* diag_listed_as: Integer overflow in %s number */
11071 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11072 "Integer overflow in decimal number");
11076 if (rev > 0x7FFFFFFF)
11077 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11079 /* Append native character for the rev point */
11080 tmpend = uvchr_to_utf8(tmpbuf, rev);
11081 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11082 if (!UVCHR_IS_INVARIANT(rev))
11084 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11090 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11094 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11101 Perl_keyword_plugin_standard(pTHX_
11102 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11104 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11105 PERL_UNUSED_CONTEXT;
11106 PERL_UNUSED_ARG(keyword_ptr);
11107 PERL_UNUSED_ARG(keyword_len);
11108 PERL_UNUSED_ARG(op_ptr);
11109 return KEYWORD_PLUGIN_DECLINE;
11112 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11114 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11116 SAVEI32(PL_lex_brackets);
11117 if (PL_lex_brackets > 100)
11118 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11119 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11120 SAVEI32(PL_lex_allbrackets);
11121 PL_lex_allbrackets = 0;
11122 SAVEI8(PL_lex_fakeeof);
11123 PL_lex_fakeeof = (U8)fakeeof;
11124 if(yyparse(gramtype) && !PL_parser->error_count)
11125 qerror(Perl_mess(aTHX_ "Parse error"));
11128 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11130 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11134 SAVEVPTR(PL_eval_root);
11135 PL_eval_root = NULL;
11136 parse_recdescent(gramtype, fakeeof);
11142 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11144 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11147 if (flags & ~PARSE_OPTIONAL)
11148 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11149 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11150 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11151 if (!PL_parser->error_count)
11152 qerror(Perl_mess(aTHX_ "Parse error"));
11153 exprop = newOP(OP_NULL, 0);
11159 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11161 Parse a Perl arithmetic expression. This may contain operators of precedence
11162 down to the bit shift operators. The expression must be followed (and thus
11163 terminated) either by a comparison or lower-precedence operator or by
11164 something that would normally terminate an expression such as semicolon.
11165 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11166 otherwise it is mandatory. It is up to the caller to ensure that the
11167 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11168 the source of the code to be parsed and the lexical context for the
11171 The op tree representing the expression is returned. If an optional
11172 expression is absent, a null pointer is returned, otherwise the pointer
11175 If an error occurs in parsing or compilation, in most cases a valid op
11176 tree is returned anyway. The error is reflected in the parser state,
11177 normally resulting in a single exception at the top level of parsing
11178 which covers all the compilation errors that occurred. Some compilation
11179 errors, however, will throw an exception immediately.
11185 Perl_parse_arithexpr(pTHX_ U32 flags)
11187 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11191 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11193 Parse a Perl term expression. This may contain operators of precedence
11194 down to the assignment operators. The expression must be followed (and thus
11195 terminated) either by a comma or lower-precedence operator or by
11196 something that would normally terminate an expression such as semicolon.
11197 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11198 otherwise it is mandatory. It is up to the caller to ensure that the
11199 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11200 the source of the code to be parsed and the lexical context for the
11203 The op tree representing the expression is returned. If an optional
11204 expression is absent, a null pointer is returned, otherwise the pointer
11207 If an error occurs in parsing or compilation, in most cases a valid op
11208 tree is returned anyway. The error is reflected in the parser state,
11209 normally resulting in a single exception at the top level of parsing
11210 which covers all the compilation errors that occurred. Some compilation
11211 errors, however, will throw an exception immediately.
11217 Perl_parse_termexpr(pTHX_ U32 flags)
11219 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11223 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11225 Parse a Perl list expression. This may contain operators of precedence
11226 down to the comma operator. The expression must be followed (and thus
11227 terminated) either by a low-precedence logic operator such as C<or> or by
11228 something that would normally terminate an expression such as semicolon.
11229 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11230 otherwise it is mandatory. It is up to the caller to ensure that the
11231 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11232 the source of the code to be parsed and the lexical context for the
11235 The op tree representing the expression is returned. If an optional
11236 expression is absent, a null pointer is returned, otherwise the pointer
11239 If an error occurs in parsing or compilation, in most cases a valid op
11240 tree is returned anyway. The error is reflected in the parser state,
11241 normally resulting in a single exception at the top level of parsing
11242 which covers all the compilation errors that occurred. Some compilation
11243 errors, however, will throw an exception immediately.
11249 Perl_parse_listexpr(pTHX_ U32 flags)
11251 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11255 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11257 Parse a single complete Perl expression. This allows the full
11258 expression grammar, including the lowest-precedence operators such
11259 as C<or>. The expression must be followed (and thus terminated) by a
11260 token that an expression would normally be terminated by: end-of-file,
11261 closing bracketing punctuation, semicolon, or one of the keywords that
11262 signals a postfix expression-statement modifier. If I<flags> includes
11263 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11264 mandatory. It is up to the caller to ensure that the dynamic parser
11265 state (L</PL_parser> et al) is correctly set to reflect the source of
11266 the code to be parsed and the lexical context for the expression.
11268 The op tree representing the expression is returned. If an optional
11269 expression is absent, a null pointer is returned, otherwise the pointer
11272 If an error occurs in parsing or compilation, in most cases a valid op
11273 tree is returned anyway. The error is reflected in the parser state,
11274 normally resulting in a single exception at the top level of parsing
11275 which covers all the compilation errors that occurred. Some compilation
11276 errors, however, will throw an exception immediately.
11282 Perl_parse_fullexpr(pTHX_ U32 flags)
11284 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11288 =for apidoc Amx|OP *|parse_block|U32 flags
11290 Parse a single complete Perl code block. This consists of an opening
11291 brace, a sequence of statements, and a closing brace. The block
11292 constitutes a lexical scope, so C<my> variables and various compile-time
11293 effects can be contained within it. It is up to the caller to ensure
11294 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11295 reflect the source of the code to be parsed and the lexical context for
11298 The op tree representing the code block is returned. This is always a
11299 real op, never a null pointer. It will normally be a C<lineseq> list,
11300 including C<nextstate> or equivalent ops. No ops to construct any kind
11301 of runtime scope are included by virtue of it being a block.
11303 If an error occurs in parsing or compilation, in most cases a valid op
11304 tree (most likely null) is returned anyway. The error is reflected in
11305 the parser state, normally resulting in a single exception at the top
11306 level of parsing which covers all the compilation errors that occurred.
11307 Some compilation errors, however, will throw an exception immediately.
11309 The I<flags> parameter is reserved for future use, and must always
11316 Perl_parse_block(pTHX_ U32 flags)
11319 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11320 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11324 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11326 Parse a single unadorned Perl statement. This may be a normal imperative
11327 statement or a declaration that has compile-time effect. It does not
11328 include any label or other affixture. It is up to the caller to ensure
11329 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11330 reflect the source of the code to be parsed and the lexical context for
11333 The op tree representing the statement is returned. This may be a
11334 null pointer if the statement is null, for example if it was actually
11335 a subroutine definition (which has compile-time side effects). If not
11336 null, it will be ops directly implementing the statement, suitable to
11337 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11338 equivalent op (except for those embedded in a scope contained entirely
11339 within the statement).
11341 If an error occurs in parsing or compilation, in most cases a valid op
11342 tree (most likely null) is returned anyway. The error is reflected in
11343 the parser state, normally resulting in a single exception at the top
11344 level of parsing which covers all the compilation errors that occurred.
11345 Some compilation errors, however, will throw an exception immediately.
11347 The I<flags> parameter is reserved for future use, and must always
11354 Perl_parse_barestmt(pTHX_ U32 flags)
11357 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11358 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11362 =for apidoc Amx|SV *|parse_label|U32 flags
11364 Parse a single label, possibly optional, of the type that may prefix a
11365 Perl statement. It is up to the caller to ensure that the dynamic parser
11366 state (L</PL_parser> et al) is correctly set to reflect the source of
11367 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11368 label is optional, otherwise it is mandatory.
11370 The name of the label is returned in the form of a fresh scalar. If an
11371 optional label is absent, a null pointer is returned.
11373 If an error occurs in parsing, which can only occur if the label is
11374 mandatory, a valid label is returned anyway. The error is reflected in
11375 the parser state, normally resulting in a single exception at the top
11376 level of parsing which covers all the compilation errors that occurred.
11382 Perl_parse_label(pTHX_ U32 flags)
11384 if (flags & ~PARSE_OPTIONAL)
11385 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11386 if (PL_lex_state == LEX_KNOWNEXT) {
11387 PL_parser->yychar = yylex();
11388 if (PL_parser->yychar == LABEL) {
11389 char * const lpv = pl_yylval.pval;
11390 STRLEN llen = strlen(lpv);
11391 PL_parser->yychar = YYEMPTY;
11392 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11399 STRLEN wlen, bufptr_pos;
11402 if (!isIDFIRST_lazy_if(s, UTF))
11404 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11405 if (word_takes_any_delimeter(s, wlen))
11407 bufptr_pos = s - SvPVX(PL_linestr);
11409 lex_read_space(LEX_KEEP_PREVIOUS);
11411 s = SvPVX(PL_linestr) + bufptr_pos;
11412 if (t[0] == ':' && t[1] != ':') {
11413 PL_oldoldbufptr = PL_oldbufptr;
11416 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11420 if (flags & PARSE_OPTIONAL) {
11423 qerror(Perl_mess(aTHX_ "Parse error"));
11424 return newSVpvs("x");
11431 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11433 Parse a single complete Perl statement. This may be a normal imperative
11434 statement or a declaration that has compile-time effect, and may include
11435 optional labels. It is up to the caller to ensure that the dynamic
11436 parser state (L</PL_parser> et al) is correctly set to reflect the source
11437 of the code to be parsed and the lexical context for the statement.
11439 The op tree representing the statement is returned. This may be a
11440 null pointer if the statement is null, for example if it was actually
11441 a subroutine definition (which has compile-time side effects). If not
11442 null, it will be the result of a L</newSTATEOP> call, normally including
11443 a C<nextstate> or equivalent op.
11445 If an error occurs in parsing or compilation, in most cases a valid op
11446 tree (most likely null) is returned anyway. The error is reflected in
11447 the parser state, normally resulting in a single exception at the top
11448 level of parsing which covers all the compilation errors that occurred.
11449 Some compilation errors, however, will throw an exception immediately.
11451 The I<flags> parameter is reserved for future use, and must always
11458 Perl_parse_fullstmt(pTHX_ U32 flags)
11461 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11462 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11466 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11468 Parse a sequence of zero or more Perl statements. These may be normal
11469 imperative statements, including optional labels, or declarations
11470 that have compile-time effect, or any mixture thereof. The statement
11471 sequence ends when a closing brace or end-of-file is encountered in a
11472 place where a new statement could have validly started. It is up to
11473 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11474 is correctly set to reflect the source of the code to be parsed and the
11475 lexical context for the statements.
11477 The op tree representing the statement sequence is returned. This may
11478 be a null pointer if the statements were all null, for example if there
11479 were no statements or if there were only subroutine definitions (which
11480 have compile-time side effects). If not null, it will be a C<lineseq>
11481 list, normally including C<nextstate> or equivalent ops.
11483 If an error occurs in parsing or compilation, in most cases a valid op
11484 tree is returned anyway. The error is reflected in the parser state,
11485 normally resulting in a single exception at the top level of parsing
11486 which covers all the compilation errors that occurred. Some compilation
11487 errors, however, will throw an exception immediately.
11489 The I<flags> parameter is reserved for future use, and must always
11496 Perl_parse_stmtseq(pTHX_ U32 flags)
11501 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11502 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11503 c = lex_peek_unichar(0);
11504 if (c != -1 && c != /*{*/'}')
11505 qerror(Perl_mess(aTHX_ "Parse error"));
11509 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11511 S_lex_token_boundary(pTHX)
11513 PL_oldoldbufptr = PL_oldbufptr;
11514 PL_oldbufptr = PL_bufptr;
11517 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11519 S_parse_opt_lexvar(pTHX)
11524 lex_token_boundary();
11525 sigil = lex_read_unichar(0);
11526 if (lex_peek_unichar(0) == '#') {
11527 qerror(Perl_mess(aTHX_ "Parse error"));
11531 c = lex_peek_unichar(0);
11532 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11535 d = PL_tokenbuf + 1;
11536 PL_tokenbuf[0] = (char)sigil;
11537 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11539 if (d == PL_tokenbuf+1)
11541 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11542 OPf_MOD | (OPpLVAL_INTRO<<8));
11543 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11548 Perl_parse_subsignature(pTHX)
11551 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11552 OP *initops = NULL;
11554 c = lex_peek_unichar(0);
11555 while (c != /*(*/')') {
11559 if (prev_type == 2)
11560 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11561 var = parse_opt_lexvar();
11563 newBINOP(OP_AELEM, 0,
11564 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11566 newSVOP(OP_CONST, 0, newSViv(pos))) :
11569 c = lex_peek_unichar(0);
11571 lex_token_boundary();
11572 lex_read_unichar(0);
11574 c = lex_peek_unichar(0);
11575 if (c == ',' || c == /*(*/')') {
11577 qerror(Perl_mess(aTHX_ "Optional parameter "
11578 "lacks default expression"));
11580 OP *defexpr = parse_termexpr(0);
11581 if (defexpr->op_type == OP_UNDEF &&
11582 !(defexpr->op_flags & OPf_KIDS)) {
11587 scalar(newUNOP(OP_RV2AV, 0,
11588 newGVOP(OP_GV, 0, PL_defgv))),
11589 newSVOP(OP_CONST, 0, newSViv(pos+1)));
11591 newCONDOP(0, ifop, expr, defexpr) :
11592 newLOGOP(OP_OR, 0, ifop, defexpr);
11597 if (prev_type == 1)
11598 qerror(Perl_mess(aTHX_ "Mandatory parameter "
11599 "follows optional parameter"));
11601 min_arity = pos + 1;
11603 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11605 initops = op_append_list(OP_LINESEQ, initops,
11606 newSTATEOP(0, NULL, expr));
11612 if (prev_type == 2)
11613 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11614 var = parse_opt_lexvar();
11616 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11617 newBINOP(OP_BIT_AND, 0,
11618 scalar(newUNOP(OP_RV2AV, 0,
11619 newGVOP(OP_GV, 0, PL_defgv))),
11620 newSVOP(OP_CONST, 0, newSViv(1))),
11621 op_convert_list(OP_DIE, 0,
11622 op_convert_list(OP_SPRINTF, 0,
11623 op_append_list(OP_LIST,
11624 newSVOP(OP_CONST, 0,
11625 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11627 op_append_list(OP_LIST,
11628 newSVOP(OP_CONST, 0, newSViv(1)),
11629 newSVOP(OP_CONST, 0, newSViv(2))),
11630 newOP(OP_CALLER, 0))))));
11631 if (pos != min_arity)
11632 chkop = newLOGOP(OP_AND, 0,
11634 scalar(newUNOP(OP_RV2AV, 0,
11635 newGVOP(OP_GV, 0, PL_defgv))),
11636 newSVOP(OP_CONST, 0, newSViv(pos))),
11638 initops = op_append_list(OP_LINESEQ,
11639 newSTATEOP(0, NULL, chkop),
11644 op_prepend_elem(OP_ASLICE,
11645 newOP(OP_PUSHMARK, 0),
11646 newLISTOP(OP_ASLICE, 0,
11648 newSVOP(OP_CONST, 0, newSViv(pos)),
11649 newUNOP(OP_AV2ARYLEN, 0,
11650 ref(newUNOP(OP_RV2AV, 0,
11651 newGVOP(OP_GV, 0, PL_defgv)),
11653 ref(newUNOP(OP_RV2AV, 0,
11654 newGVOP(OP_GV, 0, PL_defgv)),
11656 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11657 initops = op_append_list(OP_LINESEQ, initops,
11658 newSTATEOP(0, NULL,
11659 newASSIGNOP(OPf_STACKED, var, 0, slice)));
11666 qerror(Perl_mess(aTHX_ "Parse error"));
11670 c = lex_peek_unichar(0);
11672 case /*(*/')': break;
11675 lex_token_boundary();
11676 lex_read_unichar(0);
11678 c = lex_peek_unichar(0);
11679 } while (c == ',');
11685 if (min_arity != 0) {
11686 initops = op_append_list(OP_LINESEQ,
11687 newSTATEOP(0, NULL,
11690 scalar(newUNOP(OP_RV2AV, 0,
11691 newGVOP(OP_GV, 0, PL_defgv))),
11692 newSVOP(OP_CONST, 0, newSViv(min_arity))),
11693 op_convert_list(OP_DIE, 0,
11694 op_convert_list(OP_SPRINTF, 0,
11695 op_append_list(OP_LIST,
11696 newSVOP(OP_CONST, 0,
11697 newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11699 op_append_list(OP_LIST,
11700 newSVOP(OP_CONST, 0, newSViv(1)),
11701 newSVOP(OP_CONST, 0, newSViv(2))),
11702 newOP(OP_CALLER, 0))))))),
11705 if (max_arity != -1) {
11706 initops = op_append_list(OP_LINESEQ,
11707 newSTATEOP(0, NULL,
11710 scalar(newUNOP(OP_RV2AV, 0,
11711 newGVOP(OP_GV, 0, PL_defgv))),
11712 newSVOP(OP_CONST, 0, newSViv(max_arity))),
11713 op_convert_list(OP_DIE, 0,
11714 op_convert_list(OP_SPRINTF, 0,
11715 op_append_list(OP_LIST,
11716 newSVOP(OP_CONST, 0,
11717 newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11719 op_append_list(OP_LIST,
11720 newSVOP(OP_CONST, 0, newSViv(1)),
11721 newSVOP(OP_CONST, 0, newSViv(2))),
11722 newOP(OP_CALLER, 0))))))),
11730 * c-indentation-style: bsd
11731 * c-basic-offset: 4
11732 * indent-tabs-mode: nil
11735 * ex: set ts=8 sts=4 sw=4 et: