3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_static.c"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_defer (PL_parser->lex_defer)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_sublex_info (PL_parser->sublex_info)
69 #define PL_linestr (PL_parser->linestr)
70 #define PL_expect (PL_parser->expect)
71 #define PL_copline (PL_parser->copline)
72 #define PL_bufptr (PL_parser->bufptr)
73 #define PL_oldbufptr (PL_parser->oldbufptr)
74 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75 #define PL_linestart (PL_parser->linestart)
76 #define PL_bufend (PL_parser->bufend)
77 #define PL_last_uni (PL_parser->last_uni)
78 #define PL_last_lop (PL_parser->last_lop)
79 #define PL_last_lop_op (PL_parser->last_lop_op)
80 #define PL_lex_state (PL_parser->lex_state)
81 #define PL_rsfp (PL_parser->rsfp)
82 #define PL_rsfp_filters (PL_parser->rsfp_filters)
83 #define PL_in_my (PL_parser->in_my)
84 #define PL_in_my_stash (PL_parser->in_my_stash)
85 #define PL_tokenbuf (PL_parser->tokenbuf)
86 #define PL_multi_end (PL_parser->multi_end)
87 #define PL_error_count (PL_parser->error_count)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_nexttype (PL_parser->nexttype)
91 # define PL_nextval (PL_parser->nextval)
93 static const char* const ident_too_long = "Identifier too long";
95 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
97 #define XENUMMASK 0x3f
99 #define XFAKEBRACK 0x80
101 #ifdef USE_UTF8_SCRIPTS
102 # define UTF cBOOL(!IN_BYTES)
104 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
107 /* The maximum number of characters preceding the unrecognized one to display */
108 #define UNRECOGNIZED_PRECEDE_COUNT 10
110 /* In variables named $^X, these are the legal values for X.
111 * 1999-02-27 mjd-perl-patch@plover.com */
112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
114 #define SPACE_OR_TAB(c) isBLANK_A(c)
116 #define HEXFP_PEEK(s) \
118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119 isALPHA_FOLD_EQ(s[0], 'p'))
121 /* LEX_* are values for PL_lex_state, the state of the lexer.
122 * They are arranged oddly so that the guard on the switch statement
123 * can get by with a single comparison (if the compiler is smart enough).
125 * These values refer to the various states within a sublex parse,
126 * i.e. within a double quotish string
129 /* #define LEX_NOTPARSING 11 is done in perl.h. */
131 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
132 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
133 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
134 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
135 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
137 /* at end of code, eg "$x" followed by: */
138 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
139 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
141 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
142 string or after \E, $foo, etc */
143 #define LEX_INTERPCONST 2 /* NOT USED */
144 #define LEX_FORMLINE 1 /* expecting a format line */
145 #define LEX_KNOWNEXT 0 /* next token known; just return it */
149 static const char* const lex_state_names[] = {
164 #include "keywords.h"
166 /* CLINE is a macro that ensures PL_copline has a sane value */
168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
171 * Convenience functions to return different tokens and prime the
172 * lexer for the next token. They all take an argument.
174 * TOKEN : generic token (used for '(', DOLSHARP, etc)
175 * OPERATOR : generic operator
176 * AOPERATOR : assignment operator
177 * PREBLOCK : beginning the block after an if, while, foreach, ...
178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179 * PREREF : *EXPR where EXPR is not a simple identifier
180 * TERM : expression term
181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
182 * LOOPX : loop exiting command (goto, last, dump, etc)
183 * FTST : file test operator
184 * FUN0 : zero-argument function
185 * FUN0OP : zero-argument function, with its op created in this file
186 * FUN1 : not used, except for not, which isn't a UNIOP
187 * BOop : bitwise or or xor
189 * SHop : shift operator
190 * PWop : power operator
191 * PMop : pattern-matching operator
192 * Aop : addition-level operator
193 * AopNOASSIGN : addition-level operator that is never part of .=
194 * Mop : multiplication-level operator
195 * Eop : equality-testing operator
196 * Rop : relational operator <= != gt
198 * Also see LOP and lop() below.
201 #ifdef DEBUGGING /* Serve -DT. */
202 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
204 # define REPORT(retval) (retval)
207 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
208 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
209 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
210 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
211 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
213 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
214 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
215 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
217 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
219 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
220 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
221 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
222 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
223 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
224 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
225 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
226 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
227 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
228 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
229 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
230 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
231 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
232 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
234 /* This bit of chicanery makes a unary function followed by
235 * a parenthesis into a function with one argument, highest precedence.
236 * The UNIDOR macro is for unary functions that can be followed by the //
237 * operator (such as C<shift // 0>).
239 #define UNI3(f,x,have_x) { \
240 pl_yylval.ival = f; \
241 if (have_x) PL_expect = x; \
243 PL_last_uni = PL_oldbufptr; \
244 PL_last_lop_op = f; \
246 return REPORT( (int)FUNC1 ); \
248 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
250 #define UNI(f) UNI3(f,XTERM,1)
251 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
252 #define UNIPROTO(f,optional) { \
253 if (optional) PL_last_uni = PL_oldbufptr; \
257 #define UNIBRACK(f) UNI3(f,0,0)
259 /* grandfather return to old style */
262 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
263 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
264 pl_yylval.ival = (f); \
270 #define COPLINE_INC_WITH_HERELINES \
272 CopLINE_inc(PL_curcop); \
273 if (PL_parser->herelines) \
274 CopLINE(PL_curcop) += PL_parser->herelines, \
275 PL_parser->herelines = 0; \
277 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
278 * is no sublex_push to follow. */
279 #define COPLINE_SET_FROM_MULTI_END \
281 CopLINE_set(PL_curcop, PL_multi_end); \
282 if (PL_multi_end != PL_multi_start) \
283 PL_parser->herelines = 0; \
289 /* how to interpret the pl_yylval associated with the token */
293 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
298 static struct debug_tokens {
300 enum token_type type;
302 } const debug_tokens[] =
304 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
305 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
306 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
307 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
308 { ARROW, TOKENTYPE_NONE, "ARROW" },
309 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
310 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
311 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
312 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
313 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
314 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
315 { DO, TOKENTYPE_NONE, "DO" },
316 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
317 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
318 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
319 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
320 { ELSE, TOKENTYPE_NONE, "ELSE" },
321 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
322 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
323 { FOR, TOKENTYPE_IVAL, "FOR" },
324 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
325 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
326 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
330 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
331 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
332 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
333 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
334 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
335 { IF, TOKENTYPE_IVAL, "IF" },
336 { LABEL, TOKENTYPE_PVAL, "LABEL" },
337 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
338 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
339 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
340 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
341 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
342 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
343 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
344 { MY, TOKENTYPE_IVAL, "MY" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
351 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
352 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
353 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
354 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
355 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
356 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
357 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
358 { PREINC, TOKENTYPE_NONE, "PREINC" },
359 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
360 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
361 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
362 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
363 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
364 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
365 { SUB, TOKENTYPE_NONE, "SUB" },
366 { THING, TOKENTYPE_OPVAL, "THING" },
367 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
368 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
369 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
370 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
371 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
372 { USE, TOKENTYPE_IVAL, "USE" },
373 { WHEN, TOKENTYPE_IVAL, "WHEN" },
374 { WHILE, TOKENTYPE_IVAL, "WHILE" },
375 { WORD, TOKENTYPE_OPVAL, "WORD" },
376 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
377 { 0, TOKENTYPE_NONE, NULL }
380 /* dump the returned token in rv, plus any optional arg in pl_yylval */
383 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
385 PERL_ARGS_ASSERT_TOKEREPORT;
388 const char *name = NULL;
389 enum token_type type = TOKENTYPE_NONE;
390 const struct debug_tokens *p;
391 SV* const report = newSVpvs("<== ");
393 for (p = debug_tokens; p->token; p++) {
394 if (p->token == (int)rv) {
401 Perl_sv_catpv(aTHX_ report, name);
402 else if (isGRAPH(rv))
404 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
406 sv_catpvs(report, " (pending identifier)");
409 sv_catpvs(report, "EOF");
411 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
416 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
418 case TOKENTYPE_OPNUM:
419 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
420 PL_op_name[lvalp->ival]);
423 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
425 case TOKENTYPE_OPVAL:
427 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
428 PL_op_name[lvalp->opval->op_type]);
429 if (lvalp->opval->op_type == OP_CONST) {
430 Perl_sv_catpvf(aTHX_ report, " %s",
431 SvPEEK(cSVOPx_sv(lvalp->opval)));
436 sv_catpvs(report, "(opval=null)");
439 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
445 /* print the buffer with suitable escapes */
448 S_printbuf(pTHX_ const char *const fmt, const char *const s)
450 SV* const tmp = newSVpvs("");
452 PERL_ARGS_ASSERT_PRINTBUF;
454 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
455 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
463 S_deprecate_commaless_var_list(pTHX) {
465 deprecate("comma-less variable list");
466 return REPORT(','); /* grandfather non-comma-format format */
472 * This subroutine looks for an '=' next to the operator that has just been
473 * parsed and turns it into an ASSIGNOP if it finds one.
477 S_ao(pTHX_ int toketype)
479 if (*PL_bufptr == '=') {
481 if (toketype == ANDAND)
482 pl_yylval.ival = OP_ANDASSIGN;
483 else if (toketype == OROR)
484 pl_yylval.ival = OP_ORASSIGN;
485 else if (toketype == DORDOR)
486 pl_yylval.ival = OP_DORASSIGN;
489 return REPORT(toketype);
494 * When Perl expects an operator and finds something else, no_op
495 * prints the warning. It always prints "<something> found where
496 * operator expected. It prints "Missing semicolon on previous line?"
497 * if the surprise occurs at the start of the line. "do you need to
498 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
499 * where the compiler doesn't know if foo is a method call or a function.
500 * It prints "Missing operator before end of line" if there's nothing
501 * after the missing operator, or "... before <...>" if there is something
502 * after the missing operator.
506 S_no_op(pTHX_ const char *const what, char *s)
508 char * const oldbp = PL_bufptr;
509 const bool is_first = (PL_oldbufptr == PL_linestart);
511 PERL_ARGS_ASSERT_NO_OP;
517 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
518 if (ckWARN_d(WARN_SYNTAX)) {
520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
521 "\t(Missing semicolon on previous line?)\n");
522 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
524 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
525 t += UTF ? UTF8SKIP(t) : 1)
527 if (t < PL_bufptr && isSPACE(*t))
528 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
529 "\t(Do you need to predeclare %"UTF8f"?)\n",
530 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Missing operator before %"UTF8f"?)\n",
536 UTF8fARG(UTF, s - oldbp, oldbp));
544 * Complain about missing quote/regexp/heredoc terminator.
545 * If it's called with NULL then it cauterizes the line buffer.
546 * If we're in a delimited string and the delimiter is a control
547 * character, it's reformatted into a two-char sequence like ^C.
552 S_missingterm(pTHX_ char *s)
557 char * const nl = strrchr(s,'\n');
561 else if ((U8) PL_multi_close < 32) {
563 tmpbuf[1] = (char)toCTRL(PL_multi_close);
568 *tmpbuf = (char)PL_multi_close;
572 q = strchr(s,'"') ? '\'' : '"';
573 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
579 * Check whether the named feature is enabled.
582 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
584 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
586 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
588 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
590 if (namelen > MAX_FEATURE_LEN)
592 memcpy(&he_name[8], name, namelen);
594 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
595 REFCOUNTED_HE_EXISTS));
599 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
600 * utf16-to-utf8-reversed.
603 #ifdef PERL_CR_FILTER
607 const char *s = SvPVX_const(sv);
608 const char * const e = s + SvCUR(sv);
610 PERL_ARGS_ASSERT_STRIP_RETURN;
612 /* outer loop optimized to do nothing if there are no CR-LFs */
614 if (*s++ == '\r' && *s == '\n') {
615 /* hit a CR-LF, need to copy the rest */
619 if (*s == '\r' && s[1] == '\n')
630 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
632 const I32 count = FILTER_READ(idx+1, sv, maxlen);
633 if (count > 0 && !maxlen)
640 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
642 Creates and initialises a new lexer/parser state object, supplying
643 a context in which to lex and parse from a new source of Perl code.
644 A pointer to the new state object is placed in L</PL_parser>. An entry
645 is made on the save stack so that upon unwinding the new state object
646 will be destroyed and the former value of L</PL_parser> will be restored.
647 Nothing else need be done to clean up the parsing context.
649 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
650 non-null, provides a string (in SV form) containing code to be parsed.
651 A copy of the string is made, so subsequent modification of I<line>
652 does not affect parsing. I<rsfp>, if non-null, provides an input stream
653 from which code will be read to be parsed. If both are non-null, the
654 code in I<line> comes first and must consist of complete lines of input,
655 and I<rsfp> supplies the remainder of the source.
657 The I<flags> parameter is reserved for future use. Currently it is only
658 used by perl internally, so extensions should always pass zero.
663 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
664 can share filters with the current parser.
665 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
666 caller, hence isn't owned by the parser, so shouldn't be closed on parser
667 destruction. This is used to handle the case of defaulting to reading the
668 script from the standard input because no filename was given on the command
669 line (without getting confused by situation where STDIN has been closed, so
670 the script handle is opened on fd 0) */
673 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
675 const char *s = NULL;
676 yy_parser *parser, *oparser;
677 if (flags && flags & ~LEX_START_FLAGS)
678 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
680 /* create and initialise a parser */
682 Newxz(parser, 1, yy_parser);
683 parser->old_parser = oparser = PL_parser;
686 parser->stack = NULL;
688 parser->stack_size = 0;
690 /* on scope exit, free this parser and restore any outer one */
692 parser->saved_curcop = PL_curcop;
694 /* initialise lexer state */
696 parser->nexttoke = 0;
697 parser->error_count = oparser ? oparser->error_count : 0;
698 parser->copline = parser->preambling = NOLINE;
699 parser->lex_state = LEX_NORMAL;
700 parser->expect = XSTATE;
702 parser->rsfp_filters =
703 !(flags & LEX_START_SAME_FILTER) || !oparser
705 : MUTABLE_AV(SvREFCNT_inc(
706 oparser->rsfp_filters
707 ? oparser->rsfp_filters
708 : (oparser->rsfp_filters = newAV())
711 Newx(parser->lex_brackstack, 120, char);
712 Newx(parser->lex_casestack, 12, char);
713 *parser->lex_casestack = '\0';
714 Newxz(parser->lex_shared, 1, LEXSHARED);
718 s = SvPV_const(line, len);
719 parser->linestr = flags & LEX_START_COPIED
720 ? SvREFCNT_inc_simple_NN(line)
721 : newSVpvn_flags(s, len, SvUTF8(line));
722 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
724 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
726 parser->oldoldbufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
733 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
734 |LEX_DONT_CLOSE_RSFP));
735 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
736 |LEX_DONT_CLOSE_RSFP));
738 parser->in_pod = parser->filtered = 0;
742 /* delete a parser object */
745 Perl_parser_free(pTHX_ const yy_parser *parser)
747 PERL_ARGS_ASSERT_PARSER_FREE;
749 PL_curcop = parser->saved_curcop;
750 SvREFCNT_dec(parser->linestr);
752 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
753 PerlIO_clearerr(parser->rsfp);
754 else if (parser->rsfp && (!parser->old_parser ||
755 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
756 PerlIO_close(parser->rsfp);
757 SvREFCNT_dec(parser->rsfp_filters);
758 SvREFCNT_dec(parser->lex_stuff);
759 SvREFCNT_dec(parser->sublex_info.repl);
761 Safefree(parser->lex_brackstack);
762 Safefree(parser->lex_casestack);
763 Safefree(parser->lex_shared);
764 PL_parser = parser->old_parser;
769 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
771 I32 nexttoke = parser->nexttoke;
772 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
774 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
775 && parser->nextval[nexttoke].opval
776 && parser->nextval[nexttoke].opval->op_slabbed
777 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
778 op_free(parser->nextval[nexttoke].opval);
779 parser->nextval[nexttoke].opval = NULL;
786 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
788 Buffer scalar containing the chunk currently under consideration of the
789 text currently being lexed. This is always a plain string scalar (for
790 which C<SvPOK> is true). It is not intended to be used as a scalar by
791 normal scalar means; instead refer to the buffer directly by the pointer
792 variables described below.
794 The lexer maintains various C<char*> pointers to things in the
795 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
796 reallocated, all of these pointers must be updated. Don't attempt to
797 do this manually, but rather use L</lex_grow_linestr> if you need to
798 reallocate the buffer.
800 The content of the text chunk in the buffer is commonly exactly one
801 complete line of input, up to and including a newline terminator,
802 but there are situations where it is otherwise. The octets of the
803 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
804 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
805 flag on this scalar, which may disagree with it.
807 For direct examination of the buffer, the variable
808 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
809 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
810 of these pointers is usually preferable to examination of the scalar
811 through normal scalar means.
813 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
815 Direct pointer to the end of the chunk of text currently being lexed, the
816 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
817 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
818 always located at the end of the buffer, and does not count as part of
819 the buffer's contents.
821 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
823 Points to the current position of lexing inside the lexer buffer.
824 Characters around this point may be freely examined, within
825 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
826 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
827 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
829 Lexing code (whether in the Perl core or not) moves this pointer past
830 the characters that it consumes. It is also expected to perform some
831 bookkeeping whenever a newline character is consumed. This movement
832 can be more conveniently performed by the function L</lex_read_to>,
833 which handles newlines appropriately.
835 Interpretation of the buffer's octets can be abstracted out by
836 using the slightly higher-level functions L</lex_peek_unichar> and
837 L</lex_read_unichar>.
839 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
841 Points to the start of the current line inside the lexer buffer.
842 This is useful for indicating at which column an error occurred, and
843 not much else. This must be updated by any lexing code that consumes
844 a newline; the function L</lex_read_to> handles this detail.
850 =for apidoc Amx|bool|lex_bufutf8
852 Indicates whether the octets in the lexer buffer
853 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
854 of Unicode characters. If not, they should be interpreted as Latin-1
855 characters. This is analogous to the C<SvUTF8> flag for scalars.
857 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
858 contains valid UTF-8. Lexing code must be robust in the face of invalid
861 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
862 is significant, but not the whole story regarding the input character
863 encoding. Normally, when a file is being read, the scalar contains octets
864 and its C<SvUTF8> flag is off, but the octets should be interpreted as
865 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
866 however, the scalar may have the C<SvUTF8> flag on, and in this case its
867 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
868 is in effect. This logic may change in the future; use this function
869 instead of implementing the logic yourself.
875 Perl_lex_bufutf8(pTHX)
881 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
883 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
884 at least I<len> octets (including terminating C<NUL>). Returns a
885 pointer to the reallocated buffer. This is necessary before making
886 any direct modification of the buffer that would increase its length.
887 L</lex_stuff_pvn> provides a more convenient way to insert text into
890 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
891 this function updates all of the lexer's variables that point directly
898 Perl_lex_grow_linestr(pTHX_ STRLEN len)
902 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
903 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
904 linestr = PL_parser->linestr;
905 buf = SvPVX(linestr);
906 if (len <= SvLEN(linestr))
908 bufend_pos = PL_parser->bufend - buf;
909 bufptr_pos = PL_parser->bufptr - buf;
910 oldbufptr_pos = PL_parser->oldbufptr - buf;
911 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
912 linestart_pos = PL_parser->linestart - buf;
913 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
914 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
915 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
916 PL_parser->lex_shared->re_eval_start - buf : 0;
918 buf = sv_grow(linestr, len);
920 PL_parser->bufend = buf + bufend_pos;
921 PL_parser->bufptr = buf + bufptr_pos;
922 PL_parser->oldbufptr = buf + oldbufptr_pos;
923 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
924 PL_parser->linestart = buf + linestart_pos;
925 if (PL_parser->last_uni)
926 PL_parser->last_uni = buf + last_uni_pos;
927 if (PL_parser->last_lop)
928 PL_parser->last_lop = buf + last_lop_pos;
929 if (PL_parser->lex_shared->re_eval_start)
930 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
935 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
937 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
938 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
939 reallocating the buffer if necessary. This means that lexing code that
940 runs later will see the characters as if they had appeared in the input.
941 It is not recommended to do this as part of normal parsing, and most
942 uses of this facility run the risk of the inserted characters being
943 interpreted in an unintended manner.
945 The string to be inserted is represented by I<len> octets starting
946 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
947 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
948 The characters are recoded for the lexer buffer, according to how the
949 buffer is currently being interpreted (L</lex_bufutf8>). If a string
950 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
951 function is more convenient.
957 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
961 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
962 if (flags & ~(LEX_STUFF_UTF8))
963 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
965 if (flags & LEX_STUFF_UTF8) {
968 STRLEN highhalf = 0; /* Count of variants */
969 const char *p, *e = pv+len;
970 for (p = pv; p != e; p++) {
971 if (! UTF8_IS_INVARIANT(*p)) {
977 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
978 bufptr = PL_parser->bufptr;
979 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
980 SvCUR_set(PL_parser->linestr,
981 SvCUR(PL_parser->linestr) + len+highhalf);
982 PL_parser->bufend += len+highhalf;
983 for (p = pv; p != e; p++) {
985 if (! UTF8_IS_INVARIANT(c)) {
986 *bufptr++ = UTF8_TWO_BYTE_HI(c);
987 *bufptr++ = UTF8_TWO_BYTE_LO(c);
994 if (flags & LEX_STUFF_UTF8) {
996 const char *p, *e = pv+len;
997 for (p = pv; p != e; p++) {
999 if (UTF8_IS_ABOVE_LATIN1(c)) {
1000 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1001 "non-Latin-1 character into Latin-1 input");
1002 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1005 } else if (! UTF8_IS_INVARIANT(c)) {
1006 /* malformed UTF-8 */
1008 SAVESPTR(PL_warnhook);
1009 PL_warnhook = PERL_WARNHOOK_FATAL;
1010 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1016 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1017 bufptr = PL_parser->bufptr;
1018 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1019 SvCUR_set(PL_parser->linestr,
1020 SvCUR(PL_parser->linestr) + len-highhalf);
1021 PL_parser->bufend += len-highhalf;
1024 if (UTF8_IS_INVARIANT(*p)) {
1030 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1036 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1037 bufptr = PL_parser->bufptr;
1038 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1039 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1040 PL_parser->bufend += len;
1041 Copy(pv, bufptr, len, char);
1047 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1049 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051 reallocating the buffer if necessary. This means that lexing code that
1052 runs later will see the characters as if they had appeared in the input.
1053 It is not recommended to do this as part of normal parsing, and most
1054 uses of this facility run the risk of the inserted characters being
1055 interpreted in an unintended manner.
1057 The string to be inserted is represented by octets starting at I<pv>
1058 and continuing to the first nul. These octets are interpreted as either
1059 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1060 in I<flags>. The characters are recoded for the lexer buffer, according
1061 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1062 If it is not convenient to nul-terminate a string to be inserted, the
1063 L</lex_stuff_pvn> function is more appropriate.
1069 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1071 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1072 lex_stuff_pvn(pv, strlen(pv), flags);
1076 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1078 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1079 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1080 reallocating the buffer if necessary. This means that lexing code that
1081 runs later will see the characters as if they had appeared in the input.
1082 It is not recommended to do this as part of normal parsing, and most
1083 uses of this facility run the risk of the inserted characters being
1084 interpreted in an unintended manner.
1086 The string to be inserted is the string value of I<sv>. The characters
1087 are recoded for the lexer buffer, according to how the buffer is currently
1088 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1089 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1090 need to construct a scalar.
1096 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1100 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1102 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1104 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1108 =for apidoc Amx|void|lex_unstuff|char *ptr
1110 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1111 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1112 This hides the discarded text from any lexing code that runs later,
1113 as if the text had never appeared.
1115 This is not the normal way to consume lexed text. For that, use
1122 Perl_lex_unstuff(pTHX_ char *ptr)
1126 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1127 buf = PL_parser->bufptr;
1129 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1132 bufend = PL_parser->bufend;
1134 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1135 unstuff_len = ptr - buf;
1136 Move(ptr, buf, bufend+1-ptr, char);
1137 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1138 PL_parser->bufend = bufend - unstuff_len;
1142 =for apidoc Amx|void|lex_read_to|char *ptr
1144 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1145 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1146 performing the correct bookkeeping whenever a newline character is passed.
1147 This is the normal way to consume lexed text.
1149 Interpretation of the buffer's octets can be abstracted out by
1150 using the slightly higher-level functions L</lex_peek_unichar> and
1151 L</lex_read_unichar>.
1157 Perl_lex_read_to(pTHX_ char *ptr)
1160 PERL_ARGS_ASSERT_LEX_READ_TO;
1161 s = PL_parser->bufptr;
1162 if (ptr < s || ptr > PL_parser->bufend)
1163 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1164 for (; s != ptr; s++)
1166 COPLINE_INC_WITH_HERELINES;
1167 PL_parser->linestart = s+1;
1169 PL_parser->bufptr = ptr;
1173 =for apidoc Amx|void|lex_discard_to|char *ptr
1175 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1176 up to I<ptr>. The remaining content of the buffer will be moved, and
1177 all pointers into the buffer updated appropriately. I<ptr> must not
1178 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1179 it is not permitted to discard text that has yet to be lexed.
1181 Normally it is not necessarily to do this directly, because it suffices to
1182 use the implicit discarding behaviour of L</lex_next_chunk> and things
1183 based on it. However, if a token stretches across multiple lines,
1184 and the lexing code has kept multiple lines of text in the buffer for
1185 that purpose, then after completion of the token it would be wise to
1186 explicitly discard the now-unneeded earlier lines, to avoid future
1187 multi-line tokens growing the buffer without bound.
1193 Perl_lex_discard_to(pTHX_ char *ptr)
1197 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1198 buf = SvPVX(PL_parser->linestr);
1200 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1203 if (ptr > PL_parser->bufptr)
1204 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1205 discard_len = ptr - buf;
1206 if (PL_parser->oldbufptr < ptr)
1207 PL_parser->oldbufptr = ptr;
1208 if (PL_parser->oldoldbufptr < ptr)
1209 PL_parser->oldoldbufptr = ptr;
1210 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1213 PL_parser->last_lop = NULL;
1214 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1215 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1216 PL_parser->bufend -= discard_len;
1217 PL_parser->bufptr -= discard_len;
1218 PL_parser->oldbufptr -= discard_len;
1219 PL_parser->oldoldbufptr -= discard_len;
1220 if (PL_parser->last_uni)
1221 PL_parser->last_uni -= discard_len;
1222 if (PL_parser->last_lop)
1223 PL_parser->last_lop -= discard_len;
1227 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1229 Reads in the next chunk of text to be lexed, appending it to
1230 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1231 looked to the end of the current chunk and wants to know more. It is
1232 usual, but not necessary, for lexing to have consumed the entirety of
1233 the current chunk at this time.
1235 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1236 chunk (i.e., the current chunk has been entirely consumed), normally the
1237 current chunk will be discarded at the same time that the new chunk is
1238 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1239 will not be discarded. If the current chunk has not been entirely
1240 consumed, then it will not be discarded regardless of the flag.
1242 Returns true if some new text was added to the buffer, or false if the
1243 buffer has reached the end of the input text.
1248 #define LEX_FAKE_EOF 0x80000000
1249 #define LEX_NO_TERM 0x40000000
1252 Perl_lex_next_chunk(pTHX_ U32 flags)
1256 STRLEN old_bufend_pos, new_bufend_pos;
1257 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1258 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1259 bool got_some_for_debugger = 0;
1261 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1262 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1263 linestr = PL_parser->linestr;
1264 buf = SvPVX(linestr);
1265 if (!(flags & LEX_KEEP_PREVIOUS) &&
1266 PL_parser->bufptr == PL_parser->bufend) {
1267 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1269 if (PL_parser->last_uni != PL_parser->bufend)
1270 PL_parser->last_uni = NULL;
1271 if (PL_parser->last_lop != PL_parser->bufend)
1272 PL_parser->last_lop = NULL;
1273 last_uni_pos = last_lop_pos = 0;
1277 old_bufend_pos = PL_parser->bufend - buf;
1278 bufptr_pos = PL_parser->bufptr - buf;
1279 oldbufptr_pos = PL_parser->oldbufptr - buf;
1280 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1281 linestart_pos = PL_parser->linestart - buf;
1282 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1283 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1285 if (flags & LEX_FAKE_EOF) {
1287 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1289 } else if (filter_gets(linestr, old_bufend_pos)) {
1291 got_some_for_debugger = 1;
1292 } else if (flags & LEX_NO_TERM) {
1295 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1296 sv_setpvs(linestr, "");
1298 /* End of real input. Close filehandle (unless it was STDIN),
1299 * then add implicit termination.
1301 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1302 PerlIO_clearerr(PL_parser->rsfp);
1303 else if (PL_parser->rsfp)
1304 (void)PerlIO_close(PL_parser->rsfp);
1305 PL_parser->rsfp = NULL;
1306 PL_parser->in_pod = PL_parser->filtered = 0;
1307 if (!PL_in_eval && PL_minus_p) {
1309 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1310 PL_minus_n = PL_minus_p = 0;
1311 } else if (!PL_in_eval && PL_minus_n) {
1312 sv_catpvs(linestr, /*{*/";}");
1315 sv_catpvs(linestr, ";");
1318 buf = SvPVX(linestr);
1319 new_bufend_pos = SvCUR(linestr);
1320 PL_parser->bufend = buf + new_bufend_pos;
1321 PL_parser->bufptr = buf + bufptr_pos;
1322 PL_parser->oldbufptr = buf + oldbufptr_pos;
1323 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1324 PL_parser->linestart = buf + linestart_pos;
1325 if (PL_parser->last_uni)
1326 PL_parser->last_uni = buf + last_uni_pos;
1327 if (PL_parser->last_lop)
1328 PL_parser->last_lop = buf + last_lop_pos;
1329 if (PL_parser->preambling != NOLINE) {
1330 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1331 PL_parser->preambling = NOLINE;
1333 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1334 PL_curstash != PL_debstash) {
1335 /* debugger active and we're not compiling the debugger code,
1336 * so store the line into the debugger's array of lines
1338 update_debugger_info(NULL, buf+old_bufend_pos,
1339 new_bufend_pos-old_bufend_pos);
1345 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1347 Looks ahead one (Unicode) character in the text currently being lexed.
1348 Returns the codepoint (unsigned integer value) of the next character,
1349 or -1 if lexing has reached the end of the input text. To consume the
1350 peeked character, use L</lex_read_unichar>.
1352 If the next character is in (or extends into) the next chunk of input
1353 text, the next chunk will be read in. Normally the current chunk will be
1354 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1355 then the current chunk will not be discarded.
1357 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1358 is encountered, an exception is generated.
1364 Perl_lex_peek_unichar(pTHX_ U32 flags)
1368 if (flags & ~(LEX_KEEP_PREVIOUS))
1369 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1370 s = PL_parser->bufptr;
1371 bufend = PL_parser->bufend;
1377 if (!lex_next_chunk(flags))
1379 s = PL_parser->bufptr;
1380 bufend = PL_parser->bufend;
1383 if (UTF8_IS_INVARIANT(head))
1385 if (UTF8_IS_START(head)) {
1386 len = UTF8SKIP(&head);
1387 while ((STRLEN)(bufend-s) < len) {
1388 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1390 s = PL_parser->bufptr;
1391 bufend = PL_parser->bufend;
1394 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1395 if (retlen == (STRLEN)-1) {
1396 /* malformed UTF-8 */
1398 SAVESPTR(PL_warnhook);
1399 PL_warnhook = PERL_WARNHOOK_FATAL;
1400 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1406 if (!lex_next_chunk(flags))
1408 s = PL_parser->bufptr;
1415 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1417 Reads the next (Unicode) character in the text currently being lexed.
1418 Returns the codepoint (unsigned integer value) of the character read,
1419 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1420 if lexing has reached the end of the input text. To non-destructively
1421 examine the next character, use L</lex_peek_unichar> instead.
1423 If the next character is in (or extends into) the next chunk of input
1424 text, the next chunk will be read in. Normally the current chunk will be
1425 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1426 then the current chunk will not be discarded.
1428 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1429 is encountered, an exception is generated.
1435 Perl_lex_read_unichar(pTHX_ U32 flags)
1438 if (flags & ~(LEX_KEEP_PREVIOUS))
1439 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1440 c = lex_peek_unichar(flags);
1443 COPLINE_INC_WITH_HERELINES;
1445 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1447 ++(PL_parser->bufptr);
1453 =for apidoc Amx|void|lex_read_space|U32 flags
1455 Reads optional spaces, in Perl style, in the text currently being
1456 lexed. The spaces may include ordinary whitespace characters and
1457 Perl-style comments. C<#line> directives are processed if encountered.
1458 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1459 at a non-space character (or the end of the input text).
1461 If spaces extend into the next chunk of input text, the next chunk will
1462 be read in. Normally the current chunk will be discarded at the same
1463 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1464 chunk will not be discarded.
1469 #define LEX_NO_INCLINE 0x40000000
1470 #define LEX_NO_NEXT_CHUNK 0x80000000
1473 Perl_lex_read_space(pTHX_ U32 flags)
1476 const bool can_incline = !(flags & LEX_NO_INCLINE);
1477 bool need_incline = 0;
1478 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1479 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1480 s = PL_parser->bufptr;
1481 bufend = PL_parser->bufend;
1487 } while (!(c == '\n' || (c == 0 && s == bufend)));
1488 } else if (c == '\n') {
1491 PL_parser->linestart = s;
1497 } else if (isSPACE(c)) {
1499 } else if (c == 0 && s == bufend) {
1502 if (flags & LEX_NO_NEXT_CHUNK)
1504 PL_parser->bufptr = s;
1505 l = CopLINE(PL_curcop);
1506 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1507 got_more = lex_next_chunk(flags);
1508 CopLINE_set(PL_curcop, l);
1509 s = PL_parser->bufptr;
1510 bufend = PL_parser->bufend;
1513 if (can_incline && need_incline && PL_parser->rsfp) {
1521 PL_parser->bufptr = s;
1526 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1528 This function performs syntax checking on a prototype, C<proto>.
1529 If C<warn> is true, any illegal characters or mismatched brackets
1530 will trigger illegalproto warnings, declaring that they were
1531 detected in the prototype for C<name>.
1533 The return value is C<true> if this is a valid prototype, and
1534 C<false> if it is not, regardless of whether C<warn> was C<true> or
1537 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1544 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1546 STRLEN len, origlen;
1547 char *p = proto ? SvPV(proto, len) : NULL;
1548 bool bad_proto = FALSE;
1549 bool in_brackets = FALSE;
1550 bool after_slash = FALSE;
1551 char greedy_proto = ' ';
1552 bool proto_after_greedy_proto = FALSE;
1553 bool must_be_last = FALSE;
1554 bool underscore = FALSE;
1555 bool bad_proto_after_underscore = FALSE;
1557 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1563 for (; len--; p++) {
1566 proto_after_greedy_proto = TRUE;
1568 if (!strchr(";@%", *p))
1569 bad_proto_after_underscore = TRUE;
1572 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1579 in_brackets = FALSE;
1580 else if ((*p == '@' || *p == '%') &&
1583 must_be_last = TRUE;
1592 after_slash = FALSE;
1597 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1600 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1601 origlen, UNI_DISPLAY_ISPRINT)
1602 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1604 if (proto_after_greedy_proto)
1605 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1606 "Prototype after '%c' for %"SVf" : %s",
1607 greedy_proto, SVfARG(name), p);
1609 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1610 "Missing ']' in prototype for %"SVf" : %s",
1613 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1614 "Illegal character in prototype for %"SVf" : %s",
1616 if (bad_proto_after_underscore)
1617 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1618 "Illegal character after '_' in prototype for %"SVf" : %s",
1622 return (! (proto_after_greedy_proto || bad_proto) );
1627 * This subroutine has nothing to do with tilting, whether at windmills
1628 * or pinball tables. Its name is short for "increment line". It
1629 * increments the current line number in CopLINE(PL_curcop) and checks
1630 * to see whether the line starts with a comment of the form
1631 * # line 500 "foo.pm"
1632 * If so, it sets the current line number and file to the values in the comment.
1636 S_incline(pTHX_ const char *s)
1643 PERL_ARGS_ASSERT_INCLINE;
1645 COPLINE_INC_WITH_HERELINES;
1646 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1647 && s+1 == PL_bufend && *s == ';') {
1648 /* fake newline in string eval */
1649 CopLINE_dec(PL_curcop);
1654 while (SPACE_OR_TAB(*s))
1656 if (strnEQ(s, "line", 4))
1660 if (SPACE_OR_TAB(*s))
1664 while (SPACE_OR_TAB(*s))
1672 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1674 while (SPACE_OR_TAB(*s))
1676 if (*s == '"' && (t = strchr(s+1, '"'))) {
1682 while (!isSPACE(*t))
1686 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1688 if (*e != '\n' && *e != '\0')
1689 return; /* false alarm */
1691 line_num = grok_atou(n, &e) - 1;
1694 const STRLEN len = t - s;
1696 if (!PL_rsfp && !PL_parser->filtered) {
1697 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1698 * to *{"::_<newfilename"} */
1699 /* However, the long form of evals is only turned on by the
1700 debugger - usually they're "(eval %lu)" */
1701 GV * const cfgv = CopFILEGV(PL_curcop);
1704 STRLEN tmplen2 = len;
1708 if (tmplen2 + 2 <= sizeof smallbuf)
1711 Newx(tmpbuf2, tmplen2 + 2, char);
1716 memcpy(tmpbuf2 + 2, s, tmplen2);
1719 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1721 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1722 /* adjust ${"::_<newfilename"} to store the new file name */
1723 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1724 /* The line number may differ. If that is the case,
1725 alias the saved lines that are in the array.
1726 Otherwise alias the whole array. */
1727 if (CopLINE(PL_curcop) == line_num) {
1728 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1729 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1731 else if (GvAV(cfgv)) {
1732 AV * const av = GvAV(cfgv);
1733 const I32 start = CopLINE(PL_curcop)+1;
1734 I32 items = AvFILLp(av) - start;
1736 AV * const av2 = GvAVn(gv2);
1737 SV **svp = AvARRAY(av) + start;
1738 I32 l = (I32)line_num+1;
1740 av_store(av2, l++, SvREFCNT_inc(*svp++));
1745 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1748 CopFILE_free(PL_curcop);
1749 CopFILE_setn(PL_curcop, s, len);
1751 CopLINE_set(PL_curcop, line_num);
1754 #define skipspace(s) skipspace_flags(s, 0)
1758 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1760 AV *av = CopFILEAVx(PL_curcop);
1763 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1765 sv = *av_fetch(av, 0, 1);
1766 SvUPGRADE(sv, SVt_PVMG);
1768 if (!SvPOK(sv)) sv_setpvs(sv,"");
1770 sv_catsv(sv, orig_sv);
1772 sv_catpvn(sv, buf, len);
1777 if (PL_parser->preambling == NOLINE)
1778 av_store(av, CopLINE(PL_curcop), sv);
1784 * Called to gobble the appropriate amount and type of whitespace.
1785 * Skips comments as well.
1789 S_skipspace_flags(pTHX_ char *s, U32 flags)
1791 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1792 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1793 while (s < PL_bufend && SPACE_OR_TAB(*s))
1796 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1798 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1799 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1800 LEX_NO_NEXT_CHUNK : 0));
1802 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1803 if (PL_linestart > PL_bufptr)
1804 PL_bufptr = PL_linestart;
1812 * Check the unary operators to ensure there's no ambiguity in how they're
1813 * used. An ambiguous piece of code would be:
1815 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1816 * the +5 is its argument.
1825 if (PL_oldoldbufptr != PL_last_uni)
1827 while (isSPACE(*PL_last_uni))
1830 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1832 if ((t = strchr(s, '(')) && t < PL_bufptr)
1835 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1836 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1837 (int)(s - PL_last_uni), PL_last_uni);
1841 * LOP : macro to build a list operator. Its behaviour has been replaced
1842 * with a subroutine, S_lop() for which LOP is just another name.
1845 #define LOP(f,x) return lop(f,x,s)
1849 * Build a list operator (or something that might be one). The rules:
1850 * - if we have a next token, then it's a list operator (no parens) for
1851 * which the next token has already been parsed; e.g.,
1854 * - if the next thing is an opening paren, then it's a function
1855 * - else it's a list operator
1859 S_lop(pTHX_ I32 f, int x, char *s)
1861 PERL_ARGS_ASSERT_LOP;
1866 PL_last_lop = PL_oldbufptr;
1867 PL_last_lop_op = (OPCODE)f;
1872 return REPORT(FUNC);
1875 return REPORT(FUNC);
1878 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1879 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1880 return REPORT(LSTOP);
1886 * When the lexer realizes it knows the next token (for instance,
1887 * it is reordering tokens for the parser) then it can call S_force_next
1888 * to know what token to return the next time the lexer is called. Caller
1889 * will need to set PL_nextval[] and possibly PL_expect to ensure
1890 * the lexer handles the token correctly.
1894 S_force_next(pTHX_ I32 type)
1898 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1899 tokereport(type, &NEXTVAL_NEXTTOKE);
1902 PL_nexttype[PL_nexttoke] = type;
1904 if (PL_lex_state != LEX_KNOWNEXT) {
1905 PL_lex_defer = PL_lex_state;
1906 PL_lex_state = LEX_KNOWNEXT;
1913 * This subroutine handles postfix deref syntax after the arrow has already
1914 * been emitted. @* $* etc. are emitted as two separate token right here.
1915 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1916 * only the first, leaving yylex to find the next.
1920 S_postderef(pTHX_ int const funny, char const next)
1922 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1923 assert(strchr("*[{", next));
1925 PL_expect = XOPERATOR;
1926 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1927 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1928 PL_lex_state = LEX_INTERPEND;
1929 force_next(POSTJOIN);
1935 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1936 && !PL_lex_brackets)
1938 PL_expect = XOPERATOR;
1947 int yyc = PL_parser->yychar;
1948 if (yyc != YYEMPTY) {
1950 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1951 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1952 PL_lex_allbrackets--;
1954 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1955 } else if (yyc == '('/*)*/) {
1956 PL_lex_allbrackets--;
1961 PL_parser->yychar = YYEMPTY;
1966 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1968 SV * const sv = newSVpvn_utf8(start, len,
1971 && !is_invariant_string((const U8*)start, len)
1972 && is_utf8_string((const U8*)start, len));
1978 * When the lexer knows the next thing is a word (for instance, it has
1979 * just seen -> and it knows that the next char is a word char, then
1980 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1984 * char *start : buffer position (must be within PL_linestr)
1985 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1986 * int check_keyword : if true, Perl checks to make sure the word isn't
1987 * a keyword (do this if the word is a label, e.g. goto FOO)
1988 * int allow_pack : if true, : characters will also be allowed (require,
1989 * use, etc. do this)
1993 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
1998 PERL_ARGS_ASSERT_FORCE_WORD;
2000 start = skipspace(start);
2002 if (isIDFIRST_lazy_if(s,UTF) ||
2003 (allow_pack && *s == ':') )
2005 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2006 if (check_keyword) {
2007 char *s2 = PL_tokenbuf;
2009 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2011 if (keyword(s2, len2, 0))
2014 if (token == METHOD) {
2019 PL_expect = XOPERATOR;
2022 NEXTVAL_NEXTTOKE.opval
2023 = (OP*)newSVOP(OP_CONST,0,
2024 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2025 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2033 * Called when the lexer wants $foo *foo &foo etc, but the program
2034 * text only contains the "foo" portion. The first argument is a pointer
2035 * to the "foo", and the second argument is the type symbol to prefix.
2036 * Forces the next token to be a "WORD".
2037 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2041 S_force_ident(pTHX_ const char *s, int kind)
2043 PERL_ARGS_ASSERT_FORCE_IDENT;
2046 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2047 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2048 UTF ? SVf_UTF8 : 0));
2049 NEXTVAL_NEXTTOKE.opval = o;
2052 o->op_private = OPpCONST_ENTERED;
2053 /* XXX see note in pp_entereval() for why we forgo typo
2054 warnings if the symbol must be introduced in an eval.
2056 gv_fetchpvn_flags(s, len,
2057 (PL_in_eval ? GV_ADDMULTI
2058 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2059 kind == '$' ? SVt_PV :
2060 kind == '@' ? SVt_PVAV :
2061 kind == '%' ? SVt_PVHV :
2069 S_force_ident_maybe_lex(pTHX_ char pit)
2071 NEXTVAL_NEXTTOKE.ival = pit;
2076 Perl_str_to_version(pTHX_ SV *sv)
2081 const char *start = SvPV_const(sv,len);
2082 const char * const end = start + len;
2083 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2085 PERL_ARGS_ASSERT_STR_TO_VERSION;
2087 while (start < end) {
2091 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2096 retval += ((NV)n)/nshift;
2105 * Forces the next token to be a version number.
2106 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2107 * and if "guessing" is TRUE, then no new token is created (and the caller
2108 * must use an alternative parsing method).
2112 S_force_version(pTHX_ char *s, int guessing)
2117 PERL_ARGS_ASSERT_FORCE_VERSION;
2125 while (isDIGIT(*d) || *d == '_' || *d == '.')
2127 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2129 s = scan_num(s, &pl_yylval);
2130 version = pl_yylval.opval;
2131 ver = cSVOPx(version)->op_sv;
2132 if (SvPOK(ver) && !SvNIOK(ver)) {
2133 SvUPGRADE(ver, SVt_PVNV);
2134 SvNV_set(ver, str_to_version(ver));
2135 SvNOK_on(ver); /* hint that it is a version */
2138 else if (guessing) {
2143 /* NOTE: The parser sees the package name and the VERSION swapped */
2144 NEXTVAL_NEXTTOKE.opval = version;
2151 * S_force_strict_version
2152 * Forces the next token to be a version number using strict syntax rules.
2156 S_force_strict_version(pTHX_ char *s)
2159 const char *errstr = NULL;
2161 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2163 while (isSPACE(*s)) /* leading whitespace */
2166 if (is_STRICT_VERSION(s,&errstr)) {
2168 s = (char *)scan_version(s, ver, 0);
2169 version = newSVOP(OP_CONST, 0, ver);
2171 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2172 (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2176 yyerror(errstr); /* version required */
2180 /* NOTE: The parser sees the package name and the VERSION swapped */
2181 NEXTVAL_NEXTTOKE.opval = version;
2189 * Tokenize a quoted string passed in as an SV. It finds the next
2190 * chunk, up to end of string or a backslash. It may make a new
2191 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2196 S_tokeq(pTHX_ SV *sv)
2203 PERL_ARGS_ASSERT_TOKEQ;
2207 assert (!SvIsCOW(sv));
2208 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2212 /* This is relying on the SV being "well formed" with a trailing '\0' */
2213 while (s < send && !(*s == '\\' && s[1] == '\\'))
2218 if ( PL_hints & HINT_NEW_STRING ) {
2219 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2220 SVs_TEMP | SvUTF8(sv));
2224 if (s + 1 < send && (s[1] == '\\'))
2225 s++; /* all that, just for this */
2230 SvCUR_set(sv, d - SvPVX_const(sv));
2232 if ( PL_hints & HINT_NEW_STRING )
2233 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2238 * Now come three functions related to double-quote context,
2239 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2240 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2241 * interact with PL_lex_state, and create fake ( ... ) argument lists
2242 * to handle functions and concatenation.
2246 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2251 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2253 * Pattern matching will set PL_lex_op to the pattern-matching op to
2254 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2256 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2258 * Everything else becomes a FUNC.
2260 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2261 * had an OP_CONST or OP_READLINE). This just sets us up for a
2262 * call to S_sublex_push().
2266 S_sublex_start(pTHX)
2268 const I32 op_type = pl_yylval.ival;
2270 if (op_type == OP_NULL) {
2271 pl_yylval.opval = PL_lex_op;
2275 if (op_type == OP_CONST) {
2276 SV *sv = tokeq(PL_lex_stuff);
2278 if (SvTYPE(sv) == SVt_PVIV) {
2279 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2281 const char * const p = SvPV_const(sv, len);
2282 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2286 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2287 PL_lex_stuff = NULL;
2291 PL_sublex_info.super_state = PL_lex_state;
2292 PL_sublex_info.sub_inwhat = (U16)op_type;
2293 PL_sublex_info.sub_op = PL_lex_op;
2294 PL_lex_state = LEX_INTERPPUSH;
2298 pl_yylval.opval = PL_lex_op;
2308 * Create a new scope to save the lexing state. The scope will be
2309 * ended in S_sublex_done. Returns a '(', starting the function arguments
2310 * to the uc, lc, etc. found before.
2311 * Sets PL_lex_state to LEX_INTERPCONCAT.
2318 const bool is_heredoc = PL_multi_close == '<';
2321 PL_lex_state = PL_sublex_info.super_state;
2322 SAVEI8(PL_lex_dojoin);
2323 SAVEI32(PL_lex_brackets);
2324 SAVEI32(PL_lex_allbrackets);
2325 SAVEI32(PL_lex_formbrack);
2326 SAVEI8(PL_lex_fakeeof);
2327 SAVEI32(PL_lex_casemods);
2328 SAVEI32(PL_lex_starts);
2329 SAVEI8(PL_lex_state);
2330 SAVESPTR(PL_lex_repl);
2331 SAVEVPTR(PL_lex_inpat);
2332 SAVEI16(PL_lex_inwhat);
2335 SAVECOPLINE(PL_curcop);
2336 SAVEI32(PL_multi_end);
2337 SAVEI32(PL_parser->herelines);
2338 PL_parser->herelines = 0;
2340 SAVEI8(PL_multi_close);
2341 SAVEPPTR(PL_bufptr);
2342 SAVEPPTR(PL_bufend);
2343 SAVEPPTR(PL_oldbufptr);
2344 SAVEPPTR(PL_oldoldbufptr);
2345 SAVEPPTR(PL_last_lop);
2346 SAVEPPTR(PL_last_uni);
2347 SAVEPPTR(PL_linestart);
2348 SAVESPTR(PL_linestr);
2349 SAVEGENERICPV(PL_lex_brackstack);
2350 SAVEGENERICPV(PL_lex_casestack);
2351 SAVEGENERICPV(PL_parser->lex_shared);
2352 SAVEBOOL(PL_parser->lex_re_reparsing);
2353 SAVEI32(PL_copline);
2355 /* The here-doc parser needs to be able to peek into outer lexing
2356 scopes to find the body of the here-doc. So we put PL_linestr and
2357 PL_bufptr into lex_shared, to ‘share’ those values.
2359 PL_parser->lex_shared->ls_linestr = PL_linestr;
2360 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2362 PL_linestr = PL_lex_stuff;
2363 PL_lex_repl = PL_sublex_info.repl;
2364 PL_lex_stuff = NULL;
2365 PL_sublex_info.repl = NULL;
2367 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2368 = SvPVX(PL_linestr);
2369 PL_bufend += SvCUR(PL_linestr);
2370 PL_last_lop = PL_last_uni = NULL;
2371 SAVEFREESV(PL_linestr);
2372 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2374 PL_lex_dojoin = FALSE;
2375 PL_lex_brackets = PL_lex_formbrack = 0;
2376 PL_lex_allbrackets = 0;
2377 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2378 Newx(PL_lex_brackstack, 120, char);
2379 Newx(PL_lex_casestack, 12, char);
2380 PL_lex_casemods = 0;
2381 *PL_lex_casestack = '\0';
2383 PL_lex_state = LEX_INTERPCONCAT;
2385 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2386 PL_copline = NOLINE;
2388 Newxz(shared, 1, LEXSHARED);
2389 shared->ls_prev = PL_parser->lex_shared;
2390 PL_parser->lex_shared = shared;
2392 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2393 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2394 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2395 PL_lex_inpat = PL_sublex_info.sub_op;
2397 PL_lex_inpat = NULL;
2399 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2400 PL_in_eval &= ~EVAL_RE_REPARSING;
2407 * Restores lexer state after a S_sublex_push.
2413 if (!PL_lex_starts++) {
2414 SV * const sv = newSVpvs("");
2415 if (SvUTF8(PL_linestr))
2417 PL_expect = XOPERATOR;
2418 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2422 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2423 PL_lex_state = LEX_INTERPCASEMOD;
2427 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2428 assert(PL_lex_inwhat != OP_TRANSR);
2430 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2431 PL_linestr = PL_lex_repl;
2433 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2434 PL_bufend += SvCUR(PL_linestr);
2435 PL_last_lop = PL_last_uni = NULL;
2436 PL_lex_dojoin = FALSE;
2437 PL_lex_brackets = 0;
2438 PL_lex_allbrackets = 0;
2439 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2440 PL_lex_casemods = 0;
2441 *PL_lex_casestack = '\0';
2443 if (SvEVALED(PL_lex_repl)) {
2444 PL_lex_state = LEX_INTERPNORMAL;
2446 /* we don't clear PL_lex_repl here, so that we can check later
2447 whether this is an evalled subst; that means we rely on the
2448 logic to ensure sublex_done() is called again only via the
2449 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2452 PL_lex_state = LEX_INTERPCONCAT;
2455 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2456 CopLINE(PL_curcop) +=
2457 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2458 + PL_parser->herelines;
2459 PL_parser->herelines = 0;
2464 const line_t l = CopLINE(PL_curcop);
2466 if (PL_multi_close == '<')
2467 PL_parser->herelines += l - PL_multi_end;
2468 PL_bufend = SvPVX(PL_linestr);
2469 PL_bufend += SvCUR(PL_linestr);
2470 PL_expect = XOPERATOR;
2471 PL_sublex_info.sub_inwhat = 0;
2476 PERL_STATIC_INLINE SV*
2477 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2479 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2480 * interior, hence to the "}". Finds what the name resolves to, returning
2481 * an SV* containing it; NULL if no valid one found */
2483 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2490 const U8* first_bad_char_loc;
2491 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2493 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2498 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2500 &first_bad_char_loc))
2502 /* If warnings are on, this will print a more detailed analysis of what
2503 * is wrong than the error message below */
2504 utf8n_to_uvchr(first_bad_char_loc,
2505 e - ((char *) first_bad_char_loc),
2508 /* We deliberately don't try to print the malformed character, which
2509 * might not print very well; it also may be just the first of many
2510 * malformations, so don't print what comes after it */
2511 yyerror(Perl_form(aTHX_
2512 "Malformed UTF-8 character immediately after '%.*s'",
2513 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2517 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2518 /* include the <}> */
2519 e - backslash_ptr + 1);
2521 SvREFCNT_dec_NN(res);
2525 /* See if the charnames handler is the Perl core's, and if so, we can skip
2526 * the validation needed for a user-supplied one, as Perl's does its own
2528 table = GvHV(PL_hintgv); /* ^H */
2529 cvp = hv_fetchs(table, "charnames", FALSE);
2530 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2531 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2533 const char * const name = HvNAME(stash);
2534 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2535 && strEQ(name, "_charnames")) {
2540 /* Here, it isn't Perl's charname handler. We can't rely on a
2541 * user-supplied handler to validate the input name. For non-ut8 input,
2542 * look to see that the first character is legal. Then loop through the
2543 * rest checking that each is a continuation */
2545 /* This code makes the reasonable assumption that the only Latin1-range
2546 * characters that begin a character name alias are alphabetic, otherwise
2547 * would have to create a isCHARNAME_BEGIN macro */
2550 if (! isALPHAU(*s)) {
2555 if (! isCHARNAME_CONT(*s)) {
2558 if (*s == ' ' && *(s-1) == ' ') {
2561 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2562 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2563 "NO-BREAK SPACE in a charnames "
2564 "alias definition is deprecated");
2570 /* Similarly for utf8. For invariants can check directly; for other
2571 * Latin1, can calculate their code point and check; otherwise use a
2573 if (UTF8_IS_INVARIANT(*s)) {
2574 if (! isALPHAU(*s)) {
2578 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2579 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2585 if (! PL_utf8_charname_begin) {
2586 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2587 PL_utf8_charname_begin = _core_swash_init("utf8",
2588 "_Perl_Charname_Begin",
2590 1, 0, NULL, &flags);
2592 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2599 if (UTF8_IS_INVARIANT(*s)) {
2600 if (! isCHARNAME_CONT(*s)) {
2603 if (*s == ' ' && *(s-1) == ' ') {
2608 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2609 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2613 if (*s == *NBSP_UTF8
2614 && *(s+1) == *(NBSP_UTF8+1)
2615 && ckWARN_d(WARN_DEPRECATED))
2617 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2618 "NO-BREAK SPACE in a charnames "
2619 "alias definition is deprecated");
2624 if (! PL_utf8_charname_continue) {
2625 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2626 PL_utf8_charname_continue = _core_swash_init("utf8",
2627 "_Perl_Charname_Continue",
2629 1, 0, NULL, &flags);
2631 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2638 if (*(s-1) == ' ') {
2641 "charnames alias definitions may not contain trailing "
2642 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2643 (int)(s - backslash_ptr + 1), backslash_ptr,
2644 (int)(e - s + 1), s + 1
2646 UTF ? SVf_UTF8 : 0);
2650 if (SvUTF8(res)) { /* Don't accept malformed input */
2651 const U8* first_bad_char_loc;
2653 const char* const str = SvPV_const(res, len);
2654 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2655 /* If warnings are on, this will print a more detailed analysis of
2656 * what is wrong than the error message below */
2657 utf8n_to_uvchr(first_bad_char_loc,
2658 (char *) first_bad_char_loc - str,
2661 /* We deliberately don't try to print the malformed character,
2662 * which might not print very well; it also may be just the first
2663 * of many malformations, so don't print what comes after it */
2666 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2667 (int) (e - backslash_ptr + 1), backslash_ptr,
2668 (int) ((char *) first_bad_char_loc - str), str
2679 /* The final %.*s makes sure that should the trailing NUL be missing
2680 * that this print won't run off the end of the string */
2683 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2684 (int)(s - backslash_ptr + 1), backslash_ptr,
2685 (int)(e - s + 1), s + 1
2687 UTF ? SVf_UTF8 : 0);
2694 "charnames alias definitions may not contain a sequence of "
2695 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2696 (int)(s - backslash_ptr + 1), backslash_ptr,
2697 (int)(e - s + 1), s + 1
2699 UTF ? SVf_UTF8 : 0);
2706 Extracts the next constant part of a pattern, double-quoted string,
2707 or transliteration. This is terrifying code.
2709 For example, in parsing the double-quoted string "ab\x63$d", it would
2710 stop at the '$' and return an OP_CONST containing 'abc'.
2712 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2713 processing a pattern (PL_lex_inpat is true), a transliteration
2714 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2716 Returns a pointer to the character scanned up to. If this is
2717 advanced from the start pointer supplied (i.e. if anything was
2718 successfully parsed), will leave an OP_CONST for the substring scanned
2719 in pl_yylval. Caller must intuit reason for not parsing further
2720 by looking at the next characters herself.
2724 \N{FOO} => \N{U+hex_for_character_FOO}
2725 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2728 all other \-char, including \N and \N{ apart from \N{ABC}
2731 @ and $ where it appears to be a var, but not for $ as tail anchor
2736 In transliterations:
2737 characters are VERY literal, except for - not at the start or end
2738 of the string, which indicates a range. If the range is in bytes,
2739 scan_const expands the range to the full set of intermediate
2740 characters. If the range is in utf8, the hyphen is replaced with
2741 a certain range mark which will be handled by pmtrans() in op.c.
2743 In double-quoted strings:
2745 double-quoted style: \r and \n
2746 constants: \x31, etc.
2747 deprecated backrefs: \1 (in substitution replacements)
2748 case and quoting: \U \Q \E
2751 scan_const does *not* construct ops to handle interpolated strings.
2752 It stops processing as soon as it finds an embedded $ or @ variable
2753 and leaves it to the caller to work out what's going on.
2755 embedded arrays (whether in pattern or not) could be:
2756 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2758 $ in double-quoted strings must be the symbol of an embedded scalar.
2760 $ in pattern could be $foo or could be tail anchor. Assumption:
2761 it's a tail anchor if $ is the last thing in the string, or if it's
2762 followed by one of "()| \r\n\t"
2764 \1 (backreferences) are turned into $1 in substitutions
2766 The structure of the code is
2767 while (there's a character to process) {
2768 handle transliteration ranges
2769 skip regexp comments /(?#comment)/ and codes /(?{code})/
2770 skip #-initiated comments in //x patterns
2771 check for embedded arrays
2772 check for embedded scalars
2774 deprecate \1 in substitution replacements
2775 handle string-changing backslashes \l \U \Q \E, etc.
2776 switch (what was escaped) {
2777 handle \- in a transliteration (becomes a literal -)
2778 if a pattern and not \N{, go treat as regular character
2779 handle \132 (octal characters)
2780 handle \x15 and \x{1234} (hex characters)
2781 handle \N{name} (named characters, also \N{3,5} in a pattern)
2782 handle \cV (control characters)
2783 handle printf-style backslashes (\f, \r, \n, etc)
2786 } (end if backslash)
2787 handle regular character
2788 } (end while character to read)
2793 S_scan_const(pTHX_ char *start)
2795 char *send = PL_bufend; /* end of the constant */
2796 SV *sv = newSV(send - start); /* sv for the constant. See note below
2798 char *s = start; /* start of the constant */
2799 char *d = SvPVX(sv); /* destination for copies */
2800 bool dorange = FALSE; /* are we in a translit range? */
2801 bool didrange = FALSE; /* did we just finish a range? */
2802 bool in_charclass = FALSE; /* within /[...]/ */
2803 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2804 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2805 UTF8? But, this can show as true
2806 when the source isn't utf8, as for
2807 example when it is entirely composed
2809 SV *res; /* result from charnames */
2811 /* Note on sizing: The scanned constant is placed into sv, which is
2812 * initialized by newSV() assuming one byte of output for every byte of
2813 * input. This routine expects newSV() to allocate an extra byte for a
2814 * trailing NUL, which this routine will append if it gets to the end of
2815 * the input. There may be more bytes of input than output (eg., \N{LATIN
2816 * CAPITAL LETTER A}), or more output than input if the constant ends up
2817 * recoded to utf8, but each time a construct is found that might increase
2818 * the needed size, SvGROW() is called. Its size parameter each time is
2819 * based on the best guess estimate at the time, namely the length used so
2820 * far, plus the length the current construct will occupy, plus room for
2821 * the trailing NUL, plus one byte for every input byte still unscanned */
2823 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2826 UV literal_endpoint = 0;
2827 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2830 PERL_ARGS_ASSERT_SCAN_CONST;
2832 assert(PL_lex_inwhat != OP_TRANSR);
2833 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2834 /* If we are doing a trans and we know we want UTF8 set expectation */
2835 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2836 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2839 /* Protect sv from errors and fatal warnings. */
2840 ENTER_with_name("scan_const");
2843 while (s < send || dorange) {
2845 /* get transliterations out of the way (they're most literal) */
2846 if (PL_lex_inwhat == OP_TRANS) {
2847 /* expand a range A-Z to the full set of characters. AIE! */
2849 I32 i; /* current expanded character */
2850 I32 min; /* first character in range */
2851 I32 max; /* last character in range */
2862 char * const c = (char*)utf8_hop((U8*)d, -1);
2866 *c = (char) ILLEGAL_UTF8_BYTE;
2867 /* mark the range as done, and continue */
2873 i = d - SvPVX_const(sv); /* remember current offset */
2876 SvLEN(sv) + ((has_utf8)
2877 ? (512 - UTF_CONTINUATION_MARK
2880 /* How many two-byte within 0..255: 128 in UTF-8,
2881 * 96 in UTF-8-mod. */
2883 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2885 d = SvPVX(sv) + i; /* refresh d after realloc */
2889 for (j = 0; j <= 1; j++) {
2890 char * const c = (char*)utf8_hop((U8*)d, -1);
2891 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2897 max = (U8)0xff; /* only to \xff */
2898 uvmax = uv; /* \x{100} to uvmax */
2900 d = c; /* eat endpoint chars */
2905 d -= 2; /* eat the first char and the - */
2906 min = (U8)*d; /* first char in range */
2907 max = (U8)d[1]; /* last char in range */
2914 "Invalid range \"%c-%c\" in transliteration operator",
2915 (char)min, (char)max);
2919 /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
2920 * any subsets of these ranges into individual characters */
2921 if (literal_endpoint == 2 &&
2922 ((isLOWER_A(min) && isLOWER_A(max)) ||
2923 (isUPPER_A(min) && isUPPER_A(max))))
2925 for (i = min; i <= max; i++) {
2932 for (i = min; i <= max; i++)
2935 append_utf8_from_native_byte(i, &d);
2943 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2945 *d++ = (char) ILLEGAL_UTF8_BYTE;
2947 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2951 /* mark the range as done, and continue */
2955 literal_endpoint = 0;
2960 /* range begins (ignore - as first or last char) */
2961 else if (*s == '-' && s+1 < send && s != start) {
2963 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2970 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
2980 literal_endpoint = 0;
2981 native_range = TRUE;
2986 /* if we get here, we're not doing a transliteration */
2988 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2991 while (s1 >= start && *s1-- == '\\')
2994 in_charclass = TRUE;
2997 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3000 while (s1 >= start && *s1-- == '\\')
3003 in_charclass = FALSE;
3006 /* skip for regexp comments /(?#comment)/, except for the last
3007 * char, which will be done separately.
3008 * Stop on (?{..}) and friends */
3010 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3012 while (s+1 < send && *s != ')')
3015 else if (!PL_lex_casemods &&
3016 ( s[2] == '{' /* This should match regcomp.c */
3017 || (s[2] == '?' && s[3] == '{')))
3023 /* likewise skip #-initiated comments in //x patterns */
3024 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3025 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3026 while (s+1 < send && *s != '\n')
3030 /* no further processing of single-quoted regex */
3031 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3032 goto default_action;
3034 /* check for embedded arrays
3035 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3037 else if (*s == '@' && s[1]) {
3038 if (isWORDCHAR_lazy_if(s+1,UTF))
3040 if (strchr(":'{$", s[1]))
3042 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3043 break; /* in regexp, neither @+ nor @- are interpolated */
3046 /* check for embedded scalars. only stop if we're sure it's a
3049 else if (*s == '$') {
3050 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3052 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3054 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3055 "Possible unintended interpolation of $\\ in regex");
3057 break; /* in regexp, $ might be tail anchor */
3061 /* End of else if chain - OP_TRANS rejoin rest */
3064 if (*s == '\\' && s+1 < send) {
3065 char* e; /* Can be used for ending '}', etc. */
3069 /* warn on \1 - \9 in substitution replacements, but note that \11
3070 * is an octal; and \19 is \1 followed by '9' */
3071 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3072 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3074 /* diag_listed_as: \%d better written as $%d */
3075 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3080 /* string-change backslash escapes */
3081 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3085 /* In a pattern, process \N, but skip any other backslash escapes.
3086 * This is because we don't want to translate an escape sequence
3087 * into a meta symbol and have the regex compiler use the meta
3088 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3089 * in spite of this, we do have to process \N here while the proper
3090 * charnames handler is in scope. See bugs #56444 and #62056.
3092 * There is a complication because \N in a pattern may also stand
3093 * for 'match a non-nl', and not mean a charname, in which case its
3094 * processing should be deferred to the regex compiler. To be a
3095 * charname it must be followed immediately by a '{', and not look
3096 * like \N followed by a curly quantifier, i.e., not something like
3097 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3099 else if (PL_lex_inpat
3102 || regcurly(s + 1)))
3105 goto default_action;
3110 /* quoted - in transliterations */
3112 if (PL_lex_inwhat == OP_TRANS) {
3119 if ((isALPHANUMERIC(*s)))
3120 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3121 "Unrecognized escape \\%c passed through",
3123 /* default action is to copy the quoted character */
3124 goto default_action;
3127 /* eg. \132 indicates the octal constant 0132 */
3128 case '0': case '1': case '2': case '3':
3129 case '4': case '5': case '6': case '7':
3131 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3133 uv = grok_oct(s, &len, &flags, NULL);
3135 if (len < 3 && s < send && isDIGIT(*s)
3136 && ckWARN(WARN_MISC))
3138 Perl_warner(aTHX_ packWARN(WARN_MISC),
3139 "%s", form_short_octal_warning(s, len));
3142 goto NUM_ESCAPE_INSERT;
3144 /* eg. \o{24} indicates the octal constant \024 */
3149 bool valid = grok_bslash_o(&s, &uv, &error,
3150 TRUE, /* Output warning */
3151 FALSE, /* Not strict */
3152 TRUE, /* Output warnings for
3159 goto NUM_ESCAPE_INSERT;
3162 /* eg. \x24 indicates the hex constant 0x24 */
3167 bool valid = grok_bslash_x(&s, &uv, &error,
3168 TRUE, /* Output warning */
3169 FALSE, /* Not strict */
3170 TRUE, /* Output warnings for
3180 /* Insert oct or hex escaped character. There will always be
3181 * enough room in sv since such escapes will be longer than any
3182 * UTF-8 sequence they can end up as, except if they force us
3183 * to recode the rest of the string into utf8 */
3185 /* Here uv is the ordinal of the next character being added */
3186 if (!UVCHR_IS_INVARIANT(uv)) {
3187 if (!has_utf8 && uv > 255) {
3188 /* Might need to recode whatever we have accumulated so
3189 * far if it contains any chars variant in utf8 or
3192 SvCUR_set(sv, d - SvPVX_const(sv));
3195 /* See Note on sizing above. */
3196 sv_utf8_upgrade_flags_grow(
3198 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3199 /* Above-latin1 in string
3200 * implies no encoding */
3201 |SV_UTF8_NO_ENCODING,
3202 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3203 d = SvPVX(sv) + SvCUR(sv);
3208 d = (char*)uvchr_to_utf8((U8*)d, uv);
3209 if (PL_lex_inwhat == OP_TRANS &&
3210 PL_sublex_info.sub_op) {
3211 PL_sublex_info.sub_op->op_private |=
3212 (PL_lex_repl ? OPpTRANS_FROM_UTF
3216 if (uv > 255 && !dorange)
3217 native_range = FALSE;
3230 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3231 * named character, like \N{LATIN SMALL LETTER A}, or a named
3232 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3233 * GRAVE}. For convenience all three forms are referred to as
3234 * "named characters" below.
3236 * For patterns, \N also can mean to match a non-newline. Code
3237 * before this 'switch' statement should already have handled
3238 * this situation, and hence this code only has to deal with
3239 * the named character cases.
3241 * For non-patterns, the named characters are converted to
3242 * their string equivalents. In patterns, named characters are
3243 * not converted to their ultimate forms for the same reasons
3244 * that other escapes aren't. Instead, they are converted to
3245 * the \N{U+...} form to get the value from the charnames that
3246 * is in effect right now, while preserving the fact that it
3247 * was a named character, so that the regex compiler knows
3250 * The structure of this section of code (besides checking for
3251 * errors and upgrading to utf8) is:
3252 * If the named character is of the form \N{U+...}, pass it
3253 * through if a pattern; otherwise convert the code point
3255 * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
3256 * if a pattern; otherwise convert to utf8
3258 * If the regex compiler should ever need to differentiate
3259 * between the \N{U+...} and \N{name} forms, that could easily
3260 * be done here by stripping any leading zeros from the
3261 * \N{U+...} case, and adding them to the other one. */
3263 /* Here, 's' points to the 'N'; the test below is guaranteed to
3264 * succeed if we are being called on a pattern, as we already
3265 * know from a test above that the next character is a '{'. A
3266 * non-pattern \N must mean 'named character', which requires
3270 yyerror("Missing braces on \\N{}");
3275 /* If there is no matching '}', it is an error. */
3276 if (! (e = strchr(s, '}'))) {
3277 if (! PL_lex_inpat) {
3278 yyerror("Missing right brace on \\N{}");
3280 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3285 /* Here it looks like a named character */
3287 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3288 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3289 | PERL_SCAN_SILENT_ILLDIGIT
3290 | PERL_SCAN_DISALLOW_PREFIX;
3293 s += 2; /* Skip to next char after the 'U+' */
3295 uv = grok_hex(s, &len, &flags, NULL);
3297 || ( len != (STRLEN)(e - s) && s[len] != '.'
3301 yyerror("Invalid hexadecimal number in \\N{U+...}");
3308 s -= 5; /* Include the '\N{U+' */
3309 /* On EBCDIC platforms, in \N{U+...}, the '...' is a
3310 * Unicode value, so convert to native so downstream
3311 * code can continue to assume it's native */
3312 /* XXX This should be in the regexp parser,
3313 because doing it here makes /\N{U+41}/ and
3314 =~ '\N{U+41}' do different things. */
3315 d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
3318 (unsigned int) UNI_TO_NATIVE(uv));
3323 uv = grok_hex(s, &len, &flags, NULL);
3325 || (len != (STRLEN)(e - s) && s[len] != '.'))
3329 d, e - s + 1 + 1, ".%X",
3330 (unsigned int)UNI_TO_NATIVE(uv)
3336 /* On non-EBCDIC platforms, pass it through unchanged.
3337 * The reason we evaluate the numbers is to make
3338 * sure there wasn't a syntax error. */
3339 const char * const orig_s = s - 5;
3343 uv = grok_hex(s, &len, &flags, NULL);
3345 || (len != (STRLEN)(e - s) && s[len] != '.'))
3348 /* +1 is for the '}' */
3349 Copy(orig_s, d, e - orig_s + 1, char);
3350 d += e - orig_s + 1;
3353 else { /* Not a pattern: convert the hex to string */
3355 /* If the destination is not in utf8, unconditionally
3356 * recode it to be so. This is because \N{} implies
3357 * Unicode semantics, and scalars have to be in utf8
3358 * to guarantee those semantics */
3360 SvCUR_set(sv, d - SvPVX_const(sv));
3363 /* See Note on sizing above. */
3364 sv_utf8_upgrade_flags_grow(
3366 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3367 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3368 d = SvPVX(sv) + SvCUR(sv);
3372 /* Add the (Unicode) code point to the output. */
3373 if (UNI_IS_INVARIANT(uv)) {
3374 *d++ = (char) LATIN1_TO_NATIVE(uv);
3377 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3381 else /* Here is \N{NAME} but not \N{U+...}. */
3382 if ((res = get_and_check_backslash_N_name(s, e)))
3385 const char *str = SvPV_const(res, len);
3388 if (! len) { /* The name resolved to an empty string */
3389 Copy("\\N{}", d, 4, char);
3393 /* In order to not lose information for the regex
3394 * compiler, pass the result in the specially made
3395 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3396 * the code points in hex of each character
3397 * returned by charnames */
3399 const char *str_end = str + len;
3400 const STRLEN off = d - SvPVX_const(sv);
3402 if (! SvUTF8(res)) {
3403 /* For the non-UTF-8 case, we can determine the
3404 * exact length needed without having to parse
3405 * through the string. Each character takes up
3406 * 2 hex digits plus either a trailing dot or
3408 const char initial_text[] = "\\N{U+";
3409 const STRLEN initial_len = sizeof(initial_text)
3411 d = off + SvGROW(sv, off
3414 /* +1 for trailing NUL */
3417 + (STRLEN)(send - e));
3418 Copy(initial_text, d, initial_len, char);
3420 while (str < str_end) {
3423 my_snprintf(hex_string,
3425 "%02X.", (U8) *str);
3426 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
3427 Copy(hex_string, d, 3, char);
3431 d--; /* Below, we will overwrite the final
3432 dot with a right brace */
3435 STRLEN char_length; /* cur char's byte length */
3437 /* and the number of bytes after this is
3438 * translated into hex digits */
3439 STRLEN output_length;
3441 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3442 * for max('U+', '.'); and 1 for NUL */
3443 char hex_string[2 * UTF8_MAXBYTES + 5];
3445 /* Get the first character of the result. */
3446 U32 uv = utf8n_to_uvchr((U8 *) str,
3450 /* Convert first code point to hex, including
3451 * the boiler plate before it. */
3453 my_snprintf(hex_string, sizeof(hex_string),
3457 /* Make sure there is enough space to hold it */
3458 d = off + SvGROW(sv, off
3460 + (STRLEN)(send - e)
3461 + 2); /* '}' + NUL */
3463 Copy(hex_string, d, output_length, char);
3466 /* For each subsequent character, append dot and
3467 * its ordinal in hex */
3468 while ((str += char_length) < str_end) {
3469 const STRLEN off = d - SvPVX_const(sv);
3470 U32 uv = utf8n_to_uvchr((U8 *) str,
3475 my_snprintf(hex_string,
3480 d = off + SvGROW(sv, off
3482 + (STRLEN)(send - e)
3483 + 2); /* '}' + NUL */
3484 Copy(hex_string, d, output_length, char);
3489 *d++ = '}'; /* Done. Add the trailing brace */
3492 else { /* Here, not in a pattern. Convert the name to a
3495 /* If destination is not in utf8, unconditionally
3496 * recode it to be so. This is because \N{} implies
3497 * Unicode semantics, and scalars have to be in utf8
3498 * to guarantee those semantics */
3500 SvCUR_set(sv, d - SvPVX_const(sv));
3503 /* See Note on sizing above. */
3504 sv_utf8_upgrade_flags_grow(sv,
3505 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3506 len + (STRLEN)(send - s) + 1);
3507 d = SvPVX(sv) + SvCUR(sv);
3509 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3511 /* See Note on sizing above. (NOTE: SvCUR() is not
3512 * set correctly here). */
3513 const STRLEN off = d - SvPVX_const(sv);
3514 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3516 if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
3517 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3518 str = SvPV_const(res, len);
3520 Copy(str, d, len, char);
3526 } /* End \N{NAME} */
3529 native_range = FALSE; /* \N{} is defined to be Unicode */
3531 s = e + 1; /* Point to just after the '}' */
3534 /* \c is a control character */
3538 *d++ = grok_bslash_c(*s++, 1);
3541 yyerror("Missing control char name in \\c");
3545 /* printf-style backslashes, formfeeds, newlines, etc */
3571 } /* end if (backslash) */
3578 /* If we started with encoded form, or already know we want it,
3579 then encode the next character */
3580 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3584 /* One might think that it is wasted effort in the case of the
3585 * source being utf8 (this_utf8 == TRUE) to take the next character
3586 * in the source, convert it to an unsigned value, and then convert
3587 * it back again. But the source has not been validated here. The
3588 * routine that does the conversion checks for errors like
3591 const UV nextuv = (this_utf8)
3592 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3594 const STRLEN need = UNISKIP(nextuv);
3596 SvCUR_set(sv, d - SvPVX_const(sv));
3599 /* See Note on sizing above. */
3600 sv_utf8_upgrade_flags_grow(sv,
3601 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3602 need + (STRLEN)(send - s) + 1);
3603 d = SvPVX(sv) + SvCUR(sv);
3605 } else if (need > len) {
3606 /* encoded value larger than old, may need extra space (NOTE:
3607 * SvCUR() is not set correctly here). See Note on sizing
3609 const STRLEN off = d - SvPVX_const(sv);
3610 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3614 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3616 if (uv > 255 && !dorange)
3617 native_range = FALSE;
3623 } /* while loop to process each character */
3625 /* terminate the string and set up the sv */
3627 SvCUR_set(sv, d - SvPVX_const(sv));
3628 if (SvCUR(sv) >= SvLEN(sv))
3629 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3630 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3633 if (IN_ENCODING && !has_utf8) {
3634 sv_recode_to_utf8(sv, _get_encoding());
3640 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3641 PL_sublex_info.sub_op->op_private |=
3642 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3646 /* shrink the sv if we allocated more than we used */
3647 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3648 SvPV_shrink_to_cur(sv);
3651 /* return the substring (via pl_yylval) only if we parsed anything */
3654 for (; s2 < s; s2++) {
3656 COPLINE_INC_WITH_HERELINES;
3658 SvREFCNT_inc_simple_void_NN(sv);
3659 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3660 && ! PL_parser->lex_re_reparsing)
3662 const char *const key = PL_lex_inpat ? "qr" : "q";
3663 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3667 if (PL_lex_inwhat == OP_TRANS) {
3670 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3673 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3681 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3684 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3686 LEAVE_with_name("scan_const");
3691 * Returns TRUE if there's more to the expression (e.g., a subscript),
3694 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3696 * ->[ and ->{ return TRUE
3697 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3698 * { and [ outside a pattern are always subscripts, so return TRUE
3699 * if we're outside a pattern and it's not { or [, then return FALSE
3700 * if we're in a pattern and the first char is a {
3701 * {4,5} (any digits around the comma) returns FALSE
3702 * if we're in a pattern and the first char is a [
3704 * [SOMETHING] has a funky algorithm to decide whether it's a
3705 * character class or not. It has to deal with things like
3706 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3707 * anything else returns TRUE
3710 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3713 S_intuit_more(pTHX_ char *s)
3715 PERL_ARGS_ASSERT_INTUIT_MORE;
3717 if (PL_lex_brackets)
3719 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3721 if (*s == '-' && s[1] == '>'
3722 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3723 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3724 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3726 if (*s != '{' && *s != '[')
3731 /* In a pattern, so maybe we have {n,m}. */
3739 /* On the other hand, maybe we have a character class */
3742 if (*s == ']' || *s == '^')
3745 /* this is terrifying, and it works */
3748 const char * const send = strchr(s,']');
3749 unsigned char un_char, last_un_char;
3750 char tmpbuf[sizeof PL_tokenbuf * 4];
3752 if (!send) /* has to be an expression */
3754 weight = 2; /* let's weigh the evidence */
3758 else if (isDIGIT(*s)) {
3760 if (isDIGIT(s[1]) && s[2] == ']')
3766 Zero(seen,256,char);
3768 for (; s < send; s++) {
3769 last_un_char = un_char;
3770 un_char = (unsigned char)*s;
3775 weight -= seen[un_char] * 10;
3776 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3778 char *tmp = PL_bufend;
3779 PL_bufend = (char*)send;
3780 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3782 len = (int)strlen(tmpbuf);
3783 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3784 UTF ? SVf_UTF8 : 0, SVt_PV))
3789 else if (*s == '$' && s[1] &&
3790 strchr("[#!%*<>()-=",s[1])) {
3791 if (/*{*/ strchr("])} =",s[2]))
3800 if (strchr("wds]",s[1]))
3802 else if (seen[(U8)'\''] || seen[(U8)'"'])
3804 else if (strchr("rnftbxcav",s[1]))
3806 else if (isDIGIT(s[1])) {
3808 while (s[1] && isDIGIT(s[1]))
3818 if (strchr("aA01! ",last_un_char))
3820 if (strchr("zZ79~",s[1]))
3822 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3823 weight -= 5; /* cope with negative subscript */
3826 if (!isWORDCHAR(last_un_char)
3827 && !(last_un_char == '$' || last_un_char == '@'
3828 || last_un_char == '&')
3829 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3833 if (keyword(d, s - d, 0))
3836 if (un_char == last_un_char + 1)
3838 weight -= seen[un_char];
3843 if (weight >= 0) /* probably a character class */
3853 * Does all the checking to disambiguate
3855 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3856 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3858 * First argument is the stuff after the first token, e.g. "bar".
3860 * Not a method if foo is a filehandle.
3861 * Not a method if foo is a subroutine prototyped to take a filehandle.
3862 * Not a method if it's really "Foo $bar"
3863 * Method if it's "foo $bar"
3864 * Not a method if it's really "print foo $bar"
3865 * Method if it's really "foo package::" (interpreted as package->foo)
3866 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3867 * Not a method if bar is a filehandle or package, but is quoted with
3872 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
3874 char *s = start + (*start == '$');
3875 char tmpbuf[sizeof PL_tokenbuf];
3878 /* Mustn't actually add anything to a symbol table.
3879 But also don't want to "initialise" any placeholder
3880 constants that might already be there into full
3881 blown PVGVs with attached PVCV. */
3883 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
3885 PERL_ARGS_ASSERT_INTUIT_METHOD;
3887 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3889 if (cv && SvPOK(cv)) {
3890 const char *proto = CvPROTO(cv);
3892 while (*proto && (isSPACE(*proto) || *proto == ';'))
3899 if (*start == '$') {
3900 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3901 isUPPER(*PL_tokenbuf))
3906 return *s == '(' ? FUNCMETH : METHOD;
3909 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3910 /* start is the beginning of the possible filehandle/object,
3911 * and s is the end of it
3912 * tmpbuf is a copy of it (but with single quotes as double colons)
3915 if (!keyword(tmpbuf, len, 0)) {
3916 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3921 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3922 if (indirgv && GvCVu(indirgv))
3924 /* filehandle or package name makes it a method */
3925 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3927 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3928 return 0; /* no assumptions -- "=>" quotes bareword */
3930 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3931 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3932 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3936 return *s == '(' ? FUNCMETH : METHOD;
3942 /* Encoded script support. filter_add() effectively inserts a
3943 * 'pre-processing' function into the current source input stream.
3944 * Note that the filter function only applies to the current source file
3945 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3947 * The datasv parameter (which may be NULL) can be used to pass
3948 * private data to this instance of the filter. The filter function
3949 * can recover the SV using the FILTER_DATA macro and use it to
3950 * store private buffers and state information.
3952 * The supplied datasv parameter is upgraded to a PVIO type
3953 * and the IoDIRP/IoANY field is used to store the function pointer,
3954 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3955 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3956 * private use must be set using malloc'd pointers.
3960 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3968 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3969 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3971 if (!PL_rsfp_filters)
3972 PL_rsfp_filters = newAV();
3975 SvUPGRADE(datasv, SVt_PVIO);
3976 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3977 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3978 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3979 FPTR2DPTR(void *, IoANY(datasv)),
3980 SvPV_nolen(datasv)));
3981 av_unshift(PL_rsfp_filters, 1);
3982 av_store(PL_rsfp_filters, 0, datasv) ;
3984 !PL_parser->filtered
3985 && PL_parser->lex_flags & LEX_EVALBYTES
3986 && PL_bufptr < PL_bufend
3988 const char *s = PL_bufptr;
3989 while (s < PL_bufend) {
3991 SV *linestr = PL_parser->linestr;
3992 char *buf = SvPVX(linestr);
3993 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3994 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3995 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3996 STRLEN const linestart_pos = PL_parser->linestart - buf;
3997 STRLEN const last_uni_pos =
3998 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3999 STRLEN const last_lop_pos =
4000 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4001 av_push(PL_rsfp_filters, linestr);
4002 PL_parser->linestr =
4003 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4004 buf = SvPVX(PL_parser->linestr);
4005 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4006 PL_parser->bufptr = buf + bufptr_pos;
4007 PL_parser->oldbufptr = buf + oldbufptr_pos;
4008 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4009 PL_parser->linestart = buf + linestart_pos;
4010 if (PL_parser->last_uni)
4011 PL_parser->last_uni = buf + last_uni_pos;
4012 if (PL_parser->last_lop)
4013 PL_parser->last_lop = buf + last_lop_pos;
4014 SvLEN(linestr) = SvCUR(linestr);
4015 SvCUR(linestr) = s-SvPVX(linestr);
4016 PL_parser->filtered = 1;
4026 /* Delete most recently added instance of this filter function. */
4028 Perl_filter_del(pTHX_ filter_t funcp)
4032 PERL_ARGS_ASSERT_FILTER_DEL;
4035 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4036 FPTR2DPTR(void*, funcp)));
4038 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4040 /* if filter is on top of stack (usual case) just pop it off */
4041 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4042 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4043 sv_free(av_pop(PL_rsfp_filters));
4047 /* we need to search for the correct entry and clear it */
4048 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4052 /* Invoke the idxth filter function for the current rsfp. */
4053 /* maxlen 0 = read one text line */
4055 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4059 /* This API is bad. It should have been using unsigned int for maxlen.
4060 Not sure if we want to change the API, but if not we should sanity
4061 check the value here. */
4062 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4064 PERL_ARGS_ASSERT_FILTER_READ;
4066 if (!PL_parser || !PL_rsfp_filters)
4068 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4069 /* Provide a default input filter to make life easy. */
4070 /* Note that we append to the line. This is handy. */
4071 DEBUG_P(PerlIO_printf(Perl_debug_log,
4072 "filter_read %d: from rsfp\n", idx));
4073 if (correct_length) {
4076 const int old_len = SvCUR(buf_sv);
4078 /* ensure buf_sv is large enough */
4079 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4080 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4081 correct_length)) <= 0) {
4082 if (PerlIO_error(PL_rsfp))
4083 return -1; /* error */
4085 return 0 ; /* end of file */
4087 SvCUR_set(buf_sv, old_len + len) ;
4088 SvPVX(buf_sv)[old_len + len] = '\0';
4091 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4092 if (PerlIO_error(PL_rsfp))
4093 return -1; /* error */
4095 return 0 ; /* end of file */
4098 return SvCUR(buf_sv);
4100 /* Skip this filter slot if filter has been deleted */
4101 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4102 DEBUG_P(PerlIO_printf(Perl_debug_log,
4103 "filter_read %d: skipped (filter deleted)\n",
4105 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4107 if (SvTYPE(datasv) != SVt_PVIO) {
4108 if (correct_length) {
4110 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4111 if (!remainder) return 0; /* eof */
4112 if (correct_length > remainder) correct_length = remainder;
4113 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4114 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4117 const char *s = SvEND(datasv);
4118 const char *send = SvPVX(datasv) + SvLEN(datasv);
4126 if (s == send) return 0; /* eof */
4127 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4128 SvCUR_set(datasv, s-SvPVX(datasv));
4130 return SvCUR(buf_sv);
4132 /* Get function pointer hidden within datasv */
4133 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4134 DEBUG_P(PerlIO_printf(Perl_debug_log,
4135 "filter_read %d: via function %p (%s)\n",
4136 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4137 /* Call function. The function is expected to */
4138 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4139 /* Return: <0:error, =0:eof, >0:not eof */
4140 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4144 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4146 PERL_ARGS_ASSERT_FILTER_GETS;
4148 #ifdef PERL_CR_FILTER
4149 if (!PL_rsfp_filters) {
4150 filter_add(S_cr_textfilter,NULL);
4153 if (PL_rsfp_filters) {
4155 SvCUR_set(sv, 0); /* start with empty line */
4156 if (FILTER_READ(0, sv, 0) > 0)
4157 return ( SvPVX(sv) ) ;
4162 return (sv_gets(sv, PL_rsfp, append));
4166 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4170 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4172 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4176 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4177 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4179 return GvHV(gv); /* Foo:: */
4182 /* use constant CLASS => 'MyClass' */
4183 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4184 if (gv && GvCV(gv)) {
4185 SV * const sv = cv_const_sv(GvCV(gv));
4187 return gv_stashsv(sv, 0);
4190 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4195 S_tokenize_use(pTHX_ int is_use, char *s) {
4196 PERL_ARGS_ASSERT_TOKENIZE_USE;
4198 if (PL_expect != XSTATE)
4199 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4200 is_use ? "use" : "no"));
4203 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4204 s = force_version(s, TRUE);
4205 if (*s == ';' || *s == '}'
4206 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4207 NEXTVAL_NEXTTOKE.opval = NULL;
4210 else if (*s == 'v') {
4211 s = force_word(s,WORD,FALSE,TRUE);
4212 s = force_version(s, FALSE);
4216 s = force_word(s,WORD,FALSE,TRUE);
4217 s = force_version(s, FALSE);
4219 pl_yylval.ival = is_use;
4223 static const char* const exp_name[] =
4224 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4225 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4230 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4232 S_word_takes_any_delimeter(char *p, STRLEN len)
4234 return (len == 1 && strchr("msyq", p[0])) ||
4236 (p[0] == 't' && p[1] == 'r') ||
4237 (p[0] == 'q' && strchr("qwxr", p[1]))));
4241 S_check_scalar_slice(pTHX_ char *s)
4244 while (*s == ' ' || *s == '\t') s++;
4245 if (*s == 'q' && s[1] == 'w'
4246 && !isWORDCHAR_lazy_if(s+2,UTF))
4248 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4249 s += UTF ? UTF8SKIP(s) : 1;
4250 if (*s == '}' || *s == ']')
4251 pl_yylval.ival = OPpSLICEWARNING;
4257 Works out what to call the token just pulled out of the input
4258 stream. The yacc parser takes care of taking the ops we return and
4259 stitching them into a tree.
4262 The type of the next token
4265 Switch based on the current state:
4266 - if we already built the token before, use it
4267 - if we have a case modifier in a string, deal with that
4268 - handle other cases of interpolation inside a string
4269 - scan the next line if we are inside a format
4270 In the normal state switch on the next character:
4272 if alphabetic, go to key lookup
4273 unrecoginized character - croak
4274 - 0/4/26: handle end-of-line or EOF
4275 - cases for whitespace
4276 - \n and #: handle comments and line numbers
4277 - various operators, brackets and sigils
4280 - 'v': vstrings (or go to key lookup)
4281 - 'x' repetition operator (or go to key lookup)
4282 - other ASCII alphanumerics (key lookup begins here):
4285 scan built-in keyword (but do nothing with it yet)
4286 check for statement label
4287 check for lexical subs
4288 goto just_a_word if there is one
4289 see whether built-in keyword is overridden
4290 switch on keyword number:
4291 - default: just_a_word:
4292 not a built-in keyword; handle bareword lookup
4293 disambiguate between method and sub call
4294 fall back to bareword
4295 - cases for built-in keywords
4303 char *s = PL_bufptr;
4307 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4311 /* orig_keyword, gvp, and gv are initialized here because
4312 * jump to the label just_a_word_zero can bypass their
4313 * initialization later. */
4314 I32 orig_keyword = 0;
4319 SV* tmp = newSVpvs("");
4320 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4321 (IV)CopLINE(PL_curcop),
4322 lex_state_names[PL_lex_state],
4323 exp_name[PL_expect],
4324 pv_display(tmp, s, strlen(s), 0, 60));
4328 switch (PL_lex_state) {
4330 case LEX_INTERPNORMAL:
4333 /* when we've already built the next token, just pull it out of the queue */
4336 pl_yylval = PL_nextval[PL_nexttoke];
4338 PL_lex_state = PL_lex_defer;
4339 PL_lex_defer = LEX_NORMAL;
4343 next_type = PL_nexttype[PL_nexttoke];
4344 if (next_type & (7<<24)) {
4345 if (next_type & (1<<24)) {
4346 if (PL_lex_brackets > 100)
4347 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4348 PL_lex_brackstack[PL_lex_brackets++] =
4349 (char) ((next_type >> 16) & 0xff);
4351 if (next_type & (2<<24))
4352 PL_lex_allbrackets++;
4353 if (next_type & (4<<24))
4354 PL_lex_allbrackets--;
4355 next_type &= 0xffff;
4357 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4360 /* interpolated case modifiers like \L \U, including \Q and \E.
4361 when we get here, PL_bufptr is at the \
4363 case LEX_INTERPCASEMOD:
4365 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4367 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4368 PL_bufptr, PL_bufend, *PL_bufptr);
4370 /* handle \E or end of string */
4371 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4373 if (PL_lex_casemods) {
4374 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4375 PL_lex_casestack[PL_lex_casemods] = '\0';
4377 if (PL_bufptr != PL_bufend
4378 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4379 || oldmod == 'F')) {
4381 PL_lex_state = LEX_INTERPCONCAT;
4383 PL_lex_allbrackets--;
4386 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4387 /* Got an unpaired \E */
4388 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4389 "Useless use of \\E");
4391 if (PL_bufptr != PL_bufend)
4393 PL_lex_state = LEX_INTERPCONCAT;
4397 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4398 "### Saw case modifier\n"); });
4400 if (s[1] == '\\' && s[2] == 'E') {
4402 PL_lex_state = LEX_INTERPCONCAT;
4407 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4408 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4409 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4410 (strchr(PL_lex_casestack, 'L')
4411 || strchr(PL_lex_casestack, 'U')
4412 || strchr(PL_lex_casestack, 'F'))) {
4413 PL_lex_casestack[--PL_lex_casemods] = '\0';
4414 PL_lex_allbrackets--;
4417 if (PL_lex_casemods > 10)
4418 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4419 PL_lex_casestack[PL_lex_casemods++] = *s;
4420 PL_lex_casestack[PL_lex_casemods] = '\0';
4421 PL_lex_state = LEX_INTERPCONCAT;
4422 NEXTVAL_NEXTTOKE.ival = 0;
4423 force_next((2<<24)|'(');
4425 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4427 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4429 NEXTVAL_NEXTTOKE.ival = OP_LC;
4431 NEXTVAL_NEXTTOKE.ival = OP_UC;
4433 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4435 NEXTVAL_NEXTTOKE.ival = OP_FC;
4437 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4441 if (PL_lex_starts) {
4444 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4445 if (PL_lex_casemods == 1 && PL_lex_inpat)
4448 AopNOASSIGN(OP_CONCAT);
4454 case LEX_INTERPPUSH:
4455 return REPORT(sublex_push());
4457 case LEX_INTERPSTART:
4458 if (PL_bufptr == PL_bufend)
4459 return REPORT(sublex_done());
4460 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4461 "### Interpolated variable\n"); });
4463 /* for /@a/, we leave the joining for the regex engine to do
4464 * (unless we're within \Q etc) */
4465 PL_lex_dojoin = (*PL_bufptr == '@'
4466 && (!PL_lex_inpat || PL_lex_casemods));
4467 PL_lex_state = LEX_INTERPNORMAL;
4468 if (PL_lex_dojoin) {
4469 NEXTVAL_NEXTTOKE.ival = 0;
4471 force_ident("\"", '$');
4472 NEXTVAL_NEXTTOKE.ival = 0;
4474 NEXTVAL_NEXTTOKE.ival = 0;
4475 force_next((2<<24)|'(');
4476 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4479 /* Convert (?{...}) and friends to 'do {...}' */
4480 if (PL_lex_inpat && *PL_bufptr == '(') {
4481 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4483 if (*PL_bufptr != '{')
4485 PL_expect = XTERMBLOCK;
4489 if (PL_lex_starts++) {
4491 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4492 if (!PL_lex_casemods && PL_lex_inpat)
4495 AopNOASSIGN(OP_CONCAT);
4499 case LEX_INTERPENDMAYBE:
4500 if (intuit_more(PL_bufptr)) {
4501 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4507 if (PL_lex_dojoin) {
4508 const U8 dojoin_was = PL_lex_dojoin;
4509 PL_lex_dojoin = FALSE;
4510 PL_lex_state = LEX_INTERPCONCAT;
4511 PL_lex_allbrackets--;
4512 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4514 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4515 && SvEVALED(PL_lex_repl))
4517 if (PL_bufptr != PL_bufend)
4518 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4521 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4522 re_eval_str. If the here-doc body’s length equals the previous
4523 value of re_eval_start, re_eval_start will now be null. So
4524 check re_eval_str as well. */
4525 if (PL_parser->lex_shared->re_eval_start
4526 || PL_parser->lex_shared->re_eval_str) {
4528 if (*PL_bufptr != ')')
4529 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4531 /* having compiled a (?{..}) expression, return the original
4532 * text too, as a const */
4533 if (PL_parser->lex_shared->re_eval_str) {
4534 sv = PL_parser->lex_shared->re_eval_str;
4535 PL_parser->lex_shared->re_eval_str = NULL;
4537 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4538 SvPV_shrink_to_cur(sv);
4540 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4541 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4542 NEXTVAL_NEXTTOKE.opval =
4543 (OP*)newSVOP(OP_CONST, 0,
4546 PL_parser->lex_shared->re_eval_start = NULL;
4552 case LEX_INTERPCONCAT:
4554 if (PL_lex_brackets)
4555 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4556 (long) PL_lex_brackets);
4558 if (PL_bufptr == PL_bufend)
4559 return REPORT(sublex_done());
4561 /* m'foo' still needs to be parsed for possible (?{...}) */
4562 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4563 SV *sv = newSVsv(PL_linestr);
4565 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4569 s = scan_const(PL_bufptr);
4571 PL_lex_state = LEX_INTERPCASEMOD;
4573 PL_lex_state = LEX_INTERPSTART;
4576 if (s != PL_bufptr) {
4577 NEXTVAL_NEXTTOKE = pl_yylval;
4580 if (PL_lex_starts++) {
4581 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4582 if (!PL_lex_casemods && PL_lex_inpat)
4585 AopNOASSIGN(OP_CONCAT);
4595 s = scan_formline(PL_bufptr);
4596 if (!PL_lex_formbrack)
4605 /* We really do *not* want PL_linestr ever becoming a COW. */
4606 assert (!SvIsCOW(PL_linestr));
4608 PL_oldoldbufptr = PL_oldbufptr;
4610 PL_parser->saw_infix_sigil = 0;
4615 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
4618 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4619 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4621 SVs_TEMP | SVf_UTF8),
4622 10, UNI_DISPLAY_ISPRINT)
4623 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4624 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4625 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4626 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4630 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4631 UTF8fARG(UTF, (s - d), d),
4636 goto fake_eof; /* emulate EOF on ^D or ^Z */
4638 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4641 if (PL_lex_brackets &&
4642 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4643 yyerror((const char *)
4645 ? "Format not terminated"
4646 : "Missing right curly or square bracket"));
4648 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4649 "### Tokener got EOF\n");
4653 if (s++ < PL_bufend)
4654 goto retry; /* ignore stray nulls */
4657 if (!PL_in_eval && !PL_preambled) {
4658 PL_preambled = TRUE;
4660 /* Generate a string of Perl code to load the debugger.
4661 * If PERL5DB is set, it will return the contents of that,
4662 * otherwise a compile-time require of perl5db.pl. */
4664 const char * const pdb = PerlEnv_getenv("PERL5DB");
4667 sv_setpv(PL_linestr, pdb);
4668 sv_catpvs(PL_linestr,";");
4670 SETERRNO(0,SS_NORMAL);
4671 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4673 PL_parser->preambling = CopLINE(PL_curcop);
4675 sv_setpvs(PL_linestr,"");
4676 if (PL_preambleav) {
4677 SV **svp = AvARRAY(PL_preambleav);
4678 SV **const end = svp + AvFILLp(PL_preambleav);
4680 sv_catsv(PL_linestr, *svp);
4682 sv_catpvs(PL_linestr, ";");
4684 sv_free(MUTABLE_SV(PL_preambleav));
4685 PL_preambleav = NULL;
4688 sv_catpvs(PL_linestr,
4689 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4690 if (PL_minus_n || PL_minus_p) {
4691 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4693 sv_catpvs(PL_linestr,"chomp;");
4696 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4697 || *PL_splitstr == '"')
4698 && strchr(PL_splitstr + 1, *PL_splitstr))
4699 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4701 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4702 bytes can be used as quoting characters. :-) */
4703 const char *splits = PL_splitstr;
4704 sv_catpvs(PL_linestr, "our @F=split(q\0");
4707 if (*splits == '\\')
4708 sv_catpvn(PL_linestr, splits, 1);
4709 sv_catpvn(PL_linestr, splits, 1);
4710 } while (*splits++);
4711 /* This loop will embed the trailing NUL of
4712 PL_linestr as the last thing it does before
4714 sv_catpvs(PL_linestr, ");");
4718 sv_catpvs(PL_linestr,"our @F=split(' ');");
4721 sv_catpvs(PL_linestr, "\n");
4722 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4723 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4724 PL_last_lop = PL_last_uni = NULL;
4725 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4726 update_debugger_info(PL_linestr, NULL, 0);
4731 bof = PL_rsfp ? TRUE : FALSE;
4734 fake_eof = LEX_FAKE_EOF;
4736 PL_bufptr = PL_bufend;
4737 COPLINE_INC_WITH_HERELINES;
4738 if (!lex_next_chunk(fake_eof)) {
4739 CopLINE_dec(PL_curcop);
4741 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4743 CopLINE_dec(PL_curcop);
4745 /* If it looks like the start of a BOM or raw UTF-16,
4746 * check if it in fact is. */
4747 if (bof && PL_rsfp &&
4749 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
4752 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4753 bof = (offset == (Off_t)SvCUR(PL_linestr));
4754 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4755 /* offset may include swallowed CR */
4757 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4760 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4761 s = swallow_bom((U8*)s);
4764 if (PL_parser->in_pod) {
4765 /* Incest with pod. */
4766 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4767 sv_setpvs(PL_linestr, "");
4768 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4769 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4770 PL_last_lop = PL_last_uni = NULL;
4771 PL_parser->in_pod = 0;
4774 if (PL_rsfp || PL_parser->filtered)
4776 } while (PL_parser->in_pod);
4777 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4778 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4779 PL_last_lop = PL_last_uni = NULL;
4780 if (CopLINE(PL_curcop) == 1) {
4781 while (s < PL_bufend && isSPACE(*s))
4783 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4787 if (*s == '#' && *(s+1) == '!')
4789 #ifdef ALTERNATE_SHEBANG
4791 static char const as[] = ALTERNATE_SHEBANG;
4792 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4793 d = s + (sizeof(as) - 1);
4795 #endif /* ALTERNATE_SHEBANG */
4804 while (*d && !isSPACE(*d))
4808 #ifdef ARG_ZERO_IS_SCRIPT
4809 if (ipathend > ipath) {
4811 * HP-UX (at least) sets argv[0] to the script name,
4812 * which makes $^X incorrect. And Digital UNIX and Linux,
4813 * at least, set argv[0] to the basename of the Perl
4814 * interpreter. So, having found "#!", we'll set it right.
4816 SV* copfilesv = CopFILESV(PL_curcop);
4819 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4821 assert(SvPOK(x) || SvGMAGICAL(x));
4822 if (sv_eq(x, copfilesv)) {
4823 sv_setpvn(x, ipath, ipathend - ipath);
4829 const char *bstart = SvPV_const(copfilesv, blen);
4830 const char * const lstart = SvPV_const(x, llen);
4832 bstart += blen - llen;
4833 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4834 sv_setpvn(x, ipath, ipathend - ipath);
4841 /* Anything to do if no copfilesv? */
4843 TAINT_NOT; /* $^X is always tainted, but that's OK */
4845 #endif /* ARG_ZERO_IS_SCRIPT */
4850 d = instr(s,"perl -");
4852 d = instr(s,"perl");
4854 /* avoid getting into infinite loops when shebang
4855 * line contains "Perl" rather than "perl" */
4857 for (d = ipathend-4; d >= ipath; --d) {
4858 if (isALPHA_FOLD_EQ(*d, 'p')
4859 && !ibcmp(d, "perl", 4))
4869 #ifdef ALTERNATE_SHEBANG
4871 * If the ALTERNATE_SHEBANG on this system starts with a
4872 * character that can be part of a Perl expression, then if
4873 * we see it but not "perl", we're probably looking at the
4874 * start of Perl code, not a request to hand off to some
4875 * other interpreter. Similarly, if "perl" is there, but
4876 * not in the first 'word' of the line, we assume the line
4877 * contains the start of the Perl program.
4879 if (d && *s != '#') {
4880 const char *c = ipath;
4881 while (*c && !strchr("; \t\r\n\f\v#", *c))
4884 d = NULL; /* "perl" not in first word; ignore */
4886 *s = '#'; /* Don't try to parse shebang line */
4888 #endif /* ALTERNATE_SHEBANG */
4893 !instr(s,"indir") &&
4894 instr(PL_origargv[0],"perl"))
4901 while (s < PL_bufend && isSPACE(*s))
4903 if (s < PL_bufend) {
4904 Newx(newargv,PL_origargc+3,char*);
4906 while (s < PL_bufend && !isSPACE(*s))
4909 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4912 newargv = PL_origargv;
4915 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4917 Perl_croak(aTHX_ "Can't exec %s", ipath);
4920 while (*d && !isSPACE(*d))
4922 while (SPACE_OR_TAB(*d))
4926 const bool switches_done = PL_doswitches;
4927 const U32 oldpdb = PL_perldb;
4928 const bool oldn = PL_minus_n;
4929 const bool oldp = PL_minus_p;
4933 bool baduni = FALSE;
4935 const char *d2 = d1 + 1;
4936 if (parse_unicode_opts((const char **)&d2)
4940 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
4941 const char * const m = d1;
4942 while (*d1 && !isSPACE(*d1))
4944 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4947 d1 = moreswitches(d1);
4949 if (PL_doswitches && !switches_done) {
4950 int argc = PL_origargc;
4951 char **argv = PL_origargv;
4954 } while (argc && argv[0][0] == '-' && argv[0][1]);
4955 init_argv_symbols(argc,argv);
4957 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4958 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4959 /* if we have already added "LINE: while (<>) {",
4960 we must not do it again */
4962 sv_setpvs(PL_linestr, "");
4963 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4964 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4965 PL_last_lop = PL_last_uni = NULL;
4966 PL_preambled = FALSE;
4967 if (PERLDB_LINE || PERLDB_SAVESRC)
4968 (void)gv_fetchfile(PL_origfilename);
4975 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4976 PL_lex_state = LEX_FORMLINE;
4977 NEXTVAL_NEXTTOKE.ival = 0;
4978 force_next(FORMRBRACK);
4983 #ifdef PERL_STRICT_CR
4984 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4986 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4988 case ' ': case '\t': case '\f': case '\v':
4993 if (PL_lex_state != LEX_NORMAL ||
4994 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
4995 const bool in_comment = *s == '#';
4996 if (*s == '#' && s == PL_linestart && PL_in_eval
4997 && !PL_rsfp && !PL_parser->filtered) {
4998 /* handle eval qq[#line 1 "foo"\n ...] */
4999 CopLINE_dec(PL_curcop);
5003 while (d < PL_bufend && *d != '\n')
5007 else if (d > PL_bufend)
5008 /* Found by Ilya: feed random input to Perl. */
5009 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5012 if (in_comment && d == PL_bufend
5013 && PL_lex_state == LEX_INTERPNORMAL
5014 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5015 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5018 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5019 PL_lex_state = LEX_FORMLINE;
5020 NEXTVAL_NEXTTOKE.ival = 0;
5021 force_next(FORMRBRACK);
5026 while (s < PL_bufend && *s != '\n')
5034 else if (s > PL_bufend)
5035 /* Found by Ilya: feed random input to Perl. */
5036 Perl_croak(aTHX_ "panic: input overflow");
5040 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5048 while (s < PL_bufend && SPACE_OR_TAB(*s))
5051 if (strnEQ(s,"=>",2)) {
5052 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5053 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5054 OPERATOR('-'); /* unary minus */
5057 case 'r': ftst = OP_FTEREAD; break;
5058 case 'w': ftst = OP_FTEWRITE; break;
5059 case 'x': ftst = OP_FTEEXEC; break;
5060 case 'o': ftst = OP_FTEOWNED; break;
5061 case 'R': ftst = OP_FTRREAD; break;
5062 case 'W': ftst = OP_FTRWRITE; break;
5063 case 'X': ftst = OP_FTREXEC; break;
5064 case 'O': ftst = OP_FTROWNED; break;
5065 case 'e': ftst = OP_FTIS; break;
5066 case 'z': ftst = OP_FTZERO; break;
5067 case 's': ftst = OP_FTSIZE; break;
5068 case 'f': ftst = OP_FTFILE; break;
5069 case 'd': ftst = OP_FTDIR; break;
5070 case 'l': ftst = OP_FTLINK; break;
5071 case 'p': ftst = OP_FTPIPE; break;
5072 case 'S': ftst = OP_FTSOCK; break;
5073 case 'u': ftst = OP_FTSUID; break;
5074 case 'g': ftst = OP_FTSGID; break;
5075 case 'k': ftst = OP_FTSVTX; break;
5076 case 'b': ftst = OP_FTBLK; break;
5077 case 'c': ftst = OP_FTCHR; break;
5078 case 't': ftst = OP_FTTTY; break;
5079 case 'T': ftst = OP_FTTEXT; break;
5080 case 'B': ftst = OP_FTBINARY; break;
5081 case 'M': case 'A': case 'C':
5082 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5084 case 'M': ftst = OP_FTMTIME; break;
5085 case 'A': ftst = OP_FTATIME; break;
5086 case 'C': ftst = OP_FTCTIME; break;
5094 PL_last_uni = PL_oldbufptr;
5095 PL_last_lop_op = (OPCODE)ftst;
5096 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5097 "### Saw file test %c\n", (int)tmp);
5102 /* Assume it was a minus followed by a one-letter named
5103 * subroutine call (or a -bareword), then. */
5104 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5105 "### '-%c' looked like a file test but was not\n",
5112 const char tmp = *s++;
5115 if (PL_expect == XOPERATOR)
5120 else if (*s == '>') {
5123 if (FEATURE_POSTDEREF_IS_ENABLED && (
5124 ((*s == '$' || *s == '&') && s[1] == '*')
5125 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5126 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5127 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5130 Perl_ck_warner_d(aTHX_
5131 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5132 "Postfix dereference is experimental"
5134 PL_expect = XPOSTDEREF;
5137 if (isIDFIRST_lazy_if(s,UTF)) {
5138 s = force_word(s,METHOD,FALSE,TRUE);
5146 if (PL_expect == XOPERATOR) {
5147 if (*s == '=' && !PL_lex_allbrackets &&
5148 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5155 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5157 OPERATOR('-'); /* unary minus */
5163 const char tmp = *s++;
5166 if (PL_expect == XOPERATOR)
5171 if (PL_expect == XOPERATOR) {
5172 if (*s == '=' && !PL_lex_allbrackets &&
5173 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5180 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5187 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5188 if (PL_expect != XOPERATOR) {
5189 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5190 PL_expect = XOPERATOR;
5191 force_ident(PL_tokenbuf, '*');
5199 if (*s == '=' && !PL_lex_allbrackets &&
5200 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5206 if (*s == '=' && !PL_lex_allbrackets &&
5207 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5211 PL_parser->saw_infix_sigil = 1;
5216 if (PL_expect == XOPERATOR) {
5217 if (s[1] == '=' && !PL_lex_allbrackets &&
5218 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5221 PL_parser->saw_infix_sigil = 1;
5224 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5225 PL_tokenbuf[0] = '%';
5226 s = scan_ident(s, PL_tokenbuf + 1,
5227 sizeof PL_tokenbuf - 1, FALSE);
5229 if (!PL_tokenbuf[1]) {
5232 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5234 PL_tokenbuf[0] = '@';
5236 PL_expect = XOPERATOR;
5237 force_ident_maybe_lex('%');
5241 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5242 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5247 if (PL_lex_brackets > 100)
5248 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5249 PL_lex_brackstack[PL_lex_brackets++] = 0;
5250 PL_lex_allbrackets++;
5252 const char tmp = *s++;
5257 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5259 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5262 Perl_ck_warner_d(aTHX_
5263 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5264 "Smartmatch is experimental");
5270 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5277 goto just_a_word_zero_gv;
5283 switch (PL_expect) {
5285 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5287 PL_bufptr = s; /* update in case we back off */
5290 "Use of := for an empty attribute list is not allowed");
5297 PL_expect = XTERMBLOCK;
5301 while (isIDFIRST_lazy_if(s,UTF)) {
5304 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5305 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5306 if (tmp < 0) tmp = -tmp;
5321 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5323 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5324 COPLINE_SET_FROM_MULTI_END;
5326 /* MUST advance bufptr here to avoid bogus
5327 "at end of line" context messages from yyerror().
5329 PL_bufptr = s + len;
5330 yyerror("Unterminated attribute parameter in attribute list");
5334 return REPORT(0); /* EOF indicator */
5338 sv_catsv(sv, PL_lex_stuff);
5339 attrs = op_append_elem(OP_LIST, attrs,
5340 newSVOP(OP_CONST, 0, sv));
5341 SvREFCNT_dec(PL_lex_stuff);
5342 PL_lex_stuff = NULL;
5345 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5347 if (PL_in_my == KEY_our) {
5348 deprecate(":unique");
5351 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5354 /* NOTE: any CV attrs applied here need to be part of
5355 the CVf_BUILTIN_ATTRS define in cv.h! */
5356 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5358 CvLVALUE_on(PL_compcv);
5360 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5362 deprecate(":locked");
5364 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5366 CvMETHOD_on(PL_compcv);
5368 else if (!PL_in_my && len == 5
5369 && strnEQ(SvPVX(sv), "const", len))
5372 Perl_ck_warner_d(aTHX_
5373 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5374 ":const is experimental"
5376 CvANONCONST_on(PL_compcv);
5377 if (!CvANON(PL_compcv))
5378 yyerror(":const is not permitted on named "
5381 /* After we've set the flags, it could be argued that
5382 we don't need to do the attributes.pm-based setting
5383 process, and shouldn't bother appending recognized
5384 flags. To experiment with that, uncomment the
5385 following "else". (Note that's already been
5386 uncommented. That keeps the above-applied built-in
5387 attributes from being intercepted (and possibly
5388 rejected) by a package's attribute routines, but is
5389 justified by the performance win for the common case
5390 of applying only built-in attributes.) */
5392 attrs = op_append_elem(OP_LIST, attrs,
5393 newSVOP(OP_CONST, 0,
5397 if (*s == ':' && s[1] != ':')
5400 break; /* require real whitespace or :'s */
5401 /* XXX losing whitespace on sequential attributes here */
5404 if (*s != ';' && *s != '}' &&
5405 !(PL_expect == XOPERATOR
5406 ? (*s == '=' || *s == ')')
5407 : (*s == '{' || *s == '('))) {
5408 const char q = ((*s == '\'') ? '"' : '\'');
5409 /* If here for an expression, and parsed no attrs, back
5411 if (PL_expect == XOPERATOR && !attrs) {
5415 /* MUST advance bufptr here to avoid bogus "at end of line"
5416 context messages from yyerror().
5419 yyerror( (const char *)
5421 ? Perl_form(aTHX_ "Invalid separator character "
5422 "%c%c%c in attribute list", q, *s, q)
5423 : "Unterminated attribute list" ) );
5431 NEXTVAL_NEXTTOKE.opval = attrs;
5437 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5441 PL_lex_allbrackets--;
5445 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5446 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5450 PL_lex_allbrackets++;
5453 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5463 PL_lex_allbrackets--;
5469 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5472 if (PL_lex_brackets <= 0)
5473 /* diag_listed_as: Unmatched right %s bracket */
5474 yyerror("Unmatched right square bracket");
5477 PL_lex_allbrackets--;
5478 if (PL_lex_state == LEX_INTERPNORMAL) {
5479 if (PL_lex_brackets == 0) {
5480 if (*s == '-' && s[1] == '>')
5481 PL_lex_state = LEX_INTERPENDMAYBE;
5482 else if (*s != '[' && *s != '{')
5483 PL_lex_state = LEX_INTERPEND;
5490 if (PL_lex_brackets > 100) {
5491 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5493 switch (PL_expect) {
5495 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5496 PL_lex_allbrackets++;
5497 OPERATOR(HASHBRACK);
5499 while (s < PL_bufend && SPACE_OR_TAB(*s))
5502 PL_tokenbuf[0] = '\0';
5503 if (d < PL_bufend && *d == '-') {
5504 PL_tokenbuf[0] = '-';
5506 while (d < PL_bufend && SPACE_OR_TAB(*d))
5509 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5510 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5512 while (d < PL_bufend && SPACE_OR_TAB(*d))
5515 const char minus = (PL_tokenbuf[0] == '-');
5516 s = force_word(s + minus, WORD, FALSE, TRUE);
5524 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5525 PL_lex_allbrackets++;
5530 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5531 PL_lex_allbrackets++;
5535 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5536 PL_lex_allbrackets++;
5541 if (PL_oldoldbufptr == PL_last_lop)
5542 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5544 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5545 PL_lex_allbrackets++;
5548 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5550 /* This hack is to get the ${} in the message. */
5552 yyerror("syntax error");
5555 OPERATOR(HASHBRACK);
5557 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5558 /* ${...} or @{...} etc., but not print {...}
5559 * Skip the disambiguation and treat this as a block.
5561 goto block_expectation;
5563 /* This hack serves to disambiguate a pair of curlies
5564 * as being a block or an anon hash. Normally, expectation
5565 * determines that, but in cases where we're not in a
5566 * position to expect anything in particular (like inside
5567 * eval"") we have to resolve the ambiguity. This code
5568 * covers the case where the first term in the curlies is a
5569 * quoted string. Most other cases need to be explicitly
5570 * disambiguated by prepending a "+" before the opening
5571 * curly in order to force resolution as an anon hash.
5573 * XXX should probably propagate the outer expectation
5574 * into eval"" to rely less on this hack, but that could
5575 * potentially break current behavior of eval"".
5579 if (*s == '\'' || *s == '"' || *s == '`') {
5580 /* common case: get past first string, handling escapes */
5581 for (t++; t < PL_bufend && *t != *s;)
5586 else if (*s == 'q') {
5589 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5590 && !isWORDCHAR(*t))))
5592 /* skip q//-like construct */
5594 char open, close, term;
5597 while (t < PL_bufend && isSPACE(*t))
5599 /* check for q => */
5600 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5601 OPERATOR(HASHBRACK);
5605 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5609 for (t++; t < PL_bufend; t++) {
5610 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5612 else if (*t == open)
5616 for (t++; t < PL_bufend; t++) {
5617 if (*t == '\\' && t+1 < PL_bufend)
5619 else if (*t == close && --brackets <= 0)
5621 else if (*t == open)
5628 /* skip plain q word */
5629 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5632 else if (isWORDCHAR_lazy_if(t,UTF)) {
5634 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5637 while (t < PL_bufend && isSPACE(*t))
5639 /* if comma follows first term, call it an anon hash */
5640 /* XXX it could be a comma expression with loop modifiers */
5641 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5642 || (*t == '=' && t[1] == '>')))
5643 OPERATOR(HASHBRACK);
5644 if (PL_expect == XREF)
5647 /* If there is an opening brace or 'sub:', treat it
5648 as a term to make ${{...}}{k} and &{sub:attr...}
5649 dwim. Otherwise, treat it as a statement, so
5650 map {no strict; ...} works.
5657 if (strnEQ(s, "sub", 3)) {
5668 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5674 pl_yylval.ival = CopLINE(PL_curcop);
5675 PL_copline = NOLINE; /* invalidate current command line number */
5676 TOKEN(formbrack ? '=' : '{');
5678 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5682 if (PL_lex_brackets <= 0)
5683 /* diag_listed_as: Unmatched right %s bracket */
5684 yyerror("Unmatched right curly bracket");
5686 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5687 PL_lex_allbrackets--;
5688 if (PL_lex_state == LEX_INTERPNORMAL) {
5689 if (PL_lex_brackets == 0) {
5690 if (PL_expect & XFAKEBRACK) {
5691 PL_expect &= XENUMMASK;
5692 PL_lex_state = LEX_INTERPEND;
5694 return yylex(); /* ignore fake brackets */
5696 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5697 && SvEVALED(PL_lex_repl))
5698 PL_lex_state = LEX_INTERPEND;
5699 else if (*s == '-' && s[1] == '>')
5700 PL_lex_state = LEX_INTERPENDMAYBE;
5701 else if (*s != '[' && *s != '{')
5702 PL_lex_state = LEX_INTERPEND;
5705 if (PL_expect & XFAKEBRACK) {
5706 PL_expect &= XENUMMASK;
5708 return yylex(); /* ignore fake brackets */
5710 force_next(formbrack ? '.' : '}');
5711 if (formbrack) LEAVE;
5712 if (formbrack == 2) { /* means . where arguments were expected */
5718 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5721 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5722 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5729 if (PL_expect == XOPERATOR) {
5730 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5731 && isIDFIRST_lazy_if(s,UTF))
5733 CopLINE_dec(PL_curcop);
5734 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5735 CopLINE_inc(PL_curcop);
5737 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5738 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5742 PL_parser->saw_infix_sigil = 1;
5746 PL_tokenbuf[0] = '&';
5747 s = scan_ident(s - 1, PL_tokenbuf + 1,
5748 sizeof PL_tokenbuf - 1, TRUE);
5749 if (PL_tokenbuf[1]) {
5750 PL_expect = XOPERATOR;
5751 force_ident_maybe_lex('&');
5755 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5761 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5762 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5769 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5770 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5778 const char tmp = *s++;
5780 if (!PL_lex_allbrackets &&
5781 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5788 if (!PL_lex_allbrackets &&
5789 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5797 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5798 && strchr("+-*/%.^&|<",tmp))
5799 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5800 "Reversed %c= operator",(int)tmp);
5802 if (PL_expect == XSTATE && isALPHA(tmp) &&
5803 (s == PL_linestart+1 || s[-2] == '\n') )
5805 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
5806 || PL_lex_state != LEX_NORMAL) {
5811 if (strnEQ(s,"=cut",4)) {
5825 PL_parser->in_pod = 1;
5829 if (PL_expect == XBLOCK) {
5831 #ifdef PERL_STRICT_CR
5832 while (SPACE_OR_TAB(*t))
5834 while (SPACE_OR_TAB(*t) || *t == '\r')
5837 if (*t == '\n' || *t == '#') {
5840 SAVEI8(PL_parser->form_lex_state);
5841 SAVEI32(PL_lex_formbrack);
5842 PL_parser->form_lex_state = PL_lex_state;
5843 PL_lex_formbrack = PL_lex_brackets + 1;
5847 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5856 const char tmp = *s++;
5858 /* was this !=~ where !~ was meant?
5859 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5861 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5862 const char *t = s+1;
5864 while (t < PL_bufend && isSPACE(*t))
5867 if (*t == '/' || *t == '?' ||
5868 ((*t == 'm' || *t == 's' || *t == 'y')
5869 && !isWORDCHAR(t[1])) ||
5870 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
5871 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5872 "!=~ should be !~");
5874 if (!PL_lex_allbrackets &&
5875 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5887 if (PL_expect != XOPERATOR) {
5888 if (s[1] != '<' && !strchr(s,'>'))
5890 if (s[1] == '<' && s[2] != '>')
5891 s = scan_heredoc(s);
5893 s = scan_inputsymbol(s);
5894 PL_expect = XOPERATOR;
5895 TOKEN(sublex_start());
5901 if (*s == '=' && !PL_lex_allbrackets &&
5902 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5906 SHop(OP_LEFT_SHIFT);
5911 if (!PL_lex_allbrackets &&
5912 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5919 if (!PL_lex_allbrackets &&
5920 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5928 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5936 const char tmp = *s++;
5938 if (*s == '=' && !PL_lex_allbrackets &&
5939 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5943 SHop(OP_RIGHT_SHIFT);
5945 else if (tmp == '=') {
5946 if (!PL_lex_allbrackets &&
5947 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5955 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964 if (PL_expect == XOPERATOR) {
5965 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5966 return deprecate_commaless_var_list();
5969 else if (PL_expect == XPOSTDEREF) {
5972 POSTDEREF(DOLSHARP);
5977 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5978 PL_tokenbuf[0] = '@';
5979 s = scan_ident(s + 1, PL_tokenbuf + 1,
5980 sizeof PL_tokenbuf - 1, FALSE);
5981 if (PL_expect == XOPERATOR)
5982 no_op("Array length", s);
5983 if (!PL_tokenbuf[1])
5985 PL_expect = XOPERATOR;
5986 force_ident_maybe_lex('#');
5990 PL_tokenbuf[0] = '$';
5991 s = scan_ident(s, PL_tokenbuf + 1,
5992 sizeof PL_tokenbuf - 1, FALSE);
5993 if (PL_expect == XOPERATOR)
5995 if (!PL_tokenbuf[1]) {
5997 yyerror("Final $ should be \\$ or $name");
6003 const char tmp = *s;
6004 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6007 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6008 && intuit_more(s)) {
6010 PL_tokenbuf[0] = '@';
6011 if (ckWARN(WARN_SYNTAX)) {
6014 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6017 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6018 while (t < PL_bufend && *t != ']')
6020 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6021 "Multidimensional syntax %.*s not supported",
6022 (int)((t - PL_bufptr) + 1), PL_bufptr);
6026 else if (*s == '{') {
6028 PL_tokenbuf[0] = '%';
6029 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6030 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6032 char tmpbuf[sizeof PL_tokenbuf];
6035 } while (isSPACE(*t));
6036 if (isIDFIRST_lazy_if(t,UTF)) {
6038 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6043 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6044 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6045 "You need to quote \"%"UTF8f"\"",
6046 UTF8fARG(UTF, len, tmpbuf));
6052 PL_expect = XOPERATOR;
6053 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6054 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6055 if (!islop || PL_last_lop_op == OP_GREPSTART)
6056 PL_expect = XOPERATOR;
6057 else if (strchr("$@\"'`q", *s))
6058 PL_expect = XTERM; /* e.g. print $fh "foo" */
6059 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6060 PL_expect = XTERM; /* e.g. print $fh &sub */
6061 else if (isIDFIRST_lazy_if(s,UTF)) {
6062 char tmpbuf[sizeof PL_tokenbuf];
6064 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6065 if ((t2 = keyword(tmpbuf, len, 0))) {
6066 /* binary operators exclude handle interpretations */
6078 PL_expect = XTERM; /* e.g. print $fh length() */
6083 PL_expect = XTERM; /* e.g. print $fh subr() */
6086 else if (isDIGIT(*s))
6087 PL_expect = XTERM; /* e.g. print $fh 3 */
6088 else if (*s == '.' && isDIGIT(s[1]))
6089 PL_expect = XTERM; /* e.g. print $fh .3 */
6090 else if ((*s == '?' || *s == '-' || *s == '+')
6091 && !isSPACE(s[1]) && s[1] != '=')
6092 PL_expect = XTERM; /* e.g. print $fh -1 */
6093 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6095 PL_expect = XTERM; /* e.g. print $fh /.../
6096 XXX except DORDOR operator
6098 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6100 PL_expect = XTERM; /* print $fh <<"EOF" */
6103 force_ident_maybe_lex('$');
6107 if (PL_expect == XOPERATOR)
6109 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6110 PL_tokenbuf[0] = '@';
6111 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6113 if (!PL_tokenbuf[1]) {
6116 if (PL_lex_state == LEX_NORMAL)
6118 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6120 PL_tokenbuf[0] = '%';
6122 /* Warn about @ where they meant $. */
6123 if (*s == '[' || *s == '{') {
6124 if (ckWARN(WARN_SYNTAX)) {
6125 S_check_scalar_slice(aTHX_ s);
6129 PL_expect = XOPERATOR;
6130 force_ident_maybe_lex('@');
6133 case '/': /* may be division, defined-or, or pattern */
6134 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6135 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6136 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6141 else if (PL_expect == XOPERATOR) {
6143 if (*s == '=' && !PL_lex_allbrackets &&
6144 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6151 /* Disable warning on "study /blah/" */
6152 if (PL_oldoldbufptr == PL_last_uni
6153 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6154 || memNE(PL_last_uni, "study", 5)
6155 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6158 s = scan_pat(s,OP_MATCH);
6159 TERM(sublex_start());
6162 case '?': /* conditional */
6164 if (!PL_lex_allbrackets &&
6165 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6169 PL_lex_allbrackets++;
6173 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6174 #ifdef PERL_STRICT_CR
6177 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6179 && (s == PL_linestart || s[-1] == '\n') )
6182 formbrack = 2; /* dot seen where arguments expected */
6185 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6189 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6192 if (!PL_lex_allbrackets &&
6193 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6200 pl_yylval.ival = OPf_SPECIAL;
6206 if (*s == '=' && !PL_lex_allbrackets &&
6207 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6214 case '0': case '1': case '2': case '3': case '4':
6215 case '5': case '6': case '7': case '8': case '9':
6216 s = scan_num(s, &pl_yylval);
6217 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6218 if (PL_expect == XOPERATOR)
6223 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6226 COPLINE_SET_FROM_MULTI_END;
6227 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6228 if (PL_expect == XOPERATOR) {
6229 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6230 return deprecate_commaless_var_list();
6235 pl_yylval.ival = OP_CONST;
6236 TERM(sublex_start());
6239 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6242 printbuf("### Saw string before %s\n", s);
6244 PerlIO_printf(Perl_debug_log,
6245 "### Saw unterminated string\n");
6247 if (PL_expect == XOPERATOR) {
6248 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6249 return deprecate_commaless_var_list();
6256 pl_yylval.ival = OP_CONST;
6257 /* FIXME. I think that this can be const if char *d is replaced by
6258 more localised variables. */
6259 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6260 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6261 pl_yylval.ival = OP_STRINGIFY;
6265 if (pl_yylval.ival == OP_CONST)
6266 COPLINE_SET_FROM_MULTI_END;
6267 TERM(sublex_start());
6270 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6271 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6272 if (PL_expect == XOPERATOR)
6273 no_op("Backticks",s);
6276 pl_yylval.ival = OP_BACKTICK;
6277 TERM(sublex_start());
6281 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6283 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6285 if (PL_expect == XOPERATOR)
6286 no_op("Backslash",s);
6290 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6291 char *start = s + 2;
6292 while (isDIGIT(*start) || *start == '_')
6294 if (*start == '.' && isDIGIT(start[1])) {
6295 s = scan_num(s, &pl_yylval);
6298 else if ((*start == ':' && start[1] == ':')
6299 || (PL_expect == XSTATE && *start == ':'))
6301 else if (PL_expect == XSTATE) {
6303 while (d < PL_bufend && isSPACE(*d)) d++;
6304 if (*d == ':') goto keylookup;
6306 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6307 if (!isALPHA(*start) && (PL_expect == XTERM
6308 || PL_expect == XREF || PL_expect == XSTATE
6309 || PL_expect == XTERMORDORDOR)) {
6310 GV *const gv = gv_fetchpvn_flags(s, start - s,
6311 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6313 s = scan_num(s, &pl_yylval);
6320 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6373 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6375 /* Some keywords can be followed by any delimiter, including ':' */
6376 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6378 /* x::* is just a word, unless x is "CORE" */
6379 if (!anydelim && *s == ':' && s[1] == ':') {
6380 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6385 while (d < PL_bufend && isSPACE(*d))
6386 d++; /* no comments skipped here, or s### is misparsed */
6388 /* Is this a word before a => operator? */
6389 if (*d == '=' && d[1] == '>') {
6393 = (OP*)newSVOP(OP_CONST, 0,
6394 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6395 pl_yylval.opval->op_private = OPpCONST_BARE;
6399 /* Check for plugged-in keyword */
6403 char *saved_bufptr = PL_bufptr;
6405 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6407 if (result == KEYWORD_PLUGIN_DECLINE) {
6408 /* not a plugged-in keyword */
6409 PL_bufptr = saved_bufptr;
6410 } else if (result == KEYWORD_PLUGIN_STMT) {
6411 pl_yylval.opval = o;
6413 if (!PL_nexttoke) PL_expect = XSTATE;
6414 return REPORT(PLUGSTMT);
6415 } else if (result == KEYWORD_PLUGIN_EXPR) {
6416 pl_yylval.opval = o;
6418 if (!PL_nexttoke) PL_expect = XOPERATOR;
6419 return REPORT(PLUGEXPR);
6421 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6426 /* Check for built-in keyword */
6427 tmp = keyword(PL_tokenbuf, len, 0);
6429 /* Is this a label? */
6430 if (!anydelim && PL_expect == XSTATE
6431 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6433 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6434 pl_yylval.pval[len] = '\0';
6435 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6440 /* Check for lexical sub */
6441 if (PL_expect != XOPERATOR) {
6442 char tmpbuf[sizeof PL_tokenbuf + 1];
6444 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6445 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6446 if (off != NOT_IN_PAD) {
6447 assert(off); /* we assume this is boolean-true below */
6448 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6449 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6450 HEK * const stashname = HvNAME_HEK(stash);
6451 sv = newSVhek(stashname);
6452 sv_catpvs(sv, "::");
6453 sv_catpvn_flags(sv, PL_tokenbuf, len,
6454 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6455 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6465 rv2cv_op = newOP(OP_PADANY, 0);
6466 rv2cv_op->op_targ = off;
6467 cv = find_lexical_cv(off);
6475 if (tmp < 0) { /* second-class keyword? */
6476 GV *ogv = NULL; /* override (winner) */
6477 GV *hgv = NULL; /* hidden (loser) */
6478 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6480 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6481 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6485 if (GvIMPORTED_CV(gv))
6487 else if (! CvMETHOD(cv))
6491 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6495 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6496 : SvPCS_IMPORTED(gv)
6497 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6506 tmp = 0; /* overridden by import or by GLOBAL */
6509 && -tmp==KEY_lock /* XXX generalizable kludge */
6512 tmp = 0; /* any sub overrides "weak" keyword */
6514 else { /* no override */
6516 if (tmp == KEY_dump) {
6517 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6518 "dump() better written as CORE::dump()");
6522 if (hgv && tmp != KEY_x) /* never ambiguous */
6523 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6524 "Ambiguous call resolved as CORE::%s(), "
6525 "qualify as such or use &",
6530 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6531 && (!anydelim || *s != '#')) {
6532 /* no override, and not s### either; skipspace is safe here
6533 * check for => on following line */
6535 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6536 STRLEN soff = s - SvPVX(PL_linestr);
6537 s = skipspace_flags(s, LEX_NO_INCLINE);
6538 arrow = *s == '=' && s[1] == '>';
6539 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6540 s = SvPVX(PL_linestr) + soff;
6548 default: /* not a keyword */
6549 /* Trade off - by using this evil construction we can pull the
6550 variable gv into the block labelled keylookup. If not, then
6551 we have to give it function scope so that the goto from the
6552 earlier ':' case doesn't bypass the initialisation. */
6554 just_a_word_zero_gv:
6566 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6570 /* Get the rest if it looks like a package qualifier */
6572 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6574 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6577 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6578 UTF8fARG(UTF, len, PL_tokenbuf),
6579 *s == '\'' ? "'" : "::");
6584 if (PL_expect == XOPERATOR) {
6585 if (PL_bufptr == PL_linestart) {
6586 CopLINE_dec(PL_curcop);
6587 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6588 CopLINE_inc(PL_curcop);
6591 no_op("Bareword",s);
6594 /* See if the name is "Foo::",
6595 in which case Foo is a bareword
6596 (and a package name). */
6599 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6601 if (ckWARN(WARN_BAREWORD)
6602 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6603 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6604 "Bareword \"%"UTF8f"\" refers to nonexistent package",
6605 UTF8fARG(UTF, len, PL_tokenbuf));
6607 PL_tokenbuf[len] = '\0';
6616 /* if we saw a global override before, get the right name */
6619 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6622 SV * const tmp_sv = sv;
6623 sv = newSVpvs("CORE::GLOBAL::");
6624 sv_catsv(sv, tmp_sv);
6625 SvREFCNT_dec(tmp_sv);
6629 /* Presume this is going to be a bareword of some sort. */
6631 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6632 pl_yylval.opval->op_private = OPpCONST_BARE;
6634 /* And if "Foo::", then that's what it certainly is. */
6640 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6641 const_op->op_private = OPpCONST_BARE;
6643 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6647 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6650 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6653 /* Use this var to track whether intuit_method has been
6654 called. intuit_method returns 0 or > 255. */
6657 /* See if it's the indirect object for a list operator. */
6659 if (PL_oldoldbufptr &&
6660 PL_oldoldbufptr < PL_bufptr &&
6661 (PL_oldoldbufptr == PL_last_lop
6662 || PL_oldoldbufptr == PL_last_uni) &&
6663 /* NO SKIPSPACE BEFORE HERE! */
6664 (PL_expect == XREF ||
6665 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6667 bool immediate_paren = *s == '(';
6669 /* (Now we can afford to cross potential line boundary.) */
6672 /* Two barewords in a row may indicate method call. */
6674 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6675 (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6679 /* If not a declared subroutine, it's an indirect object. */
6680 /* (But it's an indir obj regardless for sort.) */
6681 /* Also, if "_" follows a filetest operator, it's a bareword */
6684 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6686 (PL_last_lop_op != OP_MAPSTART &&
6687 PL_last_lop_op != OP_GREPSTART))))
6688 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6689 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6692 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6697 PL_expect = XOPERATOR;
6700 /* Is this a word before a => operator? */
6701 if (*s == '=' && s[1] == '>' && !pkgname) {
6704 if (gvp || (lex && !off)) {
6705 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
6706 /* This is our own scalar, created a few lines
6707 above, so this is safe. */
6709 sv_setpv(sv, PL_tokenbuf);
6710 if (UTF && !IN_BYTES
6711 && is_utf8_string((U8*)PL_tokenbuf, len))
6718 /* If followed by a paren, it's certainly a subroutine. */
6723 while (SPACE_OR_TAB(*d))
6725 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
6730 NEXTVAL_NEXTTOKE.opval =
6731 off ? rv2cv_op : pl_yylval.opval;
6733 op_free(pl_yylval.opval), force_next(PRIVATEREF);
6734 else op_free(rv2cv_op), force_next(WORD);
6739 /* If followed by var or block, call it a method (unless sub) */
6741 if ((*s == '$' || *s == '{') && !cv) {
6743 PL_last_lop = PL_oldbufptr;
6744 PL_last_lop_op = OP_METHOD;
6745 if (!PL_lex_allbrackets &&
6746 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6747 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6748 PL_expect = XBLOCKTERM;
6750 return REPORT(METHOD);
6753 /* If followed by a bareword, see if it looks like indir obj. */
6755 if (tmp == 1 && !orig_keyword
6756 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6757 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
6760 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
6762 sv_setpvn(sv, PL_tokenbuf, len);
6763 if (UTF && !IN_BYTES
6764 && is_utf8_string((U8*)PL_tokenbuf, len))
6766 else SvUTF8_off(sv);
6769 if (tmp == METHOD && !PL_lex_allbrackets &&
6770 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6771 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6775 /* Not a method, so call it a subroutine (if defined) */
6778 /* Check for a constant sub */
6779 if ((sv = cv_const_sv_or_av(cv))) {
6782 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6783 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6784 if (SvTYPE(sv) == SVt_PVAV)
6785 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
6788 pl_yylval.opval->op_private = 0;
6789 pl_yylval.opval->op_folded = 1;
6790 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6795 op_free(pl_yylval.opval);
6797 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6798 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6799 PL_last_lop = PL_oldbufptr;
6800 PL_last_lop_op = OP_ENTERSUB;
6801 /* Is there a prototype? */
6805 STRLEN protolen = CvPROTOLEN(cv);
6806 const char *proto = CvPROTO(cv);
6808 proto = S_strip_spaces(aTHX_ proto, &protolen);
6811 if ((optional = *proto == ';'))
6814 while (*proto == ';');
6818 *proto == '$' || *proto == '_'
6819 || *proto == '*' || *proto == '+'
6824 *proto == '\\' && proto[1] && proto[2] == '\0'
6827 UNIPROTO(UNIOPSUB,optional);
6828 if (*proto == '\\' && proto[1] == '[') {
6829 const char *p = proto + 2;
6830 while(*p && *p != ']')
6832 if(*p == ']' && !p[1])
6833 UNIPROTO(UNIOPSUB,optional);
6835 if (*proto == '&' && *s == '{') {
6837 sv_setpvs(PL_subname, "__ANON__");
6839 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6840 if (!PL_lex_allbrackets &&
6841 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6842 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6846 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6848 force_next(off ? PRIVATEREF : WORD);
6849 if (!PL_lex_allbrackets &&
6850 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6851 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6855 /* Call it a bare word */
6857 if (PL_hints & HINT_STRICT_SUBS)
6858 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6861 /* after "print" and similar functions (corresponding to
6862 * "F? L" in opcode.pl), whatever wasn't already parsed as
6863 * a filehandle should be subject to "strict subs".
6864 * Likewise for the optional indirect-object argument to system
6865 * or exec, which can't be a bareword */
6866 if ((PL_last_lop_op == OP_PRINT
6867 || PL_last_lop_op == OP_PRTF
6868 || PL_last_lop_op == OP_SAY
6869 || PL_last_lop_op == OP_SYSTEM
6870 || PL_last_lop_op == OP_EXEC)
6871 && (PL_hints & HINT_STRICT_SUBS))
6872 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6873 if (lastchar != '-') {
6874 if (ckWARN(WARN_RESERVED)) {
6878 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
6880 /* PL_warn_reserved is constant */
6881 GCC_DIAG_IGNORE(-Wformat-nonliteral);
6882 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6892 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
6893 && saw_infix_sigil) {
6894 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6895 "Operator or semicolon missing before %c%"UTF8f,
6897 UTF8fARG(UTF, strlen(PL_tokenbuf),
6899 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6900 "Ambiguous use of %c resolved as operator %c",
6901 lastchar, lastchar);
6908 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
6913 (OP*)newSVOP(OP_CONST, 0,
6914 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6917 case KEY___PACKAGE__:
6919 (OP*)newSVOP(OP_CONST, 0,
6921 ? newSVhek(HvNAME_HEK(PL_curstash))
6928 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6929 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6932 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6934 gv_init(gv,stash,"DATA",4,0);
6937 GvIOp(gv) = newIO();
6938 IoIFP(GvIOp(gv)) = PL_rsfp;
6939 #if defined(HAS_FCNTL) && defined(F_SETFD)
6941 const int fd = PerlIO_fileno(PL_rsfp);
6942 fcntl(fd,F_SETFD,fd >= 3);
6945 /* Mark this internal pseudo-handle as clean */
6946 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6947 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6948 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6950 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6951 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6952 /* if the script was opened in binmode, we need to revert
6953 * it to text mode for compatibility; but only iff it has CRs
6954 * XXX this is a questionable hack at best. */
6955 if (PL_bufend-PL_bufptr > 2
6956 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6959 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6960 loc = PerlIO_tell(PL_rsfp);
6961 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6964 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6966 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6967 #endif /* NETWARE */
6969 PerlIO_seek(PL_rsfp, loc, 0);
6973 #ifdef PERLIO_LAYERS
6976 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6977 else if (IN_ENCODING) {
6983 XPUSHs(_get_encoding());
6985 call_method("name", G_SCALAR);
6989 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6990 Perl_form(aTHX_ ":encoding(%"SVf")",
7003 FUN0OP(CvCLONE(PL_compcv)
7004 ? newOP(OP_RUNCV, 0)
7005 : newPVOP(OP_RUNCV,0,NULL));
7014 if (PL_expect == XSTATE) {
7025 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7026 if ((*s == ':' && s[1] == ':')
7027 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7031 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7035 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7036 UTF8fARG(UTF, len, PL_tokenbuf));
7039 else if (tmp == KEY_require || tmp == KEY_do
7041 /* that's a way to remember we saw "CORE::" */
7053 LOP(OP_ACCEPT,XTERM);
7056 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7061 LOP(OP_ATAN2,XTERM);
7067 LOP(OP_BINMODE,XTERM);
7070 LOP(OP_BLESS,XTERM);
7079 /* We have to disambiguate the two senses of
7080 "continue". If the next token is a '{' then
7081 treat it as the start of a continue block;
7082 otherwise treat it as a control operator.
7092 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7102 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7111 if (!PL_cryptseen) {
7112 PL_cryptseen = TRUE;
7116 LOP(OP_CRYPT,XTERM);
7119 LOP(OP_CHMOD,XTERM);
7122 LOP(OP_CHOWN,XTERM);
7125 LOP(OP_CONNECT,XTERM);
7145 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7147 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7148 && !keyword(PL_tokenbuf + 1, len, 0)) {
7151 force_ident_maybe_lex('&');
7156 if (orig_keyword == KEY_do) {
7165 PL_hints |= HINT_BLOCK_SCOPE;
7175 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7176 STR_WITH_LEN("NDBM_File::"),
7177 STR_WITH_LEN("DB_File::"),
7178 STR_WITH_LEN("GDBM_File::"),
7179 STR_WITH_LEN("SDBM_File::"),
7180 STR_WITH_LEN("ODBM_File::"),
7182 LOP(OP_DBMOPEN,XTERM);
7194 pl_yylval.ival = CopLINE(PL_curcop);
7198 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7210 if (*s == '{') { /* block eval */
7211 PL_expect = XTERMBLOCK;
7212 UNIBRACK(OP_ENTERTRY);
7214 else { /* string eval */
7216 UNIBRACK(OP_ENTEREVAL);
7221 UNIBRACK(-OP_ENTEREVAL);
7235 case KEY_endhostent:
7241 case KEY_endservent:
7244 case KEY_endprotoent:
7255 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7257 pl_yylval.ival = CopLINE(PL_curcop);
7259 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7262 if ((PL_bufend - p) >= 3 &&
7263 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7265 else if ((PL_bufend - p) >= 4 &&
7266 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7269 /* skip optional package name, as in "for my abc $x (..)" */
7270 if (isIDFIRST_lazy_if(p,UTF)) {
7271 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7275 Perl_croak(aTHX_ "Missing $ on loop variable");
7280 LOP(OP_FORMLINE,XTERM);
7289 LOP(OP_FCNTL,XTERM);
7295 LOP(OP_FLOCK,XTERM);
7298 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7303 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7308 LOP(OP_GREPSTART, XREF);
7325 case KEY_getpriority:
7326 LOP(OP_GETPRIORITY,XTERM);
7328 case KEY_getprotobyname:
7331 case KEY_getprotobynumber:
7332 LOP(OP_GPBYNUMBER,XTERM);
7334 case KEY_getprotoent:
7346 case KEY_getpeername:
7347 UNI(OP_GETPEERNAME);
7349 case KEY_gethostbyname:
7352 case KEY_gethostbyaddr:
7353 LOP(OP_GHBYADDR,XTERM);
7355 case KEY_gethostent:
7358 case KEY_getnetbyname:
7361 case KEY_getnetbyaddr:
7362 LOP(OP_GNBYADDR,XTERM);
7367 case KEY_getservbyname:
7368 LOP(OP_GSBYNAME,XTERM);
7370 case KEY_getservbyport:
7371 LOP(OP_GSBYPORT,XTERM);
7373 case KEY_getservent:
7376 case KEY_getsockname:
7377 UNI(OP_GETSOCKNAME);
7379 case KEY_getsockopt:
7380 LOP(OP_GSOCKOPT,XTERM);
7395 pl_yylval.ival = CopLINE(PL_curcop);
7396 Perl_ck_warner_d(aTHX_
7397 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7398 "given is experimental");
7403 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7411 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7413 pl_yylval.ival = CopLINE(PL_curcop);
7417 LOP(OP_INDEX,XTERM);
7423 LOP(OP_IOCTL,XTERM);
7451 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7456 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7470 LOP(OP_LISTEN,XTERM);
7479 s = scan_pat(s,OP_MATCH);
7480 TERM(sublex_start());
7483 LOP(OP_MAPSTART, XREF);
7486 LOP(OP_MKDIR,XTERM);
7489 LOP(OP_MSGCTL,XTERM);
7492 LOP(OP_MSGGET,XTERM);
7495 LOP(OP_MSGRCV,XTERM);
7498 LOP(OP_MSGSND,XTERM);
7503 PL_in_my = (U16)tmp;
7505 if (isIDFIRST_lazy_if(s,UTF)) {
7506 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7507 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7509 if (!FEATURE_LEXSUBS_IS_ENABLED)
7511 "Experimental \"%s\" subs not enabled",
7512 tmp == KEY_my ? "my" :
7513 tmp == KEY_state ? "state" : "our");
7514 Perl_ck_warner_d(aTHX_
7515 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7516 "The lexical_subs feature is experimental");
7519 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7520 if (!PL_in_my_stash) {
7524 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7525 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7526 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7536 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7541 s = tokenize_use(0, s);
7545 if (*s == '(' || (s = skipspace(s), *s == '('))
7548 if (!PL_lex_allbrackets &&
7549 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7550 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7556 if (isIDFIRST_lazy_if(s,UTF)) {
7558 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7560 for (t=d; isSPACE(*t);)
7562 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7564 && !(t[0] == '=' && t[1] == '>')
7565 && !(t[0] == ':' && t[1] == ':')
7566 && !keyword(s, d-s, 0)
7568 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7569 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7570 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7576 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7578 pl_yylval.ival = OP_OR;
7588 LOP(OP_OPEN_DIR,XTERM);
7591 checkcomma(s,PL_tokenbuf,"filehandle");
7595 checkcomma(s,PL_tokenbuf,"filehandle");
7614 s = force_word(s,WORD,FALSE,TRUE);
7616 s = force_strict_version(s);
7620 LOP(OP_PIPE_OP,XTERM);
7623 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7626 COPLINE_SET_FROM_MULTI_END;
7627 pl_yylval.ival = OP_CONST;
7628 TERM(sublex_start());
7635 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7638 COPLINE_SET_FROM_MULTI_END;
7639 PL_expect = XOPERATOR;
7640 if (SvCUR(PL_lex_stuff)) {
7641 int warned_comma = !ckWARN(WARN_QW);
7642 int warned_comment = warned_comma;
7643 d = SvPV_force(PL_lex_stuff, len);
7645 for (; isSPACE(*d) && len; --len, ++d)
7650 if (!warned_comma || !warned_comment) {
7651 for (; !isSPACE(*d) && len; --len, ++d) {
7652 if (!warned_comma && *d == ',') {
7653 Perl_warner(aTHX_ packWARN(WARN_QW),
7654 "Possible attempt to separate words with commas");
7657 else if (!warned_comment && *d == '#') {
7658 Perl_warner(aTHX_ packWARN(WARN_QW),
7659 "Possible attempt to put comments in qw() list");
7665 for (; !isSPACE(*d) && len; --len, ++d)
7668 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7669 words = op_append_elem(OP_LIST, words,
7670 newSVOP(OP_CONST, 0, tokeq(sv)));
7675 words = newNULLLIST();
7677 SvREFCNT_dec(PL_lex_stuff);
7678 PL_lex_stuff = NULL;
7680 PL_expect = XOPERATOR;
7681 pl_yylval.opval = sawparens(words);
7686 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7689 pl_yylval.ival = OP_STRINGIFY;
7690 if (SvIVX(PL_lex_stuff) == '\'')
7691 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7692 TERM(sublex_start());
7695 s = scan_pat(s,OP_QR);
7696 TERM(sublex_start());
7699 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7702 pl_yylval.ival = OP_BACKTICK;
7703 TERM(sublex_start());
7711 s = force_version(s, FALSE);
7713 else if (*s != 'v' || !isDIGIT(s[1])
7714 || (s = force_version(s, TRUE), *s == 'v'))
7716 *PL_tokenbuf = '\0';
7717 s = force_word(s,WORD,TRUE,TRUE);
7718 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7719 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7720 GV_ADD | (UTF ? SVf_UTF8 : 0));
7722 yyerror("<> at require-statement should be quotes");
7724 if (orig_keyword == KEY_require) {
7730 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7732 PL_last_uni = PL_oldbufptr;
7733 PL_last_lop_op = OP_REQUIRE;
7735 return REPORT( (int)REQUIRE );
7744 LOP(OP_RENAME,XTERM);
7753 LOP(OP_RINDEX,XTERM);
7762 UNIDOR(OP_READLINE);
7765 UNIDOR(OP_BACKTICK);
7774 LOP(OP_REVERSE,XTERM);
7777 UNIDOR(OP_READLINK);
7784 if (pl_yylval.opval)
7785 TERM(sublex_start());
7787 TOKEN(1); /* force error */
7790 checkcomma(s,PL_tokenbuf,"filehandle");
7800 LOP(OP_SELECT,XTERM);
7806 LOP(OP_SEMCTL,XTERM);
7809 LOP(OP_SEMGET,XTERM);
7812 LOP(OP_SEMOP,XTERM);
7818 LOP(OP_SETPGRP,XTERM);
7820 case KEY_setpriority:
7821 LOP(OP_SETPRIORITY,XTERM);
7823 case KEY_sethostent:
7829 case KEY_setservent:
7832 case KEY_setprotoent:
7842 LOP(OP_SEEKDIR,XTERM);
7844 case KEY_setsockopt:
7845 LOP(OP_SSOCKOPT,XTERM);
7851 LOP(OP_SHMCTL,XTERM);
7854 LOP(OP_SHMGET,XTERM);
7857 LOP(OP_SHMREAD,XTERM);
7860 LOP(OP_SHMWRITE,XTERM);
7863 LOP(OP_SHUTDOWN,XTERM);
7872 LOP(OP_SOCKET,XTERM);
7874 case KEY_socketpair:
7875 LOP(OP_SOCKPAIR,XTERM);
7878 checkcomma(s,PL_tokenbuf,"subroutine name");
7881 s = force_word(s,WORD,TRUE,TRUE);
7885 LOP(OP_SPLIT,XTERM);
7888 LOP(OP_SPRINTF,XTERM);
7891 LOP(OP_SPLICE,XTERM);
7906 LOP(OP_SUBSTR,XTERM);
7912 char * const tmpbuf = PL_tokenbuf + 1;
7913 expectation attrful;
7914 bool have_name, have_proto;
7915 const int key = tmp;
7916 SV *format_name = NULL;
7921 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7922 (*s == ':' && s[1] == ':'))
7926 attrful = XATTRBLOCK;
7927 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
7929 if (key == KEY_format)
7930 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
7932 if (memchr(tmpbuf, ':', len) || key != KEY_sub
7934 PL_tokenbuf, len + 1, 0
7936 sv_setpvn(PL_subname, tmpbuf, len);
7938 sv_setsv(PL_subname,PL_curstname);
7939 sv_catpvs(PL_subname,"::");
7940 sv_catpvn(PL_subname,tmpbuf,len);
7942 if (SvUTF8(PL_linestr))
7943 SvUTF8_on(PL_subname);
7950 if (key == KEY_my || key == KEY_our || key==KEY_state)
7953 /* diag_listed_as: Missing name in "%s sub" */
7955 "Missing name in \"%s\"", PL_bufptr);
7957 PL_expect = XTERMBLOCK;
7958 attrful = XATTRTERM;
7959 sv_setpvs(PL_subname,"?");
7963 if (key == KEY_format) {
7965 NEXTVAL_NEXTTOKE.opval
7966 = (OP*)newSVOP(OP_CONST,0, format_name);
7967 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
7973 /* Look for a prototype */
7974 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
7975 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7976 COPLINE_SET_FROM_MULTI_END;
7978 Perl_croak(aTHX_ "Prototype not terminated");
7979 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
7987 if (*s == ':' && s[1] != ':')
7988 PL_expect = attrful;
7989 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
7991 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7992 else if (*s != ';' && *s != '}')
7993 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7997 NEXTVAL_NEXTTOKE.opval =
7998 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7999 PL_lex_stuff = NULL;
8004 sv_setpvs(PL_subname, "__ANON__");
8006 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8009 force_ident_maybe_lex('&');
8014 LOP(OP_SYSTEM,XREF);
8017 LOP(OP_SYMLINK,XTERM);
8020 LOP(OP_SYSCALL,XTERM);
8023 LOP(OP_SYSOPEN,XTERM);
8026 LOP(OP_SYSSEEK,XTERM);
8029 LOP(OP_SYSREAD,XTERM);
8032 LOP(OP_SYSWRITE,XTERM);
8037 TERM(sublex_start());
8058 LOP(OP_TRUNCATE,XTERM);
8070 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8072 pl_yylval.ival = CopLINE(PL_curcop);
8076 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8078 pl_yylval.ival = CopLINE(PL_curcop);
8082 LOP(OP_UNLINK,XTERM);
8088 LOP(OP_UNPACK,XTERM);
8091 LOP(OP_UTIME,XTERM);
8097 LOP(OP_UNSHIFT,XTERM);
8100 s = tokenize_use(1, s);
8110 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8112 pl_yylval.ival = CopLINE(PL_curcop);
8113 Perl_ck_warner_d(aTHX_
8114 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8115 "when is experimental");
8119 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8121 pl_yylval.ival = CopLINE(PL_curcop);
8125 PL_hints |= HINT_BLOCK_SCOPE;
8132 LOP(OP_WAITPID,XTERM);
8138 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8139 * we use the same number on EBCDIC */
8140 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8144 if (PL_expect == XOPERATOR) {
8145 if (*s == '=' && !PL_lex_allbrackets &&
8146 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8154 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8156 pl_yylval.ival = OP_XOR;
8165 Looks up an identifier in the pad or in a package
8168 PRIVATEREF if this is a lexical name.
8169 WORD if this belongs to a package.
8172 if we're in a my declaration
8173 croak if they tried to say my($foo::bar)
8174 build the ops for a my() declaration
8175 if it's an access to a my() variable
8176 build ops for access to a my() variable
8177 if in a dq string, and they've said @foo and we can't find @foo
8179 build ops for a bareword
8183 S_pending_ident(pTHX)
8186 const char pit = (char)pl_yylval.ival;
8187 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8188 /* All routes through this function want to know if there is a colon. */
8189 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8191 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8192 "### Pending identifier '%s'\n", PL_tokenbuf); });
8194 /* if we're in a my(), we can't allow dynamics here.
8195 $foo'bar has already been turned into $foo::bar, so
8196 just check for colons.
8198 if it's a legal name, the OP is a PADANY.
8201 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8203 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8204 "variable %s in \"our\"",
8205 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8206 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8210 /* "my" variable %s can't be in a package */
8211 /* PL_no_myglob is constant */
8212 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8213 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8214 PL_in_my == KEY_my ? "my" : "state",
8215 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8217 UTF ? SVf_UTF8 : 0);
8221 pl_yylval.opval = newOP(OP_PADANY, 0);
8222 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8223 UTF ? SVf_UTF8 : 0);
8229 build the ops for accesses to a my() variable.
8234 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8236 if (tmp != NOT_IN_PAD) {
8237 /* might be an "our" variable" */
8238 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8239 /* build ops for a bareword */
8240 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8241 HEK * const stashname = HvNAME_HEK(stash);
8242 SV * const sym = newSVhek(stashname);
8243 sv_catpvs(sym, "::");
8244 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8245 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8246 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8250 ((PL_tokenbuf[0] == '$') ? SVt_PV
8251 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8256 pl_yylval.opval = newOP(OP_PADANY, 0);
8257 pl_yylval.opval->op_targ = tmp;
8263 Whine if they've said @foo in a doublequoted string,
8264 and @foo isn't a variable we can find in the symbol
8267 if (ckWARN(WARN_AMBIGUOUS) &&
8268 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8269 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8270 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8271 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8272 /* DO NOT warn for @- and @+ */
8273 && !( PL_tokenbuf[2] == '\0' &&
8274 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8277 /* Downgraded from fatal to warning 20000522 mjd */
8278 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8279 "Possible unintended interpolation of %"UTF8f
8281 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8285 /* build ops for a bareword */
8286 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8287 newSVpvn_flags(PL_tokenbuf + 1,
8289 UTF ? SVf_UTF8 : 0 ));
8290 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8292 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8293 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8294 | ( UTF ? SVf_UTF8 : 0 ),
8295 ((PL_tokenbuf[0] == '$') ? SVt_PV
8296 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8302 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8304 PERL_ARGS_ASSERT_CHECKCOMMA;
8306 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8307 if (ckWARN(WARN_SYNTAX)) {
8310 for (w = s+2; *w && level; w++) {
8318 /* the list of chars below is for end of statements or
8319 * block / parens, boolean operators (&&, ||, //) and branch
8320 * constructs (or, and, if, until, unless, while, err, for).
8321 * Not a very solid hack... */
8322 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8323 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8324 "%s (...) interpreted as function",name);
8327 while (s < PL_bufend && isSPACE(*s))
8331 while (s < PL_bufend && isSPACE(*s))
8333 if (isIDFIRST_lazy_if(s,UTF)) {
8334 const char * const w = s;
8335 s += UTF ? UTF8SKIP(s) : 1;
8336 while (isWORDCHAR_lazy_if(s,UTF))
8337 s += UTF ? UTF8SKIP(s) : 1;
8338 while (s < PL_bufend && isSPACE(*s))
8343 if (keyword(w, s - w, 0))
8346 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8347 if (gv && GvCVu(gv))
8351 Copy(w, tmpbuf+1, s - w, char);
8353 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8354 if (off != NOT_IN_PAD) return;
8356 Perl_croak(aTHX_ "No comma allowed after %s", what);
8361 /* S_new_constant(): do any overload::constant lookup.
8363 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8364 Best used as sv=new_constant(..., sv, ...).
8365 If s, pv are NULL, calls subroutine with one argument,
8366 and <type> is used with error messages only.
8367 <type> is assumed to be well formed UTF-8 */
8370 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8371 SV *sv, SV *pv, const char *type, STRLEN typelen)
8374 HV * table = GvHV(PL_hintgv); /* ^H */
8379 const char *why1 = "", *why2 = "", *why3 = "";
8381 PERL_ARGS_ASSERT_NEW_CONSTANT;
8382 /* We assume that this is true: */
8383 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8386 /* charnames doesn't work well if there have been errors found */
8387 if (PL_error_count > 0 && *key == 'c')
8389 SvREFCNT_dec_NN(sv);
8390 return &PL_sv_undef;
8393 sv_2mortal(sv); /* Parent created it permanently */
8395 || ! (PL_hints & HINT_LOCALIZE_HH)
8396 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8401 /* Here haven't found what we're looking for. If it is charnames,
8402 * perhaps it needs to be loaded. Try doing that before giving up */
8404 Perl_load_module(aTHX_
8406 newSVpvs("_charnames"),
8407 /* version parameter; no need to specify it, as if
8408 * we get too early a version, will fail anyway,
8409 * not being able to find '_charnames' */
8414 assert(sp == PL_stack_sp);
8415 table = GvHV(PL_hintgv);
8417 && (PL_hints & HINT_LOCALIZE_HH)
8418 && (cvp = hv_fetch(table, key, keylen, FALSE))
8424 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8425 msg = Perl_form(aTHX_
8426 "Constant(%.*s) unknown",
8427 (int)(type ? typelen : len),
8433 why3 = "} is not defined";
8436 msg = Perl_form(aTHX_
8437 /* The +3 is for '\N{'; -4 for that, plus '}' */
8438 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8442 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8443 (int)(type ? typelen : len),
8444 (type ? type: s), why1, why2, why3);
8447 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8448 return SvREFCNT_inc_simple_NN(sv);
8453 pv = newSVpvn_flags(s, len, SVs_TEMP);
8455 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8457 typesv = &PL_sv_undef;
8459 PUSHSTACKi(PERLSI_OVERLOAD);
8471 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8475 /* Check the eval first */
8476 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8478 const char * errstr;
8479 sv_catpvs(errsv, "Propagated");
8480 errstr = SvPV_const(errsv, errlen);
8481 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8483 res = SvREFCNT_inc_simple_NN(sv);
8487 SvREFCNT_inc_simple_void_NN(res);
8496 why1 = "Call to &{$^H{";
8498 why3 = "}} did not return a defined value";
8500 (void)sv_2mortal(sv);
8507 PERL_STATIC_INLINE void
8508 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8509 PERL_ARGS_ASSERT_PARSE_IDENT;
8513 Perl_croak(aTHX_ "%s", ident_too_long);
8514 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8515 /* The UTF-8 case must come first, otherwise things
8516 * like c\N{COMBINING TILDE} would start failing, as the
8517 * isWORDCHAR_A case below would gobble the 'c' up.
8520 char *t = *s + UTF8SKIP(*s);
8521 while (isIDCONT_utf8((U8*)t))
8523 if (*d + (t - *s) > e)
8524 Perl_croak(aTHX_ "%s", ident_too_long);
8525 Copy(*s, *d, t - *s, char);
8529 else if ( isWORDCHAR_A(**s) ) {
8532 } while (isWORDCHAR_A(**s) && *d < e);
8534 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8539 else if (allow_package && **s == ':' && (*s)[1] == ':'
8540 /* Disallow things like Foo::$bar. For the curious, this is
8541 * the code path that triggers the "Bad name after" warning
8542 * when looking for barewords.
8544 && (*s)[2] != '$') {
8554 /* Returns a NUL terminated string, with the length of the string written to
8558 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8561 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8562 bool is_utf8 = cBOOL(UTF);
8564 PERL_ARGS_ASSERT_SCAN_WORD;
8566 parse_ident(&s, &d, e, allow_package, is_utf8);
8573 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8575 I32 herelines = PL_parser->herelines;
8576 SSize_t bracket = -1;
8579 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8580 bool is_utf8 = cBOOL(UTF);
8581 I32 orig_copline = 0, tmp_copline = 0;
8583 PERL_ARGS_ASSERT_SCAN_IDENT;
8588 while (isDIGIT(*s)) {
8590 Perl_croak(aTHX_ "%s", ident_too_long);
8595 parse_ident(&s, &d, e, 1, is_utf8);
8600 /* Either a digit variable, or parse_ident() found an identifier
8601 (anything valid as a bareword), so job done and return. */
8602 if (PL_lex_state != LEX_NORMAL)
8603 PL_lex_state = LEX_INTERPENDMAYBE;
8606 if (*s == '$' && s[1] &&
8607 (isIDFIRST_lazy_if(s+1,is_utf8)
8608 || isDIGIT_A((U8)s[1])
8611 || strnEQ(s+1,"::",2)) )
8613 /* Dereferencing a value in a scalar variable.
8614 The alternatives are different syntaxes for a scalar variable.
8615 Using ' as a leading package separator isn't allowed. :: is. */
8618 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
8620 bracket = s - SvPVX(PL_linestr);
8622 orig_copline = CopLINE(PL_curcop);
8623 if (s < PL_bufend && isSPACE(*s)) {
8628 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8629 * iff Unicode semantics are to be used. The legal ones are any of:
8630 * a) all ASCII characters except:
8631 * 1) space-type ones, like \t and SPACE;
8634 * The final case currently doesn't get this far in the program, so we
8635 * don't test for it. If that were to change, it would be ok to allow it.
8636 * c) When not under Unicode rules, any upper Latin1 character
8637 * d) Otherwise, when unicode rules are used, all XIDS characters.
8639 * Because all ASCII characters have the same representation whether
8640 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8641 * '{' without knowing if is UTF-8 or not.
8642 * EBCDIC already uses the rules that ASCII platforms will use after the
8643 * deprecation cycle; see comment below about the deprecation. */
8645 # define VALID_LEN_ONE_IDENT(s, is_utf8) \
8646 (isGRAPH_A(*(s)) || ((is_utf8) \
8647 ? isIDFIRST_utf8((U8*) (s)) \
8649 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8651 # define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
8652 && LIKELY(*(s) != '\0') \
8654 || isASCII_utf8((U8*) (s)) \
8655 || isIDFIRST_utf8((U8*) (s))))
8657 if ((s <= PL_bufend - (is_utf8)
8660 && VALID_LEN_ONE_IDENT(s, is_utf8))
8662 /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
8663 * because often it has no graphic representation. (We can't get to
8664 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8667 ? ! isGRAPH_utf8( (U8*) s)
8668 : (! isGRAPH_L1( (U8) *s)
8669 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
8671 /* Split messages for back compat */
8672 if (isCNTRL_A( (U8) *s)) {
8673 deprecate("literal control characters in variable names");
8676 deprecate("literal non-graphic characters in variable names");
8681 const STRLEN skip = UTF8SKIP(s);
8684 for ( i = 0; i < skip; i++ )
8692 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8693 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8697 /* Warn about ambiguous code after unary operators if {...} notation isn't
8698 used. There's no difference in ambiguity; it's merely a heuristic
8699 about when not to warn. */
8700 else if (ck_uni && bracket == -1)
8702 if (bracket != -1) {
8703 /* If we were processing {...} notation then... */
8704 if (isIDFIRST_lazy_if(d,is_utf8)) {
8705 /* if it starts as a valid identifier, assume that it is one.
8706 (the later check for } being at the expected point will trap
8707 cases where this doesn't pan out.) */
8708 d += is_utf8 ? UTF8SKIP(d) : 1;
8709 parse_ident(&s, &d, e, 1, is_utf8);
8711 tmp_copline = CopLINE(PL_curcop);
8712 if (s < PL_bufend && isSPACE(*s)) {
8715 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8716 /* ${foo[0]} and ${foo{bar}} notation. */
8717 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8718 const char * const brack =
8720 ((*s == '[') ? "[...]" : "{...}");
8721 orig_copline = CopLINE(PL_curcop);
8722 CopLINE_set(PL_curcop, tmp_copline);
8723 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8724 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8725 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8726 funny, dest, brack, funny, dest, brack);
8727 CopLINE_set(PL_curcop, orig_copline);
8730 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8731 PL_lex_allbrackets++;
8735 /* Handle extended ${^Foo} variables
8736 * 1999-02-27 mjd-perl-patch@plover.com */
8737 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
8741 while (isWORDCHAR(*s) && d < e) {
8745 Perl_croak(aTHX_ "%s", ident_too_long);
8750 tmp_copline = CopLINE(PL_curcop);
8751 if (s < PL_bufend && isSPACE(*s)) {
8755 /* Expect to find a closing } after consuming any trailing whitespace.
8759 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8760 PL_lex_state = LEX_INTERPEND;
8763 if (PL_lex_state == LEX_NORMAL) {
8764 if (ckWARN(WARN_AMBIGUOUS) &&
8765 (keyword(dest, d - dest, 0)
8766 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
8768 SV *tmp = newSVpvn_flags( dest, d - dest,
8769 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
8772 orig_copline = CopLINE(PL_curcop);
8773 CopLINE_set(PL_curcop, tmp_copline);
8774 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8775 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
8776 funny, SVfARG(tmp), funny, SVfARG(tmp));
8777 CopLINE_set(PL_curcop, orig_copline);
8782 /* Didn't find the closing } at the point we expected, so restore
8783 state such that the next thing to process is the opening { and */
8784 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
8785 CopLINE_set(PL_curcop, orig_copline);
8786 PL_parser->herelines = herelines;
8790 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8791 PL_lex_state = LEX_INTERPEND;
8796 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
8798 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
8799 * found in the parse starting at 's', based on the subset that are valid
8800 * in this context input to this routine in 'valid_flags'. Advances s.
8801 * Returns TRUE if the input should be treated as a valid flag, so the next
8802 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
8803 * upon first call on the current regex. This routine will set it to any
8804 * charset modifier found. The caller shouldn't change it. This way,
8805 * another charset modifier encountered in the parse can be detected as an
8806 * error, as we have decided to allow only one */
8809 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
8811 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8812 if (isWORDCHAR_lazy_if(*s, UTF)) {
8813 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
8814 UTF ? SVf_UTF8 : 0);
8816 /* Pretend that it worked, so will continue processing before
8825 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
8826 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8827 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8828 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8829 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8830 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8831 case LOCALE_PAT_MOD:
8833 goto multiple_charsets;
8835 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8838 case UNICODE_PAT_MOD:
8840 goto multiple_charsets;
8842 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
8845 case ASCII_RESTRICT_PAT_MOD:
8847 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8851 /* Error if previous modifier wasn't an 'a', but if it was, see
8852 * if, and accept, a second occurrence (only) */
8854 || get_regex_charset(*pmfl)
8855 != REGEX_ASCII_RESTRICTED_CHARSET)
8857 goto multiple_charsets;
8859 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8863 case DEPENDS_PAT_MOD:
8865 goto multiple_charsets;
8867 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
8876 if (*charset != c) {
8877 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8879 else if (c == 'a') {
8880 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
8881 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8884 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8887 /* Pretend that it worked, so will continue processing before dieing */
8893 S_scan_pat(pTHX_ char *start, I32 type)
8897 const char * const valid_flags =
8898 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
8899 char charset = '\0'; /* character set modifier */
8900 unsigned int x_mod_count = 0;
8902 PERL_ARGS_ASSERT_SCAN_PAT;
8904 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
8906 Perl_croak(aTHX_ "Search pattern not terminated");
8908 pm = (PMOP*)newPMOP(type, 0);
8909 if (PL_multi_open == '?') {
8910 /* This is the only point in the code that sets PMf_ONCE: */
8911 pm->op_pmflags |= PMf_ONCE;
8913 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8914 allows us to restrict the list needed by reset to just the ??
8916 assert(type != OP_TRANS);
8918 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
8921 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
8924 elements = mg->mg_len / sizeof(PMOP**);
8925 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8926 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8927 mg->mg_len = elements * sizeof(PMOP**);
8928 PmopSTASH_set(pm,PL_curstash);
8932 /* if qr/...(?{..}).../, then need to parse the pattern within a new
8933 * anon CV. False positives like qr/[(?{]/ are harmless */
8935 if (type == OP_QR) {
8937 char *e, *p = SvPV(PL_lex_stuff, len);
8939 for (; p < e; p++) {
8940 if (p[0] == '(' && p[1] == '?'
8941 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
8943 pm->op_pmflags |= PMf_HAS_CV;
8947 pm->op_pmflags |= PMf_IS_QR;
8950 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
8951 &s, &charset, &x_mod_count))
8953 /* issue a warning if /c is specified,but /g is not */
8954 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
8956 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8957 "Use of /c modifier is meaningless without /g" );
8960 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
8962 PL_lex_op = (OP*)pm;
8963 pl_yylval.ival = OP_MATCH;
8968 S_scan_subst(pTHX_ char *start)
8975 char charset = '\0'; /* character set modifier */
8976 unsigned int x_mod_count = 0;
8979 PERL_ARGS_ASSERT_SCAN_SUBST;
8981 pl_yylval.ival = OP_NULL;
8983 s = scan_str(start, TRUE, FALSE, FALSE, &t);
8986 Perl_croak(aTHX_ "Substitution pattern not terminated");
8990 first_start = PL_multi_start;
8991 first_line = CopLINE(PL_curcop);
8992 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8995 SvREFCNT_dec(PL_lex_stuff);
8996 PL_lex_stuff = NULL;
8998 Perl_croak(aTHX_ "Substitution replacement not terminated");
9000 PL_multi_start = first_start; /* so whole substitution is taken together */
9002 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9006 if (*s == EXEC_PAT_MOD) {
9010 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9011 &s, &charset, &x_mod_count))
9017 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9019 if ((pm->op_pmflags & PMf_CONTINUE)) {
9020 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9024 SV * const repl = newSVpvs("");
9027 pm->op_pmflags |= PMf_EVAL;
9030 sv_catpvs(repl, "eval ");
9032 sv_catpvs(repl, "do ");
9034 sv_catpvs(repl, "{");
9035 sv_catsv(repl, PL_sublex_info.repl);
9036 sv_catpvs(repl, "}");
9038 SvREFCNT_dec(PL_sublex_info.repl);
9039 PL_sublex_info.repl = repl;
9041 if (CopLINE(PL_curcop) != first_line) {
9042 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9043 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9044 CopLINE(PL_curcop) - first_line;
9045 CopLINE_set(PL_curcop, first_line);
9048 PL_lex_op = (OP*)pm;
9049 pl_yylval.ival = OP_SUBST;
9054 S_scan_trans(pTHX_ char *start)
9061 bool nondestruct = 0;
9064 PERL_ARGS_ASSERT_SCAN_TRANS;
9066 pl_yylval.ival = OP_NULL;
9068 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9070 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9074 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9077 SvREFCNT_dec(PL_lex_stuff);
9078 PL_lex_stuff = NULL;
9080 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9083 complement = del = squash = 0;
9087 complement = OPpTRANS_COMPLEMENT;
9090 del = OPpTRANS_DELETE;
9093 squash = OPpTRANS_SQUASH;
9105 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9106 o->op_private &= ~OPpTRANS_ALL;
9107 o->op_private |= del|squash|complement|
9108 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9109 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9112 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9119 Takes a pointer to the first < in <<FOO.
9120 Returns a pointer to the byte following <<FOO.
9122 This function scans a heredoc, which involves different methods
9123 depending on whether we are in a string eval, quoted construct, etc.
9124 This is because PL_linestr could containing a single line of input, or
9125 a whole string being evalled, or the contents of the current quote-
9128 The two basic methods are:
9129 - Steal lines from the input stream
9130 - Scan the heredoc in PL_linestr and remove it therefrom
9132 In a file scope or filtered eval, the first method is used; in a
9133 string eval, the second.
9135 In a quote-like operator, we have to choose between the two,
9136 depending on where we can find a newline. We peek into outer lex-
9137 ing scopes until we find one with a newline in it. If we reach the
9138 outermost lexing scope and it is a file, we use the stream method.
9139 Otherwise it is treated as an eval.
9143 S_scan_heredoc(pTHX_ char *s)
9145 I32 op_type = OP_SCALAR;
9152 const bool infile = PL_rsfp || PL_parser->filtered;
9153 const line_t origline = CopLINE(PL_curcop);
9154 LEXSHARED *shared = PL_parser->lex_shared;
9156 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9159 d = PL_tokenbuf + 1;
9160 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9161 *PL_tokenbuf = '\n';
9163 while (SPACE_OR_TAB(*peek))
9165 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9168 s = delimcpy(d, e, s, PL_bufend, term, &len);
9170 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9176 /* <<\FOO is equivalent to <<'FOO' */
9180 if (!isWORDCHAR_lazy_if(s,UTF))
9181 deprecate("bare << to mean <<\"\"");
9182 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9187 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9188 Perl_croak(aTHX_ "Delimiter for here document is too long");
9191 len = d - PL_tokenbuf;
9193 #ifndef PERL_STRICT_CR
9194 d = strchr(s, '\r');
9196 char * const olds = s;
9198 while (s < PL_bufend) {
9204 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9213 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9218 tmpstr = newSV_type(SVt_PVIV);
9222 SvIV_set(tmpstr, -1);
9224 else if (term == '`') {
9225 op_type = OP_BACKTICK;
9226 SvIV_set(tmpstr, '\\');
9229 PL_multi_start = origline + 1 + PL_parser->herelines;
9230 PL_multi_open = PL_multi_close = '<';
9231 /* inside a string eval or quote-like operator */
9232 if (!infile || PL_lex_inwhat) {
9235 char * const olds = s;
9236 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9237 /* These two fields are not set until an inner lexing scope is
9238 entered. But we need them set here. */
9239 shared->ls_bufptr = s;
9240 shared->ls_linestr = PL_linestr;
9242 /* Look for a newline. If the current buffer does not have one,
9243 peek into the line buffer of the parent lexing scope, going
9244 up as many levels as necessary to find one with a newline
9247 while (!(s = (char *)memchr(
9248 (void *)shared->ls_bufptr, '\n',
9249 SvEND(shared->ls_linestr)-shared->ls_bufptr
9251 shared = shared->ls_prev;
9252 /* shared is only null if we have gone beyond the outermost
9253 lexing scope. In a file, we will have broken out of the
9254 loop in the previous iteration. In an eval, the string buf-
9255 fer ends with "\n;", so the while condition above will have
9256 evaluated to false. So shared can never be null. */
9258 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9259 most lexing scope. In a file, shared->ls_linestr at that
9260 level is just one line, so there is no body to steal. */
9261 if (infile && !shared->ls_prev) {
9267 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9270 linestr = shared->ls_linestr;
9271 bufend = SvEND(linestr);
9273 while (s < bufend - len + 1 &&
9274 memNE(s,PL_tokenbuf,len) ) {
9276 ++PL_parser->herelines;
9278 if (s >= bufend - len + 1) {
9281 sv_setpvn(tmpstr,d+1,s-d);
9283 /* the preceding stmt passes a newline */
9284 PL_parser->herelines++;
9286 /* s now points to the newline after the heredoc terminator.
9287 d points to the newline before the body of the heredoc.
9290 /* We are going to modify linestr in place here, so set
9291 aside copies of the string if necessary for re-evals or
9293 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9294 check shared->re_eval_str. */
9295 if (shared->re_eval_start || shared->re_eval_str) {
9296 /* Set aside the rest of the regexp */
9297 if (!shared->re_eval_str)
9298 shared->re_eval_str =
9299 newSVpvn(shared->re_eval_start,
9300 bufend - shared->re_eval_start);
9301 shared->re_eval_start -= s-d;
9303 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9304 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9305 cx->blk_eval.cur_text == linestr)
9307 cx->blk_eval.cur_text = newSVsv(linestr);
9308 SvSCREAM_on(cx->blk_eval.cur_text);
9310 /* Copy everything from s onwards back to d. */
9311 Move(s,d,bufend-s + 1,char);
9312 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9313 /* Setting PL_bufend only applies when we have not dug deeper
9314 into other scopes, because sublex_done sets PL_bufend to
9315 SvEND(PL_linestr). */
9316 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9323 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9324 term = PL_tokenbuf[1];
9326 linestr_save = PL_linestr; /* must restore this afterwards */
9327 d = s; /* and this */
9328 PL_linestr = newSVpvs("");
9329 PL_bufend = SvPVX(PL_linestr);
9331 PL_bufptr = PL_bufend;
9332 CopLINE_set(PL_curcop,
9333 origline + 1 + PL_parser->herelines);
9334 if (!lex_next_chunk(LEX_NO_TERM)
9335 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9336 /* Simply freeing linestr_save might seem simpler here, as it
9337 does not matter what PL_linestr points to, since we are
9338 about to croak; but in a quote-like op, linestr_save
9339 will have been prospectively freed already, via
9340 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9341 restore PL_linestr. */
9342 SvREFCNT_dec_NN(PL_linestr);
9343 PL_linestr = linestr_save;
9346 CopLINE_set(PL_curcop, origline);
9347 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9348 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9349 /* ^That should be enough to avoid this needing to grow: */
9350 sv_catpvs(PL_linestr, "\n\0");
9351 assert(s == SvPVX(PL_linestr));
9352 PL_bufend = SvEND(PL_linestr);
9355 PL_parser->herelines++;
9356 PL_last_lop = PL_last_uni = NULL;
9357 #ifndef PERL_STRICT_CR
9358 if (PL_bufend - PL_linestart >= 2) {
9359 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9360 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9362 PL_bufend[-2] = '\n';
9364 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9366 else if (PL_bufend[-1] == '\r')
9367 PL_bufend[-1] = '\n';
9369 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9370 PL_bufend[-1] = '\n';
9372 if (*s == term && PL_bufend-s >= len
9373 && memEQ(s,PL_tokenbuf + 1,len)) {
9374 SvREFCNT_dec(PL_linestr);
9375 PL_linestr = linestr_save;
9376 PL_linestart = SvPVX(linestr_save);
9377 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9382 sv_catsv(tmpstr,PL_linestr);
9386 PL_multi_end = origline + PL_parser->herelines;
9387 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9388 SvPV_shrink_to_cur(tmpstr);
9391 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9393 else if (IN_ENCODING)
9394 sv_recode_to_utf8(tmpstr, _get_encoding());
9396 PL_lex_stuff = tmpstr;
9397 pl_yylval.ival = op_type;
9401 SvREFCNT_dec(tmpstr);
9402 CopLINE_set(PL_curcop, origline);
9403 missingterm(PL_tokenbuf + 1);
9407 takes: current position in input buffer
9408 returns: new position in input buffer
9409 side-effects: pl_yylval and lex_op are set.
9414 <<>> read from ARGV without magic open
9415 <FH> read from filehandle
9416 <pkg::FH> read from package qualified filehandle
9417 <pkg'FH> read from package qualified filehandle
9418 <$fh> read from filehandle in $fh
9424 S_scan_inputsymbol(pTHX_ char *start)
9426 char *s = start; /* current position in buffer */
9429 bool nomagicopen = FALSE;
9430 char *d = PL_tokenbuf; /* start of temp holding space */
9431 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9433 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9435 end = strchr(s, '\n');
9438 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9445 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9447 /* die if we didn't have space for the contents of the <>,
9448 or if it didn't end, or if we see a newline
9451 if (len >= (I32)sizeof PL_tokenbuf)
9452 Perl_croak(aTHX_ "Excessively long <> operator");
9454 Perl_croak(aTHX_ "Unterminated <> operator");
9459 Remember, only scalar variables are interpreted as filehandles by
9460 this code. Anything more complex (e.g., <$fh{$num}>) will be
9461 treated as a glob() call.
9462 This code makes use of the fact that except for the $ at the front,
9463 a scalar variable and a filehandle look the same.
9465 if (*d == '$' && d[1]) d++;
9467 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9468 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9469 d += UTF ? UTF8SKIP(d) : 1;
9471 /* If we've tried to read what we allow filehandles to look like, and
9472 there's still text left, then it must be a glob() and not a getline.
9473 Use scan_str to pull out the stuff between the <> and treat it
9474 as nothing more than a string.
9477 if (d - PL_tokenbuf != len) {
9478 pl_yylval.ival = OP_GLOB;
9479 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9481 Perl_croak(aTHX_ "Glob not terminated");
9485 bool readline_overriden = FALSE;
9487 /* we're in a filehandle read situation */
9490 /* turn <> into <ARGV> */
9492 Copy("ARGV",d,5,char);
9494 /* Check whether readline() is overriden */
9495 if ((gv_readline = gv_override("readline",8)))
9496 readline_overriden = TRUE;
9498 /* if <$fh>, create the ops to turn the variable into a
9502 /* try to find it in the pad for this block, otherwise find
9503 add symbol table ops
9505 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9506 if (tmp != NOT_IN_PAD) {
9507 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9508 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9509 HEK * const stashname = HvNAME_HEK(stash);
9510 SV * const sym = sv_2mortal(newSVhek(stashname));
9511 sv_catpvs(sym, "::");
9517 OP * const o = newOP(OP_PADSV, 0);
9519 PL_lex_op = readline_overriden
9520 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9521 op_append_elem(OP_LIST, o,
9522 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9523 : (OP*)newUNOP(OP_READLINE, 0, o);
9531 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9533 PL_lex_op = readline_overriden
9534 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9535 op_append_elem(OP_LIST,
9536 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9537 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9538 : (OP*)newUNOP(OP_READLINE, 0,
9539 newUNOP(OP_RV2SV, 0,
9540 newGVOP(OP_GV, 0, gv)));
9542 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9543 pl_yylval.ival = OP_NULL;
9546 /* If it's none of the above, it must be a literal filehandle
9547 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9549 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9550 PL_lex_op = readline_overriden
9551 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9552 op_append_elem(OP_LIST,
9553 newGVOP(OP_GV, 0, gv),
9554 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9555 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9556 pl_yylval.ival = OP_NULL;
9566 start position in buffer
9567 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
9568 only if they are of the open/close form
9569 keep_delims preserve the delimiters around the string
9570 re_reparse compiling a run-time /(?{})/:
9571 collapse // to /, and skip encoding src
9572 delimp if non-null, this is set to the position of
9573 the closing delimiter, or just after it if
9574 the closing and opening delimiters differ
9575 (i.e., the opening delimiter of a substitu-
9577 returns: position to continue reading from buffer
9578 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9579 updates the read buffer.
9581 This subroutine pulls a string out of the input. It is called for:
9582 q single quotes q(literal text)
9583 ' single quotes 'literal text'
9584 qq double quotes qq(interpolate $here please)
9585 " double quotes "interpolate $here please"
9586 qx backticks qx(/bin/ls -l)
9587 ` backticks `/bin/ls -l`
9588 qw quote words @EXPORT_OK = qw( func() $spam )
9589 m// regexp match m/this/
9590 s/// regexp substitute s/this/that/
9591 tr/// string transliterate tr/this/that/
9592 y/// string transliterate y/this/that/
9593 ($*@) sub prototypes sub foo ($)
9594 (stuff) sub attr parameters sub foo : attr(stuff)
9595 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9597 In most of these cases (all but <>, patterns and transliterate)
9598 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9599 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9600 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9603 It skips whitespace before the string starts, and treats the first
9604 character as the delimiter. If the delimiter is one of ([{< then
9605 the corresponding "close" character )]}> is used as the closing
9606 delimiter. It allows quoting of delimiters, and if the string has
9607 balanced delimiters ([{<>}]) it allows nesting.
9609 On success, the SV with the resulting string is put into lex_stuff or,
9610 if that is already non-NULL, into lex_repl. The second case occurs only
9611 when parsing the RHS of the special constructs s/// and tr/// (y///).
9612 For convenience, the terminating delimiter character is stuffed into
9617 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9621 SV *sv; /* scalar value: string */
9622 const char *tmps; /* temp string, used for delimiter matching */
9623 char *s = start; /* current position in the buffer */
9624 char term; /* terminating character */
9625 char *to; /* current position in the sv's data */
9626 I32 brackets = 1; /* bracket nesting level */
9627 bool has_utf8 = FALSE; /* is there any utf8 content? */
9628 I32 termcode; /* terminating char. code */
9629 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9630 STRLEN termlen; /* length of terminating string */
9631 int last_off = 0; /* last position for nesting bracket */
9634 PERL_ARGS_ASSERT_SCAN_STR;
9636 /* skip space before the delimiter */
9641 /* mark where we are, in case we need to report errors */
9644 /* after skipping whitespace, the next character is the terminator */
9647 termcode = termstr[0] = term;
9651 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9652 Copy(s, termstr, termlen, U8);
9653 if (!UTF8_IS_INVARIANT(term))
9657 /* mark where we are */
9658 PL_multi_start = CopLINE(PL_curcop);
9659 PL_multi_open = term;
9660 herelines = PL_parser->herelines;
9662 /* find corresponding closing delimiter */
9663 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9664 termcode = termstr[0] = term = tmps[5];
9666 PL_multi_close = term;
9668 if (PL_multi_open == PL_multi_close) {
9669 keep_bracketed_quoted = FALSE;
9672 /* create a new SV to hold the contents. 79 is the SV's initial length.
9673 What a random number. */
9674 sv = newSV_type(SVt_PVIV);
9676 SvIV_set(sv, termcode);
9677 (void)SvPOK_only(sv); /* validate pointer */
9679 /* move past delimiter and try to read a complete string */
9681 sv_catpvn(sv, s, termlen);
9684 if (IN_ENCODING && !UTF && !re_reparse) {
9688 int offset = s - SvPVX_const(PL_linestr);
9689 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
9690 &offset, (char*)termstr, termlen);
9694 if (SvIsCOW(PL_linestr)) {
9695 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
9696 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
9697 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
9698 char *buf = SvPVX(PL_linestr);
9699 bufend_pos = PL_parser->bufend - buf;
9700 bufptr_pos = PL_parser->bufptr - buf;
9701 oldbufptr_pos = PL_parser->oldbufptr - buf;
9702 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
9703 linestart_pos = PL_parser->linestart - buf;
9704 last_uni_pos = PL_parser->last_uni
9705 ? PL_parser->last_uni - buf
9707 last_lop_pos = PL_parser->last_lop
9708 ? PL_parser->last_lop - buf
9711 PL_parser->lex_shared->re_eval_start ?
9712 PL_parser->lex_shared->re_eval_start - buf : 0;
9715 sv_force_normal(PL_linestr);
9717 buf = SvPVX(PL_linestr);
9718 PL_parser->bufend = buf + bufend_pos;
9719 PL_parser->bufptr = buf + bufptr_pos;
9720 PL_parser->oldbufptr = buf + oldbufptr_pos;
9721 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
9722 PL_parser->linestart = buf + linestart_pos;
9723 if (PL_parser->last_uni)
9724 PL_parser->last_uni = buf + last_uni_pos;
9725 if (PL_parser->last_lop)
9726 PL_parser->last_lop = buf + last_lop_pos;
9727 if (PL_parser->lex_shared->re_eval_start)
9728 PL_parser->lex_shared->re_eval_start =
9729 buf + re_eval_start_pos;
9732 ns = SvPVX_const(PL_linestr) + offset;
9733 svlast = SvEND(sv) - 1;
9735 for (; s < ns; s++) {
9736 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9737 COPLINE_INC_WITH_HERELINES;
9740 goto read_more_line;
9742 /* handle quoted delimiters */
9743 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9745 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9747 if ((svlast-1 - t) % 2) {
9748 if (!keep_bracketed_quoted) {
9751 SvCUR_set(sv, SvCUR(sv) - 1);
9756 if (PL_multi_open == PL_multi_close) {
9762 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9763 /* At here, all closes are "was quoted" one,
9764 so we don't check PL_multi_close. */
9766 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
9771 else if (*t == PL_multi_open)
9779 SvCUR_set(sv, w - SvPVX_const(sv));
9781 last_off = w - SvPVX(sv);
9782 if (--brackets <= 0)
9788 SvCUR_set(sv, SvCUR(sv) - 1);
9794 /* extend sv if need be */
9795 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9796 /* set 'to' to the next character in the sv's string */
9797 to = SvPVX(sv)+SvCUR(sv);
9799 /* if open delimiter is the close delimiter read unbridle */
9800 if (PL_multi_open == PL_multi_close) {
9801 for (; s < PL_bufend; s++,to++) {
9802 /* embedded newlines increment the current line number */
9803 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9804 COPLINE_INC_WITH_HERELINES;
9805 /* handle quoted delimiters */
9806 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9807 if (!keep_bracketed_quoted
9809 || (re_reparse && s[1] == '\\'))
9812 else /* any other quotes are simply copied straight through */
9815 /* terminate when run out of buffer (the for() condition), or
9816 have found the terminator */
9817 else if (*s == term) {
9820 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9823 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9829 /* if the terminator isn't the same as the start character (e.g.,
9830 matched brackets), we have to allow more in the quoting, and
9831 be prepared for nested brackets.
9834 /* read until we run out of string, or we find the terminator */
9835 for (; s < PL_bufend; s++,to++) {
9836 /* embedded newlines increment the line count */
9837 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9838 COPLINE_INC_WITH_HERELINES;
9839 /* backslashes can escape the open or closing characters */
9840 if (*s == '\\' && s+1 < PL_bufend) {
9841 if (!keep_bracketed_quoted &&
9842 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
9849 /* allow nested opens and closes */
9850 else if (*s == PL_multi_close && --brackets <= 0)
9852 else if (*s == PL_multi_open)
9854 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9859 /* terminate the copied string and update the sv's end-of-string */
9861 SvCUR_set(sv, to - SvPVX_const(sv));
9864 * this next chunk reads more into the buffer if we're not done yet
9868 break; /* handle case where we are done yet :-) */
9870 #ifndef PERL_STRICT_CR
9871 if (to - SvPVX_const(sv) >= 2) {
9872 if ((to[-2] == '\r' && to[-1] == '\n') ||
9873 (to[-2] == '\n' && to[-1] == '\r'))
9877 SvCUR_set(sv, to - SvPVX_const(sv));
9879 else if (to[-1] == '\r')
9882 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
9887 /* if we're out of file, or a read fails, bail and reset the current
9888 line marker so we can report where the unterminated string began
9890 COPLINE_INC_WITH_HERELINES;
9891 PL_bufptr = PL_bufend;
9892 if (!lex_next_chunk(0)) {
9894 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9900 /* at this point, we have successfully read the delimited string */
9902 if (!IN_ENCODING || UTF || re_reparse) {
9905 sv_catpvn(sv, s, termlen);
9908 if (has_utf8 || (IN_ENCODING && !re_reparse))
9911 PL_multi_end = CopLINE(PL_curcop);
9912 CopLINE_set(PL_curcop, PL_multi_start);
9913 PL_parser->herelines = herelines;
9915 /* if we allocated too much space, give some back */
9916 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9917 SvLEN_set(sv, SvCUR(sv) + 1);
9918 SvPV_renew(sv, SvLEN(sv));
9921 /* decide whether this is the first or second quoted string we've read
9926 PL_sublex_info.repl = sv;
9929 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
9935 takes: pointer to position in buffer
9936 returns: pointer to new position in buffer
9937 side-effects: builds ops for the constant in pl_yylval.op
9939 Read a number in any of the formats that Perl accepts:
9941 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9942 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
9943 0b[01](_?[01])* binary integers
9944 0[0-7](_?[0-7])* octal integers
9945 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
9946 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
9948 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
9951 If it reads a number without a decimal point or an exponent, it will
9952 try converting the number to an integer and see if it can do so
9953 without loss of precision.
9957 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
9959 const char *s = start; /* current position in buffer */
9960 char *d; /* destination in temp buffer */
9961 char *e; /* end of temp buffer */
9962 NV nv; /* number read, as a double */
9963 SV *sv = NULL; /* place to put the converted number */
9964 bool floatit; /* boolean: int or float? */
9965 const char *lastub = NULL; /* position of last underbar */
9966 static const char* const number_too_long = "Number too long";
9967 /* Hexadecimal floating point.
9969 * In many places (where we have quads and NV is IEEE 754 double)
9970 * we can fit the mantissa bits of a NV into an unsigned quad.
9971 * (Note that UVs might not be quads even when we have quads.)
9972 * This will not work everywhere, though (either no quads, or
9973 * using long doubles), in which case we have to resort to NV,
9974 * which will probably mean horrible loss of precision due to
9975 * multiple fp operations. */
9978 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
9979 # define HEXFP_UQUAD
9980 Uquad_t hexfp_uquad = 0;
9981 int hexfp_frac_bits = 0;
9986 NV hexfp_mult = 1.0;
9987 UV high_non_zero = 0; /* highest digit */
9989 PERL_ARGS_ASSERT_SCAN_NUM;
9991 /* We use the first character to decide what type of number this is */
9995 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
9997 /* if it starts with a 0, it could be an octal number, a decimal in
9998 0.13 disguise, or a hexadecimal number, or a binary number. */
10002 u holds the "number so far"
10003 shift the power of 2 of the base
10004 (hex == 4, octal == 3, binary == 1)
10005 overflowed was the number more than we can hold?
10007 Shift is used when we add a digit. It also serves as an "are
10008 we in octal/hex/binary?" indicator to disallow hex characters
10009 when in octal mode.
10014 bool overflowed = FALSE;
10015 bool just_zero = TRUE; /* just plain 0 or binary number? */
10016 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10017 static const char* const bases[5] =
10018 { "", "binary", "", "octal", "hexadecimal" };
10019 static const char* const Bases[5] =
10020 { "", "Binary", "", "Octal", "Hexadecimal" };
10021 static const char* const maxima[5] =
10023 "0b11111111111111111111111111111111",
10027 const char *base, *Base, *max;
10029 /* check for hex */
10030 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10034 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10039 /* check for a decimal in disguise */
10040 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10042 /* so it must be octal */
10049 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10050 "Misplaced _ in number");
10054 base = bases[shift];
10055 Base = Bases[shift];
10056 max = maxima[shift];
10058 /* read the rest of the number */
10060 /* x is used in the overflow test,
10061 b is the digit we're adding on. */
10066 /* if we don't mention it, we're done */
10070 /* _ are ignored -- but warned about if consecutive */
10072 if (lastub && s == lastub + 1)
10073 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10074 "Misplaced _ in number");
10078 /* 8 and 9 are not octal */
10079 case '8': case '9':
10081 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10085 case '2': case '3': case '4':
10086 case '5': case '6': case '7':
10088 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10091 case '0': case '1':
10092 b = *s++ & 15; /* ASCII digit -> value of digit */
10096 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10097 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10098 /* make sure they said 0x */
10101 b = (*s++ & 7) + 9;
10103 /* Prepare to put the digit we have onto the end
10104 of the number so far. We check for overflows.
10110 x = u << shift; /* make room for the digit */
10112 total_bits += shift;
10114 if ((x >> shift) != u
10115 && !(PL_hints & HINT_NEW_BINARY)) {
10118 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10119 "Integer overflow in %s number",
10122 u = x | b; /* add the digit to the end */
10125 n *= nvshift[shift];
10126 /* If an NV has not enough bits in its
10127 * mantissa to represent an UV this summing of
10128 * small low-order numbers is a waste of time
10129 * (because the NV cannot preserve the
10130 * low-order bits anyway): we could just
10131 * remember when did we overflow and in the
10132 * end just multiply n by the right
10137 if (high_non_zero == 0 && b > 0)
10140 /* this could be hexfp, but peek ahead
10141 * to avoid matching ".." */
10142 if (UNLIKELY(HEXFP_PEEK(s))) {
10150 /* if we get here, we had success: make a scalar value from
10155 /* final misplaced underbar check */
10156 if (s[-1] == '_') {
10157 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10160 if (UNLIKELY(HEXFP_PEEK(s))) {
10161 /* Do sloppy (on the underbars) but quick detection
10162 * (and value construction) for hexfp, the decimal
10163 * detection will shortly be more thorough with the
10164 * underbar checks. */
10168 #else /* HEXFP_NV */
10173 NV mult = 1 / 16.0;
10176 while (isXDIGIT(*h) || *h == '_') {
10177 if (isXDIGIT(*h)) {
10178 U8 b = XDIGIT_VALUE(*h);
10179 total_bits += shift;
10181 hexfp_uquad <<= shift;
10183 hexfp_frac_bits += shift;
10184 #else /* HEXFP_NV */
10185 hexfp_nv += b * mult;
10193 if (total_bits >= 4) {
10194 if (high_non_zero < 0x8)
10196 if (high_non_zero < 0x4)
10198 if (high_non_zero < 0x2)
10202 if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
10203 bool negexp = FALSE;
10207 else if (*h == '-') {
10213 while (isDIGIT(*h) || *h == '_') {
10216 hexfp_exp += *h - '0';
10219 -hexfp_exp < NV_MIN_EXP - 1) {
10220 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10221 "Hexadecimal float: exponent underflow");
10228 hexfp_exp > NV_MAX_EXP - 1) {
10229 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10230 "Hexadecimal float: exponent overflow");
10239 hexfp_exp = -hexfp_exp;
10241 hexfp_exp -= hexfp_frac_bits;
10243 hexfp_mult = pow(2.0, hexfp_exp);
10251 if (n > 4294967295.0)
10252 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10253 "%s number > %s non-portable",
10259 if (u > 0xffffffff)
10260 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10261 "%s number > %s non-portable",
10266 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10267 sv = new_constant(start, s - start, "integer",
10268 sv, NULL, NULL, 0);
10269 else if (PL_hints & HINT_NEW_BINARY)
10270 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10275 handle decimal numbers.
10276 we're also sent here when we read a 0 as the first digit
10278 case '1': case '2': case '3': case '4': case '5':
10279 case '6': case '7': case '8': case '9': case '.':
10282 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10291 /* read next group of digits and _ and copy into d */
10292 while (isDIGIT(*s) || *s == '_' ||
10293 UNLIKELY(hexfp && isXDIGIT(*s))) {
10294 /* skip underscores, checking for misplaced ones
10298 if (lastub && s == lastub + 1)
10299 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10300 "Misplaced _ in number");
10304 /* check for end of fixed-length buffer */
10306 Perl_croak(aTHX_ "%s", number_too_long);
10307 /* if we're ok, copy the character */
10312 /* final misplaced underbar check */
10313 if (lastub && s == lastub + 1) {
10314 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10317 /* read a decimal portion if there is one. avoid
10318 3..5 being interpreted as the number 3. followed
10321 if (*s == '.' && s[1] != '.') {
10326 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10327 "Misplaced _ in number");
10331 /* copy, ignoring underbars, until we run out of digits.
10333 for (; isDIGIT(*s) || *s == '_' ||
10334 UNLIKELY(hexfp && isXDIGIT(*s));
10336 /* fixed length buffer check */
10338 Perl_croak(aTHX_ "%s", number_too_long);
10340 if (lastub && s == lastub + 1)
10341 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10342 "Misplaced _ in number");
10348 /* fractional part ending in underbar? */
10349 if (s[-1] == '_') {
10350 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10351 "Misplaced _ in number");
10353 if (*s == '.' && isDIGIT(s[1])) {
10354 /* oops, it's really a v-string, but without the "v" */
10360 /* read exponent part, if present */
10361 if ((isALPHA_FOLD_EQ(*s, 'e')
10362 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10363 && strchr("+-0123456789_", s[1]))
10367 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10368 ditto for p (hexfloats) */
10369 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10370 /* At least some Mach atof()s don't grok 'E' */
10373 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10380 /* stray preinitial _ */
10382 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10383 "Misplaced _ in number");
10387 /* allow positive or negative exponent */
10388 if (*s == '+' || *s == '-')
10391 /* stray initial _ */
10393 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10394 "Misplaced _ in number");
10398 /* read digits of exponent */
10399 while (isDIGIT(*s) || *s == '_') {
10402 Perl_croak(aTHX_ "%s", number_too_long);
10406 if (((lastub && s == lastub + 1) ||
10407 (!isDIGIT(s[1]) && s[1] != '_')))
10408 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10409 "Misplaced _ in number");
10417 We try to do an integer conversion first if no characters
10418 indicating "float" have been found.
10423 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10425 if (flags == IS_NUMBER_IN_UV) {
10427 sv = newSViv(uv); /* Prefer IVs over UVs. */
10430 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10431 if (uv <= (UV) IV_MIN)
10432 sv = newSViv(-(IV)uv);
10439 STORE_NUMERIC_LOCAL_SET_STANDARD();
10440 /* terminate the string */
10442 if (UNLIKELY(hexfp)) {
10443 # ifdef NV_MANT_DIG
10444 if (total_bits > NV_MANT_DIG)
10445 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10446 "Hexadecimal float: mantissa overflow");
10449 nv = hexfp_uquad * hexfp_mult;
10450 #else /* HEXFP_NV */
10451 nv = hexfp_nv * hexfp_mult;
10454 nv = Atof(PL_tokenbuf);
10456 RESTORE_NUMERIC_LOCAL();
10461 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10462 const char *const key = floatit ? "float" : "integer";
10463 const STRLEN keylen = floatit ? 5 : 7;
10464 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10465 key, keylen, sv, NULL, NULL, 0);
10469 /* if it starts with a v, it could be a v-string */
10472 sv = newSV(5); /* preallocate storage space */
10473 ENTER_with_name("scan_vstring");
10475 s = scan_vstring(s, PL_bufend, sv);
10476 SvREFCNT_inc_simple_void_NN(sv);
10477 LEAVE_with_name("scan_vstring");
10481 /* make the op for the constant and return */
10484 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10486 lvalp->opval = NULL;
10492 S_scan_formline(pTHX_ char *s)
10496 SV * const stuff = newSVpvs("");
10497 bool needargs = FALSE;
10498 bool eofmt = FALSE;
10500 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10502 while (!needargs) {
10505 #ifdef PERL_STRICT_CR
10506 while (SPACE_OR_TAB(*t))
10509 while (SPACE_OR_TAB(*t) || *t == '\r')
10512 if (*t == '\n' || t == PL_bufend) {
10517 eol = (char *) memchr(s,'\n',PL_bufend-s);
10521 for (t = s; t < eol; t++) {
10522 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10524 goto enough; /* ~~ must be first line in formline */
10526 if (*t == '@' || *t == '^')
10530 sv_catpvn(stuff, s, eol-s);
10531 #ifndef PERL_STRICT_CR
10532 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10533 char *end = SvPVX(stuff) + SvCUR(stuff);
10536 SvCUR_set(stuff, SvCUR(stuff) - 1);
10544 if ((PL_rsfp || PL_parser->filtered)
10545 && PL_parser->form_lex_state == LEX_NORMAL) {
10547 PL_bufptr = PL_bufend;
10548 COPLINE_INC_WITH_HERELINES;
10549 got_some = lex_next_chunk(0);
10550 CopLINE_dec(PL_curcop);
10558 if (!SvCUR(stuff) || needargs)
10559 PL_lex_state = PL_parser->form_lex_state;
10560 if (SvCUR(stuff)) {
10561 PL_expect = XSTATE;
10563 const char *s2 = s;
10564 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10568 PL_expect = XTERMBLOCK;
10569 NEXTVAL_NEXTTOKE.ival = 0;
10572 NEXTVAL_NEXTTOKE.ival = 0;
10573 force_next(FORMLBRACK);
10576 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10578 else if (IN_ENCODING)
10579 sv_recode_to_utf8(stuff, _get_encoding());
10581 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10585 SvREFCNT_dec(stuff);
10587 PL_lex_formbrack = 0;
10593 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10595 const I32 oldsavestack_ix = PL_savestack_ix;
10596 CV* const outsidecv = PL_compcv;
10598 SAVEI32(PL_subline);
10599 save_item(PL_subname);
10600 SAVESPTR(PL_compcv);
10602 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10603 CvFLAGS(PL_compcv) |= flags;
10605 PL_subline = CopLINE(PL_curcop);
10606 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10607 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10608 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10609 if (outsidecv && CvPADLIST(outsidecv))
10610 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10612 return oldsavestack_ix;
10616 S_yywarn(pTHX_ const char *const s, U32 flags)
10618 PERL_ARGS_ASSERT_YYWARN;
10620 PL_in_eval |= EVAL_WARNONLY;
10621 yyerror_pv(s, flags);
10626 Perl_yyerror(pTHX_ const char *const s)
10628 PERL_ARGS_ASSERT_YYERROR;
10629 return yyerror_pvn(s, strlen(s), 0);
10633 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10635 PERL_ARGS_ASSERT_YYERROR_PV;
10636 return yyerror_pvn(s, strlen(s), flags);
10640 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10642 const char *context = NULL;
10645 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10646 int yychar = PL_parser->yychar;
10648 PERL_ARGS_ASSERT_YYERROR_PVN;
10650 if (!yychar || (yychar == ';' && !PL_rsfp))
10651 sv_catpvs(where_sv, "at EOF");
10652 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10653 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10654 PL_oldbufptr != PL_bufptr) {
10657 The code below is removed for NetWare because it abends/crashes on NetWare
10658 when the script has error such as not having the closing quotes like:
10659 if ($var eq "value)
10660 Checking of white spaces is anyway done in NetWare code.
10663 while (isSPACE(*PL_oldoldbufptr))
10666 context = PL_oldoldbufptr;
10667 contlen = PL_bufptr - PL_oldoldbufptr;
10669 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10670 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10673 The code below is removed for NetWare because it abends/crashes on NetWare
10674 when the script has error such as not having the closing quotes like:
10675 if ($var eq "value)
10676 Checking of white spaces is anyway done in NetWare code.
10679 while (isSPACE(*PL_oldbufptr))
10682 context = PL_oldbufptr;
10683 contlen = PL_bufptr - PL_oldbufptr;
10685 else if (yychar > 255)
10686 sv_catpvs(where_sv, "next token ???");
10687 else if (yychar == YYEMPTY) {
10688 if (PL_lex_state == LEX_NORMAL ||
10689 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10690 sv_catpvs(where_sv, "at end of line");
10691 else if (PL_lex_inpat)
10692 sv_catpvs(where_sv, "within pattern");
10694 sv_catpvs(where_sv, "within string");
10697 sv_catpvs(where_sv, "next char ");
10699 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10700 else if (isPRINT_LC(yychar)) {
10701 const char string = yychar;
10702 sv_catpvn(where_sv, &string, 1);
10705 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10707 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10708 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10709 OutCopFILE(PL_curcop),
10710 (IV)(PL_parser->preambling == NOLINE
10711 ? CopLINE(PL_curcop)
10712 : PL_parser->preambling));
10714 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10715 UTF8fARG(UTF, contlen, context));
10717 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10718 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10719 Perl_sv_catpvf(aTHX_ msg,
10720 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10721 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10724 if (PL_in_eval & EVAL_WARNONLY) {
10725 PL_in_eval &= ~EVAL_WARNONLY;
10726 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10730 if (PL_error_count >= 10) {
10732 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10733 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10734 SVfARG(errsv), OutCopFILE(PL_curcop));
10736 Perl_croak(aTHX_ "%s has too many errors.\n",
10737 OutCopFILE(PL_curcop));
10740 PL_in_my_stash = NULL;
10745 S_swallow_bom(pTHX_ U8 *s)
10747 const STRLEN slen = SvCUR(PL_linestr);
10749 PERL_ARGS_ASSERT_SWALLOW_BOM;
10753 if (s[1] == 0xFE) {
10754 /* UTF-16 little-endian? (or UTF-32LE?) */
10755 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10756 /* diag_listed_as: Unsupported script encoding %s */
10757 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10758 #ifndef PERL_NO_UTF16_FILTER
10759 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10761 if (PL_bufend > (char*)s) {
10762 s = add_utf16_textfilter(s, TRUE);
10765 /* diag_listed_as: Unsupported script encoding %s */
10766 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10771 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10772 #ifndef PERL_NO_UTF16_FILTER
10773 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10775 if (PL_bufend > (char *)s) {
10776 s = add_utf16_textfilter(s, FALSE);
10779 /* diag_listed_as: Unsupported script encoding %s */
10780 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10784 case BOM_UTF8_FIRST_BYTE: {
10785 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
10786 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
10787 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10788 s += len + 1; /* UTF-8 */
10795 if (s[2] == 0xFE && s[3] == 0xFF) {
10796 /* UTF-32 big-endian */
10797 /* diag_listed_as: Unsupported script encoding %s */
10798 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10801 else if (s[2] == 0 && s[3] != 0) {
10804 * are a good indicator of UTF-16BE. */
10805 #ifndef PERL_NO_UTF16_FILTER
10806 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10807 s = add_utf16_textfilter(s, FALSE);
10809 /* diag_listed_as: Unsupported script encoding %s */
10810 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10817 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10820 * are a good indicator of UTF-16LE. */
10821 #ifndef PERL_NO_UTF16_FILTER
10822 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10823 s = add_utf16_textfilter(s, TRUE);
10825 /* diag_listed_as: Unsupported script encoding %s */
10826 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10834 #ifndef PERL_NO_UTF16_FILTER
10836 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10838 SV *const filter = FILTER_DATA(idx);
10839 /* We re-use this each time round, throwing the contents away before we
10841 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10842 SV *const utf8_buffer = filter;
10843 IV status = IoPAGE(filter);
10844 const bool reverse = cBOOL(IoLINES(filter));
10847 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10849 /* As we're automatically added, at the lowest level, and hence only called
10850 from this file, we can be sure that we're not called in block mode. Hence
10851 don't bother writing code to deal with block mode. */
10853 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10856 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10858 DEBUG_P(PerlIO_printf(Perl_debug_log,
10859 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10860 FPTR2DPTR(void *, S_utf16_textfilter),
10861 reverse ? 'l' : 'b', idx, maxlen, status,
10862 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10869 /* First, look in our buffer of existing UTF-8 data: */
10870 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10874 } else if (status == 0) {
10876 IoPAGE(filter) = 0;
10877 nl = SvEND(utf8_buffer);
10880 STRLEN got = nl - SvPVX(utf8_buffer);
10881 /* Did we have anything to append? */
10883 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10884 /* Everything else in this code works just fine if SVp_POK isn't
10885 set. This, however, needs it, and we need it to work, else
10886 we loop infinitely because the buffer is never consumed. */
10887 sv_chop(utf8_buffer, nl);
10891 /* OK, not a complete line there, so need to read some more UTF-16.
10892 Read an extra octect if the buffer currently has an odd number. */
10896 if (SvCUR(utf16_buffer) >= 2) {
10897 /* Location of the high octet of the last complete code point.
10898 Gosh, UTF-16 is a pain. All the benefits of variable length,
10899 *coupled* with all the benefits of partial reads and
10901 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10902 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10904 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10908 /* We have the first half of a surrogate. Read more. */
10909 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10912 status = FILTER_READ(idx + 1, utf16_buffer,
10913 160 + (SvCUR(utf16_buffer) & 1));
10914 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10915 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10918 IoPAGE(filter) = status;
10923 chars = SvCUR(utf16_buffer) >> 1;
10924 have = SvCUR(utf8_buffer);
10925 SvGROW(utf8_buffer, have + chars * 3 + 1);
10928 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10929 (U8*)SvPVX_const(utf8_buffer) + have,
10930 chars * 2, &newlen);
10932 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10933 (U8*)SvPVX_const(utf8_buffer) + have,
10934 chars * 2, &newlen);
10936 SvCUR_set(utf8_buffer, have + newlen);
10939 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10940 it's private to us, and utf16_to_utf8{,reversed} take a
10941 (pointer,length) pair, rather than a NUL-terminated string. */
10942 if(SvCUR(utf16_buffer) & 1) {
10943 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10944 SvCUR_set(utf16_buffer, 1);
10946 SvCUR_set(utf16_buffer, 0);
10949 DEBUG_P(PerlIO_printf(Perl_debug_log,
10950 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10952 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10953 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
10958 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10960 SV *filter = filter_add(S_utf16_textfilter, NULL);
10962 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10964 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
10965 sv_setpvs(filter, "");
10966 IoLINES(filter) = reversed;
10967 IoPAGE(filter) = 1; /* Not EOF */
10969 /* Sadly, we have to return a valid pointer, come what may, so we have to
10970 ignore any error return from this. */
10971 SvCUR_set(PL_linestr, 0);
10972 if (FILTER_READ(0, PL_linestr, 0)) {
10973 SvUTF8_on(PL_linestr);
10975 SvUTF8_on(PL_linestr);
10977 PL_bufend = SvEND(PL_linestr);
10978 return (U8*)SvPVX(PL_linestr);
10983 Returns a pointer to the next character after the parsed
10984 vstring, as well as updating the passed in sv.
10986 Function must be called like
10988 sv = sv_2mortal(newSV(5));
10989 s = scan_vstring(s,e,sv);
10991 where s and e are the start and end of the string.
10992 The sv should already be large enough to store the vstring
10993 passed in, for performance reasons.
10995 This function may croak if fatal warnings are enabled in the
10996 calling scope, hence the sv_2mortal in the example (to prevent
10997 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11003 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11005 const char *pos = s;
11006 const char *start = s;
11008 PERL_ARGS_ASSERT_SCAN_VSTRING;
11010 if (*pos == 'v') pos++; /* get past 'v' */
11011 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11013 if ( *pos != '.') {
11014 /* this may not be a v-string if followed by => */
11015 const char *next = pos;
11016 while (next < e && isSPACE(*next))
11018 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11019 /* return string not v-string */
11020 sv_setpvn(sv,(char *)s,pos-s);
11021 return (char *)pos;
11025 if (!isALPHA(*pos)) {
11026 U8 tmpbuf[UTF8_MAXBYTES+1];
11029 s++; /* get past 'v' */
11034 /* this is atoi() that tolerates underscores */
11037 const char *end = pos;
11039 while (--end >= s) {
11041 const UV orev = rev;
11042 rev += (*end - '0') * mult;
11045 /* diag_listed_as: Integer overflow in %s number */
11046 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11047 "Integer overflow in decimal number");
11051 if (rev > 0x7FFFFFFF)
11052 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11054 /* Append native character for the rev point */
11055 tmpend = uvchr_to_utf8(tmpbuf, rev);
11056 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11057 if (!UVCHR_IS_INVARIANT(rev))
11059 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11065 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11069 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11076 Perl_keyword_plugin_standard(pTHX_
11077 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11079 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11080 PERL_UNUSED_CONTEXT;
11081 PERL_UNUSED_ARG(keyword_ptr);
11082 PERL_UNUSED_ARG(keyword_len);
11083 PERL_UNUSED_ARG(op_ptr);
11084 return KEYWORD_PLUGIN_DECLINE;
11087 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11089 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11091 SAVEI32(PL_lex_brackets);
11092 if (PL_lex_brackets > 100)
11093 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11094 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11095 SAVEI32(PL_lex_allbrackets);
11096 PL_lex_allbrackets = 0;
11097 SAVEI8(PL_lex_fakeeof);
11098 PL_lex_fakeeof = (U8)fakeeof;
11099 if(yyparse(gramtype) && !PL_parser->error_count)
11100 qerror(Perl_mess(aTHX_ "Parse error"));
11103 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11105 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11109 SAVEVPTR(PL_eval_root);
11110 PL_eval_root = NULL;
11111 parse_recdescent(gramtype, fakeeof);
11117 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11119 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11122 if (flags & ~PARSE_OPTIONAL)
11123 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11124 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11125 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11126 if (!PL_parser->error_count)
11127 qerror(Perl_mess(aTHX_ "Parse error"));
11128 exprop = newOP(OP_NULL, 0);
11134 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11136 Parse a Perl arithmetic expression. This may contain operators of precedence
11137 down to the bit shift operators. The expression must be followed (and thus
11138 terminated) either by a comparison or lower-precedence operator or by
11139 something that would normally terminate an expression such as semicolon.
11140 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11141 otherwise it is mandatory. It is up to the caller to ensure that the
11142 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11143 the source of the code to be parsed and the lexical context for the
11146 The op tree representing the expression is returned. If an optional
11147 expression is absent, a null pointer is returned, otherwise the pointer
11150 If an error occurs in parsing or compilation, in most cases a valid op
11151 tree is returned anyway. The error is reflected in the parser state,
11152 normally resulting in a single exception at the top level of parsing
11153 which covers all the compilation errors that occurred. Some compilation
11154 errors, however, will throw an exception immediately.
11160 Perl_parse_arithexpr(pTHX_ U32 flags)
11162 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11166 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11168 Parse a Perl term expression. This may contain operators of precedence
11169 down to the assignment operators. The expression must be followed (and thus
11170 terminated) either by a comma or lower-precedence operator or by
11171 something that would normally terminate an expression such as semicolon.
11172 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11173 otherwise it is mandatory. It is up to the caller to ensure that the
11174 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11175 the source of the code to be parsed and the lexical context for the
11178 The op tree representing the expression is returned. If an optional
11179 expression is absent, a null pointer is returned, otherwise the pointer
11182 If an error occurs in parsing or compilation, in most cases a valid op
11183 tree is returned anyway. The error is reflected in the parser state,
11184 normally resulting in a single exception at the top level of parsing
11185 which covers all the compilation errors that occurred. Some compilation
11186 errors, however, will throw an exception immediately.
11192 Perl_parse_termexpr(pTHX_ U32 flags)
11194 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11198 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11200 Parse a Perl list expression. This may contain operators of precedence
11201 down to the comma operator. The expression must be followed (and thus
11202 terminated) either by a low-precedence logic operator such as C<or> or by
11203 something that would normally terminate an expression such as semicolon.
11204 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11205 otherwise it is mandatory. It is up to the caller to ensure that the
11206 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11207 the source of the code to be parsed and the lexical context for the
11210 The op tree representing the expression is returned. If an optional
11211 expression is absent, a null pointer is returned, otherwise the pointer
11214 If an error occurs in parsing or compilation, in most cases a valid op
11215 tree is returned anyway. The error is reflected in the parser state,
11216 normally resulting in a single exception at the top level of parsing
11217 which covers all the compilation errors that occurred. Some compilation
11218 errors, however, will throw an exception immediately.
11224 Perl_parse_listexpr(pTHX_ U32 flags)
11226 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11230 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11232 Parse a single complete Perl expression. This allows the full
11233 expression grammar, including the lowest-precedence operators such
11234 as C<or>. The expression must be followed (and thus terminated) by a
11235 token that an expression would normally be terminated by: end-of-file,
11236 closing bracketing punctuation, semicolon, or one of the keywords that
11237 signals a postfix expression-statement modifier. If I<flags> includes
11238 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11239 mandatory. It is up to the caller to ensure that the dynamic parser
11240 state (L</PL_parser> et al) is correctly set to reflect the source of
11241 the code to be parsed and the lexical context for the expression.
11243 The op tree representing the expression is returned. If an optional
11244 expression is absent, a null pointer is returned, otherwise the pointer
11247 If an error occurs in parsing or compilation, in most cases a valid op
11248 tree is returned anyway. The error is reflected in the parser state,
11249 normally resulting in a single exception at the top level of parsing
11250 which covers all the compilation errors that occurred. Some compilation
11251 errors, however, will throw an exception immediately.
11257 Perl_parse_fullexpr(pTHX_ U32 flags)
11259 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11263 =for apidoc Amx|OP *|parse_block|U32 flags
11265 Parse a single complete Perl code block. This consists of an opening
11266 brace, a sequence of statements, and a closing brace. The block
11267 constitutes a lexical scope, so C<my> variables and various compile-time
11268 effects can be contained within it. It is up to the caller to ensure
11269 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11270 reflect the source of the code to be parsed and the lexical context for
11273 The op tree representing the code block is returned. This is always a
11274 real op, never a null pointer. It will normally be a C<lineseq> list,
11275 including C<nextstate> or equivalent ops. No ops to construct any kind
11276 of runtime scope are included by virtue of it being a block.
11278 If an error occurs in parsing or compilation, in most cases a valid op
11279 tree (most likely null) is returned anyway. The error is reflected in
11280 the parser state, normally resulting in a single exception at the top
11281 level of parsing which covers all the compilation errors that occurred.
11282 Some compilation errors, however, will throw an exception immediately.
11284 The I<flags> parameter is reserved for future use, and must always
11291 Perl_parse_block(pTHX_ U32 flags)
11294 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11295 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11299 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11301 Parse a single unadorned Perl statement. This may be a normal imperative
11302 statement or a declaration that has compile-time effect. It does not
11303 include any label or other affixture. It is up to the caller to ensure
11304 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11305 reflect the source of the code to be parsed and the lexical context for
11308 The op tree representing the statement is returned. This may be a
11309 null pointer if the statement is null, for example if it was actually
11310 a subroutine definition (which has compile-time side effects). If not
11311 null, it will be ops directly implementing the statement, suitable to
11312 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11313 equivalent op (except for those embedded in a scope contained entirely
11314 within the statement).
11316 If an error occurs in parsing or compilation, in most cases a valid op
11317 tree (most likely null) is returned anyway. The error is reflected in
11318 the parser state, normally resulting in a single exception at the top
11319 level of parsing which covers all the compilation errors that occurred.
11320 Some compilation errors, however, will throw an exception immediately.
11322 The I<flags> parameter is reserved for future use, and must always
11329 Perl_parse_barestmt(pTHX_ U32 flags)
11332 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11333 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11337 =for apidoc Amx|SV *|parse_label|U32 flags
11339 Parse a single label, possibly optional, of the type that may prefix a
11340 Perl statement. It is up to the caller to ensure that the dynamic parser
11341 state (L</PL_parser> et al) is correctly set to reflect the source of
11342 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11343 label is optional, otherwise it is mandatory.
11345 The name of the label is returned in the form of a fresh scalar. If an
11346 optional label is absent, a null pointer is returned.
11348 If an error occurs in parsing, which can only occur if the label is
11349 mandatory, a valid label is returned anyway. The error is reflected in
11350 the parser state, normally resulting in a single exception at the top
11351 level of parsing which covers all the compilation errors that occurred.
11357 Perl_parse_label(pTHX_ U32 flags)
11359 if (flags & ~PARSE_OPTIONAL)
11360 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11361 if (PL_lex_state == LEX_KNOWNEXT) {
11362 PL_parser->yychar = yylex();
11363 if (PL_parser->yychar == LABEL) {
11364 char * const lpv = pl_yylval.pval;
11365 STRLEN llen = strlen(lpv);
11366 PL_parser->yychar = YYEMPTY;
11367 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11374 STRLEN wlen, bufptr_pos;
11377 if (!isIDFIRST_lazy_if(s, UTF))
11379 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11380 if (word_takes_any_delimeter(s, wlen))
11382 bufptr_pos = s - SvPVX(PL_linestr);
11384 lex_read_space(LEX_KEEP_PREVIOUS);
11386 s = SvPVX(PL_linestr) + bufptr_pos;
11387 if (t[0] == ':' && t[1] != ':') {
11388 PL_oldoldbufptr = PL_oldbufptr;
11391 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11395 if (flags & PARSE_OPTIONAL) {
11398 qerror(Perl_mess(aTHX_ "Parse error"));
11399 return newSVpvs("x");
11406 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11408 Parse a single complete Perl statement. This may be a normal imperative
11409 statement or a declaration that has compile-time effect, and may include
11410 optional labels. It is up to the caller to ensure that the dynamic
11411 parser state (L</PL_parser> et al) is correctly set to reflect the source
11412 of the code to be parsed and the lexical context for the statement.
11414 The op tree representing the statement is returned. This may be a
11415 null pointer if the statement is null, for example if it was actually
11416 a subroutine definition (which has compile-time side effects). If not
11417 null, it will be the result of a L</newSTATEOP> call, normally including
11418 a C<nextstate> or equivalent op.
11420 If an error occurs in parsing or compilation, in most cases a valid op
11421 tree (most likely null) is returned anyway. The error is reflected in
11422 the parser state, normally resulting in a single exception at the top
11423 level of parsing which covers all the compilation errors that occurred.
11424 Some compilation errors, however, will throw an exception immediately.
11426 The I<flags> parameter is reserved for future use, and must always
11433 Perl_parse_fullstmt(pTHX_ U32 flags)
11436 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11437 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11441 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11443 Parse a sequence of zero or more Perl statements. These may be normal
11444 imperative statements, including optional labels, or declarations
11445 that have compile-time effect, or any mixture thereof. The statement
11446 sequence ends when a closing brace or end-of-file is encountered in a
11447 place where a new statement could have validly started. It is up to
11448 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11449 is correctly set to reflect the source of the code to be parsed and the
11450 lexical context for the statements.
11452 The op tree representing the statement sequence is returned. This may
11453 be a null pointer if the statements were all null, for example if there
11454 were no statements or if there were only subroutine definitions (which
11455 have compile-time side effects). If not null, it will be a C<lineseq>
11456 list, normally including C<nextstate> or equivalent ops.
11458 If an error occurs in parsing or compilation, in most cases a valid op
11459 tree is returned anyway. The error is reflected in the parser state,
11460 normally resulting in a single exception at the top level of parsing
11461 which covers all the compilation errors that occurred. Some compilation
11462 errors, however, will throw an exception immediately.
11464 The I<flags> parameter is reserved for future use, and must always
11471 Perl_parse_stmtseq(pTHX_ U32 flags)
11476 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11477 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11478 c = lex_peek_unichar(0);
11479 if (c != -1 && c != /*{*/'}')
11480 qerror(Perl_mess(aTHX_ "Parse error"));
11484 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11486 S_lex_token_boundary(pTHX)
11488 PL_oldoldbufptr = PL_oldbufptr;
11489 PL_oldbufptr = PL_bufptr;
11492 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11494 S_parse_opt_lexvar(pTHX)
11499 lex_token_boundary();
11500 sigil = lex_read_unichar(0);
11501 if (lex_peek_unichar(0) == '#') {
11502 qerror(Perl_mess(aTHX_ "Parse error"));
11506 c = lex_peek_unichar(0);
11507 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11510 d = PL_tokenbuf + 1;
11511 PL_tokenbuf[0] = (char)sigil;
11512 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11514 if (d == PL_tokenbuf+1)
11516 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11517 OPf_MOD | (OPpLVAL_INTRO<<8));
11518 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11523 Perl_parse_subsignature(pTHX)
11526 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11527 OP *initops = NULL;
11529 c = lex_peek_unichar(0);
11530 while (c != /*(*/')') {
11534 if (prev_type == 2)
11535 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11536 var = parse_opt_lexvar();
11538 newBINOP(OP_AELEM, 0,
11539 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11541 newSVOP(OP_CONST, 0, newSViv(pos))) :
11544 c = lex_peek_unichar(0);
11546 lex_token_boundary();
11547 lex_read_unichar(0);
11549 c = lex_peek_unichar(0);
11550 if (c == ',' || c == /*(*/')') {
11552 qerror(Perl_mess(aTHX_ "Optional parameter "
11553 "lacks default expression"));
11555 OP *defexpr = parse_termexpr(0);
11556 if (defexpr->op_type == OP_UNDEF &&
11557 !(defexpr->op_flags & OPf_KIDS)) {
11562 scalar(newUNOP(OP_RV2AV, 0,
11563 newGVOP(OP_GV, 0, PL_defgv))),
11564 newSVOP(OP_CONST, 0, newSViv(pos+1)));
11566 newCONDOP(0, ifop, expr, defexpr) :
11567 newLOGOP(OP_OR, 0, ifop, defexpr);
11572 if (prev_type == 1)
11573 qerror(Perl_mess(aTHX_ "Mandatory parameter "
11574 "follows optional parameter"));
11576 min_arity = pos + 1;
11578 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11580 initops = op_append_list(OP_LINESEQ, initops,
11581 newSTATEOP(0, NULL, expr));
11587 if (prev_type == 2)
11588 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11589 var = parse_opt_lexvar();
11591 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11592 newBINOP(OP_BIT_AND, 0,
11593 scalar(newUNOP(OP_RV2AV, 0,
11594 newGVOP(OP_GV, 0, PL_defgv))),
11595 newSVOP(OP_CONST, 0, newSViv(1))),
11596 op_convert_list(OP_DIE, 0,
11597 op_convert_list(OP_SPRINTF, 0,
11598 op_append_list(OP_LIST,
11599 newSVOP(OP_CONST, 0,
11600 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11602 op_append_list(OP_LIST,
11603 newSVOP(OP_CONST, 0, newSViv(1)),
11604 newSVOP(OP_CONST, 0, newSViv(2))),
11605 newOP(OP_CALLER, 0))))));
11606 if (pos != min_arity)
11607 chkop = newLOGOP(OP_AND, 0,
11609 scalar(newUNOP(OP_RV2AV, 0,
11610 newGVOP(OP_GV, 0, PL_defgv))),
11611 newSVOP(OP_CONST, 0, newSViv(pos))),
11613 initops = op_append_list(OP_LINESEQ,
11614 newSTATEOP(0, NULL, chkop),
11619 op_prepend_elem(OP_ASLICE,
11620 newOP(OP_PUSHMARK, 0),
11621 newLISTOP(OP_ASLICE, 0,
11623 newSVOP(OP_CONST, 0, newSViv(pos)),
11624 newUNOP(OP_AV2ARYLEN, 0,
11625 ref(newUNOP(OP_RV2AV, 0,
11626 newGVOP(OP_GV, 0, PL_defgv)),
11628 ref(newUNOP(OP_RV2AV, 0,
11629 newGVOP(OP_GV, 0, PL_defgv)),
11631 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11632 initops = op_append_list(OP_LINESEQ, initops,
11633 newSTATEOP(0, NULL,
11634 newASSIGNOP(OPf_STACKED, var, 0, slice)));
11641 qerror(Perl_mess(aTHX_ "Parse error"));
11645 c = lex_peek_unichar(0);
11647 case /*(*/')': break;
11650 lex_token_boundary();
11651 lex_read_unichar(0);
11653 c = lex_peek_unichar(0);
11654 } while (c == ',');
11660 if (min_arity != 0) {
11661 initops = op_append_list(OP_LINESEQ,
11662 newSTATEOP(0, NULL,
11665 scalar(newUNOP(OP_RV2AV, 0,
11666 newGVOP(OP_GV, 0, PL_defgv))),
11667 newSVOP(OP_CONST, 0, newSViv(min_arity))),
11668 op_convert_list(OP_DIE, 0,
11669 op_convert_list(OP_SPRINTF, 0,
11670 op_append_list(OP_LIST,
11671 newSVOP(OP_CONST, 0,
11672 newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11674 op_append_list(OP_LIST,
11675 newSVOP(OP_CONST, 0, newSViv(1)),
11676 newSVOP(OP_CONST, 0, newSViv(2))),
11677 newOP(OP_CALLER, 0))))))),
11680 if (max_arity != -1) {
11681 initops = op_append_list(OP_LINESEQ,
11682 newSTATEOP(0, NULL,
11685 scalar(newUNOP(OP_RV2AV, 0,
11686 newGVOP(OP_GV, 0, PL_defgv))),
11687 newSVOP(OP_CONST, 0, newSViv(max_arity))),
11688 op_convert_list(OP_DIE, 0,
11689 op_convert_list(OP_SPRINTF, 0,
11690 op_append_list(OP_LIST,
11691 newSVOP(OP_CONST, 0,
11692 newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11694 op_append_list(OP_LIST,
11695 newSVOP(OP_CONST, 0, newSViv(1)),
11696 newSVOP(OP_CONST, 0, newSViv(2))),
11697 newOP(OP_CALLER, 0))))))),
11705 * c-indentation-style: bsd
11706 * c-basic-offset: 4
11707 * indent-tabs-mode: nil
11710 * ex: set ts=8 sts=4 sw=4 et: