3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
42 #include "invlist_inline.h"
44 #define new_constant(a,b,c,d,e,f,g, h) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_linestr (PL_parser->linestr)
69 #define PL_expect (PL_parser->expect)
70 #define PL_copline (PL_parser->copline)
71 #define PL_bufptr (PL_parser->bufptr)
72 #define PL_oldbufptr (PL_parser->oldbufptr)
73 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
74 #define PL_linestart (PL_parser->linestart)
75 #define PL_bufend (PL_parser->bufend)
76 #define PL_last_uni (PL_parser->last_uni)
77 #define PL_last_lop (PL_parser->last_lop)
78 #define PL_last_lop_op (PL_parser->last_lop_op)
79 #define PL_lex_state (PL_parser->lex_state)
80 #define PL_rsfp (PL_parser->rsfp)
81 #define PL_rsfp_filters (PL_parser->rsfp_filters)
82 #define PL_in_my (PL_parser->in_my)
83 #define PL_in_my_stash (PL_parser->in_my_stash)
84 #define PL_tokenbuf (PL_parser->tokenbuf)
85 #define PL_multi_end (PL_parser->multi_end)
86 #define PL_error_count (PL_parser->error_count)
88 # define PL_nexttoke (PL_parser->nexttoke)
89 # define PL_nexttype (PL_parser->nexttype)
90 # define PL_nextval (PL_parser->nextval)
93 #define SvEVALED(sv) \
94 (SvTYPE(sv) >= SVt_PVNV \
95 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
97 static const char* const ident_too_long = "Identifier too long";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
152 static const char* const lex_state_names[] = {
167 #include "keywords.h"
169 /* CLINE is a macro that ensures PL_copline has a sane value */
171 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
185 * LOOPX : loop exiting command (goto, last, dump, etc)
186 * FTST : file test operator
187 * FUN0 : zero-argument function
188 * FUN0OP : zero-argument function, with its op created in this file
189 * FUN1 : not used, except for not, which isn't a UNIOP
190 * BOop : bitwise or or xor
192 * BCop : bitwise complement
193 * SHop : shift operator
194 * PWop : power operator
195 * PMop : pattern-matching operator
196 * Aop : addition-level operator
197 * AopNOASSIGN : addition-level operator that is never part of .=
198 * Mop : multiplication-level operator
199 * Eop : equality-testing operator
200 * Rop : relational operator <= != gt
202 * Also see LOP and lop() below.
205 #ifdef DEBUGGING /* Serve -DT. */
206 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
208 # define REPORT(retval) (retval)
211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
218 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
219 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
221 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
223 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
224 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
225 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
226 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
227 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
228 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
229 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
231 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
232 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
233 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
234 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
235 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
236 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
237 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
238 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
240 /* This bit of chicanery makes a unary function followed by
241 * a parenthesis into a function with one argument, highest precedence.
242 * The UNIDOR macro is for unary functions that can be followed by the //
243 * operator (such as C<shift // 0>).
245 #define UNI3(f,x,have_x) { \
246 pl_yylval.ival = f; \
247 if (have_x) PL_expect = x; \
249 PL_last_uni = PL_oldbufptr; \
250 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
256 #define UNI(f) UNI3(f,XTERM,1)
257 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
258 #define UNIPROTO(f,optional) { \
259 if (optional) PL_last_uni = PL_oldbufptr; \
263 #define UNIBRACK(f) UNI3(f,0,0)
265 /* grandfather return to old style */
268 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
269 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
270 pl_yylval.ival = (f); \
276 #define COPLINE_INC_WITH_HERELINES \
278 CopLINE_inc(PL_curcop); \
279 if (PL_parser->herelines) \
280 CopLINE(PL_curcop) += PL_parser->herelines, \
281 PL_parser->herelines = 0; \
283 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
284 * is no sublex_push to follow. */
285 #define COPLINE_SET_FROM_MULTI_END \
287 CopLINE_set(PL_curcop, PL_multi_end); \
288 if (PL_multi_end != PL_multi_start) \
289 PL_parser->herelines = 0; \
295 /* how to interpret the pl_yylval associated with the token */
299 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
304 static struct debug_tokens {
306 enum token_type type;
308 } const debug_tokens[] =
310 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
311 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
312 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
313 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
314 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
315 { ARROW, TOKENTYPE_NONE, "ARROW" },
316 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
317 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
318 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
319 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
320 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
321 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
322 { DO, TOKENTYPE_NONE, "DO" },
323 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
324 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
325 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
326 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
327 { ELSE, TOKENTYPE_NONE, "ELSE" },
328 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
329 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
330 { FOR, TOKENTYPE_IVAL, "FOR" },
331 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
332 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
333 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
334 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
335 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
336 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
337 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
338 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
339 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
340 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
341 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
342 { IF, TOKENTYPE_IVAL, "IF" },
343 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
344 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
345 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
346 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
347 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
348 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
349 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
350 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
351 { MY, TOKENTYPE_IVAL, "MY" },
352 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
353 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
354 { OROP, TOKENTYPE_IVAL, "OROP" },
355 { OROR, TOKENTYPE_NONE, "OROR" },
356 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
357 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
358 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
359 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
360 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
361 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
362 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
363 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
364 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
365 { PREINC, TOKENTYPE_NONE, "PREINC" },
366 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
367 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
371 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
372 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
375 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
376 { THING, TOKENTYPE_OPVAL, "THING" },
377 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
378 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
379 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
380 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
381 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
382 { USE, TOKENTYPE_IVAL, "USE" },
383 { WHEN, TOKENTYPE_IVAL, "WHEN" },
384 { WHILE, TOKENTYPE_IVAL, "WHILE" },
385 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
386 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
387 { 0, TOKENTYPE_NONE, NULL }
390 /* dump the returned token in rv, plus any optional arg in pl_yylval */
393 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
395 PERL_ARGS_ASSERT_TOKEREPORT;
398 const char *name = NULL;
399 enum token_type type = TOKENTYPE_NONE;
400 const struct debug_tokens *p;
401 SV* const report = newSVpvs("<== ");
403 for (p = debug_tokens; p->token; p++) {
404 if (p->token == (int)rv) {
411 Perl_sv_catpv(aTHX_ report, name);
412 else if (isGRAPH(rv))
414 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
416 sv_catpvs(report, " (pending identifier)");
419 sv_catpvs(report, "EOF");
421 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
426 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
428 case TOKENTYPE_OPNUM:
429 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
430 PL_op_name[lvalp->ival]);
433 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
435 case TOKENTYPE_OPVAL:
437 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
438 PL_op_name[lvalp->opval->op_type]);
439 if (lvalp->opval->op_type == OP_CONST) {
440 Perl_sv_catpvf(aTHX_ report, " %s",
441 SvPEEK(cSVOPx_sv(lvalp->opval)));
446 sv_catpvs(report, "(opval=null)");
449 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
455 /* print the buffer with suitable escapes */
458 S_printbuf(pTHX_ const char *const fmt, const char *const s)
460 SV* const tmp = newSVpvs("");
462 PERL_ARGS_ASSERT_PRINTBUF;
464 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
465 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 GCC_DIAG_RESTORE_STMT;
475 * This subroutine looks for an '=' next to the operator that has just been
476 * parsed and turns it into an ASSIGNOP if it finds one.
480 S_ao(pTHX_ int toketype)
482 if (*PL_bufptr == '=') {
484 if (toketype == ANDAND)
485 pl_yylval.ival = OP_ANDASSIGN;
486 else if (toketype == OROR)
487 pl_yylval.ival = OP_ORASSIGN;
488 else if (toketype == DORDOR)
489 pl_yylval.ival = OP_DORASSIGN;
492 return REPORT(toketype);
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
507 * PL_bufptr is expected to point to the start of the thing that was found,
508 * and s after the next token or partial token.
512 S_no_op(pTHX_ const char *const what, char *s)
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
517 PERL_ARGS_ASSERT_NO_OP;
523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
524 if (ckWARN_d(WARN_SYNTAX)) {
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
533 for (t = PL_oldoldbufptr;
534 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
535 t += UTF ? UTF8SKIP(t) : 1)
539 if (t < PL_bufptr && isSPACE(*t))
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 "\t(Do you need to predeclare %" UTF8f "?)\n",
542 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Missing operator before %" UTF8f "?)\n",
548 UTF8fARG(UTF, s - oldbp, oldbp));
556 * Complain about missing quote/regexp/heredoc terminator.
557 * If it's called with NULL then it cauterizes the line buffer.
558 * If we're in a delimited string and the delimiter is a control
559 * character, it's reformatted into a two-char sequence like ^C.
564 S_missingterm(pTHX_ char *s, STRLEN len)
566 char tmpbuf[UTF8_MAXBYTES + 1];
571 char * const nl = (char *) my_memrchr(s, '\n', len);
578 else if (PL_multi_close < 32) {
580 tmpbuf[1] = (char)toCTRL(PL_multi_close);
586 if (LIKELY(PL_multi_close < 256)) {
587 *tmpbuf = (char)PL_multi_close;
592 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
599 q = memchr(s, '"', len) ? '\'' : '"';
600 sv = sv_2mortal(newSVpvn(s, len));
603 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
604 " anywhere before EOF", q, SVfARG(sv), q);
610 * Check whether the named feature is enabled.
613 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
615 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
617 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
619 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
621 if (namelen > MAX_FEATURE_LEN)
623 memcpy(&he_name[8], name, namelen);
625 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
626 REFCOUNTED_HE_EXISTS));
630 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
631 * utf16-to-utf8-reversed.
634 #ifdef PERL_CR_FILTER
638 const char *s = SvPVX_const(sv);
639 const char * const e = s + SvCUR(sv);
641 PERL_ARGS_ASSERT_STRIP_RETURN;
643 /* outer loop optimized to do nothing if there are no CR-LFs */
645 if (*s++ == '\r' && *s == '\n') {
646 /* hit a CR-LF, need to copy the rest */
650 if (*s == '\r' && s[1] == '\n')
661 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
663 const I32 count = FILTER_READ(idx+1, sv, maxlen);
664 if (count > 0 && !maxlen)
671 =for apidoc lex_start
673 Creates and initialises a new lexer/parser state object, supplying
674 a context in which to lex and parse from a new source of Perl code.
675 A pointer to the new state object is placed in L</PL_parser>. An entry
676 is made on the save stack so that upon unwinding, the new state object
677 will be destroyed and the former value of L</PL_parser> will be restored.
678 Nothing else need be done to clean up the parsing context.
680 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
681 non-null, provides a string (in SV form) containing code to be parsed.
682 A copy of the string is made, so subsequent modification of C<line>
683 does not affect parsing. C<rsfp>, if non-null, provides an input stream
684 from which code will be read to be parsed. If both are non-null, the
685 code in C<line> comes first and must consist of complete lines of input,
686 and C<rsfp> supplies the remainder of the source.
688 The C<flags> parameter is reserved for future use. Currently it is only
689 used by perl internally, so extensions should always pass zero.
694 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
695 can share filters with the current parser.
696 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
697 caller, hence isn't owned by the parser, so shouldn't be closed on parser
698 destruction. This is used to handle the case of defaulting to reading the
699 script from the standard input because no filename was given on the command
700 line (without getting confused by situation where STDIN has been closed, so
701 the script handle is opened on fd 0) */
704 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
706 const char *s = NULL;
707 yy_parser *parser, *oparser;
709 if (flags && flags & ~LEX_START_FLAGS)
710 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
712 /* create and initialise a parser */
714 Newxz(parser, 1, yy_parser);
715 parser->old_parser = oparser = PL_parser;
718 parser->stack = NULL;
719 parser->stack_max1 = NULL;
722 /* on scope exit, free this parser and restore any outer one */
724 parser->saved_curcop = PL_curcop;
726 /* initialise lexer state */
728 parser->nexttoke = 0;
729 parser->error_count = oparser ? oparser->error_count : 0;
730 parser->copline = parser->preambling = NOLINE;
731 parser->lex_state = LEX_NORMAL;
732 parser->expect = XSTATE;
734 parser->recheck_utf8_validity = TRUE;
735 parser->rsfp_filters =
736 !(flags & LEX_START_SAME_FILTER) || !oparser
738 : MUTABLE_AV(SvREFCNT_inc(
739 oparser->rsfp_filters
740 ? oparser->rsfp_filters
741 : (oparser->rsfp_filters = newAV())
744 Newx(parser->lex_brackstack, 120, char);
745 Newx(parser->lex_casestack, 12, char);
746 *parser->lex_casestack = '\0';
747 Newxz(parser->lex_shared, 1, LEXSHARED);
751 const U8* first_bad_char_loc;
753 s = SvPV_const(line, len);
756 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
758 &first_bad_char_loc)))
760 _force_out_malformed_utf8_message(first_bad_char_loc,
761 (U8 *) s + SvCUR(line),
763 1 /* 1 means die */ );
764 NOT_REACHED; /* NOTREACHED */
767 parser->linestr = flags & LEX_START_COPIED
768 ? SvREFCNT_inc_simple_NN(line)
769 : newSVpvn_flags(s, len, SvUTF8(line));
771 sv_catpvs(parser->linestr, "\n;");
773 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
776 parser->oldoldbufptr =
779 parser->linestart = SvPVX(parser->linestr);
780 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
781 parser->last_lop = parser->last_uni = NULL;
783 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
784 |LEX_DONT_CLOSE_RSFP));
785 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
786 |LEX_DONT_CLOSE_RSFP));
788 parser->in_pod = parser->filtered = 0;
792 /* delete a parser object */
795 Perl_parser_free(pTHX_ const yy_parser *parser)
797 PERL_ARGS_ASSERT_PARSER_FREE;
799 PL_curcop = parser->saved_curcop;
800 SvREFCNT_dec(parser->linestr);
802 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
803 PerlIO_clearerr(parser->rsfp);
804 else if (parser->rsfp && (!parser->old_parser
805 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
806 PerlIO_close(parser->rsfp);
807 SvREFCNT_dec(parser->rsfp_filters);
808 SvREFCNT_dec(parser->lex_stuff);
809 SvREFCNT_dec(parser->lex_sub_repl);
811 Safefree(parser->lex_brackstack);
812 Safefree(parser->lex_casestack);
813 Safefree(parser->lex_shared);
814 PL_parser = parser->old_parser;
819 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
821 I32 nexttoke = parser->nexttoke;
822 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
824 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
825 && parser->nextval[nexttoke].opval
826 && parser->nextval[nexttoke].opval->op_slabbed
827 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
828 op_free(parser->nextval[nexttoke].opval);
829 parser->nextval[nexttoke].opval = NULL;
836 =for apidoc AmxUN|SV *|PL_parser-E<gt>linestr
838 Buffer scalar containing the chunk currently under consideration of the
839 text currently being lexed. This is always a plain string scalar (for
840 which C<SvPOK> is true). It is not intended to be used as a scalar by
841 normal scalar means; instead refer to the buffer directly by the pointer
842 variables described below.
844 The lexer maintains various C<char*> pointers to things in the
845 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
846 reallocated, all of these pointers must be updated. Don't attempt to
847 do this manually, but rather use L</lex_grow_linestr> if you need to
848 reallocate the buffer.
850 The content of the text chunk in the buffer is commonly exactly one
851 complete line of input, up to and including a newline terminator,
852 but there are situations where it is otherwise. The octets of the
853 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
854 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
855 flag on this scalar, which may disagree with it.
857 For direct examination of the buffer, the variable
858 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
859 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
860 of these pointers is usually preferable to examination of the scalar
861 through normal scalar means.
863 =for apidoc AmxUN|char *|PL_parser-E<gt>bufend
865 Direct pointer to the end of the chunk of text currently being lexed, the
866 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
867 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
868 always located at the end of the buffer, and does not count as part of
869 the buffer's contents.
871 =for apidoc AmxUN|char *|PL_parser-E<gt>bufptr
873 Points to the current position of lexing inside the lexer buffer.
874 Characters around this point may be freely examined, within
875 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
876 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
877 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
879 Lexing code (whether in the Perl core or not) moves this pointer past
880 the characters that it consumes. It is also expected to perform some
881 bookkeeping whenever a newline character is consumed. This movement
882 can be more conveniently performed by the function L</lex_read_to>,
883 which handles newlines appropriately.
885 Interpretation of the buffer's octets can be abstracted out by
886 using the slightly higher-level functions L</lex_peek_unichar> and
887 L</lex_read_unichar>.
889 =for apidoc AmxUN|char *|PL_parser-E<gt>linestart
891 Points to the start of the current line inside the lexer buffer.
892 This is useful for indicating at which column an error occurred, and
893 not much else. This must be updated by any lexing code that consumes
894 a newline; the function L</lex_read_to> handles this detail.
900 =for apidoc lex_bufutf8
902 Indicates whether the octets in the lexer buffer
903 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
904 of Unicode characters. If not, they should be interpreted as Latin-1
905 characters. This is analogous to the C<SvUTF8> flag for scalars.
907 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
908 contains valid UTF-8. Lexing code must be robust in the face of invalid
911 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
912 is significant, but not the whole story regarding the input character
913 encoding. Normally, when a file is being read, the scalar contains octets
914 and its C<SvUTF8> flag is off, but the octets should be interpreted as
915 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
916 however, the scalar may have the C<SvUTF8> flag on, and in this case its
917 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
918 is in effect. This logic may change in the future; use this function
919 instead of implementing the logic yourself.
925 Perl_lex_bufutf8(pTHX)
931 =for apidoc lex_grow_linestr
933 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
934 at least C<len> octets (including terminating C<NUL>). Returns a
935 pointer to the reallocated buffer. This is necessary before making
936 any direct modification of the buffer that would increase its length.
937 L</lex_stuff_pvn> provides a more convenient way to insert text into
940 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
941 this function updates all of the lexer's variables that point directly
948 Perl_lex_grow_linestr(pTHX_ STRLEN len)
952 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
953 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
956 linestr = PL_parser->linestr;
957 buf = SvPVX(linestr);
958 if (len <= SvLEN(linestr))
961 /* Is the lex_shared linestr SV the same as the current linestr SV?
962 * Only in this case does re_eval_start need adjusting, since it
963 * points within lex_shared->ls_linestr's buffer */
964 current = ( !PL_parser->lex_shared->ls_linestr
965 || linestr == PL_parser->lex_shared->ls_linestr);
967 bufend_pos = PL_parser->bufend - buf;
968 bufptr_pos = PL_parser->bufptr - buf;
969 oldbufptr_pos = PL_parser->oldbufptr - buf;
970 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
971 linestart_pos = PL_parser->linestart - buf;
972 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
973 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
974 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
975 PL_parser->lex_shared->re_eval_start - buf : 0;
977 buf = sv_grow(linestr, len);
979 PL_parser->bufend = buf + bufend_pos;
980 PL_parser->bufptr = buf + bufptr_pos;
981 PL_parser->oldbufptr = buf + oldbufptr_pos;
982 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
983 PL_parser->linestart = buf + linestart_pos;
984 if (PL_parser->last_uni)
985 PL_parser->last_uni = buf + last_uni_pos;
986 if (PL_parser->last_lop)
987 PL_parser->last_lop = buf + last_lop_pos;
988 if (current && PL_parser->lex_shared->re_eval_start)
989 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
994 =for apidoc lex_stuff_pvn
996 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
997 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
998 reallocating the buffer if necessary. This means that lexing code that
999 runs later will see the characters as if they had appeared in the input.
1000 It is not recommended to do this as part of normal parsing, and most
1001 uses of this facility run the risk of the inserted characters being
1002 interpreted in an unintended manner.
1004 The string to be inserted is represented by C<len> octets starting
1005 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1006 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1007 The characters are recoded for the lexer buffer, according to how the
1008 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1009 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1010 function is more convenient.
1016 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1020 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1021 if (flags & ~(LEX_STUFF_UTF8))
1022 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1024 if (flags & LEX_STUFF_UTF8) {
1027 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1029 const char *p, *e = pv+len;;
1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 bufptr = PL_parser->bufptr;
1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 SvCUR_set(PL_parser->linestr,
1036 SvCUR(PL_parser->linestr) + len+highhalf);
1037 PL_parser->bufend += len+highhalf;
1038 for (p = pv; p != e; p++) {
1039 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1043 if (flags & LEX_STUFF_UTF8) {
1044 STRLEN highhalf = 0;
1045 const char *p, *e = pv+len;
1046 for (p = pv; p != e; p++) {
1048 if (UTF8_IS_ABOVE_LATIN1(c)) {
1049 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1050 "non-Latin-1 character into Latin-1 input");
1051 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1054 } else assert(UTF8_IS_INVARIANT(c));
1058 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1059 bufptr = PL_parser->bufptr;
1060 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1061 SvCUR_set(PL_parser->linestr,
1062 SvCUR(PL_parser->linestr) + len-highhalf);
1063 PL_parser->bufend += len-highhalf;
1066 if (UTF8_IS_INVARIANT(*p)) {
1072 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1078 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1079 bufptr = PL_parser->bufptr;
1080 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1081 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1082 PL_parser->bufend += len;
1083 Copy(pv, bufptr, len, char);
1089 =for apidoc lex_stuff_pv
1091 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1092 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1093 reallocating the buffer if necessary. This means that lexing code that
1094 runs later will see the characters as if they had appeared in the input.
1095 It is not recommended to do this as part of normal parsing, and most
1096 uses of this facility run the risk of the inserted characters being
1097 interpreted in an unintended manner.
1099 The string to be inserted is represented by octets starting at C<pv>
1100 and continuing to the first nul. These octets are interpreted as either
1101 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1102 in C<flags>. The characters are recoded for the lexer buffer, according
1103 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1104 If it is not convenient to nul-terminate a string to be inserted, the
1105 L</lex_stuff_pvn> function is more appropriate.
1111 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1113 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1114 lex_stuff_pvn(pv, strlen(pv), flags);
1118 =for apidoc lex_stuff_sv
1120 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1121 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1122 reallocating the buffer if necessary. This means that lexing code that
1123 runs later will see the characters as if they had appeared in the input.
1124 It is not recommended to do this as part of normal parsing, and most
1125 uses of this facility run the risk of the inserted characters being
1126 interpreted in an unintended manner.
1128 The string to be inserted is the string value of C<sv>. The characters
1129 are recoded for the lexer buffer, according to how the buffer is currently
1130 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1131 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1132 need to construct a scalar.
1138 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1142 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1144 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1146 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1150 =for apidoc lex_unstuff
1152 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1153 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1154 This hides the discarded text from any lexing code that runs later,
1155 as if the text had never appeared.
1157 This is not the normal way to consume lexed text. For that, use
1164 Perl_lex_unstuff(pTHX_ char *ptr)
1168 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1169 buf = PL_parser->bufptr;
1171 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1174 bufend = PL_parser->bufend;
1176 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1177 unstuff_len = ptr - buf;
1178 Move(ptr, buf, bufend+1-ptr, char);
1179 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1180 PL_parser->bufend = bufend - unstuff_len;
1184 =for apidoc lex_read_to
1186 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1187 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1188 performing the correct bookkeeping whenever a newline character is passed.
1189 This is the normal way to consume lexed text.
1191 Interpretation of the buffer's octets can be abstracted out by
1192 using the slightly higher-level functions L</lex_peek_unichar> and
1193 L</lex_read_unichar>.
1199 Perl_lex_read_to(pTHX_ char *ptr)
1202 PERL_ARGS_ASSERT_LEX_READ_TO;
1203 s = PL_parser->bufptr;
1204 if (ptr < s || ptr > PL_parser->bufend)
1205 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1206 for (; s != ptr; s++)
1208 COPLINE_INC_WITH_HERELINES;
1209 PL_parser->linestart = s+1;
1211 PL_parser->bufptr = ptr;
1215 =for apidoc lex_discard_to
1217 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1218 up to C<ptr>. The remaining content of the buffer will be moved, and
1219 all pointers into the buffer updated appropriately. C<ptr> must not
1220 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1221 it is not permitted to discard text that has yet to be lexed.
1223 Normally it is not necessarily to do this directly, because it suffices to
1224 use the implicit discarding behaviour of L</lex_next_chunk> and things
1225 based on it. However, if a token stretches across multiple lines,
1226 and the lexing code has kept multiple lines of text in the buffer for
1227 that purpose, then after completion of the token it would be wise to
1228 explicitly discard the now-unneeded earlier lines, to avoid future
1229 multi-line tokens growing the buffer without bound.
1235 Perl_lex_discard_to(pTHX_ char *ptr)
1239 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1240 buf = SvPVX(PL_parser->linestr);
1242 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1245 if (ptr > PL_parser->bufptr)
1246 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247 discard_len = ptr - buf;
1248 if (PL_parser->oldbufptr < ptr)
1249 PL_parser->oldbufptr = ptr;
1250 if (PL_parser->oldoldbufptr < ptr)
1251 PL_parser->oldoldbufptr = ptr;
1252 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1253 PL_parser->last_uni = NULL;
1254 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1255 PL_parser->last_lop = NULL;
1256 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1257 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1258 PL_parser->bufend -= discard_len;
1259 PL_parser->bufptr -= discard_len;
1260 PL_parser->oldbufptr -= discard_len;
1261 PL_parser->oldoldbufptr -= discard_len;
1262 if (PL_parser->last_uni)
1263 PL_parser->last_uni -= discard_len;
1264 if (PL_parser->last_lop)
1265 PL_parser->last_lop -= discard_len;
1269 Perl_notify_parser_that_changed_to_utf8(pTHX)
1271 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1272 * off to on. At compile time, this has the effect of entering a 'use
1273 * utf8' section. This means that any input was not previously checked for
1274 * UTF-8 (because it was off), but now we do need to check it, or our
1275 * assumptions about the input being sane could be wrong, and we could
1276 * segfault. This routine just sets a flag so that the next time we look
1277 * at the input we do the well-formed UTF-8 check. If we aren't in the
1278 * proper phase, there may not be a parser object, but if there is, setting
1279 * the flag is harmless */
1282 PL_parser->recheck_utf8_validity = TRUE;
1287 =for apidoc lex_next_chunk
1289 Reads in the next chunk of text to be lexed, appending it to
1290 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1291 looked to the end of the current chunk and wants to know more. It is
1292 usual, but not necessary, for lexing to have consumed the entirety of
1293 the current chunk at this time.
1295 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1296 chunk (i.e., the current chunk has been entirely consumed), normally the
1297 current chunk will be discarded at the same time that the new chunk is
1298 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1299 will not be discarded. If the current chunk has not been entirely
1300 consumed, then it will not be discarded regardless of the flag.
1302 Returns true if some new text was added to the buffer, or false if the
1303 buffer has reached the end of the input text.
1308 #define LEX_FAKE_EOF 0x80000000
1309 #define LEX_NO_TERM 0x40000000 /* here-doc */
1312 Perl_lex_next_chunk(pTHX_ U32 flags)
1316 STRLEN old_bufend_pos, new_bufend_pos;
1317 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1318 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1319 bool got_some_for_debugger = 0;
1322 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1323 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1324 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1326 linestr = PL_parser->linestr;
1327 buf = SvPVX(linestr);
1328 if (!(flags & LEX_KEEP_PREVIOUS)
1329 && PL_parser->bufptr == PL_parser->bufend)
1331 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1333 if (PL_parser->last_uni != PL_parser->bufend)
1334 PL_parser->last_uni = NULL;
1335 if (PL_parser->last_lop != PL_parser->bufend)
1336 PL_parser->last_lop = NULL;
1337 last_uni_pos = last_lop_pos = 0;
1339 SvCUR_set(linestr, 0);
1341 old_bufend_pos = PL_parser->bufend - buf;
1342 bufptr_pos = PL_parser->bufptr - buf;
1343 oldbufptr_pos = PL_parser->oldbufptr - buf;
1344 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1345 linestart_pos = PL_parser->linestart - buf;
1346 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1347 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1349 if (flags & LEX_FAKE_EOF) {
1351 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1353 } else if (filter_gets(linestr, old_bufend_pos)) {
1355 got_some_for_debugger = 1;
1356 } else if (flags & LEX_NO_TERM) {
1359 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1362 /* End of real input. Close filehandle (unless it was STDIN),
1363 * then add implicit termination.
1365 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1366 PerlIO_clearerr(PL_parser->rsfp);
1367 else if (PL_parser->rsfp)
1368 (void)PerlIO_close(PL_parser->rsfp);
1369 PL_parser->rsfp = NULL;
1370 PL_parser->in_pod = PL_parser->filtered = 0;
1371 if (!PL_in_eval && PL_minus_p) {
1373 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1374 PL_minus_n = PL_minus_p = 0;
1375 } else if (!PL_in_eval && PL_minus_n) {
1376 sv_catpvs(linestr, /*{*/";}");
1379 sv_catpvs(linestr, ";");
1382 buf = SvPVX(linestr);
1383 new_bufend_pos = SvCUR(linestr);
1384 PL_parser->bufend = buf + new_bufend_pos;
1385 PL_parser->bufptr = buf + bufptr_pos;
1388 const U8* first_bad_char_loc;
1389 if (UNLIKELY(! is_utf8_string_loc(
1390 (U8 *) PL_parser->bufptr,
1391 PL_parser->bufend - PL_parser->bufptr,
1392 &first_bad_char_loc)))
1394 _force_out_malformed_utf8_message(first_bad_char_loc,
1395 (U8 *) PL_parser->bufend,
1397 1 /* 1 means die */ );
1398 NOT_REACHED; /* NOTREACHED */
1402 PL_parser->oldbufptr = buf + oldbufptr_pos;
1403 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1404 PL_parser->linestart = buf + linestart_pos;
1405 if (PL_parser->last_uni)
1406 PL_parser->last_uni = buf + last_uni_pos;
1407 if (PL_parser->last_lop)
1408 PL_parser->last_lop = buf + last_lop_pos;
1409 if (PL_parser->preambling != NOLINE) {
1410 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1411 PL_parser->preambling = NOLINE;
1413 if ( got_some_for_debugger
1414 && PERLDB_LINE_OR_SAVESRC
1415 && PL_curstash != PL_debstash)
1417 /* debugger active and we're not compiling the debugger code,
1418 * so store the line into the debugger's array of lines
1420 update_debugger_info(NULL, buf+old_bufend_pos,
1421 new_bufend_pos-old_bufend_pos);
1427 =for apidoc lex_peek_unichar
1429 Looks ahead one (Unicode) character in the text currently being lexed.
1430 Returns the codepoint (unsigned integer value) of the next character,
1431 or -1 if lexing has reached the end of the input text. To consume the
1432 peeked character, use L</lex_read_unichar>.
1434 If the next character is in (or extends into) the next chunk of input
1435 text, the next chunk will be read in. Normally the current chunk will be
1436 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1437 bit set, then the current chunk will not be discarded.
1439 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1440 is encountered, an exception is generated.
1446 Perl_lex_peek_unichar(pTHX_ U32 flags)
1450 if (flags & ~(LEX_KEEP_PREVIOUS))
1451 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1452 s = PL_parser->bufptr;
1453 bufend = PL_parser->bufend;
1459 if (!lex_next_chunk(flags))
1461 s = PL_parser->bufptr;
1462 bufend = PL_parser->bufend;
1465 if (UTF8_IS_INVARIANT(head))
1467 if (UTF8_IS_START(head)) {
1468 len = UTF8SKIP(&head);
1469 while ((STRLEN)(bufend-s) < len) {
1470 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1472 s = PL_parser->bufptr;
1473 bufend = PL_parser->bufend;
1476 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1477 if (retlen == (STRLEN)-1) {
1478 _force_out_malformed_utf8_message((U8 *) s,
1481 1 /* 1 means die */ );
1482 NOT_REACHED; /* NOTREACHED */
1487 if (!lex_next_chunk(flags))
1489 s = PL_parser->bufptr;
1496 =for apidoc lex_read_unichar
1498 Reads the next (Unicode) character in the text currently being lexed.
1499 Returns the codepoint (unsigned integer value) of the character read,
1500 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1501 if lexing has reached the end of the input text. To non-destructively
1502 examine the next character, use L</lex_peek_unichar> instead.
1504 If the next character is in (or extends into) the next chunk of input
1505 text, the next chunk will be read in. Normally the current chunk will be
1506 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1507 bit set, then the current chunk will not be discarded.
1509 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1510 is encountered, an exception is generated.
1516 Perl_lex_read_unichar(pTHX_ U32 flags)
1519 if (flags & ~(LEX_KEEP_PREVIOUS))
1520 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1521 c = lex_peek_unichar(flags);
1524 COPLINE_INC_WITH_HERELINES;
1526 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1528 ++(PL_parser->bufptr);
1534 =for apidoc lex_read_space
1536 Reads optional spaces, in Perl style, in the text currently being
1537 lexed. The spaces may include ordinary whitespace characters and
1538 Perl-style comments. C<#line> directives are processed if encountered.
1539 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1540 at a non-space character (or the end of the input text).
1542 If spaces extend into the next chunk of input text, the next chunk will
1543 be read in. Normally the current chunk will be discarded at the same
1544 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1545 chunk will not be discarded.
1550 #define LEX_NO_INCLINE 0x40000000
1551 #define LEX_NO_NEXT_CHUNK 0x80000000
1554 Perl_lex_read_space(pTHX_ U32 flags)
1557 const bool can_incline = !(flags & LEX_NO_INCLINE);
1558 bool need_incline = 0;
1559 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1560 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1561 s = PL_parser->bufptr;
1562 bufend = PL_parser->bufend;
1568 } while (!(c == '\n' || (c == 0 && s == bufend)));
1569 } else if (c == '\n') {
1572 PL_parser->linestart = s;
1578 } else if (isSPACE(c)) {
1580 } else if (c == 0 && s == bufend) {
1583 if (flags & LEX_NO_NEXT_CHUNK)
1585 PL_parser->bufptr = s;
1586 l = CopLINE(PL_curcop);
1587 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1588 got_more = lex_next_chunk(flags);
1589 CopLINE_set(PL_curcop, l);
1590 s = PL_parser->bufptr;
1591 bufend = PL_parser->bufend;
1594 if (can_incline && need_incline && PL_parser->rsfp) {
1604 PL_parser->bufptr = s;
1609 =for apidoc validate_proto
1611 This function performs syntax checking on a prototype, C<proto>.
1612 If C<warn> is true, any illegal characters or mismatched brackets
1613 will trigger illegalproto warnings, declaring that they were
1614 detected in the prototype for C<name>.
1616 The return value is C<true> if this is a valid prototype, and
1617 C<false> if it is not, regardless of whether C<warn> was C<true> or
1620 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1627 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1629 STRLEN len, origlen;
1631 bool bad_proto = FALSE;
1632 bool in_brackets = FALSE;
1633 bool after_slash = FALSE;
1634 char greedy_proto = ' ';
1635 bool proto_after_greedy_proto = FALSE;
1636 bool must_be_last = FALSE;
1637 bool underscore = FALSE;
1638 bool bad_proto_after_underscore = FALSE;
1640 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1645 p = SvPV(proto, len);
1647 for (; len--; p++) {
1650 proto_after_greedy_proto = TRUE;
1652 if (!strchr(";@%", *p))
1653 bad_proto_after_underscore = TRUE;
1656 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1663 in_brackets = FALSE;
1664 else if ((*p == '@' || *p == '%')
1668 must_be_last = TRUE;
1677 after_slash = FALSE;
1682 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1685 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1686 origlen, UNI_DISPLAY_ISPRINT)
1687 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1689 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1690 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1691 sv_catpvs(name2, "::");
1692 sv_catsv(name2, (SV *)name);
1696 if (proto_after_greedy_proto)
1697 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1698 "Prototype after '%c' for %" SVf " : %s",
1699 greedy_proto, SVfARG(name), p);
1701 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1702 "Missing ']' in prototype for %" SVf " : %s",
1705 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1706 "Illegal character in prototype for %" SVf " : %s",
1708 if (bad_proto_after_underscore)
1709 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1710 "Illegal character after '_' in prototype for %" SVf " : %s",
1714 return (! (proto_after_greedy_proto || bad_proto) );
1719 * This subroutine has nothing to do with tilting, whether at windmills
1720 * or pinball tables. Its name is short for "increment line". It
1721 * increments the current line number in CopLINE(PL_curcop) and checks
1722 * to see whether the line starts with a comment of the form
1723 * # line 500 "foo.pm"
1724 * If so, it sets the current line number and file to the values in the comment.
1728 S_incline(pTHX_ const char *s, const char *end)
1736 PERL_ARGS_ASSERT_INCLINE;
1740 COPLINE_INC_WITH_HERELINES;
1741 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1742 && s+1 == PL_bufend && *s == ';') {
1743 /* fake newline in string eval */
1744 CopLINE_dec(PL_curcop);
1749 while (SPACE_OR_TAB(*s))
1751 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1752 s += sizeof("line") - 1;
1755 if (SPACE_OR_TAB(*s))
1759 while (SPACE_OR_TAB(*s))
1767 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1769 while (SPACE_OR_TAB(*s))
1771 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1777 while (*t && !isSPACE(*t))
1781 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1783 if (*e != '\n' && *e != '\0')
1784 return; /* false alarm */
1786 if (!grok_atoUV(n, &uv, &e))
1788 line_num = ((line_t)uv) - 1;
1791 const STRLEN len = t - s;
1793 if (!PL_rsfp && !PL_parser->filtered) {
1794 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1795 * to *{"::_<newfilename"} */
1796 /* However, the long form of evals is only turned on by the
1797 debugger - usually they're "(eval %lu)" */
1798 GV * const cfgv = CopFILEGV(PL_curcop);
1801 STRLEN tmplen2 = len;
1805 if (tmplen2 + 2 <= sizeof smallbuf)
1808 Newx(tmpbuf2, tmplen2 + 2, char);
1813 memcpy(tmpbuf2 + 2, s, tmplen2);
1816 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1818 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1819 /* adjust ${"::_<newfilename"} to store the new file name */
1820 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1821 /* The line number may differ. If that is the case,
1822 alias the saved lines that are in the array.
1823 Otherwise alias the whole array. */
1824 if (CopLINE(PL_curcop) == line_num) {
1825 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1826 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1828 else if (GvAV(cfgv)) {
1829 AV * const av = GvAV(cfgv);
1830 const line_t start = CopLINE(PL_curcop)+1;
1831 SSize_t items = AvFILLp(av) - start;
1833 AV * const av2 = GvAVn(gv2);
1834 SV **svp = AvARRAY(av) + start;
1835 Size_t l = line_num+1;
1836 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1837 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1842 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1845 CopFILE_free(PL_curcop);
1846 CopFILE_setn(PL_curcop, s, len);
1848 CopLINE_set(PL_curcop, line_num);
1852 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1854 AV *av = CopFILEAVx(PL_curcop);
1857 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1859 sv = *av_fetch(av, 0, 1);
1860 SvUPGRADE(sv, SVt_PVMG);
1862 if (!SvPOK(sv)) SvPVCLEAR(sv);
1864 sv_catsv(sv, orig_sv);
1866 sv_catpvn(sv, buf, len);
1871 if (PL_parser->preambling == NOLINE)
1872 av_store(av, CopLINE(PL_curcop), sv);
1878 * Called to gobble the appropriate amount and type of whitespace.
1879 * Skips comments as well.
1880 * Returns the next character after the whitespace that is skipped.
1883 * Same thing, but look ahead without incrementing line numbers or
1884 * adjusting PL_linestart.
1887 #define skipspace(s) skipspace_flags(s, 0)
1888 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1891 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1893 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1894 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1895 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1898 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1900 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1901 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1902 LEX_NO_NEXT_CHUNK : 0));
1904 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1905 if (PL_linestart > PL_bufptr)
1906 PL_bufptr = PL_linestart;
1914 * Check the unary operators to ensure there's no ambiguity in how they're
1915 * used. An ambiguous piece of code would be:
1917 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1918 * the +5 is its argument.
1926 if (PL_oldoldbufptr != PL_last_uni)
1928 while (isSPACE(*PL_last_uni))
1931 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1932 s += UTF ? UTF8SKIP(s) : 1;
1933 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1936 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1937 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1938 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1942 * LOP : macro to build a list operator. Its behaviour has been replaced
1943 * with a subroutine, S_lop() for which LOP is just another name.
1946 #define LOP(f,x) return lop(f,x,s)
1950 * Build a list operator (or something that might be one). The rules:
1951 * - if we have a next token, then it's a list operator (no parens) for
1952 * which the next token has already been parsed; e.g.,
1955 * - if the next thing is an opening paren, then it's a function
1956 * - else it's a list operator
1960 S_lop(pTHX_ I32 f, U8 x, char *s)
1962 PERL_ARGS_ASSERT_LOP;
1967 PL_last_lop = PL_oldbufptr;
1968 PL_last_lop_op = (OPCODE)f;
1973 return REPORT(FUNC);
1976 return REPORT(FUNC);
1979 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1980 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1981 return REPORT(LSTOP);
1987 * When the lexer realizes it knows the next token (for instance,
1988 * it is reordering tokens for the parser) then it can call S_force_next
1989 * to know what token to return the next time the lexer is called. Caller
1990 * will need to set PL_nextval[] and possibly PL_expect to ensure
1991 * the lexer handles the token correctly.
1995 S_force_next(pTHX_ I32 type)
1999 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2000 tokereport(type, &NEXTVAL_NEXTTOKE);
2003 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2004 PL_nexttype[PL_nexttoke] = type;
2011 * This subroutine handles postfix deref syntax after the arrow has already
2012 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2013 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2014 * only the first, leaving yylex to find the next.
2018 S_postderef(pTHX_ int const funny, char const next)
2020 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2022 PL_expect = XOPERATOR;
2023 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2024 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2025 PL_lex_state = LEX_INTERPEND;
2027 force_next(POSTJOIN);
2033 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2034 && !PL_lex_brackets)
2036 PL_expect = XOPERATOR;
2045 int yyc = PL_parser->yychar;
2046 if (yyc != YYEMPTY) {
2048 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2049 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2050 PL_lex_allbrackets--;
2052 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2053 } else if (yyc == '('/*)*/) {
2054 PL_lex_allbrackets--;
2059 PL_parser->yychar = YYEMPTY;
2064 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2066 SV * const sv = newSVpvn_utf8(start, len,
2070 && is_utf8_non_invariant_string((const U8*)start, len));
2076 * When the lexer knows the next thing is a word (for instance, it has
2077 * just seen -> and it knows that the next char is a word char, then
2078 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2082 * char *start : buffer position (must be within PL_linestr)
2083 * int token : PL_next* will be this type of bare word
2084 * (e.g., METHOD,BAREWORD)
2085 * int check_keyword : if true, Perl checks to make sure the word isn't
2086 * a keyword (do this if the word is a label, e.g. goto FOO)
2087 * int allow_pack : if true, : characters will also be allowed (require,
2088 * use, etc. do this)
2092 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2097 PERL_ARGS_ASSERT_FORCE_WORD;
2099 start = skipspace(start);
2101 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2102 || (allow_pack && *s == ':' && s[1] == ':') )
2104 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2105 if (check_keyword) {
2106 char *s2 = PL_tokenbuf;
2108 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2109 s2 += sizeof("CORE::") - 1;
2110 len2 -= sizeof("CORE::") - 1;
2112 if (keyword(s2, len2, 0))
2115 if (token == METHOD) {
2120 PL_expect = XOPERATOR;
2123 NEXTVAL_NEXTTOKE.opval
2124 = newSVOP(OP_CONST,0,
2125 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2126 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2134 * Called when the lexer wants $foo *foo &foo etc, but the program
2135 * text only contains the "foo" portion. The first argument is a pointer
2136 * to the "foo", and the second argument is the type symbol to prefix.
2137 * Forces the next token to be a "BAREWORD".
2138 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2142 S_force_ident(pTHX_ const char *s, int kind)
2144 PERL_ARGS_ASSERT_FORCE_IDENT;
2147 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2148 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2149 UTF ? SVf_UTF8 : 0));
2150 NEXTVAL_NEXTTOKE.opval = o;
2151 force_next(BAREWORD);
2153 o->op_private = OPpCONST_ENTERED;
2154 /* XXX see note in pp_entereval() for why we forgo typo
2155 warnings if the symbol must be introduced in an eval.
2157 gv_fetchpvn_flags(s, len,
2158 (PL_in_eval ? GV_ADDMULTI
2159 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2160 kind == '$' ? SVt_PV :
2161 kind == '@' ? SVt_PVAV :
2162 kind == '%' ? SVt_PVHV :
2170 S_force_ident_maybe_lex(pTHX_ char pit)
2172 NEXTVAL_NEXTTOKE.ival = pit;
2177 Perl_str_to_version(pTHX_ SV *sv)
2182 const char *start = SvPV_const(sv,len);
2183 const char * const end = start + len;
2184 const bool utf = cBOOL(SvUTF8(sv));
2186 PERL_ARGS_ASSERT_STR_TO_VERSION;
2188 while (start < end) {
2192 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2197 retval += ((NV)n)/nshift;
2206 * Forces the next token to be a version number.
2207 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2208 * and if "guessing" is TRUE, then no new token is created (and the caller
2209 * must use an alternative parsing method).
2213 S_force_version(pTHX_ char *s, int guessing)
2218 PERL_ARGS_ASSERT_FORCE_VERSION;
2226 while (isDIGIT(*d) || *d == '_' || *d == '.')
2228 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2230 s = scan_num(s, &pl_yylval);
2231 version = pl_yylval.opval;
2232 ver = cSVOPx(version)->op_sv;
2233 if (SvPOK(ver) && !SvNIOK(ver)) {
2234 SvUPGRADE(ver, SVt_PVNV);
2235 SvNV_set(ver, str_to_version(ver));
2236 SvNOK_on(ver); /* hint that it is a version */
2239 else if (guessing) {
2244 /* NOTE: The parser sees the package name and the VERSION swapped */
2245 NEXTVAL_NEXTTOKE.opval = version;
2246 force_next(BAREWORD);
2252 * S_force_strict_version
2253 * Forces the next token to be a version number using strict syntax rules.
2257 S_force_strict_version(pTHX_ char *s)
2260 const char *errstr = NULL;
2262 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2264 while (isSPACE(*s)) /* leading whitespace */
2267 if (is_STRICT_VERSION(s,&errstr)) {
2269 s = (char *)scan_version(s, ver, 0);
2270 version = newSVOP(OP_CONST, 0, ver);
2272 else if ((*s != ';' && *s != '{' && *s != '}' )
2273 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2277 yyerror(errstr); /* version required */
2281 /* NOTE: The parser sees the package name and the VERSION swapped */
2282 NEXTVAL_NEXTTOKE.opval = version;
2283 force_next(BAREWORD);
2290 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2291 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2292 * unchanged, and a new SV containing the modified input is returned.
2296 S_tokeq(pTHX_ SV *sv)
2303 PERL_ARGS_ASSERT_TOKEQ;
2307 assert (!SvIsCOW(sv));
2308 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2312 /* This is relying on the SV being "well formed" with a trailing '\0' */
2313 while (s < send && !(*s == '\\' && s[1] == '\\'))
2318 if ( PL_hints & HINT_NEW_STRING ) {
2319 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2320 SVs_TEMP | SvUTF8(sv));
2324 if (s + 1 < send && (s[1] == '\\'))
2325 s++; /* all that, just for this */
2330 SvCUR_set(sv, d - SvPVX_const(sv));
2332 if ( PL_hints & HINT_NEW_STRING )
2333 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2338 * Now come three functions related to double-quote context,
2339 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2340 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2341 * interact with PL_lex_state, and create fake ( ... ) argument lists
2342 * to handle functions and concatenation.
2346 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2351 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2353 * Pattern matching will set PL_lex_op to the pattern-matching op to
2354 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2356 * OP_CONST is easy--just make the new op and return.
2358 * Everything else becomes a FUNC.
2360 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2361 * had an OP_CONST. This just sets us up for a
2362 * call to S_sublex_push().
2366 S_sublex_start(pTHX)
2368 const I32 op_type = pl_yylval.ival;
2370 if (op_type == OP_NULL) {
2371 pl_yylval.opval = PL_lex_op;
2375 if (op_type == OP_CONST) {
2376 SV *sv = PL_lex_stuff;
2377 PL_lex_stuff = NULL;
2380 if (SvTYPE(sv) == SVt_PVIV) {
2381 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2383 const char * const p = SvPV_const(sv, len);
2384 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2388 pl_yylval.opval = newSVOP(op_type, 0, sv);
2392 PL_parser->lex_super_state = PL_lex_state;
2393 PL_parser->lex_sub_inwhat = (U16)op_type;
2394 PL_parser->lex_sub_op = PL_lex_op;
2395 PL_parser->sub_no_recover = FALSE;
2396 PL_parser->sub_error_count = PL_error_count;
2397 PL_lex_state = LEX_INTERPPUSH;
2401 pl_yylval.opval = PL_lex_op;
2411 * Create a new scope to save the lexing state. The scope will be
2412 * ended in S_sublex_done. Returns a '(', starting the function arguments
2413 * to the uc, lc, etc. found before.
2414 * Sets PL_lex_state to LEX_INTERPCONCAT.
2421 const bool is_heredoc = PL_multi_close == '<';
2424 PL_lex_state = PL_parser->lex_super_state;
2425 SAVEI8(PL_lex_dojoin);
2426 SAVEI32(PL_lex_brackets);
2427 SAVEI32(PL_lex_allbrackets);
2428 SAVEI32(PL_lex_formbrack);
2429 SAVEI8(PL_lex_fakeeof);
2430 SAVEI32(PL_lex_casemods);
2431 SAVEI32(PL_lex_starts);
2432 SAVEI8(PL_lex_state);
2433 SAVESPTR(PL_lex_repl);
2434 SAVEVPTR(PL_lex_inpat);
2435 SAVEI16(PL_lex_inwhat);
2438 SAVECOPLINE(PL_curcop);
2439 SAVEI32(PL_multi_end);
2440 SAVEI32(PL_parser->herelines);
2441 PL_parser->herelines = 0;
2443 SAVEIV(PL_multi_close);
2444 SAVEPPTR(PL_bufptr);
2445 SAVEPPTR(PL_bufend);
2446 SAVEPPTR(PL_oldbufptr);
2447 SAVEPPTR(PL_oldoldbufptr);
2448 SAVEPPTR(PL_last_lop);
2449 SAVEPPTR(PL_last_uni);
2450 SAVEPPTR(PL_linestart);
2451 SAVESPTR(PL_linestr);
2452 SAVEGENERICPV(PL_lex_brackstack);
2453 SAVEGENERICPV(PL_lex_casestack);
2454 SAVEGENERICPV(PL_parser->lex_shared);
2455 SAVEBOOL(PL_parser->lex_re_reparsing);
2456 SAVEI32(PL_copline);
2458 /* The here-doc parser needs to be able to peek into outer lexing
2459 scopes to find the body of the here-doc. So we put PL_linestr and
2460 PL_bufptr into lex_shared, to ‘share’ those values.
2462 PL_parser->lex_shared->ls_linestr = PL_linestr;
2463 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2465 PL_linestr = PL_lex_stuff;
2466 PL_lex_repl = PL_parser->lex_sub_repl;
2467 PL_lex_stuff = NULL;
2468 PL_parser->lex_sub_repl = NULL;
2470 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2471 set for an inner quote-like operator and then an error causes scope-
2472 popping. We must not have a PL_lex_stuff value left dangling, as
2473 that breaks assumptions elsewhere. See bug #123617. */
2474 SAVEGENERICSV(PL_lex_stuff);
2475 SAVEGENERICSV(PL_parser->lex_sub_repl);
2477 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2478 = SvPVX(PL_linestr);
2479 PL_bufend += SvCUR(PL_linestr);
2480 PL_last_lop = PL_last_uni = NULL;
2481 SAVEFREESV(PL_linestr);
2482 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2484 PL_lex_dojoin = FALSE;
2485 PL_lex_brackets = PL_lex_formbrack = 0;
2486 PL_lex_allbrackets = 0;
2487 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2488 Newx(PL_lex_brackstack, 120, char);
2489 Newx(PL_lex_casestack, 12, char);
2490 PL_lex_casemods = 0;
2491 *PL_lex_casestack = '\0';
2493 PL_lex_state = LEX_INTERPCONCAT;
2495 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2496 PL_copline = NOLINE;
2498 Newxz(shared, 1, LEXSHARED);
2499 shared->ls_prev = PL_parser->lex_shared;
2500 PL_parser->lex_shared = shared;
2502 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2503 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2504 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2505 PL_lex_inpat = PL_parser->lex_sub_op;
2507 PL_lex_inpat = NULL;
2509 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2510 PL_in_eval &= ~EVAL_RE_REPARSING;
2517 * Restores lexer state after a S_sublex_push.
2523 if (!PL_lex_starts++) {
2524 SV * const sv = newSVpvs("");
2525 if (SvUTF8(PL_linestr))
2527 PL_expect = XOPERATOR;
2528 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2532 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2533 PL_lex_state = LEX_INTERPCASEMOD;
2537 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2538 assert(PL_lex_inwhat != OP_TRANSR);
2540 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2541 PL_linestr = PL_lex_repl;
2543 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2544 PL_bufend += SvCUR(PL_linestr);
2545 PL_last_lop = PL_last_uni = NULL;
2546 PL_lex_dojoin = FALSE;
2547 PL_lex_brackets = 0;
2548 PL_lex_allbrackets = 0;
2549 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2550 PL_lex_casemods = 0;
2551 *PL_lex_casestack = '\0';
2553 if (SvEVALED(PL_lex_repl)) {
2554 PL_lex_state = LEX_INTERPNORMAL;
2556 /* we don't clear PL_lex_repl here, so that we can check later
2557 whether this is an evalled subst; that means we rely on the
2558 logic to ensure sublex_done() is called again only via the
2559 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2562 PL_lex_state = LEX_INTERPCONCAT;
2565 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2566 CopLINE(PL_curcop) +=
2567 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2568 + PL_parser->herelines;
2569 PL_parser->herelines = 0;
2574 const line_t l = CopLINE(PL_curcop);
2576 if (PL_parser->sub_error_count != PL_error_count) {
2577 if (PL_parser->sub_no_recover) {
2582 if (PL_multi_close == '<')
2583 PL_parser->herelines += l - PL_multi_end;
2584 PL_bufend = SvPVX(PL_linestr);
2585 PL_bufend += SvCUR(PL_linestr);
2586 PL_expect = XOPERATOR;
2592 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2594 /* This justs wraps get_and_check_backslash_N_name() to output any error
2595 * message it returns. */
2597 const char * error_msg = NULL;
2600 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2602 /* charnames doesn't work well if there have been errors found */
2603 if (PL_error_count > 0) {
2607 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2610 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2617 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2618 const char* const e,
2620 const char ** error_msg)
2622 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2623 * interior, hence to the "}". Finds what the name resolves to, returning
2624 * an SV* containing it; NULL if no valid one found.
2626 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2627 * doesn't have to be. */
2635 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2638 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2641 assert(s > (char *) 3);
2643 res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2646 SvREFCNT_dec_NN(res);
2647 /* diag_listed_as: Unknown charname '%s' */
2648 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2652 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2653 /* include the <}> */
2654 e - backslash_ptr + 1, error_msg);
2656 SvREFCNT_dec_NN(res);
2660 /* See if the charnames handler is the Perl core's, and if so, we can skip
2661 * the validation needed for a user-supplied one, as Perl's does its own
2663 table = GvHV(PL_hintgv); /* ^H */
2664 cvp = hv_fetchs(table, "charnames", FALSE);
2665 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2666 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2668 const char * const name = HvNAME(stash);
2669 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2674 /* Here, it isn't Perl's charname handler. We can't rely on a
2675 * user-supplied handler to validate the input name. For non-ut8 input,
2676 * look to see that the first character is legal. Then loop through the
2677 * rest checking that each is a continuation */
2679 /* This code makes the reasonable assumption that the only Latin1-range
2680 * characters that begin a character name alias are alphabetic, otherwise
2681 * would have to create a isCHARNAME_BEGIN macro */
2684 if (! isALPHAU(*s)) {
2689 if (! isCHARNAME_CONT(*s)) {
2692 if (*s == ' ' && *(s-1) == ' ') {
2699 /* Similarly for utf8. For invariants can check directly; for other
2700 * Latin1, can calculate their code point and check; otherwise use a
2702 if (UTF8_IS_INVARIANT(*s)) {
2703 if (! isALPHAU(*s)) {
2707 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2708 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2714 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2715 utf8_to_uvchr_buf((U8 *) s,
2725 if (UTF8_IS_INVARIANT(*s)) {
2726 if (! isCHARNAME_CONT(*s)) {
2729 if (*s == ' ' && *(s-1) == ' ') {
2734 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2735 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2742 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2743 utf8_to_uvchr_buf((U8 *) s,
2753 if (*(s-1) == ' ') {
2754 /* diag_listed_as: charnames alias definitions may not contain
2755 trailing white-space; marked by <-- HERE in %s
2757 *error_msg = Perl_form(aTHX_
2758 "charnames alias definitions may not contain trailing "
2759 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2760 (int)(s - backslash_ptr + 1), backslash_ptr,
2761 (int)(e - s + 1), s + 1);
2765 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2766 const U8* first_bad_char_loc;
2768 const char* const str = SvPV_const(res, len);
2769 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2770 &first_bad_char_loc)))
2772 _force_out_malformed_utf8_message(first_bad_char_loc,
2773 (U8 *) PL_parser->bufend,
2775 0 /* 0 means don't die */ );
2776 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2777 immediately after '%s' */
2778 *error_msg = Perl_form(aTHX_
2779 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2780 (int) (e - backslash_ptr + 1), backslash_ptr,
2781 (int) ((char *) first_bad_char_loc - str), str);
2790 /* The final %.*s makes sure that should the trailing NUL be missing
2791 * that this print won't run off the end of the string */
2792 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2794 *error_msg = Perl_form(aTHX_
2795 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2796 (int)(s - backslash_ptr + 1), backslash_ptr,
2797 (int)(e - s + 1), s + 1);
2802 /* diag_listed_as: charnames alias definitions may not contain a
2803 sequence of multiple spaces; marked by <-- HERE
2805 *error_msg = Perl_form(aTHX_
2806 "charnames alias definitions may not contain a sequence of "
2807 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2808 (int)(s - backslash_ptr + 1), backslash_ptr,
2809 (int)(e - s + 1), s + 1);
2816 Extracts the next constant part of a pattern, double-quoted string,
2817 or transliteration. This is terrifying code.
2819 For example, in parsing the double-quoted string "ab\x63$d", it would
2820 stop at the '$' and return an OP_CONST containing 'abc'.
2822 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2823 processing a pattern (PL_lex_inpat is true), a transliteration
2824 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2826 Returns a pointer to the character scanned up to. If this is
2827 advanced from the start pointer supplied (i.e. if anything was
2828 successfully parsed), will leave an OP_CONST for the substring scanned
2829 in pl_yylval. Caller must intuit reason for not parsing further
2830 by looking at the next characters herself.
2834 \N{FOO} => \N{U+hex_for_character_FOO}
2835 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2838 all other \-char, including \N and \N{ apart from \N{ABC}
2841 @ and $ where it appears to be a var, but not for $ as tail anchor
2845 In transliterations:
2846 characters are VERY literal, except for - not at the start or end
2847 of the string, which indicates a range. However some backslash sequences
2848 are recognized: \r, \n, and the like
2849 \007 \o{}, \x{}, \N{}
2850 If all elements in the transliteration are below 256,
2851 scan_const expands the range to the full set of intermediate
2852 characters. If the range is in utf8, the hyphen is replaced with
2853 a certain range mark which will be handled by pmtrans() in op.c.
2855 In double-quoted strings:
2857 all those recognized in transliterations
2858 deprecated backrefs: \1 (in substitution replacements)
2859 case and quoting: \U \Q \E
2862 scan_const does *not* construct ops to handle interpolated strings.
2863 It stops processing as soon as it finds an embedded $ or @ variable
2864 and leaves it to the caller to work out what's going on.
2866 embedded arrays (whether in pattern or not) could be:
2867 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2869 $ in double-quoted strings must be the symbol of an embedded scalar.
2871 $ in pattern could be $foo or could be tail anchor. Assumption:
2872 it's a tail anchor if $ is the last thing in the string, or if it's
2873 followed by one of "()| \r\n\t"
2875 \1 (backreferences) are turned into $1 in substitutions
2877 The structure of the code is
2878 while (there's a character to process) {
2879 handle transliteration ranges
2880 skip regexp comments /(?#comment)/ and codes /(?{code})/
2881 skip #-initiated comments in //x patterns
2882 check for embedded arrays
2883 check for embedded scalars
2885 deprecate \1 in substitution replacements
2886 handle string-changing backslashes \l \U \Q \E, etc.
2887 switch (what was escaped) {
2888 handle \- in a transliteration (becomes a literal -)
2889 if a pattern and not \N{, go treat as regular character
2890 handle \132 (octal characters)
2891 handle \x15 and \x{1234} (hex characters)
2892 handle \N{name} (named characters, also \N{3,5} in a pattern)
2893 handle \cV (control characters)
2894 handle printf-style backslashes (\f, \r, \n, etc)
2897 } (end if backslash)
2898 handle regular character
2899 } (end while character to read)
2904 S_scan_const(pTHX_ char *start)
2906 char *send = PL_bufend; /* end of the constant */
2907 SV *sv = newSV(send - start); /* sv for the constant. See note below
2909 char *s = start; /* start of the constant */
2910 char *d = SvPVX(sv); /* destination for copies */
2911 bool dorange = FALSE; /* are we in a translit range? */
2912 bool didrange = FALSE; /* did we just finish a range? */
2913 bool in_charclass = FALSE; /* within /[...]/ */
2914 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
2915 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2916 UTF8? But, this can show as true
2917 when the source isn't utf8, as for
2918 example when it is entirely composed
2920 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
2921 number of characters found so far
2922 that will expand (into 2 bytes)
2923 should we have to convert to
2925 SV *res; /* result from charnames */
2926 STRLEN offset_to_max = 0; /* The offset in the output to where the range
2927 high-end character is temporarily placed */
2929 /* Does something require special handling in tr/// ? This avoids extra
2930 * work in a less likely case. As such, khw didn't feel it was worth
2931 * adding any branches to the more mainline code to handle this, which
2932 * means that this doesn't get set in some circumstances when things like
2933 * \x{100} get expanded out. As a result there needs to be extra testing
2934 * done in the tr code */
2935 bool has_above_latin1 = FALSE;
2937 /* Note on sizing: The scanned constant is placed into sv, which is
2938 * initialized by newSV() assuming one byte of output for every byte of
2939 * input. This routine expects newSV() to allocate an extra byte for a
2940 * trailing NUL, which this routine will append if it gets to the end of
2941 * the input. There may be more bytes of input than output (eg., \N{LATIN
2942 * CAPITAL LETTER A}), or more output than input if the constant ends up
2943 * recoded to utf8, but each time a construct is found that might increase
2944 * the needed size, SvGROW() is called. Its size parameter each time is
2945 * based on the best guess estimate at the time, namely the length used so
2946 * far, plus the length the current construct will occupy, plus room for
2947 * the trailing NUL, plus one byte for every input byte still unscanned */
2949 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2952 int backslash_N = 0; /* ? was the character from \N{} */
2953 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2954 platform-specific like \x65 */
2957 PERL_ARGS_ASSERT_SCAN_CONST;
2959 assert(PL_lex_inwhat != OP_TRANSR);
2960 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2961 /* If we are doing a trans and we know we want UTF8 set expectation */
2962 d_is_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2963 s_is_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2966 /* Protect sv from errors and fatal warnings. */
2967 ENTER_with_name("scan_const");
2970 /* A bunch of code in the loop below assumes that if s[n] exists and is not
2971 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
2973 assert(*send == '\0');
2976 || dorange /* Handle tr/// range at right edge of input */
2979 /* get transliterations out of the way (they're most literal) */
2980 if (PL_lex_inwhat == OP_TRANS) {
2982 /* But there isn't any special handling necessary unless there is a
2983 * range, so for most cases we just drop down and handle the value
2984 * as any other. There are two exceptions.
2986 * 1. A hyphen indicates that we are actually going to have a
2987 * range. In this case, skip the '-', set a flag, then drop
2988 * down to handle what should be the end range value.
2989 * 2. After we've handled that value, the next time through, that
2990 * flag is set and we fix up the range.
2992 * Ranges entirely within Latin1 are expanded out entirely, in
2993 * order to make the transliteration a simple table look-up.
2994 * Ranges that extend above Latin1 have to be done differently, so
2995 * there is no advantage to expanding them here, so they are
2996 * stored here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte
2997 * signifies a hyphen without any possible ambiguity. On EBCDIC
2998 * machines, if the range is expressed as Unicode, the Latin1
2999 * portion is expanded out even if the range extends above
3000 * Latin1. This is because each code point in it has to be
3001 * processed here individually to get its native translation */
3005 /* Here, we don't think we're in a range. If the new character
3006 * is not a hyphen; or if it is a hyphen, but it's too close to
3007 * either edge to indicate a range, or if we haven't output any
3008 * characters yet then it's a regular character. */
3009 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
3011 /* A regular character. Process like any other, but first
3012 * clear any flags */
3016 non_portable_endpoint = 0;
3019 /* The tests here for being above Latin1 and similar ones
3020 * in the following 'else' suffice to find all such
3021 * occurences in the constant, except those added by a
3022 * backslash escape sequence, like \x{100}. Mostly, those
3023 * set 'has_above_latin1' as appropriate */
3024 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3025 has_above_latin1 = TRUE;
3028 /* Drops down to generic code to process current byte */
3030 else { /* Is a '-' in the context where it means a range */
3031 if (didrange) { /* Something like y/A-C-Z// */
3032 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3038 s++; /* Skip past the hyphen */
3040 /* d now points to where the end-range character will be
3041 * placed. Save it so won't have to go finding it later,
3042 * and drop down to get that character. (Actually we
3043 * instead save the offset, to handle the case where a
3044 * realloc in the meantime could change the actual
3045 * pointer). We'll finish processing the range the next
3046 * time through the loop */
3047 offset_to_max = d - SvPVX_const(sv);
3049 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3050 has_above_latin1 = TRUE;
3053 /* Drops down to generic code to process current byte */
3055 } /* End of not a range */
3057 /* Here we have parsed a range. Now must handle it. At this
3059 * 'sv' is a SV* that contains the output string we are
3060 * constructing. The final two characters in that string
3061 * are the range start and range end, in order.
3062 * 'd' points to just beyond the range end in the 'sv' string,
3063 * where we would next place something
3064 * 'offset_to_max' is the offset in 'sv' at which the character
3065 * (the range's maximum end point) before 'd' begins.
3067 char * max_ptr = SvPVX(sv) + offset_to_max;
3070 IV range_max; /* last character in range */
3072 Size_t offset_to_min = 0;
3075 bool convert_unicode;
3076 IV real_range_max = 0;
3078 /* Get the code point values of the range ends. */
3080 /* We know the utf8 is valid, because we just constructed
3081 * it ourselves in previous loop iterations */
3082 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3083 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3084 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3086 /* This compensates for not all code setting
3087 * 'has_above_latin1', so that we don't skip stuff that
3088 * should be executed */
3089 if (range_max > 255) {
3090 has_above_latin1 = TRUE;
3094 min_ptr = max_ptr - 1;
3095 range_min = * (U8*) min_ptr;
3096 range_max = * (U8*) max_ptr;
3099 /* If the range is just a single code point, like tr/a-a/.../,
3100 * that code point is already in the output, twice. We can
3101 * just back up over the second instance and avoid all the rest
3102 * of the work. But if it is a variant character, it's been
3103 * counted twice, so decrement. (This unlikely scenario is
3104 * special cased, like the one for a range of 2 code points
3105 * below, only because the main-line code below needs a range
3106 * of 3 or more to work without special casing. Might as well
3107 * get it out of the way now.) */
3108 if (UNLIKELY(range_max == range_min)) {
3110 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3111 utf8_variant_count--;
3117 /* On EBCDIC platforms, we may have to deal with portable
3118 * ranges. These happen if at least one range endpoint is a
3119 * Unicode value (\N{...}), or if the range is a subset of
3120 * [A-Z] or [a-z], and both ends are literal characters,
3121 * like 'A', and not like \x{C1} */
3123 cBOOL(backslash_N) /* \N{} forces Unicode,
3124 hence portable range */
3125 || ( ! non_portable_endpoint
3126 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3127 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3128 if (convert_unicode) {
3130 /* Special handling is needed for these portable ranges.
3131 * They are defined to be in Unicode terms, which includes
3132 * all the Unicode code points between the end points.
3133 * Convert to Unicode to get the Unicode range. Later we
3134 * will convert each code point in the range back to
3136 range_min = NATIVE_TO_UNI(range_min);
3137 range_max = NATIVE_TO_UNI(range_max);
3141 if (range_min > range_max) {
3143 if (convert_unicode) {
3144 /* Need to convert back to native for meaningful
3145 * messages for this platform */
3146 range_min = UNI_TO_NATIVE(range_min);
3147 range_max = UNI_TO_NATIVE(range_max);
3150 /* Use the characters themselves for the error message if
3151 * ASCII printables; otherwise some visible representation
3153 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3155 "Invalid range \"%c-%c\" in transliteration operator",
3156 (char)range_min, (char)range_max);
3159 else if (convert_unicode) {
3160 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3162 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3163 UVXf "}\" in transliteration operator",
3164 range_min, range_max);
3168 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3170 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3171 " in transliteration operator",
3172 range_min, range_max);
3176 /* If the range is exactly two code points long, they are
3177 * already both in the output */
3178 if (UNLIKELY(range_min + 1 == range_max)) {
3182 /* Here the range contains at least 3 code points */
3186 /* If everything in the transliteration is below 256, we
3187 * can avoid special handling later. A translation table
3188 * for each of those bytes is created by op.c. So we
3189 * expand out all ranges to their constituent code points.
3190 * But if we've encountered something above 255, the
3191 * expanding won't help, so skip doing that. But if it's
3192 * EBCDIC, we may have to look at each character below 256
3193 * if we have to convert to/from Unicode values */
3194 if ( has_above_latin1
3196 && (range_min > 255 || ! convert_unicode)
3199 const STRLEN off = d - SvPVX(sv);
3200 const STRLEN extra = 1 + (send - s) + 1;
3203 /* Move the high character one byte to the right; then
3204 * insert between it and the range begin, an illegal
3205 * byte which serves to indicate this is a range (using
3206 * a '-' would be ambiguous). */
3208 if (off + extra > SvLEN(sv)) {
3209 d = off + SvGROW(sv, off + extra);
3210 max_ptr = d - off + offset_to_max;
3214 while (e-- > max_ptr) {
3217 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3221 /* Here, we're going to expand out the range. For EBCDIC
3222 * the range can extend above 255 (not so in ASCII), so
3223 * for EBCDIC, split it into the parts above and below
3226 if (range_max > 255) {
3227 real_range_max = range_max;
3233 /* Here we need to expand out the string to contain each
3234 * character in the range. Grow the output to handle this.
3235 * For non-UTF8, we need a byte for each code point in the
3236 * range, minus the three that we've already allocated for: the
3237 * hyphen, the min, and the max. For UTF-8, we need this
3238 * plus an extra byte for each code point that occupies two
3239 * bytes (is variant) when in UTF-8 (except we've already
3240 * allocated for the end points, including if they are
3241 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3242 * platforms, it's easy to calculate a precise number. To
3243 * start, we count the variants in the range, which we need
3244 * elsewhere in this function anyway. (For the case where it
3245 * isn't easy to calculate, 'extras' has been initialized to 0,
3246 * and the calculation is done in a loop further down.) */
3248 if (convert_unicode)
3251 /* This is executed unconditionally on ASCII, and for
3252 * Unicode ranges on EBCDIC. Under these conditions, all
3253 * code points above a certain value are variant; and none
3254 * under that value are. We just need to find out how much
3255 * of the range is above that value. We don't count the
3256 * end points here, as they will already have been counted
3257 * as they were parsed. */
3258 if (range_min >= UTF_CONTINUATION_MARK) {
3260 /* The whole range is made up of variants */
3261 extras = (range_max - 1) - (range_min + 1) + 1;
3263 else if (range_max >= UTF_CONTINUATION_MARK) {
3265 /* Only the higher portion of the range is variants */
3266 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3269 utf8_variant_count += extras;
3272 /* The base growth is the number of code points in the range,
3273 * not including the endpoints, which have already been sized
3274 * for (and output). We don't subtract for the hyphen, as it
3275 * has been parsed but not output, and the SvGROW below is
3276 * based only on what's been output plus what's left to parse.
3278 grow = (range_max - 1) - (range_min + 1) + 1;
3282 /* In some cases in EBCDIC, we haven't yet calculated a
3283 * precise amount needed for the UTF-8 variants. Just
3284 * assume the worst case, that everything will expand by a
3286 if (! convert_unicode) {
3292 /* Otherwise we know exactly how many variants there
3293 * are in the range. */
3298 /* Grow, but position the output to overwrite the range min end
3299 * point, because in some cases we overwrite that */
3300 SvCUR_set(sv, d - SvPVX_const(sv));
3301 offset_to_min = min_ptr - SvPVX_const(sv);
3303 /* See Note on sizing above. */
3304 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3307 + 1 /* Trailing NUL */ );
3309 /* Now, we can expand out the range. */
3311 if (convert_unicode) {
3314 /* Recall that the min and max are now in Unicode terms, so
3315 * we have to convert each character to its native
3318 for (i = range_min; i <= range_max; i++) {
3319 append_utf8_from_native_byte(
3320 LATIN1_TO_NATIVE((U8) i),
3325 for (i = range_min; i <= range_max; i++) {
3326 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3332 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3334 /* Here, no conversions are necessary, which means that the
3335 * first character in the range is already in 'd' and
3336 * valid, so we can skip overwriting it */
3340 for (i = range_min + 1; i <= range_max; i++) {
3341 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3347 assert(range_min + 1 <= range_max);
3348 for (i = range_min + 1; i < range_max; i++) {
3350 /* In this case on EBCDIC, we haven't calculated
3351 * the variants. Do it here, as we go along */
3352 if (! UVCHR_IS_INVARIANT(i)) {
3353 utf8_variant_count++;
3359 /* The range_max is done outside the loop so as to
3360 * avoid having to special case not incrementing
3361 * 'utf8_variant_count' on EBCDIC (it's already been
3362 * counted when originally parsed) */
3363 *d++ = (char) range_max;
3368 /* If the original range extended above 255, add in that
3370 if (real_range_max) {
3371 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3372 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3373 if (real_range_max > 0x100) {
3374 if (real_range_max > 0x101) {
3375 *d++ = (char) ILLEGAL_UTF8_BYTE;
3377 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3383 /* mark the range as done, and continue */
3387 non_portable_endpoint = 0;
3391 } /* End of is a range */
3392 } /* End of transliteration. Joins main code after these else's */
3393 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3396 while (s1 >= start && *s1-- == '\\')
3399 in_charclass = TRUE;
3401 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3404 while (s1 >= start && *s1-- == '\\')
3407 in_charclass = FALSE;
3409 /* skip for regexp comments /(?#comment)/, except for the last
3410 * char, which will be done separately. Stop on (?{..}) and
3412 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3415 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3417 while (s + len < send && *s != ')') {
3418 Copy(s, d, len, U8);
3421 len = UTF8_SAFE_SKIP(s, send);
3424 else while (s+1 < send && *s != ')') {
3428 else if (!PL_lex_casemods
3429 && ( s[2] == '{' /* This should match regcomp.c */
3430 || (s[2] == '?' && s[3] == '{')))
3435 /* likewise skip #-initiated comments in //x patterns */
3439 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3441 while (s < send && *s != '\n')
3444 /* no further processing of single-quoted regex */
3445 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3446 goto default_action;
3448 /* check for embedded arrays
3449 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3451 else if (*s == '@' && s[1]) {
3453 ? isIDFIRST_utf8_safe(s+1, send)
3454 : isWORDCHAR_A(s[1]))
3458 if (strchr(":'{$", s[1]))
3460 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3461 break; /* in regexp, neither @+ nor @- are interpolated */
3463 /* check for embedded scalars. only stop if we're sure it's a
3465 else if (*s == '$') {
3466 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3468 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3470 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3471 "Possible unintended interpolation of $\\ in regex");
3473 break; /* in regexp, $ might be tail anchor */
3477 /* End of else if chain - OP_TRANS rejoin rest */
3479 if (UNLIKELY(s >= send)) {
3485 if (*s == '\\' && s+1 < send) {
3486 char* e; /* Can be used for ending '}', etc. */
3490 /* warn on \1 - \9 in substitution replacements, but note that \11
3491 * is an octal; and \19 is \1 followed by '9' */
3492 if (PL_lex_inwhat == OP_SUBST
3498 /* diag_listed_as: \%d better written as $%d */
3499 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3504 /* string-change backslash escapes */
3505 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3509 /* In a pattern, process \N, but skip any other backslash escapes.
3510 * This is because we don't want to translate an escape sequence
3511 * into a meta symbol and have the regex compiler use the meta
3512 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3513 * in spite of this, we do have to process \N here while the proper
3514 * charnames handler is in scope. See bugs #56444 and #62056.
3516 * There is a complication because \N in a pattern may also stand
3517 * for 'match a non-nl', and not mean a charname, in which case its
3518 * processing should be deferred to the regex compiler. To be a
3519 * charname it must be followed immediately by a '{', and not look
3520 * like \N followed by a curly quantifier, i.e., not something like
3521 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3523 else if (PL_lex_inpat
3526 || regcurly(s + 1)))
3529 goto default_action;
3535 if ((isALPHANUMERIC(*s)))
3536 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3537 "Unrecognized escape \\%c passed through",
3539 /* default action is to copy the quoted character */
3540 goto default_action;
3543 /* eg. \132 indicates the octal constant 0132 */
3544 case '0': case '1': case '2': case '3':
3545 case '4': case '5': case '6': case '7':
3547 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3549 uv = grok_oct(s, &len, &flags, NULL);
3551 if (len < 3 && s < send && isDIGIT(*s)
3552 && ckWARN(WARN_MISC))
3554 Perl_warner(aTHX_ packWARN(WARN_MISC),
3555 "%s", form_short_octal_warning(s, len));
3558 goto NUM_ESCAPE_INSERT;
3560 /* eg. \o{24} indicates the octal constant \024 */
3565 bool valid = grok_bslash_o(&s, send,
3567 TRUE, /* Output warning */
3568 FALSE, /* Not strict */
3569 TRUE, /* Output warnings for
3574 uv = 0; /* drop through to ensure range ends are set */
3576 goto NUM_ESCAPE_INSERT;
3579 /* eg. \x24 indicates the hex constant 0x24 */
3584 bool valid = grok_bslash_x(&s, send,
3586 TRUE, /* Output warning */
3587 FALSE, /* Not strict */
3588 TRUE, /* Output warnings for
3593 uv = 0; /* drop through to ensure range ends are set */
3598 /* Insert oct or hex escaped character. */
3600 /* Here uv is the ordinal of the next character being added */
3601 if (UVCHR_IS_INVARIANT(uv)) {
3605 if (!d_is_utf8 && uv > 255) {
3607 /* Here, 'uv' won't fit unless we convert to UTF-8.
3608 * If we've only seen invariants so far, all we have to
3609 * do is turn on the flag */
3610 if (utf8_variant_count == 0) {
3614 SvCUR_set(sv, d - SvPVX_const(sv));
3618 sv_utf8_upgrade_flags_grow(
3620 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3622 /* Since we're having to grow here,
3623 * make sure we have enough room for
3624 * this escape and a NUL, so the
3625 * code immediately below won't have
3626 * to actually grow again */
3628 + (STRLEN)(send - s) + 1);
3629 d = SvPVX(sv) + SvCUR(sv);
3632 has_above_latin1 = TRUE;
3638 utf8_variant_count++;
3641 /* Usually, there will already be enough room in 'sv'
3642 * since such escapes are likely longer than any UTF-8
3643 * sequence they can end up as. This isn't the case on
3644 * EBCDIC where \x{40000000} contains 12 bytes, and the
3645 * UTF-8 for it contains 14. And, we have to allow for
3646 * a trailing NUL. It probably can't happen on ASCII
3647 * platforms, but be safe. See Note on sizing above. */
3648 const STRLEN needed = d - SvPVX(sv)
3652 if (UNLIKELY(needed > SvLEN(sv))) {
3653 SvCUR_set(sv, d - SvPVX_const(sv));
3654 d = SvCUR(sv) + SvGROW(sv, needed);
3657 d = (char*)uvchr_to_utf8((U8*)d, uv);
3658 if (PL_lex_inwhat == OP_TRANS
3659 && PL_parser->lex_sub_op)
3661 PL_parser->lex_sub_op->op_private |=
3662 (PL_lex_repl ? OPpTRANS_FROM_UTF
3668 non_portable_endpoint++;
3673 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3674 * named character, like \N{LATIN SMALL LETTER A}, or a named
3675 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3676 * GRAVE} (except y/// can't handle the latter, croaking). For
3677 * convenience all three forms are referred to as "named
3678 * characters" below.
3680 * For patterns, \N also can mean to match a non-newline. Code
3681 * before this 'switch' statement should already have handled
3682 * this situation, and hence this code only has to deal with
3683 * the named character cases.
3685 * For non-patterns, the named characters are converted to
3686 * their string equivalents. In patterns, named characters are
3687 * not converted to their ultimate forms for the same reasons
3688 * that other escapes aren't (mainly that the ultimate
3689 * character could be considered a meta-symbol by the regex
3690 * compiler). Instead, they are converted to the \N{U+...}
3691 * form to get the value from the charnames that is in effect
3692 * right now, while preserving the fact that it was a named
3693 * character, so that the regex compiler knows this.
3695 * The structure of this section of code (besides checking for
3696 * errors and upgrading to utf8) is:
3697 * If the named character is of the form \N{U+...}, pass it
3698 * through if a pattern; otherwise convert the code point
3700 * Otherwise must be some \N{NAME}: convert to
3701 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3703 * Transliteration is an exception. The conversion to utf8 is
3704 * only done if the code point requires it to be representable.
3706 * Here, 's' points to the 'N'; the test below is guaranteed to
3707 * succeed if we are being called on a pattern, as we already
3708 * know from a test above that the next character is a '{'. A
3709 * non-pattern \N must mean 'named character', which requires
3713 yyerror("Missing braces on \\N{}");
3719 /* If there is no matching '}', it is an error. */
3720 if (! (e = (char *) memchr(s, '}', send - s))) {
3721 if (! PL_lex_inpat) {
3722 yyerror("Missing right brace on \\N{}");
3724 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3726 yyquit(); /* Have exhausted the input. */
3729 /* Here it looks like a named character */
3731 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3732 s += 2; /* Skip to next char after the 'U+' */
3735 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3736 /* Check the syntax. */
3739 if (!isXDIGIT(*s)) {
3742 "Invalid hexadecimal number in \\N{U+...}"
3751 else if ((*s == '.' || *s == '_')
3757 /* Pass everything through unchanged.
3758 * +1 is for the '}' */
3759 Copy(orig_s, d, e - orig_s + 1, char);
3760 d += e - orig_s + 1;
3762 else { /* Not a pattern: convert the hex to string */
3763 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3764 | PERL_SCAN_SILENT_ILLDIGIT
3765 | PERL_SCAN_DISALLOW_PREFIX;
3767 uv = grok_hex(s, &len, &flags, NULL);
3768 if (len == 0 || (len != (STRLEN)(e - s)))
3771 /* For non-tr///, if the destination is not in utf8,
3772 * unconditionally recode it to be so. This is
3773 * because \N{} implies Unicode semantics, and scalars
3774 * have to be in utf8 to guarantee those semantics.
3775 * tr/// doesn't care about Unicode rules, so no need
3776 * there to upgrade to UTF-8 for small enough code
3778 if (! d_is_utf8 && ( uv > 0xFF
3779 || PL_lex_inwhat != OP_TRANS))
3781 /* See Note on sizing above. */
3782 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3784 SvCUR_set(sv, d - SvPVX_const(sv));
3788 if (utf8_variant_count == 0) {
3790 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3793 sv_utf8_upgrade_flags_grow(
3795 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3797 d = SvPVX(sv) + SvCUR(sv);
3801 has_above_latin1 = TRUE;
3804 /* Add the (Unicode) code point to the output. */
3805 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3806 *d++ = (char) LATIN1_TO_NATIVE(uv);
3809 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3813 else /* Here is \N{NAME} but not \N{U+...}. */
3814 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3815 { /* Failed. We should die eventually, but for now use a NUL
3819 else { /* Successfully evaluated the name */
3821 const char *str = SvPV_const(res, len);
3824 if (! len) { /* The name resolved to an empty string */
3825 const char empty_N[] = "\\N{_}";
3826 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3827 d += sizeof(empty_N) - 1;
3830 /* In order to not lose information for the regex
3831 * compiler, pass the result in the specially made
3832 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3833 * the code points in hex of each character
3834 * returned by charnames */
3836 const char *str_end = str + len;
3837 const STRLEN off = d - SvPVX_const(sv);
3839 if (! SvUTF8(res)) {
3840 /* For the non-UTF-8 case, we can determine the
3841 * exact length needed without having to parse
3842 * through the string. Each character takes up
3843 * 2 hex digits plus either a trailing dot or
3845 const char initial_text[] = "\\N{U+";
3846 const STRLEN initial_len = sizeof(initial_text)
3848 d = off + SvGROW(sv, off
3851 /* +1 for trailing NUL */
3854 + (STRLEN)(send - e));
3855 Copy(initial_text, d, initial_len, char);
3857 while (str < str_end) {
3860 my_snprintf(hex_string,
3864 /* The regex compiler is
3865 * expecting Unicode, not
3867 NATIVE_TO_LATIN1(*str));
3868 PERL_MY_SNPRINTF_POST_GUARD(len,
3869 sizeof(hex_string));
3870 Copy(hex_string, d, 3, char);
3874 d--; /* Below, we will overwrite the final
3875 dot with a right brace */
3878 STRLEN char_length; /* cur char's byte length */
3880 /* and the number of bytes after this is
3881 * translated into hex digits */
3882 STRLEN output_length;
3884 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3885 * for max('U+', '.'); and 1 for NUL */
3886 char hex_string[2 * UTF8_MAXBYTES + 5];
3888 /* Get the first character of the result. */
3889 U32 uv = utf8n_to_uvchr((U8 *) str,
3893 /* Convert first code point to Unicode hex,
3894 * including the boiler plate before it. */
3896 my_snprintf(hex_string, sizeof(hex_string),
3898 (unsigned int) NATIVE_TO_UNI(uv));
3900 /* Make sure there is enough space to hold it */
3901 d = off + SvGROW(sv, off
3903 + (STRLEN)(send - e)
3904 + 2); /* '}' + NUL */
3906 Copy(hex_string, d, output_length, char);
3909 /* For each subsequent character, append dot and
3910 * its Unicode code point in hex */
3911 while ((str += char_length) < str_end) {
3912 const STRLEN off = d - SvPVX_const(sv);
3913 U32 uv = utf8n_to_uvchr((U8 *) str,
3918 my_snprintf(hex_string,
3921 (unsigned int) NATIVE_TO_UNI(uv));
3923 d = off + SvGROW(sv, off
3925 + (STRLEN)(send - e)
3926 + 2); /* '}' + NUL */
3927 Copy(hex_string, d, output_length, char);
3932 *d++ = '}'; /* Done. Add the trailing brace */
3935 else { /* Here, not in a pattern. Convert the name to a
3938 if (PL_lex_inwhat == OP_TRANS) {
3939 str = SvPV_const(res, len);
3940 if (len > ((SvUTF8(res))
3944 yyerror(Perl_form(aTHX_
3945 "%.*s must not be a named sequence"
3946 " in transliteration operator",
3947 /* +1 to include the "}" */
3948 (int) (e + 1 - start), start));
3950 goto end_backslash_N;
3953 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3954 has_above_latin1 = TRUE;
3958 else if (! SvUTF8(res)) {
3959 /* Make sure \N{} return is UTF-8. This is because
3960 * \N{} implies Unicode semantics, and scalars have
3961 * to be in utf8 to guarantee those semantics; but
3962 * not needed in tr/// */
3963 sv_utf8_upgrade_flags(res, 0);
3964 str = SvPV_const(res, len);
3967 /* Upgrade destination to be utf8 if this new
3969 if (! d_is_utf8 && SvUTF8(res)) {
3970 /* See Note on sizing above. */
3971 const STRLEN extra = len + (send - s) + 1;
3973 SvCUR_set(sv, d - SvPVX_const(sv));
3977 if (utf8_variant_count == 0) {
3979 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3982 sv_utf8_upgrade_flags_grow(sv,
3983 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3985 d = SvPVX(sv) + SvCUR(sv);
3988 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3990 /* See Note on sizing above. (NOTE: SvCUR() is not
3991 * set correctly here). */
3992 const STRLEN extra = len + (send - e) + 1;
3993 const STRLEN off = d - SvPVX_const(sv);
3994 d = off + SvGROW(sv, off + extra);
3996 Copy(str, d, len, char);
4002 } /* End \N{NAME} */
4006 backslash_N++; /* \N{} is defined to be Unicode */
4008 s = e + 1; /* Point to just after the '}' */
4011 /* \c is a control character */
4015 *d++ = grok_bslash_c(*s, 1);
4018 yyerror("Missing control char name in \\c");
4019 yyquit(); /* Are at end of input, no sense continuing */
4022 non_portable_endpoint++;
4026 /* printf-style backslashes, formfeeds, newlines, etc */
4052 } /* end if (backslash) */
4055 /* Just copy the input to the output, though we may have to convert
4058 * If the input has the same representation in UTF-8 as not, it will be
4059 * a single byte, and we don't care about UTF8ness; just copy the byte */
4060 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4063 else if (! s_is_utf8 && ! d_is_utf8) {
4064 /* If neither source nor output is UTF-8, is also a single byte,
4065 * just copy it; but this byte counts should we later have to
4066 * convert to UTF-8 */
4068 utf8_variant_count++;
4070 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4071 const STRLEN len = UTF8SKIP(s);
4073 /* We expect the source to have already been checked for
4075 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4077 Copy(s, d, len, U8);
4081 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4082 STRLEN need = send - s + 1; /* See Note on sizing above. */
4084 SvCUR_set(sv, d - SvPVX_const(sv));
4088 if (utf8_variant_count == 0) {
4090 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4093 sv_utf8_upgrade_flags_grow(sv,
4094 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4096 d = SvPVX(sv) + SvCUR(sv);
4099 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4101 else { /* UTF8ness matters: convert this non-UTF8 source char to
4102 UTF-8 for output. It will occupy 2 bytes, but don't include
4103 the input byte since we haven't incremented 's' yet. See
4104 Note on sizing above. */
4105 const STRLEN off = d - SvPVX(sv);
4106 const STRLEN extra = 2 + (send - s - 1) + 1;
4107 if (off + extra > SvLEN(sv)) {
4108 d = off + SvGROW(sv, off + extra);
4110 *d++ = UTF8_EIGHT_BIT_HI(*s);
4111 *d++ = UTF8_EIGHT_BIT_LO(*s);
4114 } /* while loop to process each character */
4117 const STRLEN off = d - SvPVX(sv);
4119 /* See if room for the terminating NUL */
4120 if (UNLIKELY(off >= SvLEN(sv))) {
4124 if (off > SvLEN(sv))
4126 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4127 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4129 /* Whew! Here we don't have room for the terminating NUL, but
4130 * everything else so far has fit. It's not too late to grow
4131 * to fit the NUL and continue on. But it is a bug, as the code
4132 * above was supposed to have made room for this, so under
4133 * DEBUGGING builds, we panic anyway. */
4134 d = off + SvGROW(sv, off + 1);
4138 /* terminate the string and set up the sv */
4140 SvCUR_set(sv, d - SvPVX_const(sv));
4145 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4146 PL_parser->lex_sub_op->op_private |=
4147 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4151 /* shrink the sv if we allocated more than we used */
4152 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4153 SvPV_shrink_to_cur(sv);
4156 /* return the substring (via pl_yylval) only if we parsed anything */
4159 for (; s2 < s; s2++) {
4161 COPLINE_INC_WITH_HERELINES;
4163 SvREFCNT_inc_simple_void_NN(sv);
4164 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4165 && ! PL_parser->lex_re_reparsing)
4167 const char *const key = PL_lex_inpat ? "qr" : "q";
4168 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4172 if (PL_lex_inwhat == OP_TRANS) {
4175 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4178 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4186 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4187 type, typelen, NULL);
4189 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4191 LEAVE_with_name("scan_const");
4196 * Returns TRUE if there's more to the expression (e.g., a subscript),
4199 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4201 * ->[ and ->{ return TRUE
4202 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4203 * { and [ outside a pattern are always subscripts, so return TRUE
4204 * if we're outside a pattern and it's not { or [, then return FALSE
4205 * if we're in a pattern and the first char is a {
4206 * {4,5} (any digits around the comma) returns FALSE
4207 * if we're in a pattern and the first char is a [
4209 * [SOMETHING] has a funky algorithm to decide whether it's a
4210 * character class or not. It has to deal with things like
4211 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4212 * anything else returns TRUE
4215 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4218 S_intuit_more(pTHX_ char *s, char *e)
4220 PERL_ARGS_ASSERT_INTUIT_MORE;
4222 if (PL_lex_brackets)
4224 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4226 if (*s == '-' && s[1] == '>'
4227 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4228 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4229 ||(s[2] == '@' && strchr("*[{",s[3])) ))
4231 if (*s != '{' && *s != '[')
4233 PL_parser->sub_no_recover = TRUE;
4237 /* In a pattern, so maybe we have {n,m}. */
4245 /* On the other hand, maybe we have a character class */
4248 if (*s == ']' || *s == '^')
4251 /* this is terrifying, and it works */
4254 const char * const send = (char *) memchr(s, ']', e - s);
4255 unsigned char un_char, last_un_char;
4256 char tmpbuf[sizeof PL_tokenbuf * 4];
4258 if (!send) /* has to be an expression */
4260 weight = 2; /* let's weigh the evidence */
4264 else if (isDIGIT(*s)) {
4266 if (isDIGIT(s[1]) && s[2] == ']')
4272 Zero(seen,256,char);
4274 for (; s < send; s++) {
4275 last_un_char = un_char;
4276 un_char = (unsigned char)*s;
4281 weight -= seen[un_char] * 10;
4282 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4284 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4285 len = (int)strlen(tmpbuf);
4286 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4287 UTF ? SVf_UTF8 : 0, SVt_PV))
4294 && strchr("[#!%*<>()-=",s[1]))
4296 if (/*{*/ strchr("])} =",s[2]))
4305 if (strchr("wds]",s[1]))
4307 else if (seen[(U8)'\''] || seen[(U8)'"'])
4309 else if (strchr("rnftbxcav",s[1]))
4311 else if (isDIGIT(s[1])) {
4313 while (s[1] && isDIGIT(s[1]))
4323 if (strchr("aA01! ",last_un_char))
4325 if (strchr("zZ79~",s[1]))
4327 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4328 weight -= 5; /* cope with negative subscript */
4331 if (!isWORDCHAR(last_un_char)
4332 && !(last_un_char == '$' || last_un_char == '@'
4333 || last_un_char == '&')
4334 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4338 if (keyword(d, s - d, 0))
4341 if (un_char == last_un_char + 1)
4343 weight -= seen[un_char];
4348 if (weight >= 0) /* probably a character class */
4358 * Does all the checking to disambiguate
4360 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4361 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4363 * First argument is the stuff after the first token, e.g. "bar".
4365 * Not a method if foo is a filehandle.
4366 * Not a method if foo is a subroutine prototyped to take a filehandle.
4367 * Not a method if it's really "Foo $bar"
4368 * Method if it's "foo $bar"
4369 * Not a method if it's really "print foo $bar"
4370 * Method if it's really "foo package::" (interpreted as package->foo)
4371 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4372 * Not a method if bar is a filehandle or package, but is quoted with
4377 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4379 char *s = start + (*start == '$');
4380 char tmpbuf[sizeof PL_tokenbuf];
4383 /* Mustn't actually add anything to a symbol table.
4384 But also don't want to "initialise" any placeholder
4385 constants that might already be there into full
4386 blown PVGVs with attached PVCV. */
4388 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4390 PERL_ARGS_ASSERT_INTUIT_METHOD;
4392 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4394 if (cv && SvPOK(cv)) {
4395 const char *proto = CvPROTO(cv);
4397 while (*proto && (isSPACE(*proto) || *proto == ';'))
4404 if (*start == '$') {
4405 SSize_t start_off = start - SvPVX(PL_linestr);
4406 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4407 || isUPPER(*PL_tokenbuf))
4409 /* this could be $# */
4412 PL_bufptr = SvPVX(PL_linestr) + start_off;
4414 return *s == '(' ? FUNCMETH : METHOD;
4417 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4418 /* start is the beginning of the possible filehandle/object,
4419 * and s is the end of it
4420 * tmpbuf is a copy of it (but with single quotes as double colons)
4423 if (!keyword(tmpbuf, len, 0)) {
4424 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4429 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4430 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4432 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4433 && (!isGV(indirgv) || GvCVu(indirgv)))
4435 /* filehandle or package name makes it a method */
4436 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4438 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4439 return 0; /* no assumptions -- "=>" quotes bareword */
4441 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4442 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4443 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4445 force_next(BAREWORD);
4447 return *s == '(' ? FUNCMETH : METHOD;
4453 /* Encoded script support. filter_add() effectively inserts a
4454 * 'pre-processing' function into the current source input stream.
4455 * Note that the filter function only applies to the current source file
4456 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4458 * The datasv parameter (which may be NULL) can be used to pass
4459 * private data to this instance of the filter. The filter function
4460 * can recover the SV using the FILTER_DATA macro and use it to
4461 * store private buffers and state information.
4463 * The supplied datasv parameter is upgraded to a PVIO type
4464 * and the IoDIRP/IoANY field is used to store the function pointer,
4465 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4466 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4467 * private use must be set using malloc'd pointers.
4471 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4479 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4480 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4482 if (!PL_rsfp_filters)
4483 PL_rsfp_filters = newAV();
4486 SvUPGRADE(datasv, SVt_PVIO);
4487 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4488 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4489 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4490 FPTR2DPTR(void *, IoANY(datasv)),
4491 SvPV_nolen(datasv)));
4492 av_unshift(PL_rsfp_filters, 1);
4493 av_store(PL_rsfp_filters, 0, datasv) ;
4495 !PL_parser->filtered
4496 && PL_parser->lex_flags & LEX_EVALBYTES
4497 && PL_bufptr < PL_bufend
4499 const char *s = PL_bufptr;
4500 while (s < PL_bufend) {
4502 SV *linestr = PL_parser->linestr;
4503 char *buf = SvPVX(linestr);
4504 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4505 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4506 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4507 STRLEN const linestart_pos = PL_parser->linestart - buf;
4508 STRLEN const last_uni_pos =
4509 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4510 STRLEN const last_lop_pos =
4511 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4512 av_push(PL_rsfp_filters, linestr);
4513 PL_parser->linestr =
4514 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4515 buf = SvPVX(PL_parser->linestr);
4516 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4517 PL_parser->bufptr = buf + bufptr_pos;
4518 PL_parser->oldbufptr = buf + oldbufptr_pos;
4519 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4520 PL_parser->linestart = buf + linestart_pos;
4521 if (PL_parser->last_uni)
4522 PL_parser->last_uni = buf + last_uni_pos;
4523 if (PL_parser->last_lop)
4524 PL_parser->last_lop = buf + last_lop_pos;
4525 SvLEN_set(linestr, SvCUR(linestr));
4526 SvCUR_set(linestr, s - SvPVX(linestr));
4527 PL_parser->filtered = 1;
4537 /* Delete most recently added instance of this filter function. */
4539 Perl_filter_del(pTHX_ filter_t funcp)
4543 PERL_ARGS_ASSERT_FILTER_DEL;
4546 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4547 FPTR2DPTR(void*, funcp)));
4549 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4551 /* if filter is on top of stack (usual case) just pop it off */
4552 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4553 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4554 sv_free(av_pop(PL_rsfp_filters));
4558 /* we need to search for the correct entry and clear it */
4559 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4563 /* Invoke the idxth filter function for the current rsfp. */
4564 /* maxlen 0 = read one text line */
4566 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4571 /* This API is bad. It should have been using unsigned int for maxlen.
4572 Not sure if we want to change the API, but if not we should sanity
4573 check the value here. */
4574 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4576 PERL_ARGS_ASSERT_FILTER_READ;
4578 if (!PL_parser || !PL_rsfp_filters)
4580 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4581 /* Provide a default input filter to make life easy. */
4582 /* Note that we append to the line. This is handy. */
4583 DEBUG_P(PerlIO_printf(Perl_debug_log,
4584 "filter_read %d: from rsfp\n", idx));
4585 if (correct_length) {
4588 const int old_len = SvCUR(buf_sv);
4590 /* ensure buf_sv is large enough */
4591 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4592 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4593 correct_length)) <= 0) {
4594 if (PerlIO_error(PL_rsfp))
4595 return -1; /* error */
4597 return 0 ; /* end of file */
4599 SvCUR_set(buf_sv, old_len + len) ;
4600 SvPVX(buf_sv)[old_len + len] = '\0';
4603 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4604 if (PerlIO_error(PL_rsfp))
4605 return -1; /* error */
4607 return 0 ; /* end of file */
4610 return SvCUR(buf_sv);
4612 /* Skip this filter slot if filter has been deleted */
4613 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4614 DEBUG_P(PerlIO_printf(Perl_debug_log,
4615 "filter_read %d: skipped (filter deleted)\n",
4617 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4619 if (SvTYPE(datasv) != SVt_PVIO) {
4620 if (correct_length) {
4622 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4623 if (!remainder) return 0; /* eof */
4624 if (correct_length > remainder) correct_length = remainder;
4625 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4626 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4629 const char *s = SvEND(datasv);
4630 const char *send = SvPVX(datasv) + SvLEN(datasv);
4638 if (s == send) return 0; /* eof */
4639 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4640 SvCUR_set(datasv, s-SvPVX(datasv));
4642 return SvCUR(buf_sv);
4644 /* Get function pointer hidden within datasv */
4645 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4646 DEBUG_P(PerlIO_printf(Perl_debug_log,
4647 "filter_read %d: via function %p (%s)\n",
4648 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4649 /* Call function. The function is expected to */
4650 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4651 /* Return: <0:error, =0:eof, >0:not eof */
4653 save_scalar(PL_errgv);
4654 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4660 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4662 PERL_ARGS_ASSERT_FILTER_GETS;
4664 #ifdef PERL_CR_FILTER
4665 if (!PL_rsfp_filters) {
4666 filter_add(S_cr_textfilter,NULL);
4669 if (PL_rsfp_filters) {
4671 SvCUR_set(sv, 0); /* start with empty line */
4672 if (FILTER_READ(0, sv, 0) > 0)
4673 return ( SvPVX(sv) ) ;
4678 return (sv_gets(sv, PL_rsfp, append));
4682 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4686 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4688 if (memEQs(pkgname, len, "__PACKAGE__"))
4692 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4693 && (gv = gv_fetchpvn_flags(pkgname,
4695 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4697 return GvHV(gv); /* Foo:: */
4700 /* use constant CLASS => 'MyClass' */
4701 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4702 if (gv && GvCV(gv)) {
4703 SV * const sv = cv_const_sv(GvCV(gv));
4705 return gv_stashsv(sv, 0);
4708 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4713 S_tokenize_use(pTHX_ int is_use, char *s) {
4714 PERL_ARGS_ASSERT_TOKENIZE_USE;
4716 if (PL_expect != XSTATE)
4717 /* diag_listed_as: "use" not allowed in expression */
4718 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4719 is_use ? "use" : "no"));
4722 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4723 s = force_version(s, TRUE);
4724 if (*s == ';' || *s == '}'
4725 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4726 NEXTVAL_NEXTTOKE.opval = NULL;
4727 force_next(BAREWORD);
4729 else if (*s == 'v') {
4730 s = force_word(s,BAREWORD,FALSE,TRUE);
4731 s = force_version(s, FALSE);
4735 s = force_word(s,BAREWORD,FALSE,TRUE);
4736 s = force_version(s, FALSE);
4738 pl_yylval.ival = is_use;
4742 static const char* const exp_name[] =
4743 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4744 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4745 "SIGVAR", "TERMORDORDOR"
4749 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4751 S_word_takes_any_delimiter(char *p, STRLEN len)
4753 return (len == 1 && strchr("msyq", p[0]))
4755 && ((p[0] == 't' && p[1] == 'r')
4756 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4760 S_check_scalar_slice(pTHX_ char *s)
4763 while (SPACE_OR_TAB(*s)) s++;
4764 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4770 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4771 || (*s && strchr(" \t$#+-'\"", *s)))
4773 s += UTF ? UTF8SKIP(s) : 1;
4775 if (*s == '}' || *s == ']')
4776 pl_yylval.ival = OPpSLICEWARNING;
4779 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4781 S_lex_token_boundary(pTHX)
4783 PL_oldoldbufptr = PL_oldbufptr;
4784 PL_oldbufptr = PL_bufptr;
4787 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4789 S_vcs_conflict_marker(pTHX_ char *s)
4791 lex_token_boundary();
4793 yyerror("Version control conflict marker");
4794 while (s < PL_bufend && *s != '\n')
4802 Works out what to call the token just pulled out of the input
4803 stream. The yacc parser takes care of taking the ops we return and
4804 stitching them into a tree.
4807 The type of the next token
4810 Check if we have already built the token; if so, use it.
4811 Switch based on the current state:
4812 - if we have a case modifier in a string, deal with that
4813 - handle other cases of interpolation inside a string
4814 - scan the next line if we are inside a format
4815 In the normal state, switch on the next character:
4817 if alphabetic, go to key lookup
4818 unrecognized character - croak
4819 - 0/4/26: handle end-of-line or EOF
4820 - cases for whitespace
4821 - \n and #: handle comments and line numbers
4822 - various operators, brackets and sigils
4825 - 'v': vstrings (or go to key lookup)
4826 - 'x' repetition operator (or go to key lookup)
4827 - other ASCII alphanumerics (key lookup begins here):
4830 scan built-in keyword (but do nothing with it yet)
4831 check for statement label
4832 check for lexical subs
4833 goto just_a_word if there is one
4834 see whether built-in keyword is overridden
4835 switch on keyword number:
4836 - default: just_a_word:
4837 not a built-in keyword; handle bareword lookup
4838 disambiguate between method and sub call
4839 fall back to bareword
4840 - cases for built-in keywords
4848 char *s = PL_bufptr;
4852 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4856 /* orig_keyword, gvp, and gv are initialized here because
4857 * jump to the label just_a_word_zero can bypass their
4858 * initialization later. */
4859 I32 orig_keyword = 0;
4863 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4864 const U8* first_bad_char_loc;
4865 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4866 PL_bufend - PL_bufptr,
4867 &first_bad_char_loc)))
4869 _force_out_malformed_utf8_message(first_bad_char_loc,
4872 1 /* 1 means die */ );
4873 NOT_REACHED; /* NOTREACHED */
4875 PL_parser->recheck_utf8_validity = FALSE;
4878 SV* tmp = newSVpvs("");
4879 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4880 (IV)CopLINE(PL_curcop),
4881 lex_state_names[PL_lex_state],
4882 exp_name[PL_expect],
4883 pv_display(tmp, s, strlen(s), 0, 60));
4887 /* when we've already built the next token, just pull it out of the queue */
4890 pl_yylval = PL_nextval[PL_nexttoke];
4893 next_type = PL_nexttype[PL_nexttoke];
4894 if (next_type & (7<<24)) {
4895 if (next_type & (1<<24)) {
4896 if (PL_lex_brackets > 100)
4897 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4898 PL_lex_brackstack[PL_lex_brackets++] =
4899 (char) ((next_type >> 16) & 0xff);
4901 if (next_type & (2<<24))
4902 PL_lex_allbrackets++;
4903 if (next_type & (4<<24))
4904 PL_lex_allbrackets--;
4905 next_type &= 0xffff;
4907 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4911 switch (PL_lex_state) {
4913 case LEX_INTERPNORMAL:
4916 /* interpolated case modifiers like \L \U, including \Q and \E.
4917 when we get here, PL_bufptr is at the \
4919 case LEX_INTERPCASEMOD:
4921 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4923 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4924 PL_bufptr, PL_bufend, *PL_bufptr);
4926 /* handle \E or end of string */
4927 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4929 if (PL_lex_casemods) {
4930 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4931 PL_lex_casestack[PL_lex_casemods] = '\0';
4933 if (PL_bufptr != PL_bufend
4934 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4935 || oldmod == 'F')) {
4937 PL_lex_state = LEX_INTERPCONCAT;
4939 PL_lex_allbrackets--;
4942 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4943 /* Got an unpaired \E */
4944 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4945 "Useless use of \\E");
4947 if (PL_bufptr != PL_bufend)
4949 PL_lex_state = LEX_INTERPCONCAT;
4953 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4954 "### Saw case modifier\n"); });
4956 if (s[1] == '\\' && s[2] == 'E') {
4958 PL_lex_state = LEX_INTERPCONCAT;
4963 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4964 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4966 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4968 if ((*s == 'L' || *s == 'U' || *s == 'F')
4969 && (strpbrk(PL_lex_casestack, "LUF")))
4971 PL_lex_casestack[--PL_lex_casemods] = '\0';
4972 PL_lex_allbrackets--;
4975 if (PL_lex_casemods > 10)
4976 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4977 PL_lex_casestack[PL_lex_casemods++] = *s;
4978 PL_lex_casestack[PL_lex_casemods] = '\0';
4979 PL_lex_state = LEX_INTERPCONCAT;
4980 NEXTVAL_NEXTTOKE.ival = 0;
4981 force_next((2<<24)|'(');
4983 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4985 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4987 NEXTVAL_NEXTTOKE.ival = OP_LC;
4989 NEXTVAL_NEXTTOKE.ival = OP_UC;
4991 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4993 NEXTVAL_NEXTTOKE.ival = OP_FC;
4995 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4999 if (PL_lex_starts) {
5002 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5003 if (PL_lex_casemods == 1 && PL_lex_inpat)
5006 AopNOASSIGN(OP_CONCAT);
5012 case LEX_INTERPPUSH:
5013 return REPORT(sublex_push());
5015 case LEX_INTERPSTART:
5016 if (PL_bufptr == PL_bufend)
5017 return REPORT(sublex_done());
5018 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
5019 "### Interpolated variable\n"); });
5021 /* for /@a/, we leave the joining for the regex engine to do
5022 * (unless we're within \Q etc) */
5023 PL_lex_dojoin = (*PL_bufptr == '@'
5024 && (!PL_lex_inpat || PL_lex_casemods));
5025 PL_lex_state = LEX_INTERPNORMAL;
5026 if (PL_lex_dojoin) {
5027 NEXTVAL_NEXTTOKE.ival = 0;
5029 force_ident("\"", '$');
5030 NEXTVAL_NEXTTOKE.ival = 0;
5032 NEXTVAL_NEXTTOKE.ival = 0;
5033 force_next((2<<24)|'(');
5034 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5037 /* Convert (?{...}) and friends to 'do {...}' */
5038 if (PL_lex_inpat && *PL_bufptr == '(') {
5039 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5041 if (*PL_bufptr != '{')
5043 PL_expect = XTERMBLOCK;
5047 if (PL_lex_starts++) {
5049 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5050 if (!PL_lex_casemods && PL_lex_inpat)
5053 AopNOASSIGN(OP_CONCAT);
5057 case LEX_INTERPENDMAYBE:
5058 if (intuit_more(PL_bufptr, PL_bufend)) {
5059 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5065 if (PL_lex_dojoin) {
5066 const U8 dojoin_was = PL_lex_dojoin;
5067 PL_lex_dojoin = FALSE;
5068 PL_lex_state = LEX_INTERPCONCAT;
5069 PL_lex_allbrackets--;
5070 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
5072 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5073 && SvEVALED(PL_lex_repl))
5075 if (PL_bufptr != PL_bufend)
5076 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5079 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5080 re_eval_str. If the here-doc body’s length equals the previous
5081 value of re_eval_start, re_eval_start will now be null. So
5082 check re_eval_str as well. */
5083 if (PL_parser->lex_shared->re_eval_start
5084 || PL_parser->lex_shared->re_eval_str) {
5086 if (*PL_bufptr != ')')
5087 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5089 /* having compiled a (?{..}) expression, return the original
5090 * text too, as a const */
5091 if (PL_parser->lex_shared->re_eval_str) {
5092 sv = PL_parser->lex_shared->re_eval_str;
5093 PL_parser->lex_shared->re_eval_str = NULL;
5095 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5096 SvPV_shrink_to_cur(sv);
5098 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5099 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5100 NEXTVAL_NEXTTOKE.opval =
5101 newSVOP(OP_CONST, 0,
5104 PL_parser->lex_shared->re_eval_start = NULL;
5110 case LEX_INTERPCONCAT:
5112 if (PL_lex_brackets)
5113 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5114 (long) PL_lex_brackets);
5116 if (PL_bufptr == PL_bufend)
5117 return REPORT(sublex_done());
5119 /* m'foo' still needs to be parsed for possible (?{...}) */
5120 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5121 SV *sv = newSVsv(PL_linestr);
5123 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5127 int save_error_count = PL_error_count;
5129 s = scan_const(PL_bufptr);
5131 /* Set flag if this was a pattern and there were errors. op.c will
5132 * refuse to compile a pattern with this flag set. Otherwise, we
5133 * could get segfaults, etc. */
5134 if (PL_lex_inpat && PL_error_count > save_error_count) {
5135 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5138 PL_lex_state = LEX_INTERPCASEMOD;
5140 PL_lex_state = LEX_INTERPSTART;
5143 if (s != PL_bufptr) {
5144 NEXTVAL_NEXTTOKE = pl_yylval;
5147 if (PL_lex_starts++) {
5148 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5149 if (!PL_lex_casemods && PL_lex_inpat)
5152 AopNOASSIGN(OP_CONCAT);
5162 if (PL_parser->sub_error_count != PL_error_count) {
5163 /* There was an error parsing a formline, which tends to
5165 Unlike interpolated sub-parsing, we can't treat any of
5166 these as recoverable, so no need to check sub_no_recover.
5170 assert(PL_lex_formbrack);
5171 s = scan_formline(PL_bufptr);
5172 if (!PL_lex_formbrack)
5181 /* We really do *not* want PL_linestr ever becoming a COW. */
5182 assert (!SvIsCOW(PL_linestr));
5184 PL_oldoldbufptr = PL_oldbufptr;
5186 PL_parser->saw_infix_sigil = 0;
5188 if (PL_in_my == KEY_sigvar) {
5189 /* we expect the sigil and optional var name part of a
5190 * signature element here. Since a '$' is not necessarily
5191 * followed by a var name, handle it specially here; the general
5192 * yylex code would otherwise try to interpret whatever follows
5193 * as a var; e.g. ($, ...) would be seen as the var '$,'
5200 PL_bufptr = s; /* for error reporting */
5205 /* spot stuff that looks like an prototype */
5206 if (strchr("$:@%&*;\\[]", *s)) {
5207 yyerror("Illegal character following sigil in a subroutine signature");
5210 /* '$#' is banned, while '$ # comment' isn't */
5212 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5216 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5217 char *dest = PL_tokenbuf + 1;
5218 /* read var name, including sigil, into PL_tokenbuf */
5219 PL_tokenbuf[0] = sigil;
5220 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5221 0, cBOOL(UTF), FALSE, FALSE);
5223 assert(PL_tokenbuf[1]); /* we have a variable name */
5231 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5232 * as the ASSIGNOP, and exclude other tokens that start with =
5234 if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5235 /* save now to report with the same context as we did when
5236 * all ASSIGNOPS were accepted */
5240 NEXTVAL_NEXTTOKE.ival = 0;
5241 force_next(ASSIGNOP);
5244 else if (*s == ',' || *s == ')') {
5245 PL_expect = XOPERATOR;
5248 /* make sure the context shows the unexpected character and
5249 * hopefully a bit more */
5251 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5253 PL_bufptr = s; /* for error reporting */
5254 yyerror("Illegal operator following parameter in a subroutine signature");
5258 NEXTVAL_NEXTTOKE.ival = sigil;
5259 force_next('p'); /* force a signature pending identifier */
5266 case ',': /* handle ($a,,$b) */
5271 yyerror("A signature parameter must start with '$', '@' or '%'");
5272 /* very crude error recovery: skip to likely next signature
5274 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5285 if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5289 else if (isALNUMC(*s)) {
5293 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5296 STRLEN skiplen = UTF8SKIP(s);
5297 STRLEN stravail = PL_bufend - s;
5298 c = sv_uni_display(dsv, newSVpvn_flags(s,
5299 skiplen > stravail ? stravail : skiplen,
5300 SVs_TEMP | SVf_UTF8),
5301 10, UNI_DISPLAY_ISPRINT);
5304 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5307 if (s >= PL_linestart) {
5311 /* somehow (probably due to a parse failure), PL_linestart has advanced
5312 * pass PL_bufptr, get a reasonable beginning of line
5315 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5318 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5319 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5320 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5323 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5324 UTF8fARG(UTF, (s - d), d),
5329 goto fake_eof; /* emulate EOF on ^D or ^Z */
5331 if ((!PL_rsfp || PL_lex_inwhat)
5332 && (!PL_parser->filtered || s+1 < PL_bufend)) {
5336 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5338 yyerror((const char *)
5340 ? "Format not terminated"
5341 : "Missing right curly or square bracket"));
5343 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5344 "### Tokener got EOF\n");
5348 if (s++ < PL_bufend)
5349 goto retry; /* ignore stray nulls */
5352 if (!PL_in_eval && !PL_preambled) {
5353 PL_preambled = TRUE;
5355 /* Generate a string of Perl code to load the debugger.
5356 * If PERL5DB is set, it will return the contents of that,
5357 * otherwise a compile-time require of perl5db.pl. */
5359 const char * const pdb = PerlEnv_getenv("PERL5DB");
5362 sv_setpv(PL_linestr, pdb);
5363 sv_catpvs(PL_linestr,";");
5365 SETERRNO(0,SS_NORMAL);
5366 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5368 PL_parser->preambling = CopLINE(PL_curcop);
5370 SvPVCLEAR(PL_linestr);
5371 if (PL_preambleav) {
5372 SV **svp = AvARRAY(PL_preambleav);
5373 SV **const end = svp + AvFILLp(PL_preambleav);
5375 sv_catsv(PL_linestr, *svp);
5377 sv_catpvs(PL_linestr, ";");
5379 sv_free(MUTABLE_SV(PL_preambleav));
5380 PL_preambleav = NULL;
5383 sv_catpvs(PL_linestr,
5384 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5385 if (PL_minus_n || PL_minus_p) {
5386 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5388 sv_catpvs(PL_linestr,"chomp;");
5391 if ( ( *PL_splitstr == '/'
5392 || *PL_splitstr == '\''
5393 || *PL_splitstr == '"')
5394 && strchr(PL_splitstr + 1, *PL_splitstr))
5396 /* strchr is ok, because -F pattern can't contain
5398 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5401 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5402 bytes can be used as quoting characters. :-) */
5403 const char *splits = PL_splitstr;
5404 sv_catpvs(PL_linestr, "our @F=split(q\0");
5407 if (*splits == '\\')
5408 sv_catpvn(PL_linestr, splits, 1);
5409 sv_catpvn(PL_linestr, splits, 1);
5410 } while (*splits++);
5411 /* This loop will embed the trailing NUL of
5412 PL_linestr as the last thing it does before
5414 sv_catpvs(PL_linestr, ");");
5418 sv_catpvs(PL_linestr,"our @F=split(' ');");
5421 sv_catpvs(PL_linestr, "\n");
5422 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5423 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5424 PL_last_lop = PL_last_uni = NULL;
5425 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5426 update_debugger_info(PL_linestr, NULL, 0);
5431 bof = cBOOL(PL_rsfp);
5434 fake_eof = LEX_FAKE_EOF;
5436 PL_bufptr = PL_bufend;
5437 COPLINE_INC_WITH_HERELINES;
5438 if (!lex_next_chunk(fake_eof)) {
5439 CopLINE_dec(PL_curcop);
5441 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5443 CopLINE_dec(PL_curcop);
5445 /* If it looks like the start of a BOM or raw UTF-16,
5446 * check if it in fact is. */
5449 || *(U8*)s == BOM_UTF8_FIRST_BYTE
5453 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5454 bof = (offset == (Off_t)SvCUR(PL_linestr));
5455 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5456 /* offset may include swallowed CR */
5458 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5461 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5462 s = swallow_bom((U8*)s);
5465 if (PL_parser->in_pod) {
5466 /* Incest with pod. */
5467 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5470 SvPVCLEAR(PL_linestr);
5471 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5472 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5473 PL_last_lop = PL_last_uni = NULL;
5474 PL_parser->in_pod = 0;
5477 if (PL_rsfp || PL_parser->filtered)
5478 incline(s, PL_bufend);
5479 } while (PL_parser->in_pod);
5480 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5481 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5482 PL_last_lop = PL_last_uni = NULL;
5483 if (CopLINE(PL_curcop) == 1) {
5484 while (s < PL_bufend && isSPACE(*s))
5486 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5490 if (*s == '#' && *(s+1) == '!')
5492 #ifdef ALTERNATE_SHEBANG
5494 static char const as[] = ALTERNATE_SHEBANG;
5495 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5496 d = s + (sizeof(as) - 1);
5498 #endif /* ALTERNATE_SHEBANG */
5507 while (*d && !isSPACE(*d))
5511 #ifdef ARG_ZERO_IS_SCRIPT
5512 if (ipathend > ipath) {
5514 * HP-UX (at least) sets argv[0] to the script name,
5515 * which makes $^X incorrect. And Digital UNIX and Linux,
5516 * at least, set argv[0] to the basename of the Perl
5517 * interpreter. So, having found "#!", we'll set it right.
5519 SV* copfilesv = CopFILESV(PL_curcop);
5522 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5524 assert(SvPOK(x) || SvGMAGICAL(x));
5525 if (sv_eq(x, copfilesv)) {
5526 sv_setpvn(x, ipath, ipathend - ipath);
5532 const char *bstart = SvPV_const(copfilesv, blen);
5533 const char * const lstart = SvPV_const(x, llen);
5535 bstart += blen - llen;
5536 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5537 sv_setpvn(x, ipath, ipathend - ipath);
5544 /* Anything to do if no copfilesv? */
5546 TAINT_NOT; /* $^X is always tainted, but that's OK */
5548 #endif /* ARG_ZERO_IS_SCRIPT */
5553 d = instr(s,"perl -");
5555 d = instr(s,"perl");
5557 /* avoid getting into infinite loops when shebang
5558 * line contains "Perl" rather than "perl" */
5560 for (d = ipathend-4; d >= ipath; --d) {
5561 if (isALPHA_FOLD_EQ(*d, 'p')
5562 && !ibcmp(d, "perl", 4))
5572 #ifdef ALTERNATE_SHEBANG
5574 * If the ALTERNATE_SHEBANG on this system starts with a
5575 * character that can be part of a Perl expression, then if
5576 * we see it but not "perl", we're probably looking at the
5577 * start of Perl code, not a request to hand off to some
5578 * other interpreter. Similarly, if "perl" is there, but
5579 * not in the first 'word' of the line, we assume the line
5580 * contains the start of the Perl program.
5582 if (d && *s != '#') {
5583 const char *c = ipath;
5584 while (*c && !strchr("; \t\r\n\f\v#", *c))
5587 d = NULL; /* "perl" not in first word; ignore */
5589 *s = '#'; /* Don't try to parse shebang line */
5591 #endif /* ALTERNATE_SHEBANG */
5596 && !instr(s,"indir")
5597 && instr(PL_origargv[0],"perl"))
5604 while (s < PL_bufend && isSPACE(*s))
5606 if (s < PL_bufend) {
5607 Newx(newargv,PL_origargc+3,char*);
5609 while (s < PL_bufend && !isSPACE(*s))
5612 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5615 newargv = PL_origargv;
5618 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5620 Perl_croak(aTHX_ "Can't exec %s", ipath);
5623 while (*d && !isSPACE(*d))
5625 while (SPACE_OR_TAB(*d))
5629 const bool switches_done = PL_doswitches;
5630 const U32 oldpdb = PL_perldb;
5631 const bool oldn = PL_minus_n;
5632 const bool oldp = PL_minus_p;
5636 bool baduni = FALSE;
5638 const char *d2 = d1 + 1;
5639 if (parse_unicode_opts((const char **)&d2)
5643 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5644 const char * const m = d1;
5645 while (*d1 && !isSPACE(*d1))
5647 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5650 d1 = moreswitches(d1);
5652 if (PL_doswitches && !switches_done) {
5653 int argc = PL_origargc;
5654 char **argv = PL_origargv;
5657 } while (argc && argv[0][0] == '-' && argv[0][1]);
5658 init_argv_symbols(argc,argv);
5660 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5661 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5662 /* if we have already added "LINE: while (<>) {",
5663 we must not do it again */
5665 SvPVCLEAR(PL_linestr);
5666 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5667 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5668 PL_last_lop = PL_last_uni = NULL;
5669 PL_preambled = FALSE;
5670 if (PERLDB_LINE_OR_SAVESRC)
5671 (void)gv_fetchfile(PL_origfilename);
5678 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5679 PL_lex_state = LEX_FORMLINE;
5680 force_next(FORMRBRACK);
5685 #ifdef PERL_STRICT_CR
5686 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5688 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5690 case ' ': case '\t': case '\f': case '\v':
5695 if (PL_lex_state != LEX_NORMAL
5696 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5698 const bool in_comment = *s == '#';
5699 if (*s == '#' && s == PL_linestart && PL_in_eval
5700 && !PL_rsfp && !PL_parser->filtered) {
5701 /* handle eval qq[#line 1 "foo"\n ...] */
5702 CopLINE_dec(PL_curcop);
5703 incline(s, PL_bufend);
5706 while (d < PL_bufend && *d != '\n')
5711 if (in_comment && d == PL_bufend
5712 && PL_lex_state == LEX_INTERPNORMAL
5713 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5714 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5716 incline(s, PL_bufend);
5717 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5718 PL_lex_state = LEX_FORMLINE;
5719 force_next(FORMRBRACK);
5724 while (s < PL_bufend && *s != '\n')
5730 incline(s, PL_bufend);
5735 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5743 while (s < PL_bufend && SPACE_OR_TAB(*s))
5746 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5747 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5748 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5749 OPERATOR('-'); /* unary minus */
5752 case 'r': ftst = OP_FTEREAD; break;
5753 case 'w': ftst = OP_FTEWRITE; break;
5754 case 'x': ftst = OP_FTEEXEC; break;
5755 case 'o': ftst = OP_FTEOWNED; break;
5756 case 'R': ftst = OP_FTRREAD; break;
5757 case 'W': ftst = OP_FTRWRITE; break;
5758 case 'X': ftst = OP_FTREXEC; break;
5759 case 'O': ftst = OP_FTROWNED; break;
5760 case 'e': ftst = OP_FTIS; break;
5761 case 'z': ftst = OP_FTZERO; break;
5762 case 's': ftst = OP_FTSIZE; break;
5763 case 'f': ftst = OP_FTFILE; break;
5764 case 'd': ftst = OP_FTDIR; break;
5765 case 'l': ftst = OP_FTLINK; break;
5766 case 'p': ftst = OP_FTPIPE; break;
5767 case 'S': ftst = OP_FTSOCK; break;
5768 case 'u': ftst = OP_FTSUID; break;
5769 case 'g': ftst = OP_FTSGID; break;
5770 case 'k': ftst = OP_FTSVTX; break;
5771 case 'b': ftst = OP_FTBLK; break;
5772 case 'c': ftst = OP_FTCHR; break;
5773 case 't': ftst = OP_FTTTY; break;
5774 case 'T': ftst = OP_FTTEXT; break;
5775 case 'B': ftst = OP_FTBINARY; break;
5776 case 'M': case 'A': case 'C':
5777 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5779 case 'M': ftst = OP_FTMTIME; break;
5780 case 'A': ftst = OP_FTATIME; break;
5781 case 'C': ftst = OP_FTCTIME; break;
5789 PL_last_uni = PL_oldbufptr;
5790 PL_last_lop_op = (OPCODE)ftst;
5791 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5792 "### Saw file test %c\n", (int)tmp);
5797 /* Assume it was a minus followed by a one-letter named
5798 * subroutine call (or a -bareword), then. */
5799 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5800 "### '-%c' looked like a file test but was not\n",
5807 const char tmp = *s++;
5810 if (PL_expect == XOPERATOR)
5815 else if (*s == '>') {
5818 if (((*s == '$' || *s == '&') && s[1] == '*')
5819 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5820 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5821 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5824 PL_expect = XPOSTDEREF;
5827 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5828 s = force_word(s,METHOD,FALSE,TRUE);
5836 if (PL_expect == XOPERATOR) {
5838 && !PL_lex_allbrackets
5839 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5847 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5849 OPERATOR('-'); /* unary minus */
5855 const char tmp = *s++;
5858 if (PL_expect == XOPERATOR)
5863 if (PL_expect == XOPERATOR) {
5865 && !PL_lex_allbrackets
5866 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5874 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5881 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5882 if (PL_expect != XOPERATOR) {
5883 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5884 PL_expect = XOPERATOR;
5885 force_ident(PL_tokenbuf, '*');
5893 if (*s == '=' && !PL_lex_allbrackets
5894 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5902 && !PL_lex_allbrackets
5903 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5908 PL_parser->saw_infix_sigil = 1;
5913 if (PL_expect == XOPERATOR) {
5915 && !PL_lex_allbrackets
5916 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5921 PL_parser->saw_infix_sigil = 1;
5924 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5925 PL_tokenbuf[0] = '%';
5926 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5928 if (!PL_tokenbuf[1]) {
5931 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5932 && intuit_more(s, PL_bufend)) {
5934 PL_tokenbuf[0] = '@';
5936 PL_expect = XOPERATOR;
5937 force_ident_maybe_lex('%');
5942 bof = FEATURE_BITWISE_IS_ENABLED;
5943 if (bof && s[1] == '.')
5945 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5946 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5952 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5954 if (PL_lex_brackets > 100)
5955 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5956 PL_lex_brackstack[PL_lex_brackets++] = 0;
5957 PL_lex_allbrackets++;
5959 const char tmp = *s++;
5964 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5966 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5969 Perl_ck_warner_d(aTHX_
5970 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5971 "Smartmatch is experimental");
5975 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5977 BCop(OP_SCOMPLEMENT);
5979 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5981 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5988 goto just_a_word_zero_gv;
5994 switch (PL_expect) {
5996 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5998 PL_bufptr = s; /* update in case we back off */
6001 "Use of := for an empty attribute list is not allowed");
6008 PL_expect = XTERMBLOCK;
6010 /* NB: as well as parsing normal attributes, we also end up
6011 * here if there is something looking like attributes
6012 * following a signature (which is illegal, but used to be
6013 * legal in 5.20..5.26). If the latter, we still parse the
6014 * attributes so that error messages(s) are less confusing,
6015 * but ignore them (parser->sig_seen).
6019 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6020 bool sig = PL_parser->sig_seen;
6023 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6024 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
6025 if (tmp < 0) tmp = -tmp;
6040 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6042 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6047 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6049 COPLINE_SET_FROM_MULTI_END;
6052 sv_catsv(sv, PL_lex_stuff);
6053 attrs = op_append_elem(OP_LIST, attrs,
6054 newSVOP(OP_CONST, 0, sv));
6055 SvREFCNT_dec_NN(PL_lex_stuff);
6056 PL_lex_stuff = NULL;
6059 /* NOTE: any CV attrs applied here need to be part of
6060 the CVf_BUILTIN_ATTRS define in cv.h! */
6061 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
6064 CvLVALUE_on(PL_compcv);
6066 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
6069 CvMETHOD_on(PL_compcv);
6071 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
6075 Perl_ck_warner_d(aTHX_
6076 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
6077 ":const is experimental"
6079 CvANONCONST_on(PL_compcv);
6080 if (!CvANON(PL_compcv))
6081 yyerror(":const is not permitted on named "
6085 /* After we've set the flags, it could be argued that
6086 we don't need to do the attributes.pm-based setting
6087 process, and shouldn't bother appending recognized
6088 flags. To experiment with that, uncomment the
6089 following "else". (Note that's already been
6090 uncommented. That keeps the above-applied built-in
6091 attributes from being intercepted (and possibly
6092 rejected) by a package's attribute routines, but is
6093 justified by the performance win for the common case
6094 of applying only built-in attributes.) */
6096 attrs = op_append_elem(OP_LIST, attrs,
6097 newSVOP(OP_CONST, 0,
6101 if (*s == ':' && s[1] != ':')
6104 break; /* require real whitespace or :'s */
6105 /* XXX losing whitespace on sequential attributes here */
6110 && !(PL_expect == XOPERATOR
6111 ? (*s == '=' || *s == ')')
6112 : (*s == '{' || *s == '(')))
6114 const char q = ((*s == '\'') ? '"' : '\'');
6115 /* If here for an expression, and parsed no attrs, back
6117 if (PL_expect == XOPERATOR && !attrs) {
6121 /* MUST advance bufptr here to avoid bogus "at end of line"
6122 context messages from yyerror().
6125 yyerror( (const char *)
6127 ? Perl_form(aTHX_ "Invalid separator character "
6128 "%c%c%c in attribute list", q, *s, q)
6129 : "Unterminated attribute list" ) );
6136 if (PL_parser->sig_seen) {
6137 /* see comment about about sig_seen and parser error
6141 Perl_croak(aTHX_ "Subroutine attributes must come "
6142 "before the signature");
6145 NEXTVAL_NEXTTOKE.opval = attrs;
6151 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6155 PL_lex_allbrackets--;
6159 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6160 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6164 PL_lex_allbrackets++;
6167 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6174 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6177 PL_lex_allbrackets--;
6183 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6186 if (PL_lex_brackets <= 0)
6187 /* diag_listed_as: Unmatched right %s bracket */
6188 yyerror("Unmatched right square bracket");
6191 PL_lex_allbrackets--;
6192 if (PL_lex_state == LEX_INTERPNORMAL) {
6193 if (PL_lex_brackets == 0) {
6194 if (*s == '-' && s[1] == '>')
6195 PL_lex_state = LEX_INTERPENDMAYBE;
6196 else if (*s != '[' && *s != '{')
6197 PL_lex_state = LEX_INTERPEND;
6204 if (PL_lex_brackets > 100) {
6205 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6207 switch (PL_expect) {
6210 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6211 PL_lex_allbrackets++;
6212 OPERATOR(HASHBRACK);
6214 while (s < PL_bufend && SPACE_OR_TAB(*s))
6217 PL_tokenbuf[0] = '\0';
6218 if (d < PL_bufend && *d == '-') {
6219 PL_tokenbuf[0] = '-';
6221 while (d < PL_bufend && SPACE_OR_TAB(*d))
6224 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6225 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6227 while (d < PL_bufend && SPACE_OR_TAB(*d))
6230 const char minus = (PL_tokenbuf[0] == '-');
6231 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6239 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6240 PL_lex_allbrackets++;
6245 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6246 PL_lex_allbrackets++;
6250 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6251 PL_lex_allbrackets++;
6256 if (PL_oldoldbufptr == PL_last_lop)
6257 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6259 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6260 PL_lex_allbrackets++;
6263 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6265 /* This hack is to get the ${} in the message. */
6267 yyerror("syntax error");
6270 OPERATOR(HASHBRACK);
6272 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6273 /* ${...} or @{...} etc., but not print {...}
6274 * Skip the disambiguation and treat this as a block.
6276 goto block_expectation;
6278 /* This hack serves to disambiguate a pair of curlies
6279 * as being a block or an anon hash. Normally, expectation
6280 * determines that, but in cases where we're not in a
6281 * position to expect anything in particular (like inside
6282 * eval"") we have to resolve the ambiguity. This code
6283 * covers the case where the first term in the curlies is a
6284 * quoted string. Most other cases need to be explicitly
6285 * disambiguated by prepending a "+" before the opening
6286 * curly in order to force resolution as an anon hash.
6288 * XXX should probably propagate the outer expectation
6289 * into eval"" to rely less on this hack, but that could
6290 * potentially break current behavior of eval"".
6294 if (*s == '\'' || *s == '"' || *s == '`') {
6295 /* common case: get past first string, handling escapes */
6296 for (t++; t < PL_bufend && *t != *s;)
6301 else if (*s == 'q') {
6304 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6305 && !isWORDCHAR(*t))))
6307 /* skip q//-like construct */
6309 char open, close, term;
6312 while (t < PL_bufend && isSPACE(*t))
6314 /* check for q => */
6315 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6316 OPERATOR(HASHBRACK);
6320 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6324 for (t++; t < PL_bufend; t++) {
6325 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6327 else if (*t == open)
6331 for (t++; t < PL_bufend; t++) {
6332 if (*t == '\\' && t+1 < PL_bufend)
6334 else if (*t == close && --brackets <= 0)
6336 else if (*t == open)
6343 /* skip plain q word */
6344 while ( t < PL_bufend
6345 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6347 t += UTF ? UTF8SKIP(t) : 1;
6350 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6351 t += UTF ? UTF8SKIP(t) : 1;
6352 while ( t < PL_bufend
6353 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6355 t += UTF ? UTF8SKIP(t) : 1;
6358 while (t < PL_bufend && isSPACE(*t))
6360 /* if comma follows first term, call it an anon hash */
6361 /* XXX it could be a comma expression with loop modifiers */
6362 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6363 || (*t == '=' && t[1] == '>')))
6364 OPERATOR(HASHBRACK);
6365 if (PL_expect == XREF)
6368 /* If there is an opening brace or 'sub:', treat it
6369 as a term to make ${{...}}{k} and &{sub:attr...}
6370 dwim. Otherwise, treat it as a statement, so
6371 map {no strict; ...} works.
6378 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6391 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6397 pl_yylval.ival = CopLINE(PL_curcop);
6398 PL_copline = NOLINE; /* invalidate current command line number */
6399 TOKEN(formbrack ? '=' : '{');
6401 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6404 assert(s != PL_bufend);
6406 if (PL_lex_brackets <= 0)
6407 /* diag_listed_as: Unmatched right %s bracket */
6408 yyerror("Unmatched right curly bracket");
6410 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6411 PL_lex_allbrackets--;
6412 if (PL_lex_state == LEX_INTERPNORMAL) {
6413 if (PL_lex_brackets == 0) {
6414 if (PL_expect & XFAKEBRACK) {
6415 PL_expect &= XENUMMASK;
6416 PL_lex_state = LEX_INTERPEND;
6418 return yylex(); /* ignore fake brackets */
6420 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6421 && SvEVALED(PL_lex_repl))
6422 PL_lex_state = LEX_INTERPEND;
6423 else if (*s == '-' && s[1] == '>')
6424 PL_lex_state = LEX_INTERPENDMAYBE;
6425 else if (*s != '[' && *s != '{')
6426 PL_lex_state = LEX_INTERPEND;
6429 if (PL_expect & XFAKEBRACK) {
6430 PL_expect &= XENUMMASK;
6432 return yylex(); /* ignore fake brackets */
6434 force_next(formbrack ? '.' : '}');
6435 if (formbrack) LEAVE_with_name("lex_format");
6436 if (formbrack == 2) { /* means . where arguments were expected */
6442 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6445 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6446 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6453 if (PL_expect == XOPERATOR) {
6454 if ( PL_bufptr == PL_linestart
6455 && ckWARN(WARN_SEMICOLON)
6456 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6458 CopLINE_dec(PL_curcop);
6459 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6460 CopLINE_inc(PL_curcop);
6463 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6465 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6466 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6472 PL_parser->saw_infix_sigil = 1;
6473 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6479 PL_tokenbuf[0] = '&';
6480 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6481 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6482 if (PL_tokenbuf[1]) {
6483 force_ident_maybe_lex('&');
6492 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6493 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6501 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6503 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6504 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6508 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6512 const char tmp = *s++;
6514 if ( (s == PL_linestart+2 || s[-3] == '\n')
6515 && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6517 s = vcs_conflict_marker(s + 5);
6520 if (!PL_lex_allbrackets
6521 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6529 if (!PL_lex_allbrackets
6530 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6539 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6540 && strchr("+-*/%.^&|<",tmp))
6541 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6542 "Reversed %c= operator",(int)tmp);
6544 if (PL_expect == XSTATE
6546 && (s == PL_linestart+1 || s[-2] == '\n') )
6548 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6549 || PL_lex_state != LEX_NORMAL)
6554 incline(s, PL_bufend);
6555 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6557 s = (char *) memchr(s,'\n', d - s);
6562 incline(s, PL_bufend);
6570 PL_parser->in_pod = 1;
6574 if (PL_expect == XBLOCK) {
6576 #ifdef PERL_STRICT_CR
6577 while (SPACE_OR_TAB(*t))
6579 while (SPACE_OR_TAB(*t) || *t == '\r')
6582 if (*t == '\n' || *t == '#') {
6584 ENTER_with_name("lex_format");
6585 SAVEI8(PL_parser->form_lex_state);
6586 SAVEI32(PL_lex_formbrack);
6587 PL_parser->form_lex_state = PL_lex_state;
6588 PL_lex_formbrack = PL_lex_brackets + 1;
6589 PL_parser->sub_error_count = PL_error_count;
6593 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6602 const char tmp = *s++;
6604 /* was this !=~ where !~ was meant?
6605 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6607 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6608 const char *t = s+1;
6610 while (t < PL_bufend && isSPACE(*t))
6613 if (*t == '/' || *t == '?'
6614 || ((*t == 'm' || *t == 's' || *t == 'y')
6615 && !isWORDCHAR(t[1]))
6616 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6617 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6618 "!=~ should be !~");
6620 if (!PL_lex_allbrackets
6621 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6634 if (PL_expect != XOPERATOR) {
6635 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6637 if (s[1] == '<' && s[2] != '>') {
6638 if ( (s == PL_linestart || s[-1] == '\n')
6639 && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6641 s = vcs_conflict_marker(s + 7);
6644 s = scan_heredoc(s);
6647 s = scan_inputsymbol(s);
6648 PL_expect = XOPERATOR;
6649 TOKEN(sublex_start());
6655 if ( (s == PL_linestart+2 || s[-3] == '\n')
6656 && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6658 s = vcs_conflict_marker(s + 5);
6661 if (*s == '=' && !PL_lex_allbrackets
6662 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6667 SHop(OP_LEFT_SHIFT);
6672 if (!PL_lex_allbrackets
6673 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6681 if (!PL_lex_allbrackets
6682 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6691 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6699 const char tmp = *s++;
6701 if ( (s == PL_linestart+2 || s[-3] == '\n')
6702 && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6704 s = vcs_conflict_marker(s + 5);
6707 if (*s == '=' && !PL_lex_allbrackets
6708 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6713 SHop(OP_RIGHT_SHIFT);
6715 else if (tmp == '=') {
6716 if (!PL_lex_allbrackets
6717 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6726 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6735 if (PL_expect == XPOSTDEREF) {
6738 POSTDEREF(DOLSHARP);
6744 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6745 || strchr("{$:+-@", s[2])))
6747 PL_tokenbuf[0] = '@';
6748 s = scan_ident(s + 1, PL_tokenbuf + 1,
6749 sizeof PL_tokenbuf - 1, FALSE);
6750 if (PL_expect == XOPERATOR) {
6752 if (PL_bufptr > s) {
6754 PL_bufptr = PL_oldbufptr;
6756 no_op("Array length", d);
6758 if (!PL_tokenbuf[1])
6760 PL_expect = XOPERATOR;
6761 force_ident_maybe_lex('#');
6765 PL_tokenbuf[0] = '$';
6766 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6767 if (PL_expect == XOPERATOR) {
6769 if (PL_bufptr > s) {
6771 PL_bufptr = PL_oldbufptr;
6775 if (!PL_tokenbuf[1]) {
6777 yyerror("Final $ should be \\$ or $name");
6783 const char tmp = *s;
6784 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6787 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6788 && intuit_more(s, PL_bufend)) {
6790 PL_tokenbuf[0] = '@';
6791 if (ckWARN(WARN_SYNTAX)) {
6795 || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6798 t += UTF ? UTF8SKIP(t) : 1;
6801 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6802 while (t < PL_bufend && *t != ']')
6804 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6805 "Multidimensional syntax %" UTF8f " not supported",
6806 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6810 else if (*s == '{') {
6812 PL_tokenbuf[0] = '%';
6813 if ( strEQ(PL_tokenbuf+1, "SIG")
6814 && ckWARN(WARN_SYNTAX)
6815 && (t = (char *) memchr(s, '}', PL_bufend - s))
6816 && (t = (char *) memchr(t, '=', PL_bufend - t)))
6818 char tmpbuf[sizeof PL_tokenbuf];
6821 } while (isSPACE(*t));
6822 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6824 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6829 && get_cvn_flags(tmpbuf, len, UTF
6833 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6834 "You need to quote \"%" UTF8f "\"",
6835 UTF8fARG(UTF, len, tmpbuf));
6842 PL_expect = XOPERATOR;
6843 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
6844 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6845 if (!islop || PL_last_lop_op == OP_GREPSTART)
6846 PL_expect = XOPERATOR;
6847 else if (strchr("$@\"'`q", *s))
6848 PL_expect = XTERM; /* e.g. print $fh "foo" */
6849 else if ( strchr("&*<%", *s)
6850 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6852 PL_expect = XTERM; /* e.g. print $fh &sub */
6854 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6855 char tmpbuf[sizeof PL_tokenbuf];
6857 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6858 if ((t2 = keyword(tmpbuf, len, 0))) {
6859 /* binary operators exclude handle interpretations */
6871 PL_expect = XTERM; /* e.g. print $fh length() */
6876 PL_expect = XTERM; /* e.g. print $fh subr() */
6879 else if (isDIGIT(*s))
6880 PL_expect = XTERM; /* e.g. print $fh 3 */
6881 else if (*s == '.' && isDIGIT(s[1]))
6882 PL_expect = XTERM; /* e.g. print $fh .3 */
6883 else if ((*s == '?' || *s == '-' || *s == '+')
6884 && !isSPACE(s[1]) && s[1] != '=')
6885 PL_expect = XTERM; /* e.g. print $fh -1 */
6886 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6888 PL_expect = XTERM; /* e.g. print $fh /.../
6889 XXX except DORDOR operator
6891 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6893 PL_expect = XTERM; /* print $fh <<"EOF" */
6896 force_ident_maybe_lex('$');
6900 if (PL_expect == XPOSTDEREF)
6902 PL_tokenbuf[0] = '@';
6903 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6904 if (PL_expect == XOPERATOR) {
6906 if (PL_bufptr > s) {
6908 PL_bufptr = PL_oldbufptr;
6913 if (!PL_tokenbuf[1]) {
6916 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6918 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6919 && intuit_more(s, PL_bufend))
6922 PL_tokenbuf[0] = '%';
6924 /* Warn about @ where they meant $. */
6925 if (*s == '[' || *s == '{') {
6926 if (ckWARN(WARN_SYNTAX)) {
6927 S_check_scalar_slice(aTHX_ s);
6931 PL_expect = XOPERATOR;
6932 force_ident_maybe_lex('@');
6935 case '/': /* may be division, defined-or, or pattern */
6936 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6937 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6938 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6943 else if (PL_expect == XOPERATOR) {
6945 if (*s == '=' && !PL_lex_allbrackets
6946 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6954 /* Disable warning on "study /blah/" */
6955 if ( PL_oldoldbufptr == PL_last_uni
6956 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6957 || memNE(PL_last_uni, "study", 5)
6958 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6961 s = scan_pat(s,OP_MATCH);
6962 TERM(sublex_start());
6965 case '?': /* conditional */
6967 if (!PL_lex_allbrackets
6968 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6973 PL_lex_allbrackets++;
6977 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6978 #ifdef PERL_STRICT_CR
6981 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6983 && (s == PL_linestart || s[-1] == '\n') )
6986 formbrack = 2; /* dot seen where arguments expected */
6989 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6993 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6996 if (!PL_lex_allbrackets
6997 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
7005 pl_yylval.ival = OPf_SPECIAL;
7011 if (*s == '=' && !PL_lex_allbrackets
7012 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
7020 case '0': case '1': case '2': case '3': case '4':
7021 case '5': case '6': case '7': case '8': case '9':
7022 s = scan_num(s, &pl_yylval);
7023 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
7024 if (PL_expect == XOPERATOR)
7029 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7031 missingterm(NULL, 0);
7032 COPLINE_SET_FROM_MULTI_END;
7033 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
7034 if (PL_expect == XOPERATOR) {
7037 pl_yylval.ival = OP_CONST;
7038 TERM(sublex_start());
7041 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7044 printbuf("### Saw string before %s\n", s);
7046 PerlIO_printf(Perl_debug_log,
7047 "### Saw unterminated string\n");
7049 if (PL_expect == XOPERATOR) {
7053 missingterm(NULL, 0);
7054 pl_yylval.ival = OP_CONST;
7055 /* FIXME. I think that this can be const if char *d is replaced by
7056 more localised variables. */
7057 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
7058 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
7059 pl_yylval.ival = OP_STRINGIFY;
7063 if (pl_yylval.ival == OP_CONST)
7064 COPLINE_SET_FROM_MULTI_END;
7065 TERM(sublex_start());
7068 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7071 printbuf("### Saw backtick string before %s\n", s);
7073 PerlIO_printf(Perl_debug_log,
7074 "### Saw unterminated backtick string\n");
7076 if (PL_expect == XOPERATOR)
7077 no_op("Backticks",s);
7079 missingterm(NULL, 0);
7080 pl_yylval.ival = OP_BACKTICK;
7081 TERM(sublex_start());
7085 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7087 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
7089 if (PL_expect == XOPERATOR)
7090 no_op("Backslash",s);
7094 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
7095 char *start = s + 2;
7096 while (isDIGIT(*start) || *start == '_')
7098 if (*start == '.' && isDIGIT(start[1])) {
7099 s = scan_num(s, &pl_yylval);
7102 else if ((*start == ':' && start[1] == ':')
7103 || (PL_expect == XSTATE && *start == ':'))
7105 else if (PL_expect == XSTATE) {
7107 while (d < PL_bufend && isSPACE(*d)) d++;
7108 if (*d == ':') goto keylookup;
7110 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7111 if (!isALPHA(*start) && (PL_expect == XTERM
7112 || PL_expect == XREF || PL_expect == XSTATE
7113 || PL_expect == XTERMORDORDOR)) {
7114 GV *const gv = gv_fetchpvn_flags(s, start - s,
7115 UTF ? SVf_UTF8 : 0, SVt_PVCV);
7117 s = scan_num(s, &pl_yylval);
7124 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7177 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7179 /* Some keywords can be followed by any delimiter, including ':' */
7180 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7182 /* x::* is just a word, unless x is "CORE" */
7183 if (!anydelim && *s == ':' && s[1] == ':') {
7184 if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7189 while (d < PL_bufend && isSPACE(*d))
7190 d++; /* no comments skipped here, or s### is misparsed */
7192 /* Is this a word before a => operator? */
7193 if (*d == '=' && d[1] == '>') {
7197 = newSVOP(OP_CONST, 0,
7198 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7199 pl_yylval.opval->op_private = OPpCONST_BARE;
7203 /* Check for plugged-in keyword */
7207 char *saved_bufptr = PL_bufptr;
7209 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7211 if (result == KEYWORD_PLUGIN_DECLINE) {
7212 /* not a plugged-in keyword */
7213 PL_bufptr = saved_bufptr;
7214 } else if (result == KEYWORD_PLUGIN_STMT) {
7215 pl_yylval.opval = o;
7217 if (!PL_nexttoke) PL_expect = XSTATE;
7218 return REPORT(PLUGSTMT);
7219 } else if (result == KEYWORD_PLUGIN_EXPR) {
7220 pl_yylval.opval = o;
7222 if (!PL_nexttoke) PL_expect = XOPERATOR;
7223 return REPORT(PLUGEXPR);
7225 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7230 /* Check for built-in keyword */
7231 tmp = keyword(PL_tokenbuf, len, 0);
7233 /* Is this a label? */
7234 if (!anydelim && PL_expect == XSTATE
7235 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7238 newSVOP(OP_CONST, 0,
7239 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
7244 /* Check for lexical sub */
7245 if (PL_expect != XOPERATOR) {
7246 char tmpbuf[sizeof PL_tokenbuf + 1];
7248 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7249 off = pad_findmy_pvn(tmpbuf, len+1, 0);
7250 if (off != NOT_IN_PAD) {
7251 assert(off); /* we assume this is boolean-true below */
7252 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7253 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7254 HEK * const stashname = HvNAME_HEK(stash);
7255 sv = newSVhek(stashname);
7256 sv_catpvs(sv, "::");
7257 sv_catpvn_flags(sv, PL_tokenbuf, len,
7258 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7259 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7269 rv2cv_op = newOP(OP_PADANY, 0);
7270 rv2cv_op->op_targ = off;
7271 cv = find_lexical_cv(off);
7279 if (tmp < 0) { /* second-class keyword? */
7280 GV *ogv = NULL; /* override (winner) */
7281 GV *hgv = NULL; /* hidden (loser) */
7282 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7284 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7285 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7287 && (cv = GvCVu(gv)))
7289 if (GvIMPORTED_CV(gv))
7291 else if (! CvMETHOD(cv))
7295 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7298 && (isGV_with_GP(gv)
7299 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7300 : SvPCS_IMPORTED(gv)
7301 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7309 tmp = 0; /* overridden by import or by GLOBAL */
7312 && -tmp==KEY_lock /* XXX generalizable kludge */
7315 tmp = 0; /* any sub overrides "weak" keyword */
7317 else { /* no override */
7319 if (tmp == KEY_dump) {
7320 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
7324 if (hgv && tmp != KEY_x) /* never ambiguous */
7325 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7326 "Ambiguous call resolved as CORE::%s(), "
7327 "qualify as such or use &",
7332 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7333 && (!anydelim || *s != '#')) {
7334 /* no override, and not s### either; skipspace is safe here
7335 * check for => on following line */
7337 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7338 STRLEN soff = s - SvPVX(PL_linestr);
7340 arrow = *s == '=' && s[1] == '>';
7341 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7342 s = SvPVX(PL_linestr) + soff;
7350 /* Trade off - by using this evil construction we can pull the
7351 variable gv into the block labelled keylookup. If not, then
7352 we have to give it function scope so that the goto from the
7353 earlier ':' case doesn't bypass the initialisation. */
7354 just_a_word_zero_gv:
7364 default: /* not a keyword */
7367 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7369 bool no_op_error = FALSE;
7371 if (PL_expect == XOPERATOR) {
7372 if (PL_bufptr == PL_linestart) {
7373 CopLINE_dec(PL_curcop);
7374 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7375 CopLINE_inc(PL_curcop);
7378 /* We want to call no_op with s pointing after the
7379 bareword, so defer it. But we want it to come
7380 before the Bad name croak. */
7384 /* Get the rest if it looks like a package qualifier */
7386 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7388 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7391 no_op("Bareword",s);
7392 no_op_error = FALSE;
7395 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7396 UTF8fARG(UTF, len, PL_tokenbuf),
7397 *s == '\'' ? "'" : "::");
7403 no_op("Bareword",s);
7405 /* See if the name is "Foo::",
7406 in which case Foo is a bareword
7407 (and a package name). */
7410 && PL_tokenbuf[len - 2] == ':'
7411 && PL_tokenbuf[len - 1] == ':')
7413 if (ckWARN(WARN_BAREWORD)
7414 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7415 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7416 "Bareword \"%" UTF8f
7417 "\" refers to nonexistent package",
7418 UTF8fARG(UTF, len, PL_tokenbuf));
7420 PL_tokenbuf[len] = '\0';
7429 /* if we saw a global override before, get the right name */
7432 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7435 SV * const tmp_sv = sv;
7436 sv = newSVpvs("CORE::GLOBAL::");
7437 sv_catsv(sv, tmp_sv);
7438 SvREFCNT_dec(tmp_sv);
7442 /* Presume this is going to be a bareword of some sort. */
7444 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7445 pl_yylval.opval->op_private = OPpCONST_BARE;
7447 /* And if "Foo::", then that's what it certainly is. */
7453 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7454 const_op->op_private = OPpCONST_BARE;
7456 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7460 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7463 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7466 /* Use this var to track whether intuit_method has been
7467 called. intuit_method returns 0 or > 255. */
7470 /* See if it's the indirect object for a list operator. */
7473 && PL_oldoldbufptr < PL_bufptr
7474 && (PL_oldoldbufptr == PL_last_lop
7475 || PL_oldoldbufptr == PL_last_uni)
7476 && /* NO SKIPSPACE BEFORE HERE! */
7478 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7481 bool immediate_paren = *s == '(';
7484 /* (Now we can afford to cross potential line boundary.) */
7487 /* intuit_method() can indirectly call lex_next_chunk(),
7490 s_off = s - SvPVX(PL_linestr);
7491 /* Two barewords in a row may indicate method call. */
7492 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7494 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7496 /* the code at method: doesn't use s */
7499 s = SvPVX(PL_linestr) + s_off;
7501 /* If not a declared subroutine, it's an indirect object. */
7502 /* (But it's an indir obj regardless for sort.) */
7503 /* Also, if "_" follows a filetest operator, it's a bareword */
7506 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7508 && (PL_last_lop_op != OP_MAPSTART
7509 && PL_last_lop_op != OP_GREPSTART))))
7510 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7511 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7515 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7520 PL_expect = XOPERATOR;
7523 /* Is this a word before a => operator? */
7524 if (*s == '=' && s[1] == '>' && !pkgname) {
7527 if (gvp || (lex && !off)) {
7528 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7529 /* This is our own scalar, created a few lines
7530 above, so this is safe. */
7532 sv_setpv(sv, PL_tokenbuf);
7533 if (UTF && !IN_BYTES
7534 && is_utf8_string((U8*)PL_tokenbuf, len))
7541 /* If followed by a paren, it's certainly a subroutine. */
7546 while (SPACE_OR_TAB(*d))
7548 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7553 NEXTVAL_NEXTTOKE.opval =
7554 off ? rv2cv_op : pl_yylval.opval;
7556 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7557 else op_free(rv2cv_op), force_next(BAREWORD);
7562 /* If followed by var or block, call it a method (unless sub) */
7564 if ((*s == '$' || *s == '{') && !cv) {
7566 PL_last_lop = PL_oldbufptr;
7567 PL_last_lop_op = OP_METHOD;
7568 if (!PL_lex_allbrackets
7569 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7571 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7573 PL_expect = XBLOCKTERM;
7575 return REPORT(METHOD);
7578 /* If followed by a bareword, see if it looks like indir obj. */
7582 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7583 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7587 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7589 sv_setpvn(sv, PL_tokenbuf, len);
7590 if (UTF && !IN_BYTES
7591 && is_utf8_string((U8*)PL_tokenbuf, len))
7593 else SvUTF8_off(sv);
7596 if (tmp == METHOD && !PL_lex_allbrackets
7597 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7599 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7604 /* Not a method, so call it a subroutine (if defined) */
7607 /* Check for a constant sub */
7608 if ((sv = cv_const_sv_or_av(cv))) {
7611 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7612 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7613 if (SvTYPE(sv) == SVt_PVAV)
7614 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7617 pl_yylval.opval->op_private = 0;
7618 pl_yylval.opval->op_folded = 1;
7619 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7624 op_free(pl_yylval.opval);
7626 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7627 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7628 PL_last_lop = PL_oldbufptr;
7629 PL_last_lop_op = OP_ENTERSUB;
7630 /* Is there a prototype? */
7634 STRLEN protolen = CvPROTOLEN(cv);
7635 const char *proto = CvPROTO(cv);
7637 proto = S_strip_spaces(aTHX_ proto, &protolen);
7640 if ((optional = *proto == ';'))
7643 while (*proto == ';');
7647 *proto == '$' || *proto == '_'
7648 || *proto == '*' || *proto == '+'
7653 *proto == '\\' && proto[1] && proto[2] == '\0'
7656 UNIPROTO(UNIOPSUB,optional);
7657 if (*proto == '\\' && proto[1] == '[') {
7658 const char *p = proto + 2;
7659 while(*p && *p != ']')
7661 if(*p == ']' && !p[1])
7662 UNIPROTO(UNIOPSUB,optional);
7664 if (*proto == '&' && *s == '{') {
7666 sv_setpvs(PL_subname, "__ANON__");
7668 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7669 if (!PL_lex_allbrackets
7670 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7672 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7677 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7679 force_next(off ? PRIVATEREF : BAREWORD);
7680 if (!PL_lex_allbrackets
7681 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7683 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7688 /* Call it a bare word */
7690 if (PL_hints & HINT_STRICT_SUBS)
7691 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7694 /* after "print" and similar functions (corresponding to
7695 * "F? L" in opcode.pl), whatever wasn't already parsed as
7696 * a filehandle should be subject to "strict subs".
7697 * Likewise for the optional indirect-object argument to system
7698 * or exec, which can't be a bareword */
7699 if ((PL_last_lop_op == OP_PRINT
7700 || PL_last_lop_op == OP_PRTF
7701 || PL_last_lop_op == OP_SAY
7702 || PL_last_lop_op == OP_SYSTEM
7703 || PL_last_lop_op == OP_EXEC)
7704 && (PL_hints & HINT_STRICT_SUBS))
7705 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7706 if (lastchar != '-') {
7707 if (ckWARN(WARN_RESERVED)) {
7711 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7713 /* PL_warn_reserved is constant */
7714 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7715 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7717 GCC_DIAG_RESTORE_STMT;
7725 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7726 && saw_infix_sigil) {
7727 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7728 "Operator or semicolon missing before %c%" UTF8f,
7730 UTF8fARG(UTF, strlen(PL_tokenbuf),
7732 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7733 "Ambiguous use of %c resolved as operator %c",
7734 lastchar, lastchar);
7741 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7746 newSVOP(OP_CONST, 0,
7747 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7750 case KEY___PACKAGE__:
7752 newSVOP(OP_CONST, 0,
7754 ? newSVhek(HvNAME_HEK(PL_curstash))
7761 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7762 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7765 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7767 gv_init(gv,stash,"DATA",4,0);
7770 GvIOp(gv) = newIO();
7771 IoIFP(GvIOp(gv)) = PL_rsfp;
7772 /* Mark this internal pseudo-handle as clean */
7773 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7774 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7775 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7777 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7778 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7779 /* if the script was opened in binmode, we need to revert
7780 * it to text mode for compatibility; but only iff it has CRs
7781 * XXX this is a questionable hack at best. */
7782 if (PL_bufend-PL_bufptr > 2
7783 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7786 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7787 loc = PerlIO_tell(PL_rsfp);
7788 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7791 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7793 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7794 #endif /* NETWARE */
7796 PerlIO_seek(PL_rsfp, loc, 0);
7800 #ifdef PERLIO_LAYERS
7803 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7812 FUN0OP(CvCLONE(PL_compcv)
7813 ? newOP(OP_RUNCV, 0)
7814 : newPVOP(OP_RUNCV,0,NULL));
7823 if (PL_expect == XSTATE) {
7834 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7835 if ((*s == ':' && s[1] == ':')
7836 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7840 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7844 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7845 UTF8fARG(UTF, len, PL_tokenbuf));
7848 else if (tmp == KEY_require || tmp == KEY_do
7850 /* that's a way to remember we saw "CORE::" */
7862 LOP(OP_ACCEPT,XTERM);
7865 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7870 LOP(OP_ATAN2,XTERM);
7876 LOP(OP_BINMODE,XTERM);
7879 LOP(OP_BLESS,XTERM);
7888 /* We have to disambiguate the two senses of
7889 "continue". If the next token is a '{' then
7890 treat it as the start of a continue block;
7891 otherwise treat it as a control operator.
7901 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7911 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7920 if (!PL_cryptseen) {
7921 PL_cryptseen = TRUE;
7925 LOP(OP_CRYPT,XTERM);
7928 LOP(OP_CHMOD,XTERM);
7931 LOP(OP_CHOWN,XTERM);
7934 LOP(OP_CONNECT,XTERM);
7954 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7956 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7957 && !keyword(PL_tokenbuf + 1, len, 0)) {
7958 SSize_t off = s-SvPVX(PL_linestr);
7960 s = SvPVX(PL_linestr)+off;
7962 force_ident_maybe_lex('&');
7967 if (orig_keyword == KEY_do) {
7976 PL_hints |= HINT_BLOCK_SCOPE;
7986 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7987 STR_WITH_LEN("NDBM_File::"),
7988 STR_WITH_LEN("DB_File::"),
7989 STR_WITH_LEN("GDBM_File::"),
7990 STR_WITH_LEN("SDBM_File::"),
7991 STR_WITH_LEN("ODBM_File::"),
7993 LOP(OP_DBMOPEN,XTERM);
8005 pl_yylval.ival = CopLINE(PL_curcop);
8009 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8021 if (*s == '{') { /* block eval */
8022 PL_expect = XTERMBLOCK;
8023 UNIBRACK(OP_ENTERTRY);
8025 else { /* string eval */
8027 UNIBRACK(OP_ENTEREVAL);
8032 UNIBRACK(-OP_ENTEREVAL);
8046 case KEY_endhostent:
8052 case KEY_endservent:
8055 case KEY_endprotoent:
8066 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8068 pl_yylval.ival = CopLINE(PL_curcop);
8070 if ( PL_expect == XSTATE
8071 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
8074 SSize_t s_off = s - SvPVX(PL_linestr);
8076 if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
8077 && isSPACE(*(p + 2)))
8081 else if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
8082 && isSPACE(*(p + 3)))
8088 /* skip optional package name, as in "for my abc $x (..)" */
8089 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
8090 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8093 if (*p != '$' && *p != '\\')
8094 Perl_croak(aTHX_ "Missing $ on loop variable");
8096 /* The buffer may have been reallocated, update s */
8097 s = SvPVX(PL_linestr) + s_off;
8102 LOP(OP_FORMLINE,XTERM);
8111 LOP(OP_FCNTL,XTERM);
8117 LOP(OP_FLOCK,XTERM);
8120 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8125 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8130 LOP(OP_GREPSTART, XREF);
8147 case KEY_getpriority:
8148 LOP(OP_GETPRIORITY,XTERM);
8150 case KEY_getprotobyname:
8153 case KEY_getprotobynumber:
8154 LOP(OP_GPBYNUMBER,XTERM);
8156 case KEY_getprotoent:
8168 case KEY_getpeername:
8169 UNI(OP_GETPEERNAME);
8171 case KEY_gethostbyname:
8174 case KEY_gethostbyaddr:
8175 LOP(OP_GHBYADDR,XTERM);
8177 case KEY_gethostent:
8180 case KEY_getnetbyname:
8183 case KEY_getnetbyaddr:
8184 LOP(OP_GNBYADDR,XTERM);
8189 case KEY_getservbyname:
8190 LOP(OP_GSBYNAME,XTERM);
8192 case KEY_getservbyport:
8193 LOP(OP_GSBYPORT,XTERM);
8195 case KEY_getservent:
8198 case KEY_getsockname:
8199 UNI(OP_GETSOCKNAME);
8201 case KEY_getsockopt:
8202 LOP(OP_GSOCKOPT,XTERM);
8217 pl_yylval.ival = CopLINE(PL_curcop);
8218 Perl_ck_warner_d(aTHX_
8219 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8220 "given is experimental");
8225 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8233 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8235 pl_yylval.ival = CopLINE(PL_curcop);
8239 LOP(OP_INDEX,XTERM);
8245 LOP(OP_IOCTL,XTERM);
8272 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8277 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8291 LOP(OP_LISTEN,XTERM);
8300 s = scan_pat(s,OP_MATCH);
8301 TERM(sublex_start());
8304 LOP(OP_MAPSTART, XREF);
8307 LOP(OP_MKDIR,XTERM);
8310 LOP(OP_MSGCTL,XTERM);
8313 LOP(OP_MSGGET,XTERM);
8316 LOP(OP_MSGRCV,XTERM);
8319 LOP(OP_MSGSND,XTERM);
8326 yyerror(Perl_form(aTHX_
8327 "Can't redeclare \"%s\" in \"%s\"",
8328 tmp == KEY_my ? "my" :
8329 tmp == KEY_state ? "state" : "our",
8330 PL_in_my == KEY_my ? "my" :
8331 PL_in_my == KEY_state ? "state" : "our"));
8333 PL_in_my = (U16)tmp;
8335 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8336 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8337 if (memEQs(PL_tokenbuf, len, "sub"))
8339 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8340 if (!PL_in_my_stash) {
8344 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8345 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8346 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8349 else if (*s == '\\') {
8350 if (!FEATURE_MYREF_IS_ENABLED)
8351 Perl_croak(aTHX_ "The experimental declared_refs "
8352 "feature is not enabled");
8353 Perl_ck_warner_d(aTHX_
8354 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8355 "Declaring references is experimental");
8363 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8368 s = tokenize_use(0, s);
8372 if (*s == '(' || (s = skipspace(s), *s == '('))
8375 if (!PL_lex_allbrackets
8376 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8378 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8385 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8387 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8389 for (t=d; isSPACE(*t);)
8391 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8393 && !(t[0] == '=' && t[1] == '>')
8394 && !(t[0] == ':' && t[1] == ':')
8395 && !keyword(s, d-s, 0)
8397 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8398 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8399 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8405 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8407 pl_yylval.ival = OP_OR;
8417 LOP(OP_OPEN_DIR,XTERM);
8420 checkcomma(s,PL_tokenbuf,"filehandle");
8424 checkcomma(s,PL_tokenbuf,"filehandle");
8443 s = force_word(s,BAREWORD,FALSE,TRUE);
8445 s = force_strict_version(s);
8449 LOP(OP_PIPE_OP,XTERM);
8452 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8454 missingterm(NULL, 0);
8455 COPLINE_SET_FROM_MULTI_END;
8456 pl_yylval.ival = OP_CONST;
8457 TERM(sublex_start());
8464 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8466 missingterm(NULL, 0);
8467 COPLINE_SET_FROM_MULTI_END;
8468 PL_expect = XOPERATOR;
8469 if (SvCUR(PL_lex_stuff)) {
8470 int warned_comma = !ckWARN(WARN_QW);
8471 int warned_comment = warned_comma;
8472 d = SvPV_force(PL_lex_stuff, len);
8474 for (; isSPACE(*d) && len; --len, ++d)
8479 if (!warned_comma || !warned_comment) {
8480 for (; !isSPACE(*d) && len; --len, ++d) {
8481 if (!warned_comma && *d == ',') {
8482 Perl_warner(aTHX_ packWARN(WARN_QW),
8483 "Possible attempt to separate words with commas");
8486 else if (!warned_comment && *d == '#') {
8487 Perl_warner(aTHX_ packWARN(WARN_QW),
8488 "Possible attempt to put comments in qw() list");
8494 for (; !isSPACE(*d) && len; --len, ++d)
8497 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8498 words = op_append_elem(OP_LIST, words,
8499 newSVOP(OP_CONST, 0, tokeq(sv)));
8504 words = newNULLLIST();
8505 SvREFCNT_dec_NN(PL_lex_stuff);
8506 PL_lex_stuff = NULL;
8507 PL_expect = XOPERATOR;
8508 pl_yylval.opval = sawparens(words);
8513 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8515 missingterm(NULL, 0);
8516 pl_yylval.ival = OP_STRINGIFY;
8517 if (SvIVX(PL_lex_stuff) == '\'')
8518 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8519 TERM(sublex_start());
8522 s = scan_pat(s,OP_QR);
8523 TERM(sublex_start());
8526 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8528 missingterm(NULL, 0);
8529 pl_yylval.ival = OP_BACKTICK;
8530 TERM(sublex_start());
8538 s = force_version(s, FALSE);
8540 else if (*s != 'v' || !isDIGIT(s[1])
8541 || (s = force_version(s, TRUE), *s == 'v'))
8543 *PL_tokenbuf = '\0';
8544 s = force_word(s,BAREWORD,TRUE,TRUE);
8545 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8546 PL_tokenbuf + sizeof(PL_tokenbuf),
8549 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8550 GV_ADD | (UTF ? SVf_UTF8 : 0));
8553 yyerror("<> at require-statement should be quotes");
8555 if (orig_keyword == KEY_require) {
8561 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8563 PL_last_uni = PL_oldbufptr;
8564 PL_last_lop_op = OP_REQUIRE;
8566 return REPORT( (int)REQUIRE );
8575 LOP(OP_RENAME,XTERM);
8584 LOP(OP_RINDEX,XTERM);
8593 UNIDOR(OP_READLINE);
8596 UNIDOR(OP_BACKTICK);
8605 LOP(OP_REVERSE,XTERM);
8608 UNIDOR(OP_READLINK);
8615 if (pl_yylval.opval)
8616 TERM(sublex_start());
8618 TOKEN(1); /* force error */
8621 checkcomma(s,PL_tokenbuf,"filehandle");
8631 LOP(OP_SELECT,XTERM);
8637 LOP(OP_SEMCTL,XTERM);
8640 LOP(OP_SEMGET,XTERM);
8643 LOP(OP_SEMOP,XTERM);
8649 LOP(OP_SETPGRP,XTERM);
8651 case KEY_setpriority:
8652 LOP(OP_SETPRIORITY,XTERM);
8654 case KEY_sethostent:
8660 case KEY_setservent:
8663 case KEY_setprotoent:
8673 LOP(OP_SEEKDIR,XTERM);
8675 case KEY_setsockopt:
8676 LOP(OP_SSOCKOPT,XTERM);
8682 LOP(OP_SHMCTL,XTERM);
8685 LOP(OP_SHMGET,XTERM);
8688 LOP(OP_SHMREAD,XTERM);
8691 LOP(OP_SHMWRITE,XTERM);
8694 LOP(OP_SHUTDOWN,XTERM);
8703 LOP(OP_SOCKET,XTERM);
8705 case KEY_socketpair:
8706 LOP(OP_SOCKPAIR,XTERM);
8709 checkcomma(s,PL_tokenbuf,"subroutine name");
8712 s = force_word(s,BAREWORD,TRUE,TRUE);
8716 LOP(OP_SPLIT,XTERM);
8719 LOP(OP_SPRINTF,XTERM);
8722 LOP(OP_SPLICE,XTERM);
8737 LOP(OP_SUBSTR,XTERM);
8743 char * const tmpbuf = PL_tokenbuf + 1;
8744 bool have_name, have_proto;
8745 const int key = tmp;
8746 SV *format_name = NULL;
8747 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
8749 SSize_t off = s-SvPVX(PL_linestr);
8751 d = SvPVX(PL_linestr)+off;
8753 SAVEBOOL(PL_parser->sig_seen);
8754 PL_parser->sig_seen = FALSE;
8756 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8758 || (*s == ':' && s[1] == ':'))
8761 PL_expect = XATTRBLOCK;
8762 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8764 if (key == KEY_format)
8765 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8767 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8769 PL_tokenbuf, len + 1, 0
8771 sv_setpvn(PL_subname, tmpbuf, len);
8773 sv_setsv(PL_subname,PL_curstname);
8774 sv_catpvs(PL_subname,"::");
8775 sv_catpvn(PL_subname,tmpbuf,len);
8777 if (SvUTF8(PL_linestr))
8778 SvUTF8_on(PL_subname);
8785 if (key == KEY_my || key == KEY_our || key==KEY_state)
8788 /* diag_listed_as: Missing name in "%s sub" */
8790 "Missing name in \"%s\"", PL_bufptr);
8792 PL_expect = XATTRTERM;
8793 sv_setpvs(PL_subname,"?");
8797 if (key == KEY_format) {
8799 NEXTVAL_NEXTTOKE.opval
8800 = newSVOP(OP_CONST,0, format_name);
8801 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8802 force_next(BAREWORD);
8807 /* Look for a prototype */
8808 if (*s == '(' && !is_sigsub) {
8809 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8811 Perl_croak(aTHX_ "Prototype not terminated");
8812 COPLINE_SET_FROM_MULTI_END;
8813 (void)validate_proto(PL_subname, PL_lex_stuff,
8814 ckWARN(WARN_ILLEGALPROTO), 0);
8822 if ( !(*s == ':' && s[1] != ':')
8823 && (*s != '{' && *s != '(') && key != KEY_format)
8825 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8826 key == KEY_DESTROY || key == KEY_BEGIN ||
8827 key == KEY_UNITCHECK || key == KEY_CHECK ||
8828 key == KEY_INIT || key == KEY_END ||
8829 key == KEY_my || key == KEY_state ||
8832 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8833 else if (*s != ';' && *s != '}')
8834 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8838 NEXTVAL_NEXTTOKE.opval =
8839 newSVOP(OP_CONST, 0, PL_lex_stuff);
8840 PL_lex_stuff = NULL;
8845 sv_setpvs(PL_subname, "__ANON__");
8847 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8853 force_ident_maybe_lex('&');
8861 LOP(OP_SYSTEM,XREF);
8864 LOP(OP_SYMLINK,XTERM);
8867 LOP(OP_SYSCALL,XTERM);
8870 LOP(OP_SYSOPEN,XTERM);
8873 LOP(OP_SYSSEEK,XTERM);
8876 LOP(OP_SYSREAD,XTERM);
8879 LOP(OP_SYSWRITE,XTERM);
8884 TERM(sublex_start());
8905 LOP(OP_TRUNCATE,XTERM);
8917 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8919 pl_yylval.ival = CopLINE(PL_curcop);
8923 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8925 pl_yylval.ival = CopLINE(PL_curcop);
8929 LOP(OP_UNLINK,XTERM);
8935 LOP(OP_UNPACK,XTERM);
8938 LOP(OP_UTIME,XTERM);
8944 LOP(OP_UNSHIFT,XTERM);
8947 s = tokenize_use(1, s);
8957 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8959 pl_yylval.ival = CopLINE(PL_curcop);
8960 Perl_ck_warner_d(aTHX_
8961 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8962 "when is experimental");
8966 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8968 pl_yylval.ival = CopLINE(PL_curcop);
8972 PL_hints |= HINT_BLOCK_SCOPE;
8979 LOP(OP_WAITPID,XTERM);
8985 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8986 * we use the same number on EBCDIC */
8987 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8991 if (PL_expect == XOPERATOR) {
8992 if (*s == '=' && !PL_lex_allbrackets
8993 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9003 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
9005 pl_yylval.ival = OP_XOR;
9014 Looks up an identifier in the pad or in a package
9016 is_sig indicates that this is a subroutine signature variable
9017 rather than a plain pad var.
9020 PRIVATEREF if this is a lexical name.
9021 BAREWORD if this belongs to a package.
9024 if we're in a my declaration
9025 croak if they tried to say my($foo::bar)
9026 build the ops for a my() declaration
9027 if it's an access to a my() variable
9028 build ops for access to a my() variable
9029 if in a dq string, and they've said @foo and we can't find @foo
9031 build ops for a bareword
9035 S_pending_ident(pTHX)
9038 const char pit = (char)pl_yylval.ival;
9039 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9040 /* All routes through this function want to know if there is a colon. */
9041 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9043 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9044 "### Pending identifier '%s'\n", PL_tokenbuf); });
9045 assert(tokenbuf_len >= 2);
9047 /* if we're in a my(), we can't allow dynamics here.
9048 $foo'bar has already been turned into $foo::bar, so
9049 just check for colons.
9051 if it's a legal name, the OP is a PADANY.
9054 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9056 /* diag_listed_as: No package name allowed for variable %s
9058 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9059 "%se %s in \"our\"",
9060 *PL_tokenbuf=='&' ?"subroutin":"variabl",
9061 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9062 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9067 /* "my" variable %s can't be in a package */
9068 /* PL_no_myglob is constant */
9069 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9070 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9071 PL_in_my == KEY_my ? "my" : "state",
9072 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
9074 UTF ? SVf_UTF8 : 0);
9075 GCC_DIAG_RESTORE_STMT;
9078 if (PL_in_my == KEY_sigvar) {
9079 /* A signature 'padop' needs in addition, an op_first to
9080 * point to a child sigdefelem, and an extra field to hold
9081 * the signature index. We can achieve both by using an
9082 * UNOP_AUX and (ab)using the op_aux field to hold the
9083 * index. If we ever need more fields, use a real malloced
9084 * aux strut instead.
9086 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9087 INT2PTR(UNOP_AUX_item *,
9088 (PL_parser->sig_elems)));
9089 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9090 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9094 o = newOP(OP_PADANY, 0);
9095 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9096 UTF ? SVf_UTF8 : 0);
9097 if (PL_in_my == KEY_sigvar)
9100 pl_yylval.opval = o;
9106 build the ops for accesses to a my() variable.
9111 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9113 if (tmp != NOT_IN_PAD) {
9114 /* might be an "our" variable" */
9115 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9116 /* build ops for a bareword */
9117 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9118 HEK * const stashname = HvNAME_HEK(stash);
9119 SV * const sym = newSVhek(stashname);
9120 sv_catpvs(sym, "::");
9121 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9122 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9123 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9127 ((PL_tokenbuf[0] == '$') ? SVt_PV
9128 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9133 pl_yylval.opval = newOP(OP_PADANY, 0);
9134 pl_yylval.opval->op_targ = tmp;
9140 Whine if they've said @foo or @foo{key} in a doublequoted string,
9141 and @foo (or %foo) isn't a variable we can find in the symbol
9144 if (ckWARN(WARN_AMBIGUOUS)
9146 && PL_lex_state != LEX_NORMAL
9147 && !PL_lex_brackets)
9149 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9150 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9152 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9155 /* Downgraded from fatal to warning 20000522 mjd */
9156 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9157 "Possible unintended interpolation of %" UTF8f
9159 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9163 /* build ops for a bareword */
9164 pl_yylval.opval = newSVOP(OP_CONST, 0,
9165 newSVpvn_flags(PL_tokenbuf + 1,
9166 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9167 UTF ? SVf_UTF8 : 0 ));
9168 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9170 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9171 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9172 | ( UTF ? SVf_UTF8 : 0 ),
9173 ((PL_tokenbuf[0] == '$') ? SVt_PV
9174 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9180 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9182 PERL_ARGS_ASSERT_CHECKCOMMA;
9184 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9185 if (ckWARN(WARN_SYNTAX)) {
9188 for (w = s+2; *w && level; w++) {
9196 /* the list of chars below is for end of statements or
9197 * block / parens, boolean operators (&&, ||, //) and branch
9198 * constructs (or, and, if, until, unless, while, err, for).
9199 * Not a very solid hack... */
9200 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9201 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9202 "%s (...) interpreted as function",name);
9205 while (s < PL_bufend && isSPACE(*s))
9209 while (s < PL_bufend && isSPACE(*s))
9211 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9212 const char * const w = s;
9213 s += UTF ? UTF8SKIP(s) : 1;
9214 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9215 s += UTF ? UTF8SKIP(s) : 1;
9216 while (s < PL_bufend && isSPACE(*s))
9220 if (keyword(w, s - w, 0))
9223 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9224 if (gv && GvCVu(gv))
9229 Copy(w, tmpbuf+1, s - w, char);
9231 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9232 if (off != NOT_IN_PAD) return;
9234 Perl_croak(aTHX_ "No comma allowed after %s", what);
9239 /* S_new_constant(): do any overload::constant lookup.
9241 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9242 Best used as sv=new_constant(..., sv, ...).
9243 If s, pv are NULL, calls subroutine with one argument,
9244 and <type> is used with error messages only.
9245 <type> is assumed to be well formed UTF-8.
9247 If error_msg is not NULL, *error_msg will be set to any error encountered.
9248 Otherwise yyerror() will be used to output it */
9251 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9252 SV *sv, SV *pv, const char *type, STRLEN typelen,
9253 const char ** error_msg)
9256 HV * table = GvHV(PL_hintgv); /* ^H */
9261 const char *why1 = "", *why2 = "", *why3 = "";
9263 PERL_ARGS_ASSERT_NEW_CONSTANT;
9264 /* We assume that this is true: */
9265 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9268 sv_2mortal(sv); /* Parent created it permanently */
9270 || ! (PL_hints & HINT_LOCALIZE_HH)
9271 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9276 /* Here haven't found what we're looking for. If it is charnames,
9277 * perhaps it needs to be loaded. Try doing that before giving up */
9279 Perl_load_module(aTHX_
9281 newSVpvs("_charnames"),
9282 /* version parameter; no need to specify it, as if
9283 * we get too early a version, will fail anyway,
9284 * not being able to find '_charnames' */
9289 assert(sp == PL_stack_sp);
9290 table = GvHV(PL_hintgv);
9292 && (PL_hints & HINT_LOCALIZE_HH)
9293 && (cvp = hv_fetch(table, key, keylen, FALSE))
9299 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9300 msg = Perl_form(aTHX_
9301 "Constant(%.*s) unknown",
9302 (int)(type ? typelen : len),
9308 why3 = "} is not defined";
9311 msg = Perl_form(aTHX_
9312 /* The +3 is for '\N{'; -4 for that, plus '}' */
9313 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9317 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9318 (int)(type ? typelen : len),
9319 (type ? type: s), why1, why2, why3);
9326 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9328 return SvREFCNT_inc_simple_NN(sv);
9333 pv = newSVpvn_flags(s, len, SVs_TEMP);
9335 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9337 typesv = &PL_sv_undef;
9339 PUSHSTACKi(PERLSI_OVERLOAD);
9351 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9355 /* Check the eval first */
9356 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9358 const char * errstr;
9359 sv_catpvs(errsv, "Propagated");
9360 errstr = SvPV_const(errsv, errlen);
9361 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9363 res = SvREFCNT_inc_simple_NN(sv);
9367 SvREFCNT_inc_simple_void_NN(res);
9376 why1 = "Call to &{$^H{";
9378 why3 = "}} did not return a defined value";
9380 (void)sv_2mortal(sv);
9387 PERL_STATIC_INLINE void
9388 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9389 bool is_utf8, bool check_dollar, bool tick_warn)
9392 const char *olds = *s;
9393 PERL_ARGS_ASSERT_PARSE_IDENT;
9395 while (*s < PL_bufend) {
9397 Perl_croak(aTHX_ "%s", ident_too_long);
9398 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9399 /* The UTF-8 case must come first, otherwise things
9400 * like c\N{COMBINING TILDE} would start failing, as the
9401 * isWORDCHAR_A case below would gobble the 'c' up.
9404 char *t = *s + UTF8SKIP(*s);
9405 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9408 if (*d + (t - *s) > e)
9409 Perl_croak(aTHX_ "%s", ident_too_long);
9410 Copy(*s, *d, t - *s, char);
9414 else if ( isWORDCHAR_A(**s) ) {
9417 } while (isWORDCHAR_A(**s) && *d < e);
9419 else if ( allow_package
9421 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9428 else if (allow_package && **s == ':' && (*s)[1] == ':'
9429 /* Disallow things like Foo::$bar. For the curious, this is
9430 * the code path that triggers the "Bad name after" warning
9431 * when looking for barewords.
9433 && !(check_dollar && (*s)[2] == '$')) {
9440 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9441 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9444 Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9447 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9448 "Old package separator used in string");
9449 if (olds[-1] == '#')
9453 if (*olds == '\'') {
9460 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9461 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9462 UTF8fARG(is_utf8, d2-d, d));
9467 /* Returns a NUL terminated string, with the length of the string written to
9471 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9474 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9475 bool is_utf8 = cBOOL(UTF);
9477 PERL_ARGS_ASSERT_SCAN_WORD;
9479 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9485 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9486 * iff Unicode semantics are to be used. The legal ones are any of:
9487 * a) all ASCII characters except:
9488 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9490 * The final case currently doesn't get this far in the program, so we
9491 * don't test for it. If that were to change, it would be ok to allow it.
9492 * b) When not under Unicode rules, any upper Latin1 character
9493 * c) Otherwise, when unicode rules are used, all XIDS characters.
9495 * Because all ASCII characters have the same representation whether
9496 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9497 * '{' without knowing if is UTF-8 or not. */
9498 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9499 (isGRAPH_A(*(s)) || ((is_utf8) \
9500 ? isIDFIRST_utf8_safe(s, e) \
9502 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9505 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9507 I32 herelines = PL_parser->herelines;
9508 SSize_t bracket = -1;
9511 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9512 bool is_utf8 = cBOOL(UTF);
9513 I32 orig_copline = 0, tmp_copline = 0;
9515 PERL_ARGS_ASSERT_SCAN_IDENT;
9517 if (isSPACE(*s) || !*s)
9520 while (isDIGIT(*s)) {
9522 Perl_croak(aTHX_ "%s", ident_too_long);
9526 else { /* See if it is a "normal" identifier */
9527 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9532 /* Either a digit variable, or parse_ident() found an identifier
9533 (anything valid as a bareword), so job done and return. */
9534 if (PL_lex_state != LEX_NORMAL)
9535 PL_lex_state = LEX_INTERPENDMAYBE;
9539 /* Here, it is not a run-of-the-mill identifier name */
9541 if (*s == '$' && s[1]
9542 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9543 || isDIGIT_A((U8)s[1])
9546 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9548 /* Dereferencing a value in a scalar variable.
9549 The alternatives are different syntaxes for a scalar variable.
9550 Using ' as a leading package separator isn't allowed. :: is. */
9553 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9555 bracket = s - SvPVX(PL_linestr);
9557 orig_copline = CopLINE(PL_curcop);
9558 if (s < PL_bufend && isSPACE(*s)) {
9562 if ((s <= PL_bufend - (is_utf8)
9565 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9568 const STRLEN skip = UTF8SKIP(s);
9571 for ( i = 0; i < skip; i++ )
9579 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9580 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9584 /* Warn about ambiguous code after unary operators if {...} notation isn't
9585 used. There's no difference in ambiguity; it's merely a heuristic
9586 about when not to warn. */
9587 else if (ck_uni && bracket == -1)
9589 if (bracket != -1) {
9592 /* If we were processing {...} notation then... */
9593 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9594 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9597 /* note we have to check for a normal identifier first,
9598 * as it handles utf8 symbols, and only after that has
9599 * been ruled out can we look at the caret words */
9600 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9601 /* if it starts as a valid identifier, assume that it is one.
9602 (the later check for } being at the expected point will trap
9603 cases where this doesn't pan out.) */
9604 d += is_utf8 ? UTF8SKIP(d) : 1;
9605 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9608 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9610 while (isWORDCHAR(*s) && d < e) {
9614 Perl_croak(aTHX_ "%s", ident_too_long);
9617 tmp_copline = CopLINE(PL_curcop);
9618 if (s < PL_bufend && isSPACE(*s)) {
9621 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9622 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
9623 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9624 const char * const brack =
9626 ((*s == '[') ? "[...]" : "{...}");
9627 orig_copline = CopLINE(PL_curcop);
9628 CopLINE_set(PL_curcop, tmp_copline);
9629 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9630 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9631 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9632 funny, dest, brack, funny, dest, brack);
9633 CopLINE_set(PL_curcop, orig_copline);
9636 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9637 PL_lex_allbrackets++;
9643 tmp_copline = CopLINE(PL_curcop);
9644 if ((skip = s < PL_bufend && isSPACE(*s))) {
9645 /* Avoid incrementing line numbers or resetting PL_linestart,
9646 in case we have to back up. */
9647 STRLEN s_off = s - SvPVX(PL_linestr);
9649 s = SvPVX(PL_linestr) + s_off;
9654 /* Expect to find a closing } after consuming any trailing whitespace.
9657 /* Now increment line numbers if applicable. */
9661 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9662 PL_lex_state = LEX_INTERPEND;
9665 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
9666 if (ckWARN(WARN_AMBIGUOUS)
9667 && (keyword(dest, d - dest, 0)
9668 || get_cvn_flags(dest, d - dest, is_utf8
9672 SV *tmp = newSVpvn_flags( dest, d - dest,
9673 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9676 orig_copline = CopLINE(PL_curcop);
9677 CopLINE_set(PL_curcop, tmp_copline);
9678 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9679 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9680 funny, SVfARG(tmp), funny, SVfARG(tmp));
9681 CopLINE_set(PL_curcop, orig_copline);
9686 /* Didn't find the closing } at the point we expected, so restore
9687 state such that the next thing to process is the opening { and */
9688 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9689 CopLINE_set(PL_curcop, orig_copline);
9690 PL_parser->herelines = herelines;
9692 PL_parser->sub_no_recover = TRUE;
9695 else if ( PL_lex_state == LEX_INTERPNORMAL
9697 && !intuit_more(s, PL_bufend))
9698 PL_lex_state = LEX_INTERPEND;
9703 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9705 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9706 * found in the parse starting at 's', based on the subset that are valid
9707 * in this context input to this routine in 'valid_flags'. Advances s.
9708 * Returns TRUE if the input should be treated as a valid flag, so the next
9709 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9710 * upon first call on the current regex. This routine will set it to any
9711 * charset modifier found. The caller shouldn't change it. This way,
9712 * another charset modifier encountered in the parse can be detected as an
9713 * error, as we have decided to allow only one */
9716 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9718 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9719 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9720 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9721 UTF ? SVf_UTF8 : 0);
9723 /* Pretend that it worked, so will continue processing before
9732 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9733 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9734 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9735 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9736 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9737 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9738 case LOCALE_PAT_MOD:
9740 goto multiple_charsets;
9742 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9745 case UNICODE_PAT_MOD:
9747 goto multiple_charsets;
9749 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9752 case ASCII_RESTRICT_PAT_MOD:
9754 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9758 /* Error if previous modifier wasn't an 'a', but if it was, see
9759 * if, and accept, a second occurrence (only) */
9761 || get_regex_charset(*pmfl)
9762 != REGEX_ASCII_RESTRICTED_CHARSET)
9764 goto multiple_charsets;
9766 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9770 case DEPENDS_PAT_MOD:
9772 goto multiple_charsets;
9774 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9783 if (*charset != c) {
9784 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9786 else if (c == 'a') {
9787 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9788 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9791 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9794 /* Pretend that it worked, so will continue processing before dieing */
9800 S_scan_pat(pTHX_ char *start, I32 type)
9804 const char * const valid_flags =
9805 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9806 char charset = '\0'; /* character set modifier */
9807 unsigned int x_mod_count = 0;
9809 PERL_ARGS_ASSERT_SCAN_PAT;
9811 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9813 Perl_croak(aTHX_ "Search pattern not terminated");
9815 pm = (PMOP*)newPMOP(type, 0);
9816 if (PL_multi_open == '?') {
9817 /* This is the only point in the code that sets PMf_ONCE: */
9818 pm->op_pmflags |= PMf_ONCE;
9820 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9821 allows us to restrict the list needed by reset to just the ??
9823 assert(type != OP_TRANS);
9825 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9828 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9831 elements = mg->mg_len / sizeof(PMOP**);
9832 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9833 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9834 mg->mg_len = elements * sizeof(PMOP**);
9835 PmopSTASH_set(pm,PL_curstash);
9839 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9840 * anon CV. False positives like qr/[(?{]/ are harmless */
9842 if (type == OP_QR) {
9844 char *e, *p = SvPV(PL_lex_stuff, len);
9846 for (; p < e; p++) {
9847 if (p[0] == '(' && p[1] == '?'
9848 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9850 pm->op_pmflags |= PMf_HAS_CV;
9854 pm->op_pmflags |= PMf_IS_QR;
9857 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9858 &s, &charset, &x_mod_count))
9860 /* issue a warning if /c is specified,but /g is not */
9861 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9863 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9864 "Use of /c modifier is meaningless without /g" );
9867 PL_lex_op = (OP*)pm;
9868 pl_yylval.ival = OP_MATCH;
9873 S_scan_subst(pTHX_ char *start)
9879 line_t linediff = 0;
9881 char charset = '\0'; /* character set modifier */
9882 unsigned int x_mod_count = 0;
9885 PERL_ARGS_ASSERT_SCAN_SUBST;
9887 pl_yylval.ival = OP_NULL;
9889 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9892 Perl_croak(aTHX_ "Substitution pattern not terminated");
9896 first_start = PL_multi_start;
9897 first_line = CopLINE(PL_curcop);
9898 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9900 SvREFCNT_dec_NN(PL_lex_stuff);
9901 PL_lex_stuff = NULL;
9902 Perl_croak(aTHX_ "Substitution replacement not terminated");
9904 PL_multi_start = first_start; /* so whole substitution is taken together */
9906 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9910 if (*s == EXEC_PAT_MOD) {
9914 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9915 &s, &charset, &x_mod_count))
9921 if ((pm->op_pmflags & PMf_CONTINUE)) {
9922 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9926 SV * const repl = newSVpvs("");
9929 pm->op_pmflags |= PMf_EVAL;
9930 for (; es > 1; es--) {
9931 sv_catpvs(repl, "eval ");
9933 sv_catpvs(repl, "do {");
9934 sv_catsv(repl, PL_parser->lex_sub_repl);
9935 sv_catpvs(repl, "}");
9936 SvREFCNT_dec(PL_parser->lex_sub_repl);
9937 PL_parser->lex_sub_repl = repl;
9941 linediff = CopLINE(PL_curcop) - first_line;
9943 CopLINE_set(PL_curcop, first_line);
9945 if (linediff || es) {
9946 /* the IVX field indicates that the replacement string is a s///e;
9947 * the NVX field indicates how many src code lines the replacement
9949 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9950 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
9951 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9955 PL_lex_op = (OP*)pm;
9956 pl_yylval.ival = OP_SUBST;
9961 S_scan_trans(pTHX_ char *start)
9968 bool nondestruct = 0;
9971 PERL_ARGS_ASSERT_SCAN_TRANS;
9973 pl_yylval.ival = OP_NULL;
9975 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9977 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9981 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9983 SvREFCNT_dec_NN(PL_lex_stuff);
9984 PL_lex_stuff = NULL;
9985 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9988 complement = del = squash = 0;
9992 complement = OPpTRANS_COMPLEMENT;
9995 del = OPpTRANS_DELETE;
9998 squash = OPpTRANS_SQUASH;
10010 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10011 o->op_private &= ~OPpTRANS_ALL;
10012 o->op_private |= del|squash|complement|
10013 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10014 (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
10017 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10024 Takes a pointer to the first < in <<FOO.
10025 Returns a pointer to the byte following <<FOO.
10027 This function scans a heredoc, which involves different methods
10028 depending on whether we are in a string eval, quoted construct, etc.
10029 This is because PL_linestr could containing a single line of input, or
10030 a whole string being evalled, or the contents of the current quote-
10033 The two basic methods are:
10034 - Steal lines from the input stream
10035 - Scan the heredoc in PL_linestr and remove it therefrom
10037 In a file scope or filtered eval, the first method is used; in a
10038 string eval, the second.
10040 In a quote-like operator, we have to choose between the two,
10041 depending on where we can find a newline. We peek into outer lex-
10042 ing scopes until we find one with a newline in it. If we reach the
10043 outermost lexing scope and it is a file, we use the stream method.
10044 Otherwise it is treated as an eval.
10048 S_scan_heredoc(pTHX_ char *s)
10050 I32 op_type = OP_SCALAR;
10058 I32 indent_len = 0;
10059 bool indented = FALSE;
10060 const bool infile = PL_rsfp || PL_parser->filtered;
10061 const line_t origline = CopLINE(PL_curcop);
10062 LEXSHARED *shared = PL_parser->lex_shared;
10064 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10067 d = PL_tokenbuf + 1;
10068 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10069 *PL_tokenbuf = '\n';
10072 if (*peek == '~') {
10077 while (SPACE_OR_TAB(*peek))
10080 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10083 s = delimcpy(d, e, s, PL_bufend, term, &len);
10084 if (s == PL_bufend)
10085 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10091 /* <<\FOO is equivalent to <<'FOO' */
10096 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10097 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10101 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10102 peek += UTF ? UTF8SKIP(peek) : 1;
10105 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10106 Copy(s, d, len, char);
10111 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10112 Perl_croak(aTHX_ "Delimiter for here document is too long");
10116 len = d - PL_tokenbuf;
10118 #ifndef PERL_STRICT_CR
10119 d = (char *) memchr(s, '\r', PL_bufend - s);
10121 char * const olds = s;
10123 while (s < PL_bufend) {
10129 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10138 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10143 tmpstr = newSV_type(SVt_PVIV);
10144 SvGROW(tmpstr, 80);
10145 if (term == '\'') {
10146 op_type = OP_CONST;
10147 SvIV_set(tmpstr, -1);
10149 else if (term == '`') {
10150 op_type = OP_BACKTICK;
10151 SvIV_set(tmpstr, '\\');
10154 PL_multi_start = origline + 1 + PL_parser->herelines;
10155 PL_multi_open = PL_multi_close = '<';
10157 /* inside a string eval or quote-like operator */
10158 if (!infile || PL_lex_inwhat) {
10161 char * const olds = s;
10162 PERL_CONTEXT * const cx = CX_CUR();
10163 /* These two fields are not set until an inner lexing scope is
10164 entered. But we need them set here. */
10165 shared->ls_bufptr = s;
10166 shared->ls_linestr = PL_linestr;
10168 if (PL_lex_inwhat) {
10169 /* Look for a newline. If the current buffer does not have one,
10170 peek into the line buffer of the parent lexing scope, going
10171 up as many levels as necessary to find one with a newline
10174 while (!(s = (char *)memchr(
10175 (void *)shared->ls_bufptr, '\n',
10176 SvEND(shared->ls_linestr)-shared->ls_bufptr
10179 shared = shared->ls_prev;
10180 /* shared is only null if we have gone beyond the outermost
10181 lexing scope. In a file, we will have broken out of the
10182 loop in the previous iteration. In an eval, the string buf-
10183 fer ends with "\n;", so the while condition above will have
10184 evaluated to false. So shared can never be null. Or so you
10185 might think. Odd syntax errors like s;@{<<; can gobble up
10186 the implicit semicolon at the end of a flie, causing the
10187 file handle to be closed even when we are not in a string
10188 eval. So shared may be null in that case.
10189 (Closing '>>}' here to balance the earlier open brace for
10190 editors that look for matched pairs.) */
10191 if (UNLIKELY(!shared))
10193 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10194 most lexing scope. In a file, shared->ls_linestr at that
10195 level is just one line, so there is no body to steal. */
10196 if (infile && !shared->ls_prev) {
10202 else { /* eval or we've already hit EOF */
10203 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10208 linestr = shared->ls_linestr;
10209 bufend = SvEND(linestr);
10214 while (s < bufend - len + 1) {
10216 ++PL_parser->herelines;
10218 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10222 /* Only valid if it's preceded by whitespace only */
10223 while (backup != myolds && --backup >= myolds) {
10224 if (! SPACE_OR_TAB(*backup)) {
10230 /* No whitespace or all! */
10231 if (backup == s || *backup == '\n') {
10232 Newx(indent, indent_len + 1, char);
10233 memcpy(indent, backup + 1, indent_len);
10234 indent[indent_len] = 0;
10235 s--; /* before our delimiter */
10236 PL_parser->herelines--; /* this line doesn't count */
10243 while (s < bufend - len + 1
10244 && memNE(s,PL_tokenbuf,len) )
10247 ++PL_parser->herelines;
10251 if (s >= bufend - len + 1) {
10255 sv_setpvn(tmpstr,d+1,s-d);
10257 /* the preceding stmt passes a newline */
10258 PL_parser->herelines++;
10260 /* s now points to the newline after the heredoc terminator.
10261 d points to the newline before the body of the heredoc.
10264 /* We are going to modify linestr in place here, so set
10265 aside copies of the string if necessary for re-evals or
10267 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10268 check shared->re_eval_str. */
10269 if (shared->re_eval_start || shared->re_eval_str) {
10270 /* Set aside the rest of the regexp */
10271 if (!shared->re_eval_str)
10272 shared->re_eval_str =
10273 newSVpvn(shared->re_eval_start,
10274 bufend - shared->re_eval_start);
10275 shared->re_eval_start -= s-d;
10278 if (cxstack_ix >= 0
10279 && CxTYPE(cx) == CXt_EVAL
10280 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10281 && cx->blk_eval.cur_text == linestr)
10283 cx->blk_eval.cur_text = newSVsv(linestr);
10284 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10287 /* Copy everything from s onwards back to d. */
10288 Move(s,d,bufend-s + 1,char);
10289 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10290 /* Setting PL_bufend only applies when we have not dug deeper
10291 into other scopes, because sublex_done sets PL_bufend to
10292 SvEND(PL_linestr). */
10293 if (shared == PL_parser->lex_shared)
10294 PL_bufend = SvEND(linestr);
10299 char *oldbufptr_save;
10300 char *oldoldbufptr_save;
10302 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10303 term = PL_tokenbuf[1];
10305 linestr_save = PL_linestr; /* must restore this afterwards */
10306 d = s; /* and this */
10307 oldbufptr_save = PL_oldbufptr;
10308 oldoldbufptr_save = PL_oldoldbufptr;
10309 PL_linestr = newSVpvs("");
10310 PL_bufend = SvPVX(PL_linestr);
10313 PL_bufptr = PL_bufend;
10314 CopLINE_set(PL_curcop,
10315 origline + 1 + PL_parser->herelines);
10317 if ( !lex_next_chunk(LEX_NO_TERM)
10318 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10320 /* Simply freeing linestr_save might seem simpler here, as it
10321 does not matter what PL_linestr points to, since we are
10322 about to croak; but in a quote-like op, linestr_save
10323 will have been prospectively freed already, via
10324 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10325 restore PL_linestr. */
10326 SvREFCNT_dec_NN(PL_linestr);
10327 PL_linestr = linestr_save;
10328 PL_oldbufptr = oldbufptr_save;
10329 PL_oldoldbufptr = oldoldbufptr_save;
10333 CopLINE_set(PL_curcop, origline);
10335 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10336 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10337 /* ^That should be enough to avoid this needing to grow: */
10338 sv_catpvs(PL_linestr, "\n\0");
10339 assert(s == SvPVX(PL_linestr));
10340 PL_bufend = SvEND(PL_linestr);
10344 PL_parser->herelines++;
10345 PL_last_lop = PL_last_uni = NULL;
10347 #ifndef PERL_STRICT_CR
10348 if (PL_bufend - PL_linestart >= 2) {
10349 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10350 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10352 PL_bufend[-2] = '\n';
10354 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10356 else if (PL_bufend[-1] == '\r')
10357 PL_bufend[-1] = '\n';
10359 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10360 PL_bufend[-1] = '\n';
10363 if (indented && (PL_bufend-s) >= len) {
10364 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10367 char *backup = found;
10370 /* Only valid if it's preceded by whitespace only */
10371 while (backup != s && --backup >= s) {
10372 if (! SPACE_OR_TAB(*backup)) {
10378 /* All whitespace or none! */
10379 if (backup == found || SPACE_OR_TAB(*backup)) {
10380 Newx(indent, indent_len + 1, char);
10381 memcpy(indent, backup, indent_len);
10382 indent[indent_len] = 0;
10383 SvREFCNT_dec(PL_linestr);
10384 PL_linestr = linestr_save;
10385 PL_linestart = SvPVX(linestr_save);
10386 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10387 PL_oldbufptr = oldbufptr_save;
10388 PL_oldoldbufptr = oldoldbufptr_save;
10394 /* Didn't find it */
10395 sv_catsv(tmpstr,PL_linestr);
10398 if (*s == term && PL_bufend-s >= len
10399 && memEQ(s,PL_tokenbuf + 1,len))
10401 SvREFCNT_dec(PL_linestr);
10402 PL_linestr = linestr_save;
10403 PL_linestart = SvPVX(linestr_save);
10404 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10405 PL_oldbufptr = oldbufptr_save;
10406 PL_oldoldbufptr = oldoldbufptr_save;
10411 sv_catsv(tmpstr,PL_linestr);
10417 PL_multi_end = origline + PL_parser->herelines;
10419 if (indented && indent) {
10420 STRLEN linecount = 1;
10421 STRLEN herelen = SvCUR(tmpstr);
10422 char *ss = SvPVX(tmpstr);
10423 char *se = ss + herelen;
10424 SV *newstr = newSV(herelen+1);
10427 /* Trim leading whitespace */
10429 /* newline only? Copy and move on */
10431 sv_catpvs(newstr,"\n");
10435 /* Found our indentation? Strip it */
10437 else if (se - ss >= indent_len
10438 && memEQ(ss, indent, indent_len))
10443 while ((ss + le) < se && *(ss + le) != '\n')
10446 sv_catpvn(newstr, ss, le);
10449 /* Line doesn't begin with our indentation? Croak */
10454 "Indentation on line %d of here-doc doesn't match delimiter",
10460 /* avoid sv_setsv() as we dont wan't to COW here */
10461 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10463 SvREFCNT_dec_NN(newstr);
10466 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10467 SvPV_shrink_to_cur(tmpstr);
10471 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10475 PL_lex_stuff = tmpstr;
10476 pl_yylval.ival = op_type;
10482 SvREFCNT_dec(tmpstr);
10483 CopLINE_set(PL_curcop, origline);
10484 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10488 /* scan_inputsymbol
10489 takes: position of first '<' in input buffer
10490 returns: position of first char following the matching '>' in
10492 side-effects: pl_yylval and lex_op are set.
10497 <<>> read from ARGV without magic open
10498 <FH> read from filehandle
10499 <pkg::FH> read from package qualified filehandle
10500 <pkg'FH> read from package qualified filehandle
10501 <$fh> read from filehandle in $fh
10502 <*.h> filename glob
10507 S_scan_inputsymbol(pTHX_ char *start)
10509 char *s = start; /* current position in buffer */
10512 bool nomagicopen = FALSE;
10513 char *d = PL_tokenbuf; /* start of temp holding space */
10514 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10516 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10518 end = (char *) memchr(s, '\n', PL_bufend - s);
10521 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10522 nomagicopen = TRUE;
10528 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10530 /* die if we didn't have space for the contents of the <>,
10531 or if it didn't end, or if we see a newline
10534 if (len >= (I32)sizeof PL_tokenbuf)
10535 Perl_croak(aTHX_ "Excessively long <> operator");
10537 Perl_croak(aTHX_ "Unterminated <> operator");
10542 Remember, only scalar variables are interpreted as filehandles by
10543 this code. Anything more complex (e.g., <$fh{$num}>) will be
10544 treated as a glob() call.
10545 This code makes use of the fact that except for the $ at the front,
10546 a scalar variable and a filehandle look the same.
10548 if (*d == '$' && d[1]) d++;
10550 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10551 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10552 d += UTF ? UTF8SKIP(d) : 1;
10555 /* If we've tried to read what we allow filehandles to look like, and
10556 there's still text left, then it must be a glob() and not a getline.
10557 Use scan_str to pull out the stuff between the <> and treat it
10558 as nothing more than a string.
10561 if (d - PL_tokenbuf != len) {
10562 pl_yylval.ival = OP_GLOB;
10563 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10565 Perl_croak(aTHX_ "Glob not terminated");
10569 bool readline_overriden = FALSE;
10571 /* we're in a filehandle read situation */
10574 /* turn <> into <ARGV> */
10576 Copy("ARGV",d,5,char);
10578 /* Check whether readline() is overriden */
10579 if ((gv_readline = gv_override("readline",8)))
10580 readline_overriden = TRUE;
10582 /* if <$fh>, create the ops to turn the variable into a
10586 /* try to find it in the pad for this block, otherwise find
10587 add symbol table ops
10589 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10590 if (tmp != NOT_IN_PAD) {
10591 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10592 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10593 HEK * const stashname = HvNAME_HEK(stash);
10594 SV * const sym = sv_2mortal(newSVhek(stashname));
10595 sv_catpvs(sym, "::");
10596 sv_catpv(sym, d+1);
10601 OP * const o = newOP(OP_PADSV, 0);
10603 PL_lex_op = readline_overriden
10604 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10605 op_append_elem(OP_LIST, o,
10606 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10607 : newUNOP(OP_READLINE, 0, o);
10615 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10617 PL_lex_op = readline_overriden
10618 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10619 op_append_elem(OP_LIST,
10620 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10621 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10622 : newUNOP(OP_READLINE, 0,
10623 newUNOP(OP_RV2SV, 0,
10624 newGVOP(OP_GV, 0, gv)));
10626 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10627 pl_yylval.ival = OP_NULL;
10630 /* If it's none of the above, it must be a literal filehandle
10631 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10633 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10634 PL_lex_op = readline_overriden
10635 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10636 op_append_elem(OP_LIST,
10637 newGVOP(OP_GV, 0, gv),
10638 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10639 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10640 pl_yylval.ival = OP_NULL;
10650 start position in buffer
10651 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
10652 only if they are of the open/close form
10653 keep_delims preserve the delimiters around the string
10654 re_reparse compiling a run-time /(?{})/:
10655 collapse // to /, and skip encoding src
10656 delimp if non-null, this is set to the position of
10657 the closing delimiter, or just after it if
10658 the closing and opening delimiters differ
10659 (i.e., the opening delimiter of a substitu-
10661 returns: position to continue reading from buffer
10662 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10663 updates the read buffer.
10665 This subroutine pulls a string out of the input. It is called for:
10666 q single quotes q(literal text)
10667 ' single quotes 'literal text'
10668 qq double quotes qq(interpolate $here please)
10669 " double quotes "interpolate $here please"
10670 qx backticks qx(/bin/ls -l)
10671 ` backticks `/bin/ls -l`
10672 qw quote words @EXPORT_OK = qw( func() $spam )
10673 m// regexp match m/this/
10674 s/// regexp substitute s/this/that/
10675 tr/// string transliterate tr/this/that/
10676 y/// string transliterate y/this/that/
10677 ($*@) sub prototypes sub foo ($)
10678 (stuff) sub attr parameters sub foo : attr(stuff)
10679 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10681 In most of these cases (all but <>, patterns and transliterate)
10682 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10683 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10684 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10687 It skips whitespace before the string starts, and treats the first
10688 character as the delimiter. If the delimiter is one of ([{< then
10689 the corresponding "close" character )]}> is used as the closing
10690 delimiter. It allows quoting of delimiters, and if the string has
10691 balanced delimiters ([{<>}]) it allows nesting.
10693 On success, the SV with the resulting string is put into lex_stuff or,
10694 if that is already non-NULL, into lex_repl. The second case occurs only
10695 when parsing the RHS of the special constructs s/// and tr/// (y///).
10696 For convenience, the terminating delimiter character is stuffed into
10701 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10705 SV *sv; /* scalar value: string */
10706 const char *tmps; /* temp string, used for delimiter matching */
10707 char *s = start; /* current position in the buffer */
10708 char term; /* terminating character */
10709 char *to; /* current position in the sv's data */
10710 I32 brackets = 1; /* bracket nesting level */
10711 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
10712 IV termcode; /* terminating char. code */
10713 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
10714 STRLEN termlen; /* length of terminating string */
10717 /* The delimiters that have a mirror-image closing one */
10718 const char * opening_delims = "([{<";
10719 const char * closing_delims = ")]}>";
10721 /* The only non-UTF character that isn't a stand alone grapheme is
10722 * white-space, hence can't be a delimiter. */
10723 const char * non_grapheme_msg = "Use of unassigned code point or"
10724 " non-standalone grapheme for a delimiter"
10726 PERL_ARGS_ASSERT_SCAN_STR;
10728 /* skip space before the delimiter */
10733 /* mark where we are, in case we need to report errors */
10736 /* after skipping whitespace, the next character is the terminator */
10738 if (!UTF || UTF8_IS_INVARIANT(term)) {
10739 termcode = termstr[0] = term;
10743 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10744 if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
10749 yyerror(non_grapheme_msg);
10752 Copy(s, termstr, termlen, U8);
10755 /* mark where we are */
10756 PL_multi_start = CopLINE(PL_curcop);
10757 PL_multi_open = termcode;
10758 herelines = PL_parser->herelines;
10760 /* If the delimiter has a mirror-image closing one, get it */
10761 if (term && (tmps = strchr(opening_delims, term))) {
10762 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10765 PL_multi_close = termcode;
10767 if (PL_multi_open == PL_multi_close) {
10768 keep_bracketed_quoted = FALSE;
10771 /* create a new SV to hold the contents. 79 is the SV's initial length.
10772 What a random number. */
10773 sv = newSV_type(SVt_PVIV);
10775 SvIV_set(sv, termcode);
10776 (void)SvPOK_only(sv); /* validate pointer */
10778 /* move past delimiter and try to read a complete string */
10780 sv_catpvn(sv, s, termlen);
10783 /* extend sv if need be */
10784 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10785 /* set 'to' to the next character in the sv's string */
10786 to = SvPVX(sv)+SvCUR(sv);
10788 /* if open delimiter is the close delimiter read unbridle */
10789 if (PL_multi_open == PL_multi_close) {
10790 for (; s < PL_bufend; s++,to++) {
10791 /* embedded newlines increment the current line number */
10792 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10793 COPLINE_INC_WITH_HERELINES;
10794 /* handle quoted delimiters */
10795 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10796 if (!keep_bracketed_quoted
10798 || (re_reparse && s[1] == '\\'))
10801 else /* any other quotes are simply copied straight through */
10804 /* terminate when run out of buffer (the for() condition), or
10805 have found the terminator */
10806 else if (*s == term) { /* First byte of terminator matches */
10807 if (termlen == 1) /* If is the only byte, are done */
10810 /* If the remainder of the terminator matches, also are
10811 * done, after checking that is a separate grapheme */
10812 if ( s + termlen <= PL_bufend
10813 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10816 && UNLIKELY(! _is_grapheme((U8 *) start,
10821 yyerror(non_grapheme_msg);
10826 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10834 /* if the terminator isn't the same as the start character (e.g.,
10835 matched brackets), we have to allow more in the quoting, and
10836 be prepared for nested brackets.
10839 /* read until we run out of string, or we find the terminator */
10840 for (; s < PL_bufend; s++,to++) {
10841 /* embedded newlines increment the line count */
10842 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10843 COPLINE_INC_WITH_HERELINES;
10844 /* backslashes can escape the open or closing characters */
10845 if (*s == '\\' && s+1 < PL_bufend) {
10846 if (!keep_bracketed_quoted
10847 && ( ((UV)s[1] == PL_multi_open)
10848 || ((UV)s[1] == PL_multi_close) ))
10855 /* allow nested opens and closes */
10856 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10858 else if ((UV)*s == PL_multi_open)
10860 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10865 /* terminate the copied string and update the sv's end-of-string */
10867 SvCUR_set(sv, to - SvPVX_const(sv));
10870 * this next chunk reads more into the buffer if we're not done yet
10874 break; /* handle case where we are done yet :-) */
10876 #ifndef PERL_STRICT_CR
10877 if (to - SvPVX_const(sv) >= 2) {
10878 if ( (to[-2] == '\r' && to[-1] == '\n')
10879 || (to[-2] == '\n' && to[-1] == '\r'))
10883 SvCUR_set(sv, to - SvPVX_const(sv));
10885 else if (to[-1] == '\r')
10888 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10892 /* if we're out of file, or a read fails, bail and reset the current
10893 line marker so we can report where the unterminated string began
10895 COPLINE_INC_WITH_HERELINES;
10896 PL_bufptr = PL_bufend;
10897 if (!lex_next_chunk(0)) {
10899 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10902 s = start = PL_bufptr;
10905 /* at this point, we have successfully read the delimited string */
10908 sv_catpvn(sv, s, termlen);
10914 PL_multi_end = CopLINE(PL_curcop);
10915 CopLINE_set(PL_curcop, PL_multi_start);
10916 PL_parser->herelines = herelines;
10918 /* if we allocated too much space, give some back */
10919 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10920 SvLEN_set(sv, SvCUR(sv) + 1);
10921 SvPV_renew(sv, SvLEN(sv));
10924 /* decide whether this is the first or second quoted string we've read
10929 PL_parser->lex_sub_repl = sv;
10932 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10938 takes: pointer to position in buffer
10939 returns: pointer to new position in buffer
10940 side-effects: builds ops for the constant in pl_yylval.op
10942 Read a number in any of the formats that Perl accepts:
10944 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10945 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10946 0b[01](_?[01])* binary integers
10947 0[0-7](_?[0-7])* octal integers
10948 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10949 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10951 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10954 If it reads a number without a decimal point or an exponent, it will
10955 try converting the number to an integer and see if it can do so
10956 without loss of precision.
10960 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10962 const char *s = start; /* current position in buffer */
10963 char *d; /* destination in temp buffer */
10964 char *e; /* end of temp buffer */
10965 NV nv; /* number read, as a double */
10966 SV *sv = NULL; /* place to put the converted number */
10967 bool floatit; /* boolean: int or float? */
10968 const char *lastub = NULL; /* position of last underbar */
10969 static const char* const number_too_long = "Number too long";
10970 bool warned_about_underscore = 0;
10971 #define WARN_ABOUT_UNDERSCORE() \
10973 if (!warned_about_underscore) { \
10974 warned_about_underscore = 1; \
10975 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10976 "Misplaced _ in number"); \
10979 /* Hexadecimal floating point.
10981 * In many places (where we have quads and NV is IEEE 754 double)
10982 * we can fit the mantissa bits of a NV into an unsigned quad.
10983 * (Note that UVs might not be quads even when we have quads.)
10984 * This will not work everywhere, though (either no quads, or
10985 * using long doubles), in which case we have to resort to NV,
10986 * which will probably mean horrible loss of precision due to
10987 * multiple fp operations. */
10988 bool hexfp = FALSE;
10989 int total_bits = 0;
10990 int significant_bits = 0;
10991 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10992 # define HEXFP_UQUAD
10993 Uquad_t hexfp_uquad = 0;
10994 int hexfp_frac_bits = 0;
10999 NV hexfp_mult = 1.0;
11000 UV high_non_zero = 0; /* highest digit */
11001 int non_zero_integer_digits = 0;
11003 PERL_ARGS_ASSERT_SCAN_NUM;
11005 /* We use the first character to decide what type of number this is */
11009 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11011 /* if it starts with a 0, it could be an octal number, a decimal in
11012 0.13 disguise, or a hexadecimal number, or a binary number. */
11016 u holds the "number so far"
11017 shift the power of 2 of the base
11018 (hex == 4, octal == 3, binary == 1)
11019 overflowed was the number more than we can hold?
11021 Shift is used when we add a digit. It also serves as an "are
11022 we in octal/hex/binary?" indicator to disallow hex characters
11023 when in octal mode.
11028 bool overflowed = FALSE;
11029 bool just_zero = TRUE; /* just plain 0 or binary number? */
11030 bool has_digs = FALSE;
11031 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11032 static const char* const bases[5] =
11033 { "", "binary", "", "octal", "hexadecimal" };
11034 static const char* const Bases[5] =
11035 { "", "Binary", "", "Octal", "Hexadecimal" };
11036 static const char* const maxima[5] =
11038 "0b11111111111111111111111111111111",
11042 const char *base, *Base, *max;
11044 /* check for hex */
11045 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11049 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11054 /* check for a decimal in disguise */
11055 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11057 /* so it must be octal */
11064 WARN_ABOUT_UNDERSCORE();
11068 base = bases[shift];
11069 Base = Bases[shift];
11070 max = maxima[shift];
11072 /* read the rest of the number */
11074 /* x is used in the overflow test,
11075 b is the digit we're adding on. */
11080 /* if we don't mention it, we're done */
11084 /* _ are ignored -- but warned about if consecutive */
11086 if (lastub && s == lastub + 1)
11087 WARN_ABOUT_UNDERSCORE();
11091 /* 8 and 9 are not octal */
11092 case '8': case '9':
11094 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11098 case '2': case '3': case '4':
11099 case '5': case '6': case '7':
11101 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11104 case '0': case '1':
11105 b = *s++ & 15; /* ASCII digit -> value of digit */
11109 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11110 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11111 /* make sure they said 0x */
11114 b = (*s++ & 7) + 9;
11116 /* Prepare to put the digit we have onto the end
11117 of the number so far. We check for overflows.
11124 assert(shift >= 0);
11125 x = u << shift; /* make room for the digit */
11127 total_bits += shift;
11129 if ((x >> shift) != u
11130 && !(PL_hints & HINT_NEW_BINARY)) {
11133 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11134 "Integer overflow in %s number",
11137 u = x | b; /* add the digit to the end */
11140 n *= nvshift[shift];
11141 /* If an NV has not enough bits in its
11142 * mantissa to represent an UV this summing of
11143 * small low-order numbers is a waste of time
11144 * (because the NV cannot preserve the
11145 * low-order bits anyway): we could just
11146 * remember when did we overflow and in the
11147 * end just multiply n by the right
11152 if (high_non_zero == 0 && b > 0)
11156 non_zero_integer_digits++;
11158 /* this could be hexfp, but peek ahead
11159 * to avoid matching ".." */
11160 if (UNLIKELY(HEXFP_PEEK(s))) {
11168 /* if we get here, we had success: make a scalar value from
11173 /* final misplaced underbar check */
11175 WARN_ABOUT_UNDERSCORE();
11177 if (UNLIKELY(HEXFP_PEEK(s))) {
11178 /* Do sloppy (on the underbars) but quick detection
11179 * (and value construction) for hexfp, the decimal
11180 * detection will shortly be more thorough with the
11181 * underbar checks. */
11183 significant_bits = non_zero_integer_digits * shift;
11186 #else /* HEXFP_NV */
11189 /* Ignore the leading zero bits of
11190 * the high (first) non-zero digit. */
11191 if (high_non_zero) {
11192 if (high_non_zero < 0x8)
11193 significant_bits--;
11194 if (high_non_zero < 0x4)
11195 significant_bits--;
11196 if (high_non_zero < 0x2)
11197 significant_bits--;
11204 bool accumulate = TRUE;
11206 int lim = 1 << shift;
11207 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11209 if (isXDIGIT(*h)) {
11210 significant_bits += shift;
11213 if (significant_bits < NV_MANT_DIG) {
11214 /* We are in the long "run" of xdigits,
11215 * accumulate the full four bits. */
11216 assert(shift >= 0);
11217 hexfp_uquad <<= shift;
11219 hexfp_frac_bits += shift;
11220 } else if (significant_bits - shift < NV_MANT_DIG) {
11221 /* We are at a hexdigit either at,
11222 * or straddling, the edge of mantissa.
11223 * We will try grabbing as many as
11224 * possible bits. */
11226 significant_bits - NV_MANT_DIG;
11230 hexfp_uquad <<= tail;
11231 assert((shift - tail) >= 0);
11232 hexfp_uquad |= b >> (shift - tail);
11233 hexfp_frac_bits += tail;
11235 /* Ignore the trailing zero bits
11236 * of the last non-zero xdigit.
11238 * The assumption here is that if
11239 * one has input of e.g. the xdigit
11240 * eight (0x8), there is only one
11241 * bit being input, not the full
11242 * four bits. Conversely, if one
11243 * specifies a zero xdigit, the
11244 * assumption is that one really
11245 * wants all those bits to be zero. */
11247 if ((b & 0x1) == 0x0) {
11248 significant_bits--;
11249 if ((b & 0x2) == 0x0) {
11250 significant_bits--;
11251 if ((b & 0x4) == 0x0) {
11252 significant_bits--;
11258 accumulate = FALSE;
11261 /* Keep skipping the xdigits, and
11262 * accumulating the significant bits,
11263 * but do not shift the uquad
11264 * (which would catastrophically drop
11265 * high-order bits) or accumulate the
11266 * xdigits anymore. */
11268 #else /* HEXFP_NV */
11270 nv_mult /= nvshift[shift];
11272 hexfp_nv += b * nv_mult;
11274 accumulate = FALSE;
11278 if (significant_bits >= NV_MANT_DIG)
11279 accumulate = FALSE;
11283 if ((total_bits > 0 || significant_bits > 0) &&
11284 isALPHA_FOLD_EQ(*h, 'p')) {
11285 bool negexp = FALSE;
11289 else if (*h == '-') {
11295 while (isDIGIT(*h) || *h == '_') {
11298 hexfp_exp += *h - '0';
11301 && -hexfp_exp < NV_MIN_EXP - 1) {
11302 /* NOTE: this means that the exponent
11303 * underflow warning happens for
11304 * the IEEE 754 subnormals (denormals),
11305 * because DBL_MIN_EXP etc are the lowest
11306 * possible binary (or, rather, DBL_RADIX-base)
11307 * exponent for normals, not subnormals.
11309 * This may or may not be a good thing. */
11310 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11311 "Hexadecimal float: exponent underflow");
11317 && hexfp_exp > NV_MAX_EXP - 1) {
11318 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11319 "Hexadecimal float: exponent overflow");
11327 hexfp_exp = -hexfp_exp;
11329 hexfp_exp -= hexfp_frac_bits;
11331 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11338 if (shift != 3 && !has_digs) {
11339 /* 0x or 0b with no digits, treat it as if the x or b is part of the
11346 if (n > 4294967295.0)
11347 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11348 "%s number > %s non-portable",
11354 if (u > 0xffffffff)
11355 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11356 "%s number > %s non-portable",
11361 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11362 sv = new_constant(start, s - start, "integer",
11363 sv, NULL, NULL, 0, NULL);
11364 else if (PL_hints & HINT_NEW_BINARY)
11365 sv = new_constant(start, s - start, "binary",
11366 sv, NULL, NULL, 0, NULL);
11371 handle decimal numbers.
11372 we're also sent here when we read a 0 as the first digit
11374 case '1': case '2': case '3': case '4': case '5':
11375 case '6': case '7': case '8': case '9': case '.':
11378 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11387 /* read next group of digits and _ and copy into d */
11390 || UNLIKELY(hexfp && isXDIGIT(*s)))
11392 /* skip underscores, checking for misplaced ones
11396 if (lastub && s == lastub + 1)
11397 WARN_ABOUT_UNDERSCORE();
11401 /* check for end of fixed-length buffer */
11403 Perl_croak(aTHX_ "%s", number_too_long);
11404 /* if we're ok, copy the character */
11409 /* final misplaced underbar check */
11410 if (lastub && s == lastub + 1)
11411 WARN_ABOUT_UNDERSCORE();
11413 /* read a decimal portion if there is one. avoid
11414 3..5 being interpreted as the number 3. followed
11417 if (*s == '.' && s[1] != '.') {
11422 WARN_ABOUT_UNDERSCORE();
11426 /* copy, ignoring underbars, until we run out of digits.
11430 || UNLIKELY(hexfp && isXDIGIT(*s));
11433 /* fixed length buffer check */
11435 Perl_croak(aTHX_ "%s", number_too_long);
11437 if (lastub && s == lastub + 1)
11438 WARN_ABOUT_UNDERSCORE();
11444 /* fractional part ending in underbar? */
11446 WARN_ABOUT_UNDERSCORE();
11447 if (*s == '.' && isDIGIT(s[1])) {
11448 /* oops, it's really a v-string, but without the "v" */
11454 /* read exponent part, if present */
11455 if ((isALPHA_FOLD_EQ(*s, 'e')
11456 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11457 && strchr("+-0123456789_", s[1]))
11459 int exp_digits = 0;
11460 const char *save_s = s;
11463 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11464 ditto for p (hexfloats) */
11465 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11466 /* At least some Mach atof()s don't grok 'E' */
11469 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11476 /* stray preinitial _ */
11478 WARN_ABOUT_UNDERSCORE();
11482 /* allow positive or negative exponent */
11483 if (*s == '+' || *s == '-')
11486 /* stray initial _ */
11488 WARN_ABOUT_UNDERSCORE();
11492 /* read digits of exponent */
11493 while (isDIGIT(*s) || *s == '_') {
11497 Perl_croak(aTHX_ "%s", number_too_long);
11501 if (((lastub && s == lastub + 1)
11502 || (!isDIGIT(s[1]) && s[1] != '_')))
11503 WARN_ABOUT_UNDERSCORE();
11509 /* no exponent digits, the [eEpP] could be for something else,
11510 * though in practice we don't get here for p since that's preparsed
11511 * earlier, and results in only the 0xX being consumed, so behave similarly
11512 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11525 We try to do an integer conversion first if no characters
11526 indicating "float" have been found.
11531 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11533 if (flags == IS_NUMBER_IN_UV) {
11535 sv = newSViv(uv); /* Prefer IVs over UVs. */
11538 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11539 if (uv <= (UV) IV_MIN)
11540 sv = newSViv(-(IV)uv);
11547 /* terminate the string */
11549 if (UNLIKELY(hexfp)) {
11550 # ifdef NV_MANT_DIG
11551 if (significant_bits > NV_MANT_DIG)
11552 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11553 "Hexadecimal float: mantissa overflow");
11556 nv = hexfp_uquad * hexfp_mult;
11557 #else /* HEXFP_NV */
11558 nv = hexfp_nv * hexfp_mult;
11561 nv = Atof(PL_tokenbuf);
11567 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11568 const char *const key = floatit ? "float" : "integer";
11569 const STRLEN keylen = floatit ? 5 : 7;
11570 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11571 key, keylen, sv, NULL, NULL, 0, NULL);
11575 /* if it starts with a v, it could be a v-string */
11578 sv = newSV(5); /* preallocate storage space */
11579 ENTER_with_name("scan_vstring");
11581 s = scan_vstring(s, PL_bufend, sv);
11582 SvREFCNT_inc_simple_void_NN(sv);
11583 LEAVE_with_name("scan_vstring");
11587 /* make the op for the constant and return */
11590 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11592 lvalp->opval = NULL;
11598 S_scan_formline(pTHX_ char *s)
11600 SV * const stuff = newSVpvs("");
11601 bool needargs = FALSE;
11602 bool eofmt = FALSE;
11604 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11606 while (!needargs) {
11610 #ifdef PERL_STRICT_CR
11611 while (SPACE_OR_TAB(*t))
11614 while (SPACE_OR_TAB(*t) || *t == '\r')
11617 if (*t == '\n' || t == PL_bufend) {
11622 eol = (char *) memchr(s,'\n',PL_bufend-s);
11627 for (t = s; t < eol; t++) {
11628 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11630 goto enough; /* ~~ must be first line in formline */
11632 if (*t == '@' || *t == '^')
11636 sv_catpvn(stuff, s, eol-s);
11637 #ifndef PERL_STRICT_CR
11638 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11639 char *end = SvPVX(stuff) + SvCUR(stuff);
11642 SvCUR_set(stuff, SvCUR(stuff) - 1);
11650 if ((PL_rsfp || PL_parser->filtered)
11651 && PL_parser->form_lex_state == LEX_NORMAL) {
11653 PL_bufptr = PL_bufend;
11654 COPLINE_INC_WITH_HERELINES;
11655 got_some = lex_next_chunk(0);
11656 CopLINE_dec(PL_curcop);
11661 incline(s, PL_bufend);
11664 if (!SvCUR(stuff) || needargs)
11665 PL_lex_state = PL_parser->form_lex_state;
11666 if (SvCUR(stuff)) {
11667 PL_expect = XSTATE;
11669 const char *s2 = s;
11670 while (isSPACE(*s2) && *s2 != '\n')
11673 PL_expect = XTERMBLOCK;
11674 NEXTVAL_NEXTTOKE.ival = 0;
11677 NEXTVAL_NEXTTOKE.ival = 0;
11678 force_next(FORMLBRACK);
11681 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11684 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11688 SvREFCNT_dec(stuff);
11690 PL_lex_formbrack = 0;
11696 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11698 const I32 oldsavestack_ix = PL_savestack_ix;
11699 CV* const outsidecv = PL_compcv;
11701 SAVEI32(PL_subline);
11702 save_item(PL_subname);
11703 SAVESPTR(PL_compcv);
11705 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11706 CvFLAGS(PL_compcv) |= flags;
11708 PL_subline = CopLINE(PL_curcop);
11709 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11710 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11711 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11712 if (outsidecv && CvPADLIST(outsidecv))
11713 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11715 return oldsavestack_ix;
11719 /* Do extra initialisation of a CV (typically one just created by
11720 * start_subparse()) if that CV is for a named sub
11724 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
11726 PERL_ARGS_ASSERT_INIT_NAMED_CV;
11728 if (nameop->op_type == OP_CONST) {
11729 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
11730 if ( strEQ(name, "BEGIN")
11731 || strEQ(name, "END")
11732 || strEQ(name, "INIT")
11733 || strEQ(name, "CHECK")
11734 || strEQ(name, "UNITCHECK")
11739 /* State subs inside anonymous subs need to be
11740 clonable themselves. */
11741 if ( CvANON(CvOUTSIDE(cv))
11742 || CvCLONE(CvOUTSIDE(cv))
11743 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
11745 ))[nameop->op_targ])
11752 S_yywarn(pTHX_ const char *const s, U32 flags)
11754 PERL_ARGS_ASSERT_YYWARN;
11756 PL_in_eval |= EVAL_WARNONLY;
11757 yyerror_pv(s, flags);
11762 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11764 PERL_ARGS_ASSERT_ABORT_EXECUTION;
11767 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11770 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11772 NOT_REACHED; /* NOTREACHED */
11778 /* Called, after at least one error has been found, to abort the parse now,
11779 * instead of trying to forge ahead */
11781 yyerror_pvn(NULL, 0, 0);
11785 Perl_yyerror(pTHX_ const char *const s)
11787 PERL_ARGS_ASSERT_YYERROR;
11788 return yyerror_pvn(s, strlen(s), 0);
11792 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11794 PERL_ARGS_ASSERT_YYERROR_PV;
11795 return yyerror_pvn(s, strlen(s), flags);
11799 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11801 const char *context = NULL;
11804 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11805 int yychar = PL_parser->yychar;
11807 /* Output error message 's' with length 'len'. 'flags' are SV flags that
11808 * apply. If the number of errors found is large enough, it abandons
11809 * parsing. If 's' is NULL, there is no message, and it abandons
11810 * processing unconditionally */
11813 if (!yychar || (yychar == ';' && !PL_rsfp))
11814 sv_catpvs(where_sv, "at EOF");
11815 else if ( PL_oldoldbufptr
11816 && PL_bufptr > PL_oldoldbufptr
11817 && PL_bufptr - PL_oldoldbufptr < 200
11818 && PL_oldoldbufptr != PL_oldbufptr
11819 && PL_oldbufptr != PL_bufptr)
11823 The code below is removed for NetWare because it
11824 abends/crashes on NetWare when the script has error such as
11825 not having the closing quotes like:
11826 if ($var eq "value)
11827 Checking of white spaces is anyway done in NetWare code.
11830 while (isSPACE(*PL_oldoldbufptr))
11833 context = PL_oldoldbufptr;
11834 contlen = PL_bufptr - PL_oldoldbufptr;
11836 else if ( PL_oldbufptr
11837 && PL_bufptr > PL_oldbufptr
11838 && PL_bufptr - PL_oldbufptr < 200
11839 && PL_oldbufptr != PL_bufptr) {
11842 The code below is removed for NetWare because it
11843 abends/crashes on NetWare when the script has error such as
11844 not having the closing quotes like:
11845 if ($var eq "value)
11846 Checking of white spaces is anyway done in NetWare code.
11849 while (isSPACE(*PL_oldbufptr))
11852 context = PL_oldbufptr;
11853 contlen = PL_bufptr - PL_oldbufptr;
11855 else if (yychar > 255)
11856 sv_catpvs(where_sv, "next token ???");
11857 else if (yychar == YYEMPTY) {
11858 if (PL_lex_state == LEX_NORMAL)
11859 sv_catpvs(where_sv, "at end of line");
11860 else if (PL_lex_inpat)
11861 sv_catpvs(where_sv, "within pattern");
11863 sv_catpvs(where_sv, "within string");
11866 sv_catpvs(where_sv, "next char ");
11868 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11869 else if (isPRINT_LC(yychar)) {
11870 const char string = yychar;
11871 sv_catpvn(where_sv, &string, 1);
11874 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11876 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11877 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11878 OutCopFILE(PL_curcop),
11879 (IV)(PL_parser->preambling == NOLINE
11880 ? CopLINE(PL_curcop)
11881 : PL_parser->preambling));
11883 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11884 UTF8fARG(UTF, contlen, context));
11886 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11887 if ( PL_multi_start < PL_multi_end
11888 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11890 Perl_sv_catpvf(aTHX_ msg,
11891 " (Might be a runaway multi-line %c%c string starting on"
11892 " line %" IVdf ")\n",
11893 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11896 if (PL_in_eval & EVAL_WARNONLY) {
11897 PL_in_eval &= ~EVAL_WARNONLY;
11898 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11904 if (s == NULL || PL_error_count >= 10) {
11905 const char * msg = "";
11906 const char * const name = OutCopFILE(PL_curcop);
11909 SV * errsv = ERRSV;
11910 if (SvCUR(errsv)) {
11911 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11916 abort_execution(msg, name);
11919 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11923 PL_in_my_stash = NULL;
11928 S_swallow_bom(pTHX_ U8 *s)
11930 const STRLEN slen = SvCUR(PL_linestr);
11932 PERL_ARGS_ASSERT_SWALLOW_BOM;
11936 if (s[1] == 0xFE) {
11937 /* UTF-16 little-endian? (or UTF-32LE?) */
11938 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11939 /* diag_listed_as: Unsupported script encoding %s */
11940 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11941 #ifndef PERL_NO_UTF16_FILTER
11943 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11946 if (PL_bufend > (char*)s) {
11947 s = add_utf16_textfilter(s, TRUE);
11950 /* diag_listed_as: Unsupported script encoding %s */
11951 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11956 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11957 #ifndef PERL_NO_UTF16_FILTER
11959 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11962 if (PL_bufend > (char *)s) {
11963 s = add_utf16_textfilter(s, FALSE);
11966 /* diag_listed_as: Unsupported script encoding %s */
11967 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11971 case BOM_UTF8_FIRST_BYTE: {
11972 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11974 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11976 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
11983 if (s[2] == 0xFE && s[3] == 0xFF) {
11984 /* UTF-32 big-endian */
11985 /* diag_listed_as: Unsupported script encoding %s */
11986 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11989 else if (s[2] == 0 && s[3] != 0) {
11992 * are a good indicator of UTF-16BE. */
11993 #ifndef PERL_NO_UTF16_FILTER
11995 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11997 s = add_utf16_textfilter(s, FALSE);
11999 /* diag_listed_as: Unsupported script encoding %s */
12000 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12007 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12010 * are a good indicator of UTF-16LE. */
12011 #ifndef PERL_NO_UTF16_FILTER
12013 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12015 s = add_utf16_textfilter(s, TRUE);
12017 /* diag_listed_as: Unsupported script encoding %s */
12018 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12026 #ifndef PERL_NO_UTF16_FILTER
12028 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12030 SV *const filter = FILTER_DATA(idx);
12031 /* We re-use this each time round, throwing the contents away before we
12033 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12034 SV *const utf8_buffer = filter;
12035 IV status = IoPAGE(filter);
12036 const bool reverse = cBOOL(IoLINES(filter));
12039 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12041 /* As we're automatically added, at the lowest level, and hence only called
12042 from this file, we can be sure that we're not called in block mode. Hence
12043 don't bother writing code to deal with block mode. */
12045 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12048 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12050 DEBUG_P(PerlIO_printf(Perl_debug_log,
12051 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12052 FPTR2DPTR(void *, S_utf16_textfilter),
12053 reverse ? 'l' : 'b', idx, maxlen, status,
12054 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12061 /* First, look in our buffer of existing UTF-8 data: */
12062 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12066 } else if (status == 0) {
12068 IoPAGE(filter) = 0;
12069 nl = SvEND(utf8_buffer);
12072 STRLEN got = nl - SvPVX(utf8_buffer);
12073 /* Did we have anything to append? */
12075 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12076 /* Everything else in this code works just fine if SVp_POK isn't
12077 set. This, however, needs it, and we need it to work, else
12078 we loop infinitely because the buffer is never consumed. */
12079 sv_chop(utf8_buffer, nl);
12083 /* OK, not a complete line there, so need to read some more UTF-16.
12084 Read an extra octect if the buffer currently has an odd number. */
12088 if (SvCUR(utf16_buffer) >= 2) {
12089 /* Location of the high octet of the last complete code point.
12090 Gosh, UTF-16 is a pain. All the benefits of variable length,
12091 *coupled* with all the benefits of partial reads and
12093 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12094 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12096 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12100 /* We have the first half of a surrogate. Read more. */
12101 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12104 status = FILTER_READ(idx + 1, utf16_buffer,
12105 160 + (SvCUR(utf16_buffer) & 1));
12106 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12107 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12110 IoPAGE(filter) = status;
12115 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12116 * require 4 bytes per char */
12117 chars = SvCUR(utf16_buffer) >> 1;
12118 have = SvCUR(utf8_buffer);
12120 /* Assume the worst case size as noted by the functions: twice the
12121 * number of input bytes */
12122 SvGROW(utf8_buffer, have + chars * 4 + 1);
12125 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12126 (U8*)SvPVX_const(utf8_buffer) + have,
12127 chars * 2, &newlen);
12129 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12130 (U8*)SvPVX_const(utf8_buffer) + have,
12131 chars * 2, &newlen);
12133 SvCUR_set(utf8_buffer, have + newlen);
12136 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12137 it's private to us, and utf16_to_utf8{,reversed} take a
12138 (pointer,length) pair, rather than a NUL-terminated string. */
12139 if(SvCUR(utf16_buffer) & 1) {
12140 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12141 SvCUR_set(utf16_buffer, 1);
12143 SvCUR_set(utf16_buffer, 0);
12146 DEBUG_P(PerlIO_printf(Perl_debug_log,
12147 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12149 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12150 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12155 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12157 SV *filter = filter_add(S_utf16_textfilter, NULL);
12159 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12161 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12163 IoLINES(filter) = reversed;
12164 IoPAGE(filter) = 1; /* Not EOF */
12166 /* Sadly, we have to return a valid pointer, come what may, so we have to
12167 ignore any error return from this. */
12168 SvCUR_set(PL_linestr, 0);
12169 if (FILTER_READ(0, PL_linestr, 0)) {
12170 SvUTF8_on(PL_linestr);
12172 SvUTF8_on(PL_linestr);
12174 PL_bufend = SvEND(PL_linestr);
12175 return (U8*)SvPVX(PL_linestr);
12180 Returns a pointer to the next character after the parsed
12181 vstring, as well as updating the passed in sv.
12183 Function must be called like
12185 sv = sv_2mortal(newSV(5));
12186 s = scan_vstring(s,e,sv);
12188 where s and e are the start and end of the string.
12189 The sv should already be large enough to store the vstring
12190 passed in, for performance reasons.
12192 This function may croak if fatal warnings are enabled in the
12193 calling scope, hence the sv_2mortal in the example (to prevent
12194 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12200 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12202 const char *pos = s;
12203 const char *start = s;
12205 PERL_ARGS_ASSERT_SCAN_VSTRING;
12207 if (*pos == 'v') pos++; /* get past 'v' */
12208 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12210 if ( *pos != '.') {
12211 /* this may not be a v-string if followed by => */
12212 const char *next = pos;
12213 while (next < e && isSPACE(*next))
12215 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12216 /* return string not v-string */
12217 sv_setpvn(sv,(char *)s,pos-s);
12218 return (char *)pos;
12222 if (!isALPHA(*pos)) {
12223 U8 tmpbuf[UTF8_MAXBYTES+1];
12226 s++; /* get past 'v' */
12231 /* this is atoi() that tolerates underscores */
12234 const char *end = pos;
12236 while (--end >= s) {
12238 const UV orev = rev;
12239 rev += (*end - '0') * mult;
12242 /* diag_listed_as: Integer overflow in %s number */
12243 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12244 "Integer overflow in decimal number");
12248 /* Append native character for the rev point */
12249 tmpend = uvchr_to_utf8(tmpbuf, rev);
12250 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12251 if (!UVCHR_IS_INVARIANT(rev))
12253 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12259 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12263 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12270 Perl_keyword_plugin_standard(pTHX_
12271 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12273 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12274 PERL_UNUSED_CONTEXT;
12275 PERL_UNUSED_ARG(keyword_ptr);
12276 PERL_UNUSED_ARG(keyword_len);
12277 PERL_UNUSED_ARG(op_ptr);
12278 return KEYWORD_PLUGIN_DECLINE;
12282 =for apidoc wrap_keyword_plugin
12284 Puts a C function into the chain of keyword plugins. This is the
12285 preferred way to manipulate the L</PL_keyword_plugin> variable.
12286 C<new_plugin> is a pointer to the C function that is to be added to the
12287 keyword plugin chain, and C<old_plugin_p> points to the storage location
12288 where a pointer to the next function in the chain will be stored. The
12289 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12290 while the value previously stored there is written to C<*old_plugin_p>.
12292 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12293 to hook keyword parsing may find itself invoked more than once per
12294 process, typically in different threads. To handle that situation, this
12295 function is idempotent. The location C<*old_plugin_p> must initially
12296 (once per process) contain a null pointer. A C variable of static
12297 duration (declared at file scope, typically also marked C<static> to give
12298 it internal linkage) will be implicitly initialised appropriately, if it
12299 does not have an explicit initialiser. This function will only actually
12300 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12301 function is also thread safe on the small scale. It uses appropriate
12302 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12304 When this function is called, the function referenced by C<new_plugin>
12305 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12306 In a threading situation, C<new_plugin> may be called immediately, even
12307 before this function has returned. C<*old_plugin_p> will always be
12308 appropriately set before C<new_plugin> is called. If C<new_plugin>
12309 decides not to do anything special with the identifier that it is given
12310 (which is the usual case for most calls to a keyword plugin), it must
12311 chain the plugin function referenced by C<*old_plugin_p>.
12313 Taken all together, XS code to install a keyword plugin should typically
12314 look something like this:
12316 static Perl_keyword_plugin_t next_keyword_plugin;
12317 static OP *my_keyword_plugin(pTHX_
12318 char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12320 if (memEQs(keyword_ptr, keyword_len,
12321 "my_new_keyword")) {
12324 return next_keyword_plugin(aTHX_
12325 keyword_ptr, keyword_len, op_ptr);
12329 wrap_keyword_plugin(my_keyword_plugin,
12330 &next_keyword_plugin);
12332 Direct access to L</PL_keyword_plugin> should be avoided.
12338 Perl_wrap_keyword_plugin(pTHX_
12339 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12343 PERL_UNUSED_CONTEXT;
12344 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12345 if (*old_plugin_p) return;
12346 KEYWORD_PLUGIN_MUTEX_LOCK;
12347 if (!*old_plugin_p) {
12348 *old_plugin_p = PL_keyword_plugin;
12349 PL_keyword_plugin = new_plugin;
12351 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12354 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12356 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12358 SAVEI32(PL_lex_brackets);
12359 if (PL_lex_brackets > 100)
12360 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12361 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12362 SAVEI32(PL_lex_allbrackets);
12363 PL_lex_allbrackets = 0;
12364 SAVEI8(PL_lex_fakeeof);
12365 PL_lex_fakeeof = (U8)fakeeof;
12366 if(yyparse(gramtype) && !PL_parser->error_count)
12367 qerror(Perl_mess(aTHX_ "Parse error"));
12370 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12372 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12376 SAVEVPTR(PL_eval_root);
12377 PL_eval_root = NULL;
12378 parse_recdescent(gramtype, fakeeof);
12384 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12386 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12389 if (flags & ~PARSE_OPTIONAL)
12390 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12391 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12392 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12393 if (!PL_parser->error_count)
12394 qerror(Perl_mess(aTHX_ "Parse error"));
12395 exprop = newOP(OP_NULL, 0);
12401 =for apidoc parse_arithexpr
12403 Parse a Perl arithmetic expression. This may contain operators of precedence
12404 down to the bit shift operators. The expression must be followed (and thus
12405 terminated) either by a comparison or lower-precedence operator or by
12406 something that would normally terminate an expression such as semicolon.
12407 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12408 otherwise it is mandatory. It is up to the caller to ensure that the
12409 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12410 the source of the code to be parsed and the lexical context for the
12413 The op tree representing the expression is returned. If an optional
12414 expression is absent, a null pointer is returned, otherwise the pointer
12417 If an error occurs in parsing or compilation, in most cases a valid op
12418 tree is returned anyway. The error is reflected in the parser state,
12419 normally resulting in a single exception at the top level of parsing
12420 which covers all the compilation errors that occurred. Some compilation
12421 errors, however, will throw an exception immediately.
12427 Perl_parse_arithexpr(pTHX_ U32 flags)
12429 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12433 =for apidoc parse_termexpr
12435 Parse a Perl term expression. This may contain operators of precedence
12436 down to the assignment operators. The expression must be followed (and thus
12437 terminated) either by a comma or lower-precedence operator or by
12438 something that would normally terminate an expression such as semicolon.
12439 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12440 otherwise it is mandatory. It is up to the caller to ensure that the
12441 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12442 the source of the code to be parsed and the lexical context for the
12445 The op tree representing the expression is returned. If an optional
12446 expression is absent, a null pointer is returned, otherwise the pointer
12449 If an error occurs in parsing or compilation, in most cases a valid op
12450 tree is returned anyway. The error is reflected in the parser state,
12451 normally resulting in a single exception at the top level of parsing
12452 which covers all the compilation errors that occurred. Some compilation
12453 errors, however, will throw an exception immediately.
12459 Perl_parse_termexpr(pTHX_ U32 flags)
12461 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12465 =for apidoc parse_listexpr
12467 Parse a Perl list expression. This may contain operators of precedence
12468 down to the comma operator. The expression must be followed (and thus
12469 terminated) either by a low-precedence logic operator such as C<or> or by
12470 something that would normally terminate an expression such as semicolon.
12471 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12472 otherwise it is mandatory. It is up to the caller to ensure that the
12473 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12474 the source of the code to be parsed and the lexical context for the
12477 The op tree representing the expression is returned. If an optional
12478 expression is absent, a null pointer is returned, otherwise the pointer
12481 If an error occurs in parsing or compilation, in most cases a valid op
12482 tree is returned anyway. The error is reflected in the parser state,
12483 normally resulting in a single exception at the top level of parsing
12484 which covers all the compilation errors that occurred. Some compilation
12485 errors, however, will throw an exception immediately.
12491 Perl_parse_listexpr(pTHX_ U32 flags)
12493 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12497 =for apidoc parse_fullexpr
12499 Parse a single complete Perl expression. This allows the full
12500 expression grammar, including the lowest-precedence operators such
12501 as C<or>. The expression must be followed (and thus terminated) by a
12502 token that an expression would normally be terminated by: end-of-file,
12503 closing bracketing punctuation, semicolon, or one of the keywords that
12504 signals a postfix expression-statement modifier. If C<flags> has the
12505 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12506 mandatory. It is up to the caller to ensure that the dynamic parser
12507 state (L</PL_parser> et al) is correctly set to reflect the source of
12508 the code to be parsed and the lexical context for the expression.
12510 The op tree representing the expression is returned. If an optional
12511 expression is absent, a null pointer is returned, otherwise the pointer
12514 If an error occurs in parsing or compilation, in most cases a valid op
12515 tree is returned anyway. The error is reflected in the parser state,
12516 normally resulting in a single exception at the top level of parsing
12517 which covers all the compilation errors that occurred. Some compilation
12518 errors, however, will throw an exception immediately.
12524 Perl_parse_fullexpr(pTHX_ U32 flags)
12526 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12530 =for apidoc parse_block
12532 Parse a single complete Perl code block. This consists of an opening
12533 brace, a sequence of statements, and a closing brace. The block
12534 constitutes a lexical scope, so C<my> variables and various compile-time
12535 effects can be contained within it. It is up to the caller to ensure
12536 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12537 reflect the source of the code to be parsed and the lexical context for
12540 The op tree representing the code block is returned. This is always a
12541 real op, never a null pointer. It will normally be a C<lineseq> list,
12542 including C<nextstate> or equivalent ops. No ops to construct any kind
12543 of runtime scope are included by virtue of it being a block.
12545 If an error occurs in parsing or compilation, in most cases a valid op
12546 tree (most likely null) is returned anyway. The error is reflected in
12547 the parser state, normally resulting in a single exception at the top
12548 level of parsing which covers all the compilation errors that occurred.
12549 Some compilation errors, however, will throw an exception immediately.
12551 The C<flags> parameter is reserved for future use, and must always
12558 Perl_parse_block(pTHX_ U32 flags)
12561 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12562 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12566 =for apidoc parse_barestmt
12568 Parse a single unadorned Perl statement. This may be a normal imperative
12569 statement or a declaration that has compile-time effect. It does not
12570 include any label or other affixture. It is up to the caller to ensure
12571 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12572 reflect the source of the code to be parsed and the lexical context for
12575 The op tree representing the statement is returned. This may be a
12576 null pointer if the statement is null, for example if it was actually
12577 a subroutine definition (which has compile-time side effects). If not
12578 null, it will be ops directly implementing the statement, suitable to
12579 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12580 equivalent op (except for those embedded in a scope contained entirely
12581 within the statement).
12583 If an error occurs in parsing or compilation, in most cases a valid op
12584 tree (most likely null) is returned anyway. The error is reflected in
12585 the parser state, normally resulting in a single exception at the top
12586 level of parsing which covers all the compilation errors that occurred.
12587 Some compilation errors, however, will throw an exception immediately.
12589 The C<flags> parameter is reserved for future use, and must always
12596 Perl_parse_barestmt(pTHX_ U32 flags)
12599 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12600 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12604 =for apidoc parse_label
12606 Parse a single label, possibly optional, of the type that may prefix a
12607 Perl statement. It is up to the caller to ensure that the dynamic parser
12608 state (L</PL_parser> et al) is correctly set to reflect the source of
12609 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12610 label is optional, otherwise it is mandatory.
12612 The name of the label is returned in the form of a fresh scalar. If an
12613 optional label is absent, a null pointer is returned.
12615 If an error occurs in parsing, which can only occur if the label is
12616 mandatory, a valid label is returned anyway. The error is reflected in
12617 the parser state, normally resulting in a single exception at the top
12618 level of parsing which covers all the compilation errors that occurred.
12624 Perl_parse_label(pTHX_ U32 flags)
12626 if (flags & ~PARSE_OPTIONAL)
12627 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12629 PL_parser->yychar = yylex();
12630 if (PL_parser->yychar == LABEL) {
12631 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
12632 PL_parser->yychar = YYEMPTY;
12633 cSVOPx(pl_yylval.opval)->op_sv = NULL;
12634 op_free(pl_yylval.opval);
12642 STRLEN wlen, bufptr_pos;
12645 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12647 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12648 if (word_takes_any_delimiter(s, wlen))
12650 bufptr_pos = s - SvPVX(PL_linestr);
12652 lex_read_space(LEX_KEEP_PREVIOUS);
12654 s = SvPVX(PL_linestr) + bufptr_pos;
12655 if (t[0] == ':' && t[1] != ':') {
12656 PL_oldoldbufptr = PL_oldbufptr;
12659 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12663 if (flags & PARSE_OPTIONAL) {
12666 qerror(Perl_mess(aTHX_ "Parse error"));
12667 return newSVpvs("x");
12674 =for apidoc parse_fullstmt
12676 Parse a single complete Perl statement. This may be a normal imperative
12677 statement or a declaration that has compile-time effect, and may include
12678 optional labels. It is up to the caller to ensure that the dynamic
12679 parser state (L</PL_parser> et al) is correctly set to reflect the source
12680 of the code to be parsed and the lexical context for the statement.
12682 The op tree representing the statement is returned. This may be a
12683 null pointer if the statement is null, for example if it was actually
12684 a subroutine definition (which has compile-time side effects). If not
12685 null, it will be the result of a L</newSTATEOP> call, normally including
12686 a C<nextstate> or equivalent op.
12688 If an error occurs in parsing or compilation, in most cases a valid op
12689 tree (most likely null) is returned anyway. The error is reflected in
12690 the parser state, normally resulting in a single exception at the top
12691 level of parsing which covers all the compilation errors that occurred.
12692 Some compilation errors, however, will throw an exception immediately.
12694 The C<flags> parameter is reserved for future use, and must always
12701 Perl_parse_fullstmt(pTHX_ U32 flags)
12704 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12705 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12709 =for apidoc parse_stmtseq
12711 Parse a sequence of zero or more Perl statements. These may be normal
12712 imperative statements, including optional labels, or declarations
12713 that have compile-time effect, or any mixture thereof. The statement
12714 sequence ends when a closing brace or end-of-file is encountered in a
12715 place where a new statement could have validly started. It is up to
12716 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12717 is correctly set to reflect the source of the code to be parsed and the
12718 lexical context for the statements.
12720 The op tree representing the statement sequence is returned. This may
12721 be a null pointer if the statements were all null, for example if there
12722 were no statements or if there were only subroutine definitions (which
12723 have compile-time side effects). If not null, it will be a C<lineseq>
12724 list, normally including C<nextstate> or equivalent ops.
12726 If an error occurs in parsing or compilation, in most cases a valid op
12727 tree is returned anyway. The error is reflected in the parser state,
12728 normally resulting in a single exception at the top level of parsing
12729 which covers all the compilation errors that occurred. Some compilation
12730 errors, however, will throw an exception immediately.
12732 The C<flags> parameter is reserved for future use, and must always
12739 Perl_parse_stmtseq(pTHX_ U32 flags)
12744 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12745 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12746 c = lex_peek_unichar(0);
12747 if (c != -1 && c != /*{*/'}')
12748 qerror(Perl_mess(aTHX_ "Parse error"));
12753 * ex: set ts=8 sts=4 sw=4 et: