3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
492 S_deprecate_commaless_var_list(pTHX) {
494 deprecate("comma-less variable list");
495 return REPORT(','); /* grandfather non-comma-format format */
501 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
502 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
506 S_ao(pTHX_ int toketype)
509 if (*PL_bufptr == '=') {
511 if (toketype == ANDAND)
512 pl_yylval.ival = OP_ANDASSIGN;
513 else if (toketype == OROR)
514 pl_yylval.ival = OP_ORASSIGN;
515 else if (toketype == DORDOR)
516 pl_yylval.ival = OP_DORASSIGN;
524 * When Perl expects an operator and finds something else, no_op
525 * prints the warning. It always prints "<something> found where
526 * operator expected. It prints "Missing semicolon on previous line?"
527 * if the surprise occurs at the start of the line. "do you need to
528 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
529 * where the compiler doesn't know if foo is a method call or a function.
530 * It prints "Missing operator before end of line" if there's nothing
531 * after the missing operator, or "... before <...>" if there is something
532 * after the missing operator.
536 S_no_op(pTHX_ const char *const what, char *s)
539 char * const oldbp = PL_bufptr;
540 const bool is_first = (PL_oldbufptr == PL_linestart);
542 PERL_ARGS_ASSERT_NO_OP;
548 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
549 if (ckWARN_d(WARN_SYNTAX)) {
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Missing semicolon on previous line?)\n");
553 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
555 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
556 t += UTF ? UTF8SKIP(t) : 1)
558 if (t < PL_bufptr && isSPACE(*t))
559 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
560 "\t(Do you need to predeclare %"UTF8f"?)\n",
561 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
565 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
566 "\t(Missing operator before %"UTF8f"?)\n",
567 UTF8fARG(UTF, s - oldbp, oldbp));
575 * Complain about missing quote/regexp/heredoc terminator.
576 * If it's called with NULL then it cauterizes the line buffer.
577 * If we're in a delimited string and the delimiter is a control
578 * character, it's reformatted into a two-char sequence like ^C.
583 S_missingterm(pTHX_ char *s)
589 char * const nl = strrchr(s,'\n');
593 else if ((U8) PL_multi_close < 32) {
595 tmpbuf[1] = (char)toCTRL(PL_multi_close);
600 *tmpbuf = (char)PL_multi_close;
604 q = strchr(s,'"') ? '\'' : '"';
605 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
611 * Check whether the named feature is enabled.
614 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
617 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
619 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
621 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
623 if (namelen > MAX_FEATURE_LEN)
625 memcpy(&he_name[8], name, namelen);
627 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
628 REFCOUNTED_HE_EXISTS));
632 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
633 * utf16-to-utf8-reversed.
636 #ifdef PERL_CR_FILTER
640 const char *s = SvPVX_const(sv);
641 const char * const e = s + SvCUR(sv);
643 PERL_ARGS_ASSERT_STRIP_RETURN;
645 /* outer loop optimized to do nothing if there are no CR-LFs */
647 if (*s++ == '\r' && *s == '\n') {
648 /* hit a CR-LF, need to copy the rest */
652 if (*s == '\r' && s[1] == '\n')
663 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
665 const I32 count = FILTER_READ(idx+1, sv, maxlen);
666 if (count > 0 && !maxlen)
673 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
675 Creates and initialises a new lexer/parser state object, supplying
676 a context in which to lex and parse from a new source of Perl code.
677 A pointer to the new state object is placed in L</PL_parser>. An entry
678 is made on the save stack so that upon unwinding the new state object
679 will be destroyed and the former value of L</PL_parser> will be restored.
680 Nothing else need be done to clean up the parsing context.
682 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
683 non-null, provides a string (in SV form) containing code to be parsed.
684 A copy of the string is made, so subsequent modification of I<line>
685 does not affect parsing. I<rsfp>, if non-null, provides an input stream
686 from which code will be read to be parsed. If both are non-null, the
687 code in I<line> comes first and must consist of complete lines of input,
688 and I<rsfp> supplies the remainder of the source.
690 The I<flags> parameter is reserved for future use. Currently it is only
691 used by perl internally, so extensions should always pass zero.
696 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
697 can share filters with the current parser.
698 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
699 caller, hence isn't owned by the parser, so shouldn't be closed on parser
700 destruction. This is used to handle the case of defaulting to reading the
701 script from the standard input because no filename was given on the command
702 line (without getting confused by situation where STDIN has been closed, so
703 the script handle is opened on fd 0) */
706 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
709 const char *s = NULL;
710 yy_parser *parser, *oparser;
711 if (flags && flags & ~LEX_START_FLAGS)
712 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
714 /* create and initialise a parser */
716 Newxz(parser, 1, yy_parser);
717 parser->old_parser = oparser = PL_parser;
720 parser->stack = NULL;
722 parser->stack_size = 0;
724 /* on scope exit, free this parser and restore any outer one */
726 parser->saved_curcop = PL_curcop;
728 /* initialise lexer state */
731 parser->curforce = -1;
733 parser->nexttoke = 0;
735 parser->error_count = oparser ? oparser->error_count : 0;
736 parser->copline = parser->preambling = NOLINE;
737 parser->lex_state = LEX_NORMAL;
738 parser->expect = XSTATE;
740 parser->rsfp_filters =
741 !(flags & LEX_START_SAME_FILTER) || !oparser
743 : MUTABLE_AV(SvREFCNT_inc(
744 oparser->rsfp_filters
745 ? oparser->rsfp_filters
746 : (oparser->rsfp_filters = newAV())
749 Newx(parser->lex_brackstack, 120, char);
750 Newx(parser->lex_casestack, 12, char);
751 *parser->lex_casestack = '\0';
752 Newxz(parser->lex_shared, 1, LEXSHARED);
756 s = SvPV_const(line, len);
757 parser->linestr = flags & LEX_START_COPIED
758 ? SvREFCNT_inc_simple_NN(line)
759 : newSVpvn_flags(s, len, SvUTF8(line));
760 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
762 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
764 parser->oldoldbufptr =
767 parser->linestart = SvPVX(parser->linestr);
768 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
769 parser->last_lop = parser->last_uni = NULL;
771 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
772 |LEX_DONT_CLOSE_RSFP));
773 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774 |LEX_DONT_CLOSE_RSFP));
776 parser->in_pod = parser->filtered = 0;
780 /* delete a parser object */
783 Perl_parser_free(pTHX_ const yy_parser *parser)
785 PERL_ARGS_ASSERT_PARSER_FREE;
787 PL_curcop = parser->saved_curcop;
788 SvREFCNT_dec(parser->linestr);
790 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
791 PerlIO_clearerr(parser->rsfp);
792 else if (parser->rsfp && (!parser->old_parser ||
793 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
794 PerlIO_close(parser->rsfp);
795 SvREFCNT_dec(parser->rsfp_filters);
796 SvREFCNT_dec(parser->lex_stuff);
797 SvREFCNT_dec(parser->sublex_info.repl);
799 Safefree(parser->lex_brackstack);
800 Safefree(parser->lex_casestack);
801 Safefree(parser->lex_shared);
802 PL_parser = parser->old_parser;
807 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
810 I32 nexttoke = parser->lasttoke;
812 I32 nexttoke = parser->nexttoke;
814 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
817 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
819 && parser->nexttoke[nexttoke].next_val.opval
820 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
821 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
822 op_free(parser->nexttoke[nexttoke].next_val.opval);
823 parser->nexttoke[nexttoke].next_val.opval = NULL;
826 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
827 && parser->nextval[nexttoke].opval
828 && parser->nextval[nexttoke].opval->op_slabbed
829 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
830 op_free(parser->nextval[nexttoke].opval);
831 parser->nextval[nexttoke].opval = NULL;
839 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
841 Buffer scalar containing the chunk currently under consideration of the
842 text currently being lexed. This is always a plain string scalar (for
843 which C<SvPOK> is true). It is not intended to be used as a scalar by
844 normal scalar means; instead refer to the buffer directly by the pointer
845 variables described below.
847 The lexer maintains various C<char*> pointers to things in the
848 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
849 reallocated, all of these pointers must be updated. Don't attempt to
850 do this manually, but rather use L</lex_grow_linestr> if you need to
851 reallocate the buffer.
853 The content of the text chunk in the buffer is commonly exactly one
854 complete line of input, up to and including a newline terminator,
855 but there are situations where it is otherwise. The octets of the
856 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
857 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
858 flag on this scalar, which may disagree with it.
860 For direct examination of the buffer, the variable
861 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
862 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
863 of these pointers is usually preferable to examination of the scalar
864 through normal scalar means.
866 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
868 Direct pointer to the end of the chunk of text currently being lexed, the
869 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
870 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
871 always located at the end of the buffer, and does not count as part of
872 the buffer's contents.
874 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
876 Points to the current position of lexing inside the lexer buffer.
877 Characters around this point may be freely examined, within
878 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
879 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
880 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
882 Lexing code (whether in the Perl core or not) moves this pointer past
883 the characters that it consumes. It is also expected to perform some
884 bookkeeping whenever a newline character is consumed. This movement
885 can be more conveniently performed by the function L</lex_read_to>,
886 which handles newlines appropriately.
888 Interpretation of the buffer's octets can be abstracted out by
889 using the slightly higher-level functions L</lex_peek_unichar> and
890 L</lex_read_unichar>.
892 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
894 Points to the start of the current line inside the lexer buffer.
895 This is useful for indicating at which column an error occurred, and
896 not much else. This must be updated by any lexing code that consumes
897 a newline; the function L</lex_read_to> handles this detail.
903 =for apidoc Amx|bool|lex_bufutf8
905 Indicates whether the octets in the lexer buffer
906 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
907 of Unicode characters. If not, they should be interpreted as Latin-1
908 characters. This is analogous to the C<SvUTF8> flag for scalars.
910 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
911 contains valid UTF-8. Lexing code must be robust in the face of invalid
914 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
915 is significant, but not the whole story regarding the input character
916 encoding. Normally, when a file is being read, the scalar contains octets
917 and its C<SvUTF8> flag is off, but the octets should be interpreted as
918 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
919 however, the scalar may have the C<SvUTF8> flag on, and in this case its
920 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
921 is in effect. This logic may change in the future; use this function
922 instead of implementing the logic yourself.
928 Perl_lex_bufutf8(pTHX)
934 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
936 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
937 at least I<len> octets (including terminating NUL). Returns a
938 pointer to the reallocated buffer. This is necessary before making
939 any direct modification of the buffer that would increase its length.
940 L</lex_stuff_pvn> provides a more convenient way to insert text into
943 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
944 this function updates all of the lexer's variables that point directly
951 Perl_lex_grow_linestr(pTHX_ STRLEN len)
955 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
956 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
957 linestr = PL_parser->linestr;
958 buf = SvPVX(linestr);
959 if (len <= SvLEN(linestr))
961 bufend_pos = PL_parser->bufend - buf;
962 bufptr_pos = PL_parser->bufptr - buf;
963 oldbufptr_pos = PL_parser->oldbufptr - buf;
964 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
965 linestart_pos = PL_parser->linestart - buf;
966 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
967 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
968 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
969 PL_parser->lex_shared->re_eval_start - buf : 0;
971 buf = sv_grow(linestr, len);
973 PL_parser->bufend = buf + bufend_pos;
974 PL_parser->bufptr = buf + bufptr_pos;
975 PL_parser->oldbufptr = buf + oldbufptr_pos;
976 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
977 PL_parser->linestart = buf + linestart_pos;
978 if (PL_parser->last_uni)
979 PL_parser->last_uni = buf + last_uni_pos;
980 if (PL_parser->last_lop)
981 PL_parser->last_lop = buf + last_lop_pos;
982 if (PL_parser->lex_shared->re_eval_start)
983 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
988 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
990 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
991 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
992 reallocating the buffer if necessary. This means that lexing code that
993 runs later will see the characters as if they had appeared in the input.
994 It is not recommended to do this as part of normal parsing, and most
995 uses of this facility run the risk of the inserted characters being
996 interpreted in an unintended manner.
998 The string to be inserted is represented by I<len> octets starting
999 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1000 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1001 The characters are recoded for the lexer buffer, according to how the
1002 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1003 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1004 function is more convenient.
1010 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1014 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1015 if (flags & ~(LEX_STUFF_UTF8))
1016 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1018 if (flags & LEX_STUFF_UTF8) {
1021 STRLEN highhalf = 0; /* Count of variants */
1022 const char *p, *e = pv+len;
1023 for (p = pv; p != e; p++) {
1024 if (! UTF8_IS_INVARIANT(*p)) {
1030 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1031 bufptr = PL_parser->bufptr;
1032 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1033 SvCUR_set(PL_parser->linestr,
1034 SvCUR(PL_parser->linestr) + len+highhalf);
1035 PL_parser->bufend += len+highhalf;
1036 for (p = pv; p != e; p++) {
1038 if (! UTF8_IS_INVARIANT(c)) {
1039 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1040 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1042 *bufptr++ = (char)c;
1047 if (flags & LEX_STUFF_UTF8) {
1048 STRLEN highhalf = 0;
1049 const char *p, *e = pv+len;
1050 for (p = pv; p != e; p++) {
1052 if (UTF8_IS_ABOVE_LATIN1(c)) {
1053 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1054 "non-Latin-1 character into Latin-1 input");
1055 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1058 } else if (! UTF8_IS_INVARIANT(c)) {
1059 /* malformed UTF-8 */
1061 SAVESPTR(PL_warnhook);
1062 PL_warnhook = PERL_WARNHOOK_FATAL;
1063 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1069 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1070 bufptr = PL_parser->bufptr;
1071 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1072 SvCUR_set(PL_parser->linestr,
1073 SvCUR(PL_parser->linestr) + len-highhalf);
1074 PL_parser->bufend += len-highhalf;
1077 if (UTF8_IS_INVARIANT(*p)) {
1083 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1089 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1090 bufptr = PL_parser->bufptr;
1091 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1092 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1093 PL_parser->bufend += len;
1094 Copy(pv, bufptr, len, char);
1100 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1102 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1103 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1104 reallocating the buffer if necessary. This means that lexing code that
1105 runs later will see the characters as if they had appeared in the input.
1106 It is not recommended to do this as part of normal parsing, and most
1107 uses of this facility run the risk of the inserted characters being
1108 interpreted in an unintended manner.
1110 The string to be inserted is represented by octets starting at I<pv>
1111 and continuing to the first nul. These octets are interpreted as either
1112 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1113 in I<flags>. The characters are recoded for the lexer buffer, according
1114 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1115 If it is not convenient to nul-terminate a string to be inserted, the
1116 L</lex_stuff_pvn> function is more appropriate.
1122 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1124 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1125 lex_stuff_pvn(pv, strlen(pv), flags);
1129 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1131 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1132 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1133 reallocating the buffer if necessary. This means that lexing code that
1134 runs later will see the characters as if they had appeared in the input.
1135 It is not recommended to do this as part of normal parsing, and most
1136 uses of this facility run the risk of the inserted characters being
1137 interpreted in an unintended manner.
1139 The string to be inserted is the string value of I<sv>. The characters
1140 are recoded for the lexer buffer, according to how the buffer is currently
1141 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1142 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1143 need to construct a scalar.
1149 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1153 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1155 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1157 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1161 =for apidoc Amx|void|lex_unstuff|char *ptr
1163 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1164 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1165 This hides the discarded text from any lexing code that runs later,
1166 as if the text had never appeared.
1168 This is not the normal way to consume lexed text. For that, use
1175 Perl_lex_unstuff(pTHX_ char *ptr)
1179 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1180 buf = PL_parser->bufptr;
1182 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185 bufend = PL_parser->bufend;
1187 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1188 unstuff_len = ptr - buf;
1189 Move(ptr, buf, bufend+1-ptr, char);
1190 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1191 PL_parser->bufend = bufend - unstuff_len;
1195 =for apidoc Amx|void|lex_read_to|char *ptr
1197 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1198 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1199 performing the correct bookkeeping whenever a newline character is passed.
1200 This is the normal way to consume lexed text.
1202 Interpretation of the buffer's octets can be abstracted out by
1203 using the slightly higher-level functions L</lex_peek_unichar> and
1204 L</lex_read_unichar>.
1210 Perl_lex_read_to(pTHX_ char *ptr)
1213 PERL_ARGS_ASSERT_LEX_READ_TO;
1214 s = PL_parser->bufptr;
1215 if (ptr < s || ptr > PL_parser->bufend)
1216 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1217 for (; s != ptr; s++)
1219 COPLINE_INC_WITH_HERELINES;
1220 PL_parser->linestart = s+1;
1222 PL_parser->bufptr = ptr;
1226 =for apidoc Amx|void|lex_discard_to|char *ptr
1228 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1229 up to I<ptr>. The remaining content of the buffer will be moved, and
1230 all pointers into the buffer updated appropriately. I<ptr> must not
1231 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1232 it is not permitted to discard text that has yet to be lexed.
1234 Normally it is not necessarily to do this directly, because it suffices to
1235 use the implicit discarding behaviour of L</lex_next_chunk> and things
1236 based on it. However, if a token stretches across multiple lines,
1237 and the lexing code has kept multiple lines of text in the buffer for
1238 that purpose, then after completion of the token it would be wise to
1239 explicitly discard the now-unneeded earlier lines, to avoid future
1240 multi-line tokens growing the buffer without bound.
1246 Perl_lex_discard_to(pTHX_ char *ptr)
1250 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1251 buf = SvPVX(PL_parser->linestr);
1253 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256 if (ptr > PL_parser->bufptr)
1257 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1258 discard_len = ptr - buf;
1259 if (PL_parser->oldbufptr < ptr)
1260 PL_parser->oldbufptr = ptr;
1261 if (PL_parser->oldoldbufptr < ptr)
1262 PL_parser->oldoldbufptr = ptr;
1263 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1264 PL_parser->last_uni = NULL;
1265 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1266 PL_parser->last_lop = NULL;
1267 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1268 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1269 PL_parser->bufend -= discard_len;
1270 PL_parser->bufptr -= discard_len;
1271 PL_parser->oldbufptr -= discard_len;
1272 PL_parser->oldoldbufptr -= discard_len;
1273 if (PL_parser->last_uni)
1274 PL_parser->last_uni -= discard_len;
1275 if (PL_parser->last_lop)
1276 PL_parser->last_lop -= discard_len;
1280 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1282 Reads in the next chunk of text to be lexed, appending it to
1283 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1284 looked to the end of the current chunk and wants to know more. It is
1285 usual, but not necessary, for lexing to have consumed the entirety of
1286 the current chunk at this time.
1288 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1289 chunk (i.e., the current chunk has been entirely consumed), normally the
1290 current chunk will be discarded at the same time that the new chunk is
1291 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1292 will not be discarded. If the current chunk has not been entirely
1293 consumed, then it will not be discarded regardless of the flag.
1295 Returns true if some new text was added to the buffer, or false if the
1296 buffer has reached the end of the input text.
1301 #define LEX_FAKE_EOF 0x80000000
1302 #define LEX_NO_TERM 0x40000000
1305 Perl_lex_next_chunk(pTHX_ U32 flags)
1309 STRLEN old_bufend_pos, new_bufend_pos;
1310 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1311 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1312 bool got_some_for_debugger = 0;
1314 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1315 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1316 linestr = PL_parser->linestr;
1317 buf = SvPVX(linestr);
1318 if (!(flags & LEX_KEEP_PREVIOUS) &&
1319 PL_parser->bufptr == PL_parser->bufend) {
1320 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1322 if (PL_parser->last_uni != PL_parser->bufend)
1323 PL_parser->last_uni = NULL;
1324 if (PL_parser->last_lop != PL_parser->bufend)
1325 PL_parser->last_lop = NULL;
1326 last_uni_pos = last_lop_pos = 0;
1330 old_bufend_pos = PL_parser->bufend - buf;
1331 bufptr_pos = PL_parser->bufptr - buf;
1332 oldbufptr_pos = PL_parser->oldbufptr - buf;
1333 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1334 linestart_pos = PL_parser->linestart - buf;
1335 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1336 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1338 if (flags & LEX_FAKE_EOF) {
1340 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1342 } else if (filter_gets(linestr, old_bufend_pos)) {
1344 got_some_for_debugger = 1;
1345 } else if (flags & LEX_NO_TERM) {
1348 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1349 sv_setpvs(linestr, "");
1351 /* End of real input. Close filehandle (unless it was STDIN),
1352 * then add implicit termination.
1354 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1355 PerlIO_clearerr(PL_parser->rsfp);
1356 else if (PL_parser->rsfp)
1357 (void)PerlIO_close(PL_parser->rsfp);
1358 PL_parser->rsfp = NULL;
1359 PL_parser->in_pod = PL_parser->filtered = 0;
1361 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1364 if (!PL_in_eval && PL_minus_p) {
1366 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1367 PL_minus_n = PL_minus_p = 0;
1368 } else if (!PL_in_eval && PL_minus_n) {
1369 sv_catpvs(linestr, /*{*/";}");
1372 sv_catpvs(linestr, ";");
1375 buf = SvPVX(linestr);
1376 new_bufend_pos = SvCUR(linestr);
1377 PL_parser->bufend = buf + new_bufend_pos;
1378 PL_parser->bufptr = buf + bufptr_pos;
1379 PL_parser->oldbufptr = buf + oldbufptr_pos;
1380 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1381 PL_parser->linestart = buf + linestart_pos;
1382 if (PL_parser->last_uni)
1383 PL_parser->last_uni = buf + last_uni_pos;
1384 if (PL_parser->last_lop)
1385 PL_parser->last_lop = buf + last_lop_pos;
1386 if (PL_parser->preambling != NOLINE) {
1387 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1388 PL_parser->preambling = NOLINE;
1390 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1391 PL_curstash != PL_debstash) {
1392 /* debugger active and we're not compiling the debugger code,
1393 * so store the line into the debugger's array of lines
1395 update_debugger_info(NULL, buf+old_bufend_pos,
1396 new_bufend_pos-old_bufend_pos);
1402 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1404 Looks ahead one (Unicode) character in the text currently being lexed.
1405 Returns the codepoint (unsigned integer value) of the next character,
1406 or -1 if lexing has reached the end of the input text. To consume the
1407 peeked character, use L</lex_read_unichar>.
1409 If the next character is in (or extends into) the next chunk of input
1410 text, the next chunk will be read in. Normally the current chunk will be
1411 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1412 then the current chunk will not be discarded.
1414 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1415 is encountered, an exception is generated.
1421 Perl_lex_peek_unichar(pTHX_ U32 flags)
1425 if (flags & ~(LEX_KEEP_PREVIOUS))
1426 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1427 s = PL_parser->bufptr;
1428 bufend = PL_parser->bufend;
1434 if (!lex_next_chunk(flags))
1436 s = PL_parser->bufptr;
1437 bufend = PL_parser->bufend;
1440 if (UTF8_IS_INVARIANT(head))
1442 if (UTF8_IS_START(head)) {
1443 len = UTF8SKIP(&head);
1444 while ((STRLEN)(bufend-s) < len) {
1445 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1447 s = PL_parser->bufptr;
1448 bufend = PL_parser->bufend;
1451 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1452 if (retlen == (STRLEN)-1) {
1453 /* malformed UTF-8 */
1455 SAVESPTR(PL_warnhook);
1456 PL_warnhook = PERL_WARNHOOK_FATAL;
1457 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1463 if (!lex_next_chunk(flags))
1465 s = PL_parser->bufptr;
1472 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1474 Reads the next (Unicode) character in the text currently being lexed.
1475 Returns the codepoint (unsigned integer value) of the character read,
1476 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1477 if lexing has reached the end of the input text. To non-destructively
1478 examine the next character, use L</lex_peek_unichar> instead.
1480 If the next character is in (or extends into) the next chunk of input
1481 text, the next chunk will be read in. Normally the current chunk will be
1482 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1483 then the current chunk will not be discarded.
1485 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1486 is encountered, an exception is generated.
1492 Perl_lex_read_unichar(pTHX_ U32 flags)
1495 if (flags & ~(LEX_KEEP_PREVIOUS))
1496 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1497 c = lex_peek_unichar(flags);
1500 COPLINE_INC_WITH_HERELINES;
1502 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1504 ++(PL_parser->bufptr);
1510 =for apidoc Amx|void|lex_read_space|U32 flags
1512 Reads optional spaces, in Perl style, in the text currently being
1513 lexed. The spaces may include ordinary whitespace characters and
1514 Perl-style comments. C<#line> directives are processed if encountered.
1515 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1516 at a non-space character (or the end of the input text).
1518 If spaces extend into the next chunk of input text, the next chunk will
1519 be read in. Normally the current chunk will be discarded at the same
1520 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1521 chunk will not be discarded.
1526 #define LEX_NO_INCLINE 0x40000000
1527 #define LEX_NO_NEXT_CHUNK 0x80000000
1530 Perl_lex_read_space(pTHX_ U32 flags)
1533 const bool can_incline = !(flags & LEX_NO_INCLINE);
1534 bool need_incline = 0;
1535 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1536 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1539 sv_free(PL_skipwhite);
1540 PL_skipwhite = NULL;
1543 PL_skipwhite = newSVpvs("");
1544 #endif /* PERL_MAD */
1545 s = PL_parser->bufptr;
1546 bufend = PL_parser->bufend;
1552 } while (!(c == '\n' || (c == 0 && s == bufend)));
1553 } else if (c == '\n') {
1556 PL_parser->linestart = s;
1562 } else if (isSPACE(c)) {
1564 } else if (c == 0 && s == bufend) {
1569 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1570 #endif /* PERL_MAD */
1571 if (flags & LEX_NO_NEXT_CHUNK)
1573 PL_parser->bufptr = s;
1574 l = CopLINE(PL_curcop);
1575 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1576 got_more = lex_next_chunk(flags);
1577 CopLINE_set(PL_curcop, l);
1578 s = PL_parser->bufptr;
1579 bufend = PL_parser->bufend;
1582 if (can_incline && need_incline && PL_parser->rsfp) {
1592 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1593 #endif /* PERL_MAD */
1594 PL_parser->bufptr = s;
1599 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1601 This function performs syntax checking on a prototype, C<proto>.
1602 If C<warn> is true, any illegal characters or mismatched brackets
1603 will trigger illegalproto warnings, declaring that they were
1604 detected in the prototype for C<name>.
1606 The return value is C<true> if this is a valid prototype, and
1607 C<false> if it is not, regardless of whether C<warn> was C<true> or
1610 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1617 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1619 STRLEN len, origlen;
1620 char *p = proto ? SvPV(proto, len) : NULL;
1621 bool bad_proto = FALSE;
1622 bool in_brackets = FALSE;
1623 bool after_slash = FALSE;
1624 char greedy_proto = ' ';
1625 bool proto_after_greedy_proto = FALSE;
1626 bool must_be_last = FALSE;
1627 bool underscore = FALSE;
1628 bool bad_proto_after_underscore = FALSE;
1630 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1636 for (; len--; p++) {
1639 proto_after_greedy_proto = TRUE;
1641 if (!strchr(";@%", *p))
1642 bad_proto_after_underscore = TRUE;
1645 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1652 in_brackets = FALSE;
1653 else if ((*p == '@' || *p == '%') &&
1656 must_be_last = TRUE;
1665 after_slash = FALSE;
1670 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1673 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1674 origlen, UNI_DISPLAY_ISPRINT)
1675 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1677 if (proto_after_greedy_proto)
1678 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1679 "Prototype after '%c' for %"SVf" : %s",
1680 greedy_proto, SVfARG(name), p);
1682 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1683 "Missing ']' in prototype for %"SVf" : %s",
1686 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1687 "Illegal character in prototype for %"SVf" : %s",
1689 if (bad_proto_after_underscore)
1690 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1691 "Illegal character after '_' in prototype for %"SVf" : %s",
1695 return (! (proto_after_greedy_proto || bad_proto) );
1700 * This subroutine has nothing to do with tilting, whether at windmills
1701 * or pinball tables. Its name is short for "increment line". It
1702 * increments the current line number in CopLINE(PL_curcop) and checks
1703 * to see whether the line starts with a comment of the form
1704 * # line 500 "foo.pm"
1705 * If so, it sets the current line number and file to the values in the comment.
1709 S_incline(pTHX_ const char *s)
1717 PERL_ARGS_ASSERT_INCLINE;
1719 COPLINE_INC_WITH_HERELINES;
1720 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1721 && s+1 == PL_bufend && *s == ';') {
1722 /* fake newline in string eval */
1723 CopLINE_dec(PL_curcop);
1728 while (SPACE_OR_TAB(*s))
1730 if (strnEQ(s, "line", 4))
1734 if (SPACE_OR_TAB(*s))
1738 while (SPACE_OR_TAB(*s))
1746 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1748 while (SPACE_OR_TAB(*s))
1750 if (*s == '"' && (t = strchr(s+1, '"'))) {
1756 while (!isSPACE(*t))
1760 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1762 if (*e != '\n' && *e != '\0')
1763 return; /* false alarm */
1765 line_num = atoi(n)-1;
1768 const STRLEN len = t - s;
1770 if (!PL_rsfp && !PL_parser->filtered) {
1771 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1772 * to *{"::_<newfilename"} */
1773 /* However, the long form of evals is only turned on by the
1774 debugger - usually they're "(eval %lu)" */
1775 GV * const cfgv = CopFILEGV(PL_curcop);
1778 STRLEN tmplen2 = len;
1782 if (tmplen2 + 2 <= sizeof smallbuf)
1785 Newx(tmpbuf2, tmplen2 + 2, char);
1790 memcpy(tmpbuf2 + 2, s, tmplen2);
1793 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1795 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1796 /* adjust ${"::_<newfilename"} to store the new file name */
1797 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1798 /* The line number may differ. If that is the case,
1799 alias the saved lines that are in the array.
1800 Otherwise alias the whole array. */
1801 if (CopLINE(PL_curcop) == line_num) {
1802 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1803 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1805 else if (GvAV(cfgv)) {
1806 AV * const av = GvAV(cfgv);
1807 const I32 start = CopLINE(PL_curcop)+1;
1808 I32 items = AvFILLp(av) - start;
1810 AV * const av2 = GvAVn(gv2);
1811 SV **svp = AvARRAY(av) + start;
1812 I32 l = (I32)line_num+1;
1814 av_store(av2, l++, SvREFCNT_inc(*svp++));
1819 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1822 CopFILE_free(PL_curcop);
1823 CopFILE_setn(PL_curcop, s, len);
1825 CopLINE_set(PL_curcop, line_num);
1828 #define skipspace(s) skipspace_flags(s, 0)
1831 /* skip space before PL_thistoken */
1834 S_skipspace0(pTHX_ char *s)
1836 PERL_ARGS_ASSERT_SKIPSPACE0;
1843 PL_thiswhite = newSVpvs("");
1844 sv_catsv(PL_thiswhite, PL_skipwhite);
1845 sv_free(PL_skipwhite);
1848 PL_realtokenstart = s - SvPVX(PL_linestr);
1852 /* skip space after PL_thistoken */
1855 S_skipspace1(pTHX_ char *s)
1857 const char *start = s;
1858 I32 startoff = start - SvPVX(PL_linestr);
1860 PERL_ARGS_ASSERT_SKIPSPACE1;
1865 start = SvPVX(PL_linestr) + startoff;
1866 if (!PL_thistoken && PL_realtokenstart >= 0) {
1867 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1868 PL_thistoken = newSVpvn(tstart, start - tstart);
1870 PL_realtokenstart = -1;
1873 PL_nextwhite = newSVpvs("");
1874 sv_catsv(PL_nextwhite, PL_skipwhite);
1875 sv_free(PL_skipwhite);
1882 S_skipspace2(pTHX_ char *s, SV **svp)
1885 const I32 startoff = s - SvPVX(PL_linestr);
1887 PERL_ARGS_ASSERT_SKIPSPACE2;
1890 if (!PL_madskills || !svp)
1892 start = SvPVX(PL_linestr) + startoff;
1893 if (!PL_thistoken && PL_realtokenstart >= 0) {
1894 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1895 PL_thistoken = newSVpvn(tstart, start - tstart);
1896 PL_realtokenstart = -1;
1900 *svp = newSVpvs("");
1901 sv_setsv(*svp, PL_skipwhite);
1902 sv_free(PL_skipwhite);
1911 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1913 AV *av = CopFILEAVx(PL_curcop);
1916 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1918 sv = *av_fetch(av, 0, 1);
1919 SvUPGRADE(sv, SVt_PVMG);
1921 if (!SvPOK(sv)) sv_setpvs(sv,"");
1923 sv_catsv(sv, orig_sv);
1925 sv_catpvn(sv, buf, len);
1930 if (PL_parser->preambling == NOLINE)
1931 av_store(av, CopLINE(PL_curcop), sv);
1937 * Called to gobble the appropriate amount and type of whitespace.
1938 * Skips comments as well.
1942 S_skipspace_flags(pTHX_ char *s, U32 flags)
1946 #endif /* PERL_MAD */
1947 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1950 sv_free(PL_skipwhite);
1951 PL_skipwhite = NULL;
1953 #endif /* PERL_MAD */
1954 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1955 while (s < PL_bufend && SPACE_OR_TAB(*s))
1958 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1960 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1961 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1962 LEX_NO_NEXT_CHUNK : 0));
1964 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1965 if (PL_linestart > PL_bufptr)
1966 PL_bufptr = PL_linestart;
1971 PL_skipwhite = newSVpvn(start, s-start);
1972 #endif /* PERL_MAD */
1978 * Check the unary operators to ensure there's no ambiguity in how they're
1979 * used. An ambiguous piece of code would be:
1981 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1982 * the +5 is its argument.
1992 if (PL_oldoldbufptr != PL_last_uni)
1994 while (isSPACE(*PL_last_uni))
1997 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1999 if ((t = strchr(s, '(')) && t < PL_bufptr)
2002 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2003 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2004 (int)(s - PL_last_uni), PL_last_uni);
2008 * LOP : macro to build a list operator. Its behaviour has been replaced
2009 * with a subroutine, S_lop() for which LOP is just another name.
2012 #define LOP(f,x) return lop(f,x,s)
2016 * Build a list operator (or something that might be one). The rules:
2017 * - if we have a next token, then it's a list operator [why?]
2018 * - if the next thing is an opening paren, then it's a function
2019 * - else it's a list operator
2023 S_lop(pTHX_ I32 f, int x, char *s)
2027 PERL_ARGS_ASSERT_LOP;
2033 PL_last_lop = PL_oldbufptr;
2034 PL_last_lop_op = (OPCODE)f;
2043 return REPORT(FUNC);
2046 return REPORT(FUNC);
2049 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2050 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2051 return REPORT(LSTOP);
2058 * Sets up for an eventual force_next(). start_force(0) basically does
2059 * an unshift, while start_force(-1) does a push. yylex removes items
2064 S_start_force(pTHX_ int where)
2068 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2069 where = PL_lasttoke;
2070 assert(PL_curforce < 0 || PL_curforce == where);
2071 if (PL_curforce != where) {
2072 for (i = PL_lasttoke; i > where; --i) {
2073 PL_nexttoke[i] = PL_nexttoke[i-1];
2077 if (PL_curforce < 0) /* in case of duplicate start_force() */
2078 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2079 PL_curforce = where;
2082 curmad('^', newSVpvs(""));
2083 CURMAD('_', PL_nextwhite);
2088 S_curmad(pTHX_ char slot, SV *sv)
2094 if (PL_curforce < 0)
2095 where = &PL_thismad;
2097 where = &PL_nexttoke[PL_curforce].next_mad;
2103 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2105 else if (PL_encoding) {
2106 sv_recode_to_utf8(sv, PL_encoding);
2111 /* keep a slot open for the head of the list? */
2112 if (slot != '_' && *where && (*where)->mad_key == '^') {
2113 (*where)->mad_key = slot;
2114 sv_free(MUTABLE_SV(((*where)->mad_val)));
2115 (*where)->mad_val = (void*)sv;
2118 addmad(newMADsv(slot, sv), where, 0);
2121 # define start_force(where) NOOP
2122 # define curmad(slot, sv) NOOP
2127 * When the lexer realizes it knows the next token (for instance,
2128 * it is reordering tokens for the parser) then it can call S_force_next
2129 * to know what token to return the next time the lexer is called. Caller
2130 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2131 * and possibly PL_expect to ensure the lexer handles the token correctly.
2135 S_force_next(pTHX_ I32 type)
2140 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2141 tokereport(type, &NEXTVAL_NEXTTOKE);
2145 if (PL_curforce < 0)
2146 start_force(PL_lasttoke);
2147 PL_nexttoke[PL_curforce].next_type = type;
2148 if (PL_lex_state != LEX_KNOWNEXT)
2149 PL_lex_defer = PL_lex_state;
2150 PL_lex_state = LEX_KNOWNEXT;
2151 PL_lex_expect = PL_expect;
2154 PL_nexttype[PL_nexttoke] = type;
2156 if (PL_lex_state != LEX_KNOWNEXT) {
2157 PL_lex_defer = PL_lex_state;
2158 PL_lex_expect = PL_expect;
2159 PL_lex_state = LEX_KNOWNEXT;
2167 * This subroutine handles postfix deref syntax after the arrow has already
2168 * been emitted. @* $* etc. are emitted as two separate token right here.
2169 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170 * only the first, leaving yylex to find the next.
2174 S_postderef(pTHX_ char const funny, char const next)
2177 assert(strchr("$@%&*", funny));
2178 assert(strchr("*[{", next));
2180 PL_expect = XOPERATOR;
2181 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2182 assert('@' == funny || '$' == funny);
2183 PL_lex_state = LEX_INTERPEND;
2184 start_force(PL_curforce);
2185 force_next(POSTJOIN);
2187 start_force(PL_curforce);
2192 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2193 && !PL_lex_brackets)
2195 PL_expect = XOPERATOR;
2204 int yyc = PL_parser->yychar;
2205 if (yyc != YYEMPTY) {
2208 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2209 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2210 PL_lex_allbrackets--;
2212 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2213 } else if (yyc == '('/*)*/) {
2214 PL_lex_allbrackets--;
2219 PL_parser->yychar = YYEMPTY;
2224 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 SV * const sv = newSVpvn_utf8(start, len,
2230 && !is_ascii_string((const U8*)start, len)
2231 && is_utf8_string((const U8*)start, len));
2237 * When the lexer knows the next thing is a word (for instance, it has
2238 * just seen -> and it knows that the next char is a word char, then
2239 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2243 * char *start : buffer position (must be within PL_linestr)
2244 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2245 * int check_keyword : if true, Perl checks to make sure the word isn't
2246 * a keyword (do this if the word is a label, e.g. goto FOO)
2247 * int allow_pack : if true, : characters will also be allowed (require,
2248 * use, etc. do this)
2249 * int allow_initial_tick : used by the "sub" lexer only.
2253 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2259 PERL_ARGS_ASSERT_FORCE_WORD;
2261 start = SKIPSPACE1(start);
2263 if (isIDFIRST_lazy_if(s,UTF) ||
2264 (allow_pack && *s == ':') )
2266 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2267 if (check_keyword) {
2268 char *s2 = PL_tokenbuf;
2269 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2271 if (keyword(s2, len, 0))
2274 start_force(PL_curforce);
2276 curmad('X', newSVpvn(start,s-start));
2277 if (token == METHOD) {
2282 PL_expect = XOPERATOR;
2286 curmad('g', newSVpvs( "forced" ));
2287 NEXTVAL_NEXTTOKE.opval
2288 = (OP*)newSVOP(OP_CONST,0,
2289 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2290 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2298 * Called when the lexer wants $foo *foo &foo etc, but the program
2299 * text only contains the "foo" portion. The first argument is a pointer
2300 * to the "foo", and the second argument is the type symbol to prefix.
2301 * Forces the next token to be a "WORD".
2302 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2306 S_force_ident(pTHX_ const char *s, int kind)
2310 PERL_ARGS_ASSERT_FORCE_IDENT;
2313 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2314 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2315 UTF ? SVf_UTF8 : 0));
2316 start_force(PL_curforce);
2317 NEXTVAL_NEXTTOKE.opval = o;
2320 o->op_private = OPpCONST_ENTERED;
2321 /* XXX see note in pp_entereval() for why we forgo typo
2322 warnings if the symbol must be introduced in an eval.
2324 gv_fetchpvn_flags(s, len,
2325 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2326 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2327 kind == '$' ? SVt_PV :
2328 kind == '@' ? SVt_PVAV :
2329 kind == '%' ? SVt_PVHV :
2337 S_force_ident_maybe_lex(pTHX_ char pit)
2339 start_force(PL_curforce);
2340 NEXTVAL_NEXTTOKE.ival = pit;
2345 Perl_str_to_version(pTHX_ SV *sv)
2350 const char *start = SvPV_const(sv,len);
2351 const char * const end = start + len;
2352 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2354 PERL_ARGS_ASSERT_STR_TO_VERSION;
2356 while (start < end) {
2360 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2365 retval += ((NV)n)/nshift;
2374 * Forces the next token to be a version number.
2375 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2376 * and if "guessing" is TRUE, then no new token is created (and the caller
2377 * must use an alternative parsing method).
2381 S_force_version(pTHX_ char *s, int guessing)
2387 I32 startoff = s - SvPVX(PL_linestr);
2390 PERL_ARGS_ASSERT_FORCE_VERSION;
2398 while (isDIGIT(*d) || *d == '_' || *d == '.')
2402 start_force(PL_curforce);
2403 curmad('X', newSVpvn(s,d-s));
2406 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2408 #ifdef USE_LOCALE_NUMERIC
2409 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2410 setlocale(LC_NUMERIC, "C");
2412 s = scan_num(s, &pl_yylval);
2413 #ifdef USE_LOCALE_NUMERIC
2414 setlocale(LC_NUMERIC, loc);
2417 version = pl_yylval.opval;
2418 ver = cSVOPx(version)->op_sv;
2419 if (SvPOK(ver) && !SvNIOK(ver)) {
2420 SvUPGRADE(ver, SVt_PVNV);
2421 SvNV_set(ver, str_to_version(ver));
2422 SvNOK_on(ver); /* hint that it is a version */
2425 else if (guessing) {
2428 sv_free(PL_nextwhite); /* let next token collect whitespace */
2430 s = SvPVX(PL_linestr) + startoff;
2438 if (PL_madskills && !version) {
2439 sv_free(PL_nextwhite); /* let next token collect whitespace */
2441 s = SvPVX(PL_linestr) + startoff;
2444 /* NOTE: The parser sees the package name and the VERSION swapped */
2445 start_force(PL_curforce);
2446 NEXTVAL_NEXTTOKE.opval = version;
2453 * S_force_strict_version
2454 * Forces the next token to be a version number using strict syntax rules.
2458 S_force_strict_version(pTHX_ char *s)
2463 I32 startoff = s - SvPVX(PL_linestr);
2465 const char *errstr = NULL;
2467 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2469 while (isSPACE(*s)) /* leading whitespace */
2472 if (is_STRICT_VERSION(s,&errstr)) {
2474 s = (char *)scan_version(s, ver, 0);
2475 version = newSVOP(OP_CONST, 0, ver);
2477 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2478 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2482 yyerror(errstr); /* version required */
2487 if (PL_madskills && !version) {
2488 sv_free(PL_nextwhite); /* let next token collect whitespace */
2490 s = SvPVX(PL_linestr) + startoff;
2493 /* NOTE: The parser sees the package name and the VERSION swapped */
2494 start_force(PL_curforce);
2495 NEXTVAL_NEXTTOKE.opval = version;
2503 * Tokenize a quoted string passed in as an SV. It finds the next
2504 * chunk, up to end of string or a backslash. It may make a new
2505 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2510 S_tokeq(pTHX_ SV *sv)
2518 PERL_ARGS_ASSERT_TOKEQ;
2522 assert (!SvIsCOW(sv));
2523 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2527 /* This is relying on the SV being "well formed" with a trailing '\0' */
2528 while (s < send && !(*s == '\\' && s[1] == '\\'))
2533 if ( PL_hints & HINT_NEW_STRING ) {
2534 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2535 SVs_TEMP | SvUTF8(sv));
2539 if (s + 1 < send && (s[1] == '\\'))
2540 s++; /* all that, just for this */
2545 SvCUR_set(sv, d - SvPVX_const(sv));
2547 if ( PL_hints & HINT_NEW_STRING )
2548 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2553 * Now come three functions related to double-quote context,
2554 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2555 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2556 * interact with PL_lex_state, and create fake ( ... ) argument lists
2557 * to handle functions and concatenation.
2561 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2566 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2568 * Pattern matching will set PL_lex_op to the pattern-matching op to
2569 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2571 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2573 * Everything else becomes a FUNC.
2575 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2576 * had an OP_CONST or OP_READLINE). This just sets us up for a
2577 * call to S_sublex_push().
2581 S_sublex_start(pTHX)
2584 const I32 op_type = pl_yylval.ival;
2586 if (op_type == OP_NULL) {
2587 pl_yylval.opval = PL_lex_op;
2591 if (op_type == OP_CONST || op_type == OP_READLINE) {
2592 SV *sv = tokeq(PL_lex_stuff);
2594 if (SvTYPE(sv) == SVt_PVIV) {
2595 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2597 const char * const p = SvPV_const(sv, len);
2598 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2602 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2603 PL_lex_stuff = NULL;
2604 /* Allow <FH> // "foo" */
2605 if (op_type == OP_READLINE)
2606 PL_expect = XTERMORDORDOR;
2609 else if (op_type == OP_BACKTICK && PL_lex_op) {
2610 /* readpipe() was overridden */
2611 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2612 pl_yylval.opval = PL_lex_op;
2614 PL_lex_stuff = NULL;
2618 PL_sublex_info.super_state = PL_lex_state;
2619 PL_sublex_info.sub_inwhat = (U16)op_type;
2620 PL_sublex_info.sub_op = PL_lex_op;
2621 PL_lex_state = LEX_INTERPPUSH;
2625 pl_yylval.opval = PL_lex_op;
2635 * Create a new scope to save the lexing state. The scope will be
2636 * ended in S_sublex_done. Returns a '(', starting the function arguments
2637 * to the uc, lc, etc. found before.
2638 * Sets PL_lex_state to LEX_INTERPCONCAT.
2646 const bool is_heredoc = PL_multi_close == '<';
2649 PL_lex_state = PL_sublex_info.super_state;
2650 SAVEI8(PL_lex_dojoin);
2651 SAVEI32(PL_lex_brackets);
2652 SAVEI32(PL_lex_allbrackets);
2653 SAVEI32(PL_lex_formbrack);
2654 SAVEI8(PL_lex_fakeeof);
2655 SAVEI32(PL_lex_casemods);
2656 SAVEI32(PL_lex_starts);
2657 SAVEI8(PL_lex_state);
2658 SAVESPTR(PL_lex_repl);
2659 SAVEVPTR(PL_lex_inpat);
2660 SAVEI16(PL_lex_inwhat);
2663 SAVECOPLINE(PL_curcop);
2664 SAVEI32(PL_multi_end);
2665 SAVEI32(PL_parser->herelines);
2666 PL_parser->herelines = 0;
2668 SAVEI8(PL_multi_close);
2669 SAVEPPTR(PL_bufptr);
2670 SAVEPPTR(PL_bufend);
2671 SAVEPPTR(PL_oldbufptr);
2672 SAVEPPTR(PL_oldoldbufptr);
2673 SAVEPPTR(PL_last_lop);
2674 SAVEPPTR(PL_last_uni);
2675 SAVEPPTR(PL_linestart);
2676 SAVESPTR(PL_linestr);
2677 SAVEGENERICPV(PL_lex_brackstack);
2678 SAVEGENERICPV(PL_lex_casestack);
2679 SAVEGENERICPV(PL_parser->lex_shared);
2680 SAVEBOOL(PL_parser->lex_re_reparsing);
2681 SAVEI32(PL_copline);
2683 /* The here-doc parser needs to be able to peek into outer lexing
2684 scopes to find the body of the here-doc. So we put PL_linestr and
2685 PL_bufptr into lex_shared, to ‘share’ those values.
2687 PL_parser->lex_shared->ls_linestr = PL_linestr;
2688 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2690 PL_linestr = PL_lex_stuff;
2691 PL_lex_repl = PL_sublex_info.repl;
2692 PL_lex_stuff = NULL;
2693 PL_sublex_info.repl = NULL;
2695 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2696 = SvPVX(PL_linestr);
2697 PL_bufend += SvCUR(PL_linestr);
2698 PL_last_lop = PL_last_uni = NULL;
2699 SAVEFREESV(PL_linestr);
2700 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2702 PL_lex_dojoin = FALSE;
2703 PL_lex_brackets = PL_lex_formbrack = 0;
2704 PL_lex_allbrackets = 0;
2705 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2706 Newx(PL_lex_brackstack, 120, char);
2707 Newx(PL_lex_casestack, 12, char);
2708 PL_lex_casemods = 0;
2709 *PL_lex_casestack = '\0';
2711 PL_lex_state = LEX_INTERPCONCAT;
2713 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2714 PL_copline = NOLINE;
2716 Newxz(shared, 1, LEXSHARED);
2717 shared->ls_prev = PL_parser->lex_shared;
2718 PL_parser->lex_shared = shared;
2720 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2721 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2722 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2723 PL_lex_inpat = PL_sublex_info.sub_op;
2725 PL_lex_inpat = NULL;
2727 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2728 PL_in_eval &= ~EVAL_RE_REPARSING;
2735 * Restores lexer state after a S_sublex_push.
2742 if (!PL_lex_starts++) {
2743 SV * const sv = newSVpvs("");
2744 if (SvUTF8(PL_linestr))
2746 PL_expect = XOPERATOR;
2747 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2751 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2752 PL_lex_state = LEX_INTERPCASEMOD;
2756 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2757 assert(PL_lex_inwhat != OP_TRANSR);
2758 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2759 PL_linestr = PL_lex_repl;
2761 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2762 PL_bufend += SvCUR(PL_linestr);
2763 PL_last_lop = PL_last_uni = NULL;
2764 PL_lex_dojoin = FALSE;
2765 PL_lex_brackets = 0;
2766 PL_lex_allbrackets = 0;
2767 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2768 PL_lex_casemods = 0;
2769 *PL_lex_casestack = '\0';
2771 if (SvEVALED(PL_lex_repl)) {
2772 PL_lex_state = LEX_INTERPNORMAL;
2774 /* we don't clear PL_lex_repl here, so that we can check later
2775 whether this is an evalled subst; that means we rely on the
2776 logic to ensure sublex_done() is called again only via the
2777 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2780 PL_lex_state = LEX_INTERPCONCAT;
2783 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2784 CopLINE(PL_curcop) +=
2785 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2786 + PL_parser->herelines;
2787 PL_parser->herelines = 0;
2792 const line_t l = CopLINE(PL_curcop);
2797 PL_endwhite = newSVpvs("");
2798 sv_catsv(PL_endwhite, PL_thiswhite);
2802 sv_setpvs(PL_thistoken,"");
2804 PL_realtokenstart = -1;
2808 if (PL_multi_close == '<')
2809 PL_parser->herelines += l - PL_multi_end;
2810 PL_bufend = SvPVX(PL_linestr);
2811 PL_bufend += SvCUR(PL_linestr);
2812 PL_expect = XOPERATOR;
2813 PL_sublex_info.sub_inwhat = 0;
2818 PERL_STATIC_INLINE SV*
2819 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2821 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2822 * interior, hence to the "}". Finds what the name resolves to, returning
2823 * an SV* containing it; NULL if no valid one found */
2825 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2832 const U8* first_bad_char_loc;
2833 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2835 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2837 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2839 &first_bad_char_loc))
2841 /* If warnings are on, this will print a more detailed analysis of what
2842 * is wrong than the error message below */
2843 utf8n_to_uvchr(first_bad_char_loc,
2844 e - ((char *) first_bad_char_loc),
2847 /* We deliberately don't try to print the malformed character, which
2848 * might not print very well; it also may be just the first of many
2849 * malformations, so don't print what comes after it */
2850 yyerror(Perl_form(aTHX_
2851 "Malformed UTF-8 character immediately after '%.*s'",
2852 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2856 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2857 /* include the <}> */
2858 e - backslash_ptr + 1);
2860 SvREFCNT_dec_NN(res);
2864 /* See if the charnames handler is the Perl core's, and if so, we can skip
2865 * the validation needed for a user-supplied one, as Perl's does its own
2867 table = GvHV(PL_hintgv); /* ^H */
2868 cvp = hv_fetchs(table, "charnames", FALSE);
2869 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2870 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2872 const char * const name = HvNAME(stash);
2873 if strEQ(name, "_charnames") {
2878 /* Here, it isn't Perl's charname handler. We can't rely on a
2879 * user-supplied handler to validate the input name. For non-ut8 input,
2880 * look to see that the first character is legal. Then loop through the
2881 * rest checking that each is a continuation */
2883 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2887 if (! isALPHAU(*s)) {
2892 if (! isCHARNAME_CONT(*s)) {
2895 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2896 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2897 "A sequence of multiple spaces in a charnames "
2898 "alias definition is deprecated");
2902 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2903 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2904 "Trailing white-space in a charnames alias "
2905 "definition is deprecated");
2909 /* Similarly for utf8. For invariants can check directly; for other
2910 * Latin1, can calculate their code point and check; otherwise use a
2912 if (UTF8_IS_INVARIANT(*s)) {
2913 if (! isALPHAU(*s)) {
2917 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2918 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2924 if (! PL_utf8_charname_begin) {
2925 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2926 PL_utf8_charname_begin = _core_swash_init("utf8",
2927 "_Perl_Charname_Begin",
2929 1, 0, NULL, &flags);
2931 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2938 if (UTF8_IS_INVARIANT(*s)) {
2939 if (! isCHARNAME_CONT(*s)) {
2942 if (*s == ' ' && *(s-1) == ' '
2943 && ckWARN_d(WARN_DEPRECATED)) {
2944 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2945 "A sequence of multiple spaces in a charnam"
2946 "es alias definition is deprecated");
2950 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2951 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2958 if (! PL_utf8_charname_continue) {
2959 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2960 PL_utf8_charname_continue = _core_swash_init("utf8",
2961 "_Perl_Charname_Continue",
2963 1, 0, NULL, &flags);
2965 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2971 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2972 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2973 "Trailing white-space in a charnames alias "
2974 "definition is deprecated");
2978 if (SvUTF8(res)) { /* Don't accept malformed input */
2979 const U8* first_bad_char_loc;
2981 const char* const str = SvPV_const(res, len);
2982 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2983 /* If warnings are on, this will print a more detailed analysis of
2984 * what is wrong than the error message below */
2985 utf8n_to_uvchr(first_bad_char_loc,
2986 (char *) first_bad_char_loc - str,
2989 /* We deliberately don't try to print the malformed character,
2990 * which might not print very well; it also may be just the first
2991 * of many malformations, so don't print what comes after it */
2994 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2995 (int) (e - backslash_ptr + 1), backslash_ptr,
2996 (int) ((char *) first_bad_char_loc - str), str
3006 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
3008 /* The final %.*s makes sure that should the trailing NUL be missing
3009 * that this print won't run off the end of the string */
3012 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3013 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3014 (int)(e - s + bad_char_size), s + bad_char_size
3016 UTF ? SVf_UTF8 : 0);
3024 Extracts the next constant part of a pattern, double-quoted string,
3025 or transliteration. This is terrifying code.
3027 For example, in parsing the double-quoted string "ab\x63$d", it would
3028 stop at the '$' and return an OP_CONST containing 'abc'.
3030 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3031 processing a pattern (PL_lex_inpat is true), a transliteration
3032 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3034 Returns a pointer to the character scanned up to. If this is
3035 advanced from the start pointer supplied (i.e. if anything was
3036 successfully parsed), will leave an OP_CONST for the substring scanned
3037 in pl_yylval. Caller must intuit reason for not parsing further
3038 by looking at the next characters herself.
3042 \N{FOO} => \N{U+hex_for_character_FOO}
3043 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3046 all other \-char, including \N and \N{ apart from \N{ABC}
3049 @ and $ where it appears to be a var, but not for $ as tail anchor
3054 In transliterations:
3055 characters are VERY literal, except for - not at the start or end
3056 of the string, which indicates a range. If the range is in bytes,
3057 scan_const expands the range to the full set of intermediate
3058 characters. If the range is in utf8, the hyphen is replaced with
3059 a certain range mark which will be handled by pmtrans() in op.c.
3061 In double-quoted strings:
3063 double-quoted style: \r and \n
3064 constants: \x31, etc.
3065 deprecated backrefs: \1 (in substitution replacements)
3066 case and quoting: \U \Q \E
3069 scan_const does *not* construct ops to handle interpolated strings.
3070 It stops processing as soon as it finds an embedded $ or @ variable
3071 and leaves it to the caller to work out what's going on.
3073 embedded arrays (whether in pattern or not) could be:
3074 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3076 $ in double-quoted strings must be the symbol of an embedded scalar.
3078 $ in pattern could be $foo or could be tail anchor. Assumption:
3079 it's a tail anchor if $ is the last thing in the string, or if it's
3080 followed by one of "()| \r\n\t"
3082 \1 (backreferences) are turned into $1 in substitutions
3084 The structure of the code is
3085 while (there's a character to process) {
3086 handle transliteration ranges
3087 skip regexp comments /(?#comment)/ and codes /(?{code})/
3088 skip #-initiated comments in //x patterns
3089 check for embedded arrays
3090 check for embedded scalars
3092 deprecate \1 in substitution replacements
3093 handle string-changing backslashes \l \U \Q \E, etc.
3094 switch (what was escaped) {
3095 handle \- in a transliteration (becomes a literal -)
3096 if a pattern and not \N{, go treat as regular character
3097 handle \132 (octal characters)
3098 handle \x15 and \x{1234} (hex characters)
3099 handle \N{name} (named characters, also \N{3,5} in a pattern)
3100 handle \cV (control characters)
3101 handle printf-style backslashes (\f, \r, \n, etc)
3104 } (end if backslash)
3105 handle regular character
3106 } (end while character to read)
3111 S_scan_const(pTHX_ char *start)
3114 char *send = PL_bufend; /* end of the constant */
3115 SV *sv = newSV(send - start); /* sv for the constant. See
3116 note below on sizing. */
3117 char *s = start; /* start of the constant */
3118 char *d = SvPVX(sv); /* destination for copies */
3119 bool dorange = FALSE; /* are we in a translit range? */
3120 bool didrange = FALSE; /* did we just finish a range? */
3121 bool in_charclass = FALSE; /* within /[...]/ */
3122 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3123 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3124 to be UTF8? But, this can
3125 show as true when the source
3126 isn't utf8, as for example
3127 when it is entirely composed
3129 SV *res; /* result from charnames */
3131 /* Note on sizing: The scanned constant is placed into sv, which is
3132 * initialized by newSV() assuming one byte of output for every byte of
3133 * input. This routine expects newSV() to allocate an extra byte for a
3134 * trailing NUL, which this routine will append if it gets to the end of
3135 * the input. There may be more bytes of input than output (eg., \N{LATIN
3136 * CAPITAL LETTER A}), or more output than input if the constant ends up
3137 * recoded to utf8, but each time a construct is found that might increase
3138 * the needed size, SvGROW() is called. Its size parameter each time is
3139 * based on the best guess estimate at the time, namely the length used so
3140 * far, plus the length the current construct will occupy, plus room for
3141 * the trailing NUL, plus one byte for every input byte still unscanned */
3143 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3146 UV literal_endpoint = 0;
3147 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3150 PERL_ARGS_ASSERT_SCAN_CONST;
3152 assert(PL_lex_inwhat != OP_TRANSR);
3153 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3154 /* If we are doing a trans and we know we want UTF8 set expectation */
3155 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3156 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3159 /* Protect sv from errors and fatal warnings. */
3160 ENTER_with_name("scan_const");
3163 while (s < send || dorange) {
3165 /* get transliterations out of the way (they're most literal) */
3166 if (PL_lex_inwhat == OP_TRANS) {
3167 /* expand a range A-Z to the full set of characters. AIE! */
3169 I32 i; /* current expanded character */
3170 I32 min; /* first character in range */
3171 I32 max; /* last character in range */
3182 char * const c = (char*)utf8_hop((U8*)d, -1);
3186 *c = (char) ILLEGAL_UTF8_BYTE;
3187 /* mark the range as done, and continue */
3193 i = d - SvPVX_const(sv); /* remember current offset */
3196 SvLEN(sv) + (has_utf8 ?
3197 (512 - UTF_CONTINUATION_MARK +
3200 /* How many two-byte within 0..255: 128 in UTF-8,
3201 * 96 in UTF-8-mod. */
3203 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3205 d = SvPVX(sv) + i; /* refresh d after realloc */
3209 for (j = 0; j <= 1; j++) {
3210 char * const c = (char*)utf8_hop((U8*)d, -1);
3211 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3217 max = (U8)0xff; /* only to \xff */
3218 uvmax = uv; /* \x{100} to uvmax */
3220 d = c; /* eat endpoint chars */
3225 d -= 2; /* eat the first char and the - */
3226 min = (U8)*d; /* first char in range */
3227 max = (U8)d[1]; /* last char in range */
3234 "Invalid range \"%c-%c\" in transliteration operator",
3235 (char)min, (char)max);
3239 if (literal_endpoint == 2 &&
3240 ((isLOWER_A(min) && isLOWER_A(max)) ||
3241 (isUPPER_A(min) && isUPPER_A(max))))
3243 for (i = min; i <= max; i++) {
3250 for (i = min; i <= max; i++)
3253 append_utf8_from_native_byte(i, &d);
3261 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3263 *d++ = (char) ILLEGAL_UTF8_BYTE;
3265 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3269 /* mark the range as done, and continue */
3273 literal_endpoint = 0;
3278 /* range begins (ignore - as first or last char) */
3279 else if (*s == '-' && s+1 < send && s != start) {
3281 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3288 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3298 literal_endpoint = 0;
3299 native_range = TRUE;
3304 /* if we get here, we're not doing a transliteration */
3306 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3309 while (s1 >= start && *s1-- == '\\')
3312 in_charclass = TRUE;
3315 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3318 while (s1 >= start && *s1-- == '\\')
3321 in_charclass = FALSE;
3324 /* skip for regexp comments /(?#comment)/, except for the last
3325 * char, which will be done separately.
3326 * Stop on (?{..}) and friends */
3328 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3330 while (s+1 < send && *s != ')')
3333 else if (!PL_lex_casemods &&
3334 ( s[2] == '{' /* This should match regcomp.c */
3335 || (s[2] == '?' && s[3] == '{')))
3341 /* likewise skip #-initiated comments in //x patterns */
3342 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3343 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3344 while (s+1 < send && *s != '\n')
3348 /* no further processing of single-quoted regex */
3349 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3350 goto default_action;
3352 /* check for embedded arrays
3353 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3355 else if (*s == '@' && s[1]) {
3356 if (isWORDCHAR_lazy_if(s+1,UTF))
3358 if (strchr(":'{$", s[1]))
3360 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3361 break; /* in regexp, neither @+ nor @- are interpolated */
3364 /* check for embedded scalars. only stop if we're sure it's a
3367 else if (*s == '$') {
3368 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3370 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3372 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3373 "Possible unintended interpolation of $\\ in regex");
3375 break; /* in regexp, $ might be tail anchor */
3379 /* End of else if chain - OP_TRANS rejoin rest */
3382 if (*s == '\\' && s+1 < send) {
3383 char* e; /* Can be used for ending '}', etc. */
3387 /* warn on \1 - \9 in substitution replacements, but note that \11
3388 * is an octal; and \19 is \1 followed by '9' */
3389 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3390 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3392 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3397 /* string-change backslash escapes */
3398 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3402 /* In a pattern, process \N, but skip any other backslash escapes.
3403 * This is because we don't want to translate an escape sequence
3404 * into a meta symbol and have the regex compiler use the meta
3405 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3406 * in spite of this, we do have to process \N here while the proper
3407 * charnames handler is in scope. See bugs #56444 and #62056.
3408 * There is a complication because \N in a pattern may also stand
3409 * for 'match a non-nl', and not mean a charname, in which case its
3410 * processing should be deferred to the regex compiler. To be a
3411 * charname it must be followed immediately by a '{', and not look
3412 * like \N followed by a curly quantifier, i.e., not something like
3413 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3415 else if (PL_lex_inpat
3418 || regcurly(s + 1, FALSE)))
3421 goto default_action;
3426 /* quoted - in transliterations */
3428 if (PL_lex_inwhat == OP_TRANS) {
3435 if ((isALPHANUMERIC(*s)))
3436 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3437 "Unrecognized escape \\%c passed through",
3439 /* default action is to copy the quoted character */
3440 goto default_action;
3443 /* eg. \132 indicates the octal constant 0132 */
3444 case '0': case '1': case '2': case '3':
3445 case '4': case '5': case '6': case '7':
3447 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3449 uv = grok_oct(s, &len, &flags, NULL);
3451 if (len < 3 && s < send && isDIGIT(*s)
3452 && ckWARN(WARN_MISC))
3454 Perl_warner(aTHX_ packWARN(WARN_MISC),
3455 "%s", form_short_octal_warning(s, len));
3458 goto NUM_ESCAPE_INSERT;
3460 /* eg. \o{24} indicates the octal constant \024 */
3465 bool valid = grok_bslash_o(&s, &uv, &error,
3466 TRUE, /* Output warning */
3467 FALSE, /* Not strict */
3468 TRUE, /* Output warnings for
3475 goto NUM_ESCAPE_INSERT;
3478 /* eg. \x24 indicates the hex constant 0x24 */
3483 bool valid = grok_bslash_x(&s, &uv, &error,
3484 TRUE, /* Output warning */
3485 FALSE, /* Not strict */
3486 TRUE, /* Output warnings for
3496 /* Insert oct or hex escaped character. There will always be
3497 * enough room in sv since such escapes will be longer than any
3498 * UTF-8 sequence they can end up as, except if they force us
3499 * to recode the rest of the string into utf8 */
3501 /* Here uv is the ordinal of the next character being added */
3502 if (!UVCHR_IS_INVARIANT(uv)) {
3503 if (!has_utf8 && uv > 255) {
3504 /* Might need to recode whatever we have accumulated so
3505 * far if it contains any chars variant in utf8 or
3508 SvCUR_set(sv, d - SvPVX_const(sv));
3511 /* See Note on sizing above. */
3512 sv_utf8_upgrade_flags_grow(sv,
3513 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3514 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3515 d = SvPVX(sv) + SvCUR(sv);
3520 d = (char*)uvchr_to_utf8((U8*)d, uv);
3521 if (PL_lex_inwhat == OP_TRANS &&
3522 PL_sublex_info.sub_op) {
3523 PL_sublex_info.sub_op->op_private |=
3524 (PL_lex_repl ? OPpTRANS_FROM_UTF
3528 if (uv > 255 && !dorange)
3529 native_range = FALSE;
3542 /* In a non-pattern \N must be a named character, like \N{LATIN
3543 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3544 * mean to match a non-newline. For non-patterns, named
3545 * characters are converted to their string equivalents. In
3546 * patterns, named characters are not converted to their
3547 * ultimate forms for the same reasons that other escapes
3548 * aren't. Instead, they are converted to the \N{U+...} form
3549 * to get the value from the charnames that is in effect right
3550 * now, while preserving the fact that it was a named character
3551 * so that the regex compiler knows this */
3553 /* The structure of this section of code (besides checking for
3554 * errors and upgrading to utf8) is:
3555 * Further disambiguate between the two meanings of \N, and if
3556 * not a charname, go process it elsewhere
3557 * If of form \N{U+...}, pass it through if a pattern;
3558 * otherwise convert to utf8
3559 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3560 * pattern; otherwise convert to utf8 */
3562 /* Here, s points to the 'N'; the test below is guaranteed to
3563 * succeed if we are being called on a pattern as we already
3564 * know from a test above that the next character is a '{'.
3565 * On a non-pattern \N must mean 'named sequence, which
3566 * requires braces */
3569 yyerror("Missing braces on \\N{}");
3574 /* If there is no matching '}', it is an error. */
3575 if (! (e = strchr(s, '}'))) {
3576 if (! PL_lex_inpat) {
3577 yyerror("Missing right brace on \\N{}");
3579 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3584 /* Here it looks like a named character */
3586 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3587 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3588 | PERL_SCAN_DISALLOW_PREFIX;
3591 /* For \N{U+...}, the '...' is a unicode value even on
3592 * EBCDIC machines */
3593 s += 2; /* Skip to next char after the 'U+' */
3595 uv = grok_hex(s, &len, &flags, NULL);
3596 if (len == 0 || len != (STRLEN)(e - s)) {
3597 yyerror("Invalid hexadecimal number in \\N{U+...}");
3604 /* On non-EBCDIC platforms, pass through to the regex
3605 * compiler unchanged. The reason we evaluated the
3606 * number above is to make sure there wasn't a syntax
3607 * error. But on EBCDIC we convert to native so
3608 * downstream code can continue to assume it's native
3610 s -= 5; /* Include the '\N{U+' */
3612 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3615 (unsigned int) UNI_TO_NATIVE(uv));
3617 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3621 else { /* Not a pattern: convert the hex to string */
3623 /* If destination is not in utf8, unconditionally
3624 * recode it to be so. This is because \N{} implies
3625 * Unicode semantics, and scalars have to be in utf8
3626 * to guarantee those semantics */
3628 SvCUR_set(sv, d - SvPVX_const(sv));
3631 /* See Note on sizing above. */
3632 sv_utf8_upgrade_flags_grow(
3634 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3635 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3636 d = SvPVX(sv) + SvCUR(sv);
3640 /* Add the (Unicode) code point to the output. */
3641 if (UNI_IS_INVARIANT(uv)) {
3642 *d++ = (char) LATIN1_TO_NATIVE(uv);
3645 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3649 else /* Here is \N{NAME} but not \N{U+...}. */
3650 if ((res = get_and_check_backslash_N_name(s, e)))
3653 const char *str = SvPV_const(res, len);
3656 if (! len) { /* The name resolved to an empty string */
3657 Copy("\\N{}", d, 4, char);
3661 /* In order to not lose information for the regex
3662 * compiler, pass the result in the specially made
3663 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3664 * the code points in hex of each character
3665 * returned by charnames */
3667 const char *str_end = str + len;
3668 const STRLEN off = d - SvPVX_const(sv);
3670 if (! SvUTF8(res)) {
3671 /* For the non-UTF-8 case, we can determine the
3672 * exact length needed without having to parse
3673 * through the string. Each character takes up
3674 * 2 hex digits plus either a trailing dot or
3676 d = off + SvGROW(sv, off
3678 + 6 /* For the "\N{U+", and
3680 + (STRLEN)(send - e));
3681 Copy("\\N{U+", d, 5, char);
3683 while (str < str_end) {
3685 my_snprintf(hex_string, sizeof(hex_string),
3686 "%02X.", (U8) *str);
3687 Copy(hex_string, d, 3, char);
3691 d--; /* We will overwrite below the final
3692 dot with a right brace */
3695 STRLEN char_length; /* cur char's byte length */
3697 /* and the number of bytes after this is
3698 * translated into hex digits */
3699 STRLEN output_length;
3701 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3702 * for max('U+', '.'); and 1 for NUL */
3703 char hex_string[2 * UTF8_MAXBYTES + 5];
3705 /* Get the first character of the result. */
3706 U32 uv = utf8n_to_uvchr((U8 *) str,
3710 /* Convert first code point to hex, including
3711 * the boiler plate before it. */
3713 my_snprintf(hex_string, sizeof(hex_string),
3717 /* Make sure there is enough space to hold it */
3718 d = off + SvGROW(sv, off
3720 + (STRLEN)(send - e)
3721 + 2); /* '}' + NUL */
3723 Copy(hex_string, d, output_length, char);
3726 /* For each subsequent character, append dot and
3727 * its ordinal in hex */
3728 while ((str += char_length) < str_end) {
3729 const STRLEN off = d - SvPVX_const(sv);
3730 U32 uv = utf8n_to_uvchr((U8 *) str,
3735 my_snprintf(hex_string,
3740 d = off + SvGROW(sv, off
3742 + (STRLEN)(send - e)
3743 + 2); /* '}' + NUL */
3744 Copy(hex_string, d, output_length, char);
3749 *d++ = '}'; /* Done. Add the trailing brace */
3752 else { /* Here, not in a pattern. Convert the name to a
3755 /* If destination is not in utf8, unconditionally
3756 * recode it to be so. This is because \N{} implies
3757 * Unicode semantics, and scalars have to be in utf8
3758 * to guarantee those semantics */
3760 SvCUR_set(sv, d - SvPVX_const(sv));
3763 /* See Note on sizing above. */
3764 sv_utf8_upgrade_flags_grow(sv,
3765 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3766 len + (STRLEN)(send - s) + 1);
3767 d = SvPVX(sv) + SvCUR(sv);
3769 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3771 /* See Note on sizing above. (NOTE: SvCUR() is not
3772 * set correctly here). */
3773 const STRLEN off = d - SvPVX_const(sv);
3774 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3776 Copy(str, d, len, char);
3782 } /* End \N{NAME} */
3785 native_range = FALSE; /* \N{} is defined to be Unicode */
3787 s = e + 1; /* Point to just after the '}' */
3790 /* \c is a control character */
3794 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3797 yyerror("Missing control char name in \\c");
3801 /* printf-style backslashes, formfeeds, newlines, etc */
3818 *d++ = ASCII_TO_NATIVE('\033');
3827 } /* end if (backslash) */
3834 /* If we started with encoded form, or already know we want it,
3835 then encode the next character */
3836 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3840 /* One might think that it is wasted effort in the case of the
3841 * source being utf8 (this_utf8 == TRUE) to take the next character
3842 * in the source, convert it to an unsigned value, and then convert
3843 * it back again. But the source has not been validated here. The
3844 * routine that does the conversion checks for errors like
3847 const UV nextuv = (this_utf8)
3848 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3850 const STRLEN need = UNISKIP(nextuv);
3852 SvCUR_set(sv, d - SvPVX_const(sv));
3855 /* See Note on sizing above. */
3856 sv_utf8_upgrade_flags_grow(sv,
3857 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3858 need + (STRLEN)(send - s) + 1);
3859 d = SvPVX(sv) + SvCUR(sv);
3861 } else if (need > len) {
3862 /* encoded value larger than old, may need extra space (NOTE:
3863 * SvCUR() is not set correctly here). See Note on sizing
3865 const STRLEN off = d - SvPVX_const(sv);
3866 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3870 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3872 if (uv > 255 && !dorange)
3873 native_range = FALSE;
3879 } /* while loop to process each character */
3881 /* terminate the string and set up the sv */
3883 SvCUR_set(sv, d - SvPVX_const(sv));
3884 if (SvCUR(sv) >= SvLEN(sv))
3885 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3886 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3889 if (PL_encoding && !has_utf8) {
3890 sv_recode_to_utf8(sv, PL_encoding);
3896 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3897 PL_sublex_info.sub_op->op_private |=
3898 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3902 /* shrink the sv if we allocated more than we used */
3903 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3904 SvPV_shrink_to_cur(sv);
3907 /* return the substring (via pl_yylval) only if we parsed anything */
3910 for (; s2 < s; s2++) {
3912 COPLINE_INC_WITH_HERELINES;
3914 SvREFCNT_inc_simple_void_NN(sv);
3915 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3916 && ! PL_parser->lex_re_reparsing)
3918 const char *const key = PL_lex_inpat ? "qr" : "q";
3919 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3923 if (PL_lex_inwhat == OP_TRANS) {
3926 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3929 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3937 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3940 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3942 LEAVE_with_name("scan_const");
3947 * Returns TRUE if there's more to the expression (e.g., a subscript),
3950 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3952 * ->[ and ->{ return TRUE
3953 * ->$* ->@* ->@[ and ->@{ return TRUE if postfix_interpolate is enabled
3954 * { and [ outside a pattern are always subscripts, so return TRUE
3955 * if we're outside a pattern and it's not { or [, then return FALSE
3956 * if we're in a pattern and the first char is a {
3957 * {4,5} (any digits around the comma) returns FALSE
3958 * if we're in a pattern and the first char is a [
3960 * [SOMETHING] has a funky algorithm to decide whether it's a
3961 * character class or not. It has to deal with things like
3962 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3963 * anything else returns TRUE
3966 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3969 S_intuit_more(pTHX_ char *s)
3973 PERL_ARGS_ASSERT_INTUIT_MORE;
3975 if (PL_lex_brackets)
3977 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3979 if (*s == '-' && s[1] == '>'
3980 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3981 && ( (s[2] == '$' && s[3] == '*')
3982 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3984 if (*s != '{' && *s != '[')
3989 /* In a pattern, so maybe we have {n,m}. */
3991 if (regcurly(s, FALSE)) {
3997 /* On the other hand, maybe we have a character class */
4000 if (*s == ']' || *s == '^')
4003 /* this is terrifying, and it works */
4006 const char * const send = strchr(s,']');
4007 unsigned char un_char, last_un_char;
4008 char tmpbuf[sizeof PL_tokenbuf * 4];
4010 if (!send) /* has to be an expression */
4012 weight = 2; /* let's weigh the evidence */
4016 else if (isDIGIT(*s)) {
4018 if (isDIGIT(s[1]) && s[2] == ']')
4024 Zero(seen,256,char);
4026 for (; s < send; s++) {
4027 last_un_char = un_char;
4028 un_char = (unsigned char)*s;
4033 weight -= seen[un_char] * 10;
4034 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4036 char *tmp = PL_bufend;
4037 PL_bufend = (char*)send;
4038 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4040 len = (int)strlen(tmpbuf);
4041 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4042 UTF ? SVf_UTF8 : 0, SVt_PV))
4047 else if (*s == '$' && s[1] &&
4048 strchr("[#!%*<>()-=",s[1])) {
4049 if (/*{*/ strchr("])} =",s[2]))
4058 if (strchr("wds]",s[1]))
4060 else if (seen[(U8)'\''] || seen[(U8)'"'])
4062 else if (strchr("rnftbxcav",s[1]))
4064 else if (isDIGIT(s[1])) {
4066 while (s[1] && isDIGIT(s[1]))
4076 if (strchr("aA01! ",last_un_char))
4078 if (strchr("zZ79~",s[1]))
4080 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4081 weight -= 5; /* cope with negative subscript */
4084 if (!isWORDCHAR(last_un_char)
4085 && !(last_un_char == '$' || last_un_char == '@'
4086 || last_un_char == '&')
4087 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4092 if (keyword(tmpbuf, d - tmpbuf, 0))
4095 if (un_char == last_un_char + 1)
4097 weight -= seen[un_char];
4102 if (weight >= 0) /* probably a character class */
4112 * Does all the checking to disambiguate
4114 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4115 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4117 * First argument is the stuff after the first token, e.g. "bar".
4119 * Not a method if foo is a filehandle.
4120 * Not a method if foo is a subroutine prototyped to take a filehandle.
4121 * Not a method if it's really "Foo $bar"
4122 * Method if it's "foo $bar"
4123 * Not a method if it's really "print foo $bar"
4124 * Method if it's really "foo package::" (interpreted as package->foo)
4125 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4126 * Not a method if bar is a filehandle or package, but is quoted with
4131 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4134 char *s = start + (*start == '$');
4135 char tmpbuf[sizeof PL_tokenbuf];
4142 PERL_ARGS_ASSERT_INTUIT_METHOD;
4144 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4146 if (cv && SvPOK(cv)) {
4147 const char *proto = CvPROTO(cv);
4149 while (*proto && (isSPACE(*proto) || *proto == ';'))
4156 if (*start == '$') {
4157 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4158 isUPPER(*PL_tokenbuf))
4161 len = start - SvPVX(PL_linestr);
4165 start = SvPVX(PL_linestr) + len;
4169 return *s == '(' ? FUNCMETH : METHOD;
4172 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4173 /* start is the beginning of the possible filehandle/object,
4174 * and s is the end of it
4175 * tmpbuf is a copy of it (but with single quotes as double colons)
4178 if (!keyword(tmpbuf, len, 0)) {
4179 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4183 soff = s - SvPVX(PL_linestr);
4187 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4188 if (indirgv && GvCVu(indirgv))
4190 /* filehandle or package name makes it a method */
4191 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4193 soff = s - SvPVX(PL_linestr);
4196 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4197 return 0; /* no assumptions -- "=>" quotes bareword */
4199 start_force(PL_curforce);
4200 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4201 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4202 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4204 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4205 ( UTF ? SVf_UTF8 : 0 )));
4210 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4212 return *s == '(' ? FUNCMETH : METHOD;
4218 /* Encoded script support. filter_add() effectively inserts a
4219 * 'pre-processing' function into the current source input stream.
4220 * Note that the filter function only applies to the current source file
4221 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4223 * The datasv parameter (which may be NULL) can be used to pass
4224 * private data to this instance of the filter. The filter function
4225 * can recover the SV using the FILTER_DATA macro and use it to
4226 * store private buffers and state information.
4228 * The supplied datasv parameter is upgraded to a PVIO type
4229 * and the IoDIRP/IoANY field is used to store the function pointer,
4230 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4231 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4232 * private use must be set using malloc'd pointers.
4236 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4245 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4246 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4248 if (!PL_rsfp_filters)
4249 PL_rsfp_filters = newAV();
4252 SvUPGRADE(datasv, SVt_PVIO);
4253 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4254 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4255 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4256 FPTR2DPTR(void *, IoANY(datasv)),
4257 SvPV_nolen(datasv)));
4258 av_unshift(PL_rsfp_filters, 1);
4259 av_store(PL_rsfp_filters, 0, datasv) ;
4261 !PL_parser->filtered
4262 && PL_parser->lex_flags & LEX_EVALBYTES
4263 && PL_bufptr < PL_bufend
4265 const char *s = PL_bufptr;
4266 while (s < PL_bufend) {
4268 SV *linestr = PL_parser->linestr;
4269 char *buf = SvPVX(linestr);
4270 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4271 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4272 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4273 STRLEN const linestart_pos = PL_parser->linestart - buf;
4274 STRLEN const last_uni_pos =
4275 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4276 STRLEN const last_lop_pos =
4277 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4278 av_push(PL_rsfp_filters, linestr);
4279 PL_parser->linestr =
4280 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4281 buf = SvPVX(PL_parser->linestr);
4282 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4283 PL_parser->bufptr = buf + bufptr_pos;
4284 PL_parser->oldbufptr = buf + oldbufptr_pos;
4285 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4286 PL_parser->linestart = buf + linestart_pos;
4287 if (PL_parser->last_uni)
4288 PL_parser->last_uni = buf + last_uni_pos;
4289 if (PL_parser->last_lop)
4290 PL_parser->last_lop = buf + last_lop_pos;
4291 SvLEN(linestr) = SvCUR(linestr);
4292 SvCUR(linestr) = s-SvPVX(linestr);
4293 PL_parser->filtered = 1;
4303 /* Delete most recently added instance of this filter function. */
4305 Perl_filter_del(pTHX_ filter_t funcp)
4310 PERL_ARGS_ASSERT_FILTER_DEL;
4313 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4314 FPTR2DPTR(void*, funcp)));
4316 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4318 /* if filter is on top of stack (usual case) just pop it off */
4319 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4320 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4321 sv_free(av_pop(PL_rsfp_filters));
4325 /* we need to search for the correct entry and clear it */
4326 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4330 /* Invoke the idxth filter function for the current rsfp. */
4331 /* maxlen 0 = read one text line */
4333 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4338 /* This API is bad. It should have been using unsigned int for maxlen.
4339 Not sure if we want to change the API, but if not we should sanity
4340 check the value here. */
4341 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4343 PERL_ARGS_ASSERT_FILTER_READ;
4345 if (!PL_parser || !PL_rsfp_filters)
4347 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4348 /* Provide a default input filter to make life easy. */
4349 /* Note that we append to the line. This is handy. */
4350 DEBUG_P(PerlIO_printf(Perl_debug_log,
4351 "filter_read %d: from rsfp\n", idx));
4352 if (correct_length) {
4355 const int old_len = SvCUR(buf_sv);
4357 /* ensure buf_sv is large enough */
4358 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4359 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4360 correct_length)) <= 0) {
4361 if (PerlIO_error(PL_rsfp))
4362 return -1; /* error */
4364 return 0 ; /* end of file */
4366 SvCUR_set(buf_sv, old_len + len) ;
4367 SvPVX(buf_sv)[old_len + len] = '\0';
4370 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4371 if (PerlIO_error(PL_rsfp))
4372 return -1; /* error */
4374 return 0 ; /* end of file */
4377 return SvCUR(buf_sv);
4379 /* Skip this filter slot if filter has been deleted */
4380 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4381 DEBUG_P(PerlIO_printf(Perl_debug_log,
4382 "filter_read %d: skipped (filter deleted)\n",
4384 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4386 if (SvTYPE(datasv) != SVt_PVIO) {
4387 if (correct_length) {
4389 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4390 if (!remainder) return 0; /* eof */
4391 if (correct_length > remainder) correct_length = remainder;
4392 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4393 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4396 const char *s = SvEND(datasv);
4397 const char *send = SvPVX(datasv) + SvLEN(datasv);
4405 if (s == send) return 0; /* eof */
4406 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4407 SvCUR_set(datasv, s-SvPVX(datasv));
4409 return SvCUR(buf_sv);
4411 /* Get function pointer hidden within datasv */
4412 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4413 DEBUG_P(PerlIO_printf(Perl_debug_log,
4414 "filter_read %d: via function %p (%s)\n",
4415 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4416 /* Call function. The function is expected to */
4417 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4418 /* Return: <0:error, =0:eof, >0:not eof */
4419 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4423 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4427 PERL_ARGS_ASSERT_FILTER_GETS;
4429 #ifdef PERL_CR_FILTER
4430 if (!PL_rsfp_filters) {
4431 filter_add(S_cr_textfilter,NULL);
4434 if (PL_rsfp_filters) {
4436 SvCUR_set(sv, 0); /* start with empty line */
4437 if (FILTER_READ(0, sv, 0) > 0)
4438 return ( SvPVX(sv) ) ;
4443 return (sv_gets(sv, PL_rsfp, append));
4447 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4452 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4454 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4458 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4459 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4461 return GvHV(gv); /* Foo:: */
4464 /* use constant CLASS => 'MyClass' */
4465 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4466 if (gv && GvCV(gv)) {
4467 SV * const sv = cv_const_sv(GvCV(gv));
4469 pkgname = SvPV_const(sv, len);
4472 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4476 * S_readpipe_override
4477 * Check whether readpipe() is overridden, and generates the appropriate
4478 * optree, provided sublex_start() is called afterwards.
4481 S_readpipe_override(pTHX)
4484 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4485 pl_yylval.ival = OP_BACKTICK;
4487 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4489 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4490 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4491 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4493 COPLINE_SET_FROM_MULTI_END;
4494 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4495 op_append_elem(OP_LIST,
4496 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4497 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4504 * The intent of this yylex wrapper is to minimize the changes to the
4505 * tokener when we aren't interested in collecting madprops. It remains
4506 * to be seen how successful this strategy will be...
4513 char *s = PL_bufptr;
4515 /* make sure PL_thiswhite is initialized */
4519 /* previous token ate up our whitespace? */
4520 if (!PL_lasttoke && PL_nextwhite) {
4521 PL_thiswhite = PL_nextwhite;
4525 /* isolate the token, and figure out where it is without whitespace */
4526 PL_realtokenstart = -1;
4530 assert(PL_curforce < 0);
4532 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4533 if (!PL_thistoken) {
4534 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4535 PL_thistoken = newSVpvs("");
4537 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4538 PL_thistoken = newSVpvn(tstart, s - tstart);
4541 if (PL_thismad) /* install head */
4542 CURMAD('X', PL_thistoken);
4545 /* last whitespace of a sublex? */
4546 if (optype == ')' && PL_endwhite) {
4547 CURMAD('X', PL_endwhite);
4552 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4553 if (!PL_thiswhite && !PL_endwhite && !optype) {
4554 sv_free(PL_thistoken);
4559 /* put off final whitespace till peg */
4560 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4561 PL_nextwhite = PL_thiswhite;
4564 else if (PL_thisopen) {
4565 CURMAD('q', PL_thisopen);
4567 sv_free(PL_thistoken);
4571 /* Store actual token text as madprop X */
4572 CURMAD('X', PL_thistoken);
4576 /* add preceding whitespace as madprop _ */
4577 CURMAD('_', PL_thiswhite);
4581 /* add quoted material as madprop = */
4582 CURMAD('=', PL_thisstuff);
4586 /* add terminating quote as madprop Q */
4587 CURMAD('Q', PL_thisclose);
4591 /* special processing based on optype */
4595 /* opval doesn't need a TOKEN since it can already store mp */
4605 if (pl_yylval.opval)
4606 append_madprops(PL_thismad, pl_yylval.opval, 0);
4614 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4627 /* remember any fake bracket that lexer is about to discard */
4628 if (PL_lex_brackets == 1 &&
4629 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4632 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4635 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4636 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4639 break; /* don't bother looking for trailing comment */
4648 /* attach a trailing comment to its statement instead of next token */
4652 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4654 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4656 if (*s == '\n' || *s == '#') {
4657 while (s < PL_bufend && *s != '\n')
4661 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4662 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4675 /* Create new token struct. Note: opvals return early above. */
4676 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4683 S_tokenize_use(pTHX_ int is_use, char *s) {
4686 PERL_ARGS_ASSERT_TOKENIZE_USE;
4688 if (PL_expect != XSTATE)
4689 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4690 is_use ? "use" : "no"));
4693 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4694 s = force_version(s, TRUE);
4695 if (*s == ';' || *s == '}'
4696 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4697 start_force(PL_curforce);
4698 NEXTVAL_NEXTTOKE.opval = NULL;
4701 else if (*s == 'v') {
4702 s = force_word(s,WORD,FALSE,TRUE);
4703 s = force_version(s, FALSE);
4707 s = force_word(s,WORD,FALSE,TRUE);
4708 s = force_version(s, FALSE);
4710 pl_yylval.ival = is_use;
4714 static const char* const exp_name[] =
4715 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4716 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4720 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4722 S_word_takes_any_delimeter(char *p, STRLEN len)
4724 return (len == 1 && strchr("msyq", p[0])) ||
4726 (p[0] == 't' && p[1] == 'r') ||
4727 (p[0] == 'q' && strchr("qwxr", p[1]))));
4731 S_check_scalar_slice(pTHX_ char *s)
4734 while (*s == ' ' || *s == '\t') s++;
4735 if (*s == 'q' && s[1] == 'w'
4736 && !isWORDCHAR_lazy_if(s+2,UTF))
4738 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4739 s += UTF ? UTF8SKIP(s) : 1;
4740 if (*s == '}' || *s == ']')
4741 pl_yylval.ival = OPpSLICEWARNING;
4747 Works out what to call the token just pulled out of the input
4748 stream. The yacc parser takes care of taking the ops we return and
4749 stitching them into a tree.
4752 The type of the next token
4755 Switch based on the current state:
4756 - if we already built the token before, use it
4757 - if we have a case modifier in a string, deal with that
4758 - handle other cases of interpolation inside a string
4759 - scan the next line if we are inside a format
4760 In the normal state switch on the next character:
4762 if alphabetic, go to key lookup
4763 unrecoginized character - croak
4764 - 0/4/26: handle end-of-line or EOF
4765 - cases for whitespace
4766 - \n and #: handle comments and line numbers
4767 - various operators, brackets and sigils
4770 - 'v': vstrings (or go to key lookup)
4771 - 'x' repetition operator (or go to key lookup)
4772 - other ASCII alphanumerics (key lookup begins here):
4775 scan built-in keyword (but do nothing with it yet)
4776 check for statement label
4777 check for lexical subs
4778 goto just_a_word if there is one
4779 see whether built-in keyword is overridden
4780 switch on keyword number:
4781 - default: just_a_word:
4782 not a built-in keyword; handle bareword lookup
4783 disambiguate between method and sub call
4784 fall back to bareword
4785 - cases for built-in keywords
4793 char *s = PL_bufptr;
4797 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4801 /* orig_keyword, gvp, and gv are initialized here because
4802 * jump to the label just_a_word_zero can bypass their
4803 * initialization later. */
4804 I32 orig_keyword = 0;
4809 SV* tmp = newSVpvs("");
4810 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4811 (IV)CopLINE(PL_curcop),
4812 lex_state_names[PL_lex_state],
4813 exp_name[PL_expect],
4814 pv_display(tmp, s, strlen(s), 0, 60));
4818 switch (PL_lex_state) {
4820 case LEX_INTERPNORMAL:
4823 /* when we've already built the next token, just pull it out of the queue */
4827 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4829 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4830 PL_nexttoke[PL_lasttoke].next_mad = 0;
4831 if (PL_thismad && PL_thismad->mad_key == '_') {
4832 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4833 PL_thismad->mad_val = 0;
4834 mad_free(PL_thismad);
4839 PL_lex_state = PL_lex_defer;
4840 PL_expect = PL_lex_expect;
4841 PL_lex_defer = LEX_NORMAL;
4842 if (!PL_nexttoke[PL_lasttoke].next_type)
4847 pl_yylval = PL_nextval[PL_nexttoke];
4849 PL_lex_state = PL_lex_defer;
4850 PL_expect = PL_lex_expect;
4851 PL_lex_defer = LEX_NORMAL;
4857 next_type = PL_nexttoke[PL_lasttoke].next_type;
4859 next_type = PL_nexttype[PL_nexttoke];
4861 if (next_type & (7<<24)) {
4862 if (next_type & (1<<24)) {
4863 if (PL_lex_brackets > 100)
4864 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4865 PL_lex_brackstack[PL_lex_brackets++] =
4866 (char) ((next_type >> 16) & 0xff);
4868 if (next_type & (2<<24))
4869 PL_lex_allbrackets++;
4870 if (next_type & (4<<24))
4871 PL_lex_allbrackets--;
4872 next_type &= 0xffff;
4874 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4877 /* interpolated case modifiers like \L \U, including \Q and \E.
4878 when we get here, PL_bufptr is at the \
4880 case LEX_INTERPCASEMOD:
4882 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4884 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4885 PL_bufptr, PL_bufend, *PL_bufptr);
4887 /* handle \E or end of string */
4888 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4890 if (PL_lex_casemods) {
4891 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4892 PL_lex_casestack[PL_lex_casemods] = '\0';
4894 if (PL_bufptr != PL_bufend
4895 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4896 || oldmod == 'F')) {
4898 PL_lex_state = LEX_INTERPCONCAT;
4901 PL_thistoken = newSVpvs("\\E");
4904 PL_lex_allbrackets--;
4907 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4908 /* Got an unpaired \E */
4909 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4910 "Useless use of \\E");
4913 while (PL_bufptr != PL_bufend &&
4914 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4917 PL_thiswhite = newSVpvs("");
4918 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4923 if (PL_bufptr != PL_bufend)
4926 PL_lex_state = LEX_INTERPCONCAT;
4930 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4931 "### Saw case modifier\n"); });
4933 if (s[1] == '\\' && s[2] == 'E') {
4937 PL_thiswhite = newSVpvs("");
4938 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4942 PL_lex_state = LEX_INTERPCONCAT;
4947 if (!PL_madskills) /* when just compiling don't need correct */
4948 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4949 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4950 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4951 (strchr(PL_lex_casestack, 'L')
4952 || strchr(PL_lex_casestack, 'U')
4953 || strchr(PL_lex_casestack, 'F'))) {
4954 PL_lex_casestack[--PL_lex_casemods] = '\0';
4955 PL_lex_allbrackets--;
4958 if (PL_lex_casemods > 10)
4959 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4960 PL_lex_casestack[PL_lex_casemods++] = *s;
4961 PL_lex_casestack[PL_lex_casemods] = '\0';
4962 PL_lex_state = LEX_INTERPCONCAT;
4963 start_force(PL_curforce);
4964 NEXTVAL_NEXTTOKE.ival = 0;
4965 force_next((2<<24)|'(');
4966 start_force(PL_curforce);
4968 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4970 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4972 NEXTVAL_NEXTTOKE.ival = OP_LC;
4974 NEXTVAL_NEXTTOKE.ival = OP_UC;
4976 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4978 NEXTVAL_NEXTTOKE.ival = OP_FC;
4980 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4982 SV* const tmpsv = newSVpvs("\\ ");
4983 /* replace the space with the character we want to escape
4985 SvPVX(tmpsv)[1] = *s;
4991 if (PL_lex_starts) {
4997 sv_free(PL_thistoken);
4998 PL_thistoken = newSVpvs("");
5001 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5002 if (PL_lex_casemods == 1 && PL_lex_inpat)
5011 case LEX_INTERPPUSH:
5012 return REPORT(sublex_push());
5014 case LEX_INTERPSTART:
5015 if (PL_bufptr == PL_bufend)
5016 return REPORT(sublex_done());
5017 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
5018 "### Interpolated variable\n"); });
5020 /* for /@a/, we leave the joining for the regex engine to do
5021 * (unless we're within \Q etc) */
5022 PL_lex_dojoin = (*PL_bufptr == '@'
5023 && (!PL_lex_inpat || PL_lex_casemods));
5024 PL_lex_state = LEX_INTERPNORMAL;
5025 if (PL_lex_dojoin) {
5026 start_force(PL_curforce);
5027 NEXTVAL_NEXTTOKE.ival = 0;
5029 start_force(PL_curforce);
5030 force_ident("\"", '$');
5031 start_force(PL_curforce);
5032 NEXTVAL_NEXTTOKE.ival = 0;
5034 start_force(PL_curforce);
5035 NEXTVAL_NEXTTOKE.ival = 0;
5036 force_next((2<<24)|'(');
5037 start_force(PL_curforce);
5038 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5041 /* Convert (?{...}) and friends to 'do {...}' */
5042 if (PL_lex_inpat && *PL_bufptr == '(') {
5043 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5045 if (*PL_bufptr != '{')
5047 start_force(PL_curforce);
5048 /* XXX probably need a CURMAD(something) here */
5049 PL_expect = XTERMBLOCK;
5053 if (PL_lex_starts++) {
5058 sv_free(PL_thistoken);
5059 PL_thistoken = newSVpvs("");
5062 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5063 if (!PL_lex_casemods && PL_lex_inpat)
5070 case LEX_INTERPENDMAYBE:
5071 if (intuit_more(PL_bufptr)) {
5072 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5078 if (PL_lex_dojoin) {
5079 const U8 dojoin_was = PL_lex_dojoin;
5080 PL_lex_dojoin = FALSE;
5081 PL_lex_state = LEX_INTERPCONCAT;
5085 sv_free(PL_thistoken);
5086 PL_thistoken = newSVpvs("");
5089 PL_lex_allbrackets--;
5090 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5092 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5093 && SvEVALED(PL_lex_repl))
5095 if (PL_bufptr != PL_bufend)
5096 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5099 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5100 re_eval_str. If the here-doc body’s length equals the previous
5101 value of re_eval_start, re_eval_start will now be null. So
5102 check re_eval_str as well. */
5103 if (PL_parser->lex_shared->re_eval_start
5104 || PL_parser->lex_shared->re_eval_str) {
5106 if (*PL_bufptr != ')')
5107 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5109 /* having compiled a (?{..}) expression, return the original
5110 * text too, as a const */
5111 if (PL_parser->lex_shared->re_eval_str) {
5112 sv = PL_parser->lex_shared->re_eval_str;
5113 PL_parser->lex_shared->re_eval_str = NULL;
5115 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5116 SvPV_shrink_to_cur(sv);
5118 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5119 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5120 start_force(PL_curforce);
5121 /* XXX probably need a CURMAD(something) here */
5122 NEXTVAL_NEXTTOKE.opval =
5123 (OP*)newSVOP(OP_CONST, 0,
5126 PL_parser->lex_shared->re_eval_start = NULL;
5132 case LEX_INTERPCONCAT:
5134 if (PL_lex_brackets)
5135 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5136 (long) PL_lex_brackets);
5138 if (PL_bufptr == PL_bufend)
5139 return REPORT(sublex_done());
5141 /* m'foo' still needs to be parsed for possible (?{...}) */
5142 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5143 SV *sv = newSVsv(PL_linestr);
5145 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5149 s = scan_const(PL_bufptr);
5151 PL_lex_state = LEX_INTERPCASEMOD;
5153 PL_lex_state = LEX_INTERPSTART;
5156 if (s != PL_bufptr) {
5157 start_force(PL_curforce);
5159 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5161 NEXTVAL_NEXTTOKE = pl_yylval;
5164 if (PL_lex_starts++) {
5168 sv_free(PL_thistoken);
5169 PL_thistoken = newSVpvs("");
5172 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5173 if (!PL_lex_casemods && PL_lex_inpat)
5186 s = scan_formline(PL_bufptr);
5187 if (!PL_lex_formbrack)
5196 /* We really do *not* want PL_linestr ever becoming a COW. */
5197 assert (!SvIsCOW(PL_linestr));
5199 PL_oldoldbufptr = PL_oldbufptr;
5201 PL_parser->saw_infix_sigil = 0;
5206 sv_free(PL_thistoken);
5209 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5213 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5216 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5217 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5219 SVs_TEMP | SVf_UTF8),
5220 10, UNI_DISPLAY_ISPRINT)
5221 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5222 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5223 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5224 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5228 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5229 UTF8fARG(UTF, (s - d), d),
5234 goto fake_eof; /* emulate EOF on ^D or ^Z */
5240 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5243 if (PL_lex_brackets &&
5244 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5245 yyerror((const char *)
5247 ? "Format not terminated"
5248 : "Missing right curly or square bracket"));
5250 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5251 "### Tokener got EOF\n");
5255 if (s++ < PL_bufend)
5256 goto retry; /* ignore stray nulls */
5259 if (!PL_in_eval && !PL_preambled) {
5260 PL_preambled = TRUE;
5266 /* Generate a string of Perl code to load the debugger.
5267 * If PERL5DB is set, it will return the contents of that,
5268 * otherwise a compile-time require of perl5db.pl. */
5270 const char * const pdb = PerlEnv_getenv("PERL5DB");
5273 sv_setpv(PL_linestr, pdb);
5274 sv_catpvs(PL_linestr,";");
5276 SETERRNO(0,SS_NORMAL);
5277 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5279 PL_parser->preambling = CopLINE(PL_curcop);
5281 sv_setpvs(PL_linestr,"");
5282 if (PL_preambleav) {
5283 SV **svp = AvARRAY(PL_preambleav);
5284 SV **const end = svp + AvFILLp(PL_preambleav);
5286 sv_catsv(PL_linestr, *svp);
5288 sv_catpvs(PL_linestr, ";");
5290 sv_free(MUTABLE_SV(PL_preambleav));
5291 PL_preambleav = NULL;
5294 sv_catpvs(PL_linestr,
5295 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5296 if (PL_minus_n || PL_minus_p) {
5297 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5299 sv_catpvs(PL_linestr,"chomp;");
5302 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5303 || *PL_splitstr == '"')
5304 && strchr(PL_splitstr + 1, *PL_splitstr))
5305 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5307 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5308 bytes can be used as quoting characters. :-) */
5309 const char *splits = PL_splitstr;
5310 sv_catpvs(PL_linestr, "our @F=split(q\0");
5313 if (*splits == '\\')
5314 sv_catpvn(PL_linestr, splits, 1);
5315 sv_catpvn(PL_linestr, splits, 1);
5316 } while (*splits++);
5317 /* This loop will embed the trailing NUL of
5318 PL_linestr as the last thing it does before
5320 sv_catpvs(PL_linestr, ");");
5324 sv_catpvs(PL_linestr,"our @F=split(' ');");
5327 sv_catpvs(PL_linestr, "\n");
5328 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5329 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5330 PL_last_lop = PL_last_uni = NULL;
5331 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5332 update_debugger_info(PL_linestr, NULL, 0);
5337 bof = PL_rsfp ? TRUE : FALSE;
5340 fake_eof = LEX_FAKE_EOF;
5342 PL_bufptr = PL_bufend;
5343 COPLINE_INC_WITH_HERELINES;
5344 if (!lex_next_chunk(fake_eof)) {
5345 CopLINE_dec(PL_curcop);
5347 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5349 CopLINE_dec(PL_curcop);
5352 PL_realtokenstart = -1;
5355 /* If it looks like the start of a BOM or raw UTF-16,
5356 * check if it in fact is. */
5357 if (bof && PL_rsfp &&
5359 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5362 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5363 bof = (offset == (Off_t)SvCUR(PL_linestr));
5364 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5365 /* offset may include swallowed CR */
5367 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5371 s = swallow_bom((U8*)s);
5374 if (PL_parser->in_pod) {
5375 /* Incest with pod. */
5378 sv_catsv(PL_thiswhite, PL_linestr);
5380 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5381 sv_setpvs(PL_linestr, "");
5382 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5383 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5384 PL_last_lop = PL_last_uni = NULL;
5385 PL_parser->in_pod = 0;
5388 if (PL_rsfp || PL_parser->filtered)
5390 } while (PL_parser->in_pod);
5391 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5392 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5393 PL_last_lop = PL_last_uni = NULL;
5394 if (CopLINE(PL_curcop) == 1) {
5395 while (s < PL_bufend && isSPACE(*s))
5397 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5401 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5405 if (*s == '#' && *(s+1) == '!')
5407 #ifdef ALTERNATE_SHEBANG
5409 static char const as[] = ALTERNATE_SHEBANG;
5410 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5411 d = s + (sizeof(as) - 1);
5413 #endif /* ALTERNATE_SHEBANG */
5422 while (*d && !isSPACE(*d))
5426 #ifdef ARG_ZERO_IS_SCRIPT
5427 if (ipathend > ipath) {
5429 * HP-UX (at least) sets argv[0] to the script name,
5430 * which makes $^X incorrect. And Digital UNIX and Linux,
5431 * at least, set argv[0] to the basename of the Perl
5432 * interpreter. So, having found "#!", we'll set it right.
5434 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5436 assert(SvPOK(x) || SvGMAGICAL(x));
5437 if (sv_eq(x, CopFILESV(PL_curcop))) {
5438 sv_setpvn(x, ipath, ipathend - ipath);
5444 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5445 const char * const lstart = SvPV_const(x,llen);
5447 bstart += blen - llen;
5448 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5449 sv_setpvn(x, ipath, ipathend - ipath);
5454 TAINT_NOT; /* $^X is always tainted, but that's OK */
5456 #endif /* ARG_ZERO_IS_SCRIPT */
5461 d = instr(s,"perl -");
5463 d = instr(s,"perl");
5465 /* avoid getting into infinite loops when shebang
5466 * line contains "Perl" rather than "perl" */
5468 for (d = ipathend-4; d >= ipath; --d) {
5469 if ((*d == 'p' || *d == 'P')
5470 && !ibcmp(d, "perl", 4))
5480 #ifdef ALTERNATE_SHEBANG
5482 * If the ALTERNATE_SHEBANG on this system starts with a
5483 * character that can be part of a Perl expression, then if
5484 * we see it but not "perl", we're probably looking at the
5485 * start of Perl code, not a request to hand off to some
5486 * other interpreter. Similarly, if "perl" is there, but
5487 * not in the first 'word' of the line, we assume the line
5488 * contains the start of the Perl program.
5490 if (d && *s != '#') {
5491 const char *c = ipath;
5492 while (*c && !strchr("; \t\r\n\f\v#", *c))
5495 d = NULL; /* "perl" not in first word; ignore */
5497 *s = '#'; /* Don't try to parse shebang line */
5499 #endif /* ALTERNATE_SHEBANG */
5504 !instr(s,"indir") &&
5505 instr(PL_origargv[0],"perl"))
5512 while (s < PL_bufend && isSPACE(*s))
5514 if (s < PL_bufend) {
5515 Newx(newargv,PL_origargc+3,char*);
5517 while (s < PL_bufend && !isSPACE(*s))
5520 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5523 newargv = PL_origargv;
5526 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5528 Perl_croak(aTHX_ "Can't exec %s", ipath);
5531 while (*d && !isSPACE(*d))
5533 while (SPACE_OR_TAB(*d))
5537 const bool switches_done = PL_doswitches;
5538 const U32 oldpdb = PL_perldb;
5539 const bool oldn = PL_minus_n;
5540 const bool oldp = PL_minus_p;
5544 bool baduni = FALSE;
5546 const char *d2 = d1 + 1;
5547 if (parse_unicode_opts((const char **)&d2)
5551 if (baduni || *d1 == 'M' || *d1 == 'm') {
5552 const char * const m = d1;
5553 while (*d1 && !isSPACE(*d1))
5555 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5558 d1 = moreswitches(d1);
5560 if (PL_doswitches && !switches_done) {
5561 int argc = PL_origargc;
5562 char **argv = PL_origargv;
5565 } while (argc && argv[0][0] == '-' && argv[0][1]);
5566 init_argv_symbols(argc,argv);
5568 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5569 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5570 /* if we have already added "LINE: while (<>) {",
5571 we must not do it again */
5573 sv_setpvs(PL_linestr, "");
5574 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5575 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5576 PL_last_lop = PL_last_uni = NULL;
5577 PL_preambled = FALSE;
5578 if (PERLDB_LINE || PERLDB_SAVESRC)
5579 (void)gv_fetchfile(PL_origfilename);
5586 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5587 PL_lex_state = LEX_FORMLINE;
5588 start_force(PL_curforce);
5589 NEXTVAL_NEXTTOKE.ival = 0;
5590 force_next(FORMRBRACK);
5595 #ifdef PERL_STRICT_CR
5596 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5598 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5600 case ' ': case '\t': case '\f': case 013:
5602 PL_realtokenstart = -1;
5605 PL_thiswhite = newSVpvs("");
5606 sv_catpvn(PL_thiswhite, s, 1);
5614 PL_realtokenstart = -1;
5618 if (PL_lex_state != LEX_NORMAL ||
5619 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5620 if (*s == '#' && s == PL_linestart && PL_in_eval
5621 && !PL_rsfp && !PL_parser->filtered) {
5622 /* handle eval qq[#line 1 "foo"\n ...] */
5623 CopLINE_dec(PL_curcop);
5626 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5628 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5632 const bool in_comment = *s == '#';
5634 while (d < PL_bufend && *d != '\n')
5638 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5639 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5643 PL_thiswhite = newSVpvn(s, d - s);
5646 if (in_comment && d == PL_bufend
5647 && PL_lex_state == LEX_INTERPNORMAL
5648 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5649 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5652 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5653 PL_lex_state = LEX_FORMLINE;
5654 start_force(PL_curforce);
5655 NEXTVAL_NEXTTOKE.ival = 0;
5656 force_next(FORMRBRACK);
5662 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5663 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5666 TOKEN(PEG); /* make sure any #! line is accessible */
5672 if (PL_madskills) d = s;
5673 while (s < PL_bufend && *s != '\n')
5681 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5682 Perl_croak(aTHX_ "panic: input overflow");
5684 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5686 PL_thiswhite = newSVpvs("");
5687 if (CopLINE(PL_curcop) == 1) {
5688 sv_setpvs(PL_thiswhite, "");
5691 sv_catpvn(PL_thiswhite, d, s - d);
5698 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5706 while (s < PL_bufend && SPACE_OR_TAB(*s))
5709 if (strnEQ(s,"=>",2)) {
5710 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5711 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5712 OPERATOR('-'); /* unary minus */
5715 case 'r': ftst = OP_FTEREAD; break;
5716 case 'w': ftst = OP_FTEWRITE; break;
5717 case 'x': ftst = OP_FTEEXEC; break;
5718 case 'o': ftst = OP_FTEOWNED; break;
5719 case 'R': ftst = OP_FTRREAD; break;
5720 case 'W': ftst = OP_FTRWRITE; break;
5721 case 'X': ftst = OP_FTREXEC; break;
5722 case 'O': ftst = OP_FTROWNED; break;
5723 case 'e': ftst = OP_FTIS; break;
5724 case 'z': ftst = OP_FTZERO; break;
5725 case 's': ftst = OP_FTSIZE; break;
5726 case 'f': ftst = OP_FTFILE; break;
5727 case 'd': ftst = OP_FTDIR; break;
5728 case 'l': ftst = OP_FTLINK; break;
5729 case 'p': ftst = OP_FTPIPE; break;
5730 case 'S': ftst = OP_FTSOCK; break;
5731 case 'u': ftst = OP_FTSUID; break;
5732 case 'g': ftst = OP_FTSGID; break;
5733 case 'k': ftst = OP_FTSVTX; break;
5734 case 'b': ftst = OP_FTBLK; break;
5735 case 'c': ftst = OP_FTCHR; break;
5736 case 't': ftst = OP_FTTTY; break;
5737 case 'T': ftst = OP_FTTEXT; break;
5738 case 'B': ftst = OP_FTBINARY; break;
5739 case 'M': case 'A': case 'C':
5740 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5742 case 'M': ftst = OP_FTMTIME; break;
5743 case 'A': ftst = OP_FTATIME; break;
5744 case 'C': ftst = OP_FTCTIME; break;
5752 PL_last_uni = PL_oldbufptr;
5753 PL_last_lop_op = (OPCODE)ftst;
5754 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5755 "### Saw file test %c\n", (int)tmp);
5760 /* Assume it was a minus followed by a one-letter named
5761 * subroutine call (or a -bareword), then. */
5762 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5763 "### '-%c' looked like a file test but was not\n",
5770 const char tmp = *s++;
5773 if (PL_expect == XOPERATOR)
5778 else if (*s == '>') {
5781 if (FEATURE_POSTDEREF_IS_ENABLED && (
5782 ((*s == '$' || *s == '&') && s[1] == '*')
5783 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5784 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5787 Perl_ck_warner_d(aTHX_
5788 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5789 "Postfix dereference is experimental"
5791 PL_expect = XPOSTDEREF;
5794 if (isIDFIRST_lazy_if(s,UTF)) {
5795 s = force_word(s,METHOD,FALSE,TRUE);
5803 if (PL_expect == XOPERATOR) {
5804 if (*s == '=' && !PL_lex_allbrackets &&
5805 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5812 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5814 OPERATOR('-'); /* unary minus */
5820 const char tmp = *s++;
5823 if (PL_expect == XOPERATOR)
5828 if (PL_expect == XOPERATOR) {
5829 if (*s == '=' && !PL_lex_allbrackets &&
5830 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5837 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5844 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5845 if (PL_expect != XOPERATOR) {
5846 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5847 PL_expect = XOPERATOR;
5848 force_ident(PL_tokenbuf, '*');
5856 if (*s == '=' && !PL_lex_allbrackets &&
5857 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5863 if (*s == '=' && !PL_lex_allbrackets &&
5864 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5868 PL_parser->saw_infix_sigil = 1;
5873 if (PL_expect == XOPERATOR) {
5874 if (s[1] == '=' && !PL_lex_allbrackets &&
5875 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5878 PL_parser->saw_infix_sigil = 1;
5881 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5882 PL_tokenbuf[0] = '%';
5883 s = scan_ident(s, PL_tokenbuf + 1,
5884 sizeof PL_tokenbuf - 1, FALSE);
5886 if (!PL_tokenbuf[1]) {
5889 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5891 PL_tokenbuf[0] = '@';
5893 /* Warn about % where they meant $. */
5894 if (*s == '[' || *s == '{') {
5895 if (ckWARN(WARN_SYNTAX)) {
5896 S_check_scalar_slice(aTHX_ s);
5900 PL_expect = XOPERATOR;
5901 force_ident_maybe_lex('%');
5905 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5906 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5911 if (PL_lex_brackets > 100)
5912 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5913 PL_lex_brackstack[PL_lex_brackets++] = 0;
5914 PL_lex_allbrackets++;
5916 const char tmp = *s++;
5921 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5923 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5926 Perl_ck_warner_d(aTHX_
5927 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5928 "Smartmatch is experimental");
5934 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5941 goto just_a_word_zero_gv;
5944 switch (PL_expect) {
5950 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5952 PL_bufptr = s; /* update in case we back off */
5955 "Use of := for an empty attribute list is not allowed");
5962 PL_expect = XTERMBLOCK;
5965 stuffstart = s - SvPVX(PL_linestr) - 1;
5969 while (isIDFIRST_lazy_if(s,UTF)) {
5972 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5973 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5974 if (tmp < 0) tmp = -tmp;
5989 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5991 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5992 COPLINE_SET_FROM_MULTI_END;
5994 /* MUST advance bufptr here to avoid bogus
5995 "at end of line" context messages from yyerror().
5997 PL_bufptr = s + len;
5998 yyerror("Unterminated attribute parameter in attribute list");
6002 return REPORT(0); /* EOF indicator */
6006 sv_catsv(sv, PL_lex_stuff);
6007 attrs = op_append_elem(OP_LIST, attrs,
6008 newSVOP(OP_CONST, 0, sv));
6009 SvREFCNT_dec(PL_lex_stuff);
6010 PL_lex_stuff = NULL;
6013 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
6015 if (PL_in_my == KEY_our) {
6016 deprecate(":unique");
6019 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
6022 /* NOTE: any CV attrs applied here need to be part of
6023 the CVf_BUILTIN_ATTRS define in cv.h! */
6024 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
6026 CvLVALUE_on(PL_compcv);
6028 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
6030 deprecate(":locked");
6032 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
6034 CvMETHOD_on(PL_compcv);
6036 /* After we've set the flags, it could be argued that
6037 we don't need to do the attributes.pm-based setting
6038 process, and shouldn't bother appending recognized
6039 flags. To experiment with that, uncomment the
6040 following "else". (Note that's already been
6041 uncommented. That keeps the above-applied built-in
6042 attributes from being intercepted (and possibly
6043 rejected) by a package's attribute routines, but is
6044 justified by the performance win for the common case
6045 of applying only built-in attributes.) */
6047 attrs = op_append_elem(OP_LIST, attrs,
6048 newSVOP(OP_CONST, 0,
6052 if (*s == ':' && s[1] != ':')
6055 break; /* require real whitespace or :'s */
6056 /* XXX losing whitespace on sequential attributes here */
6060 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6061 if (*s != ';' && *s != '}' && *s != tmp
6062 && (tmp != '=' || *s != ')')) {
6063 const char q = ((*s == '\'') ? '"' : '\'');
6064 /* If here for an expression, and parsed no attrs, back
6066 if (tmp == '=' && !attrs) {
6070 /* MUST advance bufptr here to avoid bogus "at end of line"
6071 context messages from yyerror().
6074 yyerror( (const char *)
6076 ? Perl_form(aTHX_ "Invalid separator character "
6077 "%c%c%c in attribute list", q, *s, q)
6078 : "Unterminated attribute list" ) );
6086 start_force(PL_curforce);
6087 NEXTVAL_NEXTTOKE.opval = attrs;
6088 CURMAD('_', PL_nextwhite);
6093 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6094 (s - SvPVX(PL_linestr)) - stuffstart);
6099 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6103 PL_lex_allbrackets--;
6107 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6108 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6112 PL_lex_allbrackets++;
6115 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6121 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6124 PL_lex_allbrackets--;
6130 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6133 if (PL_lex_brackets <= 0)
6134 yyerror("Unmatched right square bracket");
6137 PL_lex_allbrackets--;
6138 if (PL_lex_state == LEX_INTERPNORMAL) {
6139 if (PL_lex_brackets == 0) {
6140 if (*s == '-' && s[1] == '>')
6141 PL_lex_state = LEX_INTERPENDMAYBE;
6142 else if (*s != '[' && *s != '{')
6143 PL_lex_state = LEX_INTERPEND;
6150 if (PL_lex_brackets > 100) {
6151 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6153 switch (PL_expect) {
6155 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6156 PL_lex_allbrackets++;
6157 OPERATOR(HASHBRACK);
6159 while (s < PL_bufend && SPACE_OR_TAB(*s))
6162 PL_tokenbuf[0] = '\0';
6163 if (d < PL_bufend && *d == '-') {
6164 PL_tokenbuf[0] = '-';
6166 while (d < PL_bufend && SPACE_OR_TAB(*d))
6169 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6170 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6172 while (d < PL_bufend && SPACE_OR_TAB(*d))
6175 const char minus = (PL_tokenbuf[0] == '-');
6176 s = force_word(s + minus, WORD, FALSE, TRUE);
6184 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6185 PL_lex_allbrackets++;
6190 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6191 PL_lex_allbrackets++;
6196 if (PL_oldoldbufptr == PL_last_lop)
6197 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6199 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6200 PL_lex_allbrackets++;
6203 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6205 /* This hack is to get the ${} in the message. */
6207 yyerror("syntax error");
6210 OPERATOR(HASHBRACK);
6212 /* This hack serves to disambiguate a pair of curlies
6213 * as being a block or an anon hash. Normally, expectation
6214 * determines that, but in cases where we're not in a
6215 * position to expect anything in particular (like inside
6216 * eval"") we have to resolve the ambiguity. This code
6217 * covers the case where the first term in the curlies is a
6218 * quoted string. Most other cases need to be explicitly
6219 * disambiguated by prepending a "+" before the opening
6220 * curly in order to force resolution as an anon hash.
6222 * XXX should probably propagate the outer expectation
6223 * into eval"" to rely less on this hack, but that could
6224 * potentially break current behavior of eval"".
6228 if (*s == '\'' || *s == '"' || *s == '`') {
6229 /* common case: get past first string, handling escapes */
6230 for (t++; t < PL_bufend && *t != *s;)
6231 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6235 else if (*s == 'q') {
6238 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6239 && !isWORDCHAR(*t))))
6241 /* skip q//-like construct */
6243 char open, close, term;
6246 while (t < PL_bufend && isSPACE(*t))
6248 /* check for q => */
6249 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6250 OPERATOR(HASHBRACK);
6254 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6258 for (t++; t < PL_bufend; t++) {
6259 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6261 else if (*t == open)
6265 for (t++; t < PL_bufend; t++) {
6266 if (*t == '\\' && t+1 < PL_bufend)
6268 else if (*t == close && --brackets <= 0)
6270 else if (*t == open)
6277 /* skip plain q word */
6278 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6281 else if (isWORDCHAR_lazy_if(t,UTF)) {
6283 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6286 while (t < PL_bufend && isSPACE(*t))
6288 /* if comma follows first term, call it an anon hash */
6289 /* XXX it could be a comma expression with loop modifiers */
6290 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6291 || (*t == '=' && t[1] == '>')))
6292 OPERATOR(HASHBRACK);
6293 if (PL_expect == XREF)
6296 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6302 pl_yylval.ival = CopLINE(PL_curcop);
6303 if (isSPACE(*s) || *s == '#')
6304 PL_copline = NOLINE; /* invalidate current command line number */
6305 TOKEN(formbrack ? '=' : '{');
6307 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6311 if (PL_lex_brackets <= 0)
6312 yyerror("Unmatched right curly bracket");
6314 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6315 PL_lex_allbrackets--;
6316 if (PL_lex_state == LEX_INTERPNORMAL) {
6317 if (PL_lex_brackets == 0) {
6318 if (PL_expect & XFAKEBRACK) {
6319 PL_expect &= XENUMMASK;
6320 PL_lex_state = LEX_INTERPEND;
6325 PL_thiswhite = newSVpvs("");
6326 sv_catpvs(PL_thiswhite,"}");
6329 return yylex(); /* ignore fake brackets */
6331 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6332 && SvEVALED(PL_lex_repl))
6333 PL_lex_state = LEX_INTERPEND;
6334 else if (*s == '-' && s[1] == '>')
6335 PL_lex_state = LEX_INTERPENDMAYBE;
6336 else if (*s != '[' && *s != '{')
6337 PL_lex_state = LEX_INTERPEND;
6340 if (PL_expect & XFAKEBRACK) {
6341 PL_expect &= XENUMMASK;
6343 return yylex(); /* ignore fake brackets */
6345 start_force(PL_curforce);
6347 curmad('X', newSVpvn(s-1,1));
6348 CURMAD('_', PL_thiswhite);
6350 force_next(formbrack ? '.' : '}');
6351 if (formbrack) LEAVE;
6353 if (PL_madskills && !PL_thistoken)
6354 PL_thistoken = newSVpvs("");
6356 if (formbrack == 2) { /* means . where arguments were expected */
6357 start_force(PL_curforce);
6363 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6366 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6367 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6374 if (PL_expect == XOPERATOR) {
6375 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6376 && isIDFIRST_lazy_if(s,UTF))
6378 CopLINE_dec(PL_curcop);
6379 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6380 CopLINE_inc(PL_curcop);
6382 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6383 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6387 PL_parser->saw_infix_sigil = 1;
6391 PL_tokenbuf[0] = '&';
6392 s = scan_ident(s - 1, PL_tokenbuf + 1,
6393 sizeof PL_tokenbuf - 1, TRUE);
6394 if (PL_tokenbuf[1]) {
6395 PL_expect = XOPERATOR;
6396 force_ident_maybe_lex('&');
6400 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6406 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6407 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6414 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6415 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6423 const char tmp = *s++;
6425 if (!PL_lex_allbrackets &&
6426 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6433 if (!PL_lex_allbrackets &&
6434 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6442 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6443 && strchr("+-*/%.^&|<",tmp))
6444 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6445 "Reversed %c= operator",(int)tmp);
6447 if (PL_expect == XSTATE && isALPHA(tmp) &&
6448 (s == PL_linestart+1 || s[-2] == '\n') )
6450 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6451 || PL_lex_state != LEX_NORMAL) {
6456 if (strnEQ(s,"=cut",4)) {
6472 PL_thiswhite = newSVpvs("");
6473 sv_catpvn(PL_thiswhite, PL_linestart,
6474 PL_bufend - PL_linestart);
6478 PL_parser->in_pod = 1;
6482 if (PL_expect == XBLOCK) {
6484 #ifdef PERL_STRICT_CR
6485 while (SPACE_OR_TAB(*t))
6487 while (SPACE_OR_TAB(*t) || *t == '\r')
6490 if (*t == '\n' || *t == '#') {
6493 SAVEI8(PL_parser->form_lex_state);
6494 SAVEI32(PL_lex_formbrack);
6495 PL_parser->form_lex_state = PL_lex_state;
6496 PL_lex_formbrack = PL_lex_brackets + 1;
6500 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6509 const char tmp = *s++;
6511 /* was this !=~ where !~ was meant?
6512 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6514 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6515 const char *t = s+1;
6517 while (t < PL_bufend && isSPACE(*t))
6520 if (*t == '/' || *t == '?' ||
6521 ((*t == 'm' || *t == 's' || *t == 'y')
6522 && !isWORDCHAR(t[1])) ||
6523 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6524 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6525 "!=~ should be !~");
6527 if (!PL_lex_allbrackets &&
6528 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6540 if (PL_expect != XOPERATOR) {
6541 if (s[1] != '<' && !strchr(s,'>'))
6544 s = scan_heredoc(s);
6546 s = scan_inputsymbol(s);
6547 PL_expect = XOPERATOR;
6548 TOKEN(sublex_start());
6554 if (*s == '=' && !PL_lex_allbrackets &&
6555 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6559 SHop(OP_LEFT_SHIFT);
6564 if (!PL_lex_allbrackets &&
6565 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6572 if (!PL_lex_allbrackets &&
6573 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6589 const char tmp = *s++;
6591 if (*s == '=' && !PL_lex_allbrackets &&
6592 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6596 SHop(OP_RIGHT_SHIFT);
6598 else if (tmp == '=') {
6599 if (!PL_lex_allbrackets &&
6600 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6608 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6617 if (PL_expect == XOPERATOR) {
6618 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6619 return deprecate_commaless_var_list();
6622 else if (PL_expect == XPOSTDEREF) POSTDEREF('$');
6624 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6625 PL_tokenbuf[0] = '@';
6626 s = scan_ident(s + 1, PL_tokenbuf + 1,
6627 sizeof PL_tokenbuf - 1, FALSE);
6628 if (PL_expect == XOPERATOR)
6629 no_op("Array length", s);
6630 if (!PL_tokenbuf[1])
6632 PL_expect = XOPERATOR;
6633 force_ident_maybe_lex('#');
6637 PL_tokenbuf[0] = '$';
6638 s = scan_ident(s, PL_tokenbuf + 1,
6639 sizeof PL_tokenbuf - 1, FALSE);
6640 if (PL_expect == XOPERATOR)
6642 if (!PL_tokenbuf[1]) {
6644 yyerror("Final $ should be \\$ or $name");
6650 const char tmp = *s;
6651 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6654 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6655 && intuit_more(s)) {
6657 PL_tokenbuf[0] = '@';
6658 if (ckWARN(WARN_SYNTAX)) {
6661 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6664 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6665 while (t < PL_bufend && *t != ']')
6667 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6668 "Multidimensional syntax %.*s not supported",
6669 (int)((t - PL_bufptr) + 1), PL_bufptr);
6673 else if (*s == '{') {
6675 PL_tokenbuf[0] = '%';
6676 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6677 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6679 char tmpbuf[sizeof PL_tokenbuf];
6682 } while (isSPACE(*t));
6683 if (isIDFIRST_lazy_if(t,UTF)) {
6685 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6690 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6691 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6692 "You need to quote \"%"UTF8f"\"",
6693 UTF8fARG(UTF, len, tmpbuf));
6699 PL_expect = XOPERATOR;
6700 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6701 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6702 if (!islop || PL_last_lop_op == OP_GREPSTART)
6703 PL_expect = XOPERATOR;
6704 else if (strchr("$@\"'`q", *s))
6705 PL_expect = XTERM; /* e.g. print $fh "foo" */
6706 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6707 PL_expect = XTERM; /* e.g. print $fh &sub */
6708 else if (isIDFIRST_lazy_if(s,UTF)) {
6709 char tmpbuf[sizeof PL_tokenbuf];
6711 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6712 if ((t2 = keyword(tmpbuf, len, 0))) {
6713 /* binary operators exclude handle interpretations */
6725 PL_expect = XTERM; /* e.g. print $fh length() */
6730 PL_expect = XTERM; /* e.g. print $fh subr() */
6733 else if (isDIGIT(*s))
6734 PL_expect = XTERM; /* e.g. print $fh 3 */
6735 else if (*s == '.' && isDIGIT(s[1]))
6736 PL_expect = XTERM; /* e.g. print $fh .3 */
6737 else if ((*s == '?' || *s == '-' || *s == '+')
6738 && !isSPACE(s[1]) && s[1] != '=')
6739 PL_expect = XTERM; /* e.g. print $fh -1 */
6740 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6742 PL_expect = XTERM; /* e.g. print $fh /.../
6743 XXX except DORDOR operator
6745 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6747 PL_expect = XTERM; /* print $fh <<"EOF" */
6750 force_ident_maybe_lex('$');
6754 if (PL_expect == XOPERATOR)
6756 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6757 PL_tokenbuf[0] = '@';
6758 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6760 if (!PL_tokenbuf[1]) {
6763 if (PL_lex_state == LEX_NORMAL)
6765 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6767 PL_tokenbuf[0] = '%';
6769 /* Warn about @ where they meant $. */
6770 if (*s == '[' || *s == '{') {
6771 if (ckWARN(WARN_SYNTAX)) {
6772 S_check_scalar_slice(aTHX_ s);
6776 PL_expect = XOPERATOR;
6777 force_ident_maybe_lex('@');
6780 case '/': /* may be division, defined-or, or pattern */
6781 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6782 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6783 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6788 case '?': /* may either be conditional or pattern */
6789 if (PL_expect == XOPERATOR) {
6792 if (!PL_lex_allbrackets &&
6793 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6797 PL_lex_allbrackets++;
6803 /* A // operator. */
6804 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6805 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6806 LEX_FAKEEOF_LOGIC)) {
6814 if (*s == '=' && !PL_lex_allbrackets &&
6815 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6824 /* Disable warning on "study /blah/" */
6825 if (PL_oldoldbufptr == PL_last_uni
6826 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6827 || memNE(PL_last_uni, "study", 5)
6828 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6832 deprecate("?PATTERN? without explicit operator");
6833 s = scan_pat(s,OP_MATCH);
6834 TERM(sublex_start());
6838 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6839 #ifdef PERL_STRICT_CR
6842 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6844 && (s == PL_linestart || s[-1] == '\n') )
6847 formbrack = 2; /* dot seen where arguments expected */
6850 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6854 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6857 if (!PL_lex_allbrackets &&
6858 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6865 pl_yylval.ival = OPf_SPECIAL;
6871 if (*s == '=' && !PL_lex_allbrackets &&
6872 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6879 case '0': case '1': case '2': case '3': case '4':
6880 case '5': case '6': case '7': case '8': case '9':
6881 s = scan_num(s, &pl_yylval);
6882 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6883 if (PL_expect == XOPERATOR)
6888 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6889 COPLINE_SET_FROM_MULTI_END;
6890 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6891 if (PL_expect == XOPERATOR) {
6892 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6893 return deprecate_commaless_var_list();
6900 pl_yylval.ival = OP_CONST;
6901 TERM(sublex_start());
6904 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6907 printbuf("### Saw string before %s\n", s);
6909 PerlIO_printf(Perl_debug_log,
6910 "### Saw unterminated string\n");
6912 if (PL_expect == XOPERATOR) {
6913 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6914 return deprecate_commaless_var_list();
6921 pl_yylval.ival = OP_CONST;
6922 /* FIXME. I think that this can be const if char *d is replaced by
6923 more localised variables. */
6924 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6925 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6926 pl_yylval.ival = OP_STRINGIFY;
6930 if (pl_yylval.ival == OP_CONST)
6931 COPLINE_SET_FROM_MULTI_END;
6932 TERM(sublex_start());
6935 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6936 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6937 if (PL_expect == XOPERATOR)
6938 no_op("Backticks",s);
6941 readpipe_override();
6942 TERM(sublex_start());
6946 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6948 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6950 if (PL_expect == XOPERATOR)
6951 no_op("Backslash",s);
6955 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6956 char *start = s + 2;
6957 while (isDIGIT(*start) || *start == '_')
6959 if (*start == '.' && isDIGIT(start[1])) {
6960 s = scan_num(s, &pl_yylval);
6963 else if ((*start == ':' && start[1] == ':')
6964 || (PL_expect == XSTATE && *start == ':'))
6966 else if (PL_expect == XSTATE) {
6968 while (d < PL_bufend && isSPACE(*d)) d++;
6969 if (*d == ':') goto keylookup;
6971 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6972 if (!isALPHA(*start) && (PL_expect == XTERM
6973 || PL_expect == XREF || PL_expect == XSTATE
6974 || PL_expect == XTERMORDORDOR)) {
6975 GV *const gv = gv_fetchpvn_flags(s, start - s,
6976 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6978 s = scan_num(s, &pl_yylval);
6985 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7038 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7040 /* Some keywords can be followed by any delimiter, including ':' */
7041 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7043 /* x::* is just a word, unless x is "CORE" */
7044 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
7048 while (d < PL_bufend && isSPACE(*d))
7049 d++; /* no comments skipped here, or s### is misparsed */
7051 /* Is this a word before a => operator? */
7052 if (*d == '=' && d[1] == '>') {
7056 = (OP*)newSVOP(OP_CONST, 0,
7057 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7058 pl_yylval.opval->op_private = OPpCONST_BARE;
7062 /* Check for plugged-in keyword */
7066 char *saved_bufptr = PL_bufptr;
7068 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7070 if (result == KEYWORD_PLUGIN_DECLINE) {
7071 /* not a plugged-in keyword */
7072 PL_bufptr = saved_bufptr;
7073 } else if (result == KEYWORD_PLUGIN_STMT) {
7074 pl_yylval.opval = o;
7077 return REPORT(PLUGSTMT);
7078 } else if (result == KEYWORD_PLUGIN_EXPR) {
7079 pl_yylval.opval = o;
7081 PL_expect = XOPERATOR;
7082 return REPORT(PLUGEXPR);
7084 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7089 /* Check for built-in keyword */
7090 tmp = keyword(PL_tokenbuf, len, 0);
7092 /* Is this a label? */
7093 if (!anydelim && PL_expect == XSTATE
7094 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7096 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7097 pl_yylval.pval[len] = '\0';
7098 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7103 /* Check for lexical sub */
7104 if (PL_expect != XOPERATOR) {
7105 char tmpbuf[sizeof PL_tokenbuf + 1];
7107 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7108 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7109 if (off != NOT_IN_PAD) {
7110 assert(off); /* we assume this is boolean-true below */
7111 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7112 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7113 HEK * const stashname = HvNAME_HEK(stash);
7114 sv = newSVhek(stashname);
7115 sv_catpvs(sv, "::");
7116 sv_catpvn_flags(sv, PL_tokenbuf, len,
7117 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7118 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7128 rv2cv_op = newOP(OP_PADANY, 0);
7129 rv2cv_op->op_targ = off;
7130 cv = find_lexical_cv(off);
7138 if (tmp < 0) { /* second-class keyword? */
7139 GV *ogv = NULL; /* override (winner) */
7140 GV *hgv = NULL; /* hidden (loser) */
7141 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7143 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7144 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7147 if (GvIMPORTED_CV(gv))
7149 else if (! CvMETHOD(cv))
7153 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7154 UTF ? -(I32)len : (I32)len, FALSE)) &&
7155 (gv = *gvp) && isGV_with_GP(gv) &&
7156 GvCVu(gv) && GvIMPORTED_CV(gv))
7163 tmp = 0; /* overridden by import or by GLOBAL */
7166 && -tmp==KEY_lock /* XXX generalizable kludge */
7169 tmp = 0; /* any sub overrides "weak" keyword */
7171 else { /* no override */
7173 if (tmp == KEY_dump) {
7174 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7175 "dump() better written as CORE::dump()");
7179 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
7180 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7181 "Ambiguous call resolved as CORE::%s(), "
7182 "qualify as such or use &",
7187 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7188 && (!anydelim || *s != '#')) {
7189 /* no override, and not s### either; skipspace is safe here
7190 * check for => on following line */
7192 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7193 STRLEN soff = s - SvPVX(PL_linestr);
7194 s = skipspace_flags(s, LEX_NO_INCLINE);
7195 arrow = *s == '=' && s[1] == '>';
7196 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7197 s = SvPVX(PL_linestr) + soff;
7205 default: /* not a keyword */
7206 /* Trade off - by using this evil construction we can pull the
7207 variable gv into the block labelled keylookup. If not, then
7208 we have to give it function scope so that the goto from the
7209 earlier ':' case doesn't bypass the initialisation. */
7211 just_a_word_zero_gv:
7223 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7224 const char penultchar =
7225 lastchar && PL_bufptr - 2 >= PL_linestart
7229 SV *nextPL_nextwhite = 0;
7233 /* Get the rest if it looks like a package qualifier */
7235 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7237 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7240 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7241 UTF8fARG(UTF, len, PL_tokenbuf),
7242 *s == '\'' ? "'" : "::");
7247 if (PL_expect == XOPERATOR) {
7248 if (PL_bufptr == PL_linestart) {
7249 CopLINE_dec(PL_curcop);
7250 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7251 CopLINE_inc(PL_curcop);
7254 no_op("Bareword",s);
7257 /* Look for a subroutine with this name in current package,
7258 unless this is a lexical sub, or name is "Foo::",
7259 in which case Foo is a bareword
7260 (and a package name). */
7262 if (len > 2 && !PL_madskills &&
7263 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7265 if (ckWARN(WARN_BAREWORD)
7266 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7267 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7268 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7269 UTF8fARG(UTF, len, PL_tokenbuf));
7271 PL_tokenbuf[len] = '\0';
7277 /* Mustn't actually add anything to a symbol table.
7278 But also don't want to "initialise" any placeholder
7279 constants that might already be there into full
7280 blown PVGVs with attached PVCV. */
7281 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7282 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7288 /* if we saw a global override before, get the right name */
7291 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7292 len ? len : strlen(PL_tokenbuf));
7294 SV * const tmp_sv = sv;
7295 sv = newSVpvs("CORE::GLOBAL::");
7296 sv_catsv(sv, tmp_sv);
7297 SvREFCNT_dec(tmp_sv);
7301 if (PL_madskills && !PL_thistoken) {
7302 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7303 PL_thistoken = newSVpvn(start,s - start);
7304 PL_realtokenstart = s - SvPVX(PL_linestr);
7308 /* Presume this is going to be a bareword of some sort. */
7310 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7311 pl_yylval.opval->op_private = OPpCONST_BARE;
7313 /* And if "Foo::", then that's what it certainly is. */
7319 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7320 const_op->op_private = OPpCONST_BARE;
7321 rv2cv_op = newCVREF(0, const_op);
7322 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7325 /* See if it's the indirect object for a list operator. */
7327 if (PL_oldoldbufptr &&
7328 PL_oldoldbufptr < PL_bufptr &&
7329 (PL_oldoldbufptr == PL_last_lop
7330 || PL_oldoldbufptr == PL_last_uni) &&
7331 /* NO SKIPSPACE BEFORE HERE! */
7332 (PL_expect == XREF ||
7333 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7335 bool immediate_paren = *s == '(';
7337 /* (Now we can afford to cross potential line boundary.) */
7338 s = SKIPSPACE2(s,nextPL_nextwhite);
7340 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7343 /* Two barewords in a row may indicate method call. */
7345 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7346 (tmp = intuit_method(s, gv, cv))) {
7348 if (tmp == METHOD && !PL_lex_allbrackets &&
7349 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7350 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7354 /* If not a declared subroutine, it's an indirect object. */
7355 /* (But it's an indir obj regardless for sort.) */
7356 /* Also, if "_" follows a filetest operator, it's a bareword */
7359 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7361 (PL_last_lop_op != OP_MAPSTART &&
7362 PL_last_lop_op != OP_GREPSTART))))
7363 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7364 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7367 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7372 PL_expect = XOPERATOR;
7375 s = SKIPSPACE2(s,nextPL_nextwhite);
7376 PL_nextwhite = nextPL_nextwhite;
7381 /* Is this a word before a => operator? */
7382 if (*s == '=' && s[1] == '>' && !pkgname) {
7385 /* This is our own scalar, created a few lines above,
7387 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7388 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7389 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7390 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7391 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7395 /* If followed by a paren, it's certainly a subroutine. */
7400 while (SPACE_OR_TAB(*d))
7402 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7409 PL_nextwhite = PL_thiswhite;
7412 start_force(PL_curforce);
7414 NEXTVAL_NEXTTOKE.opval =
7415 off ? rv2cv_op : pl_yylval.opval;
7416 PL_expect = XOPERATOR;
7419 PL_nextwhite = nextPL_nextwhite;
7420 curmad('X', PL_thistoken);
7421 PL_thistoken = newSVpvs("");
7425 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7426 else op_free(rv2cv_op), force_next(WORD);
7431 /* If followed by var or block, call it a method (unless sub) */
7433 if ((*s == '$' || *s == '{') && !cv) {
7435 PL_last_lop = PL_oldbufptr;
7436 PL_last_lop_op = OP_METHOD;
7437 if (!PL_lex_allbrackets &&
7438 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7439 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7443 /* If followed by a bareword, see if it looks like indir obj. */
7446 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7447 && (tmp = intuit_method(s, gv, cv))) {
7449 if (tmp == METHOD && !PL_lex_allbrackets &&
7450 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7451 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7455 /* Not a method, so call it a subroutine (if defined) */
7458 if (lastchar == '-' && penultchar != '-') {
7459 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7460 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7461 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7462 UTF8fARG(UTF, l, PL_tokenbuf),
7463 UTF8fARG(UTF, l, PL_tokenbuf));
7465 /* Check for a constant sub */
7466 if ((sv = cv_const_sv_or_av(cv))) {
7469 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7470 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7471 if (SvTYPE(sv) == SVt_PVAV)
7472 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7475 pl_yylval.opval->op_private = 0;
7476 pl_yylval.opval->op_folded = 1;
7477 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7482 op_free(pl_yylval.opval);
7484 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7485 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7486 PL_last_lop = PL_oldbufptr;
7487 PL_last_lop_op = OP_ENTERSUB;
7488 /* Is there a prototype? */
7495 STRLEN protolen = CvPROTOLEN(cv);
7496 const char *proto = CvPROTO(cv);
7498 proto = S_strip_spaces(aTHX_ proto, &protolen);
7501 if ((optional = *proto == ';'))
7504 while (*proto == ';');
7508 *proto == '$' || *proto == '_'
7509 || *proto == '*' || *proto == '+'
7514 *proto == '\\' && proto[1] && proto[2] == '\0'
7517 UNIPROTO(UNIOPSUB,optional);
7518 if (*proto == '\\' && proto[1] == '[') {
7519 const char *p = proto + 2;
7520 while(*p && *p != ']')
7522 if(*p == ']' && !p[1])
7523 UNIPROTO(UNIOPSUB,optional);
7525 if (*proto == '&' && *s == '{') {
7527 sv_setpvs(PL_subname, "__ANON__");
7529 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7530 if (!PL_lex_allbrackets &&
7531 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7532 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7539 PL_nextwhite = PL_thiswhite;
7542 start_force(PL_curforce);
7543 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7546 PL_nextwhite = nextPL_nextwhite;
7547 curmad('X', PL_thistoken);
7548 PL_thistoken = newSVpvs("");
7550 force_next(off ? PRIVATEREF : WORD);
7551 if (!PL_lex_allbrackets &&
7552 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7553 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7558 /* Guess harder when madskills require "best effort". */
7559 if (PL_madskills && (!gv || !GvCVu(gv))) {
7560 int probable_sub = 0;
7561 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7563 else if (isALPHA(*s)) {
7567 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7568 if (!keyword(tmpbuf, tmplen, 0))
7571 while (d < PL_bufend && isSPACE(*d))
7573 if (*d == '=' && d[1] == '>')
7578 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7580 op_free(pl_yylval.opval);
7582 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7583 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7584 PL_last_lop = PL_oldbufptr;
7585 PL_last_lop_op = OP_ENTERSUB;
7586 PL_nextwhite = PL_thiswhite;
7588 start_force(PL_curforce);
7589 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7591 PL_nextwhite = nextPL_nextwhite;
7592 curmad('X', PL_thistoken);
7593 PL_thistoken = newSVpvs("");
7594 force_next(off ? PRIVATEREF : WORD);
7595 if (!PL_lex_allbrackets &&
7596 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7597 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7601 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7603 force_next(off ? PRIVATEREF : WORD);
7604 if (!PL_lex_allbrackets &&
7605 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7606 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7611 /* Call it a bare word */
7613 if (PL_hints & HINT_STRICT_SUBS)
7614 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7617 /* after "print" and similar functions (corresponding to
7618 * "F? L" in opcode.pl), whatever wasn't already parsed as
7619 * a filehandle should be subject to "strict subs".
7620 * Likewise for the optional indirect-object argument to system
7621 * or exec, which can't be a bareword */
7622 if ((PL_last_lop_op == OP_PRINT
7623 || PL_last_lop_op == OP_PRTF
7624 || PL_last_lop_op == OP_SAY
7625 || PL_last_lop_op == OP_SYSTEM
7626 || PL_last_lop_op == OP_EXEC)
7627 && (PL_hints & HINT_STRICT_SUBS))
7628 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7629 if (lastchar != '-') {
7630 if (ckWARN(WARN_RESERVED)) {
7634 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7635 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7643 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7644 && saw_infix_sigil) {
7645 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7646 "Operator or semicolon missing before %c%"UTF8f,
7648 UTF8fARG(UTF, strlen(PL_tokenbuf),
7650 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7651 "Ambiguous use of %c resolved as operator %c",
7652 lastchar, lastchar);
7659 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7664 (OP*)newSVOP(OP_CONST, 0,
7665 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7668 case KEY___PACKAGE__:
7670 (OP*)newSVOP(OP_CONST, 0,
7672 ? newSVhek(HvNAME_HEK(PL_curstash))
7679 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7680 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7683 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7685 gv_init(gv,stash,"DATA",4,0);
7688 GvIOp(gv) = newIO();
7689 IoIFP(GvIOp(gv)) = PL_rsfp;
7690 #if defined(HAS_FCNTL) && defined(F_SETFD)
7692 const int fd = PerlIO_fileno(PL_rsfp);
7693 fcntl(fd,F_SETFD,fd >= 3);
7696 /* Mark this internal pseudo-handle as clean */
7697 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7698 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7699 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7701 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7702 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7703 /* if the script was opened in binmode, we need to revert
7704 * it to text mode for compatibility; but only iff it has CRs
7705 * XXX this is a questionable hack at best. */
7706 if (PL_bufend-PL_bufptr > 2
7707 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7710 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7711 loc = PerlIO_tell(PL_rsfp);
7712 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7715 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7717 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7718 #endif /* NETWARE */
7720 PerlIO_seek(PL_rsfp, loc, 0);
7724 #ifdef PERLIO_LAYERS
7727 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7728 else if (PL_encoding) {
7735 XPUSHs(PL_encoding);
7737 call_method("name", G_SCALAR);
7741 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7742 Perl_form(aTHX_ ":encoding(%"SVf")",
7751 if (PL_realtokenstart >= 0) {
7752 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7754 PL_endwhite = newSVpvs("");
7755 sv_catsv(PL_endwhite, PL_thiswhite);
7757 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7758 PL_realtokenstart = -1;
7760 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7770 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7779 if (PL_expect == XSTATE) {
7786 if (*s == ':' && s[1] == ':') {
7790 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7791 if ((*s == ':' && s[1] == ':')
7792 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7796 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7800 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7801 UTF8fARG(UTF, len, PL_tokenbuf));
7804 else if (tmp == KEY_require || tmp == KEY_do
7806 /* that's a way to remember we saw "CORE::" */
7819 LOP(OP_ACCEPT,XTERM);
7822 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7827 LOP(OP_ATAN2,XTERM);
7833 LOP(OP_BINMODE,XTERM);
7836 LOP(OP_BLESS,XTERM);
7845 /* We have to disambiguate the two senses of
7846 "continue". If the next token is a '{' then
7847 treat it as the start of a continue block;
7848 otherwise treat it as a control operator.
7858 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7868 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7877 if (!PL_cryptseen) {
7878 PL_cryptseen = TRUE;
7882 LOP(OP_CRYPT,XTERM);
7885 LOP(OP_CHMOD,XTERM);
7888 LOP(OP_CHOWN,XTERM);
7891 LOP(OP_CONNECT,XTERM);
7911 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7913 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7916 force_ident_maybe_lex('&');
7921 if (orig_keyword == KEY_do) {
7930 PL_hints |= HINT_BLOCK_SCOPE;
7940 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7941 STR_WITH_LEN("NDBM_File::"),
7942 STR_WITH_LEN("DB_File::"),
7943 STR_WITH_LEN("GDBM_File::"),
7944 STR_WITH_LEN("SDBM_File::"),
7945 STR_WITH_LEN("ODBM_File::"),
7947 LOP(OP_DBMOPEN,XTERM);
7953 PL_expect = XOPERATOR;
7954 s = force_word(s,WORD,TRUE,FALSE);
7961 pl_yylval.ival = CopLINE(PL_curcop);
7965 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7979 if (*s == '{') { /* block eval */
7980 PL_expect = XTERMBLOCK;
7981 UNIBRACK(OP_ENTERTRY);
7983 else { /* string eval */
7985 UNIBRACK(OP_ENTEREVAL);
7990 UNIBRACK(-OP_ENTEREVAL);
8004 case KEY_endhostent:
8010 case KEY_endservent:
8013 case KEY_endprotoent:
8024 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8026 pl_yylval.ival = CopLINE(PL_curcop);
8028 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8031 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8034 if ((PL_bufend - p) >= 3 &&
8035 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8037 else if ((PL_bufend - p) >= 4 &&
8038 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8041 /* skip optional package name, as in "for my abc $x (..)" */
8042 if (isIDFIRST_lazy_if(p,UTF)) {
8043 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8047 Perl_croak(aTHX_ "Missing $ on loop variable");
8049 s = SvPVX(PL_linestr) + soff;
8055 LOP(OP_FORMLINE,XTERM);
8064 LOP(OP_FCNTL,XTERM);
8070 LOP(OP_FLOCK,XTERM);
8073 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8078 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8083 LOP(OP_GREPSTART, XREF);
8086 PL_expect = XOPERATOR;
8087 s = force_word(s,WORD,TRUE,FALSE);
8102 case KEY_getpriority:
8103 LOP(OP_GETPRIORITY,XTERM);
8105 case KEY_getprotobyname:
8108 case KEY_getprotobynumber:
8109 LOP(OP_GPBYNUMBER,XTERM);
8111 case KEY_getprotoent:
8123 case KEY_getpeername:
8124 UNI(OP_GETPEERNAME);
8126 case KEY_gethostbyname:
8129 case KEY_gethostbyaddr:
8130 LOP(OP_GHBYADDR,XTERM);
8132 case KEY_gethostent:
8135 case KEY_getnetbyname:
8138 case KEY_getnetbyaddr:
8139 LOP(OP_GNBYADDR,XTERM);
8144 case KEY_getservbyname:
8145 LOP(OP_GSBYNAME,XTERM);
8147 case KEY_getservbyport:
8148 LOP(OP_GSBYPORT,XTERM);
8150 case KEY_getservent:
8153 case KEY_getsockname:
8154 UNI(OP_GETSOCKNAME);
8156 case KEY_getsockopt:
8157 LOP(OP_GSOCKOPT,XTERM);
8172 pl_yylval.ival = CopLINE(PL_curcop);
8173 Perl_ck_warner_d(aTHX_
8174 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8175 "given is experimental");
8180 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8188 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8190 pl_yylval.ival = CopLINE(PL_curcop);
8194 LOP(OP_INDEX,XTERM);
8200 LOP(OP_IOCTL,XTERM);
8212 PL_expect = XOPERATOR;
8213 s = force_word(s,WORD,TRUE,FALSE);
8230 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8235 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8249 LOP(OP_LISTEN,XTERM);
8258 s = scan_pat(s,OP_MATCH);
8259 TERM(sublex_start());
8262 LOP(OP_MAPSTART, XREF);
8265 LOP(OP_MKDIR,XTERM);
8268 LOP(OP_MSGCTL,XTERM);
8271 LOP(OP_MSGGET,XTERM);
8274 LOP(OP_MSGRCV,XTERM);
8277 LOP(OP_MSGSND,XTERM);
8282 PL_in_my = (U16)tmp;
8284 if (isIDFIRST_lazy_if(s,UTF)) {
8288 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8289 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8291 if (!FEATURE_LEXSUBS_IS_ENABLED)
8293 "Experimental \"%s\" subs not enabled",
8294 tmp == KEY_my ? "my" :
8295 tmp == KEY_state ? "state" : "our");
8296 Perl_ck_warner_d(aTHX_
8297 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8298 "The lexical_subs feature is experimental");
8301 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8302 if (!PL_in_my_stash) {
8305 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8306 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8309 if (PL_madskills) { /* just add type to declarator token */
8310 sv_catsv(PL_thistoken, PL_nextwhite);
8312 sv_catpvn(PL_thistoken, start, s - start);
8320 PL_expect = XOPERATOR;
8321 s = force_word(s,WORD,TRUE,FALSE);
8325 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8330 s = tokenize_use(0, s);
8334 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8337 if (!PL_lex_allbrackets &&
8338 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8339 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8345 if (isIDFIRST_lazy_if(s,UTF)) {
8347 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8349 for (t=d; isSPACE(*t);)
8351 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8353 && !(t[0] == '=' && t[1] == '>')
8354 && !(t[0] == ':' && t[1] == ':')
8355 && !keyword(s, d-s, 0)
8357 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8358 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8359 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8365 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8367 pl_yylval.ival = OP_OR;
8377 LOP(OP_OPEN_DIR,XTERM);
8380 checkcomma(s,PL_tokenbuf,"filehandle");
8384 checkcomma(s,PL_tokenbuf,"filehandle");
8403 s = force_word(s,WORD,FALSE,TRUE);
8405 s = force_strict_version(s);
8406 PL_lex_expect = XBLOCK;
8410 LOP(OP_PIPE_OP,XTERM);
8413 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8414 COPLINE_SET_FROM_MULTI_END;
8417 pl_yylval.ival = OP_CONST;
8418 TERM(sublex_start());
8425 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8426 COPLINE_SET_FROM_MULTI_END;
8429 PL_expect = XOPERATOR;
8430 if (SvCUR(PL_lex_stuff)) {
8431 int warned_comma = !ckWARN(WARN_QW);
8432 int warned_comment = warned_comma;
8433 d = SvPV_force(PL_lex_stuff, len);
8435 for (; isSPACE(*d) && len; --len, ++d)
8440 if (!warned_comma || !warned_comment) {
8441 for (; !isSPACE(*d) && len; --len, ++d) {
8442 if (!warned_comma && *d == ',') {
8443 Perl_warner(aTHX_ packWARN(WARN_QW),
8444 "Possible attempt to separate words with commas");
8447 else if (!warned_comment && *d == '#') {
8448 Perl_warner(aTHX_ packWARN(WARN_QW),
8449 "Possible attempt to put comments in qw() list");
8455 for (; !isSPACE(*d) && len; --len, ++d)
8458 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8459 words = op_append_elem(OP_LIST, words,
8460 newSVOP(OP_CONST, 0, tokeq(sv)));
8465 words = newNULLLIST();
8467 SvREFCNT_dec(PL_lex_stuff);
8468 PL_lex_stuff = NULL;
8470 PL_expect = XOPERATOR;
8471 pl_yylval.opval = sawparens(words);
8476 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8479 pl_yylval.ival = OP_STRINGIFY;
8480 if (SvIVX(PL_lex_stuff) == '\'')
8481 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8482 TERM(sublex_start());
8485 s = scan_pat(s,OP_QR);
8486 TERM(sublex_start());
8489 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8492 readpipe_override();
8493 TERM(sublex_start());
8500 PL_expect = XOPERATOR;
8502 s = force_version(s, FALSE);
8504 else if (*s != 'v' || !isDIGIT(s[1])
8505 || (s = force_version(s, TRUE), *s == 'v'))
8507 *PL_tokenbuf = '\0';
8508 s = force_word(s,WORD,TRUE,TRUE);
8509 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8510 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8511 GV_ADD | (UTF ? SVf_UTF8 : 0));
8513 yyerror("<> should be quotes");
8515 if (orig_keyword == KEY_require) {
8523 PL_last_uni = PL_oldbufptr;
8524 PL_last_lop_op = OP_REQUIRE;
8526 return REPORT( (int)REQUIRE );
8532 PL_expect = XOPERATOR;
8533 s = force_word(s,WORD,TRUE,FALSE);
8537 LOP(OP_RENAME,XTERM);
8546 LOP(OP_RINDEX,XTERM);
8555 UNIDOR(OP_READLINE);
8558 UNIDOR(OP_BACKTICK);
8567 LOP(OP_REVERSE,XTERM);
8570 UNIDOR(OP_READLINK);
8577 if (pl_yylval.opval)
8578 TERM(sublex_start());
8580 TOKEN(1); /* force error */
8583 checkcomma(s,PL_tokenbuf,"filehandle");
8593 LOP(OP_SELECT,XTERM);
8599 LOP(OP_SEMCTL,XTERM);
8602 LOP(OP_SEMGET,XTERM);
8605 LOP(OP_SEMOP,XTERM);
8611 LOP(OP_SETPGRP,XTERM);
8613 case KEY_setpriority:
8614 LOP(OP_SETPRIORITY,XTERM);
8616 case KEY_sethostent:
8622 case KEY_setservent:
8625 case KEY_setprotoent:
8635 LOP(OP_SEEKDIR,XTERM);
8637 case KEY_setsockopt:
8638 LOP(OP_SSOCKOPT,XTERM);
8644 LOP(OP_SHMCTL,XTERM);
8647 LOP(OP_SHMGET,XTERM);
8650 LOP(OP_SHMREAD,XTERM);
8653 LOP(OP_SHMWRITE,XTERM);
8656 LOP(OP_SHUTDOWN,XTERM);
8665 LOP(OP_SOCKET,XTERM);
8667 case KEY_socketpair:
8668 LOP(OP_SOCKPAIR,XTERM);
8671 checkcomma(s,PL_tokenbuf,"subroutine name");
8674 s = force_word(s,WORD,TRUE,TRUE);
8678 LOP(OP_SPLIT,XTERM);
8681 LOP(OP_SPRINTF,XTERM);
8684 LOP(OP_SPLICE,XTERM);
8699 LOP(OP_SUBSTR,XTERM);
8705 char * const tmpbuf = PL_tokenbuf + 1;
8706 expectation attrful;
8707 bool have_name, have_proto;
8708 const int key = tmp;
8710 SV *format_name = NULL;
8716 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8717 SV *subtoken = PL_madskills
8718 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8723 s = SKIPSPACE2(s,tmpwhite);
8729 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8730 (*s == ':' && s[1] == ':'))
8733 SV *nametoke = NULL;
8737 attrful = XATTRBLOCK;
8738 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8742 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8744 if (key == KEY_format)
8745 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8748 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8750 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8752 sv_setpvn(PL_subname, tmpbuf, len);
8754 sv_setsv(PL_subname,PL_curstname);
8755 sv_catpvs(PL_subname,"::");
8756 sv_catpvn(PL_subname,tmpbuf,len);
8758 if (SvUTF8(PL_linestr))
8759 SvUTF8_on(PL_subname);
8765 CURMAD('X', nametoke);
8766 CURMAD('_', tmpwhite);
8767 force_ident_maybe_lex('&');
8769 s = SKIPSPACE2(d,tmpwhite);
8775 if (key == KEY_my || key == KEY_our || key==KEY_state)
8778 /* diag_listed_as: Missing name in "%s sub" */
8780 "Missing name in \"%s\"", PL_bufptr);
8782 PL_expect = XTERMBLOCK;
8783 attrful = XATTRTERM;
8784 sv_setpvs(PL_subname,"?");
8788 if (key == KEY_format) {
8790 PL_thistoken = subtoken;
8794 start_force(PL_curforce);
8795 NEXTVAL_NEXTTOKE.opval
8796 = (OP*)newSVOP(OP_CONST,0, format_name);
8797 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8804 /* Look for a prototype */
8806 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8807 COPLINE_SET_FROM_MULTI_END;
8809 Perl_croak(aTHX_ "Prototype not terminated");
8810 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8815 CURMAD('q', PL_thisopen);
8816 CURMAD('_', tmpwhite);
8817 CURMAD('=', PL_thisstuff);
8818 CURMAD('Q', PL_thisclose);
8819 NEXTVAL_NEXTTOKE.opval =
8820 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8821 PL_lex_stuff = NULL;
8824 s = SKIPSPACE2(s,tmpwhite);
8832 if (*s == ':' && s[1] != ':')
8833 PL_expect = attrful;
8834 else if (*s != '{' && key == KEY_sub) {
8836 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8837 else if (*s != ';' && *s != '}')
8838 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8845 curmad('^', newSVpvs(""));
8846 CURMAD('_', tmpwhite);
8850 PL_thistoken = subtoken;
8851 PERL_UNUSED_VAR(have_proto);
8854 NEXTVAL_NEXTTOKE.opval =
8855 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8856 PL_lex_stuff = NULL;
8862 sv_setpvs(PL_subname, "__ANON__");
8864 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8868 force_ident_maybe_lex('&');
8874 LOP(OP_SYSTEM,XREF);
8877 LOP(OP_SYMLINK,XTERM);
8880 LOP(OP_SYSCALL,XTERM);
8883 LOP(OP_SYSOPEN,XTERM);
8886 LOP(OP_SYSSEEK,XTERM);
8889 LOP(OP_SYSREAD,XTERM);
8892 LOP(OP_SYSWRITE,XTERM);
8897 TERM(sublex_start());
8918 LOP(OP_TRUNCATE,XTERM);
8930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8932 pl_yylval.ival = CopLINE(PL_curcop);
8936 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8938 pl_yylval.ival = CopLINE(PL_curcop);
8942 LOP(OP_UNLINK,XTERM);
8948 LOP(OP_UNPACK,XTERM);
8951 LOP(OP_UTIME,XTERM);
8957 LOP(OP_UNSHIFT,XTERM);
8960 s = tokenize_use(1, s);
8970 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8972 pl_yylval.ival = CopLINE(PL_curcop);
8973 Perl_ck_warner_d(aTHX_
8974 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8975 "when is experimental");
8979 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8981 pl_yylval.ival = CopLINE(PL_curcop);
8985 PL_hints |= HINT_BLOCK_SCOPE;
8992 LOP(OP_WAITPID,XTERM);
8998 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8999 * we use the same number on EBCDIC */
9000 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
9004 if (PL_expect == XOPERATOR) {
9005 if (*s == '=' && !PL_lex_allbrackets &&
9006 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9014 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
9016 pl_yylval.ival = OP_XOR;
9025 Looks up an identifier in the pad or in a package
9028 PRIVATEREF if this is a lexical name.
9029 WORD if this belongs to a package.
9032 if we're in a my declaration
9033 croak if they tried to say my($foo::bar)
9034 build the ops for a my() declaration
9035 if it's an access to a my() variable
9036 build ops for access to a my() variable
9037 if in a dq string, and they've said @foo and we can't find @foo
9039 build ops for a bareword
9043 S_pending_ident(pTHX)
9047 const char pit = (char)pl_yylval.ival;
9048 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9049 /* All routes through this function want to know if there is a colon. */
9050 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9052 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9053 "### Pending identifier '%s'\n", PL_tokenbuf); });
9055 /* if we're in a my(), we can't allow dynamics here.
9056 $foo'bar has already been turned into $foo::bar, so
9057 just check for colons.
9059 if it's a legal name, the OP is a PADANY.
9062 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9064 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9065 "variable %s in \"our\"",
9066 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9067 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9071 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9072 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9073 UTF ? SVf_UTF8 : 0);
9075 pl_yylval.opval = newOP(OP_PADANY, 0);
9076 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9077 UTF ? SVf_UTF8 : 0);
9083 build the ops for accesses to a my() variable.
9088 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9089 UTF ? SVf_UTF8 : 0);
9090 if (tmp != NOT_IN_PAD) {
9091 /* might be an "our" variable" */
9092 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9093 /* build ops for a bareword */
9094 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9095 HEK * const stashname = HvNAME_HEK(stash);
9096 SV * const sym = newSVhek(stashname);
9097 sv_catpvs(sym, "::");
9098 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9099 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9100 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9104 ? (GV_ADDMULTI | GV_ADDINEVAL)
9107 ((PL_tokenbuf[0] == '$') ? SVt_PV
9108 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9113 pl_yylval.opval = newOP(OP_PADANY, 0);
9114 pl_yylval.opval->op_targ = tmp;
9120 Whine if they've said @foo in a doublequoted string,
9121 and @foo isn't a variable we can find in the symbol
9124 if (ckWARN(WARN_AMBIGUOUS) &&
9125 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9126 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9127 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9128 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9129 /* DO NOT warn for @- and @+ */
9130 && !( PL_tokenbuf[2] == '\0' &&
9131 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9134 /* Downgraded from fatal to warning 20000522 mjd */
9135 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9136 "Possible unintended interpolation of %"UTF8f
9138 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9142 /* build ops for a bareword */
9143 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9144 newSVpvn_flags(PL_tokenbuf + 1,
9146 UTF ? SVf_UTF8 : 0 ));
9147 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9149 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9150 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9151 | ( UTF ? SVf_UTF8 : 0 ),
9152 ((PL_tokenbuf[0] == '$') ? SVt_PV
9153 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9159 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9163 PERL_ARGS_ASSERT_CHECKCOMMA;
9165 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9166 if (ckWARN(WARN_SYNTAX)) {
9169 for (w = s+2; *w && level; w++) {
9177 /* the list of chars below is for end of statements or
9178 * block / parens, boolean operators (&&, ||, //) and branch
9179 * constructs (or, and, if, until, unless, while, err, for).
9180 * Not a very solid hack... */
9181 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9183 "%s (...) interpreted as function",name);
9186 while (s < PL_bufend && isSPACE(*s))
9190 while (s < PL_bufend && isSPACE(*s))
9192 if (isIDFIRST_lazy_if(s,UTF)) {
9193 const char * const w = s;
9194 s += UTF ? UTF8SKIP(s) : 1;
9195 while (isWORDCHAR_lazy_if(s,UTF))
9196 s += UTF ? UTF8SKIP(s) : 1;
9197 while (s < PL_bufend && isSPACE(*s))
9201 if (keyword(w, s - w, 0))
9204 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9205 if (gv && GvCVu(gv))
9207 Perl_croak(aTHX_ "No comma allowed after %s", what);
9212 /* S_new_constant(): do any overload::constant lookup.
9214 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9215 Best used as sv=new_constant(..., sv, ...).
9216 If s, pv are NULL, calls subroutine with one argument,
9217 and <type> is used with error messages only.
9218 <type> is assumed to be well formed UTF-8 */
9221 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9222 SV *sv, SV *pv, const char *type, STRLEN typelen)
9225 HV * table = GvHV(PL_hintgv); /* ^H */
9230 const char *why1 = "", *why2 = "", *why3 = "";
9232 PERL_ARGS_ASSERT_NEW_CONSTANT;
9233 /* We assume that this is true: */
9234 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9237 /* charnames doesn't work well if there have been errors found */
9238 if (PL_error_count > 0 && *key == 'c')
9240 SvREFCNT_dec_NN(sv);
9241 return &PL_sv_undef;
9244 sv_2mortal(sv); /* Parent created it permanently */
9246 || ! (PL_hints & HINT_LOCALIZE_HH)
9247 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9252 /* Here haven't found what we're looking for. If it is charnames,
9253 * perhaps it needs to be loaded. Try doing that before giving up */
9255 Perl_load_module(aTHX_
9257 newSVpvs("_charnames"),
9258 /* version parameter; no need to specify it, as if
9259 * we get too early a version, will fail anyway,
9260 * not being able to find '_charnames' */
9266 table = GvHV(PL_hintgv);
9268 && (PL_hints & HINT_LOCALIZE_HH)
9269 && (cvp = hv_fetch(table, key, keylen, FALSE))
9275 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9276 msg = Perl_form(aTHX_
9277 "Constant(%.*s) unknown",
9278 (int)(type ? typelen : len),
9284 why3 = "} is not defined";
9287 msg = Perl_form(aTHX_
9288 /* The +3 is for '\N{'; -4 for that, plus '}' */
9289 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9293 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9294 (int)(type ? typelen : len),
9295 (type ? type: s), why1, why2, why3);
9298 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9299 return SvREFCNT_inc_simple_NN(sv);
9304 pv = newSVpvn_flags(s, len, SVs_TEMP);
9306 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9308 typesv = &PL_sv_undef;
9310 PUSHSTACKi(PERLSI_OVERLOAD);
9322 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9326 /* Check the eval first */
9327 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9329 const char * errstr;
9330 sv_catpvs(errsv, "Propagated");
9331 errstr = SvPV_const(errsv, errlen);
9332 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9334 res = SvREFCNT_inc_simple_NN(sv);
9338 SvREFCNT_inc_simple_void_NN(res);
9347 why1 = "Call to &{$^H{";
9349 why3 = "}} did not return a defined value";
9351 (void)sv_2mortal(sv);
9358 PERL_STATIC_INLINE void
9359 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9361 PERL_ARGS_ASSERT_PARSE_IDENT;
9365 Perl_croak(aTHX_ "%s", ident_too_long);
9366 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9367 /* The UTF-8 case must come first, otherwise things
9368 * like c\N{COMBINING TILDE} would start failing, as the
9369 * isWORDCHAR_A case below would gobble the 'c' up.
9372 char *t = *s + UTF8SKIP(*s);
9373 while (isIDCONT_utf8((U8*)t))
9375 if (*d + (t - *s) > e)
9376 Perl_croak(aTHX_ "%s", ident_too_long);
9377 Copy(*s, *d, t - *s, char);
9381 else if ( isWORDCHAR_A(**s) ) {
9384 } while (isWORDCHAR_A(**s) && *d < e);
9386 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9391 else if (allow_package && **s == ':' && (*s)[1] == ':'
9392 /* Disallow things like Foo::$bar. For the curious, this is
9393 * the code path that triggers the "Bad name after" warning
9394 * when looking for barewords.
9396 && (*s)[2] != '$') {
9406 /* Returns a NUL terminated string, with the length of the string written to
9410 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9414 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9415 bool is_utf8 = cBOOL(UTF);
9417 PERL_ARGS_ASSERT_SCAN_WORD;
9419 parse_ident(&s, &d, e, allow_package, is_utf8);
9426 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9429 I32 herelines = PL_parser->herelines;
9430 SSize_t bracket = -1;
9433 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9434 bool is_utf8 = cBOOL(UTF);
9435 I32 orig_copline = 0, tmp_copline = 0;
9437 PERL_ARGS_ASSERT_SCAN_IDENT;
9442 while (isDIGIT(*s)) {
9444 Perl_croak(aTHX_ "%s", ident_too_long);
9449 parse_ident(&s, &d, e, 1, is_utf8);
9454 /* Either a digit variable, or parse_ident() found an identifier
9455 (anything valid as a bareword), so job done and return. */
9456 if (PL_lex_state != LEX_NORMAL)
9457 PL_lex_state = LEX_INTERPENDMAYBE;
9460 if (*s == '$' && s[1] &&
9461 (isIDFIRST_lazy_if(s+1,is_utf8)
9462 || isDIGIT_A((U8)s[1])
9465 || strnEQ(s+1,"::",2)) )
9467 /* Dereferencing a value in a scalar variable.
9468 The alternatives are different syntaxes for a scalar variable.
9469 Using ' as a leading package separator isn't allowed. :: is. */
9472 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9474 bracket = s - SvPVX(PL_linestr);
9476 orig_copline = CopLINE(PL_curcop);
9477 if (s < PL_bufend && isSPACE(*s)) {
9482 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9483 * iff Unicode semantics are to be used. The legal ones are any of:
9485 * b) ASCII punctuation
9486 * c) When not under Unicode rules, any upper Latin1 character
9487 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9488 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9490 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9491 || isDIGIT_A((U8)(d)) \
9492 || (!(u) && !isASCII((U8)(d))) \
9493 || ((((U8)(d)) < 32) \
9494 && (((((U8)(d)) >= 14) \
9495 || (((U8)(d)) <= 8 && (d) != 0) \
9496 || (((U8)(d)) == 13)))) \
9497 || (((U8)(d)) == toCTRL('?')))
9499 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9501 if ( isCNTRL_A((U8)*s) ) {
9502 deprecate("literal control characters in variable names");
9506 const STRLEN skip = UTF8SKIP(s);
9509 for ( i = 0; i < skip; i++ )
9517 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9518 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9522 /* Warn about ambiguous code after unary operators if {...} notation isn't
9523 used. There's no difference in ambiguity; it's merely a heuristic
9524 about when not to warn. */
9525 else if (ck_uni && bracket == -1)
9527 if (bracket != -1) {
9528 /* If we were processing {...} notation then... */
9529 if (isIDFIRST_lazy_if(d,is_utf8)) {
9530 /* if it starts as a valid identifier, assume that it is one.
9531 (the later check for } being at the expected point will trap
9532 cases where this doesn't pan out.) */
9533 d += is_utf8 ? UTF8SKIP(d) : 1;
9534 parse_ident(&s, &d, e, 1, is_utf8);
9536 tmp_copline = CopLINE(PL_curcop);
9537 if (s < PL_bufend && isSPACE(*s)) {
9540 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9541 /* ${foo[0]} and ${foo{bar}} notation. */
9542 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9543 const char * const brack =
9545 ((*s == '[') ? "[...]" : "{...}");
9546 orig_copline = CopLINE(PL_curcop);
9547 CopLINE_set(PL_curcop, tmp_copline);
9548 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9549 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9550 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9551 funny, dest, brack, funny, dest, brack);
9552 CopLINE_set(PL_curcop, orig_copline);
9555 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9556 PL_lex_allbrackets++;
9560 /* Handle extended ${^Foo} variables
9561 * 1999-02-27 mjd-perl-patch@plover.com */
9562 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9566 while (isWORDCHAR(*s) && d < e) {
9570 Perl_croak(aTHX_ "%s", ident_too_long);
9575 tmp_copline = CopLINE(PL_curcop);
9576 if (s < PL_bufend && isSPACE(*s)) {
9580 /* Expect to find a closing } after consuming any trailing whitespace.
9584 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9585 PL_lex_state = LEX_INTERPEND;
9588 if (PL_lex_state == LEX_NORMAL) {
9589 if (ckWARN(WARN_AMBIGUOUS) &&
9590 (keyword(dest, d - dest, 0)
9591 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9593 SV *tmp = newSVpvn_flags( dest, d - dest,
9594 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9597 orig_copline = CopLINE(PL_curcop);
9598 CopLINE_set(PL_curcop, tmp_copline);
9599 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9600 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9601 funny, tmp, funny, tmp);
9602 CopLINE_set(PL_curcop, orig_copline);
9607 /* Didn't find the closing } at the point we expected, so restore
9608 state such that the next thing to process is the opening { and */
9609 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9610 CopLINE_set(PL_curcop, orig_copline);
9611 PL_parser->herelines = herelines;
9615 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9616 PL_lex_state = LEX_INTERPEND;
9621 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9623 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9624 * the parse starting at 's', based on the subset that are valid in this
9625 * context input to this routine in 'valid_flags'. Advances s. Returns
9626 * TRUE if the input should be treated as a valid flag, so the next char
9627 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9628 * first call on the current regex. This routine will set it to any
9629 * charset modifier found. The caller shouldn't change it. This way,
9630 * another charset modifier encountered in the parse can be detected as an
9631 * error, as we have decided to allow only one */
9634 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9636 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9637 if (isWORDCHAR_lazy_if(*s, UTF)) {
9638 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9639 UTF ? SVf_UTF8 : 0);
9641 /* Pretend that it worked, so will continue processing before
9650 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9651 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9652 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9653 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9654 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9655 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9656 case LOCALE_PAT_MOD:
9658 goto multiple_charsets;
9660 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9663 case UNICODE_PAT_MOD:
9665 goto multiple_charsets;
9667 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9670 case ASCII_RESTRICT_PAT_MOD:
9672 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9676 /* Error if previous modifier wasn't an 'a', but if it was, see
9677 * if, and accept, a second occurrence (only) */
9679 || get_regex_charset(*pmfl)
9680 != REGEX_ASCII_RESTRICTED_CHARSET)
9682 goto multiple_charsets;
9684 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9688 case DEPENDS_PAT_MOD:
9690 goto multiple_charsets;
9692 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9701 if (*charset != c) {
9702 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9704 else if (c == 'a') {
9705 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9708 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9711 /* Pretend that it worked, so will continue processing before dieing */
9717 S_scan_pat(pTHX_ char *start, I32 type)
9722 const char * const valid_flags =
9723 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9724 char charset = '\0'; /* character set modifier */
9729 PERL_ARGS_ASSERT_SCAN_PAT;
9731 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9732 TRUE /* look for escaped bracketed metas */ );
9735 const char * const delimiter = skipspace(start);
9739 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9740 : "Search pattern not terminated" ));
9743 pm = (PMOP*)newPMOP(type, 0);
9744 if (PL_multi_open == '?') {
9745 /* This is the only point in the code that sets PMf_ONCE: */
9746 pm->op_pmflags |= PMf_ONCE;
9748 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9749 allows us to restrict the list needed by reset to just the ??
9751 assert(type != OP_TRANS);
9753 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9756 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9759 elements = mg->mg_len / sizeof(PMOP**);
9760 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9761 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9762 mg->mg_len = elements * sizeof(PMOP**);
9763 PmopSTASH_set(pm,PL_curstash);
9770 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9771 * anon CV. False positives like qr/[(?{]/ are harmless */
9773 if (type == OP_QR) {
9775 char *e, *p = SvPV(PL_lex_stuff, len);
9777 for (; p < e; p++) {
9778 if (p[0] == '(' && p[1] == '?'
9779 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9781 pm->op_pmflags |= PMf_HAS_CV;
9785 pm->op_pmflags |= PMf_IS_QR;
9788 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9790 if (PL_madskills && modstart != s) {
9791 SV* tmptoken = newSVpvn(modstart, s - modstart);
9792 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9795 /* issue a warning if /c is specified,but /g is not */
9796 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9798 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9799 "Use of /c modifier is meaningless without /g" );
9802 PL_lex_op = (OP*)pm;
9803 pl_yylval.ival = OP_MATCH;
9808 S_scan_subst(pTHX_ char *start)
9816 char charset = '\0'; /* character set modifier */
9821 PERL_ARGS_ASSERT_SCAN_SUBST;
9823 pl_yylval.ival = OP_NULL;
9825 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9826 TRUE /* look for escaped bracketed metas */ );
9829 Perl_croak(aTHX_ "Substitution pattern not terminated");
9831 if (s[-1] == PL_multi_open)
9835 CURMAD('q', PL_thisopen);
9836 CURMAD('_', PL_thiswhite);
9837 CURMAD('E', PL_thisstuff);
9838 CURMAD('Q', PL_thisclose);
9839 PL_realtokenstart = s - SvPVX(PL_linestr);
9843 first_start = PL_multi_start;
9844 first_line = CopLINE(PL_curcop);
9845 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9848 SvREFCNT_dec(PL_lex_stuff);
9849 PL_lex_stuff = NULL;
9851 Perl_croak(aTHX_ "Substitution replacement not terminated");
9853 PL_multi_start = first_start; /* so whole substitution is taken together */
9855 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9859 CURMAD('z', PL_thisopen);
9860 CURMAD('R', PL_thisstuff);
9861 CURMAD('Z', PL_thisclose);
9867 if (*s == EXEC_PAT_MOD) {
9871 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9880 curmad('m', newSVpvn(modstart, s - modstart));
9881 append_madprops(PL_thismad, (OP*)pm, 0);
9885 if ((pm->op_pmflags & PMf_CONTINUE)) {
9886 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9890 SV * const repl = newSVpvs("");
9893 pm->op_pmflags |= PMf_EVAL;
9896 sv_catpvs(repl, "eval ");
9898 sv_catpvs(repl, "do ");
9900 sv_catpvs(repl, "{");
9901 sv_catsv(repl, PL_sublex_info.repl);
9902 sv_catpvs(repl, "}");
9904 SvREFCNT_dec(PL_sublex_info.repl);
9905 PL_sublex_info.repl = repl;
9907 if (CopLINE(PL_curcop) != first_line) {
9908 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9909 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9910 CopLINE(PL_curcop) - first_line;
9911 CopLINE_set(PL_curcop, first_line);
9914 PL_lex_op = (OP*)pm;
9915 pl_yylval.ival = OP_SUBST;
9920 S_scan_trans(pTHX_ char *start)
9928 bool nondestruct = 0;
9933 PERL_ARGS_ASSERT_SCAN_TRANS;
9935 pl_yylval.ival = OP_NULL;
9937 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9939 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9941 if (s[-1] == PL_multi_open)
9945 CURMAD('q', PL_thisopen);
9946 CURMAD('_', PL_thiswhite);
9947 CURMAD('E', PL_thisstuff);
9948 CURMAD('Q', PL_thisclose);
9949 PL_realtokenstart = s - SvPVX(PL_linestr);
9953 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9956 SvREFCNT_dec(PL_lex_stuff);
9957 PL_lex_stuff = NULL;
9959 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9962 CURMAD('z', PL_thisopen);
9963 CURMAD('R', PL_thisstuff);
9964 CURMAD('Z', PL_thisclose);
9967 complement = del = squash = 0;
9974 complement = OPpTRANS_COMPLEMENT;
9977 del = OPpTRANS_DELETE;
9980 squash = OPpTRANS_SQUASH;
9992 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9993 o->op_private &= ~OPpTRANS_ALL;
9994 o->op_private |= del|squash|complement|
9995 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9996 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9999 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10002 if (PL_madskills) {
10004 curmad('m', newSVpvn(modstart, s - modstart));
10005 append_madprops(PL_thismad, o, 0);
10014 Takes a pointer to the first < in <<FOO.
10015 Returns a pointer to the byte following <<FOO.
10017 This function scans a heredoc, which involves different methods
10018 depending on whether we are in a string eval, quoted construct, etc.
10019 This is because PL_linestr could containing a single line of input, or
10020 a whole string being evalled, or the contents of the current quote-
10023 The two basic methods are:
10024 - Steal lines from the input stream
10025 - Scan the heredoc in PL_linestr and remove it therefrom
10027 In a file scope or filtered eval, the first method is used; in a
10028 string eval, the second.
10030 In a quote-like operator, we have to choose between the two,
10031 depending on where we can find a newline. We peek into outer lex-
10032 ing scopes until we find one with a newline in it. If we reach the
10033 outermost lexing scope and it is a file, we use the stream method.
10034 Otherwise it is treated as an eval.
10038 S_scan_heredoc(pTHX_ char *s)
10041 I32 op_type = OP_SCALAR;
10048 const bool infile = PL_rsfp || PL_parser->filtered;
10049 const line_t origline = CopLINE(PL_curcop);
10050 LEXSHARED *shared = PL_parser->lex_shared;
10052 I32 stuffstart = s - SvPVX(PL_linestr);
10055 PL_realtokenstart = -1;
10058 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10061 d = PL_tokenbuf + 1;
10062 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10063 *PL_tokenbuf = '\n';
10065 while (SPACE_OR_TAB(*peek))
10067 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10070 s = delimcpy(d, e, s, PL_bufend, term, &len);
10071 if (s == PL_bufend)
10072 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10078 /* <<\FOO is equivalent to <<'FOO' */
10082 if (!isWORDCHAR_lazy_if(s,UTF))
10083 deprecate("bare << to mean <<\"\"");
10084 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10089 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10090 Perl_croak(aTHX_ "Delimiter for here document is too long");
10093 len = d - PL_tokenbuf;
10096 if (PL_madskills) {
10097 tstart = PL_tokenbuf + 1;
10098 PL_thisclose = newSVpvn(tstart, len - 1);
10099 tstart = SvPVX(PL_linestr) + stuffstart;
10100 PL_thisopen = newSVpvn(tstart, s - tstart);
10101 stuffstart = s - SvPVX(PL_linestr);
10104 #ifndef PERL_STRICT_CR
10105 d = strchr(s, '\r');
10107 char * const olds = s;
10109 while (s < PL_bufend) {
10115 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10124 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10129 if (PL_madskills) {
10130 tstart = SvPVX(PL_linestr) + stuffstart;
10132 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10134 PL_thisstuff = newSVpvn(tstart, s - tstart);
10137 stuffstart = s - SvPVX(PL_linestr);
10140 tmpstr = newSV_type(SVt_PVIV);
10141 SvGROW(tmpstr, 80);
10142 if (term == '\'') {
10143 op_type = OP_CONST;
10144 SvIV_set(tmpstr, -1);
10146 else if (term == '`') {
10147 op_type = OP_BACKTICK;
10148 SvIV_set(tmpstr, '\\');
10151 PL_multi_start = origline + 1 + PL_parser->herelines;
10152 PL_multi_open = PL_multi_close = '<';
10153 /* inside a string eval or quote-like operator */
10154 if (!infile || PL_lex_inwhat) {
10157 char * const olds = s;
10158 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10159 /* These two fields are not set until an inner lexing scope is
10160 entered. But we need them set here. */
10161 shared->ls_bufptr = s;
10162 shared->ls_linestr = PL_linestr;
10164 /* Look for a newline. If the current buffer does not have one,
10165 peek into the line buffer of the parent lexing scope, going
10166 up as many levels as necessary to find one with a newline
10169 while (!(s = (char *)memchr(
10170 (void *)shared->ls_bufptr, '\n',
10171 SvEND(shared->ls_linestr)-shared->ls_bufptr
10173 shared = shared->ls_prev;
10174 /* shared is only null if we have gone beyond the outermost
10175 lexing scope. In a file, we will have broken out of the
10176 loop in the previous iteration. In an eval, the string buf-
10177 fer ends with "\n;", so the while condition above will have
10178 evaluated to false. So shared can never be null. */
10180 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10181 most lexing scope. In a file, shared->ls_linestr at that
10182 level is just one line, so there is no body to steal. */
10183 if (infile && !shared->ls_prev) {
10189 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10192 linestr = shared->ls_linestr;
10193 bufend = SvEND(linestr);
10195 while (s < bufend - len + 1 &&
10196 memNE(s,PL_tokenbuf,len) ) {
10198 ++PL_parser->herelines;
10200 if (s >= bufend - len + 1) {
10203 sv_setpvn(tmpstr,d+1,s-d);
10205 if (PL_madskills) {
10207 sv_catpvn(PL_thisstuff, d + 1, s - d);
10209 PL_thisstuff = newSVpvn(d + 1, s - d);
10210 stuffstart = s - SvPVX(PL_linestr);
10214 /* the preceding stmt passes a newline */
10215 PL_parser->herelines++;
10217 /* s now points to the newline after the heredoc terminator.
10218 d points to the newline before the body of the heredoc.
10221 /* We are going to modify linestr in place here, so set
10222 aside copies of the string if necessary for re-evals or
10224 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10225 check shared->re_eval_str. */
10226 if (shared->re_eval_start || shared->re_eval_str) {
10227 /* Set aside the rest of the regexp */
10228 if (!shared->re_eval_str)
10229 shared->re_eval_str =
10230 newSVpvn(shared->re_eval_start,
10231 bufend - shared->re_eval_start);
10232 shared->re_eval_start -= s-d;
10234 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10235 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10236 cx->blk_eval.cur_text == linestr)
10238 cx->blk_eval.cur_text = newSVsv(linestr);
10239 SvSCREAM_on(cx->blk_eval.cur_text);
10241 /* Copy everything from s onwards back to d. */
10242 Move(s,d,bufend-s + 1,char);
10243 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10244 /* Setting PL_bufend only applies when we have not dug deeper
10245 into other scopes, because sublex_done sets PL_bufend to
10246 SvEND(PL_linestr). */
10247 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10254 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10255 term = PL_tokenbuf[1];
10257 linestr_save = PL_linestr; /* must restore this afterwards */
10258 d = s; /* and this */
10259 PL_linestr = newSVpvs("");
10260 PL_bufend = SvPVX(PL_linestr);
10263 if (PL_madskills) {
10264 tstart = SvPVX(PL_linestr) + stuffstart;
10266 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10268 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10271 PL_bufptr = PL_bufend;
10272 CopLINE_set(PL_curcop,
10273 origline + 1 + PL_parser->herelines);
10274 if (!lex_next_chunk(LEX_NO_TERM)
10275 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10276 SvREFCNT_dec(linestr_save);
10279 CopLINE_set(PL_curcop, origline);
10280 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10281 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10282 /* ^That should be enough to avoid this needing to grow: */
10283 sv_catpvs(PL_linestr, "\n\0");
10284 assert(s == SvPVX(PL_linestr));
10285 PL_bufend = SvEND(PL_linestr);
10289 stuffstart = s - SvPVX(PL_linestr);
10291 PL_parser->herelines++;
10292 PL_last_lop = PL_last_uni = NULL;
10293 #ifndef PERL_STRICT_CR
10294 if (PL_bufend - PL_linestart >= 2) {
10295 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10296 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10298 PL_bufend[-2] = '\n';
10300 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10302 else if (PL_bufend[-1] == '\r')
10303 PL_bufend[-1] = '\n';
10305 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10306 PL_bufend[-1] = '\n';
10308 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10309 SvREFCNT_dec(PL_linestr);
10310 PL_linestr = linestr_save;
10311 PL_linestart = SvPVX(linestr_save);
10312 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10317 sv_catsv(tmpstr,PL_linestr);
10321 PL_multi_end = origline + PL_parser->herelines;
10322 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10323 SvPV_shrink_to_cur(tmpstr);
10326 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10328 else if (PL_encoding)
10329 sv_recode_to_utf8(tmpstr, PL_encoding);
10331 PL_lex_stuff = tmpstr;
10332 pl_yylval.ival = op_type;
10336 SvREFCNT_dec(tmpstr);
10337 CopLINE_set(PL_curcop, origline);
10338 missingterm(PL_tokenbuf + 1);
10341 /* scan_inputsymbol
10342 takes: current position in input buffer
10343 returns: new position in input buffer
10344 side-effects: pl_yylval and lex_op are set.
10349 <FH> read from filehandle
10350 <pkg::FH> read from package qualified filehandle
10351 <pkg'FH> read from package qualified filehandle
10352 <$fh> read from filehandle in $fh
10353 <*.h> filename glob
10358 S_scan_inputsymbol(pTHX_ char *start)
10361 char *s = start; /* current position in buffer */
10364 char *d = PL_tokenbuf; /* start of temp holding space */
10365 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10367 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10369 end = strchr(s, '\n');
10372 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10374 /* die if we didn't have space for the contents of the <>,
10375 or if it didn't end, or if we see a newline
10378 if (len >= (I32)sizeof PL_tokenbuf)
10379 Perl_croak(aTHX_ "Excessively long <> operator");
10381 Perl_croak(aTHX_ "Unterminated <> operator");
10386 Remember, only scalar variables are interpreted as filehandles by
10387 this code. Anything more complex (e.g., <$fh{$num}>) will be
10388 treated as a glob() call.
10389 This code makes use of the fact that except for the $ at the front,
10390 a scalar variable and a filehandle look the same.
10392 if (*d == '$' && d[1]) d++;
10394 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10395 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10396 d += UTF ? UTF8SKIP(d) : 1;
10398 /* If we've tried to read what we allow filehandles to look like, and
10399 there's still text left, then it must be a glob() and not a getline.
10400 Use scan_str to pull out the stuff between the <> and treat it
10401 as nothing more than a string.
10404 if (d - PL_tokenbuf != len) {
10405 pl_yylval.ival = OP_GLOB;
10406 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10408 Perl_croak(aTHX_ "Glob not terminated");
10412 bool readline_overriden = FALSE;
10415 /* we're in a filehandle read situation */
10418 /* turn <> into <ARGV> */
10420 Copy("ARGV",d,5,char);
10422 /* Check whether readline() is overriden */
10423 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10425 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10427 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10428 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10429 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10430 readline_overriden = TRUE;
10432 /* if <$fh>, create the ops to turn the variable into a
10436 /* try to find it in the pad for this block, otherwise find
10437 add symbol table ops
10439 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10440 if (tmp != NOT_IN_PAD) {
10441 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10442 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10443 HEK * const stashname = HvNAME_HEK(stash);
10444 SV * const sym = sv_2mortal(newSVhek(stashname));
10445 sv_catpvs(sym, "::");
10446 sv_catpv(sym, d+1);
10451 OP * const o = newOP(OP_PADSV, 0);
10453 PL_lex_op = readline_overriden
10454 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10455 op_append_elem(OP_LIST, o,
10456 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10457 : (OP*)newUNOP(OP_READLINE, 0, o);
10466 ? (GV_ADDMULTI | GV_ADDINEVAL)
10467 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10469 PL_lex_op = readline_overriden
10470 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10471 op_append_elem(OP_LIST,
10472 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10473 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10474 : (OP*)newUNOP(OP_READLINE, 0,
10475 newUNOP(OP_RV2SV, 0,
10476 newGVOP(OP_GV, 0, gv)));
10478 if (!readline_overriden)
10479 PL_lex_op->op_flags |= OPf_SPECIAL;
10480 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10481 pl_yylval.ival = OP_NULL;
10484 /* If it's none of the above, it must be a literal filehandle
10485 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10487 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10488 PL_lex_op = readline_overriden
10489 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10490 op_append_elem(OP_LIST,
10491 newGVOP(OP_GV, 0, gv),
10492 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10493 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10494 pl_yylval.ival = OP_NULL;
10504 start position in buffer
10505 keep_quoted preserve \ on the embedded delimiter(s)
10506 keep_delims preserve the delimiters around the string
10507 re_reparse compiling a run-time /(?{})/:
10508 collapse // to /, and skip encoding src
10509 deprecate_escaped_meta issue a deprecation warning for cer-
10510 tain paired metacharacters that appear
10512 returns: position to continue reading from buffer
10513 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10514 updates the read buffer.
10516 This subroutine pulls a string out of the input. It is called for:
10517 q single quotes q(literal text)
10518 ' single quotes 'literal text'
10519 qq double quotes qq(interpolate $here please)
10520 " double quotes "interpolate $here please"
10521 qx backticks qx(/bin/ls -l)
10522 ` backticks `/bin/ls -l`
10523 qw quote words @EXPORT_OK = qw( func() $spam )
10524 m// regexp match m/this/
10525 s/// regexp substitute s/this/that/
10526 tr/// string transliterate tr/this/that/
10527 y/// string transliterate y/this/that/
10528 ($*@) sub prototypes sub foo ($)
10529 (stuff) sub attr parameters sub foo : attr(stuff)
10530 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10532 In most of these cases (all but <>, patterns and transliterate)
10533 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10534 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10535 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10538 It skips whitespace before the string starts, and treats the first
10539 character as the delimiter. If the delimiter is one of ([{< then
10540 the corresponding "close" character )]}> is used as the closing
10541 delimiter. It allows quoting of delimiters, and if the string has
10542 balanced delimiters ([{<>}]) it allows nesting.
10544 On success, the SV with the resulting string is put into lex_stuff or,
10545 if that is already non-NULL, into lex_repl. The second case occurs only
10546 when parsing the RHS of the special constructs s/// and tr/// (y///).
10547 For convenience, the terminating delimiter character is stuffed into
10552 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10553 bool deprecate_escaped_meta
10557 SV *sv; /* scalar value: string */
10558 const char *tmps; /* temp string, used for delimiter matching */
10559 char *s = start; /* current position in the buffer */
10560 char term; /* terminating character */
10561 char *to; /* current position in the sv's data */
10562 I32 brackets = 1; /* bracket nesting level */
10563 bool has_utf8 = FALSE; /* is there any utf8 content? */
10564 I32 termcode; /* terminating char. code */
10565 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10566 STRLEN termlen; /* length of terminating string */
10567 int last_off = 0; /* last position for nesting bracket */
10568 char *escaped_open = NULL;
10575 PERL_ARGS_ASSERT_SCAN_STR;
10577 /* skip space before the delimiter */
10583 if (PL_realtokenstart >= 0) {
10584 stuffstart = PL_realtokenstart;
10585 PL_realtokenstart = -1;
10588 stuffstart = start - SvPVX(PL_linestr);
10590 /* mark where we are, in case we need to report errors */
10593 /* after skipping whitespace, the next character is the terminator */
10596 termcode = termstr[0] = term;
10600 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10601 Copy(s, termstr, termlen, U8);
10602 if (!UTF8_IS_INVARIANT(term))
10606 /* mark where we are */
10607 PL_multi_start = CopLINE(PL_curcop);
10608 PL_multi_open = term;
10609 herelines = PL_parser->herelines;
10611 /* find corresponding closing delimiter */
10612 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10613 termcode = termstr[0] = term = tmps[5];
10615 PL_multi_close = term;
10617 /* A warning is raised if the input parameter requires it for escaped (by a
10618 * backslash) paired metacharacters {} [] and () when the delimiters are
10619 * those same characters, and the backslash is ineffective. This doesn't
10620 * happen for <>, as they aren't metas. */
10621 if (deprecate_escaped_meta
10622 && (PL_multi_open == PL_multi_close
10623 || PL_multi_open == '<'
10624 || ! ckWARN_d(WARN_DEPRECATED)))
10626 deprecate_escaped_meta = FALSE;
10629 /* create a new SV to hold the contents. 79 is the SV's initial length.
10630 What a random number. */
10631 sv = newSV_type(SVt_PVIV);
10633 SvIV_set(sv, termcode);
10634 (void)SvPOK_only(sv); /* validate pointer */
10636 /* move past delimiter and try to read a complete string */
10638 sv_catpvn(sv, s, termlen);
10641 tstart = SvPVX(PL_linestr) + stuffstart;
10642 if (PL_madskills && !PL_thisopen && !keep_delims) {
10643 PL_thisopen = newSVpvn(tstart, s - tstart);
10644 stuffstart = s - SvPVX(PL_linestr);
10648 if (PL_encoding && !UTF && !re_reparse) {
10652 int offset = s - SvPVX_const(PL_linestr);
10653 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10654 &offset, (char*)termstr, termlen);
10658 if (SvIsCOW(PL_linestr)) {
10659 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10660 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10661 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10662 char *buf = SvPVX(PL_linestr);
10663 bufend_pos = PL_parser->bufend - buf;
10664 bufptr_pos = PL_parser->bufptr - buf;
10665 oldbufptr_pos = PL_parser->oldbufptr - buf;
10666 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10667 linestart_pos = PL_parser->linestart - buf;
10668 last_uni_pos = PL_parser->last_uni
10669 ? PL_parser->last_uni - buf
10671 last_lop_pos = PL_parser->last_lop
10672 ? PL_parser->last_lop - buf
10674 re_eval_start_pos =
10675 PL_parser->lex_shared->re_eval_start ?
10676 PL_parser->lex_shared->re_eval_start - buf : 0;
10679 sv_force_normal(PL_linestr);
10681 buf = SvPVX(PL_linestr);
10682 PL_parser->bufend = buf + bufend_pos;
10683 PL_parser->bufptr = buf + bufptr_pos;
10684 PL_parser->oldbufptr = buf + oldbufptr_pos;
10685 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10686 PL_parser->linestart = buf + linestart_pos;
10687 if (PL_parser->last_uni)
10688 PL_parser->last_uni = buf + last_uni_pos;
10689 if (PL_parser->last_lop)
10690 PL_parser->last_lop = buf + last_lop_pos;
10691 if (PL_parser->lex_shared->re_eval_start)
10692 PL_parser->lex_shared->re_eval_start =
10693 buf + re_eval_start_pos;
10696 ns = SvPVX_const(PL_linestr) + offset;
10697 svlast = SvEND(sv) - 1;
10699 for (; s < ns; s++) {
10700 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10701 COPLINE_INC_WITH_HERELINES;
10704 goto read_more_line;
10706 /* handle quoted delimiters */
10707 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10709 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10711 if ((svlast-1 - t) % 2) {
10712 if (!keep_quoted) {
10713 *(svlast-1) = term;
10715 SvCUR_set(sv, SvCUR(sv) - 1);
10720 if (PL_multi_open == PL_multi_close) {
10726 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10727 /* At here, all closes are "was quoted" one,
10728 so we don't check PL_multi_close. */
10730 if (!keep_quoted && *(t+1) == PL_multi_open)
10735 else if (*t == PL_multi_open)
10743 SvCUR_set(sv, w - SvPVX_const(sv));
10745 last_off = w - SvPVX(sv);
10746 if (--brackets <= 0)
10751 if (!keep_delims) {
10752 SvCUR_set(sv, SvCUR(sv) - 1);
10758 /* extend sv if need be */
10759 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10760 /* set 'to' to the next character in the sv's string */
10761 to = SvPVX(sv)+SvCUR(sv);
10763 /* if open delimiter is the close delimiter read unbridle */
10764 if (PL_multi_open == PL_multi_close) {
10765 for (; s < PL_bufend; s++,to++) {
10766 /* embedded newlines increment the current line number */
10767 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10768 COPLINE_INC_WITH_HERELINES;
10769 /* handle quoted delimiters */
10770 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10773 || (re_reparse && s[1] == '\\'))
10776 /* any other quotes are simply copied straight through */
10780 /* terminate when run out of buffer (the for() condition), or
10781 have found the terminator */
10782 else if (*s == term) {
10785 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10788 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10794 /* if the terminator isn't the same as the start character (e.g.,
10795 matched brackets), we have to allow more in the quoting, and
10796 be prepared for nested brackets.
10799 /* read until we run out of string, or we find the terminator */
10800 for (; s < PL_bufend; s++,to++) {
10801 /* embedded newlines increment the line count */
10802 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10803 COPLINE_INC_WITH_HERELINES;
10804 /* backslashes can escape the open or closing characters */
10805 if (*s == '\\' && s+1 < PL_bufend) {
10806 if (!keep_quoted &&
10807 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10811 /* Here, 'deprecate_escaped_meta' is true iff the
10812 * delimiters are paired metacharacters, and 's' points
10813 * to an occurrence of one of them within the string,
10814 * which was preceded by a backslash. If this is a
10815 * context where the delimiter is also a metacharacter,
10816 * the backslash is useless, and deprecated. () and []
10817 * are meta in any context. {} are meta only when
10818 * appearing in a quantifier or in things like '\p{'
10819 * (but '\\p{' isn't meta). They also aren't meta
10820 * unless there is a matching closed, escaped char
10821 * later on within the string. If 's' points to an
10822 * open, set a flag; if to a close, test that flag, and
10823 * raise a warning if it was set */
10825 if (deprecate_escaped_meta) {
10826 if (*s == PL_multi_open) {
10830 /* Look for a closing '\}' */
10831 else if (regcurly(s, TRUE)) {
10834 /* Look for e.g. '\x{' */
10835 else if (s - start > 2
10836 && _generic_isCC(*(s-2),
10837 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10838 { /* Exclude '\\x', '\\\\x', etc. */
10839 char *lookbehind = s - 4;
10840 bool is_meta = TRUE;
10841 while (lookbehind >= start
10842 && *lookbehind == '\\')
10844 is_meta = ! is_meta;
10852 else if (escaped_open) {
10853 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10854 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10855 escaped_open = NULL;
10862 /* allow nested opens and closes */
10863 else if (*s == PL_multi_close && --brackets <= 0)
10865 else if (*s == PL_multi_open)
10867 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10872 /* terminate the copied string and update the sv's end-of-string */
10874 SvCUR_set(sv, to - SvPVX_const(sv));
10877 * this next chunk reads more into the buffer if we're not done yet
10881 break; /* handle case where we are done yet :-) */
10883 #ifndef PERL_STRICT_CR
10884 if (to - SvPVX_const(sv) >= 2) {
10885 if ((to[-2] == '\r' && to[-1] == '\n') ||
10886 (to[-2] == '\n' && to[-1] == '\r'))
10890 SvCUR_set(sv, to - SvPVX_const(sv));
10892 else if (to[-1] == '\r')
10895 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10900 /* if we're out of file, or a read fails, bail and reset the current
10901 line marker so we can report where the unterminated string began
10904 if (PL_madskills) {
10905 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10907 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10909 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10912 COPLINE_INC_WITH_HERELINES;
10913 PL_bufptr = PL_bufend;
10914 if (!lex_next_chunk(0)) {
10916 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10925 /* at this point, we have successfully read the delimited string */
10927 if (!PL_encoding || UTF || re_reparse) {
10929 if (PL_madskills) {
10930 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10931 const int len = s - tstart;
10933 sv_catpvn(PL_thisstuff, tstart, len);
10935 PL_thisstuff = newSVpvn(tstart, len);
10936 if (!PL_thisclose && !keep_delims)
10937 PL_thisclose = newSVpvn(s,termlen);
10942 sv_catpvn(sv, s, termlen);
10947 if (PL_madskills) {
10948 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10949 const int len = s - tstart - termlen;
10951 sv_catpvn(PL_thisstuff, tstart, len);
10953 PL_thisstuff = newSVpvn(tstart, len);
10954 if (!PL_thisclose && !keep_delims)
10955 PL_thisclose = newSVpvn(s - termlen,termlen);
10959 if (has_utf8 || (PL_encoding && !re_reparse))
10962 PL_multi_end = CopLINE(PL_curcop);
10963 CopLINE_set(PL_curcop, PL_multi_start);
10964 PL_parser->herelines = herelines;
10966 /* if we allocated too much space, give some back */
10967 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10968 SvLEN_set(sv, SvCUR(sv) + 1);
10969 SvPV_renew(sv, SvLEN(sv));
10972 /* decide whether this is the first or second quoted string we've read
10977 PL_sublex_info.repl = sv;
10985 takes: pointer to position in buffer
10986 returns: pointer to new position in buffer
10987 side-effects: builds ops for the constant in pl_yylval.op
10989 Read a number in any of the formats that Perl accepts:
10991 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10992 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10995 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10997 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11000 If it reads a number without a decimal point or an exponent, it will
11001 try converting the number to an integer and see if it can do so
11002 without loss of precision.
11006 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11009 const char *s = start; /* current position in buffer */
11010 char *d; /* destination in temp buffer */
11011 char *e; /* end of temp buffer */
11012 NV nv; /* number read, as a double */
11013 SV *sv = NULL; /* place to put the converted number */
11014 bool floatit; /* boolean: int or float? */
11015 const char *lastub = NULL; /* position of last underbar */
11016 static const char* const number_too_long = "Number too long";
11018 PERL_ARGS_ASSERT_SCAN_NUM;
11020 /* We use the first character to decide what type of number this is */
11024 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11026 /* if it starts with a 0, it could be an octal number, a decimal in
11027 0.13 disguise, or a hexadecimal number, or a binary number. */
11031 u holds the "number so far"
11032 shift the power of 2 of the base
11033 (hex == 4, octal == 3, binary == 1)
11034 overflowed was the number more than we can hold?
11036 Shift is used when we add a digit. It also serves as an "are
11037 we in octal/hex/binary?" indicator to disallow hex characters
11038 when in octal mode.
11043 bool overflowed = FALSE;
11044 bool just_zero = TRUE; /* just plain 0 or binary number? */
11045 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11046 static const char* const bases[5] =
11047 { "", "binary", "", "octal", "hexadecimal" };
11048 static const char* const Bases[5] =
11049 { "", "Binary", "", "Octal", "Hexadecimal" };
11050 static const char* const maxima[5] =
11052 "0b11111111111111111111111111111111",
11056 const char *base, *Base, *max;
11058 /* check for hex */
11059 if (s[1] == 'x' || s[1] == 'X') {
11063 } else if (s[1] == 'b' || s[1] == 'B') {
11068 /* check for a decimal in disguise */
11069 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11071 /* so it must be octal */
11078 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11079 "Misplaced _ in number");
11083 base = bases[shift];
11084 Base = Bases[shift];
11085 max = maxima[shift];
11087 /* read the rest of the number */
11089 /* x is used in the overflow test,
11090 b is the digit we're adding on. */
11095 /* if we don't mention it, we're done */
11099 /* _ are ignored -- but warned about if consecutive */
11101 if (lastub && s == lastub + 1)
11102 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11103 "Misplaced _ in number");
11107 /* 8 and 9 are not octal */
11108 case '8': case '9':
11110 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11114 case '2': case '3': case '4':
11115 case '5': case '6': case '7':
11117 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11120 case '0': case '1':
11121 b = *s++ & 15; /* ASCII digit -> value of digit */
11125 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11126 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11127 /* make sure they said 0x */
11130 b = (*s++ & 7) + 9;
11132 /* Prepare to put the digit we have onto the end
11133 of the number so far. We check for overflows.
11139 x = u << shift; /* make room for the digit */
11141 if ((x >> shift) != u
11142 && !(PL_hints & HINT_NEW_BINARY)) {
11145 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11146 "Integer overflow in %s number",
11149 u = x | b; /* add the digit to the end */
11152 n *= nvshift[shift];
11153 /* If an NV has not enough bits in its
11154 * mantissa to represent an UV this summing of
11155 * small low-order numbers is a waste of time
11156 * (because the NV cannot preserve the
11157 * low-order bits anyway): we could just
11158 * remember when did we overflow and in the
11159 * end just multiply n by the right
11167 /* if we get here, we had success: make a scalar value from
11172 /* final misplaced underbar check */
11173 if (s[-1] == '_') {
11174 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11178 if (n > 4294967295.0)
11179 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11180 "%s number > %s non-portable",
11186 if (u > 0xffffffff)
11187 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11188 "%s number > %s non-portable",
11193 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11194 sv = new_constant(start, s - start, "integer",
11195 sv, NULL, NULL, 0);
11196 else if (PL_hints & HINT_NEW_BINARY)
11197 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11202 handle decimal numbers.
11203 we're also sent here when we read a 0 as the first digit
11205 case '1': case '2': case '3': case '4': case '5':
11206 case '6': case '7': case '8': case '9': case '.':
11209 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11212 /* read next group of digits and _ and copy into d */
11213 while (isDIGIT(*s) || *s == '_') {
11214 /* skip underscores, checking for misplaced ones
11218 if (lastub && s == lastub + 1)
11219 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11220 "Misplaced _ in number");
11224 /* check for end of fixed-length buffer */
11226 Perl_croak(aTHX_ "%s", number_too_long);
11227 /* if we're ok, copy the character */
11232 /* final misplaced underbar check */
11233 if (lastub && s == lastub + 1) {
11234 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11237 /* read a decimal portion if there is one. avoid
11238 3..5 being interpreted as the number 3. followed
11241 if (*s == '.' && s[1] != '.') {
11246 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11247 "Misplaced _ in number");
11251 /* copy, ignoring underbars, until we run out of digits.
11253 for (; isDIGIT(*s) || *s == '_'; s++) {
11254 /* fixed length buffer check */
11256 Perl_croak(aTHX_ "%s", number_too_long);
11258 if (lastub && s == lastub + 1)
11259 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11260 "Misplaced _ in number");
11266 /* fractional part ending in underbar? */
11267 if (s[-1] == '_') {
11268 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11269 "Misplaced _ in number");
11271 if (*s == '.' && isDIGIT(s[1])) {
11272 /* oops, it's really a v-string, but without the "v" */
11278 /* read exponent part, if present */
11279 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11283 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11284 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11286 /* stray preinitial _ */
11288 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11289 "Misplaced _ in number");
11293 /* allow positive or negative exponent */
11294 if (*s == '+' || *s == '-')
11297 /* stray initial _ */
11299 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11300 "Misplaced _ in number");
11304 /* read digits of exponent */
11305 while (isDIGIT(*s) || *s == '_') {
11308 Perl_croak(aTHX_ "%s", number_too_long);
11312 if (((lastub && s == lastub + 1) ||
11313 (!isDIGIT(s[1]) && s[1] != '_')))
11314 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11315 "Misplaced _ in number");
11323 We try to do an integer conversion first if no characters
11324 indicating "float" have been found.
11329 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11331 if (flags == IS_NUMBER_IN_UV) {
11333 sv = newSViv(uv); /* Prefer IVs over UVs. */
11336 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11337 if (uv <= (UV) IV_MIN)
11338 sv = newSViv(-(IV)uv);
11345 /* terminate the string */
11347 nv = Atof(PL_tokenbuf);
11352 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11353 const char *const key = floatit ? "float" : "integer";
11354 const STRLEN keylen = floatit ? 5 : 7;
11355 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11356 key, keylen, sv, NULL, NULL, 0);
11360 /* if it starts with a v, it could be a v-string */
11363 sv = newSV(5); /* preallocate storage space */
11364 ENTER_with_name("scan_vstring");
11366 s = scan_vstring(s, PL_bufend, sv);
11367 SvREFCNT_inc_simple_void_NN(sv);
11368 LEAVE_with_name("scan_vstring");
11372 /* make the op for the constant and return */
11375 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11377 lvalp->opval = NULL;
11383 S_scan_formline(pTHX_ char *s)
11388 SV * const stuff = newSVpvs("");
11389 bool needargs = FALSE;
11390 bool eofmt = FALSE;
11392 char *tokenstart = s;
11393 SV* savewhite = NULL;
11395 if (PL_madskills) {
11396 savewhite = PL_thiswhite;
11401 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11403 while (!needargs) {
11406 #ifdef PERL_STRICT_CR
11407 while (SPACE_OR_TAB(*t))
11410 while (SPACE_OR_TAB(*t) || *t == '\r')
11413 if (*t == '\n' || t == PL_bufend) {
11418 eol = (char *) memchr(s,'\n',PL_bufend-s);
11422 for (t = s; t < eol; t++) {
11423 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11425 goto enough; /* ~~ must be first line in formline */
11427 if (*t == '@' || *t == '^')
11431 sv_catpvn(stuff, s, eol-s);
11432 #ifndef PERL_STRICT_CR
11433 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11434 char *end = SvPVX(stuff) + SvCUR(stuff);
11437 SvCUR_set(stuff, SvCUR(stuff) - 1);
11445 if ((PL_rsfp || PL_parser->filtered)
11446 && PL_parser->form_lex_state == LEX_NORMAL) {
11449 if (PL_madskills) {
11451 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11453 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11456 PL_bufptr = PL_bufend;
11457 COPLINE_INC_WITH_HERELINES;
11458 got_some = lex_next_chunk(0);
11459 CopLINE_dec(PL_curcop);
11462 tokenstart = PL_bufptr;
11470 if (!SvCUR(stuff) || needargs)
11471 PL_lex_state = PL_parser->form_lex_state;
11472 if (SvCUR(stuff)) {
11473 PL_expect = XSTATE;
11475 start_force(PL_curforce);
11476 NEXTVAL_NEXTTOKE.ival = 0;
11477 force_next(FORMLBRACK);
11480 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11482 else if (PL_encoding)
11483 sv_recode_to_utf8(stuff, PL_encoding);
11485 start_force(PL_curforce);
11486 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11490 SvREFCNT_dec(stuff);
11492 PL_lex_formbrack = 0;
11495 if (PL_madskills) {
11497 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11499 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11500 PL_thiswhite = savewhite;
11507 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11510 const I32 oldsavestack_ix = PL_savestack_ix;
11511 CV* const outsidecv = PL_compcv;
11513 SAVEI32(PL_subline);
11514 save_item(PL_subname);
11515 SAVESPTR(PL_compcv);
11517 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11518 CvFLAGS(PL_compcv) |= flags;
11520 PL_subline = CopLINE(PL_curcop);
11521 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11522 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11523 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11524 if (outsidecv && CvPADLIST(outsidecv))
11525 CvPADLIST(PL_compcv)->xpadl_outid =
11526 PadlistNAMES(CvPADLIST(outsidecv));
11528 return oldsavestack_ix;
11532 S_yywarn(pTHX_ const char *const s, U32 flags)
11536 PERL_ARGS_ASSERT_YYWARN;
11538 PL_in_eval |= EVAL_WARNONLY;
11539 yyerror_pv(s, flags);
11540 PL_in_eval &= ~EVAL_WARNONLY;
11545 Perl_yyerror(pTHX_ const char *const s)
11547 PERL_ARGS_ASSERT_YYERROR;
11548 return yyerror_pvn(s, strlen(s), 0);
11552 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11554 PERL_ARGS_ASSERT_YYERROR_PV;
11555 return yyerror_pvn(s, strlen(s), flags);
11559 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11562 const char *context = NULL;
11565 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11566 int yychar = PL_parser->yychar;
11568 PERL_ARGS_ASSERT_YYERROR_PVN;
11570 if (!yychar || (yychar == ';' && !PL_rsfp))
11571 sv_catpvs(where_sv, "at EOF");
11572 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11573 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11574 PL_oldbufptr != PL_bufptr) {
11577 The code below is removed for NetWare because it abends/crashes on NetWare
11578 when the script has error such as not having the closing quotes like:
11579 if ($var eq "value)
11580 Checking of white spaces is anyway done in NetWare code.
11583 while (isSPACE(*PL_oldoldbufptr))
11586 context = PL_oldoldbufptr;
11587 contlen = PL_bufptr - PL_oldoldbufptr;
11589 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11590 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11593 The code below is removed for NetWare because it abends/crashes on NetWare
11594 when the script has error such as not having the closing quotes like:
11595 if ($var eq "value)
11596 Checking of white spaces is anyway done in NetWare code.
11599 while (isSPACE(*PL_oldbufptr))
11602 context = PL_oldbufptr;
11603 contlen = PL_bufptr - PL_oldbufptr;
11605 else if (yychar > 255)
11606 sv_catpvs(where_sv, "next token ???");
11607 else if (yychar == -2) { /* YYEMPTY */
11608 if (PL_lex_state == LEX_NORMAL ||
11609 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11610 sv_catpvs(where_sv, "at end of line");
11611 else if (PL_lex_inpat)
11612 sv_catpvs(where_sv, "within pattern");
11614 sv_catpvs(where_sv, "within string");
11617 sv_catpvs(where_sv, "next char ");
11619 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11620 else if (isPRINT_LC(yychar)) {
11621 const char string = yychar;
11622 sv_catpvn(where_sv, &string, 1);
11625 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11627 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11628 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11629 OutCopFILE(PL_curcop),
11630 (IV)(PL_parser->preambling == NOLINE
11631 ? CopLINE(PL_curcop)
11632 : PL_parser->preambling));
11634 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11635 UTF8fARG(UTF, contlen, context));
11637 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11638 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11639 Perl_sv_catpvf(aTHX_ msg,
11640 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11641 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11644 if (PL_in_eval & EVAL_WARNONLY) {
11645 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11649 if (PL_error_count >= 10) {
11651 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11652 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11653 SVfARG(errsv), OutCopFILE(PL_curcop));
11655 Perl_croak(aTHX_ "%s has too many errors.\n",
11656 OutCopFILE(PL_curcop));
11659 PL_in_my_stash = NULL;
11664 S_swallow_bom(pTHX_ U8 *s)
11667 const STRLEN slen = SvCUR(PL_linestr);
11669 PERL_ARGS_ASSERT_SWALLOW_BOM;
11673 if (s[1] == 0xFE) {
11674 /* UTF-16 little-endian? (or UTF-32LE?) */
11675 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11676 /* diag_listed_as: Unsupported script encoding %s */
11677 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11678 #ifndef PERL_NO_UTF16_FILTER
11679 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11681 if (PL_bufend > (char*)s) {
11682 s = add_utf16_textfilter(s, TRUE);
11685 /* diag_listed_as: Unsupported script encoding %s */
11686 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11691 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11692 #ifndef PERL_NO_UTF16_FILTER
11693 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11695 if (PL_bufend > (char *)s) {
11696 s = add_utf16_textfilter(s, FALSE);
11699 /* diag_listed_as: Unsupported script encoding %s */
11700 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11704 case BOM_UTF8_FIRST_BYTE: {
11705 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11706 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11707 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11708 s += len + 1; /* UTF-8 */
11715 if (s[2] == 0xFE && s[3] == 0xFF) {
11716 /* UTF-32 big-endian */
11717 /* diag_listed_as: Unsupported script encoding %s */
11718 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11721 else if (s[2] == 0 && s[3] != 0) {
11724 * are a good indicator of UTF-16BE. */
11725 #ifndef PERL_NO_UTF16_FILTER
11726 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11727 s = add_utf16_textfilter(s, FALSE);
11729 /* diag_listed_as: Unsupported script encoding %s */
11730 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11736 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11739 * are a good indicator of UTF-16LE. */
11740 #ifndef PERL_NO_UTF16_FILTER
11741 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11742 s = add_utf16_textfilter(s, TRUE);
11744 /* diag_listed_as: Unsupported script encoding %s */
11745 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11753 #ifndef PERL_NO_UTF16_FILTER
11755 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11758 SV *const filter = FILTER_DATA(idx);
11759 /* We re-use this each time round, throwing the contents away before we
11761 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11762 SV *const utf8_buffer = filter;
11763 IV status = IoPAGE(filter);
11764 const bool reverse = cBOOL(IoLINES(filter));
11767 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11769 /* As we're automatically added, at the lowest level, and hence only called
11770 from this file, we can be sure that we're not called in block mode. Hence
11771 don't bother writing code to deal with block mode. */
11773 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11776 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11778 DEBUG_P(PerlIO_printf(Perl_debug_log,
11779 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11780 FPTR2DPTR(void *, S_utf16_textfilter),
11781 reverse ? 'l' : 'b', idx, maxlen, status,
11782 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11789 /* First, look in our buffer of existing UTF-8 data: */
11790 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11794 } else if (status == 0) {
11796 IoPAGE(filter) = 0;
11797 nl = SvEND(utf8_buffer);
11800 STRLEN got = nl - SvPVX(utf8_buffer);
11801 /* Did we have anything to append? */
11803 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11804 /* Everything else in this code works just fine if SVp_POK isn't
11805 set. This, however, needs it, and we need it to work, else
11806 we loop infinitely because the buffer is never consumed. */
11807 sv_chop(utf8_buffer, nl);
11811 /* OK, not a complete line there, so need to read some more UTF-16.
11812 Read an extra octect if the buffer currently has an odd number. */
11816 if (SvCUR(utf16_buffer) >= 2) {
11817 /* Location of the high octet of the last complete code point.
11818 Gosh, UTF-16 is a pain. All the benefits of variable length,
11819 *coupled* with all the benefits of partial reads and
11821 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11822 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11824 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11828 /* We have the first half of a surrogate. Read more. */
11829 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11832 status = FILTER_READ(idx + 1, utf16_buffer,
11833 160 + (SvCUR(utf16_buffer) & 1));
11834 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11835 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11838 IoPAGE(filter) = status;
11843 chars = SvCUR(utf16_buffer) >> 1;
11844 have = SvCUR(utf8_buffer);
11845 SvGROW(utf8_buffer, have + chars * 3 + 1);
11848 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11849 (U8*)SvPVX_const(utf8_buffer) + have,
11850 chars * 2, &newlen);
11852 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11853 (U8*)SvPVX_const(utf8_buffer) + have,
11854 chars * 2, &newlen);
11856 SvCUR_set(utf8_buffer, have + newlen);
11859 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11860 it's private to us, and utf16_to_utf8{,reversed} take a
11861 (pointer,length) pair, rather than a NUL-terminated string. */
11862 if(SvCUR(utf16_buffer) & 1) {
11863 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11864 SvCUR_set(utf16_buffer, 1);
11866 SvCUR_set(utf16_buffer, 0);
11869 DEBUG_P(PerlIO_printf(Perl_debug_log,
11870 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11872 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11873 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11878 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11880 SV *filter = filter_add(S_utf16_textfilter, NULL);
11882 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11884 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11885 sv_setpvs(filter, "");
11886 IoLINES(filter) = reversed;
11887 IoPAGE(filter) = 1; /* Not EOF */
11889 /* Sadly, we have to return a valid pointer, come what may, so we have to
11890 ignore any error return from this. */
11891 SvCUR_set(PL_linestr, 0);
11892 if (FILTER_READ(0, PL_linestr, 0)) {
11893 SvUTF8_on(PL_linestr);
11895 SvUTF8_on(PL_linestr);
11897 PL_bufend = SvEND(PL_linestr);
11898 return (U8*)SvPVX(PL_linestr);
11903 Returns a pointer to the next character after the parsed
11904 vstring, as well as updating the passed in sv.
11906 Function must be called like
11908 sv = sv_2mortal(newSV(5));
11909 s = scan_vstring(s,e,sv);
11911 where s and e are the start and end of the string.
11912 The sv should already be large enough to store the vstring
11913 passed in, for performance reasons.
11915 This function may croak if fatal warnings are enabled in the
11916 calling scope, hence the sv_2mortal in the example (to prevent
11917 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11923 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11926 const char *pos = s;
11927 const char *start = s;
11929 PERL_ARGS_ASSERT_SCAN_VSTRING;
11931 if (*pos == 'v') pos++; /* get past 'v' */
11932 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11934 if ( *pos != '.') {
11935 /* this may not be a v-string if followed by => */
11936 const char *next = pos;
11937 while (next < e && isSPACE(*next))
11939 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11940 /* return string not v-string */
11941 sv_setpvn(sv,(char *)s,pos-s);
11942 return (char *)pos;
11946 if (!isALPHA(*pos)) {
11947 U8 tmpbuf[UTF8_MAXBYTES+1];
11950 s++; /* get past 'v' */
11955 /* this is atoi() that tolerates underscores */
11958 const char *end = pos;
11960 while (--end >= s) {
11962 const UV orev = rev;
11963 rev += (*end - '0') * mult;
11966 /* diag_listed_as: Integer overflow in %s number */
11967 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11968 "Integer overflow in decimal number");
11972 if (rev > 0x7FFFFFFF)
11973 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11975 /* Append native character for the rev point */
11976 tmpend = uvchr_to_utf8(tmpbuf, rev);
11977 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11978 if (!UVCHR_IS_INVARIANT(rev))
11980 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11986 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11990 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11997 Perl_keyword_plugin_standard(pTHX_
11998 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12000 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12001 PERL_UNUSED_CONTEXT;
12002 PERL_UNUSED_ARG(keyword_ptr);
12003 PERL_UNUSED_ARG(keyword_len);
12004 PERL_UNUSED_ARG(op_ptr);
12005 return KEYWORD_PLUGIN_DECLINE;
12008 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12010 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12012 SAVEI32(PL_lex_brackets);
12013 if (PL_lex_brackets > 100)
12014 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12015 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12016 SAVEI32(PL_lex_allbrackets);
12017 PL_lex_allbrackets = 0;
12018 SAVEI8(PL_lex_fakeeof);
12019 PL_lex_fakeeof = (U8)fakeeof;
12020 if(yyparse(gramtype) && !PL_parser->error_count)
12021 qerror(Perl_mess(aTHX_ "Parse error"));
12024 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12026 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12030 SAVEVPTR(PL_eval_root);
12031 PL_eval_root = NULL;
12032 parse_recdescent(gramtype, fakeeof);
12038 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12040 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12043 if (flags & ~PARSE_OPTIONAL)
12044 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12045 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12046 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12047 if (!PL_parser->error_count)
12048 qerror(Perl_mess(aTHX_ "Parse error"));
12049 exprop = newOP(OP_NULL, 0);
12055 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12057 Parse a Perl arithmetic expression. This may contain operators of precedence
12058 down to the bit shift operators. The expression must be followed (and thus
12059 terminated) either by a comparison or lower-precedence operator or by
12060 something that would normally terminate an expression such as semicolon.
12061 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12062 otherwise it is mandatory. It is up to the caller to ensure that the
12063 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12064 the source of the code to be parsed and the lexical context for the
12067 The op tree representing the expression is returned. If an optional
12068 expression is absent, a null pointer is returned, otherwise the pointer
12071 If an error occurs in parsing or compilation, in most cases a valid op
12072 tree is returned anyway. The error is reflected in the parser state,
12073 normally resulting in a single exception at the top level of parsing
12074 which covers all the compilation errors that occurred. Some compilation
12075 errors, however, will throw an exception immediately.
12081 Perl_parse_arithexpr(pTHX_ U32 flags)
12083 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12087 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12089 Parse a Perl term expression. This may contain operators of precedence
12090 down to the assignment operators. The expression must be followed (and thus
12091 terminated) either by a comma or lower-precedence operator or by
12092 something that would normally terminate an expression such as semicolon.
12093 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12094 otherwise it is mandatory. It is up to the caller to ensure that the
12095 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12096 the source of the code to be parsed and the lexical context for the
12099 The op tree representing the expression is returned. If an optional
12100 expression is absent, a null pointer is returned, otherwise the pointer
12103 If an error occurs in parsing or compilation, in most cases a valid op
12104 tree is returned anyway. The error is reflected in the parser state,
12105 normally resulting in a single exception at the top level of parsing
12106 which covers all the compilation errors that occurred. Some compilation
12107 errors, however, will throw an exception immediately.
12113 Perl_parse_termexpr(pTHX_ U32 flags)
12115 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12119 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12121 Parse a Perl list expression. This may contain operators of precedence
12122 down to the comma operator. The expression must be followed (and thus
12123 terminated) either by a low-precedence logic operator such as C<or> or by
12124 something that would normally terminate an expression such as semicolon.
12125 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12126 otherwise it is mandatory. It is up to the caller to ensure that the
12127 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12128 the source of the code to be parsed and the lexical context for the
12131 The op tree representing the expression is returned. If an optional
12132 expression is absent, a null pointer is returned, otherwise the pointer
12135 If an error occurs in parsing or compilation, in most cases a valid op
12136 tree is returned anyway. The error is reflected in the parser state,
12137 normally resulting in a single exception at the top level of parsing
12138 which covers all the compilation errors that occurred. Some compilation
12139 errors, however, will throw an exception immediately.
12145 Perl_parse_listexpr(pTHX_ U32 flags)
12147 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12151 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12153 Parse a single complete Perl expression. This allows the full
12154 expression grammar, including the lowest-precedence operators such
12155 as C<or>. The expression must be followed (and thus terminated) by a
12156 token that an expression would normally be terminated by: end-of-file,
12157 closing bracketing punctuation, semicolon, or one of the keywords that
12158 signals a postfix expression-statement modifier. If I<flags> includes
12159 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12160 mandatory. It is up to the caller to ensure that the dynamic parser
12161 state (L</PL_parser> et al) is correctly set to reflect the source of
12162 the code to be parsed and the lexical context for the expression.
12164 The op tree representing the expression is returned. If an optional
12165 expression is absent, a null pointer is returned, otherwise the pointer
12168 If an error occurs in parsing or compilation, in most cases a valid op
12169 tree is returned anyway. The error is reflected in the parser state,
12170 normally resulting in a single exception at the top level of parsing
12171 which covers all the compilation errors that occurred. Some compilation
12172 errors, however, will throw an exception immediately.
12178 Perl_parse_fullexpr(pTHX_ U32 flags)
12180 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12184 =for apidoc Amx|OP *|parse_block|U32 flags
12186 Parse a single complete Perl code block. This consists of an opening
12187 brace, a sequence of statements, and a closing brace. The block
12188 constitutes a lexical scope, so C<my> variables and various compile-time
12189 effects can be contained within it. It is up to the caller to ensure
12190 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12191 reflect the source of the code to be parsed and the lexical context for
12194 The op tree representing the code block is returned. This is always a
12195 real op, never a null pointer. It will normally be a C<lineseq> list,
12196 including C<nextstate> or equivalent ops. No ops to construct any kind
12197 of runtime scope are included by virtue of it being a block.
12199 If an error occurs in parsing or compilation, in most cases a valid op
12200 tree (most likely null) is returned anyway. The error is reflected in
12201 the parser state, normally resulting in a single exception at the top
12202 level of parsing which covers all the compilation errors that occurred.
12203 Some compilation errors, however, will throw an exception immediately.
12205 The I<flags> parameter is reserved for future use, and must always
12212 Perl_parse_block(pTHX_ U32 flags)
12215 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12216 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12220 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12222 Parse a single unadorned Perl statement. This may be a normal imperative
12223 statement or a declaration that has compile-time effect. It does not
12224 include any label or other affixture. It is up to the caller to ensure
12225 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12226 reflect the source of the code to be parsed and the lexical context for
12229 The op tree representing the statement is returned. This may be a
12230 null pointer if the statement is null, for example if it was actually
12231 a subroutine definition (which has compile-time side effects). If not
12232 null, it will be ops directly implementing the statement, suitable to
12233 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12234 equivalent op (except for those embedded in a scope contained entirely
12235 within the statement).
12237 If an error occurs in parsing or compilation, in most cases a valid op
12238 tree (most likely null) is returned anyway. The error is reflected in
12239 the parser state, normally resulting in a single exception at the top
12240 level of parsing which covers all the compilation errors that occurred.
12241 Some compilation errors, however, will throw an exception immediately.
12243 The I<flags> parameter is reserved for future use, and must always
12250 Perl_parse_barestmt(pTHX_ U32 flags)
12253 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12254 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12258 =for apidoc Amx|SV *|parse_label|U32 flags
12260 Parse a single label, possibly optional, of the type that may prefix a
12261 Perl statement. It is up to the caller to ensure that the dynamic parser
12262 state (L</PL_parser> et al) is correctly set to reflect the source of
12263 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12264 label is optional, otherwise it is mandatory.
12266 The name of the label is returned in the form of a fresh scalar. If an
12267 optional label is absent, a null pointer is returned.
12269 If an error occurs in parsing, which can only occur if the label is
12270 mandatory, a valid label is returned anyway. The error is reflected in
12271 the parser state, normally resulting in a single exception at the top
12272 level of parsing which covers all the compilation errors that occurred.
12278 Perl_parse_label(pTHX_ U32 flags)
12280 if (flags & ~PARSE_OPTIONAL)
12281 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12282 if (PL_lex_state == LEX_KNOWNEXT) {
12283 PL_parser->yychar = yylex();
12284 if (PL_parser->yychar == LABEL) {
12285 char * const lpv = pl_yylval.pval;
12286 STRLEN llen = strlen(lpv);
12287 PL_parser->yychar = YYEMPTY;
12288 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12295 STRLEN wlen, bufptr_pos;
12298 if (!isIDFIRST_lazy_if(s, UTF))
12300 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12301 if (word_takes_any_delimeter(s, wlen))
12303 bufptr_pos = s - SvPVX(PL_linestr);
12305 lex_read_space(LEX_KEEP_PREVIOUS);
12307 s = SvPVX(PL_linestr) + bufptr_pos;
12308 if (t[0] == ':' && t[1] != ':') {
12309 PL_oldoldbufptr = PL_oldbufptr;
12312 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12316 if (flags & PARSE_OPTIONAL) {
12319 qerror(Perl_mess(aTHX_ "Parse error"));
12320 return newSVpvs("x");
12327 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12329 Parse a single complete Perl statement. This may be a normal imperative
12330 statement or a declaration that has compile-time effect, and may include
12331 optional labels. It is up to the caller to ensure that the dynamic
12332 parser state (L</PL_parser> et al) is correctly set to reflect the source
12333 of the code to be parsed and the lexical context for the statement.
12335 The op tree representing the statement is returned. This may be a
12336 null pointer if the statement is null, for example if it was actually
12337 a subroutine definition (which has compile-time side effects). If not
12338 null, it will be the result of a L</newSTATEOP> call, normally including
12339 a C<nextstate> or equivalent op.
12341 If an error occurs in parsing or compilation, in most cases a valid op
12342 tree (most likely null) is returned anyway. The error is reflected in
12343 the parser state, normally resulting in a single exception at the top
12344 level of parsing which covers all the compilation errors that occurred.
12345 Some compilation errors, however, will throw an exception immediately.
12347 The I<flags> parameter is reserved for future use, and must always
12354 Perl_parse_fullstmt(pTHX_ U32 flags)
12357 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12358 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12362 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12364 Parse a sequence of zero or more Perl statements. These may be normal
12365 imperative statements, including optional labels, or declarations
12366 that have compile-time effect, or any mixture thereof. The statement
12367 sequence ends when a closing brace or end-of-file is encountered in a
12368 place where a new statement could have validly started. It is up to
12369 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12370 is correctly set to reflect the source of the code to be parsed and the
12371 lexical context for the statements.
12373 The op tree representing the statement sequence is returned. This may
12374 be a null pointer if the statements were all null, for example if there
12375 were no statements or if there were only subroutine definitions (which
12376 have compile-time side effects). If not null, it will be a C<lineseq>
12377 list, normally including C<nextstate> or equivalent ops.
12379 If an error occurs in parsing or compilation, in most cases a valid op
12380 tree is returned anyway. The error is reflected in the parser state,
12381 normally resulting in a single exception at the top level of parsing
12382 which covers all the compilation errors that occurred. Some compilation
12383 errors, however, will throw an exception immediately.
12385 The I<flags> parameter is reserved for future use, and must always
12392 Perl_parse_stmtseq(pTHX_ U32 flags)
12397 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12398 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12399 c = lex_peek_unichar(0);
12400 if (c != -1 && c != /*{*/'}')
12401 qerror(Perl_mess(aTHX_ "Parse error"));
12407 * c-indentation-style: bsd
12408 * c-basic-offset: 4
12409 * indent-tabs-mode: nil
12412 * ex: set ts=8 sts=4 sw=4 et: